diff --git a/clang-tools-extra/clang-tidy/ClangTidy.cpp b/clang-tools-extra/clang-tidy/ClangTidy.cpp
index 73d66b980a5e1..7de313ad4da6a 100644
--- a/clang-tools-extra/clang-tidy/ClangTidy.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidy.cpp
@@ -385,7 +385,7 @@ static CheckersList getAnalyzerCheckersAndPackages(ClangTidyContext &Context,
 #endif // CLANG_TIDY_ENABLE_STATIC_ANALYZER
 
 std::unique_ptr<clang::ASTConsumer>
-ClangTidyASTConsumerFactory::CreateASTConsumer(
+ClangTidyASTConsumerFactory::createASTConsumer(
     clang::CompilerInstance &Compiler, StringRef File) {
   // FIXME: Move this to a separate method, so that CreateASTConsumer doesn't
   // modify Compiler.
@@ -573,7 +573,7 @@ runClangTidy(clang::tidy::ClangTidyContext &Context,
     Action(ClangTidyASTConsumerFactory *Factory) : Factory(Factory) {}
     std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &Compiler,
                                                    StringRef File) override {
-      return Factory->CreateASTConsumer(Compiler, File);
+      return Factory->createASTConsumer(Compiler, File);
     }
 
   private:
diff --git a/clang-tools-extra/clang-tidy/ClangTidy.h b/clang-tools-extra/clang-tidy/ClangTidy.h
index bbe4fe69123ff..507d1ce6e572d 100644
--- a/clang-tools-extra/clang-tidy/ClangTidy.h
+++ b/clang-tools-extra/clang-tidy/ClangTidy.h
@@ -38,7 +38,7 @@ class ClangTidyASTConsumerFactory {
 
   /// Returns an ASTConsumer that runs the specified clang-tidy checks.
   std::unique_ptr<clang::ASTConsumer>
-  CreateASTConsumer(clang::CompilerInstance &Compiler, StringRef File);
+  createASTConsumer(clang::CompilerInstance &Compiler, StringRef File);
 
   /// Get the list of enabled checks.
   std::vector<std::string> getCheckNames();
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
index 2d3eb371b6951..456de0e979dbd 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
@@ -66,7 +66,7 @@ class ClangTidyDiagnosticRenderer : public DiagnosticRenderer {
             ? tooling::DiagnosticMessage(Message, Loc.getManager(), Loc)
             : tooling::DiagnosticMessage(Message);
 
-    // Make sure that if a TokenRange is receieved from the check it is unfurled
+    // Make sure that if a TokenRange is received from the check it is unfurled
    // into a real CharRange for the diagnostic printer later.
    // Whatever we store here gets decoupled from the current SourceManager, so
    // we **have to** know the exact position and length of the highlight.
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
index 84925e81dc08e..f93deae64bb37 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
@@ -23,10 +23,10 @@ class CompilerInstance;
 class SourceManager;
 namespace ast_matchers {
 class MatchFinder;
-}
+} // namespace ast_matchers
 namespace tooling {
 class CompilationDatabase;
-}
+} // namespace tooling
 
 namespace tidy {
diff --git a/clang-tools-extra/clang-tidy/ClangTidyOptions.h b/clang-tools-extra/clang-tidy/ClangTidyOptions.h
index d8a4a14f5b520..3f09d39df13da 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyOptions.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyOptions.h
@@ -108,7 +108,7 @@ struct ClangTidyOptions {
     std::string Value;
 
     /// Priority stores relative precedence of the value loaded from config
-    /// files to disambigute local vs global value from different levels.
+    /// files to disambiguate local vs global value from different levels.
     unsigned Priority;
   };
   typedef std::pair<std::string, std::string> StringPair;
@@ -129,8 +129,8 @@ struct ClangTidyOptions {
   /// and using a FileOptionsProvider, it will take a configuration file in the
   /// parent directory (if any exists) and apply this config file on top of the
   /// parent one. IF true and using a ConfigOptionsProvider, it will apply this
-  /// config on top of any configuation file it finds in the directory using the
-  /// same logic as FileOptionsProvider. If false or missing, only this
+  /// config on top of any configuration file it finds in the directory using
+  /// the same logic as FileOptionsProvider. If false or missing, only this
   /// configuration file will be used.
   llvm::Optional<bool> InheritParentConfig;
diff --git a/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp b/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp
index 7d592d7e3e559..5d99e6b500754 100644
--- a/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/abseil/AbseilTidyModule.cpp
@@ -9,6 +9,7 @@
 #include "../ClangTidy.h"
 #include "../ClangTidyModule.h"
 #include "../ClangTidyModuleRegistry.h"
+#include "CleanupCtadCheck.h"
 #include "DurationAdditionCheck.h"
 #include "DurationComparisonCheck.h"
 #include "DurationConversionCastCheck.h"
@@ -35,6 +36,7 @@ namespace abseil {
 class AbseilModule : public ClangTidyModule {
 public:
   void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
+    CheckFactories.registerCheck<CleanupCtadCheck>("abseil-cleanup-ctad");
     CheckFactories.registerCheck<DurationAdditionCheck>(
         "abseil-duration-addition");
     CheckFactories.registerCheck<DurationComparisonCheck>(
diff --git a/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt b/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt
index b6ea21879dafa..e7c86fc8107dd 100644
--- a/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/abseil/CMakeLists.txt
@@ -5,6 +5,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_clang_library(clangTidyAbseilModule
   AbseilTidyModule.cpp
+  CleanupCtadCheck.cpp
   DurationAdditionCheck.cpp
   DurationComparisonCheck.cpp
   DurationConversionCastCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp
new file mode 100644
index 0000000000000..bc152f1dafa7b
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp
@@ -0,0 +1,49 @@
+//===--- CleanupCtadCheck.cpp - clang-tidy --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CleanupCtadCheck.h"
+#include "../utils/TransformerClangTidyCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Tooling/Transformer/RangeSelector.h"
+#include "clang/Tooling/Transformer/RewriteRule.h"
+#include "clang/Tooling/Transformer/Stencil.h"
+#include "llvm/ADT/StringRef.h"
+
+using namespace ::clang::ast_matchers;
+using namespace ::clang::transformer;
+
+namespace clang {
+namespace tidy {
+namespace abseil {
+
+RewriteRule CleanupCtadCheckImpl() {
+  auto warning_message = cat("prefer absl::Cleanup's class template argument "
+                             "deduction pattern in C++17 and higher");
+
+  return makeRule(
+      declStmt(has(varDecl(
+          hasType(autoType()), hasTypeLoc(typeLoc().bind("auto_type_loc")),
+          hasInitializer(traverse(
+              clang::TK_IgnoreUnlessSpelledInSource,
+              callExpr(callee(functionDecl(hasName("absl::MakeCleanup"))),
+                       argumentCountIs(1),
+                       hasArgument(0, expr().bind("make_cleanup_argument")))
+                  .bind("make_cleanup_call")))))),
+      {changeTo(node("auto_type_loc"), cat("absl::Cleanup")),
+       changeTo(node("make_cleanup_call"), cat(node("make_cleanup_argument")))},
+      warning_message);
+}
+
+CleanupCtadCheck::CleanupCtadCheck(StringRef Name, ClangTidyContext *Context)
+    : utils::TransformerClangTidyCheck(CleanupCtadCheckImpl(), Name, Context) {}
+
+} // namespace abseil
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h
new file mode 100644
index 0000000000000..ce4e5c6be9d88
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h
@@ -0,0 +1,37 @@
+//===--- CleanupCtadCheck.h - clang-tidy ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_CLEANUPCTADCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_CLEANUPCTADCHECK_H
+
+#include "../utils/TransformerClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace abseil {
+
+/// Suggests switching the initialization pattern of `absl::Cleanup`
+/// instances from the factory function to class template argument
+/// deduction (CTAD), in C++17 and higher.
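+///
+/// An illustrative sketch of the rewrite performed by the rule above
+/// (the lambda body is a placeholder, not prescribed by the check):
+/// \code
+///   auto c = absl::MakeCleanup([] { /* release resources */ });  // before
+///   absl::Cleanup c = [] { /* release resources */ };            // after
+/// \endcode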
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/abseil-cleanup-ctad.html
+class CleanupCtadCheck : public utils::TransformerClangTidyCheck {
+public:
+  CleanupCtadCheck(StringRef Name, ClangTidyContext *Context);
+
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    return LangOpts.CPlusPlus17;
+  }
+};
+
+} // namespace abseil
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_ABSEIL_CLEANUPCTADCHECK_H
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h b/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h
index a4e4dd57a4d80..0bb603b6ba5d0 100644
--- a/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h
+++ b/clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.h
@@ -26,8 +26,8 @@ class DurationDivisionCheck : public ClangTidyCheck {
   bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
     return LangOpts.CPlusPlus;
   }
-  void registerMatchers(ast_matchers::MatchFinder *finder) override;
-  void check(const ast_matchers::MatchFinder::MatchResult &result) override;
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 };
 
 } // namespace abseil
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp
index 7da75fad589b4..aa839beddac6e 100644
--- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp
+++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp
@@ -145,7 +145,7 @@ void DurationFactoryScaleCheck::check(const MatchFinder::MatchResult &Result) {
     return;
 
   // We first handle the cases of literal zero (both float and integer).
-  if (IsLiteralZero(Result, *Arg)) {
+  if (isLiteralZero(Result, *Arg)) {
     diag(Call->getBeginLoc(),
          "use ZeroDuration() for zero-length time intervals")
         << FixItHint::CreateReplacement(Call->getSourceRange(),
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp
index 84c7a9fa92471..671ed65d3c578 100644
--- a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp
+++ b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp
@@ -157,7 +157,7 @@ llvm::StringRef getTimeInverseForScale(DurationScale Scale) {
 }
 
 /// Returns `true` if `Node` is a value which evaluates to a literal `0`.
-bool IsLiteralZero(const MatchFinder::MatchResult &Result, const Expr &Node) {
+bool isLiteralZero(const MatchFinder::MatchResult &Result, const Expr &Node) {
   auto ZeroMatcher =
       anyOf(integerLiteral(equals(0)), floatLiteral(equals(0.0)));
 
@@ -276,7 +276,7 @@ std::string rewriteExprFromNumberToDuration(
           rewriteInverseDurationCall(Result, Scale, RootNode))
     return *MaybeRewrite;
 
-  if (IsLiteralZero(Result, RootNode))
+  if (isLiteralZero(Result, RootNode))
     return std::string("absl::ZeroDuration()");
 
   return (llvm::Twine(getDurationFactoryForScale(Scale)) + "(" +
@@ -294,7 +294,7 @@ std::string rewriteExprFromNumberToTime(
           rewriteInverseTimeCall(Result, Scale, RootNode))
     return *MaybeRewrite;
 
-  if (IsLiteralZero(Result, RootNode))
+  if (isLiteralZero(Result, RootNode))
     return std::string("absl::UnixEpoch()");
 
   return (llvm::Twine(getTimeFactoryForScale(Scale)) + "(" +
diff --git a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h
index 1ae312c634321..135d15b635cdb 100644
--- a/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h
+++ b/clang-tools-extra/clang-tidy/abseil/DurationRewriter.h
@@ -33,10 +33,10 @@ llvm::StringRef getDurationFactoryForScale(DurationScale Scale);
 
 /// Given a 'Scale', return the appropriate factory function call for
 /// constructing a `Time` for that scale.
-llvm::StringRef getTimeFactoryForScale(DurationScale scale);
+llvm::StringRef getTimeFactoryForScale(DurationScale Scale);
 
 // Determine if `Node` represents a literal floating point or integral zero.
-bool IsLiteralZero(const ast_matchers::MatchFinder::MatchResult &Result,
+bool isLiteralZero(const ast_matchers::MatchFinder::MatchResult &Result,
                    const Expr &Node);
 
 /// Possibly strip a floating point cast expression.
@@ -77,7 +77,7 @@ const std::pair<llvm::StringRef, llvm::StringRef> &
 getDurationInverseForScale(DurationScale Scale);
 
 /// Returns the Time inverse function name for a given `Scale`.
-llvm::StringRef getTimeInverseForScale(DurationScale scale);
+llvm::StringRef getTimeInverseForScale(DurationScale Scale);
 
 /// Assuming `Node` has type `double` or `int` representing a time interval of
 /// `Scale`, return the expression to make it a suitable `Duration`.
diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp
index aa43e7dc38851..5741c0d505d51 100644
--- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp
+++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp
@@ -97,7 +97,7 @@ void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) {
                                  ", " + NeedleExprCode + ")")
                                     .str());
 
-  // Create a preprocessor #include FixIt hint (CreateIncludeInsertion checks
+  // Create a preprocessor #include FixIt hint (createIncludeInsertion checks
   // whether this already exists).
   Diagnostic << IncludeInserter.createIncludeInsertion(
       Source.getFileID(ComparisonExpr->getBeginLoc()),
diff --git a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp
index 0290789e76bfe..75797c185decd 100644
--- a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp
+++ b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp
@@ -240,7 +240,7 @@ bool UnrollLoopsCheck::extractValue(int &Value, const BinaryOperator *Op,
   else if (RHS->isEvaluatable(*Context))
     RHS->EvaluateAsRValue(Result, *Context);
   else
-    return false; // Cannot evalue either side.
+    return false; // Cannot evaluate either side.
   if (!Result.Val.isInt())
     return false; // Cannot check number of iterations, return false to be
                   // safe.
diff --git a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
index 86e3f3b7da6a2..f5f418b139560 100644
--- a/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp
@@ -43,7 +43,7 @@ static bool areSwitchBranchesIdentical(const SwitchBranch LHS,
   for (size_t I = 0, Size = LHS.size(); I < Size; I++) {
     // NOTE: We strip goto labels and annotations in addition to stripping
     // the `case X:` or `default:` labels, but it is very unlikely that this
-    // would casue false positives in real-world code.
+    // would cause false positives in real-world code.
     if (!areStatementsIdentical(LHS[I]->stripLabelLikeStatements(),
                                 RHS[I]->stripLabelLikeStatements(), Context)) {
       return false;
@@ -187,10 +187,10 @@ void BranchCloneCheck::check(const MatchFinder::MatchResult &Result) {
       Branches.back().push_back(S);
     }
 
-    auto End = Branches.end();
-    auto BeginCurrent = Branches.begin();
+    auto *End = Branches.end();
+    auto *BeginCurrent = Branches.begin();
     while (BeginCurrent < End) {
-      auto EndCurrent = BeginCurrent + 1;
+      auto *EndCurrent = BeginCurrent + 1;
       while (EndCurrent < End &&
              areSwitchBranchesIdentical(*BeginCurrent, *EndCurrent, Context)) {
         ++EndCurrent;
diff --git a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h
index f5ee77c8c7ba0..ade353aa49408 100644
--- a/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h
@@ -31,7 +31,7 @@ class CopyConstructorInitCheck : public ClangTidyCheck {
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 };
 
-} // namespace misc
+} // namespace bugprone
 } // namespace tidy
 } // namespace clang
diff --git a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp
index d111016ca3457..771a8780b070e 100644
--- a/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp
@@ -1100,7 +1100,7 @@ class UserDefinedConversionSelector {
   /// an implicit conversion.
   void addConversion(const CXXMethodDecl *ConvFun, QualType FromType,
                      QualType ToType) {
-    // Try to go from the FromType to the ToType wiht only a single implicit
+    // Try to go from the FromType to the ToType with only a single implicit
     // conversion, to see if the conversion function is applicable.
     MixData Mix = calculateMixability(
         Check, FromType, ToType, ConvFun->getASTContext(),
@@ -1553,7 +1553,7 @@ static bool isIgnoredParameter(const TheCheck &Check, const ParmVarDecl *Node) {
 }
 
 /// This namespace contains the implementations for the suppression of
-/// diagnostics from similaly used ("related") parameters.
+/// diagnostics from similarly-used ("related") parameters.
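+/// (For instance, two parameters that are passed together into one call, such
+/// as the begin/end pair of a single range, are treated as related and a swap
+/// between them is not diagnosed; this example is illustrative.)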
 namespace relatedness_heuristic {
 
 static constexpr std::size_t SmallDataStructureSize = 4;
diff --git a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp
index 857ef15f9d0c7..568f139bdb856 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp
@@ -112,7 +112,8 @@ void ForwardingReferenceOverloadCheck::check(
   // Every parameter after the first must have a default value.
   const auto *Ctor = Result.Nodes.getNodeAs<CXXConstructorDecl>("ctor");
-  for (auto Iter = Ctor->param_begin() + 1; Iter != Ctor->param_end(); ++Iter) {
+  for (auto *Iter = Ctor->param_begin() + 1; Iter != Ctor->param_end();
+       ++Iter) {
     if (!(*Iter)->hasDefaultArg())
       return;
   }
diff --git a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp
index 4a8388c61ee07..e57feaf029a83 100644
--- a/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp
@@ -39,7 +39,7 @@ const Stmt *nextStmt(const MatchFinder::MatchResult &Result, const Stmt *S) {
 
 using ExpansionRanges = std::vector<SourceRange>;
 
-/// \bried Get all the macro expansion ranges related to `Loc`.
+/// \brief Get all the macro expansion ranges related to `Loc`.
 ///
 /// The result is ordered from most inner to most outer.
 ExpansionRanges getExpansionRanges(SourceLocation Loc,
diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp
index df2968e2e37c4..8da0469554250 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp
@@ -152,7 +152,7 @@ getFailureInfoImpl(StringRef Name, bool IsInGlobalNamespace,
 }
 
 Optional<RenamerClangTidyCheck::FailureInfo>
-ReservedIdentifierCheck::GetDeclFailureInfo(const NamedDecl *Decl,
+ReservedIdentifierCheck::getDeclFailureInfo(const NamedDecl *Decl,
                                             const SourceManager &) const {
   assert(Decl && Decl->getIdentifier() && !Decl->getName().empty() &&
          !Decl->isImplicit() &&
@@ -163,14 +163,14 @@ ReservedIdentifierCheck::GetDeclFailureInfo(const NamedDecl *Decl,
 }
 
 Optional<RenamerClangTidyCheck::FailureInfo>
-ReservedIdentifierCheck::GetMacroFailureInfo(const Token &MacroNameTok,
+ReservedIdentifierCheck::getMacroFailureInfo(const Token &MacroNameTok,
                                              const SourceManager &) const {
   return getFailureInfoImpl(MacroNameTok.getIdentifierInfo()->getName(), true,
                             getLangOpts(), Invert, AllowedIdentifiers);
 }
 
 RenamerClangTidyCheck::DiagInfo
-ReservedIdentifierCheck::GetDiagInfo(const NamingCheckId &ID,
+ReservedIdentifierCheck::getDiagInfo(const NamingCheckId &ID,
                                      const NamingCheckFailure &Failure) const {
   return DiagInfo{Message, [&](DiagnosticBuilder &Diag) {
                     Diag << ID.second
diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h
index fa570902f778b..dc1206b5901b2 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h
@@ -41,12 +41,12 @@ class ReservedIdentifierCheck final : public RenamerClangTidyCheck {
 
 private:
   llvm::Optional<FailureInfo>
-  GetDeclFailureInfo(const NamedDecl *Decl,
+  getDeclFailureInfo(const NamedDecl *Decl,
                      const SourceManager &SM) const override;
   llvm::Optional<FailureInfo>
-  GetMacroFailureInfo(const Token &MacroNameTok,
+  getMacroFailureInfo(const Token &MacroNameTok,
                       const SourceManager &SM) const override;
-  DiagInfo GetDiagInfo(const NamingCheckId &ID,
+  DiagInfo getDiagInfo(const NamingCheckId &ID,
                        const NamingCheckFailure &Failure) const override;
 };
diff --git a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp
index 44a19b1f824a7..5bdf01c098dc6 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.cpp
@@ -176,7 +176,7 @@ void SizeofExpressionCheck::registerMatchers(MatchFinder *Finder) {
             .bind("sizeof-pointer-to-aggregate"),
         this);
 
-  // Detect expression like: sizeof(epxr) <= k for a suspicious constant 'k'.
+  // Detect expression like: sizeof(expr) <= k for a suspicious constant 'k'.
   if (WarnOnSizeOfCompareToConstant) {
     Finder->addMatcher(
         binaryOperator(matchers::isRelationalOperator(),
diff --git a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp
index a99492d29d5fe..8542f631a2e47 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp
@@ -131,7 +131,7 @@ void SuspiciousStringCompareCheck::registerMatchers(MatchFinder *Finder) {
         this);
   }
 
-  // Detect suspicious cast to an inconsistant type (i.e. not integer type).
+  // Detect suspicious cast to an inconsistent type (i.e. not integer type).
   Finder->addMatcher(
       traverse(TK_AsIs,
               implicitCastExpr(unless(hasType(isInteger())),
diff --git a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp
index b87bb9e8ca953..9b8d8d7bf5f4c 100644
--- a/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp
@@ -84,9 +84,9 @@ void UnusedRaiiCheck::check(const MatchFinder::MatchResult &Result) {
   auto SR = SourceRange(Node->getLParenLoc(), Node->getRParenLoc());
   auto DefaultConstruction = Node->getNumArgs() == 0;
   if (!DefaultConstruction) {
-    auto FirstArg = Node->getArg(0);
+    auto *FirstArg = Node->getArg(0);
     DefaultConstruction = isa<CXXDefaultArgExpr>(FirstArg);
-    if (auto ILE = dyn_cast<InitListExpr>(FirstArg)) {
+    if (auto *ILE = dyn_cast<InitListExpr>(FirstArg)) {
       DefaultConstruction = ILE->getNumInits() == 0;
       SR = SourceRange(ILE->getLBraceLoc(), ILE->getRBraceLoc());
     }
diff --git a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
index c3cfe12cd8512..3aada6f37f37d 100644
--- a/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/cert/CERTTidyModule.cpp
@@ -16,6 +16,7 @@
 #include "../bugprone/SpuriouslyWakeUpFunctionsCheck.h"
 #include "../bugprone/SuspiciousMemoryComparisonCheck.h"
 #include "../bugprone/UnhandledSelfAssignmentCheck.h"
+#include "../bugprone/UnusedReturnValueCheck.h"
 #include "../concurrency/ThreadCanceltypeAsynchronousCheck.h"
 #include "../google/UnnamedNamespaceInHeaderCheck.h"
 #include "../misc/NewDeleteOverloadsCheck.h"
@@ -39,6 +40,193 @@
 #include "ThrownExceptionTypeCheck.h"
 #include "VariadicFunctionDefCheck.h"
 
+namespace {
+
+// Checked functions for cert-err33-c.
+// The following functions are deliberately excluded because they can be called
+// with NULL argument and in this case the check is not applicable:
+// `mblen, mbrlen, mbrtowc, mbtowc, wctomb, wctomb_s`.
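+// (For instance, `mblen(NULL, 0)` may be called only to query whether the
+// current multibyte encoding is state-dependent, in which case discarding the
+// return value is legitimate; this example is illustrative.)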
+// FIXME: The check can be improved to handle such cases.
+const llvm::StringRef CertErr33CCheckedFunctions = "::aligned_alloc;"
+                                                   "::asctime_s;"
+                                                   "::at_quick_exit;"
+                                                   "::atexit;"
+                                                   "::bsearch;"
+                                                   "::bsearch_s;"
+                                                   "::btowc;"
+                                                   "::c16rtomb;"
+                                                   "::c32rtomb;"
+                                                   "::calloc;"
+                                                   "::clock;"
+                                                   "::cnd_broadcast;"
+                                                   "::cnd_init;"
+                                                   "::cnd_signal;"
+                                                   "::cnd_timedwait;"
+                                                   "::cnd_wait;"
+                                                   "::ctime_s;"
+                                                   "::fclose;"
+                                                   "::fflush;"
+                                                   "::fgetc;"
+                                                   "::fgetpos;"
+                                                   "::fgets;"
+                                                   "::fgetwc;"
+                                                   "::fopen;"
+                                                   "::fopen_s;"
+                                                   "::fprintf;"
+                                                   "::fprintf_s;"
+                                                   "::fputc;"
+                                                   "::fputs;"
+                                                   "::fputwc;"
+                                                   "::fputws;"
+                                                   "::fread;"
+                                                   "::freopen;"
+                                                   "::freopen_s;"
+                                                   "::fscanf;"
+                                                   "::fscanf_s;"
+                                                   "::fseek;"
+                                                   "::fsetpos;"
+                                                   "::ftell;"
+                                                   "::fwprintf;"
+                                                   "::fwprintf_s;"
+                                                   "::fwrite;"
+                                                   "::fwscanf;"
+                                                   "::fwscanf_s;"
+                                                   "::getc;"
+                                                   "::getchar;"
+                                                   "::getenv;"
+                                                   "::getenv_s;"
+                                                   "::gets_s;"
+                                                   "::getwc;"
+                                                   "::getwchar;"
+                                                   "::gmtime;"
+                                                   "::gmtime_s;"
+                                                   "::localtime;"
+                                                   "::localtime_s;"
+                                                   "::malloc;"
+                                                   "::mbrtoc16;"
+                                                   "::mbrtoc32;"
+                                                   "::mbsrtowcs;"
+                                                   "::mbsrtowcs_s;"
+                                                   "::mbstowcs;"
+                                                   "::mbstowcs_s;"
+                                                   "::memchr;"
+                                                   "::mktime;"
+                                                   "::mtx_init;"
+                                                   "::mtx_lock;"
+                                                   "::mtx_timedlock;"
+                                                   "::mtx_trylock;"
+                                                   "::mtx_unlock;"
+                                                   "::printf_s;"
+                                                   "::putc;"
+                                                   "::putwc;"
+                                                   "::raise;"
+                                                   "::realloc;"
+                                                   "::remove;"
+                                                   "::rename;"
+                                                   "::scanf;"
+                                                   "::scanf_s;"
+                                                   "::setlocale;"
+                                                   "::setvbuf;"
+                                                   "::signal;"
+                                                   "::snprintf;"
+                                                   "::snprintf_s;"
+                                                   "::sprintf;"
+                                                   "::sprintf_s;"
+                                                   "::sscanf;"
+                                                   "::sscanf_s;"
+                                                   "::strchr;"
+                                                   "::strerror_s;"
+                                                   "::strftime;"
+                                                   "::strpbrk;"
+                                                   "::strrchr;"
+                                                   "::strstr;"
+                                                   "::strtod;"
+                                                   "::strtof;"
+                                                   "::strtoimax;"
+                                                   "::strtok;"
+                                                   "::strtok_s;"
+                                                   "::strtol;"
+                                                   "::strtold;"
+                                                   "::strtoll;"
+                                                   "::strtoul;"
+                                                   "::strtoull;"
+                                                   "::strtoumax;"
+                                                   "::strxfrm;"
+                                                   "::swprintf;"
+                                                   "::swprintf_s;"
+                                                   "::swscanf;"
+                                                   "::swscanf_s;"
+                                                   "::thrd_create;"
+                                                   "::thrd_detach;"
+                                                   "::thrd_join;"
+                                                   "::thrd_sleep;"
+                                                   "::time;"
+                                                   "::timespec_get;"
+                                                   "::tmpfile;"
+                                                   "::tmpfile_s;"
+                                                   "::tmpnam;"
+                                                   "::tmpnam_s;"
+                                                   "::tss_create;"
+                                                   "::tss_get;"
+                                                   "::tss_set;"
+                                                   "::ungetc;"
+                                                   "::ungetwc;"
+                                                   "::vfprintf;"
+                                                   "::vfprintf_s;"
+                                                   "::vfscanf;"
+                                                   "::vfscanf_s;"
+                                                   "::vfwprintf;"
+                                                   "::vfwprintf_s;"
+                                                   "::vfwscanf;"
+                                                   "::vfwscanf_s;"
+                                                   "::vprintf_s;"
+                                                   "::vscanf;"
+                                                   "::vscanf_s;"
+                                                   "::vsnprintf;"
+                                                   "::vsnprintf_s;"
+                                                   "::vsprintf;"
+                                                   "::vsprintf_s;"
+                                                   "::vsscanf;"
+                                                   "::vsscanf_s;"
+                                                   "::vswprintf;"
+                                                   "::vswprintf_s;"
+                                                   "::vswscanf;"
+                                                   "::vswscanf_s;"
+                                                   "::vwprintf_s;"
+                                                   "::vwscanf;"
+                                                   "::vwscanf_s;"
+                                                   "::wcrtomb;"
+                                                   "::wcschr;"
+                                                   "::wcsftime;"
+                                                   "::wcspbrk;"
+                                                   "::wcsrchr;"
+                                                   "::wcsrtombs;"
+                                                   "::wcsrtombs_s;"
+                                                   "::wcsstr;"
+                                                   "::wcstod;"
+                                                   "::wcstof;"
+                                                   "::wcstoimax;"
+                                                   "::wcstok;"
+                                                   "::wcstok_s;"
+                                                   "::wcstol;"
+                                                   "::wcstold;"
+                                                   "::wcstoll;"
+                                                   "::wcstombs;"
+                                                   "::wcstombs_s;"
+                                                   "::wcstoul;"
+                                                   "::wcstoull;"
+                                                   "::wcstoumax;"
+                                                   "::wcsxfrm;"
+                                                   "::wctob;"
+                                                   "::wctrans;"
+                                                   "::wctype;"
+                                                   "::wmemchr;"
+                                                   "::wprintf_s;"
+                                                   "::wscanf;"
+                                                   "::wscanf_s;";
+
+} // namespace
+
 namespace clang {
 namespace tidy {
 namespace cert {
@@ -99,6 +287,10 @@ class CERTModule : public ClangTidyModule {
         "cert-dcl37-c");
     // ENV
     CheckFactories.registerCheck<CommandProcessorCheck>("cert-env33-c");
+    // ERR
+    CheckFactories.registerCheck<bugprone::UnusedReturnValueCheck>(
+        "cert-err33-c");
+    CheckFactories.registerCheck<StrToNumCheck>("cert-err34-c");
     // EXP
     CheckFactories.registerCheck<bugprone::SuspiciousMemoryComparisonCheck>(
         "cert-exp42-c");
@@ -108,8 +300,6 @@ class CERTModule : public ClangTidyModule {
         "cert-flp37-c");
     // FIO
     CheckFactories.registerCheck<misc::NonCopyableObjectsCheck>("cert-fio38-c");
-    // ERR
-    CheckFactories.registerCheck<StrToNumCheck>("cert-err34-c");
     // MSC
     CheckFactories.registerCheck<LimitedRandomnessCheck>("cert-msc30-c");
     CheckFactories.registerCheck<ProperlySeededRandomGeneratorCheck>(
@@ -131,6 +321,7 @@ class CERTModule : public ClangTidyModule {
     ClangTidyOptions Options;
     ClangTidyOptions::OptionMap &Opts = Options.CheckOptions;
     Opts["cert-dcl16-c.NewSuffixes"] = "L;LL;LU;LLU";
+    Opts["cert-err33-c.CheckedFunctions"] = CertErr33CCheckedFunctions;
     Opts["cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField"] = "false";
     Opts["cert-str34-c.DiagnoseSignedUnsignedCharComparisons"] = "false";
     return Options;
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp
index f58cc06987fe8..13bb7246fd43a 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp
@@ -52,7 +52,7 @@ void OwningMemoryCheck::registerMatchers(MatchFinder *Finder) {
       callExpr(callee(functionDecl(LegacyCreatorFunctions)));
   // C-style functions like `::malloc()` sometimes create owners as void*
   // which is expected to be cast to the correct type in C++. This case
-  // must be catched explicitly.
+  // must be caught explicitly.
   const auto LegacyOwnerCast =
       castExpr(hasSourceExpression(CreatesLegacyOwner));
   // Functions that do manual resource management but cannot be updated to use
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
index aa9a0ca80458d..1540a451b46df 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp
@@ -122,9 +122,9 @@ enum class InitializerPlacement {
 // insert into the initializer list of a constructor. We use this to ensure
 // proper absolute ordering according to the class declaration relative to the
 // (perhaps improper) ordering in the existing initializer list, if any.
-struct IntializerInsertion {
-  IntializerInsertion(InitializerPlacement Placement,
-                      const CXXCtorInitializer *Where)
+struct InitializerInsertion {
+  InitializerInsertion(InitializerPlacement Placement,
+                       const CXXCtorInitializer *Where)
       : Placement(Placement), Where(Where) {}
 
   SourceLocation getLocation(const ASTContext &Context,
@@ -186,11 +186,11 @@ const RecordDecl *getCanonicalRecordDecl(const QualType &Type) {
 }
 
 template <typename R, typename T>
-SmallVector<IntializerInsertion, 16>
+SmallVector<InitializerInsertion, 16>
 computeInsertions(const CXXConstructorDecl::init_const_range &Inits,
                   const R &OrderedDecls,
                   const SmallPtrSetImpl<const T *> &DeclsToInit) {
-  SmallVector<IntializerInsertion, 16> Insertions;
+  SmallVector<InitializerInsertion, 16> Insertions;
   Insertions.emplace_back(InitializerPlacement::New, nullptr);
 
   typename R::const_iterator Decl = std::begin(OrderedDecls);
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp
index 93081133190f1..7cc3d749120cf 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp
@@ -71,7 +71,7 @@ void SlicingCheck::registerMatchers(MatchFinder *Finder) {
 /// Warns on methods overridden in DerivedDecl with respect to BaseDecl.
 /// FIXME: this warns on all overrides outside of the sliced path in case of
 /// multiple inheritance.
-void SlicingCheck::DiagnoseSlicedOverriddenMethods(
+void SlicingCheck::diagnoseSlicedOverriddenMethods(
     const Expr &Call, const CXXRecordDecl &DerivedDecl,
     const CXXRecordDecl &BaseDecl) {
   if (DerivedDecl.getCanonicalDecl() == BaseDecl.getCanonicalDecl())
@@ -92,7 +92,7 @@ void SlicingCheck::DiagnoseSlicedOverriddenMethods(
     if (const auto *BaseRecordType = Base.getType()->getAs<RecordType>()) {
       if (const auto *BaseRecord = cast_or_null<CXXRecordDecl>(
               BaseRecordType->getDecl()->getDefinition()))
-        DiagnoseSlicedOverriddenMethods(Call, *BaseRecord, BaseDecl);
+        diagnoseSlicedOverriddenMethods(Call, *BaseRecord, BaseDecl);
     }
   }
 }
@@ -115,7 +115,7 @@ void SlicingCheck::check(const MatchFinder::MatchResult &Result) {
   //   class A { virtual void f(); };
   //   class B : public A {};
   // because in that case calling A::f is the same as calling B::f.
-  DiagnoseSlicedOverriddenMethods(*Call, *DerivedDecl, *BaseDecl);
+  diagnoseSlicedOverriddenMethods(*Call, *DerivedDecl, *BaseDecl);
 
   // Warn when slicing member variables.
   const auto &BaseLayout =
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h
index 002c724f86b8c..5cf0099676d65 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.h
@@ -32,7 +32,7 @@ class SlicingCheck : public ClangTidyCheck {
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 
 private:
-  void DiagnoseSlicedOverriddenMethods(const Expr &call,
+  void diagnoseSlicedOverriddenMethods(const Expr &Call,
                                        const CXXRecordDecl &DerivedDecl,
                                        const CXXRecordDecl &BaseDecl);
 };
diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h
index ada765df3c4c4..5409eb17525b5 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h
@@ -77,7 +77,7 @@ class SpecialMemberFunctionsCheck : public ClangTidyCheck {
 } // namespace clang
 
 namespace llvm {
-/// Specialisation of DenseMapInfo to allow ClassDefId objects in DenseMaps
+/// Specialization of DenseMapInfo to allow ClassDefId objects in DenseMaps
 /// FIXME: Move this to the corresponding cpp file as is done for
 /// clang-tidy/readability/IdentifierNamingCheck.cpp.
 template <>
diff --git a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h
index 801124cd5f675..dc5dd8b875217 100644
--- a/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h
+++ b/clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h
@@ -32,8 +32,8 @@ class MultipleInheritanceCheck : public ClangTidyCheck {
   void onEndOfTranslationUnit() override { InterfaceMap.clear(); }
 
 private:
-  void addNodeToInterfaceMap(const CXXRecordDecl *Node, bool isInterface);
-  bool getInterfaceStatus(const CXXRecordDecl *Node, bool &isInterface) const;
+  void addNodeToInterfaceMap(const CXXRecordDecl *Node, bool IsInterface);
+  bool getInterfaceStatus(const CXXRecordDecl *Node, bool &IsInterface) const;
   bool isCurrentClassInterface(const CXXRecordDecl *Node) const;
   bool isInterface(const CXXRecordDecl *Node);
 
diff --git a/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp b/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp
index 3e4c39d941938..8f8bd7a77ceb3 100644
--- a/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp
+++ b/clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp
@@ -35,7 +35,7 @@ std::string LLVMHeaderGuardCheck::getHeaderGuard(StringRef Filename,
     Guard = Guard.substr(PosToolsClang + std::strlen("tools/"));
 
   // Unlike LLVM svn, LLVM git monorepo is named llvm-project, so we replace
-  // "/llvm-project/" with the cannonical "/llvm/".
+  // "/llvm-project/" with the canonical "/llvm/".
   const static StringRef LLVMProject = "/llvm-project/";
   size_t PosLLVMProject = Guard.rfind(std::string(LLVMProject));
   if (PosLLVMProject != StringRef::npos)
diff --git a/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp b/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp
index 1200230c53edf..e818237954891 100644
--- a/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp
@@ -171,7 +171,7 @@ CallStackTy pathfindSomeCycle(ArrayRef<CallGraphNode *> SCC) {
   SmartSmallSetVector<CallGraphNode::CallRecord, 16> CallStackSet;
 
-  // Arbitrairly take the first element of SCC as entry point.
+  // Arbitrarily take the first element of SCC as entry point.
   CallGraphNode::CallRecord EntryNode(SCC.front(), /*CallExpr=*/nullptr);
   // Continue recursing into subsequent callees that are part of this SCC,
   // and are thus known to be part of the call graph loop, until loop forms.
diff --git a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp
index 6a8436054b337..f8073bff5ea98 100644
--- a/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.cpp
@@ -771,7 +771,7 @@ bool isTokAtEndOfExpr(SourceRange ExprSR, Token T, const SourceManager &SM) {
   return SM.getExpansionLoc(ExprSR.getEnd()) == T.getLocation();
 }
 
-/// Returns true if both LhsEpxr and RhsExpr are
+/// Returns true if both LhsExpr and RhsExpr are
 /// macro expressions and they are expanded
 /// from different macros.
 static bool areExprsFromDifferentMacros(const Expr *LhsExpr,
@@ -863,7 +863,7 @@ void RedundantExpressionCheck::registerMatchers(MatchFinder *Finder) {
           .bind("nested-duplicates"),
       this);
 
-  // Conditional (trenary) operator with equivalent operands, like (Y ? X : X).
+  // Conditional (ternary) operator with equivalent operands, like (Y ? X : X).
   Finder->addMatcher(
       traverse(TK_AsIs,
               conditionalOperator(expressionsAreEquivalent(),
diff --git a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp
index a63598fe1129f..37a81da557ea6 100644
--- a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp
@@ -47,43 +47,43 @@ void ThrowByValueCatchByReferenceCheck::check(
 }
 
 bool ThrowByValueCatchByReferenceCheck::isFunctionParameter(
-    const DeclRefExpr *declRefExpr) {
-  return isa<ParmVarDecl>(declRefExpr->getDecl());
+    const DeclRefExpr *DeclRefExpr) {
+  return isa<ParmVarDecl>(DeclRefExpr->getDecl());
 }
 
 bool ThrowByValueCatchByReferenceCheck::isCatchVariable(
-    const DeclRefExpr *declRefExpr) {
-  auto *valueDecl = declRefExpr->getDecl();
-  if (auto *varDecl = dyn_cast<clang::VarDecl>(valueDecl))
-    return varDecl->isExceptionVariable();
+    const DeclRefExpr *DeclRefExpr) {
+  auto *ValueDecl = DeclRefExpr->getDecl();
+  if (auto *VarDecl = dyn_cast<clang::VarDecl>(ValueDecl))
+    return VarDecl->isExceptionVariable();
   return false;
 }
 
 bool ThrowByValueCatchByReferenceCheck::isFunctionOrCatchVar(
-    const DeclRefExpr *declRefExpr) {
-  return isFunctionParameter(declRefExpr) || isCatchVariable(declRefExpr);
+    const DeclRefExpr *DeclRefExpr) {
+  return isFunctionParameter(DeclRefExpr) || isCatchVariable(DeclRefExpr);
 }
 
 void ThrowByValueCatchByReferenceCheck::diagnoseThrowLocations(
-    const CXXThrowExpr *throwExpr) {
-  if (!throwExpr)
+    const CXXThrowExpr *ThrowExpr) {
+  if (!ThrowExpr)
     return;
-  auto *subExpr = throwExpr->getSubExpr();
-  if (!subExpr)
+  auto *SubExpr = ThrowExpr->getSubExpr();
+  if (!SubExpr)
     return;
-  auto qualType = subExpr->getType();
-  if (qualType->isPointerType()) {
+  auto QualType = SubExpr->getType();
+  if (QualType->isPointerType()) {
     // The code is throwing a pointer.
-    // In case it is strng literal, it is safe and we return.
-    auto *inner = subExpr->IgnoreParenImpCasts();
-    if (isa<StringLiteral>(inner))
+    // In case it is string literal, it is safe and we return.
+    auto *Inner = SubExpr->IgnoreParenImpCasts();
+    if (isa<StringLiteral>(Inner))
       return;
     // If it's a variable from a catch statement, we return as well.
-    auto *declRef = dyn_cast<DeclRefExpr>(inner);
-    if (declRef && isCatchVariable(declRef)) {
+    auto *DeclRef = dyn_cast<DeclRefExpr>(Inner);
+    if (DeclRef && isCatchVariable(DeclRef)) {
       return;
     }
-    diag(subExpr->getBeginLoc(), "throw expression throws a pointer; it should "
+    diag(SubExpr->getBeginLoc(), "throw expression throws a pointer; it should "
                                  "throw a non-pointer value instead");
   }
   // If the throw statement does not throw by pointer then it throws by value
@@ -100,61 +100,62 @@ void ThrowByValueCatchByReferenceCheck::diagnoseThrowLocations(
   // When encountering a CopyOrMoveConstructor: emit message if after casts,
   // the expression is a LValue
   if (CheckAnonymousTemporaries) {
-    bool emit = false;
-    auto *currentSubExpr = subExpr->IgnoreImpCasts();
-    const auto *variableReference = dyn_cast<DeclRefExpr>(currentSubExpr);
-    const auto *constructorCall = dyn_cast<CXXConstructExpr>(currentSubExpr);
+    bool Emit = false;
+    auto *CurrentSubExpr = SubExpr->IgnoreImpCasts();
+    const auto *VariableReference = dyn_cast<DeclRefExpr>(CurrentSubExpr);
+    const auto *ConstructorCall = dyn_cast<CXXConstructExpr>(CurrentSubExpr);
     // If we have a DeclRefExpr, we flag for emitting a diagnosis message in
     // case the referenced variable is neither a function parameter nor a
     // variable declared in the catch statement.
-    if (variableReference)
-      emit = !isFunctionOrCatchVar(variableReference);
-    else if (constructorCall &&
-             constructorCall->getConstructor()->isCopyOrMoveConstructor()) {
+    if (VariableReference)
+      Emit = !isFunctionOrCatchVar(VariableReference);
+    else if (ConstructorCall &&
+             ConstructorCall->getConstructor()->isCopyOrMoveConstructor()) {
       // If we have a copy / move construction, we emit a diagnosis message if
       // the object that we copy construct from is neither a function parameter
       // nor a variable declared in a catch statement
-      auto argIter =
-          constructorCall
+      auto ArgIter =
+          ConstructorCall
               ->arg_begin(); // there's only one for copy constructors
-      auto *currentSubExpr = (*argIter)->IgnoreImpCasts();
-      if (currentSubExpr->isLValue()) {
-        if (auto *tmp = dyn_cast<DeclRefExpr>(currentSubExpr))
-          emit = !isFunctionOrCatchVar(tmp);
-        else if (isa<CallExpr>(currentSubExpr))
-          emit = true;
+      auto *CurrentSubExpr = (*ArgIter)->IgnoreImpCasts();
+      if (CurrentSubExpr->isLValue()) {
+        if (auto *Tmp = dyn_cast<DeclRefExpr>(CurrentSubExpr))
+          Emit = !isFunctionOrCatchVar(Tmp);
+        else if (isa<CallExpr>(CurrentSubExpr))
+          Emit = true;
       }
     }
-    if (emit)
-      diag(subExpr->getBeginLoc(),
+    if (Emit)
+      diag(SubExpr->getBeginLoc(),
           "throw expression should throw anonymous temporary values instead");
   }
 }
 
 void ThrowByValueCatchByReferenceCheck::diagnoseCatchLocations(
-    const CXXCatchStmt *catchStmt, ASTContext &context) {
-  if (!catchStmt)
+    const CXXCatchStmt *CatchStmt, ASTContext &Context) {
+  if (!CatchStmt)
     return;
-  auto caughtType = catchStmt->getCaughtType();
-  if (caughtType.isNull())
+  auto CaughtType = CatchStmt->getCaughtType();
+  if (CaughtType.isNull())
     return;
-  auto *varDecl = catchStmt->getExceptionDecl();
-  if (const auto *PT = caughtType.getCanonicalType()->getAs<PointerType>()) {
-    const char *diagMsgCatchReference = "catch handler catches a pointer value; "
-                                        "should throw a non-pointer value and "
-                                        "catch by reference instead";
+  auto *VarDecl = CatchStmt->getExceptionDecl();
+  if (const auto *PT = CaughtType.getCanonicalType()->getAs<PointerType>()) {
+    const char *DiagMsgCatchReference =
+        "catch handler catches a pointer value; "
+        "should throw a non-pointer value and "
+        "catch by reference instead";
     // We do not diagnose when catching pointer to strings since we also allow
     // throwing string literals.
     if (!PT->getPointeeType()->isAnyCharacterType())
-      diag(varDecl->getBeginLoc(), diagMsgCatchReference);
-  } else if (!caughtType->isReferenceType()) {
-    const char *diagMsgCatchReference = "catch handler catches by value; "
+      diag(VarDecl->getBeginLoc(), DiagMsgCatchReference);
+  } else if (!CaughtType->isReferenceType()) {
+    const char *DiagMsgCatchReference = "catch handler catches by value; "
                                         "should catch by reference instead";
     // If it's not a pointer and not a reference then it must be caught "by
     // value". In this case we should emit a diagnosis message unless the type
     // is trivial.
-    if (!caughtType.isTrivialType(context)) {
-      diag(varDecl->getBeginLoc(), diagMsgCatchReference);
+    if (!CaughtType.isTrivialType(Context)) {
+      diag(VarDecl->getBeginLoc(), DiagMsgCatchReference);
     } else if (WarnOnLargeObject) {
       // If the type is trivial, then catching it by reference is not dangerous.
       // However, catching large objects by value decreases the performance.
@@ -162,9 +163,9 @@ void ThrowByValueCatchByReferenceCheck::diagnoseCatchLocations(
       // We can now access `ASTContext` so if `MaxSize` is an extremal value
       // then set it to the size of `size_t`.
       if (MaxSize == std::numeric_limits<uint64_t>::max())
-        MaxSize = context.getTypeSize(context.getSizeType());
-      if (context.getTypeSize(caughtType) > MaxSize)
-        diag(varDecl->getBeginLoc(), diagMsgCatchReference);
+        MaxSize = Context.getTypeSize(Context.getSizeType());
+      if (Context.getTypeSize(CaughtType) > MaxSize)
+        diag(VarDecl->getBeginLoc(), DiagMsgCatchReference);
     }
   }
 }
diff --git a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h
index 3018fda6a389b..de26b6ab5e0ad 100644
--- a/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h
+++ b/clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.h
@@ -37,12 +37,12 @@ class ThrowByValueCatchByReferenceCheck : public ClangTidyCheck {
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 
 private:
-  void diagnoseThrowLocations(const CXXThrowExpr *throwExpr);
-  void diagnoseCatchLocations(const CXXCatchStmt *catchStmt,
-                              ASTContext &context);
-  bool isFunctionParameter(const DeclRefExpr *declRefExpr);
-  bool isCatchVariable(const DeclRefExpr *declRefExpr);
-  bool isFunctionOrCatchVar(const DeclRefExpr *declRefExpr);
+  void diagnoseThrowLocations(const CXXThrowExpr *ThrowExpr);
+  void diagnoseCatchLocations(const CXXCatchStmt *CatchStmt,
+                              ASTContext &Context);
+  bool isFunctionParameter(const DeclRefExpr *DeclRefExpr);
+  bool isCatchVariable(const DeclRefExpr *DeclRefExpr);
+  bool isFunctionOrCatchVar(const DeclRefExpr *DeclRefExpr);
   const bool CheckAnonymousTemporaries;
   const bool WarnOnLargeObject;
   const uint64_t MaxSizeOptions; // The raw value read from the options.
diff --git a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp
index 63f9f066f1943..960eb7b3d6707 100644
--- a/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp
@@ -334,7 +334,7 @@ static void addPlaceholderArgs(const LambdaProperties &LP,
 
   ArrayRef<BindArgument> Args = LP.BindArguments;
 
-  auto MaxPlaceholderIt =
+  const auto *MaxPlaceholderIt =
       std::max_element(Args.begin(), Args.end(),
                        [](const BindArgument &B1, const BindArgument &B2) {
                          return B1.PlaceHolderIndex < B2.PlaceHolderIndex;
diff --git a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp
index 5acbe99714169..3caaa3c876ab3 100644
--- a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp
@@ -182,7 +182,7 @@ void PassByValueCheck::check(const MatchFinder::MatchResult &Result) {
   if (!paramReferredExactlyOnce(Ctor, ParamDecl))
     return;
 
-  // If the parameter is trivial to copy, don't move it. Moving a trivivally
+  // If the parameter is trivial to copy, don't move it. Moving a trivially
   // copyable type will cause a problem with performance-move-const-arg
   if (ParamDecl->getType().getNonReferenceType().isTriviallyCopyableType(
           *Result.Context))
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h
index 8288c7e47d35e..892723b9aaa4d 100644
--- a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h
@@ -24,7 +24,7 @@ namespace modernize {
 /// operations do not 'copy' the resource but they 'steal' it.
 /// `std::unique_ptr` uses move semantics instead, which makes the intent of
 /// transferring the resource explicit. This difference between the two smart
-/// pointers requeres to wrap the copy-ctor and assign-operator with
+/// pointers requires wrapping the copy-ctor and assign-operator with
 /// `std::move()`.
 ///
 /// For example, given:
diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp
index 9752fd0a9e955..438fd4882b7fc 100644
--- a/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp
@@ -32,7 +32,7 @@ class ReplaceDisallowCopyAndAssignMacroCallbacks : public PPCallbacks {
       return;
     if (Info->getName() != Check.getMacroName())
       return;
-    // The first argument to the DISALLOW_COPY_AND_ASSIGN macro is exptected to
+    // The first argument to the DISALLOW_COPY_AND_ASSIGN macro is expected to
     // be the class name.
     const Token *ClassNameTok = Args->getUnexpArgument(0);
     if (Args->ArgNeedsPreexpansion(ClassNameTok, PP))
diff --git a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h
index 143cba4da7d62..0c7f73ff85376 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h
@@ -19,7 +19,7 @@ class UseNullptrCheck : public ClangTidyCheck {
 public:
   UseNullptrCheck(StringRef Name, ClangTidyContext *Context);
   bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
-    // FIXME this should be CPlusCplus11 but that causes test cases to
+    // FIXME this should be CPlusPlus11 but that causes test cases to
     // erroneously fail.
     return LangOpts.CPlusPlus;
   }
diff --git a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp
index 510e17f58c811..63c6f6b8e7aa5 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp
@@ -172,7 +172,7 @@ void UseOverrideCheck::check(const MatchFinder::MatchResult &Result) {
       // same line as the declaration if the beginning brace for the start of
       // the body falls on the next line.
       ReplacementText = " " + OverrideSpelling;
-      auto LastTokenIter = std::prev(Tokens.end());
+      auto *LastTokenIter = std::prev(Tokens.end());
       // When try statement is used instead of compound statement as
       // method body - insert override keyword before it.
       if (LastTokenIter->is(tok::kw_try))
diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
index 72d918822270b..d8d9e28d598d1 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp
@@ -78,7 +78,7 @@ struct UnqualNameVisitor : public RecursiveASTVisitor<UnqualNameVisitor> {
     return RecursiveASTVisitor<UnqualNameVisitor>::TraverseTypeLoc(TL);
   }
 
-  // Replace the base method in order to call ower own
+  // Replace the base method in order to call our own
   // TraverseTypeLoc().
   bool TraverseQualifiedTypeLoc(QualifiedTypeLoc TL) {
     return TraverseTypeLoc(TL.getUnqualifiedLoc());
@@ -172,8 +172,8 @@ static llvm::Optional<ClassifiedToken>
 classifyToken(const FunctionDecl &F, Preprocessor &PP, Token Tok) {
   ClassifiedToken CT;
   CT.T = Tok;
-  CT.isQualifier = true;
-  CT.isSpecifier = true;
+  CT.IsQualifier = true;
+  CT.IsSpecifier = true;
   bool ContainsQualifiers = false;
   bool ContainsSpecifiers = false;
   bool ContainsSomethingElse = false;
@@ -193,8 +193,8 @@ classifyToken(const FunctionDecl &F, Preprocessor &PP, Token Tok) {
     bool Qual = isCvr(T);
     bool Spec = isSpecifier(T);
 
-    CT.isQualifier &= Qual;
-    CT.isSpecifier &= Spec;
+    CT.IsQualifier &= Qual;
+    CT.IsSpecifier &= Spec;
     ContainsQualifiers |= Qual;
     ContainsSpecifiers |= Spec;
     ContainsSomethingElse |= !Qual && !Spec;
@@ -329,7 +329,7 @@ SourceRange UseTrailingReturnTypeCheck::findReturnTypeAndCVSourceRange(
         !ExtendedLeft) {
       assert(I <= size_t(std::numeric_limits<int>::max()) &&
             "Integer overflow detected");
-      for (int J = static_cast<int>(I) - 1; J >= 0 && Tokens[J].isQualifier;
+      for (int J = static_cast<int>(I) - 1; J >= 0 && Tokens[J].IsQualifier;
           J--)
         ReturnTypeRange.setBegin(Tokens[J].T.getLocation());
       ExtendedLeft = true;
@@ -337,7 +337,7 @@ SourceRange UseTrailingReturnTypeCheck::findReturnTypeAndCVSourceRange(
     // If we found the end of the return type, include right qualifiers.
     if (SM.isBeforeInTranslationUnit(ReturnTypeRange.getEnd(),
                                      Tokens[I].T.getLocation())) {
-      for (size_t J = I; J < Tokens.size() && Tokens[J].isQualifier; J++)
+      for (size_t J = I; J < Tokens.size() && Tokens[J].IsQualifier; J++)
         ReturnTypeRange.setEnd(Tokens[J].T.getLocation());
       break;
     }
@@ -380,7 +380,7 @@ void UseTrailingReturnTypeCheck::keepSpecifiers(
         SM.isBeforeInTranslationUnit(ReturnTypeCVRange.getEnd(),
                                      CT.T.getLocation()))
       continue;
-    if (!CT.isSpecifier)
+    if (!CT.IsSpecifier)
      continue;
 
     // Add the token to 'auto' and remove it from the return type, including
diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h
index d72e8eb04c9d0..dd32e4383329f 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h
+++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h
@@ -18,8 +18,8 @@ namespace modernize {
 
 struct ClassifiedToken {
   Token T;
-  bool isQualifier;
-  bool isSpecifier;
+  bool IsQualifier;
+  bool IsSpecifier;
 };
 
 /// Rewrites function signatures to use a trailing return type.
diff --git a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp
index 738e376744e36..61e8366b007a0 100644
--- a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp
+++ b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.cpp
@@ -21,7 +21,7 @@ namespace tidy {
 namespace performance {
 
 // Checks if the stmt is a ImplicitCastExpr with a CastKind that is not a NoOp.
-// The subtelty is that in some cases (user defined conversions), we can
+// The subtlety is that in some cases (user defined conversions), we can
 // get to ImplicitCastExpr inside each other, with the outer one a NoOp. In this
 // case we skip the first cast expr.
 static bool isNonTrivialImplicitCast(const Stmt *ST) {
@@ -82,12 +82,12 @@ void ImplicitConversionInLoopCheck::check(
   // is a reference. This situation is fine (it probably produces the same
   // code at the end).
   if (isNonTrivialImplicitCast(Materialized->getSubExpr()))
-    ReportAndFix(Result.Context, VD, OperatorCall);
+    reportAndFix(Result.Context, VD, OperatorCall);
 }
 
-void ImplicitConversionInLoopCheck::ReportAndFix(
-    const ASTContext *Context, const VarDecl *VD,
-    const Expr *OperatorCall) {
+void ImplicitConversionInLoopCheck::reportAndFix(const ASTContext *Context,
+                                                 const VarDecl *VD,
+                                                 const Expr *OperatorCall) {
   // We only match on const ref, so we should print a const ref version of the
   // type.
   QualType ConstType = OperatorCall->getType().withConst();
diff --git a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h
index 4b271366f0f66..b95d75e98b3af 100644
--- a/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/ImplicitConversionInLoopCheck.h
@@ -29,7 +29,7 @@ class ImplicitConversionInLoopCheck : public ClangTidyCheck {
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 
 private:
-  void ReportAndFix(const ASTContext *Context, const VarDecl *VD,
+  void reportAndFix(const ASTContext *Context, const VarDecl *VD,
                     const Expr *OperatorCall);
 };
diff --git a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp
index f7c5112d22eab..a12d403d6ba09 100644
--- a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp
+++ b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.cpp
@@ -84,7 +84,7 @@ void InefficientVectorOperationCheck::storeOptions(
   Options.store(Opts, "EnableProto", EnableProto);
 }
 
-void InefficientVectorOperationCheck::AddMatcher(
+void InefficientVectorOperationCheck::addMatcher(
     const DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName,
     StringRef VarDeclStmtName, const DeclarationMatcher &AppendMethodDecl,
     StringRef AppendCallName, MatchFinder *Finder) {
@@ -156,7 +156,7 @@ void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) {
       VectorLikeClasses.begin(), VectorLikeClasses.end()))));
   const auto AppendMethodDecl =
       cxxMethodDecl(hasAnyName("push_back", "emplace_back"));
-  AddMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName,
+  addMatcher(VectorDecl, VectorVarDeclName, VectorVarDeclStmtName,
              AppendMethodDecl, PushBackOrEmplaceBackCallName, Finder);
 
   if (EnableProto) {
@@ -168,7 +168,7 @@ void InefficientVectorOperationCheck::registerMatchers(MatchFinder *Finder) {
     // with "add_". So we exclude const methods.
const auto AddFieldMethodDecl = cxxMethodDecl(matchesName("::add_"), unless(isConst())); - AddMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName, + addMatcher(ProtoDecl, ProtoVarDeclName, ProtoVarDeclStmtName, AddFieldMethodDecl, ProtoAddFieldCallName, Finder); } } diff --git a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h index 533b30dc32b0e..cebe199a5acda 100644 --- a/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h +++ b/clang-tools-extra/clang-tidy/performance/InefficientVectorOperationCheck.h @@ -34,7 +34,7 @@ class InefficientVectorOperationCheck : public ClangTidyCheck { void storeOptions(ClangTidyOptions::OptionMap &Opts) override; private: - void AddMatcher(const ast_matchers::DeclarationMatcher &TargetRecordDecl, + void addMatcher(const ast_matchers::DeclarationMatcher &TargetRecordDecl, StringRef VarDeclName, StringRef VarDeclStmtName, const ast_matchers::DeclarationMatcher &AppendMethodDecl, StringRef AppendCallName, ast_matchers::MatchFinder *Finder); diff --git a/clang-tools-extra/clang-tidy/plugin/ClangTidyPlugin.cpp b/clang-tools-extra/clang-tidy/plugin/ClangTidyPlugin.cpp index b8367d6384f22..01402f1d5f8e4 100644 --- a/clang-tools-extra/clang-tidy/plugin/ClangTidyPlugin.cpp +++ b/clang-tools-extra/clang-tidy/plugin/ClangTidyPlugin.cpp @@ -48,7 +48,7 @@ class ClangTidyPluginAction : public PluginASTAction { // Create the AST consumer. ClangTidyASTConsumerFactory Factory(*Context); std::vector> Vec; - Vec.push_back(Factory.CreateASTConsumer(Compiler, File)); + Vec.push_back(Factory.createASTConsumer(Compiler, File)); return std::make_unique( std::move(Context), std::move(DiagEngine), std::move(Vec)); diff --git a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h index 3a5fb08ee93b6..3f65e60487240 100644 --- a/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.h @@ -23,8 +23,8 @@ namespace readability { class ConstReturnTypeCheck : public ClangTidyCheck { public: using ClangTidyCheck::ClangTidyCheck; - void registerMatchers(ast_matchers::MatchFinder* finder) override; - void check(const ast_matchers::MatchFinder::MatchResult& result) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; }; } // namespace readability diff --git a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h index 0f0f8233f5d84..e11bd627614f9 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h +++ b/clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h @@ -20,8 +20,8 @@ namespace readability { /// This only replaces the case where the offset being accessed through the /// subscript operation is a known constant 0. This avoids a potential invalid /// memory access when the container is empty. Cases where the constant is not -/// explictly zero can be addressed through the clang static analyzer, and those -/// which cannot be statically identified can be caught using UBSan. +/// explicitly zero can be addressed through the clang static analyzer, and +/// those which cannot be statically identified can be caught using UBSan. 
class ContainerDataPointerCheck : public ClangTidyCheck { public: ContainerDataPointerCheck(StringRef Name, ClangTidyContext *Context); diff --git a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp index a1d8064d23a0f..548fed9a47c33 100644 --- a/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.cpp @@ -90,7 +90,7 @@ AST_MATCHER(CXXConstructExpr, isDefaultConstruction) { namespace tidy { namespace readability { -using utils::IsBinaryOrTernary; +using utils::isBinaryOrTernary; ContainerSizeEmptyCheck::ContainerSizeEmptyCheck(StringRef Name, ClangTidyContext *Context) @@ -191,7 +191,7 @@ void ContainerSizeEmptyCheck::check(const MatchFinder::MatchResult &Result) { std::string ReplacementText = std::string( Lexer::getSourceText(CharSourceRange::getTokenRange(E->getSourceRange()), *Result.SourceManager, getLangOpts())); - if (IsBinaryOrTernary(E) || isa(E)) { + if (isBinaryOrTernary(E) || isa(E)) { ReplacementText = "(" + ReplacementText + ")"; } if (E->getType()->isPointerType()) diff --git a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp index 9f53a5578b0ac..c27733c040833 100644 --- a/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp @@ -86,7 +86,7 @@ struct CognitiveComplexity final { }; // The helper struct used to record one increment occurrence, with all the - // details nessesary. + // details necessary. struct Detail { const SourceLocation Loc; // What caused the increment? const unsigned short Nesting; // How deeply nested is Loc located? 
diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index 0ff0708adc4a5..d275b475f97c0 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -1381,7 +1381,7 @@ IdentifierNamingCheck::getFailureInfo( } llvm::Optional -IdentifierNamingCheck::GetDeclFailureInfo(const NamedDecl *Decl, +IdentifierNamingCheck::getDeclFailureInfo(const NamedDecl *Decl, const SourceManager &SM) const { SourceLocation Loc = Decl->getLocation(); const FileStyle &FileStyle = getStyleForFile(SM.getFilename(Loc)); @@ -1397,7 +1397,7 @@ IdentifierNamingCheck::GetDeclFailureInfo(const NamedDecl *Decl, } llvm::Optional -IdentifierNamingCheck::GetMacroFailureInfo(const Token &MacroNameTok, +IdentifierNamingCheck::getMacroFailureInfo(const Token &MacroNameTok, const SourceManager &SM) const { SourceLocation Loc = MacroNameTok.getLocation(); const FileStyle &Style = getStyleForFile(SM.getFilename(Loc)); @@ -1410,7 +1410,7 @@ IdentifierNamingCheck::GetMacroFailureInfo(const Token &MacroNameTok, } RenamerClangTidyCheck::DiagInfo -IdentifierNamingCheck::GetDiagInfo(const NamingCheckId &ID, +IdentifierNamingCheck::getDiagInfo(const NamingCheckId &ID, const NamingCheckFailure &Failure) const { return DiagInfo{"invalid case style for %0 '%1'", [&](DiagnosticBuilder &Diag) { diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h index da303d785b8a4..a1621961986e7 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h @@ -186,12 +186,12 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { private: llvm::Optional - GetDeclFailureInfo(const NamedDecl *Decl, + getDeclFailureInfo(const NamedDecl *Decl, const SourceManager &SM) const override; llvm::Optional - GetMacroFailureInfo(const Token &MacroNameTok, + getMacroFailureInfo(const Token &MacroNameTok, const SourceManager &SM) const override; - DiagInfo GetDiagInfo(const NamingCheckId &ID, + DiagInfo getDiagInfo(const NamingCheckId &ID, const NamingCheckFailure &Failure) const override; const FileStyle &getStyleForFile(StringRef FileName) const; diff --git a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp index 1fcd1b76af852..2ef0150cccca4 100644 --- a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp @@ -104,8 +104,8 @@ findDifferingParamsInDeclaration(const FunctionDecl *ParameterSourceDeclaration, bool Strict) { DifferingParamsContainer DifferingParams; - auto SourceParamIt = ParameterSourceDeclaration->param_begin(); - auto OtherParamIt = OtherDeclaration->param_begin(); + const auto *SourceParamIt = ParameterSourceDeclaration->param_begin(); + const auto *OtherParamIt = OtherDeclaration->param_begin(); while (SourceParamIt != ParameterSourceDeclaration->param_end() && OtherParamIt != OtherDeclaration->param_end()) { diff --git a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp index f2838cd0b9841..95380069f5274 100644 --- 
a/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.cpp @@ -226,7 +226,7 @@ void QualifiedAutoCheck::check(const MatchFinder::MatchResult &Result) { if (!isPointerConst(Var->getType())) return; // Pointer isn't const, no need to add const qualifier. if (!isAutoPointerConst(Var->getType())) - return; // Const isnt wrapped in the auto type, so must be declared + return; // Const isn't wrapped in the auto type, so must be declared // explicitly. if (Var->getType().isLocalConstQualified()) { @@ -267,7 +267,7 @@ void QualifiedAutoCheck::check(const MatchFinder::MatchResult &Result) { if (!isPointerConst(Var->getType())) return; // Pointer isn't const, no need to add const qualifier. if (!isAutoPointerConst(Var->getType())) - // Const isnt wrapped in the auto type, so must be declared explicitly. + // Const isn't wrapped in the auto type, so must be declared explicitly. return; if (llvm::Optional TypeSpec = diff --git a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py index acd1ed6979c0d..e6cff6a7414d1 100755 --- a/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py +++ b/clang-tools-extra/clang-tidy/tool/run-clang-tidy.py @@ -273,8 +273,8 @@ def main(): # Load the database and extract all files. database = json.load(open(os.path.join(build_path, db_path))) - files = [make_absolute(entry['file'], entry['directory']) - for entry in database] + files = set([make_absolute(entry['file'], entry['directory']) + for entry in database]) max_task = args.j if max_task == 0: diff --git a/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp b/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp index dae7503856410..b226e25221151 100644 --- a/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/ASTUtils.cpp @@ -24,7 +24,7 @@ const FunctionDecl *getSurroundingFunction(ASTContext &Context, Statement, Context)); } -bool IsBinaryOrTernary(const Expr *E) { +bool isBinaryOrTernary(const Expr *E) { const Expr *EBase = E->IgnoreImpCasts(); if (isa(EBase) || isa(EBase)) { return true; diff --git a/clang-tools-extra/clang-tidy/utils/ASTUtils.h b/clang-tools-extra/clang-tidy/utils/ASTUtils.h index ad2a055f5fb97..b84808e09ec75 100644 --- a/clang-tools-extra/clang-tidy/utils/ASTUtils.h +++ b/clang-tools-extra/clang-tidy/utils/ASTUtils.h @@ -18,7 +18,7 @@ namespace utils { const FunctionDecl *getSurroundingFunction(ASTContext &Context, const Stmt &Statement); // Determine whether Expr is a Binary or Ternary expression. -bool IsBinaryOrTernary(const Expr *E); +bool isBinaryOrTernary(const Expr *E); /// Checks whether a macro flag is present in the given argument. Only considers /// cases of single match or match in a binary OR expression. 
For example, diff --git a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp index 38122d5420ac2..2df0461486963 100644 --- a/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp @@ -48,7 +48,7 @@ constReferenceDeclRefExprs(const VarDecl &VarDecl, const Stmt &Stmt, declRefExpr(to(varDecl(equalsNode(&VarDecl)))).bind("declRef"); auto ConstMethodCallee = callee(cxxMethodDecl(isConst())); // Match method call expressions where the variable is referenced as the this - // implicit object argument and opertor call expression for member operators + // implicit object argument and operator call expression for member operators // where the variable is the 0-th argument. auto Matches = match( findAll(expr(anyOf(cxxMemberCallExpr(ConstMethodCallee, on(DeclRefToVar)), diff --git a/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp index 5fc6020d4265b..6f3eed49831fd 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp @@ -19,7 +19,7 @@ class IncludeInserterCallback : public PPCallbacks { public: explicit IncludeInserterCallback(IncludeInserter *Inserter) : Inserter(Inserter) {} - // Implements PPCallbacks::InclusionDerective(). Records the names and source + // Implements PPCallbacks::InclusionDirective(). Records the names and source // locations of the inclusions in the main source file being processed. void InclusionDirective(SourceLocation HashLocation, const Token &IncludeToken, StringRef FileNameRef, @@ -76,7 +76,7 @@ IncludeInserter::createIncludeInsertion(FileID FileID, llvm::StringRef Header) { if (!InsertedHeaders[FileID].insert(Header).second) return llvm::None; - return getOrCreate(FileID).CreateIncludeInsertion(Header, IsAngled); + return getOrCreate(FileID).createIncludeInsertion(Header, IsAngled); } llvm::Optional @@ -92,7 +92,7 @@ void IncludeInserter::addInclude(StringRef FileName, bool IsAngled, assert(SourceMgr && "SourceMgr shouldn't be null; did you remember to call " "registerPreprocessor()?"); FileID FileID = SourceMgr->getFileID(HashLocation); - getOrCreate(FileID).AddInclude(FileName, IsAngled, HashLocation, EndLocation); + getOrCreate(FileID).addInclude(FileName, IsAngled, HashLocation, EndLocation); } } // namespace utils diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp index de75148d127a7..fbc1dc6d52a0b 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp @@ -129,7 +129,7 @@ IncludeSorter::IncludeSorter(const SourceManager *SourceMgr, : SourceMgr(SourceMgr), Style(Style), CurrentFileID(FileID), CanonicalFile(makeCanonicalName(FileName, Style)) {} -void IncludeSorter::AddInclude(StringRef FileName, bool IsAngled, +void IncludeSorter::addInclude(StringRef FileName, bool IsAngled, SourceLocation HashLocation, SourceLocation EndLocation) { int Offset = findNextLine(SourceMgr->getCharacterData(EndLocation)); @@ -150,7 +150,7 @@ void IncludeSorter::AddInclude(StringRef FileName, bool IsAngled, IncludeBucket[Kind].push_back(FileName.str()); } -Optional IncludeSorter::CreateIncludeInsertion(StringRef FileName, +Optional IncludeSorter::createIncludeInsertion(StringRef FileName, bool IsAngled) { std::string IncludeStmt; if (Style == IncludeStyle::IS_Google_ObjC) { diff --git 
a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h index a8cf18ca8625d..ecde60de4f9ef 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h @@ -41,12 +41,12 @@ class IncludeSorter { StringRef FileName, IncludeStyle Style); /// Adds the given include directive to the sorter. - void AddInclude(StringRef FileName, bool IsAngled, + void addInclude(StringRef FileName, bool IsAngled, SourceLocation HashLocation, SourceLocation EndLocation); /// Creates a quoted inclusion directive in the right sort order. Returns None /// on error or if header inclusion directive for header already exists. - Optional CreateIncludeInsertion(StringRef FileName, bool IsAngled); + Optional createIncludeInsertion(StringRef FileName, bool IsAngled); private: typedef SmallVector SourceRangeVector; diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp index 88828f72e6bb4..f769db8840f79 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -178,7 +178,7 @@ static SourceLocation getSemicolonAfterStmtEndLoc(const SourceLocation &EndLoc, // All macro issues are simply resolved by ensuring it's a semicolon. if (NextTok && NextTok->is(tok::TokenKind::semi)) { // Ideally this would return `F` with spelling location `;` (NextTok) - // following the examle above. For now simply return NextTok location. + // following the example above. For now simply return NextTok location. return NextTok->getLocation(); } diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index d719f847f50d8..a0dcca7f9973c 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -23,7 +23,7 @@ using namespace clang::ast_matchers; namespace llvm { -/// Specialisation of DenseMapInfo to allow NamingCheckId objects in DenseMaps +/// Specialization of DenseMapInfo to allow NamingCheckId objects in DenseMaps template <> struct DenseMapInfo { using NamingCheckId = clang::tidy::RenamerClangTidyCheck::NamingCheckId; @@ -176,7 +176,7 @@ void RenamerClangTidyCheck::addUsage( if (!Failure.RawUsageLocs.insert(FixLocation).second) return; - if (!Failure.ShouldFix()) + if (!Failure.shouldFix()) return; if (SourceMgr && SourceMgr->isWrittenInScratchSpace(FixLocation)) @@ -265,7 +265,7 @@ NameLookup findDeclInBases(const CXXRecordDecl &Parent, StringRef DeclName, } else return NameLookup(llvm::None); // Propagate multiple resolution back up. } - return NameLookup(Found); // If nullptr, decl wasnt found. + return NameLookup(Found); // If nullptr, decl wasn't found. 
} void RenamerClangTidyCheck::check(const MatchFinder::MatchResult &Result) { @@ -446,7 +446,7 @@ void RenamerClangTidyCheck::check(const MatchFinder::MatchResult &Result) { return; Optional MaybeFailure = - GetDeclFailureInfo(Decl, *Result.SourceManager); + getDeclFailureInfo(Decl, *Result.SourceManager); if (!MaybeFailure) return; FailureInfo &Info = *MaybeFailure; @@ -477,7 +477,7 @@ void RenamerClangTidyCheck::checkMacro(SourceManager &SourceMgr, const Token &MacroNameTok, const MacroInfo *MI) { Optional MaybeFailure = - GetMacroFailureInfo(MacroNameTok, SourceMgr); + getMacroFailureInfo(MacroNameTok, SourceMgr); if (!MaybeFailure) return; FailureInfo &Info = *MaybeFailure; @@ -532,14 +532,14 @@ void RenamerClangTidyCheck::onEndOfTranslationUnit() { if (Failure.Info.KindName.empty()) continue; - if (Failure.ShouldNotify()) { - auto DiagInfo = GetDiagInfo(Decl, Failure); + if (Failure.shouldNotify()) { + auto DiagInfo = getDiagInfo(Decl, Failure); auto Diag = diag(Decl.first, DiagInfo.Text + getDiagnosticSuffix(Failure.FixStatus, Failure.Info.Fixup)); DiagInfo.ApplyArgs(Diag); - if (Failure.ShouldFix()) { + if (Failure.shouldFix()) { for (const auto &Loc : Failure.RawUsageLocs) { // We assume that the identifier name is made of one token only. This // is always the case as we ignore usages in macros that could build diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h index fd5b32075cbe6..def04c917bbc9 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h @@ -1,4 +1,4 @@ -//===--- RenamderClangTidyCheck.h - clang-tidy ------------------*- C++ -*-===// +//===--- RenamerClangTidyCheck.h - clang-tidy -------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -32,7 +32,7 @@ class RenamerClangTidyCheck : public ClangTidyCheck { /// Derived classes should not implement any matching logic themselves; this /// class will do the matching and call the derived class' - /// GetDeclFailureInfo() and GetMacroFailureInfo() for determining whether a + /// getDeclFailureInfo() and getMacroFailureInfo() for determining whether a /// given identifier passes or fails the check. void registerMatchers(ast_matchers::MatchFinder *Finder) override final; void @@ -87,11 +87,11 @@ class RenamerClangTidyCheck : public ClangTidyCheck { /// /// e.g.: if the identifier was used or declared within a macro we won't /// offer a fixup for safety reasons. - bool ShouldFix() const { + bool shouldFix() const { return FixStatus == ShouldFixStatus::ShouldFix && !Info.Fixup.empty(); } - bool ShouldNotify() const { + bool shouldNotify() const { return FixStatus < ShouldFixStatus::IgnoreFailureThreshold; } @@ -109,7 +109,7 @@ class RenamerClangTidyCheck : public ClangTidyCheck { llvm::DenseMap; /// Check Macros for style violations. - void checkMacro(SourceManager &sourceMgr, const Token &MacroNameTok, + void checkMacro(SourceManager &SourceMgr, const Token &MacroNameTok, const MacroInfo *MI); /// Add a usage of a macro if it already has a violation. @@ -126,13 +126,13 @@ class RenamerClangTidyCheck : public ClangTidyCheck { /// Overridden by derived classes, returns information about if and how a Decl /// failed the check. A 'None' result means the Decl did not fail the check. 
virtual llvm::Optional - GetDeclFailureInfo(const NamedDecl *Decl, const SourceManager &SM) const = 0; + getDeclFailureInfo(const NamedDecl *Decl, const SourceManager &SM) const = 0; /// Overridden by derived classes, returns information about if and how a /// macro failed the check. A 'None' result means the macro did not fail the /// check. virtual llvm::Optional - GetMacroFailureInfo(const Token &MacroNameTok, + getMacroFailureInfo(const Token &MacroNameTok, const SourceManager &SM) const = 0; /// Represents customized diagnostic text and how arguments should be applied. @@ -151,7 +151,7 @@ class RenamerClangTidyCheck : public ClangTidyCheck { /// that should be emitted for the given failure. The base class will then /// further customize the diagnostic by adding info about whether the fix-it /// can be automatically applied or not. - virtual DiagInfo GetDiagInfo(const NamingCheckId &ID, + virtual DiagInfo getDiagInfo(const NamingCheckId &ID, const NamingCheckFailure &Failure) const = 0; private: diff --git a/clang-tools-extra/clang-tidy/utils/TypeTraits.h b/clang-tools-extra/clang-tidy/utils/TypeTraits.h index f4d3455e9e138..7e6b795e81271 100644 --- a/clang-tools-extra/clang-tidy/utils/TypeTraits.h +++ b/clang-tools-extra/clang-tidy/utils/TypeTraits.h @@ -37,7 +37,7 @@ bool hasNonTrivialMoveConstructor(QualType Type); /// Return true if `Type` has a non-trivial move assignment operator. bool hasNonTrivialMoveAssignment(QualType Type); -} // type_traits +} // namespace type_traits } // namespace utils } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h index 3a43a48367c05..3a1eae5875548 100644 --- a/clang-tools-extra/clangd/Protocol.h +++ b/clang-tools-extra/clangd/Protocol.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file contains structs based on the LSP specification at -// https://github.com/Microsoft/language-server-protocol/blob/master/protocol.md +// https://github.com/Microsoft/language-server-protocol/blob/main/protocol.md // // This is not meant to be a complete implementation, new interfaces are added // when they're needed. @@ -1172,7 +1172,7 @@ enum class InsertTextFormat { /// typing in one will update others too. 
/// /// See also: - /// https//github.com/Microsoft/vscode/blob/master/src/vs/editor/contrib/snippet/common/snippet.md + /// https://github.com/Microsoft/vscode/blob/main/src/vs/editor/contrib/snippet/snippet.md Snippet = 2, }; diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 85c6b7b771fce..b29d29e5104ee 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -80,6 +80,9 @@ const NamedDecl *getDefinition(const NamedDecl *D) { return VD->getDefinition(); if (const auto *FD = dyn_cast(D)) return FD->getDefinition(); + if (const auto *CTD = dyn_cast(D)) + if (const auto *RD = CTD->getTemplatedDecl()) + return RD->getDefinition(); // Objective-C classes can have three types of declarations: // // - forward declaration: @class MyClass; diff --git a/clang-tools-extra/clangd/unittests/SerializationTests.cpp b/clang-tools-extra/clangd/unittests/SerializationTests.cpp index f866635283e5f..548b027599650 100644 --- a/clang-tools-extra/clangd/unittests/SerializationTests.cpp +++ b/clang-tools-extra/clangd/unittests/SerializationTests.cpp @@ -306,8 +306,9 @@ TEST(SerializationTest, CmdlTest) { } // rlimit is part of POSIX. -// ASan uses a lot of address space, so we can't apply strict limits. -#if LLVM_ON_UNIX && !LLVM_ADDRESS_SANITIZER_BUILD +// Sanitizers use a lot of address space, so we can't apply strict limits. +#if LLVM_ON_UNIX && !LLVM_ADDRESS_SANITIZER_BUILD && \ + !LLVM_MEMORY_SANITIZER_BUILD class ScopedMemoryLimit { struct rlimit OriginalLimit; bool Succeeded = false; diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp index 802367645c859..d567e0d77b39c 100644 --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -675,7 +675,7 @@ TEST(LocateSymbol, All) { R"cpp(// Declaration of explicit template specialization template - struct $decl[[Foo]] {}; + struct $decl[[$def[[Foo]]]] {}; template <> struct Fo^o {}; @@ -683,12 +683,25 @@ TEST(LocateSymbol, All) { R"cpp(// Declaration of partial template specialization template - struct $decl[[Foo]] {}; + struct $decl[[$def[[Foo]]]] {}; template struct Fo^o {}; )cpp", + R"cpp(// Definition on ClassTemplateDecl + namespace ns { + // Forward declaration. + template + struct $decl[[Foo]]; + + template + struct $def[[Foo]] {}; + } + + using ::ns::Fo^o; + )cpp", + R"cpp(// auto builtin type (not supported) ^auto x = 42; )cpp", diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 0ca2c5a5abc6d..41934d8fb9055 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -76,6 +76,13 @@ Improvements to clang-tidy New checks ^^^^^^^^^^ +- New :doc:`abseil-cleanup-ctad + ` check. + + Suggests switching the initialization pattern of ``absl::Cleanup`` + instances from the factory function to class template argument + deduction (CTAD), in C++17 and higher. + - New :doc:`bugprone-suspicious-memory-comparison ` check. @@ -103,6 +110,11 @@ New checks New check aliases ^^^^^^^^^^^^^^^^^ +- New alias :doc:`cert-err33-c + ` to + :doc:`bugprone-unused-return-value + ` was added. 
+ - New alias :doc:`cert-exp42-c ` to :doc:`bugprone-suspicious-memory-comparison diff --git a/clang-tools-extra/docs/clang-tidy/checks/abseil-cleanup-ctad.rst b/clang-tools-extra/docs/clang-tidy/checks/abseil-cleanup-ctad.rst new file mode 100644 index 0000000000000..b8afc8b8a8481 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/abseil-cleanup-ctad.rst @@ -0,0 +1,22 @@ +.. title:: clang-tidy - abseil-cleanup-ctad + +abseil-cleanup-ctad +=================== + +Suggests switching the initialization pattern of ``absl::Cleanup`` +instances from the factory function to class template argument +deduction (CTAD), in C++17 and higher. + +.. code-block:: c++ + + auto c1 = absl::MakeCleanup([] {}); + + const auto c2 = absl::MakeCleanup(std::function([] {})); + +becomes + +.. code-block:: c++ + + absl::Cleanup c1 = [] {}; + + const absl::Cleanup c2 = std::function([] {}); diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-unused-return-value.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-unused-return-value.rst index 4cc54ed02d16b..0f33abfb2e318 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-unused-return-value.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-unused-return-value.rst @@ -45,3 +45,6 @@ Options - ``std::basic_string::empty()`` and ``std::vector::empty()``. Not using the return value often indicates that the programmer confused the function with ``clear()``. + +`cert-err33-c `_ is an alias of this check that checks a +fixed and large set of standard library functions. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert-err33-c.rst b/clang-tools-extra/docs/clang-tidy/checks/cert-err33-c.rst new file mode 100644 index 0000000000000..945bdce6d3296 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/cert-err33-c.rst @@ -0,0 +1,199 @@ +.. title:: clang-tidy - cert-err33-c +cert-err33-c +============ + +Warns on unused function return values. Many of the standard library functions +return a value that indicates whether the call was successful. Ignoring the returned +value can cause unexpected behavior if an error has occurred.
The following +functions are checked: + +* aligned_alloc() +* asctime_s() +* at_quick_exit() +* atexit() +* bsearch() +* bsearch_s() +* btowc() +* c16rtomb() +* c32rtomb() +* calloc() +* clock() +* cnd_broadcast() +* cnd_init() +* cnd_signal() +* cnd_timedwait() +* cnd_wait() +* ctime_s() +* fclose() +* fflush() +* fgetc() +* fgetpos() +* fgets() +* fgetwc() +* fopen() +* fopen_s() +* fprintf() +* fprintf_s() +* fputc() +* fputs() +* fputwc() +* fputws() +* fread() +* freopen() +* freopen_s() +* fscanf() +* fscanf_s() +* fseek() +* fsetpos() +* ftell() +* fwprintf() +* fwprintf_s() +* fwrite() +* fwscanf() +* fwscanf_s() +* getc() +* getchar() +* getenv() +* getenv_s() +* gets_s() +* getwc() +* getwchar() +* gmtime() +* gmtime_s() +* localtime() +* localtime_s() +* malloc() +* mbrtoc16() +* mbrtoc32() +* mbsrtowcs() +* mbsrtowcs_s() +* mbstowcs() +* mbstowcs_s() +* memchr() +* mktime() +* mtx_init() +* mtx_lock() +* mtx_timedlock() +* mtx_trylock() +* mtx_unlock() +* printf_s() +* putc() +* putwc() +* raise() +* realloc() +* remove() +* rename() +* setlocale() +* setvbuf() +* scanf() +* scanf_s() +* signal() +* snprintf() +* snprintf_s() +* sprintf() +* sprintf_s() +* sscanf() +* sscanf_s() +* strchr() +* strerror_s() +* strftime() +* strpbrk() +* strrchr() +* strstr() +* strtod() +* strtof() +* strtoimax() +* strtok() +* strtok_s() +* strtol() +* strtold() +* strtoll() +* strtoumax() +* strtoul() +* strtoull() +* strxfrm() +* swprintf() +* swprintf_s() +* swscanf() +* swscanf_s() +* thrd_create() +* thrd_detach() +* thrd_join() +* thrd_sleep() +* time() +* timespec_get() +* tmpfile() +* tmpfile_s() +* tmpnam() +* tmpnam_s() +* tss_create() +* tss_get() +* tss_set() +* ungetc() +* ungetwc() +* vfprintf() +* vfprintf_s() +* vfscanf() +* vfscanf_s() +* vfwprintf() +* vfwprintf_s() +* vfwscanf() +* vfwscanf_s() +* vprintf_s() +* vscanf() +* vscanf_s() +* vsnprintf() +* vsnprintf_s() +* vsprintf() +* vsprintf_s() +* vsscanf() +* vsscanf_s() +* vswprintf() +* vswprintf_s() +* vswscanf() +* vswscanf_s() +* vwprintf_s() +* vwscanf() +* vwscanf_s() +* wcrtomb() +* wcschr() +* wcsftime() +* wcspbrk() +* wcsrchr() +* wcsrtombs() +* wcsrtombs_s() +* wcsstr() +* wcstod() +* wcstof() +* wcstoimax() +* wcstok() +* wcstok_s() +* wcstol() +* wcstold() +* wcstoll() +* wcstombs() +* wcstombs_s() +* wcstoumax() +* wcstoul() +* wcstoull() +* wcsxfrm() +* wctob() +* wctrans() +* wctype() +* wmemchr() +* wprintf_s() +* wscanf() +* wscanf_s() + +This check is an alias of the check `bugprone-unused-return-value `_ +with a fixed set of functions. + +The check corresponds to a part of CERT C Coding Standard rule `ERR33-C. +Detect and handle standard library errors +`_. +The list of checked functions is taken from the rule, with the following exception: + +* The check cannot differentiate whether a function is called with a ``NULL`` + argument. Therefore the following functions are not checked: + ``mblen``, ``mbrlen``, ``mbrtowc``, ``mbtowc``, ``wctomb``, ``wctomb_s`` diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 862817a9b5f68..c9b4daf42ef04 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -12,6 +12,7 @@ Clang-Tidy Checks ..
csv-table:: :header: "Name", "Offers fixes" + `abseil-cleanup-ctad `_, "Yes" `abseil-duration-addition `_, "Yes" `abseil-duration-comparison `_, "Yes" `abseil-duration-conversion-cast `_, "Yes" @@ -333,6 +334,7 @@ Clang-Tidy Checks `cert-dcl03-c `_, `misc-static-assert `_, "Yes" `cert-dcl16-c `_, `readability-uppercase-literal-suffix `_, "Yes" `cert-dcl37-c `_, `bugprone-reserved-identifier `_, "Yes" + `cert-err33-c `_, `bugprone-unused-return-value `_, `cert-dcl51-cpp `_, `bugprone-reserved-identifier `_, "Yes" `cert-dcl54-cpp `_, `misc-new-delete-overloads `_, `cert-dcl59-cpp `_, `google-build-namespaces `_, @@ -447,3 +449,4 @@ Clang-Tidy Checks `hicpp-vararg `_, `cppcoreguidelines-pro-type-vararg `_, `llvm-else-after-return `_, `readability-else-after-return `_, "Yes" `llvm-qualified-auto `_, `readability-qualified-auto `_, "Yes" + \ No newline at end of file diff --git a/clang-tools-extra/docs/doxygen.cfg.in b/clang-tools-extra/docs/doxygen.cfg.in index d778be30b63e5..7e1d47a7a95a5 100644 --- a/clang-tools-extra/docs/doxygen.cfg.in +++ b/clang-tools-extra/docs/doxygen.cfg.in @@ -1230,7 +1230,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# YES) or that it should be included in the main .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h index b38da9999c52f..ed4373394351b 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h +++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-identifier-naming/system/coroutines.h @@ -1,7 +1,6 @@ #pragma once namespace std { -namespace experimental { template struct coroutine_traits { @@ -13,7 +12,6 @@ struct coroutine_handle { static constexpr coroutine_handle from_address(void *addr) noexcept { return {}; }; }; -} // namespace experimental } // namespace std struct never_suspend { diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil-cleanup-ctad.cpp b/clang-tools-extra/test/clang-tidy/checkers/abseil-cleanup-ctad.cpp new file mode 100644 index 0000000000000..c023521bb2611 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/abseil-cleanup-ctad.cpp @@ -0,0 +1,115 @@ +// RUN: %check_clang_tidy %s abseil-cleanup-ctad -std=c++17 %t + +namespace std { + +template +struct is_same { + static const bool value = false; +}; + +template +struct is_same { static const bool value = true; }; + +template +class function { +public: + template + function(T) {} + function(const function &) {} +}; + +} // namespace std + +namespace absl { + +namespace cleanup_internal { + +struct Tag {}; + +template +class Storage { +public: + Storage() = delete; + + explicit Storage(Callback callback) {} + + Storage(Storage &&other) {} + + Storage(const Storage &other) = delete; + + Storage &operator=(Storage &&other) = delete; + + Storage &operator=(const Storage &other) = delete; + +private: + bool is_callback_engaged_; + alignas(Callback) char callback_buffer_[sizeof(Callback)]; +}; + +} // namespace cleanup_internal + +template +class Cleanup final { +public: + Cleanup(Callback callback) // NOLINT + : storage_(static_cast(callback)) {} + + Cleanup(Cleanup &&other) = default; + + void 
Cancel() &&; + + void Invoke() &&; + + ~Cleanup(); + +private: + cleanup_internal::Storage storage_; +}; + +template +Cleanup(Callback callback) -> Cleanup; + +template +absl::Cleanup MakeCleanup(Callback callback) { + return {static_cast(callback)}; +} + +} // namespace absl + +void test() { + auto a = absl::MakeCleanup([] {}); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer absl::Cleanup's class template argument deduction pattern in C++17 and higher + // CHECK-FIXES: {{^}} absl::Cleanup a = [] {};{{$}} + + // Removes extra parens + auto b = absl::MakeCleanup(([] {})); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer absl::Cleanup{{.*}}C++17 and higher + // CHECK-FIXES: {{^}} absl::Cleanup b = [] {};{{$}} + + auto c = absl::MakeCleanup(std::function([] {})); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer absl::Cleanup{{.*}}C++17 and higher + // CHECK-FIXES: {{^}} absl::Cleanup c = std::function([] {});{{$}} + + // Removes extra parens + auto d = absl::MakeCleanup((std::function([] {}))); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: prefer absl::Cleanup{{.*}}C++17 and higher + // CHECK-FIXES: {{^}} absl::Cleanup d = std::function([] {});{{$}} + + const auto e = absl::MakeCleanup([] {}); + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: prefer absl::Cleanup{{.*}}C++17 and higher + // CHECK-FIXES: {{^}} const absl::Cleanup e = [] {};{{$}} + + // Removes extra parens + const auto f = absl::MakeCleanup(([] {})); + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: prefer absl::Cleanup{{.*}}C++17 and higher + // CHECK-FIXES: {{^}} const absl::Cleanup f = [] {};{{$}} + + const auto g = absl::MakeCleanup(std::function([] {})); + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: prefer absl::Cleanup{{.*}}C++17 and higher + // CHECK-FIXES: {{^}} const absl::Cleanup g = std::function([] {});{{$}} + + // Removes extra parens + const auto h = absl::MakeCleanup((std::function([] {}))); + // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: prefer absl::Cleanup{{.*}}C++17 and higher + // CHECK-FIXES: {{^}} const absl::Cleanup h = std::function([] {});{{$}} +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/cert-err33-c.c b/clang-tools-extra/test/clang-tidy/checkers/cert-err33-c.c new file mode 100644 index 0000000000000..b28b54366b5e5 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/cert-err33-c.c @@ -0,0 +1,25 @@ +// RUN: %check_clang_tidy %s cert-err33-c %t + +typedef __SIZE_TYPE__ size_t; +void *aligned_alloc(size_t alignment, size_t size); +void test_aligned_alloc() { + aligned_alloc(2, 10); + // CHECK-NOTES: [[@LINE-1]]:3: warning: the value returned by this function should be used + // CHECK-NOTES: [[@LINE-2]]:3: note: cast the expression to void to silence this warning +} + +long strtol(const char *restrict nptr, char **restrict endptr, int base); +void test_strtol() { + strtol("123", 0, 10); + // CHECK-NOTES: [[@LINE-1]]:3: warning: the value returned by this function should be used + // CHECK-NOTES: [[@LINE-2]]:3: note: cast the expression to void to silence this warning +} + +typedef char wchar_t; +int wscanf_s(const wchar_t *restrict format, ...); +void test_wscanf_s() { + int Val; + wscanf_s("%i", &Val); + // CHECK-NOTES: [[@LINE-1]]:3: warning: the value returned by this function should be used + // CHECK-NOTES: [[@LINE-2]]:3: note: cast the expression to void to silence this warning +} diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-virtual-class-destructor.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-virtual-class-destructor.cpp 
index 3fe392e05a95f..51535f89ac43d 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-virtual-class-destructor.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-virtual-class-destructor.cpp @@ -231,9 +231,8 @@ struct DerivedFromTemplateVirtualBaseStruct : T { DerivedFromTemplateVirtualBaseStruct InstantiationWithPublicVirtualBaseStruct; // Derived from template, base has *not* virtual dtor -// CHECK-MESSAGES: :[[@LINE+8]]:8: warning: destructor of 'DerivedFromTemplateNonVirtualBaseStruct' is public and non-virtual [cppcoreguidelines-virtual-class-destructor] -// CHECK-MESSAGES: :[[@LINE+7]]:8: note: make it public and virtual -// CHECK-MESSAGES: :[[@LINE+6]]:8: warning: destructor of 'DerivedFromTemplateNonVirtualBaseStruct' is public and non-virtual [cppcoreguidelines-virtual-class-destructor] +// CHECK-MESSAGES: :[[@LINE+7]]:8: warning: destructor of 'DerivedFromTemplateNonVirtualBaseStruct' is public and non-virtual [cppcoreguidelines-virtual-class-destructor] +// CHECK-MESSAGES: :[[@LINE+6]]:8: note: make it public and virtual // CHECK-FIXES: struct DerivedFromTemplateNonVirtualBaseStruct : T { // CHECK-FIXES-NEXT: virtual ~DerivedFromTemplateNonVirtualBaseStruct() = default; // CHECK-FIXES-NEXT: virtual void foo(); @@ -256,9 +255,8 @@ using DerivedFromTemplateVirtualBaseStruct2Typedef = DerivedFromTemplateVirtualB DerivedFromTemplateVirtualBaseStruct2Typedef InstantiationWithPublicVirtualBaseStruct2; // Derived from template, base has *not* virtual dtor, to be used in a typedef -// CHECK-MESSAGES: :[[@LINE+8]]:8: warning: destructor of 'DerivedFromTemplateNonVirtualBaseStruct2' is public and non-virtual [cppcoreguidelines-virtual-class-destructor] -// CHECK-MESSAGES: :[[@LINE+7]]:8: note: make it public and virtual -// CHECK-MESSAGES: :[[@LINE+6]]:8: warning: destructor of 'DerivedFromTemplateNonVirtualBaseStruct2' is public and non-virtual [cppcoreguidelines-virtual-class-destructor] +// CHECK-MESSAGES: :[[@LINE+7]]:8: warning: destructor of 'DerivedFromTemplateNonVirtualBaseStruct2' is public and non-virtual [cppcoreguidelines-virtual-class-destructor] +// CHECK-MESSAGES: :[[@LINE+6]]:8: note: make it public and virtual // CHECK-FIXES: struct DerivedFromTemplateNonVirtualBaseStruct2 : T { // CHECK-FIXES-NEXT: virtual ~DerivedFromTemplateNonVirtualBaseStruct2() = default; // CHECK-FIXES-NEXT: virtual void foo(); diff --git a/clang-tools-extra/unittests/clang-move/ClangMoveTests.cpp b/clang-tools-extra/unittests/clang-move/ClangMoveTests.cpp index b9553e111a578..082779358fbfb 100644 --- a/clang-tools-extra/unittests/clang-move/ClangMoveTests.cpp +++ b/clang-tools-extra/unittests/clang-move/ClangMoveTests.cpp @@ -208,7 +208,9 @@ runClangMoveOnCode(const move::MoveDefinitionSpec &Spec, DeclarationReporter *const Reporter = nullptr) { clang::RewriterTestContext Context; - Context.InMemoryFileSystem->setCurrentWorkingDirectory(WorkingDir); + llvm::SmallString<16> Dir(WorkingDir); + llvm::sys::path::native(Dir); + Context.InMemoryFileSystem->setCurrentWorkingDirectory(Dir); std::map FileToFileID; @@ -224,13 +226,12 @@ runClangMoveOnCode(const move::MoveDefinitionSpec &Spec, CreateFiles(TestCCName, CC); std::map FileToReplacements; - ClangMoveContext MoveContext = {Spec, FileToReplacements, WorkingDir, "LLVM", + ClangMoveContext MoveContext = {Spec, FileToReplacements, Dir.c_str(), "LLVM", Reporter != nullptr}; auto Factory = std::make_unique( &MoveContext, Reporter); - // std::string IncludeArg = Twine("-I" + WorkingDir; 
tooling::runToolOnCodeWithArgs( Factory->create(), CC, Context.InMemoryFileSystem, {"-std=c++11", "-fparse-all-comments", "-I."}, TestCCName, "clang-move", diff --git a/clang/docs/ClangCommandLineReference.rst b/clang/docs/ClangCommandLineReference.rst index 31e7cd342c267..94eb3fec8a23c 100644 --- a/clang/docs/ClangCommandLineReference.rst +++ b/clang/docs/ClangCommandLineReference.rst @@ -483,7 +483,7 @@ Enable migration to use NS\_NONATOMIC\_IOSONLY macro for setting property's 'ato Enable migration to annotate property with NS\_RETURNS\_INNER\_POINTER -.. option:: -objcmt-whitelist-dir-path=, -objcmt-white-list-dir-path= +.. option:: -objcmt-allowlist-dir-path=, -objcmt-whitelist-dir-path=, -objcmt-white-list-dir-path= Only modify files with a filename contained in the provided directory path diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index e9a381c93a46b..2d4e1b56dcb59 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -149,7 +149,7 @@ the configuration (without a prefix: ``Auto``). `_ * ``Chromium`` A style complying with `Chromium's style guide - `_ + `_ * ``Mozilla`` A style complying with `Mozilla's style guide `_ diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 1a12f40753cc4..f30ef0f644dc3 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2968,7 +2968,7 @@ C++ Coroutines support builtins Clang provides experimental builtins to support C++ Coroutines as defined by https://wg21.link/P0057. The following four are intended to be used by the -standard library to implement `std::experimental::coroutine_handle` type. +standard library to implement the ``std::coroutine_handle`` type. **Syntax**: diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index 3977d26414521..9d2398effff10 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -1190,6 +1190,10 @@

Node Matchers

+Matcher<LambdaCapture>lambdaCaptureMatcher<LambdaCapture>... +

+
+
 Matcher<NestedNameSpecifierLoc>nestedNameSpecifierLocMatcher<NestedNameSpecifierLoc>...
 
Same as nestedNameSpecifier but matches NestedNameSpecifierLoc.
 
@@ -4514,6 +4518,42 @@

Narrowing Matchers


 
 
+Matcher<LambdaCapture>capturesThis
+
Matches a `LambdaCapture` that refers to 'this'.
+
+Given
+class C {
+  int cc;
+  int f() {
+    auto l = [this]() { return cc; };
+    return l();
+  }
+};
+lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis())))
+  matches `[this]() { return cc; }`.
+
+ + +Matcher<LambdaCapture>isImplicit +
Matches an entity that has been implicitly added by the compiler (e.g.
+implicit default/copy constructors).
+
+ + +Matcher<LambdaExpr>hasAnyCaptureLambdaCaptureMatcher InnerMatcher +
Matches any capture in a lambda expression.
+
+Given
+  void foo() {
+    int t = 5;
+    auto f = [=](){ return t; };
+  }
+lambdaExpr(hasAnyCapture(lambdaCapture())) and
+lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("t")))))
+  both match `[=](){ return t; }`.
+
+ + Matcher<MemberExpr>isArrow
Matches member expressions that are called with '->' as opposed
 to '.'.
@@ -8314,30 +8354,20 @@ 

AST Traversal Matchers

-Matcher<LambdaExpr>hasAnyCaptureMatcher<CXXThisExpr> InnerMatcher -
Matches any capture of 'this' in a lambda expression.
-
-Given
-  struct foo {
-    void bar() {
-      auto f = [this](){};
-    }
-  }
-lambdaExpr(hasAnyCapture(cxxThisExpr()))
-  matches [this](){};
-
- - -Matcher<LambdaExpr>hasAnyCaptureMatcher<VarDecl> InnerMatcher -
Matches any capture of a lambda expression.
+Matcher<LambdaCapture>capturesVarMatcher<VarDecl> InnerMatcher
+
Matches a `LambdaCapture` that refers to the specified `VarDecl`. The
+`VarDecl` can be a separate variable that is captured by value or
+reference, or a synthesized variable if the capture has an initializer.
 
 Given
   void foo() {
     int x;
     auto f = [x](){};
+    auto g = [x = 1](){};
   }
-lambdaExpr(hasAnyCapture(anything()))
-  matches [x](){};
+In the matcher
+lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("x"))))),
+capturesVar(hasName("x")) matches `x` and `x = 1`.
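A minimal sketch of how the matchers documented above compose in a MatchFinder-based tool, assuming only what this patch adds (``lambdaCapture``, ``capturesThis``, ``capturesVar``); the ``CaptureHandler`` and ``registerCaptureMatchers`` names are hypothetical:

// Sketch only: the harness below is invented for illustration; the matcher
// expressions are the ones documented in the reference entries above.
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"

using namespace clang;
using namespace clang::ast_matchers;

class CaptureHandler : public MatchFinder::MatchCallback {
public:
  void run(const MatchFinder::MatchResult &Result) override {
    // Each match binds the enclosing lambda; dump it for inspection.
    if (const auto *Lambda = Result.Nodes.getNodeAs<LambdaExpr>("lambda"))
      Lambda->dump();
  }
};

void registerCaptureMatchers(MatchFinder &Finder, CaptureHandler &Handler) {
  // Lambdas that capture `this`, explicitly or implicitly.
  Finder.addMatcher(
      lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis()))).bind("lambda"),
      &Handler);
  // Lambdas that capture a variable named "t": by value, by reference, or
  // through an init-capture such as [t = 1].
  Finder.addMatcher(
      lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("t")))))
          .bind("lambda"),
      &Handler);
}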
 
diff --git a/clang/docs/LibFormat.rst b/clang/docs/LibFormat.rst index 4ea84e658d1b0..833f768c54a64 100644 --- a/clang/docs/LibFormat.rst +++ b/clang/docs/LibFormat.rst @@ -53,7 +53,7 @@ several style guides are hard-coded: FormatStyle getGoogleStyle(); /// Returns a format style complying with Chromium's style guide: - /// https://chromium.googlesource.com/chromium/src/+/master/styleguide/styleguide.md + /// https://chromium.googlesource.com/chromium/src/+/refs/heads/main/styleguide/styleguide.md FormatStyle getChromiumStyle(); /// Returns a format style complying with the GNU coding standards: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b4595b20def53..56ed6d28949f3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -193,6 +193,8 @@ Arm and AArch64 Support in Clang - Support has been added for the following processors (command-line identifiers in parentheses): - Arm Cortex-A510 (``cortex-a510``) + - Arm Cortex-X2 (``cortex-x2``) + - The -mtune flag is no longer ignored for AArch64. It is now possible to tune code generation for a particular CPU with -mtune without setting any architectural features. For example, compiling with @@ -200,7 +202,6 @@ Arm and AArch64 Support in Clang architecture features, but will enable certain optimizations specific to Cortex-A57 CPUs and enable the use of a more accurate scheduling model. - Internal API Changes -------------------- @@ -219,6 +220,13 @@ AST Matchers matcher or the ``hasReturnTypeLoc`` matcher. The addition of these matchers was made possible by changes to the handling of ``TypeLoc`` nodes that allows them to enjoy the same static type checking as other AST node kinds. +- ``LambdaCapture`` AST Matchers are now available. These matchers allow for + the binding of ``LambdaCapture`` nodes. The ``LambdaCapture`` matchers added + include the ``lambdaCapture`` node matcher, the ``capturesVar`` traversal + matcher, and ``capturesThis`` narrowing matcher. +- The ``hasAnyCapture`` matcher now only accepts an inner matcher of type + ``Matcher``. The matcher originally accepted an inner matcher + of type ``Matcher`` or ``Matcher``. clang-format ------------ diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 68c112c964f28..8c6922db6b37d 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3139,7 +3139,7 @@ extension should use reserved identifier prefix e.g. amd, arm, intel. Clang also supports language extensions documented in `The OpenCL C Language Extensions Documentation -`_. +`_. OpenCL-Specific Attributes -------------------------- diff --git a/clang/docs/doxygen.cfg.in b/clang/docs/doxygen.cfg.in index 449552d99d46b..39a346409b935 100644 --- a/clang/docs/doxygen.cfg.in +++ b/clang/docs/doxygen.cfg.in @@ -1219,7 +1219,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# YES) or that it should be included in the main .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
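To make the ``hasAnyCapture`` signature change in the release notes above concrete, a hedged before/after sketch of a typical call-site migration (illustrative, not taken from this patch):

// Before: hasAnyCapture() accepted Matcher<CXXThisExpr> or Matcher<VarDecl>
// directly; both overloads are removed by this change.
//   lambdaExpr(hasAnyCapture(cxxThisExpr()))
//   lambdaExpr(hasAnyCapture(varDecl(hasName("x"))))
//
// After: the inner matcher must be a Matcher<LambdaCapture>, built from
// lambdaCapture() plus the new capturesThis()/capturesVar() matchers.
#include "clang/ASTMatchers/ASTMatchers.h"

using namespace clang::ast_matchers;

auto CapturesThisLambda =
    lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis())));
auto CapturesXLambda =
    lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("x")))));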
diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt index 45451c9090b50..8b3b480f719e6 100644 --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -7406,7 +7406,7 @@ mlir/lib/ExecutionEngine/JitRunner.cpp mlir/lib/ExecutionEngine/OptUtils.cpp mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp mlir/lib/ExecutionEngine/RunnerUtils.cpp -mlir/lib/ExecutionEngine/SparseUtils.cpp +mlir/lib/ExecutionEngine/SparseTensorUtils.cpp mlir/lib/Interfaces/CallInterfaces.cpp mlir/lib/Interfaces/CastInterfaces.cpp mlir/lib/Interfaces/ControlFlowInterfaces.cpp diff --git a/clang/include/clang/AST/ASTTypeTraits.h b/clang/include/clang/AST/ASTTypeTraits.h index 144977b3cd219..6d96146a4d455 100644 --- a/clang/include/clang/AST/ASTTypeTraits.h +++ b/clang/include/clang/AST/ASTTypeTraits.h @@ -17,6 +17,7 @@ #include "clang/AST/ASTFwd.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/TypeLoc.h" @@ -64,6 +65,7 @@ class ASTNodeKind { static ASTNodeKind getFromNode(const Stmt &S); static ASTNodeKind getFromNode(const Type &T); static ASTNodeKind getFromNode(const TypeLoc &T); + static ASTNodeKind getFromNode(const LambdaCapture &L); static ASTNodeKind getFromNode(const OMPClause &C); static ASTNodeKind getFromNode(const Attr &A); /// \} @@ -131,6 +133,7 @@ class ASTNodeKind { NKI_None, NKI_TemplateArgument, NKI_TemplateArgumentLoc, + NKI_LambdaCapture, NKI_TemplateName, NKI_NestedNameSpecifierLoc, NKI_QualType, @@ -197,6 +200,7 @@ class ASTNodeKind { KIND_TO_KIND_ID(CXXCtorInitializer) KIND_TO_KIND_ID(TemplateArgument) KIND_TO_KIND_ID(TemplateArgumentLoc) +KIND_TO_KIND_ID(LambdaCapture) KIND_TO_KIND_ID(TemplateName) KIND_TO_KIND_ID(NestedNameSpecifier) KIND_TO_KIND_ID(NestedNameSpecifierLoc) @@ -540,6 +544,10 @@ template <> struct DynTypedNode::BaseConverter : public ValueConverter {}; +template <> +struct DynTypedNode::BaseConverter + : public ValueConverter {}; + template <> struct DynTypedNode::BaseConverter< TemplateName, void> : public ValueConverter {}; diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 2408f50c074bb..d33babef958ea 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -203,7 +203,8 @@ class TemplateParameterList final void print(raw_ostream &Out, const ASTContext &Context, const PrintingPolicy &Policy, bool OmitTemplateKW = false) const; - static bool shouldIncludeTypeForArgument(const TemplateParameterList *TPL, + static bool shouldIncludeTypeForArgument(const PrintingPolicy &Policy, + const TemplateParameterList *TPL, unsigned Idx); }; diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 749eff57313a8..99554641a64b8 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -8405,6 +8405,96 @@ class OMPFilterClause final : public OMPClause, public OMPClauseWithPreInit { } }; +/// This represents 'bind' clause in the '#pragma omp ...' directives. +/// +/// \code +/// #pragma omp loop bind(parallel) +/// \endcode +class OMPBindClause final : public OMPClause { + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// The binding kind of 'bind' clause. + OpenMPBindClauseKind Kind = OMPC_BIND_unknown; + + /// Start location of the kind in source code. 
+ SourceLocation KindLoc; + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Set the binding kind. + void setBindKind(OpenMPBindClauseKind K) { Kind = K; } + + /// Set the binding kind location. + void setBindKindLoc(SourceLocation KLoc) { KindLoc = KLoc; } + + /// Build 'bind' clause with kind \a K ('teams', 'parallel', or 'thread'). + /// + /// \param K Binding kind of the clause ('teams', 'parallel' or 'thread'). + /// \param KLoc Starting location of the binding kind. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPBindClause(OpenMPBindClauseKind K, SourceLocation KLoc, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_bind, StartLoc, EndLoc), LParenLoc(LParenLoc), + Kind(K), KindLoc(KLoc) {} + + /// Build an empty clause. + OMPBindClause() + : OMPClause(llvm::omp::OMPC_bind, SourceLocation(), SourceLocation()) {} + +public: + /// Build 'bind' clause with kind \a K ('teams', 'parallel', or 'thread'). + /// + /// \param C AST context + /// \param K Binding kind of the clause ('teams', 'parallel' or 'thread'). + /// \param KLoc Starting location of the binding kind. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + static OMPBindClause *Create(const ASTContext &C, OpenMPBindClauseKind K, + SourceLocation KLoc, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc); + + /// Build an empty 'bind' clause. + /// + /// \param C AST context + static OMPBindClause *CreateEmpty(const ASTContext &C); + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns kind of the clause. + OpenMPBindClauseKind getBindKind() const { return Kind; } + + /// Returns location of clause kind. + SourceLocation getBindKindLoc() const { return KindLoc; } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_bind; + } +}; + /// This class implements a simple visitor for OMPClause /// subclasses. template class Ptr, typename RetTy> diff --git a/clang/include/clang/AST/PrettyPrinter.h b/clang/include/clang/AST/PrettyPrinter.h index da13d7adee445..c83f0860bd832 100644 --- a/clang/include/clang/AST/PrettyPrinter.h +++ b/clang/include/clang/AST/PrettyPrinter.h @@ -77,7 +77,8 @@ struct PrintingPolicy { SuppressDefinition(false), SuppressDefaultTemplateArguments(false), PrintCanonicalTypes(false), SkipCanonicalizationOfTemplateTypeParms(false), - PrintInjectedClassNameWithArguments(true), UsePreferredNames(true) {} + PrintInjectedClassNameWithArguments(true), UsePreferredNames(true), + UseIntegerTypeSuffixesAlways(false) {} /// Adjust this printing policy for cases where it's known that we're /// printing C++ code (for instance, if AST dumping reaches a C++-only @@ -331,8 +332,15 @@ struct PrintingPolicy { /// written. 
When a template argument is unnamed, printing it results in /// invalid C++ code. unsigned PrintInjectedClassNameWithArguments : 1; + + /// Whether to use C++ template preferred_name attributes when printing + /// templates. unsigned UsePreferredNames : 1; + /// Whether to use type suffixes (e.g., 1U) on integral non-type template + /// parameters. + unsigned UseIntegerTypeSuffixesAlways : 1; + /// Callbacks to use to allow the behavior of printing to be customized. const PrintingCallbacks *Callbacks = nullptr; }; diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 3746c421a3d05..56f7bd4b7a775 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1681,10 +1681,7 @@ bool RecursiveASTVisitor::TraverseTemplateInstantiations( ClassTemplateDecl *D) { for (auto *SD : D->specializations()) { for (auto *RD : SD->redecls()) { - // We don't want to visit injected-class-names in this traversal. - if (cast(RD)->isInjectedClassName()) - continue; - + assert(!cast(RD)->isInjectedClassName()); switch ( cast(RD)->getSpecializationKind()) { // Visit the implicit instantiations with the requested pattern. @@ -3694,6 +3691,11 @@ bool RecursiveASTVisitor::VisitOMPFilterClause(OMPFilterClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPBindClause(OMPBindClause *C) { + return true; +} + // FIXME: look at the following tricky-seeming exprs to see if we // need to recurse on anything. These are ones that have methods // returning decls or qualtypes or nestednamespecifier -- though I'm diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index dfd1d8bc56fc8..aa49d4cf5e962 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -148,6 +148,7 @@ using CXXBaseSpecifierMatcher = internal::Matcher; using CXXCtorInitializerMatcher = internal::Matcher; using TemplateArgumentMatcher = internal::Matcher; using TemplateArgumentLocMatcher = internal::Matcher; +using LambdaCaptureMatcher = internal::Matcher; using AttrMatcher = internal::Matcher; /// @} @@ -756,7 +757,8 @@ AST_MATCHER_P(ClassTemplateSpecializationDecl, hasSpecializedTemplate, /// Matches an entity that has been implicitly added by the compiler (e.g. /// implicit default/copy constructors). AST_POLYMORPHIC_MATCHER(isImplicit, - AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Attr)) { + AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Attr, + LambdaCapture)) { return Node.isImplicit(); } @@ -4588,50 +4590,68 @@ AST_POLYMORPHIC_MATCHER_P(hasAnyArgument, return false; } -/// Matches any capture of a lambda expression. +extern const internal::VariadicAllOfMatcher lambdaCapture; + +/// Matches any capture in a lambda expression. /// /// Given /// \code /// void foo() { -/// int x; -/// auto f = [x](){}; +/// int t = 5; +/// auto f = [=](){ return t; }; /// } /// \endcode -/// lambdaExpr(hasAnyCapture(anything())) -/// matches [x](){}; +/// lambdaExpr(hasAnyCapture(lambdaCapture())) and +/// lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("t"))))) +/// both match `[=](){ return t; }`.
+AST_MATCHER_P(LambdaExpr, hasAnyCapture, LambdaCaptureMatcher, InnerMatcher) { for (const LambdaCapture &Capture : Node.captures()) { - if (Capture.capturesVariable()) { - BoundNodesTreeBuilder Result(*Builder); - if (InnerMatcher.matches(*Capture.getCapturedVar(), Finder, &Result)) { - *Builder = std::move(Result); - return true; - } + clang::ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder); + if (InnerMatcher.matches(Capture, Finder, &Result)) { + *Builder = std::move(Result); + return true; } } return false; } -/// Matches any capture of 'this' in a lambda expression. +/// Matches a `LambdaCapture` that refers to the specified `VarDecl`. The +/// `VarDecl` can be a separate variable that is captured by value or +/// reference, or a synthesized variable if the capture has an initializer. /// /// Given /// \code -/// struct foo { -/// void bar() { -/// auto f = [this](){}; -/// } +/// void foo() { +/// int x; +/// auto f = [x](){}; +/// auto g = [x = 1](){}; /// } /// \endcode -/// lambdaExpr(hasAnyCapture(cxxThisExpr())) -/// matches [this](){}; -AST_MATCHER_P_OVERLOAD(LambdaExpr, hasAnyCapture, - internal::Matcher, InnerMatcher, 1) { - return llvm::any_of(Node.captures(), [](const LambdaCapture &LC) { - return LC.capturesThis(); - }); +/// In the matcher +/// lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("x"))))), +/// capturesVar(hasName("x")) matches `x` and `x = 1`. +AST_MATCHER_P(LambdaCapture, capturesVar, internal::Matcher, + InnerMatcher) { + auto *capturedVar = Node.getCapturedVar(); + return capturedVar && InnerMatcher.matches(*capturedVar, Finder, Builder); } +/// Matches a `LambdaCapture` that refers to 'this'. +/// +/// Given +/// \code +/// class C { +/// int cc; +/// int f() { +/// auto l = [this]() { return cc; }; +/// return l(); +/// } +/// }; +/// \endcode +/// lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis()))) +/// matches `[this]() { return cc; }`. +AST_MATCHER(LambdaCapture, capturesThis) { return Node.capturesThis(); } + /// Matches a constructor call expression which uses list initialization. AST_MATCHER(CXXConstructExpr, isListInitialization) { return Node.isListInitialization(); diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index d5d6a49c24c64..991eb27f628b8 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2452,6 +2452,13 @@ def BTFDeclTag : InheritableAttr { let LangOpts = [COnly]; } +def BTFTypeTag : TypeAttr { + let Spellings = [Clang<"btf_type_tag">]; + let Args = [StringArgument<"BTFTypeTag">]; + let Documentation = [BTFTypeTagDocs]; + let LangOpts = [COnly]; +} + def WebAssemblyExportName : InheritableAttr, TargetSpecificAttr { let Spellings = [Clang<"export_name">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 98a9cfe597aaa..c4253239cbddf 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2071,6 +2071,23 @@ section too. }]; } +def BTFTypeTagDocs : Documentation { + let Category = DocCatType; + let Content = [{ +Clang supports the ``__attribute__((btf_type_tag("ARGUMENT")))`` attribute for +all targets. It only has an effect when ``-g`` is specified on the command line and +is currently silently ignored when not applied to a pointer type (note: this +scenario may be diagnosed in the future).
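A sketch of how the reworked capture matchers compose in a MatchFinder client; the registration function and callback names here are illustrative, not part of this patch:

  #include "clang/ASTMatchers/ASTMatchFinder.h"
  #include "clang/ASTMatchers/ASTMatchers.h"

  using namespace clang::ast_matchers;

  // Binds lambdas capturing a variable named "x" (by value, by reference, or
  // via an init-capture such as [x = 1]), and lambdas capturing 'this'.
  void registerCaptureMatchers(MatchFinder &Finder,
                               MatchFinder::MatchCallback &Callback) {
    Finder.addMatcher(
        lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(hasName("x")))))
            .bind("captures_x"),
        &Callback);
    Finder.addMatcher(
        lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis())))
            .bind("captures_this"),
        &Callback);
  }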
+ +The ``ARGUMENT`` string will be preserved in IR and emitted to DWARF for the +types used in variable declarations, function declarations, or typedef +declarations. + +For BPF targets, the ``ARGUMENT`` string will also be emitted to .BTF ELF +section. + }]; +} + def MipsInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (MIPS)"; diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 097232b261e39..eb4c2f98c7041 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -646,6 +646,8 @@ BUILTIN(__builtin_call_with_static_chain, "v.", "nt") BUILTIN(__builtin_elementwise_abs, "v.", "nct") BUILTIN(__builtin_elementwise_max, "v.", "nct") BUILTIN(__builtin_elementwise_min, "v.", "nct") +BUILTIN(__builtin_reduce_max, "v.", "nct") +BUILTIN(__builtin_reduce_min, "v.", "nct") BUILTIN(__builtin_matrix_transpose, "v.", "nFt") BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt") diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index f82d455506860..cd6b2df10e521 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -780,6 +780,10 @@ BUILTIN(__builtin_cfuged, "ULLiULLiULLi", "") BUILTIN(__builtin_cntlzdm, "ULLiULLiULLi", "") BUILTIN(__builtin_cnttzdm, "ULLiULLiULLi", "") +// Double-double (un)pack +BUILTIN(__builtin_unpack_longdouble, "dLdIi", "") +BUILTIN(__builtin_pack_longdouble, "Lddd", "") + // Generate random number BUILTIN(__builtin_darn, "LLi", "") BUILTIN(__builtin_darn_raw, "LLi", "") diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index d0f78afefd3f0..76ab7770a9351 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -381,7 +381,7 @@ def warn_drv_diagnostics_hotness_requires_pgo : Warning< def warn_drv_clang_unsupported : Warning< "the clang compiler does not support '%0'">; def warn_drv_deprecated_arg : Warning< - "argument '%0' is deprecated%select{|, use '%2' instead}1">, InGroup; + "argument '%0' is deprecated, use '%1' instead">, InGroup; def warn_drv_assuming_mfloat_abi_is : Warning< "unknown platform, assuming -mfloat-abi=%0">; def warn_ignoring_ftabstop_value : Warning< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 3354e5c5bb6ed..b8c0d1deb9043 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1273,8 +1273,9 @@ def OptionIgnored : DiagGroup<"option-ignored">; def UnknownArgument : DiagGroup<"unknown-argument">; // A warning group for warnings about code that clang accepts when -// compiling OpenCL C/C++ but which is not compatible with the SPIR spec. +// compiling OpenCL C/C++ but which is not compatible with the SPIR(-V) spec. def SpirCompat : DiagGroup<"spir-compat">; +def : DiagGroup<"spirv-compat", [SpirCompat]>; // Alias. // Warning for the GlobalISel options. 
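The new reduction builtins take a single vector operand and pick a signed, unsigned, or floating-point reduction based on the element type (see the CGBuiltin.cpp lowering later in this patch). A minimal usage sketch, assuming Clang's ext_vector_type extension:

  typedef int int4 __attribute__((ext_vector_type(4)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  // Lowers to llvm.vector.reduce.smax on signed integer elements.
  int Max4(int4 V) { return __builtin_reduce_max(V); }
  // Lowers to llvm.vector.reduce.fmin on floating-point elements.
  float Min4(float4 V) { return __builtin_reduce_min(V); }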
def GlobalISel : DiagGroup<"global-isel">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6f31843496dc0..60d4b7e457b8e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -866,6 +866,10 @@ def warn_fortify_source_format_overflow : Warning< " but format string expands to at least %2">, InGroup; +def warn_fortify_scanf_overflow : Warning< + "'%0' may overflow; destination buffer in argument %1 has size " + "%2, but the corresponding specifier may require size %3">, + InGroup; /// main() // static main() is not an error in C, just in C++. @@ -3316,7 +3320,8 @@ def warn_assume_aligned_too_great InGroup>; def warn_not_xl_compatible : Warning<"requesting an alignment of 16 bytes or greater for struct" - " members is not binary compatible with AIX XL 16.1 and older">, + " members is not binary compatible with IBM XL C/C++ for AIX" + " 16.1.0 and older">, InGroup; def warn_redeclaration_without_attribute_prev_attribute_ignored : Warning< "%q0 redeclared without %1 attribute: previous %1 ignored">, @@ -9852,6 +9857,8 @@ def err_ppc_builtin_only_on_arch : Error< "this builtin is only valid on POWER%0 or later CPUs">; def err_ppc_builtin_requires_vsx : Error< "this builtin requires VSX to be enabled">; +def err_ppc_builtin_requires_abi : Error< + "this builtin requires ABI -mabi=%0">; def err_ppc_invalid_use_mma_type : Error< "invalid use of PPC MMA type">; def err_ppc_invalid_test_data_class_type : Error< @@ -10766,8 +10773,8 @@ def err_omp_invariant_or_linear_dependency : Error< "expected an integer or a pointer type of the outer loop counter '%0' for non-rectangular nests">; def err_target_unsupported_type - : Error<"%0 requires %select{|%2 bit size}1 %3 type support, but target " - "'%4' does not support it">; + : Error<"%0 requires %select{|%2 bit size}1 %3 %select{|return }4type support," + " but target '%5' does not support it">; def err_omp_lambda_capture_in_declare_target_not_to : Error< "variable captured in declare target region must appear in a to clause">; def err_omp_device_type_mismatch : Error< @@ -11053,19 +11060,25 @@ def err_coroutine_invalid_func_context : Error< "|a function with a deduced return type|a varargs function" "|a consteval function}0">; def err_implied_coroutine_type_not_found : Error< - "%0 type was not found; include before defining " - "a coroutine">; + "%0 type was not found; include before defining " + "a coroutine; include if your version " + "of libcxx is less than 14.0">; +def err_mixed_use_std_and_experimental_namespace_for_coroutine : Error < + "Found mixed use of std namespace and std::experimental namespace for " + "coroutine, which is disallowed. The coroutine components in " + "std::experimental namespace are deprecated.
Please use coroutine components " + "under std namespace.">; def err_implicit_coroutine_std_nothrow_type_not_found : Error< "std::nothrow was not found; include before defining a coroutine which " "uses get_return_object_on_allocation_failure()">; def err_malformed_std_nothrow : Error< "std::nothrow must be a valid variable declaration">; def err_malformed_std_coroutine_handle : Error< - "std::experimental::coroutine_handle must be a class template">; + "std::coroutine_handle isn't a class template">; def err_coroutine_handle_missing_member : Error< - "std::experimental::coroutine_handle missing a member named '%0'">; + "std::coroutine_handle must have a member named '%0'">; def err_malformed_std_coroutine_traits : Error< - "'std::experimental::coroutine_traits' must be a class template">; + "std::coroutine_traits isn't a class template">; def err_implied_std_coroutine_traits_promise_type_not_found : Error< "this function cannot be a coroutine: %q0 has no member named 'promise_type'">; def err_implied_std_coroutine_traits_promise_type_not_class : Error< @@ -11365,7 +11378,7 @@ def err_builtin_invalid_arg_type: Error < "%ordinal0 argument must be a " "%select{vector, integer or floating point type|matrix|" "pointer to a valid matrix element type|" - "signed integer or floating point type}1 (was %2)">; + "signed integer or floating point type|vector type}1 (was %2)">; def err_builtin_matrix_disabled: Error< "matrix types extension is disabled. Pass -fenable-matrix to enable it">; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 2b01a857526ce..c9e06882cc56f 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -149,7 +149,7 @@ LANGOPT(NoMathBuiltin , 1, 0, "disable math builtin functions") LANGOPT(GNUAsm , 1, 1, "GNU-style inline assembly") LANGOPT(Coroutines , 1, 0, "C++20 coroutines") LANGOPT(DllExportInlines , 1, 1, "dllexported classes dllexport inline methods") -LANGOPT(RelaxedTemplateTemplateArgs, 1, 1, "C++17 relaxed matching of template template arguments") +LANGOPT(RelaxedTemplateTemplateArgs, 1, 0, "C++17 relaxed matching of template template arguments") LANGOPT(DoubleSquareBracketAttributes, 1, 0, "'[[]]' attributes extension for all language standard modes") diff --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def index a19daf91578ed..80ebda9179455 100644 --- a/clang/include/clang/Basic/OpenMPKinds.def +++ b/clang/include/clang/Basic/OpenMPKinds.def @@ -62,6 +62,9 @@ #ifndef OPENMP_ADJUST_ARGS_KIND #define OPENMP_ADJUST_ARGS_KIND(Name) #endif +#ifndef OPENMP_BIND_KIND +#define OPENMP_BIND_KIND(Name) +#endif // Static attributes for 'schedule' clause. OPENMP_SCHEDULE_KIND(static) @@ -156,6 +159,12 @@ OPENMP_REDUCTION_MODIFIER(task) OPENMP_ADJUST_ARGS_KIND(nothing) OPENMP_ADJUST_ARGS_KIND(need_device_ptr) +// Binding kinds for the 'bind' clause. +OPENMP_BIND_KIND(teams) +OPENMP_BIND_KIND(parallel) +OPENMP_BIND_KIND(thread) + +#undef OPENMP_BIND_KIND #undef OPENMP_ADJUST_ARGS_KIND #undef OPENMP_REDUCTION_MODIFIER #undef OPENMP_DEVICE_MODIFIER diff --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h index 269f8d96f0bba..e95a717f268df 100644 --- a/clang/include/clang/Basic/OpenMPKinds.h +++ b/clang/include/clang/Basic/OpenMPKinds.h @@ -174,6 +174,13 @@ enum OpenMPAdjustArgsOpKind { OMPC_ADJUST_ARGS_unknown, }; +/// OpenMP bindings for the 'bind' clause. 
+enum OpenMPBindClauseKind { +#define OPENMP_BIND_KIND(Name) OMPC_BIND_##Name, +#include "clang/Basic/OpenMPKinds.def" + OMPC_BIND_unknown +}; + unsigned getOpenMPSimpleClauseType(OpenMPClauseKind Kind, llvm::StringRef Str, const LangOptions &LangOpts); const char *getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, unsigned Type); diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 77a510462a65e..3e1e09417c661 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -203,6 +203,8 @@ class TargetInfo : public virtual TransferrableTargetInfo, bool HasFloat16; bool HasBFloat16; bool HasIbm128; + bool HasLongDouble; + bool HasFPReturn; bool HasStrictFP; unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth; @@ -601,6 +603,13 @@ class TargetInfo : public virtual TransferrableTargetInfo, /// Determine whether the __ibm128 type is supported on this target. virtual bool hasIbm128Type() const { return HasIbm128; } + /// Determine whether the long double type is supported on this target. + virtual bool hasLongDoubleType() const { return HasLongDouble; } + + /// Determine whether return of a floating point value is supported + /// on this target. + virtual bool hasFPReturn() const { return HasFPReturn; } + /// Determine whether constrained floating point is supported on this target. virtual bool hasStrictFP() const { return HasStrictFP; } diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index bae4edb2d1b30..cc242da7f1ca8 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -1916,11 +1916,11 @@ defm vfwredosum : RVVFloatingWidenReductionBuiltin; // 16.1. Vector Mask-Register Logical Instructions def vmand : RVVMaskBinBuiltin; def vmnand : RVVMaskBinBuiltin; -def vmandnot : RVVMaskBinBuiltin; +def vmandn : RVVMaskBinBuiltin; def vmxor : RVVMaskBinBuiltin; def vmor : RVVMaskBinBuiltin; def vmnor : RVVMaskBinBuiltin; -def vmornot : RVVMaskBinBuiltin; +def vmorn : RVVMaskBinBuiltin; def vmxnor : RVVMaskBinBuiltin; // pseudoinstructions def vmclr : RVVMaskNullaryBuiltin; @@ -1929,8 +1929,8 @@ defm vmmv_m : RVVPseudoMaskBuiltin<"vmand", "c">; defm vmnot_m : RVVPseudoMaskBuiltin<"vmnand", "c">; let HasPolicy = false in { -// 16.2. Vector mask population count vpopc -def vpopc : RVVMaskOp0Builtin<"um">; +// 16.2. Vector count population in mask vcpop.m +def vcpop : RVVMaskOp0Builtin<"um">; // 16.3. 
vfirst find-first-set mask bit def vfirst : RVVMaskOp0Builtin<"lm">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3c20d8b76eaa5..8c419d174e56a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -620,12 +620,15 @@ def objcmt_migrate_designated_init : Flag<["-"], "objcmt-migrate-designated-init HelpText<"Enable migration to infer NS_DESIGNATED_INITIALIZER for initializer methods">, MarshallingInfoBitfieldFlag, "FrontendOptions::ObjCMT_DesignatedInitializer">; -def objcmt_whitelist_dir_path: Joined<["-"], "objcmt-whitelist-dir-path=">, Flags<[CC1Option]>, +def objcmt_allowlist_dir_path: Joined<["-"], "objcmt-allowlist-dir-path=">, Flags<[CC1Option]>, HelpText<"Only modify files with a filename contained in the provided directory path">, - MarshallingInfoString>; + MarshallingInfoString>; +def : Joined<["-"], "objcmt-whitelist-dir-path=">, Flags<[CC1Option]>, + HelpText<"Alias for -objcmt-allowlist-dir-path">, + Alias; // The misspelt "white-list" [sic] alias is due for removal. def : Joined<["-"], "objcmt-white-list-dir-path=">, Flags<[CC1Option]>, - Alias; + Alias; // Make sure all other -ccc- options are rejected. def ccc_ : Joined<["-"], "ccc-">, Group, Flags<[Unsupported]>; @@ -2356,9 +2359,9 @@ defm application_extension : BoolFOption<"application-extension", PosFlag, NegFlag>; defm relaxed_template_template_args : BoolFOption<"relaxed-template-template-args", - LangOpts<"RelaxedTemplateTemplateArgs">, DefaultTrue, - PosFlag, - NegFlag>; + LangOpts<"RelaxedTemplateTemplateArgs">, DefaultFalse, + PosFlag, + NegFlag>; defm sized_deallocation : BoolFOption<"sized-deallocation", LangOpts<"SizedDeallocation">, DefaultFalse, PosFlag, diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 41ea45ca0b103..1d9d89a28c6c4 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -373,7 +373,7 @@ class FrontendOptions { ObjCMT_MigrateDecls | ObjCMT_PropertyDotSyntax) }; unsigned ObjCMTAction = ObjCMT_None; - std::string ObjCMTWhiteListPath; + std::string ObjCMTAllowListPath; std::string MTMigrateDir; std::string ARCMTMigrateReportOut; diff --git a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h index 9bb820156c252..121ca893e314f 100644 --- a/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h +++ b/clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h @@ -39,6 +39,9 @@ enum TokenKind { pp_import, pp_pragma_import, pp_pragma_once, + pp_pragma_push_macro, + pp_pragma_pop_macro, + pp_pragma_include_alias, pp_include_next, pp_if, pp_ifdef, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 733853e0d2dd5..8ef599c5cbf4b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1304,6 +1304,10 @@ class Sema final { /// The C++ "std::coroutine_traits" template, which is defined in /// \ ClassTemplateDecl *StdCoroutineTraitsCache; + /// The namespace where coroutine components are defined. In the standard, + /// they are defined in the std namespace; in the previous implementation, + /// they were defined in the std::experimental namespace. + NamespaceDecl *CoroTraitsNamespaceCache; /// The C++ "type_info" declaration, which is defined in \.
RecordDecl *CXXTypeInfoDecl; @@ -5964,6 +5968,7 @@ class Sema final { NamespaceDecl *getOrCreateStdNamespace(); NamespaceDecl *lookupStdExperimentalNamespace(); + NamespaceDecl *getCachedCoroNamespace() { return CoroTraitsNamespaceCache; } CXXRecordDecl *getStdBadAlloc() const; EnumDecl *getStdAlignValT() const; @@ -10614,8 +10619,11 @@ class Sema final { bool buildCoroutineParameterMoves(SourceLocation Loc); VarDecl *buildCoroutinePromise(SourceLocation Loc); void CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body); + /// Lookup 'coroutine_traits' in std namespace and std::experimental + /// namespace. The namespace found is recorded in Namespace. ClassTemplateDecl *lookupCoroutineTraits(SourceLocation KwLoc, - SourceLocation FuncLoc); + SourceLocation FuncLoc, + NamespaceDecl *&Namespace); /// Check that the expression co_await promise.final_suspend() shall not be /// potentially-throwing. bool checkFinalSuspendNoThrow(const Stmt *FinalSuspend); @@ -11765,6 +11773,12 @@ class Sema final { SourceLocation ColonLoc, SourceLocation EndLoc, Expr *Modifier, ArrayRef Locators); + /// Called on a well-formed 'bind' clause. + OMPClause *ActOnOpenMPBindClause(OpenMPBindClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc); /// The kind of conversion being performed. enum CheckedConversionKind { @@ -13101,6 +13115,7 @@ class Sema final { bool SemaBuiltinElementwiseMath(CallExpr *TheCall); bool SemaBuiltinElementwiseMathOneArg(CallExpr *TheCall); + bool SemaBuiltinReduceMath(CallExpr *TheCall); // Matrix builtin handling. ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall, diff --git a/clang/lib/ARCMigrate/ObjCMT.cpp b/clang/lib/ARCMigrate/ObjCMT.cpp index e99c6435062fb..3dfa9a0218a73 100644 --- a/clang/lib/ARCMigrate/ObjCMT.cpp +++ b/clang/lib/ARCMigrate/ObjCMT.cpp @@ -104,7 +104,7 @@ class ObjCMigrateASTConsumer : public ASTConsumer { bool FoundationIncluded; llvm::SmallPtrSet ObjCProtocolDecls; llvm::SmallVector CFFunctionIBCandidates; - llvm::StringSet<> WhiteListFilenames; + llvm::StringSet<> AllowListFilenames; RetainSummaryManager &getSummaryManager(ASTContext &Ctx) { if (!Summaries) @@ -118,14 +118,14 @@ class ObjCMigrateASTConsumer : public ASTConsumer { FileRemapper &remapper, FileManager &fileMgr, const PPConditionalDirectiveRecord *PPRec, Preprocessor &PP, bool isOutputFile, - ArrayRef WhiteList) + ArrayRef AllowList) : MigrateDir(migrateDir), ASTMigrateActions(astMigrateActions), NSIntegerTypedefed(nullptr), NSUIntegerTypedefed(nullptr), Remapper(remapper), FileMgr(fileMgr), PPRec(PPRec), PP(PP), IsOutputFile(isOutputFile), FoundationIncluded(false) { // FIXME: StringSet should have insert(iter, iter) to use here. 
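At the source level, the Sema.h additions above let a coroutine be written against <coroutine> directly, with std::experimental retained only as a deprecated fallback. A minimal C++20 sketch; the Task type is illustrative, not from the patch:

  #include <coroutine>

  struct Task {
    struct promise_type {
      Task get_return_object() { return {}; }
      std::suspend_never initial_suspend() noexcept { return {}; }
      std::suspend_never final_suspend() noexcept { return {}; }
      void return_void() {}
      void unhandled_exception() {}
    };
  };

  // Sema now resolves std::coroutine_traits (falling back to the deprecated
  // std::experimental::coroutine_traits) to locate promise_type.
  Task Answer() { co_return; }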
- for (const std::string &Val : WhiteList) - WhiteListFilenames.insert(Val); + for (const std::string &Val : AllowList) + AllowListFilenames.insert(Val); } protected: @@ -151,10 +151,10 @@ class ObjCMigrateASTConsumer : public ASTConsumer { void HandleTranslationUnit(ASTContext &Ctx) override; bool canModifyFile(StringRef Path) { - if (WhiteListFilenames.empty()) + if (AllowListFilenames.empty()) return true; - return WhiteListFilenames.find(llvm::sys::path::filename(Path)) - != WhiteListFilenames.end(); + return AllowListFilenames.find(llvm::sys::path::filename(Path)) != + AllowListFilenames.end(); } bool canModifyFile(Optional FE) { if (!FE) @@ -1986,7 +1986,7 @@ bool MigrateSourceAction::BeginInvocation(CompilerInstance &CI) { return true; } -static std::vector getWhiteListFilenames(StringRef DirPath) { +static std::vector getAllowListFilenames(StringRef DirPath) { using namespace llvm::sys::fs; using namespace llvm::sys::path; @@ -2017,16 +2017,16 @@ MigrateSourceAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { if (ObjCMTOpts == FrontendOptions::ObjCMT_None) { // If no specific option was given, enable literals+subscripting transforms // by default. - ObjCMTAction |= FrontendOptions::ObjCMT_Literals | - FrontendOptions::ObjCMT_Subscripting; + ObjCMTAction |= + FrontendOptions::ObjCMT_Literals | FrontendOptions::ObjCMT_Subscripting; } CI.getPreprocessor().addPPCallbacks(std::unique_ptr(PPRec)); - std::vector WhiteList = - getWhiteListFilenames(CI.getFrontendOpts().ObjCMTWhiteListPath); + std::vector AllowList = + getAllowListFilenames(CI.getFrontendOpts().ObjCMTAllowListPath); return std::make_unique( CI.getFrontendOpts().OutputFile, ObjCMTAction, Remapper, CI.getFileManager(), PPRec, CI.getPreprocessor(), - /*isOutputFile=*/true, WhiteList); + /*isOutputFile=*/true, AllowList); } namespace { diff --git a/clang/lib/AST/ASTDumper.cpp b/clang/lib/AST/ASTDumper.cpp index 3d368a0a7b63e..c6df61f79e2e8 100644 --- a/clang/lib/AST/ASTDumper.cpp +++ b/clang/lib/AST/ASTDumper.cpp @@ -90,15 +90,7 @@ void ASTDumper::dumpTemplateDeclSpecialization(const SpecializationDecl *D, // FIXME: The redecls() range sometimes has elements of a less-specific // type. (In particular, ClassTemplateSpecializationDecl::redecls() gives // us TagDecls, and should give CXXRecordDecls). - auto *Redecl = dyn_cast(RedeclWithBadType); - if (!Redecl) { - // Found the injected-class-name for a class template. This will be dumped - // as part of its surrounding class so we don't need to dump it here. 
- assert(isa(RedeclWithBadType) && - "expected an injected-class-name"); - continue; - } - + auto *Redecl = cast(RedeclWithBadType); switch (Redecl->getTemplateSpecializationKind()) { case TSK_ExplicitInstantiationDeclaration: case TSK_ExplicitInstantiationDefinition: diff --git a/clang/lib/AST/ASTTypeTraits.cpp b/clang/lib/AST/ASTTypeTraits.cpp index 88f86cbb367ac..b333f4618efb8 100644 --- a/clang/lib/AST/ASTTypeTraits.cpp +++ b/clang/lib/AST/ASTTypeTraits.cpp @@ -26,6 +26,7 @@ const ASTNodeKind::KindInfo ASTNodeKind::AllKindInfo[] = { {NKI_None, ""}, {NKI_None, "TemplateArgument"}, {NKI_None, "TemplateArgumentLoc"}, + {NKI_None, "LambdaCapture"}, {NKI_None, "TemplateName"}, {NKI_None, "NestedNameSpecifierLoc"}, {NKI_None, "QualType"}, diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 5278dbcfd6553..68293b02235b6 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1668,8 +1668,7 @@ void NamedDecl::printNestedNameSpecifier(raw_ostream &OS, if (WithGlobalNsPrefix) OS << "::"; - for (unsigned I = Contexts.size(); I != 0; --I) { - const DeclContext *DC = Contexts[I - 1]; + for (const DeclContext *DC : llvm::reverse(Contexts)) { if (const auto *Spec = dyn_cast(DC)) { OS << Spec->getName(); const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs(); diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp index 78d7b3a3543b0..3ba750ae128b4 100644 --- a/clang/lib/AST/DeclPrinter.cpp +++ b/clang/lib/AST/DeclPrinter.cpp @@ -1111,9 +1111,9 @@ void DeclPrinter::printTemplateArguments(ArrayRef Args, if (TemplOverloaded || !Params) Args[I].print(Policy, Out, /*IncludeType*/ true); else - Args[I].print( - Policy, Out, - TemplateParameterList::shouldIncludeTypeForArgument(Params, I)); + Args[I].print(Policy, Out, + TemplateParameterList::shouldIncludeTypeForArgument( + Policy, Params, I)); } Out << ">"; } @@ -1130,7 +1130,8 @@ void DeclPrinter::printTemplateArguments(ArrayRef Args, else Args[I].getArgument().print( Policy, Out, - TemplateParameterList::shouldIncludeTypeForArgument(Params, I)); + TemplateParameterList::shouldIncludeTypeForArgument(Policy, Params, + I)); } Out << ">"; } diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index fa73c53866490..10f7155fcb969 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -202,8 +202,9 @@ bool TemplateParameterList::hasAssociatedConstraints() const { } bool TemplateParameterList::shouldIncludeTypeForArgument( - const TemplateParameterList *TPL, unsigned Idx) { - if (!TPL || Idx >= TPL->size()) + const PrintingPolicy &Policy, const TemplateParameterList *TPL, + unsigned Idx) { + if (!TPL || Idx >= TPL->size() || Policy.UseIntegerTypeSuffixesAlways) return true; const NamedDecl *TemplParam = TPL->getParam(Idx); if (const auto *ParamValueDecl = diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 92bec7aa3e4a5..2dd3d28419069 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -807,19 +807,18 @@ std::string PredefinedExpr::ComputeName(IdentKind IK, const Decl *CurrentDecl) { std::string TemplateParams; llvm::raw_string_ostream TOut(TemplateParams); - for (SpecsTy::reverse_iterator I = Specs.rbegin(), E = Specs.rend(); - I != E; ++I) { - const TemplateParameterList *Params - = (*I)->getSpecializedTemplate()->getTemplateParameters(); - const TemplateArgumentList &Args = (*I)->getTemplateArgs(); + for (const ClassTemplateSpecializationDecl *D : llvm::reverse(Specs)) { + const TemplateParameterList *Params = + 
D->getSpecializedTemplate()->getTemplateParameters(); + const TemplateArgumentList &Args = D->getTemplateArgs(); assert(Params->size() == Args.size()); for (unsigned i = 0, numParams = Params->size(); i != numParams; ++i) { StringRef Param = Params->getParam(i)->getName(); if (Param.empty()) continue; TOut << Param << " = "; - Args.get(i).print( - Policy, TOut, - TemplateParameterList::shouldIncludeTypeForArgument(Params, i)); + Args.get(i).print(Policy, TOut, + TemplateParameterList::shouldIncludeTypeForArgument( + Policy, Params, i)); TOut << ", "; } } @@ -2415,10 +2414,8 @@ SourceLocation InitListExpr::getEndLoc() const { SourceLocation End = RBraceLoc; if (End.isInvalid()) { // Find the first non-null initializer from the end. - for (InitExprsTy::const_reverse_iterator I = InitExprs.rbegin(), - E = InitExprs.rend(); - I != E; ++I) { - if (Stmt *S = *I) { + for (Stmt *S : llvm::reverse(InitExprs)) { + if (S) { End = S->getEndLoc(); break; } diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 167738e40e9ea..e79db56ee84e7 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3580,7 +3580,7 @@ void CXXNameMangler::mangleAArch64NeonVectorType(const DependentVectorType *T) { // mangling scheme, it will be specified in the next revision. The mangling // scheme is otherwise defined in the appendices to the Procedure Call Standard // for the Arm Architecture, see -// https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#appendix-c-mangling +// https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#appendix-c-mangling void CXXNameMangler::mangleAArch64FixedSveVectorType(const VectorType *T) { assert((T->getVectorKind() == VectorType::SveFixedLengthDataVector || T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) && diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 3581b501d821c..60855793cb60b 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2847,8 +2847,8 @@ void MicrosoftCXXNameMangler::mangleArtificialTagType( // Always start with the unqualified name. mangleSourceName(UnqualifiedName); - for (auto I = NestedNames.rbegin(), E = NestedNames.rend(); I != E; ++I) - mangleSourceName(*I); + for (StringRef N : llvm::reverse(NestedNames)) + mangleSourceName(N); // Terminate the whole name with an '@'. 
Out << '@'; diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 176c040727831..ddc31aa9e93fe 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -161,6 +161,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_uses_allocators: case OMPC_affinity: case OMPC_when: + case OMPC_bind: break; default: break; @@ -259,6 +260,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_uses_allocators: case OMPC_affinity: case OMPC_when: + case OMPC_bind: break; default: break; @@ -1586,6 +1588,16 @@ OMPInitClause *OMPInitClause::CreateEmpty(const ASTContext &C, unsigned N) { return new (Mem) OMPInitClause(N); } +OMPBindClause * +OMPBindClause::Create(const ASTContext &C, OpenMPBindClauseKind K, + SourceLocation KLoc, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) { + return new (C) OMPBindClause(K, KLoc, StartLoc, LParenLoc, EndLoc); +} + +OMPBindClause *OMPBindClause::CreateEmpty(const ASTContext &C) { + return new (C) OMPBindClause(); +} //===----------------------------------------------------------------------===// // OpenMP clauses printing methods //===----------------------------------------------------------------------===// @@ -2297,6 +2309,12 @@ void OMPClausePrinter::VisitOMPFilterClause(OMPFilterClause *Node) { OS << ")"; } +void OMPClausePrinter::VisitOMPBindClause(OMPBindClause *Node) { + OS << "bind(" + << getOpenMPSimpleClauseTypeName(OMPC_bind, unsigned(Node->getBindKind())) + << ")"; +} + void OMPTraitInfo::getAsVariantMatchInfo(ASTContext &ASTCtx, VariantMatchInfo &VMI) const { for (const OMPTraitSet &Set : Sets) { diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index b941e76f77d27..d48c50ffba8cd 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -878,6 +878,7 @@ void OMPClauseProfiler::VisitOMPAffinityClause(const OMPAffinityClause *C) { Profiler->VisitStmt(E); } void OMPClauseProfiler::VisitOMPOrderClause(const OMPOrderClause *C) {} +void OMPClauseProfiler::VisitOMPBindClause(const OMPBindClause *C) {} } // namespace void diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index b3d8a103991eb..5eedbc00092db 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1765,6 +1765,9 @@ void TypePrinter::printAttributedAfter(const AttributedType *T, case attr::ArmMveStrictPolymorphism: OS << "__clang_arm_mve_strict_polymorphism"; break; + case attr::BTFTypeTag: + OS << "btf_type_tag"; + break; } OS << "))"; } @@ -2058,9 +2061,9 @@ printTo(raw_ostream &OS, ArrayRef Args, const PrintingPolicy &Policy, if (!FirstArg) OS << Comma; // Tries to print the argument with location info if exists. 
- printArgument( - Arg, Policy, ArgOS, - TemplateParameterList::shouldIncludeTypeForArgument(TPL, ParmIndex)); + printArgument(Arg, Policy, ArgOS, + TemplateParameterList::shouldIncludeTypeForArgument( + Policy, TPL, ParmIndex)); } StringRef ArgString = ArgOS.str(); diff --git a/clang/lib/AST/VTableBuilder.cpp b/clang/lib/AST/VTableBuilder.cpp index ab18d2f9e1f2e..f938565c3cb4e 100644 --- a/clang/lib/AST/VTableBuilder.cpp +++ b/clang/lib/AST/VTableBuilder.cpp @@ -1418,8 +1418,7 @@ FindNearestOverriddenMethod(const CXXMethodDecl *MD, OverriddenMethodsSetTy OverriddenMethods; ComputeAllOverriddenMethods(MD, OverriddenMethods); - for (const CXXRecordDecl *PrimaryBase : - llvm::make_range(Bases.rbegin(), Bases.rend())) { + for (const CXXRecordDecl *PrimaryBase : llvm::reverse(Bases)) { // Now check the overridden methods. for (const CXXMethodDecl *OverriddenMD : OverriddenMethods) { // We found our overridden method. @@ -3098,8 +3097,7 @@ void VFTableBuilder::AddMethods(BaseSubobject Base, unsigned BaseDepth, } static void PrintBasePath(const VPtrInfo::BasePath &Path, raw_ostream &Out) { - for (const CXXRecordDecl *Elem : - llvm::make_range(Path.rbegin(), Path.rend())) { + for (const CXXRecordDecl *Elem : llvm::reverse(Path)) { Out << "'"; Elem->printQualifiedName(Out); Out << "' in "; diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index b7622e3b51f1e..7680eb38283e7 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -768,6 +768,7 @@ const internal::VariadicDynCastAllOfMatcher const internal::VariadicDynCastAllOfMatcher templateTemplateParmDecl; +const internal::VariadicAllOfMatcher lambdaCapture; const internal::VariadicAllOfMatcher qualType; const internal::VariadicAllOfMatcher type; const internal::VariadicAllOfMatcher typeLoc; diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp index 7b52a083f54c9..395051e0dfae6 100644 --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp @@ -106,7 +106,6 @@ RegistryMaps::RegistryMaps() { std::make_unique()); REGISTER_OVERLOADED_2(callee); - REGISTER_OVERLOADED_2(hasAnyCapture); REGISTER_OVERLOADED_2(hasPrefix); REGISTER_OVERLOADED_2(hasType); REGISTER_OVERLOADED_2(ignoringParens); @@ -157,6 +156,8 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(builtinType); REGISTER_MATCHER(cStyleCastExpr); REGISTER_MATCHER(callExpr); + REGISTER_MATCHER(capturesThis); + REGISTER_MATCHER(capturesVar); REGISTER_MATCHER(caseStmt); REGISTER_MATCHER(castExpr); REGISTER_MATCHER(characterLiteral); @@ -264,6 +265,7 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(hasAnyBase); REGISTER_MATCHER(hasAnyBinding); REGISTER_MATCHER(hasAnyBody); + REGISTER_MATCHER(hasAnyCapture); REGISTER_MATCHER(hasAnyClause); REGISTER_MATCHER(hasAnyConstructorInitializer); REGISTER_MATCHER(hasAnyDeclaration); @@ -465,6 +467,7 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(lValueReferenceType); REGISTER_MATCHER(labelDecl); REGISTER_MATCHER(labelStmt); + REGISTER_MATCHER(lambdaCapture); REGISTER_MATCHER(lambdaExpr); REGISTER_MATCHER(linkageSpecDecl); REGISTER_MATCHER(materializeTemporaryExpr); diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index 11987dfd55656..abf65e3efce98 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1801,16 +1801,11 @@ void CFGBuilder::addLifetimeEnds(LocalScope::const_iterator B, autoCreateBlock(); // object 
with trivial destructor end their lifetime last (when storage // duration ends) - for (SmallVectorImpl::reverse_iterator I = DeclsTrivial.rbegin(), - E = DeclsTrivial.rend(); - I != E; ++I) - appendLifetimeEnds(Block, *I, S); + for (VarDecl *VD : llvm::reverse(DeclsTrivial)) + appendLifetimeEnds(Block, VD, S); - for (SmallVectorImpl::reverse_iterator - I = DeclsNonTrivial.rbegin(), - E = DeclsNonTrivial.rend(); - I != E; ++I) - appendLifetimeEnds(Block, *I, S); + for (VarDecl *VD : llvm::reverse(DeclsNonTrivial)) + appendLifetimeEnds(Block, VD, S); } /// Add to current block markers for ending scopes. @@ -1823,9 +1818,8 @@ void CFGBuilder::addScopesEnd(LocalScope::const_iterator B, autoCreateBlock(); - for (auto I = DeclsWithEndedScope.rbegin(), E = DeclsWithEndedScope.rend(); - I != E; ++I) - appendScopeEnd(Block, *I, S); + for (VarDecl *VD : llvm::reverse(DeclsWithEndedScope)) + appendScopeEnd(Block, VD, S); return; } @@ -1850,24 +1844,22 @@ void CFGBuilder::addAutomaticObjDtors(LocalScope::const_iterator B, for (LocalScope::const_iterator I = B; I != E; ++I) Decls.push_back(*I); - for (SmallVectorImpl::reverse_iterator I = Decls.rbegin(), - E = Decls.rend(); - I != E; ++I) { - if (hasTrivialDestructor(*I)) { + for (VarDecl *VD : llvm::reverse(Decls)) { + if (hasTrivialDestructor(VD)) { // If AddScopes is enabled and *I is a first variable in a scope, add a // ScopeEnd marker in a Block. - if (BuildOpts.AddScopes && DeclsWithEndedScope.count(*I)) { + if (BuildOpts.AddScopes && DeclsWithEndedScope.count(VD)) { autoCreateBlock(); - appendScopeEnd(Block, *I, S); + appendScopeEnd(Block, VD, S); } continue; } // If this destructor is marked as a no-return destructor, we need to // create a new block for the destructor which does not have as a successor // anything built thus far: control won't flow out of this block. - QualType Ty = (*I)->getType(); + QualType Ty = VD->getType(); if (Ty->isReferenceType()) { - Ty = getReferenceInitTemporaryType((*I)->getInit()); + Ty = getReferenceInitTemporaryType(VD->getInit()); } Ty = Context->getBaseElementType(Ty); @@ -1877,9 +1869,9 @@ void CFGBuilder::addAutomaticObjDtors(LocalScope::const_iterator B, autoCreateBlock(); // Add ScopeEnd just after automatic obj destructor. - if (BuildOpts.AddScopes && DeclsWithEndedScope.count(*I)) - appendScopeEnd(Block, *I, S); - appendAutomaticObjDtor(Block, *I, S); + if (BuildOpts.AddScopes && DeclsWithEndedScope.count(VD)) + appendScopeEnd(Block, VD, S); + appendAutomaticObjDtor(Block, VD, S); } } diff --git a/clang/lib/Analysis/CloneDetection.cpp b/clang/lib/Analysis/CloneDetection.cpp index 0a1122bd5a4af..65ac4ad6a5e53 100644 --- a/clang/lib/Analysis/CloneDetection.cpp +++ b/clang/lib/Analysis/CloneDetection.cpp @@ -147,9 +147,8 @@ void OnlyLargestCloneConstraint::constrain( // Erasing a list of indexes from the vector should be done with decreasing // indexes. As IndexesToRemove is constructed with increasing values, we just // reverse iterate over it to get the desired order. 
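The ordering requirement stated in the comment above is worth spelling out: each erase shifts every later element left, so ascending indexes would soon point at the wrong elements, while a descending walk leaves the not-yet-erased indexes stable. A standalone sketch of the same pattern, with hypothetical names:

  #include "llvm/ADT/STLExtras.h"
  #include <vector>

  // IndexesToRemove is sorted ascending, so iterate it in reverse and erase
  // highest-first; earlier erasures then never invalidate a pending index.
  void eraseIndexes(std::vector<int> &Result,
                    const std::vector<unsigned> &IndexesToRemove) {
    for (unsigned I : llvm::reverse(IndexesToRemove))
      Result.erase(Result.begin() + I);
  }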
- for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) { - Result.erase(Result.begin() + *I); - } + for (unsigned I : llvm::reverse(IndexesToRemove)) + Result.erase(Result.begin() + I); } bool FilenamePatternConstraint::isAutoGenerated( diff --git a/clang/lib/Analysis/ReachableCode.cpp b/clang/lib/Analysis/ReachableCode.cpp index be4ab0be373ee..5be8180113da8 100644 --- a/clang/lib/Analysis/ReachableCode.cpp +++ b/clang/lib/Analysis/ReachableCode.cpp @@ -87,10 +87,8 @@ static bool isDeadReturn(const CFGBlock *B, const Stmt *S) { // block, or may be in a subsequent block because of destructors. const CFGBlock *Current = B; while (true) { - for (CFGBlock::const_reverse_iterator I = Current->rbegin(), - E = Current->rend(); - I != E; ++I) { - if (Optional CS = I->getAs()) { + for (const CFGElement &CE : llvm::reverse(*Current)) { + if (Optional CS = CE.getAs()) { if (const ReturnStmt *RS = dyn_cast(CS->getStmt())) { if (RS == S) return true; diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 5b01bf863bb0e..9e74e05bd8635 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -130,6 +130,11 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, #define OPENMP_ADJUST_ARGS_KIND(Name) .Case(#Name, OMPC_ADJUST_ARGS_##Name) #include "clang/Basic/OpenMPKinds.def" .Default(OMPC_ADJUST_ARGS_unknown); + case OMPC_bind: + return llvm::StringSwitch(Str) +#define OPENMP_BIND_KIND(Name) .Case(#Name, OMPC_BIND_##Name) +#include "clang/Basic/OpenMPKinds.def" + .Default(OMPC_BIND_unknown); case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: @@ -385,6 +390,16 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, #include "clang/Basic/OpenMPKinds.def" } llvm_unreachable("Invalid OpenMP 'adjust_args' clause kind"); + case OMPC_bind: + switch (Type) { + case OMPC_BIND_unknown: + return "unknown"; +#define OPENMP_BIND_KIND(Name) \ + case OMPC_BIND_##Name: \ + return #Name; +#include "clang/Basic/OpenMPKinds.def" + } + llvm_unreachable("Invalid OpenMP 'bind' clause type"); case OMPC_unknown: case OMPC_threadprivate: case OMPC_if: diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 514893b47b4f4..646bbe8b73873 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -37,6 +37,8 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : TargetOpts(), Triple(T) { HasIbm128 = false; HasFloat16 = false; HasBFloat16 = false; + HasLongDouble = true; + HasFPReturn = true; HasStrictFP = false; PointerWidth = PointerAlign = 32; BoolWidth = BoolAlign = 8; @@ -300,8 +302,11 @@ FloatModeKind TargetInfo::getRealTypeByWidth(unsigned BitWidth, if (ExplicitType == FloatModeKind::Ibm128) return hasIbm128Type() ? 
FloatModeKind::Ibm128 : FloatModeKind::NoFloat; - if (ExplicitType == FloatModeKind::LongDouble) - return ExplicitType; + if (&getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble() || + &getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) + return FloatModeKind::LongDouble; + if (hasFloat128Type()) + return FloatModeKind::Float128; break; } diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index d588eefbd8a43..aee6e8467d539 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -630,6 +630,19 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple, } } + case llvm::Triple::spirv32: { + if (os != llvm::Triple::UnknownOS || + Triple.getEnvironment() != llvm::Triple::UnknownEnvironment) + return nullptr; + return new SPIRV32TargetInfo(Triple, Opts); + } + case llvm::Triple::spirv64: { + if (os != llvm::Triple::UnknownOS || + Triple.getEnvironment() != llvm::Triple::UnknownEnvironment) + return nullptr; + return new SPIRV64TargetInfo(Triple, Opts); + } + case llvm::Triple::wasm32: if (Triple.getSubArch() != llvm::Triple::NoSubArch || Triple.getVendor() != llvm::Triple::UnknownVendor || diff --git a/clang/lib/Basic/Targets/SPIR.cpp b/clang/lib/Basic/Targets/SPIR.cpp index 9b7aab85314a9..09d482a8b9ef5 100644 --- a/clang/lib/Basic/Targets/SPIR.cpp +++ b/clang/lib/Basic/Targets/SPIR.cpp @@ -1,4 +1,4 @@ -//===--- SPIR.cpp - Implement SPIR target feature support -----------------===// +//===--- SPIR.cpp - Implement SPIR and SPIR-V target feature support ------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements SPIR TargetInfo objects. +// This file implements SPIR and SPIR-V TargetInfo objects. // //===----------------------------------------------------------------------===// @@ -32,3 +32,20 @@ void SPIR64TargetInfo::getTargetDefines(const LangOptions &Opts, SPIRTargetInfo::getTargetDefines(Opts, Builder); DefineStd(Builder, "SPIR64", Opts); } + +void SPIRVTargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + DefineStd(Builder, "SPIRV", Opts); +} + +void SPIRV32TargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + SPIRVTargetInfo::getTargetDefines(Opts, Builder); + DefineStd(Builder, "SPIRV32", Opts); +} + +void SPIRV64TargetInfo::getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + SPIRVTargetInfo::getTargetDefines(Opts, Builder); + DefineStd(Builder, "SPIRV64", Opts); +} diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 7c5e6e5fdfe20..608f7edbb4022 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -1,4 +1,4 @@ -//===--- SPIR.h - Declare SPIR target feature support -----------*- C++ -*-===// +//===--- SPIR.h - Declare SPIR and SPIR-V target feature support *- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file declares SPIR TargetInfo objects. +// This file declares SPIR and SPIR-V TargetInfo objects. 
// //===----------------------------------------------------------------------===// @@ -23,6 +23,7 @@ namespace clang { namespace targets { +// Used by both the SPIR and SPIR-V targets. static const unsigned SPIRDefIsPrivMap[] = { 0, // Default 1, // opencl_global @@ -46,6 +47,7 @@ static const unsigned SPIRDefIsPrivMap[] = { 0 // ptr64 }; +// Used by both the SPIR and SPIR-V targets. static const unsigned SPIRDefIsGenMap[] = { 4, // Default // OpenCL address space values for this map are dummy and they can't be used @@ -72,9 +74,10 @@ static const unsigned SPIRDefIsGenMap[] = { 0 // ptr64 }; -class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { -public: - SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &) +// Base class for SPIR and SPIR-V target info. +class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo { +protected: + BaseSPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { TLSSupported = false; VLASupported = false; @@ -88,13 +91,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { NoAsmVariants = true; } - void getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const override; - - bool hasFeature(StringRef Feature) const override { - return Feature == "spir"; - } - +public: // SPIR supports the half type and the only llvm intrinsic allowed in SPIR is // memcpy as per section 3 of the SPIR spec. bool useFP16ConversionIntrinsics() const override { return false; } @@ -150,7 +147,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { void setSupportedOpenCLOpts() override { // Assume all OpenCL extensions and optional core features are supported - // for SPIR since it is a generic target. + // for SPIR and SPIR-V since they are generic targets. 
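Because the new SPIR-V targets define their macros through DefineStd (as the SPIR.cpp hunk earlier in this patch shows), user code can probe them in the usual spellings; a small sketch, assuming compilation for a spirv32 or spirv64 triple:

  // DefineStd emits __SPIRV32__/__SPIRV64__ (plus the unprefixed forms
  // outside strict-ANSI mode) for the corresponding triples.
  #if defined(__SPIRV64__)
  typedef unsigned long SizeType; // spirv64: SizeType is UnsignedLong
  #elif defined(__SPIRV32__)
  typedef unsigned int SizeType;  // spirv32: SizeType is UnsignedInt
  #else
  typedef unsigned long SizeType; // host fallback so the sketch still compiles
  #endif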
supportAllOpenCLOpts(); } @@ -159,6 +156,20 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public TargetInfo { bool hasInt128Type() const override { return false; } }; +class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public BaseSPIRTargetInfo { +public: + SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : BaseSPIRTargetInfo(Triple, Opts) { + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; + + bool hasFeature(StringRef Feature) const override { + return Feature == "spir"; + } +}; + class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo { public: SPIR32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) @@ -282,6 +293,55 @@ class LLVM_LIBRARY_VISIBILITY MicrosoftX86_64_SPIR64TargetInfo Builder.defineMacro("_M_AMD64", "100"); } }; + +class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRTargetInfo { +public: + SPIRVTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : BaseSPIRTargetInfo(Triple, Opts) { + assert(getTriple().getOS() == llvm::Triple::UnknownOS && + "SPIR-V target must use unknown OS"); + assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment && + "SPIR-V target must use unknown environment type"); + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; + + bool hasFeature(StringRef Feature) const override { + return Feature == "spirv"; + } +}; + +class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public SPIRVTargetInfo { +public: + SPIRV32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : SPIRVTargetInfo(Triple, Opts) { + PointerWidth = PointerAlign = 32; + SizeType = TargetInfo::UnsignedInt; + PtrDiffType = IntPtrType = TargetInfo::SignedInt; + resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024"); + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; +}; + +class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public SPIRVTargetInfo { +public: + SPIRV64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) + : SPIRVTargetInfo(Triple, Opts) { + PointerWidth = PointerAlign = 64; + SizeType = TargetInfo::UnsignedLong; + PtrDiffType = IntPtrType = TargetInfo::SignedLong; + resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" + "v96:128-v192:256-v256:256-v512:512-v1024:1024"); + } + + void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const override; +}; + } // namespace targets } // namespace clang #endif // LLVM_CLANG_LIB_BASIC_TARGETS_SPIR_H diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index f40d8a6ed7312..454a7743dded3 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -338,6 +338,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasUINTR = true; } else if (Feature == "+crc32") { HasCRC32 = true; + } else if (Feature == "+x87") { + HasX87 = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -379,6 +381,14 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, SimdDefaultAlign = hasFeature("avx512f") ? 512 : hasFeature("avx") ? 
256 : 128; + + if (!HasX87) { + if (LongDoubleFormat == &llvm::APFloat::x87DoubleExtended()) + HasLongDouble = false; + if (getTriple().getArch() == llvm::Triple::x86) + HasFPReturn = false; + } + return true; } @@ -1038,6 +1048,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) + .Case("x87", HasX87) .Case("xop", XOPLevel >= XOP) .Case("xsave", HasXSAVE) .Case("xsavec", HasXSAVEC) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index 2c4ab0494fbb2..b9b2ac79815b0 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -144,6 +144,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasTSXLDTRK = false; bool HasUINTR = false; bool HasCRC32 = false; + bool HasX87 = false; protected: llvm::X86::CPUKind CPU = llvm::X86::CK_None; diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index d214f03eaa7be..62cad95366e11 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1204,20 +1204,18 @@ static void addSanitizers(const Triple &TargetTriple, auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) { if (LangOpts.Sanitize.has(Mask)) { - bool Recover = CodeGenOpts.SanitizeRecover.has(Mask); - bool UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; - bool ModuleUseAfterScope = asanUseGlobalsGC(TargetTriple, CodeGenOpts); + bool UseGlobalGC = asanUseGlobalsGC(TargetTriple, CodeGenOpts); bool UseOdrIndicator = CodeGenOpts.SanitizeAddressUseOdrIndicator; llvm::AsanDtorKind DestructorKind = CodeGenOpts.getSanitizeAddressDtor(); - llvm::AsanDetectStackUseAfterReturnMode UseAfterReturn = - CodeGenOpts.getSanitizeAddressUseAfterReturn(); + AddressSanitizerOptions Opts; + Opts.CompileKernel = CompileKernel; + Opts.Recover = CodeGenOpts.SanitizeRecover.has(Mask); + Opts.UseAfterScope = CodeGenOpts.SanitizeAddressUseAfterScope; + Opts.UseAfterReturn = CodeGenOpts.getSanitizeAddressUseAfterReturn(); MPM.addPass(RequireAnalysisPass()); MPM.addPass(ModuleAddressSanitizerPass( - CompileKernel, Recover, ModuleUseAfterScope, UseOdrIndicator, - DestructorKind)); - MPM.addPass(createModuleToFunctionPassAdaptor(AddressSanitizerPass( - {CompileKernel, Recover, UseAfterScope, UseAfterReturn}))); + Opts, UseGlobalGC, UseOdrIndicator, DestructorKind)); } }; ASanPass(SanitizerKind::Address, false); @@ -1342,9 +1340,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline( get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); #include "llvm/Support/Extension.def" - // Register the AA manager first so that our version is the one used. - FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); }); - // Register the target library analysis directly and give it a customized // preset TLI. 
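The HasLongDouble/HasFPReturn bits that X86TargetInfo now clears exist so Sema can reject code an x87-less target cannot lower; on 32-bit x86 with x87 disabled, the expectation is that both of the following are flagged through the extended err_target_unsupported_type diagnostic above (a sketch, not a test from the patch):

  // With x87 disabled on i386: 'long double' uses the x87 extended format,
  // and floating-point values are returned in x87 registers.
  long double Scale(long double X) { return X * 2.0L; }
  float PassThrough(float F) { return F; } // FP return support also checked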
Triple TargetTriple(TheModule->getTargetTriple()); @@ -1589,7 +1584,7 @@ static void runThinLTOBackend( return; auto AddStream = [&](size_t Task) { - return std::make_unique<lto::NativeObjectStream>(std::move(OS)); + return std::make_unique<CachedFileStream>(std::move(OS)); }; lto::Config Conf; if (CGOpts.SaveTempsFilePrefix != "") { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a0de1bb01aac2..8ab73f7b9c5d5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3145,6 +3145,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_reduce_max: { + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs<VectorType>()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smax; + else + return llvm::Intrinsic::vector_reduce_umax; + } + return llvm::Intrinsic::vector_reduce_fmax; + }; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.max"); + return RValue::get(Result); + } + + case Builtin::BI__builtin_reduce_min: { + auto GetIntrinsicID = [](QualType QT, llvm::Type *IrTy) { + if (IrTy->isIntOrIntVectorTy()) { + if (auto *VecTy = QT->getAs<VectorType>()) + QT = VecTy->getElementType(); + if (QT->isSignedIntegerType()) + return llvm::Intrinsic::vector_reduce_smin; + else + return llvm::Intrinsic::vector_reduce_umin; + } + return llvm::Intrinsic::vector_reduce_fmin; + }; + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + GetIntrinsicID(E->getArg(0)->getType(), Op0->getType()), Op0, nullptr, + "rdx.min"); + return RValue::get(Result); + } + case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>(); Value *MatValue = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index e3b0e1b86c58d..92c4ca665dd4e 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -247,6 +247,7 @@ PrintingPolicy CGDebugInfo::getPrintingPolicy() const { PP.SuppressInlineNamespace = false; PP.PrintCanonicalTypes = true; PP.UsePreferredNames = false; + PP.UseIntegerTypeSuffixesAlways = true; // Apply -fdebug-prefix-map. PP.Callbacks = &PrintCB; @@ -935,8 +936,28 @@ static llvm::dwarf::Tag getNextQualifier(Qualifiers &Q) { return (llvm::dwarf::Tag)0; } -llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty, - llvm::DIFile *Unit) { +// Strip MacroQualifiedTypeLoc and AttributedTypeLoc +// as their corresponding types will be ignored +// during code generation. Stripping them allows +// us to maintain a proper TypeLoc for a given type +// during code generation.
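For reference, a short usage sketch of the reduction builtins added above: the intrinsic is chosen from the element type (smax/umax/fmax, and the min counterparts), and the result is a scalar of the element type. The vector typedefs use clang's ext_vector_type extension and are illustrative:

  typedef int int4 __attribute__((ext_vector_type(4)));
  typedef float float4 __attribute__((ext_vector_type(4)));

  int MaxLane(int4 V) { return __builtin_reduce_max(V); }     // llvm.vector.reduce.smax
  float MinLane(float4 V) { return __builtin_reduce_min(V); } // llvm.vector.reduce.fmin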
+static TypeLoc StripMacroAttributed(TypeLoc TL) { + if (!TL) + return TL; + + while (true) { + if (auto MTL = TL.getAs()) + TL = MTL.getInnerLoc(); + else if (auto ATL = TL.getAs()) + TL = ATL.getModifiedLoc(); + else + break; + } + return TL; +} + +llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty, llvm::DIFile *Unit, + TypeLoc TL) { QualifierCollector Qc; const Type *T = Qc.strip(Ty); @@ -950,7 +971,15 @@ llvm::DIType *CGDebugInfo::CreateQualifiedType(QualType Ty, return getOrCreateType(QualType(T, 0), Unit); } - auto *FromTy = getOrCreateType(Qc.apply(CGM.getContext(), T), Unit); + QualType NextTy = Qc.apply(CGM.getContext(), T); + TypeLoc NextTL; + if (NextTy.hasQualifiers()) + NextTL = TL; + else if (TL) { + if (auto QTL = TL.getAs()) + NextTL = StripMacroAttributed(QTL.getNextTypeLoc()); + } + auto *FromTy = getOrCreateType(NextTy, Unit, NextTL); // No need to fill in the Name, Line, Size, Alignment, Offset in case of // CVR derived types. @@ -994,10 +1023,10 @@ llvm::DIType *CGDebugInfo::CreateType(const ObjCObjectPointerType *Ty, Ty->getPointeeType(), Unit); } -llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty, - llvm::DIFile *Unit) { +llvm::DIType *CGDebugInfo::CreateType(const PointerType *Ty, llvm::DIFile *Unit, + TypeLoc TL) { return CreatePointerLikeType(llvm::dwarf::DW_TAG_pointer_type, Ty, - Ty->getPointeeType(), Unit); + Ty->getPointeeType(), Unit, TL); } /// \return whether a C++ mangling exists for the type defined by TD. @@ -1138,7 +1167,8 @@ CGDebugInfo::getOrCreateRecordFwdDecl(const RecordType *Ty, llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, QualType PointeeTy, - llvm::DIFile *Unit) { + llvm::DIFile *Unit, + TypeLoc TL) { // Bit size, align and offset of the type. // Size is always the size of a pointer. We can't use getTypeSize here // because that does not return the correct value for references. @@ -1148,13 +1178,52 @@ llvm::DIType *CGDebugInfo::CreatePointerLikeType(llvm::dwarf::Tag Tag, Optional DWARFAddressSpace = CGM.getTarget().getDWARFAddressSpace(AddressSpace); + llvm::DINodeArray Annotations = nullptr; + TypeLoc NextTL; + if (TL) { + SmallVector Annots; + NextTL = TL.getNextTypeLoc(); + if (NextTL) { + // Traverse all MacroQualifiedTypeLoc, QualifiedTypeLoc and + // AttributedTypeLoc type locations so we can collect + // BTFTypeTag attributes for this pointer. 
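To see why StripMacroAttributed is needed, consider a declaration whose pointer type is spelled through a macro expanding to an attribute: the written TypeLoc then nests MacroQualifiedTypeLoc and AttributedTypeLoc around the PointerTypeLoc, even though neither wrapper produces debug info of its own. A hypothetical BPF-targeted example (macro name illustrative):

  #define __user __attribute__((btf_type_tag("user")))

  // TypeLoc nesting: MacroQualifiedTypeLoc -> AttributedTypeLoc ->
  // PointerTypeLoc; the loop above peels the first two layers so the
  // TypeLoc stays aligned with the type being emitted.
  int __user *P;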
+ while (true) { + if (auto MTL = NextTL.getAs()) { + NextTL = MTL.getInnerLoc(); + } else if (auto QTL = NextTL.getAs()) { + NextTL = QTL.getNextTypeLoc(); + } else if (auto ATL = NextTL.getAs()) { + if (const auto *A = ATL.getAttrAs()) { + StringRef BTFTypeTag = A->getBTFTypeTag(); + if (!BTFTypeTag.empty()) { + llvm::Metadata *Ops[2] = { + llvm::MDString::get(CGM.getLLVMContext(), + StringRef("btf_type_tag")), + llvm::MDString::get(CGM.getLLVMContext(), BTFTypeTag)}; + Annots.insert(Annots.begin(), + llvm::MDNode::get(CGM.getLLVMContext(), Ops)); + } + } + NextTL = ATL.getModifiedLoc(); + } else { + break; + } + } + } + + NextTL = StripMacroAttributed(TL.getNextTypeLoc()); + if (Annots.size() > 0) + Annotations = DBuilder.getOrCreateArray(Annots); + } + if (Tag == llvm::dwarf::DW_TAG_reference_type || Tag == llvm::dwarf::DW_TAG_rvalue_reference_type) return DBuilder.createReferenceType(Tag, getOrCreateType(PointeeTy, Unit), Size, Align, DWARFAddressSpace); else - return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit), Size, - Align, DWARFAddressSpace); + return DBuilder.createPointerType(getOrCreateType(PointeeTy, Unit, NextTL), + Size, Align, DWARFAddressSpace, + StringRef(), Annotations); } llvm::DIType *CGDebugInfo::getOrCreateStructPtrType(StringRef Name, @@ -1271,8 +1340,11 @@ llvm::DIType *CGDebugInfo::CreateType(const TemplateSpecializationType *Ty, llvm::DIType *CGDebugInfo::CreateType(const TypedefType *Ty, llvm::DIFile *Unit) { + TypeLoc TL; + if (const TypeSourceInfo *TSI = Ty->getDecl()->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); llvm::DIType *Underlying = - getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit); + getOrCreateType(Ty->getDecl()->getUnderlyingType(), Unit, TL); if (Ty->getDecl()->hasAttr()) return Underlying; @@ -1346,7 +1418,7 @@ static llvm::DINode::DIFlags getRefFlags(const FunctionProtoType *Func) { } llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, - llvm::DIFile *Unit) { + llvm::DIFile *Unit, TypeLoc TL) { const auto *FPT = dyn_cast(Ty); if (FPT) { if (llvm::DIType *QTy = CreateQualifiedType(FPT, Unit)) @@ -1358,17 +1430,44 @@ llvm::DIType *CGDebugInfo::CreateType(const FunctionType *Ty, SmallVector EltTys; // Add the result type at least. - EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit)); + TypeLoc RetTL; + if (TL) { + if (auto FTL = TL.getAs()) + RetTL = FTL.getReturnLoc(); + } + EltTys.push_back(getOrCreateType(Ty->getReturnType(), Unit, RetTL)); llvm::DINode::DIFlags Flags = llvm::DINode::FlagZero; // Set up remainder of arguments if there is a prototype. // otherwise emit it as a variadic function. 
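Each tag collected by the loop above becomes a two-operand metadata node pairing the literal string "btf_type_tag" with the attribute value; the nodes are then attached to the pointer's debug type through the annotations array. A minimal self-contained sketch of that shape (helper name hypothetical):

  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Metadata.h"

  llvm::MDNode *makeBtfTypeTagNode(llvm::LLVMContext &Ctx,
                                   llvm::StringRef Tag) {
    llvm::Metadata *Ops[2] = {llvm::MDString::get(Ctx, "btf_type_tag"),
                              llvm::MDString::get(Ctx, Tag)};
    return llvm::MDNode::get(Ctx, Ops); // one entry in the annotations array
  }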
- if (!FPT) + if (!FPT) { EltTys.push_back(DBuilder.createUnspecifiedParameter()); - else { + } else { Flags = getRefFlags(FPT); - for (const QualType &ParamType : FPT->param_types()) - EltTys.push_back(getOrCreateType(ParamType, Unit)); + bool DoneWithTL = false; + if (TL) { + if (auto FTL = TL.getAs()) { + DoneWithTL = true; + unsigned Idx = 0; + unsigned FTL_NumParams = FTL.getNumParams(); + for (const QualType &ParamType : FPT->param_types()) { + TypeLoc ParamTL; + if (Idx < FTL_NumParams) { + if (ParmVarDecl *Param = FTL.getParam(Idx)) { + if (const TypeSourceInfo *TSI = Param->getTypeSourceInfo()) + ParamTL = TSI->getTypeLoc(); + } + } + EltTys.push_back(getOrCreateType(ParamType, Unit, ParamTL)); + Idx++; + } + } + } + + if (!DoneWithTL) { + for (const QualType &ParamType : FPT->param_types()) + EltTys.push_back(getOrCreateType(ParamType, Unit)); + } if (FPT->isVariadic()) EltTys.push_back(DBuilder.createUnspecifiedParameter()); } @@ -1439,11 +1538,13 @@ llvm::DIType *CGDebugInfo::createBitFieldType(const FieldDecl *BitFieldDecl, Flags, DebugType, Annotations); } -llvm::DIType *CGDebugInfo::createFieldType( - StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, - uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit, - llvm::DIScope *scope, const RecordDecl *RD, llvm::DINodeArray Annotations) { - llvm::DIType *debugType = getOrCreateType(type, tunit); +llvm::DIType * +CGDebugInfo::createFieldType(StringRef name, QualType type, SourceLocation loc, + AccessSpecifier AS, uint64_t offsetInBits, + uint32_t AlignInBits, llvm::DIFile *tunit, + llvm::DIScope *scope, const RecordDecl *RD, + llvm::DINodeArray Annotations, TypeLoc TL) { + llvm::DIType *debugType = getOrCreateType(type, tunit, TL); // Get the location for the field. llvm::DIFile *file = getOrCreateFile(loc); @@ -1551,9 +1652,12 @@ void CGDebugInfo::CollectRecordNormalField( } else { auto Align = getDeclAlignIfRequired(field, CGM.getContext()); llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(field); - FieldType = - createFieldType(name, type, field->getLocation(), field->getAccess(), - OffsetInBits, Align, tunit, RecordTy, RD, Annotations); + TypeLoc TL; + if (const TypeSourceInfo *TSI = field->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + FieldType = createFieldType(name, type, field->getLocation(), + field->getAccess(), OffsetInBits, Align, tunit, + RecordTy, RD, Annotations, TL); } elements.push_back(FieldType); @@ -3311,7 +3415,8 @@ void CGDebugInfo::completeUnusedClass(const CXXRecordDecl &D) { RetainedTypes.push_back(CGM.getContext().getRecordType(&D).getAsOpaquePtr()); } -llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { +llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit, + TypeLoc TL) { if (Ty.isNull()) return nullptr; @@ -3328,7 +3433,7 @@ llvm::DIType *CGDebugInfo::getOrCreateType(QualType Ty, llvm::DIFile *Unit) { if (auto *T = getTypeOrNull(Ty)) return T; - llvm::DIType *Res = CreateTypeNode(Ty, Unit); + llvm::DIType *Res = CreateTypeNode(Ty, Unit, TL); void *TyPtr = Ty.getAsOpaquePtr(); // And update the type cache. @@ -3372,10 +3477,11 @@ llvm::DIModule *CGDebugInfo::getParentModuleOrNull(const Decl *D) { return nullptr; } -llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { +llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit, + TypeLoc TL) { // Handle qualifiers, which recursively handles what they refer to. 
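The per-parameter TypeLocs consumed in the FunctionType hunk above originate from each ParmVarDecl's TypeSourceInfo, reached through the enclosing FunctionTypeLoc. A hedged sketch of that traversal in isolation (assuming the outermost loc is the function loc, i.e. ignoring sugar wrappers):

  #include "clang/AST/Decl.h"
  #include "clang/AST/TypeLoc.h"

  void visitParamTypeLocs(const clang::FunctionDecl *FD) {
    const clang::TypeSourceInfo *TSI = FD->getTypeSourceInfo();
    if (!TSI)
      return;
    if (auto FTL = TSI->getTypeLoc().getAs<clang::FunctionTypeLoc>())
      for (unsigned I = 0, N = FTL.getNumParams(); I != N; ++I)
        if (const clang::ParmVarDecl *P = FTL.getParam(I))
          if (const clang::TypeSourceInfo *PTSI = P->getTypeSourceInfo())
            (void)PTSI->getTypeLoc(); // what gets passed to getOrCreateType
  }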
if (Ty.hasLocalQualifiers()) - return CreateQualifiedType(Ty, Unit); + return CreateQualifiedType(Ty, Unit, TL); // Work out details of type. switch (Ty->getTypeClass()) { @@ -3404,7 +3510,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Complex: return CreateType(cast(Ty)); case Type::Pointer: - return CreateType(cast(Ty), Unit); + return CreateType(cast(Ty), Unit, TL); case Type::BlockPointer: return CreateType(cast(Ty), Unit); case Type::Typedef: @@ -3415,7 +3521,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { return CreateEnumType(cast(Ty)); case Type::FunctionProto: case Type::FunctionNoProto: - return CreateType(cast(Ty), Unit); + return CreateType(cast(Ty), Unit, TL); case Type::ConstantArray: case Type::VariableArray: case Type::IncompleteArray: @@ -3960,7 +4066,12 @@ llvm::DISubroutineType *CGDebugInfo::getOrCreateFunctionType(const Decl *D, getDwarfCC(CC)); } - return cast(getOrCreateType(FnType, F)); + TypeLoc TL; + if (const auto *FD = dyn_cast(D)) { + if (const TypeSourceInfo *TSI = FD->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + } + return cast(getOrCreateType(FnType, F, TL)); } QualType @@ -4362,8 +4473,12 @@ llvm::DILocalVariable *CGDebugInfo::EmitDeclare(const VarDecl *VD, uint64_t XOffset = 0; if (VD->hasAttr()) Ty = EmitTypeForVarWithBlocksAttr(VD, &XOffset).WrappedType; - else - Ty = getOrCreateType(VD->getType(), Unit); + else { + TypeLoc TL; + if (const TypeSourceInfo *TSI = VD->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + Ty = getOrCreateType(VD->getType(), Unit, TL); + } // If there is no debug info for this type then do not emit debug info // for this variable. @@ -5087,10 +5202,14 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, } AppendAddressSpaceXDeref(AddressSpace, Expr); + TypeLoc TL; + if (const TypeSourceInfo *TSI = D->getTypeSourceInfo()) + TL = TSI->getTypeLoc(); + llvm::DINodeArray Annotations = CollectBTFDeclTagAnnotations(D); GVE = DBuilder.createGlobalVariableExpression( - DContext, DeclName, LinkageName, Unit, LineNo, getOrCreateType(T, Unit), - Var->hasLocalLinkage(), true, + DContext, DeclName, LinkageName, Unit, LineNo, + getOrCreateType(T, Unit, TL), Var->hasLocalLinkage(), true, Expr.empty() ? 
nullptr : DBuilder.createExpression(Expr), getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters, Align, Annotations); diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index f1ce035cf67cb..33eed1503bf2f 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -183,16 +183,19 @@ class CGDebugInfo { llvm::DIType *CreateType(const ComplexType *Ty); llvm::DIType *CreateType(const AutoType *Ty); llvm::DIType *CreateType(const ExtIntType *Ty); - llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); + llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg, + TypeLoc TL = TypeLoc()); llvm::DIType *CreateQualifiedType(const FunctionProtoType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TemplateSpecializationType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const ObjCObjectPointerType *Ty, llvm::DIFile *F); - llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const PointerType *Ty, llvm::DIFile *F, + TypeLoc TL = TypeLoc()); llvm::DIType *CreateType(const BlockPointerType *Ty, llvm::DIFile *F); - llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F); + llvm::DIType *CreateType(const FunctionType *Ty, llvm::DIFile *F, + TypeLoc TL = TypeLoc()); /// Get structure or union type. llvm::DIType *CreateType(const RecordType *Tyg); llvm::DIType *CreateTypeDefinition(const RecordType *Ty); @@ -247,7 +250,8 @@ class CGDebugInfo { /// \return namespace descriptor for the given namespace decl. llvm::DINamespace *getOrCreateNamespace(const NamespaceDecl *N); llvm::DIType *CreatePointerLikeType(llvm::dwarf::Tag Tag, const Type *Ty, - QualType PointeeTy, llvm::DIFile *F); + QualType PointeeTy, llvm::DIFile *F, + TypeLoc TL = TypeLoc()); llvm::DIType *getOrCreateStructPtrType(StringRef Name, llvm::DIType *&Cache); /// A helper function to create a subprogram for a single member @@ -313,7 +317,8 @@ class CGDebugInfo { uint64_t offsetInBits, uint32_t AlignInBits, llvm::DIFile *tunit, llvm::DIScope *scope, const RecordDecl *RD = nullptr, - llvm::DINodeArray Annotations = nullptr); + llvm::DINodeArray Annotations = nullptr, + TypeLoc TL = TypeLoc()); llvm::DIType *createFieldType(StringRef name, QualType type, SourceLocation loc, AccessSpecifier AS, @@ -633,7 +638,8 @@ class CGDebugInfo { Optional Source); /// Get the type from the cache or create a new type if necessary. - llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg); + llvm::DIType *getOrCreateType(QualType Ty, llvm::DIFile *Fg, + TypeLoc TL = TypeLoc()); /// Get a reference to a clang module. If \p CreateSkeletonCU is true, /// this also creates a split dwarf skeleton compile unit. @@ -648,7 +654,8 @@ class CGDebugInfo { llvm::DICompositeType *getOrCreateLimitedType(const RecordType *Ty); /// Create type metadata for a source language type. - llvm::DIType *CreateTypeNode(QualType Ty, llvm::DIFile *Fg); + llvm::DIType *CreateTypeNode(QualType Ty, llvm::DIFile *Fg, + TypeLoc TL = TypeLoc()); /// Create new member and increase Offset by FType's size. 
llvm::DIType *CreateMemberType(llvm::DIFile *Unit, QualType FType, diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index 040e672695741..574e24437861d 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -1479,6 +1479,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { address = OpenMPLocalAddr; + AllocaAddr = OpenMPLocalAddr; } else if (Ty->isConstantSizeType()) { // If this value is an array or struct with a statically determinable // constant initializer, there are optimizations we can do. @@ -1524,6 +1525,7 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // return slot, so that we can elide the copy when returning this // variable (C++0x [class.copy]p34). address = ReturnValue; + AllocaAddr = ReturnValue; if (const RecordType *RecordTy = Ty->getAs()) { const auto *RD = RecordTy->getDecl(); @@ -1535,7 +1537,8 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { // applied. llvm::Value *Zero = Builder.getFalse(); Address NRVOFlag = - CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo"); + CreateTempAlloca(Zero->getType(), CharUnits::One(), "nrvo", + /*ArraySize=*/nullptr, &AllocaAddr); EnsureInsertPoint(); Builder.CreateStore(Zero, NRVOFlag); @@ -1637,10 +1640,11 @@ CodeGenFunction::EmitAutoVarAlloca(const VarDecl &D) { DI->setLocation(D.getLocation()); // If NRVO, use a pointer to the return address. - if (UsePointerValue) + if (UsePointerValue) { DebugAddr = ReturnValuePointer; - - (void)DI->EmitDeclareOfAutoVariable(&D, DebugAddr.getPointer(), Builder, + AllocaAddr = ReturnValuePointer; + } + (void)DI->EmitDeclareOfAutoVariable(&D, AllocaAddr.getPointer(), Builder, UsePointerValue); } @@ -2498,6 +2502,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, } Address DeclPtr = Address::invalid(); + Address AllocaPtr = Address::invalid(); bool DoStore = false; bool IsScalar = hasScalarEvaluationKind(Ty); // If we already have a pointer to the argument, reuse the input pointer. @@ -2512,6 +2517,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, // from the default address space. auto AllocaAS = CGM.getASTAllocaAddressSpace(); auto *V = DeclPtr.getPointer(); + AllocaPtr = DeclPtr; auto SrcLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : AllocaAS; auto DestLangAS = getLangOpts().OpenCL ? LangAS::opencl_private : LangAS::Default; @@ -2548,10 +2554,11 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, : Address::invalid(); if (getLangOpts().OpenMP && OpenMPLocalAddr.isValid()) { DeclPtr = OpenMPLocalAddr; + AllocaPtr = DeclPtr; } else { // Otherwise, create a temporary to hold the value. 
DeclPtr = CreateMemTemp(Ty, getContext().getDeclAlign(&D), - D.getName() + ".addr"); + D.getName() + ".addr", &AllocaPtr); } DoStore = true; } @@ -2627,7 +2634,7 @@ void CodeGenFunction::EmitParmDecl(const VarDecl &D, ParamValue Arg, if (CGDebugInfo *DI = getDebugInfo()) { if (CGM.getCodeGenOpts().hasReducedDebugInfo() && !CurFuncIsThunk) { llvm::DILocalVariable *DILocalVar = DI->EmitDeclareOfArgVariable( - &D, DeclPtr.getPointer(), ArgNo, Builder); + &D, AllocaPtr.getPointer(), ArgNo, Builder); if (const auto *Var = dyn_cast_or_null(&D)) DI->getParamDbgMappings().insert({Var, DILocalVar}); } diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index a94267deb7e5a..0518038fea604 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -569,8 +569,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { // Perform derived-to-base casts and/or field accesses, to get from the // temporary object we created (and, potentially, for which we extended // the lifetime) to the subobject we're binding the reference to. - for (unsigned I = Adjustments.size(); I != 0; --I) { - SubobjectAdjustment &Adjustment = Adjustments[I-1]; + for (SubobjectAdjustment &Adjustment : llvm::reverse(Adjustments)) { switch (Adjustment.Kind) { case SubobjectAdjustment::DerivedToBaseAdjustment: Object = diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index ee6711c50ca3b..85c8980b41d86 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -127,6 +127,8 @@ class AggExprEmitter : public StmtVisitor { } void VisitConstantExpr(ConstantExpr *E) { + EnsureDest(E->getType()); + if (llvm::Value *Result = ConstantEmitter(CGF).tryEmitConstantExpr(E)) { CGF.EmitAggregateStore(Result, Dest.getAddress(), E->getType().isVolatileQualified()); diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp index 58d816596b40e..0776ba58d2ab5 100644 --- a/clang/lib/CodeGen/CGGPUBuiltin.cpp +++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp @@ -66,6 +66,41 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) { // // Note that by the time this function runs, E's args have already undergone the // standard C vararg promotion (short -> int, float -> double, etc.). + +namespace { +llvm::Value *packArgsIntoNVPTXFormatBuffer(CodeGenFunction *CGF, + const CallArgList &Args) { + const llvm::DataLayout &DL = CGF->CGM.getDataLayout(); + llvm::LLVMContext &Ctx = CGF->CGM.getLLVMContext(); + CGBuilderTy &Builder = CGF->Builder; + + // Construct and fill the args buffer that we'll pass to vprintf. + if (Args.size() <= 1) { + // If there are no args, pass a null pointer to vprintf. + return llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); + } else { + llvm::SmallVector ArgTypes; + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) + ArgTypes.push_back(Args[I].getRValue(*CGF).getScalarVal()->getType()); + + // Using llvm::StructType is correct only because printf doesn't accept + // aggregates. If we had to handle aggregates here, we'd have to manually + // compute the offsets within the alloca -- we wouldn't be able to assume + // that the alignment of the llvm type was the same as the alignment of the + // clang type. 
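The comment above is the key constraint behind this NVPTX printf lowering: all promoted arguments are stored into one struct-shaped buffer whose address is handed to vprintf together with the format string. A plain C++ emulation of that calling contract (names illustrative, not the CUDA runtime API):

  struct PrintfArgs { int A0; double A1; }; // layout after vararg promotion

  int emulatedPrintf(int (*VPrintf)(const char *, void *), int I, double D) {
    PrintfArgs Args{I, D};            // stands in for the "printf_args" alloca
    return VPrintf("%d %f\n", &Args); // format pointer + packed buffer
  }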
+ llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); + llvm::Value *Alloca = CGF->CreateTempAlloca(AllocaTy); + + for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { + llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); + llvm::Value *Arg = Args[I].getRValue(*CGF).getScalarVal(); + Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); + } + return Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); + } +} +} // namespace + RValue CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -73,7 +108,6 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, assert(E->getBuiltinCallee() == Builtin::BIprintf); assert(E->getNumArgs() >= 1); // printf always has at least one arg. - const llvm::DataLayout &DL = CGM.getDataLayout(); llvm::LLVMContext &Ctx = CGM.getLLVMContext(); CallArgList Args; @@ -90,32 +124,7 @@ CodeGenFunction::EmitNVPTXDevicePrintfCallExpr(const CallExpr *E, return RValue::get(llvm::ConstantInt::get(IntTy, 0)); } - // Construct and fill the args buffer that we'll pass to vprintf. - llvm::Value *BufferPtr; - if (Args.size() <= 1) { - // If there are no args, pass a null pointer to vprintf. - BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx)); - } else { - llvm::SmallVector ArgTypes; - for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) - ArgTypes.push_back(Args[I].getRValue(*this).getScalarVal()->getType()); - - // Using llvm::StructType is correct only because printf doesn't accept - // aggregates. If we had to handle aggregates here, we'd have to manually - // compute the offsets within the alloca -- we wouldn't be able to assume - // that the alignment of the llvm type was the same as the alignment of the - // clang type. - llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args"); - llvm::Value *Alloca = CreateTempAlloca(AllocaTy); - - for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) { - llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1); - llvm::Value *Arg = Args[I].getRValue(*this).getScalarVal(); - Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlign(Arg->getType())); - } - BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx)); - } - + llvm::Value *BufferPtr = packArgsIntoNVPTXFormatBuffer(this, Args); // Invoke vprintf and return. llvm::Function* VprintfFunc = GetVprintfDeclaration(CGM.getModule()); auto FormatSpecifier = Args[0].getRValue(*this).getScalarVal(); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index b80f663a50a50..4d4037001ee0a 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5992,6 +5992,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_adjust_args: case OMPC_append_args: case OMPC_memory_order: + case OMPC_bind: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 20da1d3d930b3..8e8009b50981d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1477,18 +1477,40 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // When generating code for a builtin with an inline declaration, use a // mangled name to hold the actual body, while keeping an external definition // in case the function pointer is referenced somewhere. 
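The block that follows (and its new else branch) services the inline-builtin mechanism: the inline body is stashed under a mangled "<name>.inline" symbol marked always_inline, keeping the external symbol intact in case its address is taken. The classic source pattern that reaches this path is the gnu_inline idiom, roughly (a sketch, not exact libc wording):

  extern "C" {
  extern inline __attribute__((gnu_inline, always_inline)) void *
  memcpy(void *Dst, const void *Src, unsigned long N) {
    return __builtin_memcpy(Dst, Src, N); // body emitted as "memcpy.inline"
  }
  }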
- if (FD->isInlineBuiltinDeclaration() && Fn) { - std::string FDInlineName = (Fn->getName() + ".inline").str(); - llvm::Module *M = Fn->getParent(); - llvm::Function *Clone = M->getFunction(FDInlineName); - if (!Clone) { - Clone = llvm::Function::Create(Fn->getFunctionType(), - llvm::GlobalValue::InternalLinkage, - Fn->getAddressSpace(), FDInlineName, M); - Clone->addFnAttr(llvm::Attribute::AlwaysInline); + if (Fn) { + if (FD->isInlineBuiltinDeclaration()) { + std::string FDInlineName = (Fn->getName() + ".inline").str(); + llvm::Module *M = Fn->getParent(); + llvm::Function *Clone = M->getFunction(FDInlineName); + if (!Clone) { + Clone = llvm::Function::Create(Fn->getFunctionType(), + llvm::GlobalValue::InternalLinkage, + Fn->getAddressSpace(), FDInlineName, M); + Clone->addFnAttr(llvm::Attribute::AlwaysInline); + } + Fn->setLinkage(llvm::GlobalValue::ExternalLinkage); + Fn = Clone; + } + + // Detect the unusual situation where an inline version is shadowed by a + // non-inline version. In that case we should pick the external one + // everywhere. That's GCC behavior too. Unfortunately, I cannot find a way + // to detect that situation before we reach codegen, so do some late + // replacement. + else { + for (const FunctionDecl *PD = FD->getPreviousDecl(); PD; + PD = PD->getPreviousDecl()) { + if (LLVM_UNLIKELY(PD->isInlineBuiltinDeclaration())) { + std::string FDInlineName = (Fn->getName() + ".inline").str(); + llvm::Module *M = Fn->getParent(); + if (llvm::Function *Clone = M->getFunction(FDInlineName)) { + Clone->replaceAllUsesWith(Fn); + Clone->eraseFromParent(); + } + break; + } + } } - Fn->setLinkage(llvm::GlobalValue::ExternalLinkage); - Fn = Clone; } // Check if we should generate debug info for this function. diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 71910c0b01fbb..8fb8cafdcecd4 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2360,9 +2360,9 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, } // Import this module's dependencies. - for (unsigned I = Mod->Imports.size(); I > 0; --I) { - if (Visited.insert(Mod->Imports[I - 1]).second) - addLinkOptionsPostorder(CGM, Mod->Imports[I-1], Metadata, Visited); + for (Module *Import : llvm::reverse(Mod->Imports)) { + if (Visited.insert(Import).second) + addLinkOptionsPostorder(CGM, Import, Metadata, Visited); } // Add linker options to link against the libraries/frameworks @@ -2375,13 +2375,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, if (Mod->UseExportAsModuleLinkName) return; - for (unsigned I = Mod->LinkLibraries.size(); I > 0; --I) { + for (const Module::LinkLibrary &LL : llvm::reverse(Mod->LinkLibraries)) { // Link against a framework. Frameworks are currently Darwin only, so we // don't to ask TargetCodeGenInfo for the spelling of the linker option. 
- if (Mod->LinkLibraries[I-1].IsFramework) { - llvm::Metadata *Args[2] = { - llvm::MDString::get(Context, "-framework"), - llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library)}; + if (LL.IsFramework) { + llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"), + llvm::MDString::get(Context, LL.Library)}; Metadata.push_back(llvm::MDNode::get(Context, Args)); continue; @@ -2391,13 +2390,12 @@ static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, if (IsELF) { llvm::Metadata *Args[2] = { llvm::MDString::get(Context, "lib"), - llvm::MDString::get(Context, Mod->LinkLibraries[I - 1].Library), + llvm::MDString::get(Context, LL.Library), }; Metadata.push_back(llvm::MDNode::get(Context, Args)); } else { llvm::SmallString<24> Opt; - CGM.getTargetCodeGenInfo().getDependentLibraryOption( - Mod->LinkLibraries[I - 1].Library, Opt); + CGM.getTargetCodeGenInfo().getDependentLibraryOption(LL.Library, Opt); auto *OptString = llvm::MDString::get(Context, Opt); Metadata.push_back(llvm::MDNode::get(Context, OptString)); } @@ -5301,8 +5299,9 @@ void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { Aliases.push_back(GD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); + llvm::Type *ResolverTy = llvm::GlobalIFunc::getResolverFunctionType(DeclTy); llvm::Constant *Resolver = - GetOrCreateLLVMFunction(IFA->getResolver(), DeclTy, GD, + GetOrCreateLLVMFunction(IFA->getResolver(), ResolverTy, GD, /*ForVTable=*/false); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage, diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index ff5f1c164d26c..48b04aec7671f 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -10175,14 +10175,16 @@ void XCoreTargetCodeGenInfo::emitTargetMetadata( } } } + //===----------------------------------------------------------------------===// -// SPIR ABI Implementation +// Base ABI and target codegen info implementation common between SPIR and +// SPIR-V. 
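On the emitIFuncDefinition change above: an ifunc's resolver is not of the ifunc's own type; it is a nullary function returning a pointer to the implementation, which is what getResolverFunctionType derives. In source form, the construct being compiled is roughly (an ELF-only, illustrative example):

  extern "C" {
  int add1Impl(int X) { return X + 1; }

  // The resolver returns a pointer to the chosen implementation; its type,
  // not the ifunc's, is what GetOrCreateLLVMFunction now receives.
  int (*resolveAdd1(void))(int) { return add1Impl; }

  int add1(int) __attribute__((ifunc("resolveAdd1")));
  }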
//===----------------------------------------------------------------------===// namespace { -class SPIRABIInfo : public DefaultABIInfo { +class CommonSPIRABIInfo : public DefaultABIInfo { public: - SPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); } + CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); } ABIArgInfo classifyKernelArgumentType(QualType Ty) const; @@ -10192,7 +10194,7 @@ class SPIRABIInfo : public DefaultABIInfo { void setCCs(); }; -ABIArgInfo SPIRABIInfo::classifyKernelArgumentType(QualType Ty) const { +ABIArgInfo CommonSPIRABIInfo::classifyKernelArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); if (getContext().getLangOpts().SYCLIsDevice && isAggregateTypeForABI(Ty)) { @@ -10203,7 +10205,7 @@ ABIArgInfo SPIRABIInfo::classifyKernelArgumentType(QualType Ty) const { return DefaultABIInfo::classifyArgumentType(Ty); } -void SPIRABIInfo::computeInfo(CGFunctionInfo &FI) const { +void CommonSPIRABIInfo::computeInfo(CGFunctionInfo &FI) const { llvm::CallingConv::ID CC = FI.getCallingConvention(); if (!getCXXABI().classifyReturnType(FI)) @@ -10220,22 +10222,23 @@ void SPIRABIInfo::computeInfo(CGFunctionInfo &FI) const { } // end anonymous namespace namespace { -class SPIRTargetCodeGenInfo : public TargetCodeGenInfo { +class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { public: - SPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : TargetCodeGenInfo(std::make_unique<SPIRABIInfo>(CGT)) {} - unsigned getOpenCLKernelCallingConv() const override; + CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) + : TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {} LangAS getASTAllocaAddressSpace() const override { return getLangASFromTargetAS( getABIInfo().getDataLayout().getAllocaAddrSpace()); } + unsigned getOpenCLKernelCallingConv() const override; + bool shouldEmitStaticExternCAliases() const override; }; } // End anonymous namespace.
-void SPIRABIInfo::setCCs() { +void CommonSPIRABIInfo::setCCs() { assert(getRuntimeCC() == llvm::CallingConv::C); RuntimeCC = llvm::CallingConv::SPIR_FUNC; } @@ -10243,17 +10246,17 @@ void SPIRABIInfo::setCCs() { namespace clang { namespace CodeGen { void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { - SPIRABIInfo SPIRABI(CGM.getTypes()); + CommonSPIRABIInfo SPIRABI(CGM.getTypes()); SPIRABI.computeInfo(FI); } } } -unsigned SPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { +unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const { return llvm::CallingConv::SPIR_KERNEL; } -bool SPIRTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { +bool CommonSPIRTargetCodeGenInfo::shouldEmitStaticExternCAliases() const { return false; } @@ -11322,7 +11325,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return SetCGInfo(new ARCTargetCodeGenInfo(Types)); case llvm::Triple::spir: case llvm::Triple::spir64: - return SetCGInfo(new SPIRTargetCodeGenInfo(Types)); + case llvm::Triple::spirv32: + case llvm::Triple::spirv64: + return SetCGInfo(new CommonSPIRTargetCodeGenInfo(Types)); case llvm::Triple::ve: return SetCGInfo(new VETargetCodeGenInfo(Types)); } diff --git a/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp b/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp index 1f040f60ff19d..110d402436ee9 100644 --- a/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp +++ b/clang/lib/DirectoryWatcher/windows/DirectoryWatcher-windows.cpp @@ -88,10 +88,15 @@ DirectoryWatcherWindows::DirectoryWatcherWindows( // handle to the watcher and performing synchronous operations. { DWORD Size = GetFinalPathNameByHandleW(DirectoryHandle, NULL, 0, 0); - std::unique_ptr Buffer{new WCHAR[Size]}; + std::unique_ptr Buffer{new WCHAR[Size + 1]}; Size = GetFinalPathNameByHandleW(DirectoryHandle, Buffer.get(), Size, 0); Buffer[Size] = L'\0'; - llvm::sys::windows::UTF16ToUTF8(Buffer.get(), Size, Path); + WCHAR *Data = Buffer.get(); + if (Size >= 4 && ::memcmp(Data, L"\\\\?\\", 8) == 0) { + Data += 4; + Size -= 4; + } + llvm::sys::windows::UTF16ToUTF8(Data, Size, Path); } size_t EntrySize = sizeof(FILE_NOTIFY_INFORMATION) + MAX_PATH * sizeof(WCHAR); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index aecc30a3ab89e..253d6178ef3a3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -961,7 +961,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, } else if (TT.getVendor() != llvm::Triple::UnknownVendor) SuggestedTriple += Twine("-" + TT.getVendorName()).str(); Diag(clang::diag::warn_drv_deprecated_arg) - << TT.str() << true << SuggestedTriple; + << TT.str() << SuggestedTriple; // Drop environment component. std::string EffectiveTriple = Twine(TT.getArchName() + "-" + TT.getVendorName() + "-" + diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 6f426c6cad69c..36579060d8a26 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -683,8 +683,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, Arg->claim(); if (LegacySanitizeCoverage != 0) { D.Diag(diag::warn_drv_deprecated_arg) - << Arg->getAsString(Args) << true - << "-fsanitize-coverage=trace-pc-guard"; + << Arg->getAsString(Args) << "-fsanitize-coverage=trace-pc-guard"; } continue; } @@ -719,11 +718,11 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, // enabled. 
if (CoverageFeatures & CoverageTraceBB) D.Diag(clang::diag::warn_drv_deprecated_arg) - << "-fsanitize-coverage=trace-bb" << true + << "-fsanitize-coverage=trace-bb" << "-fsanitize-coverage=trace-pc-guard"; if (CoverageFeatures & Coverage8bitCounters) D.Diag(clang::diag::warn_drv_deprecated_arg) - << "-fsanitize-coverage=8bit-counters" << true + << "-fsanitize-coverage=8bit-counters" << "-fsanitize-coverage=trace-pc-guard"; int InsertionPointTypes = CoverageFunc | CoverageBB | CoverageEdge; @@ -733,7 +732,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, if ((CoverageFeatures & InsertionPointTypes) && !(CoverageFeatures & InstrumentationTypes)) { D.Diag(clang::diag::warn_drv_deprecated_arg) - << "-fsanitize-coverage=[func|bb|edge]" << true + << "-fsanitize-coverage=[func|bb|edge]" << "-fsanitize-coverage=[func|bb|edge],[trace-pc-guard|trace-pc]"; } diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index 896afcc3474a4..a66cae8b4d6b2 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -28,9 +28,9 @@ using namespace llvm::opt; namespace { -const struct { +constexpr struct { StringRef Name; - std::string SubPath; + StringRef SubPath; StringRef Family; unsigned DataAddr; } MCUInfo[] = { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ef71ed176091e..d7d6504fbc4aa 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -624,7 +624,8 @@ getFramePointerKind(const ArgList &Args, const llvm::Triple &Triple) { A && A->getOption().matches(options::OPT_fno_omit_frame_pointer); bool OmitLeafFP = Args.hasFlag(options::OPT_momit_leaf_frame_pointer, options::OPT_mno_omit_leaf_frame_pointer, - Triple.isAArch64() || Triple.isPS4CPU()); + Triple.isAArch64() || Triple.isPS4CPU() || + Triple.isVE()); if (NoOmitFP || mustUseNonLeafFramePointerForTarget(Triple) || (!OmitFP && useFramePointerForTargetByDefault(Args, Triple))) { if (OmitLeafFP) @@ -3475,7 +3476,7 @@ static void RenderARCMigrateToolOptions(const Driver &D, const ArgList &Args, Args.AddLastArg(CmdArgs, options::OPT_objcmt_returns_innerpointer_property); Args.AddLastArg(CmdArgs, options::OPT_objcmt_ns_nonatomic_iosonly); Args.AddLastArg(CmdArgs, options::OPT_objcmt_migrate_designated_init); - Args.AddLastArg(CmdArgs, options::OPT_objcmt_whitelist_dir_path); + Args.AddLastArg(CmdArgs, options::OPT_objcmt_allowlist_dir_path); } } @@ -4848,7 +4849,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, TC.addClangWarningOptions(CmdArgs); // FIXME: Subclass ToolChain for SPIR and move this to addClangWarningOptions. - if (Triple.isSPIR()) + if (Triple.isSPIR() || Triple.isSPIRV()) CmdArgs.push_back("-Wspir-compat"); // Select the appropriate action. @@ -6855,14 +6856,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_assume_sane_operator_new)) CmdArgs.push_back("-fno-assume-sane-operator-new"); - // -frelaxed-template-template-args is deprecated and on by default. 
- if (Arg *A = - Args.getLastArg(options::OPT_frelaxed_template_template_args, - options::OPT_fno_relaxed_template_template_args)) { - D.Diag(diag::warn_drv_deprecated_arg) << A->getAsString(Args) << false; - if (A->getOption().matches(options::OPT_fno_relaxed_template_template_args)) - CmdArgs.push_back("-fno-relaxed-template-template-args"); - } + // -frelaxed-template-template-args is off by default, as it is a severe + // breaking change until a corresponding change to template partial ordering + // is provided. + if (Args.hasFlag(options::OPT_frelaxed_template_template_args, + options::OPT_fno_relaxed_template_template_args, false)) + CmdArgs.push_back("-frelaxed-template-template-args"); // -fsized-deallocation is off by default, as it is an ABI-breaking change for // most platforms. diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index ce207663cba59..c2a7d7677703c 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1898,12 +1898,12 @@ void tools::checkAMDGPUCodeObjectVersion(const Driver &D, // Emit warnings for legacy options even if they are overridden. if (Args.hasArg(options::OPT_mno_code_object_v3_legacy)) - D.Diag(diag::warn_drv_deprecated_arg) - << "-mno-code-object-v3" << true << "-mcode-object-version=2"; + D.Diag(diag::warn_drv_deprecated_arg) << "-mno-code-object-v3" + << "-mcode-object-version=2"; if (Args.hasArg(options::OPT_mcode_object_v3_legacy)) - D.Diag(diag::warn_drv_deprecated_arg) - << "-mcode-object-v3" << true << "-mcode-object-version=3"; + D.Diag(diag::warn_drv_deprecated_arg) << "-mcode-object-v3" + << "-mcode-object-version=3"; if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) { if (CodeObjArg->getOption().getID() == diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 16f39062423e9..c499831241ab5 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -1386,7 +1386,7 @@ FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { ChromiumStyle.ContinuationIndentWidth = 8; ChromiumStyle.IndentWidth = 4; // See styleguide for import groups: - // https://chromium.googlesource.com/chromium/src/+/master/styleguide/java/java.md#Import-Order + // https://chromium.googlesource.com/chromium/src/+/refs/heads/main/styleguide/java/java.md#Import-Order ChromiumStyle.JavaImportGroups = { "android", "androidx", diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index bbce27f799bc7..06d51dd95f50f 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -1060,7 +1060,7 @@ struct AdditionalKeywords { bool IsJavaScriptIdentifier(const FormatToken &Tok, bool AcceptIdentifierName = true) const { // Based on the list of JavaScript & TypeScript keywords here: - // https://github.com/microsoft/TypeScript/blob/master/src/compiler/scanner.ts#L74 + // https://github.com/microsoft/TypeScript/blob/main/src/compiler/scanner.ts#L74 switch (Tok.Tok.getKind()) { case tok::kw_break: case tok::kw_case: diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index e6be3a918034b..5686a6e7587c9 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4061,7 +4061,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, Keywords.kw_let, tok::kw_const)) // See grammar for 'declare' statements at: - // 
https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 + // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10 return false; if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && Right.isOneOf(tok::identifier, tok::string_literal)) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index d3c3c68d63fba..9b1a81f620048 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -614,7 +614,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, //Builder.defineMacro("__cpp_consteval", "201811L"); Builder.defineMacro("__cpp_constexpr_dynamic_alloc", "201907L"); Builder.defineMacro("__cpp_constinit", "201907L"); - //Builder.defineMacro("__cpp_coroutines", "201902L"); + Builder.defineMacro("__cpp_impl_coroutine", "201902L"); Builder.defineMacro("__cpp_designated_initializers", "201707L"); Builder.defineMacro("__cpp_impl_three_way_comparison", "201907L"); //Builder.defineMacro("__cpp_modules", "201907L"); @@ -1208,7 +1208,7 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (LangOpts.OpenCL) { InitializeOpenCLFeatureTestMacros(TI, LangOpts, Builder); - if (TI.getTriple().isSPIR()) + if (TI.getTriple().isSPIR() || TI.getTriple().isSPIRV()) Builder.defineMacro("__IMAGE_SUPPORT__"); } diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 360fa1448b128..45df86ef91cdb 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -188,19 +188,17 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { /// @return Whether column adjustments are necessary. bool MoveToLine(const Token &Tok, bool RequireStartOfLine) { PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation()); - if (PLoc.isInvalid()) - return false; + unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; bool IsFirstInFile = Tok.isAtStartOfLine() && PLoc.getLine() == 1; - return MoveToLine(PLoc.getLine(), RequireStartOfLine) || IsFirstInFile; + return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile; } /// Move to the line of the provided source location. Returns true if a new /// line was inserted. bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) { PresumedLoc PLoc = SM.getPresumedLoc(Loc); - if (PLoc.isInvalid()) - return false; - return MoveToLine(PLoc.getLine(), RequireStartOfLine); + unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine; + return MoveToLine(TargetLine, RequireStartOfLine); } bool MoveToLine(unsigned LineNo, bool RequireStartOfLine); @@ -700,7 +698,7 @@ void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok, // - The whitespace is necessary to keep the tokens apart and there is not // already a newline between them if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) || - ((EmittedTokensOnThisLine || EmittedTokensOnThisLine) && + ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) && AvoidConcat(PrevPrevTok, PrevTok, Tok))) OS << ' '; } diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h index a36e657612e5e..9c81ddb5e2a73 100644 --- a/clang/lib/Headers/opencl-c-base.h +++ b/clang/lib/Headers/opencl-c-base.h @@ -12,8 +12,8 @@ // Define extension macros #if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) -// For SPIR all extensions are supported. 
-#if defined(__SPIR__) +// For SPIR and SPIR-V all extensions are supported. +#if defined(__SPIR__) || defined(__SPIRV__) #define cl_khr_subgroup_extended_types 1 #define cl_khr_subgroup_non_uniform_vote 1 #define cl_khr_subgroup_ballot 1 @@ -45,7 +45,7 @@ #define __opencl_c_ext_fp32_global_atomic_min_max 1 #define __opencl_c_ext_fp32_local_atomic_min_max 1 -#endif // defined(__SPIR__) +#endif // defined(__SPIR__) || defined(__SPIRV__) #endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) // Define feature macros for OpenCL C 2.0 @@ -65,8 +65,8 @@ // Define header-only feature macros for OpenCL C 3.0. #if (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) -// For the SPIR target all features are supported. -#if defined(__SPIR__) +// For the SPIR and SPIR-V target all features are supported. +#if defined(__SPIR__) || defined(__SPIRV__) #define __opencl_c_atomic_scope_all_devices 1 #endif // defined(__SPIR__) #endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300) diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h index 562e0551ffdc6..32af848a94c4f 100644 --- a/clang/lib/Headers/opencl-c.h +++ b/clang/lib/Headers/opencl-c.h @@ -23,11 +23,14 @@ #endif //cl_khr_3d_image_writes #endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0 - -#if (defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && defined(__SPIR__) +#if (defined(__OPENCL_CPP_VERSION__) || \ + (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && \ + (defined(__SPIR__) || defined(__SPIRV__)) #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin #pragma OPENCL EXTENSION cl_intel_planar_yuv : end -#endif // (defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && defined(__SPIR__) +#endif // (defined(__OPENCL_CPP_VERSION__) || + // (__OPENCL_C_VERSION__ >= CL_VERSION_1_2)) && + // (defined(__SPIR__) || defined(__SPIRV__)) #define __ovld __attribute__((overloadable)) #define __conv __attribute__((convergent)) @@ -13333,7 +13336,7 @@ uint __ovld atomic_fetch_or(volatile __local atomic_uint *object, uint operand); int __ovld atomic_fetch_xor(volatile __global atomic_int *object, int operand); int __ovld atomic_fetch_xor(volatile __local atomic_int *object, int operand); uint __ovld atomic_fetch_xor(volatile __global atomic_uint *object, uint operand); -uint __ovld atomic_fetch_xor(volatile __local atomic_uint *object, uint operand);i +uint __ovld atomic_fetch_xor(volatile __local atomic_uint *object, uint operand); int __ovld atomic_fetch_and(volatile __global atomic_int *object, int operand); int __ovld atomic_fetch_and(volatile __local atomic_int *object, int operand); uint __ovld atomic_fetch_and(volatile __global atomic_uint *object, uint operand); diff --git a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp index f5cbd5e51b9b9..f597c56837fb4 100644 --- a/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ b/clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -734,6 +734,27 @@ bool Minimizer::lexPragma(const char *&First, const char *const End) { append("#pragma once\n"); return false; } + if (FoundId.Name == "push_macro") { + // #pragma push_macro + makeToken(pp_pragma_push_macro); + append("#pragma push_macro"); + printDirectiveBody(First, End); + return false; + } + if (FoundId.Name == "pop_macro") { + // #pragma pop_macro + makeToken(pp_pragma_pop_macro); + append("#pragma pop_macro"); + printDirectiveBody(First, End); + return false; + } + if 
(FoundId.Name == "include_alias") { + // #pragma include_alias + makeToken(pp_pragma_include_alias); + append("#pragma include_alias"); + printDirectiveBody(First, End); + return false; + } if (FoundId.Name != "clang") { skipLine(First, End); @@ -835,6 +856,10 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) { // Figure out the token. IdInfo Id = lexIdentifier(First, End); First = Id.Last; + + if (Id.Name == "pragma") + return lexPragma(First, End); + auto Kind = llvm::StringSwitch(Id.Name) .Case("include", pp_include) .Case("__include_macros", pp___include_macros) @@ -850,7 +875,6 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) { .Case("elifndef", pp_elifndef) .Case("else", pp_else) .Case("endif", pp_endif) - .Case("pragma", pp_pragma_import) .Default(pp_none); if (Kind == pp_none) { skipDirective(Id.Name, First, End); @@ -863,9 +887,6 @@ bool Minimizer::lexPPLine(const char *&First, const char *const End) { if (Kind == pp_define) return lexDefine(First, End); - if (Kind == pp_pragma_import) - return lexPragma(First, End); - // Everything else. return lexDefault(Kind, Id.Name, First, End); } diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 6324cdc5a6b0a..9fa170410da3c 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -167,8 +167,8 @@ static void appendSubframeworkPaths(Module *Mod, return; // Add Frameworks/Name.framework for each subframework. - for (unsigned I = Paths.size() - 1; I != 0; --I) - llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework"); + for (StringRef Framework : llvm::drop_begin(llvm::reverse(Paths))) + llvm::sys::path::append(Path, "Frameworks", Framework + ".framework"); } Optional ModuleMap::findHeader( @@ -618,18 +618,18 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(const FileEntry *File) { // the actual header is located. bool Explicit = UmbrellaModule->InferExplicitSubmodules; - for (unsigned I = SkippedDirs.size(); I != 0; --I) { + for (const DirectoryEntry *SkippedDir : llvm::reverse(SkippedDirs)) { // Find or create the module that corresponds to this directory name. SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( - llvm::sys::path::stem(SkippedDirs[I-1]->getName()), NameBuf); + llvm::sys::path::stem(SkippedDir->getName()), NameBuf); Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit).first; InferredModuleAllowedBy[Result] = UmbrellaModuleMap; Result->IsInferred = true; // Associate the module and the directory. - UmbrellaDirs[SkippedDirs[I-1]] = Result; + UmbrellaDirs[SkippedDir] = Result; // If inferred submodules export everything they import, add a // wildcard to the set of exports. @@ -745,12 +745,11 @@ ModuleMap::isHeaderUnavailableInModule(const FileEntry *Header, UmbrellaModule = UmbrellaModule->Parent; if (UmbrellaModule->InferSubmodules) { - for (unsigned I = SkippedDirs.size(); I != 0; --I) { + for (const DirectoryEntry *SkippedDir : llvm::reverse(SkippedDirs)) { // Find or create the module that corresponds to this directory name. 
SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( - llvm::sys::path::stem(SkippedDirs[I-1]->getName()), - NameBuf); + llvm::sys::path::stem(SkippedDir->getName()), NameBuf); Found = lookupModuleQualified(Name, Found); if (!Found) return false; diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index d0114b8fdae73..5444969f110f2 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3056,7 +3056,7 @@ OMPClause *Parser::ParseOpenMPUsesAllocatorClause(OpenMPDirectiveKind DKind) { /// clause: /// if-clause | final-clause | num_threads-clause | safelen-clause | /// default-clause | private-clause | firstprivate-clause | shared-clause -/// | linear-clause | aligned-clause | collapse-clause | +/// | linear-clause | aligned-clause | collapse-clause | bind-clause | /// lastprivate-clause | reduction-clause | proc_bind-clause | /// schedule-clause | copyin-clause | copyprivate-clause | untied-clause | /// mergeable-clause | flush-clause | read-clause | write-clause | @@ -3146,6 +3146,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_proc_bind: case OMPC_atomic_default_mem_order: case OMPC_order: + case OMPC_bind: // OpenMP [2.14.3.1, Restrictions] // Only a single default clause may be specified on a parallel, task or // teams directive. @@ -3154,6 +3155,8 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, // OpenMP [5.0, Requires directive, Restrictions] // At most one atomic_default_mem_order clause can appear // on the directive + // OpenMP 5.1, 2.11.7 loop Construct, Restrictions. + // At most one bind clause can appear on a loop directive. if (!FirstClause && CKind != OMPC_order) { Diag(Tok, diag::err_omp_more_one_clause) << getOpenMPDirectiveName(DKind) << getOpenMPClauseName(CKind) << 0; @@ -3500,6 +3503,9 @@ OMPClause *Parser::ParseOpenMPInteropClause(OpenMPClauseKind Kind, /// proc_bind-clause: /// 'proc_bind' '(' 'master' | 'close' | 'spread' ')' /// +/// bind-clause: +/// 'bind' '(' 'teams' | 'parallel' | 'thread' ')' +/// /// update-clause: /// 'update' '(' 'in' | 'out' | 'inout' | 'mutexinoutset' ')' /// diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index d2dfc96a6021f..6c47cedfccf3f 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -387,14 +387,13 @@ StringRef CodeCompletionTUInfo::getParentName(const DeclContext *DC) { SmallString<128> S; llvm::raw_svector_ostream OS(S); bool First = true; - for (unsigned I = Contexts.size(); I != 0; --I) { + for (const DeclContext *CurDC : llvm::reverse(Contexts)) { if (First) First = false; else { OS << "::"; } - const DeclContext *CurDC = Contexts[I - 1]; if (const auto *CatImpl = dyn_cast(CurDC)) CurDC = CatImpl->getCategoryDecl(); diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 11925eab039e7..ca09400f98330 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1925,9 +1925,6 @@ Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID, } void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { - if (!LangOpts.SYCLIsDevice && !(LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) - return; - if (isUnevaluatedContext() || Ty.isNull()) return; @@ -1950,7 +1947,7 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { FunctionDecl *FD = isa(C) ? 
cast(C) : dyn_cast_or_null(D); - auto CheckType = [&](QualType Ty) { + auto CheckDeviceType = [&](QualType Ty) { if (Ty->isDependentType()) return; @@ -1962,7 +1959,7 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { else PD << "expression"; targetDiag(Loc, PD, FD) - << false /*show bit size*/ << 0 /*bitsize*/ + << false /*show bit size*/ << 0 /*bitsize*/ << false /*return*/ << Ty << Context.getTargetInfo().getTriple().str(); } return; @@ -1997,6 +1994,49 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { if (targetDiag(Loc, PD, FD) << true /*show bit size*/ << static_cast(Context.getTypeSize(Ty)) << Ty + << false /*return*/ << Context.getTargetInfo().getTriple().str()) { + if (D) + D->setInvalidDecl(); + } + if (D) + targetDiag(D->getLocation(), diag::note_defined_here, FD) << D; + } + }; + + auto CheckType = [&](QualType Ty, bool IsRetTy = false) { + if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) + CheckDeviceType(Ty); + + QualType UnqualTy = Ty.getCanonicalType().getUnqualifiedType(); + const TargetInfo &TI = Context.getTargetInfo(); + if (!TI.hasLongDoubleType() && UnqualTy == Context.LongDoubleTy) { + PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type); + if (D) + PD << D; + else + PD << "expression"; + + if (Diag(Loc, PD, FD) + << false /*show bit size*/ << 0 << Ty << false /*return*/ + << Context.getTargetInfo().getTriple().str()) { + if (D) + D->setInvalidDecl(); + } + if (D) + targetDiag(D->getLocation(), diag::note_defined_here, FD) << D; + } + + bool IsDouble = UnqualTy == Context.DoubleTy; + bool IsFloat = UnqualTy == Context.FloatTy; + if (IsRetTy && !TI.hasFPReturn() && (IsDouble || IsFloat)) { + PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type); + if (D) + PD << D; + else + PD << "expression"; + + if (Diag(Loc, PD, FD) + << false /*show bit size*/ << 0 << Ty << true /*return*/ << Context.getTargetInfo().getTriple().str()) { if (D) D->setInvalidDecl(); @@ -2007,14 +2047,13 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { }; CheckType(Ty); - if (const auto *FPTy = dyn_cast(Ty)) { for (const auto &ParamTy : FPTy->param_types()) CheckType(ParamTy); - CheckType(FPTy->getReturnType()); + CheckType(FPTy->getReturnType(), /*IsRetTy=*/true); } if (const auto *FNPTy = dyn_cast(Ty)) - CheckType(FNPTy->getReturnType()); + CheckType(FNPTy->getReturnType(), /*IsRetTy=*/true); } /// Looks through the macro-expansion chain for the given diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 5bed658c6d585..08334da44c068 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -408,6 +408,64 @@ static bool SemaBuiltinCallWithStaticChain(Sema &S, CallExpr *BuiltinCall) { namespace { +class ScanfDiagnosticFormatHandler + : public analyze_format_string::FormatStringHandler { + // Accepts the argument index (relative to the first destination index) of the + // argument whose size we want. + using ComputeSizeFunction = + llvm::function_ref(unsigned)>; + + // Accepts the argument index (relative to the first destination index), the + // destination size, and the source size.
+ using DiagnoseFunction = + llvm::function_ref; + + ComputeSizeFunction ComputeSizeArgument; + DiagnoseFunction Diagnose; + +public: + ScanfDiagnosticFormatHandler(ComputeSizeFunction ComputeSizeArgument, + DiagnoseFunction Diagnose) + : ComputeSizeArgument(ComputeSizeArgument), Diagnose(Diagnose) {} + + bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, + const char *StartSpecifier, + unsigned specifierLen) override { + if (!FS.consumesDataArgument()) + return true; + + unsigned NulByte = 0; + switch ((FS.getConversionSpecifier().getKind())) { + default: + return true; + case analyze_format_string::ConversionSpecifier::sArg: + case analyze_format_string::ConversionSpecifier::ScanListArg: + NulByte = 1; + break; + case analyze_format_string::ConversionSpecifier::cArg: + break; + } + + auto OptionalFW = FS.getFieldWidth(); + if (OptionalFW.getHowSpecified() != + analyze_format_string::OptionalAmount::HowSpecified::Constant) + return true; + + unsigned SourceSize = OptionalFW.getConstantAmount() + NulByte; + + auto DestSizeAPS = ComputeSizeArgument(FS.getArgIndex()); + if (!DestSizeAPS) + return true; + + unsigned DestSize = DestSizeAPS->getZExtValue(); + + if (DestSize < SourceSize) + Diagnose(FS.getArgIndex(), DestSize, SourceSize); + + return true; + } +}; + class EstimateSizeFormatHandler : public analyze_format_string::FormatStringHandler { size_t Size; @@ -615,9 +673,12 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, // (potentially) more strict checking mode. Otherwise, conservatively assume // type 0. int BOSType = 0; - if (const auto *POS = - FD->getParamDecl(Index)->getAttr()) - BOSType = POS->getType(); + // This check can fail for variadic functions. + if (Index < FD->getNumParams()) { + if (const auto *POS = + FD->getParamDecl(Index)->getAttr()) + BOSType = POS->getType(); + } const Expr *ObjArg = TheCall->getArg(Index); uint64_t Result; @@ -642,6 +703,20 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, unsigned DiagID = 0; bool IsChkVariant = false; + auto GetFunctionName = [&]() { + StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); + // Skim off the details of whichever builtin was called to produce a better + // diagnostic, as it's unlikely that the user wrote the __builtin + // explicitly. 
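For illustration, a hypothetical call the new scanf handler is designed to flag (user code, not part of the patch; the exact wording lives in warn_fortify_scanf_overflow):

  #include <cstdio>

  void read_name(const char *In) {
    char Buf[4];
    // Field width 5 plus the terminating null byte needs 6 bytes, but Buf
    // holds only 4, so HandleScanfSpecifier reports DestSize < SourceSize.
    std::sscanf(In, "%5s", Buf);
  }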
+ if (IsChkVariant) { + FunctionName = FunctionName.drop_front(std::strlen("__builtin___")); + FunctionName = FunctionName.drop_back(std::strlen("_chk")); + } else if (FunctionName.startswith("__builtin_")) { + FunctionName = FunctionName.drop_front(std::strlen("__builtin_")); + } + return FunctionName; + }; + switch (BuiltinID) { default: return; @@ -661,6 +736,61 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, break; } + case Builtin::BIscanf: + case Builtin::BIfscanf: + case Builtin::BIsscanf: { + unsigned FormatIndex = 1; + unsigned DataIndex = 2; + if (BuiltinID == Builtin::BIscanf) { + FormatIndex = 0; + DataIndex = 1; + } + + const auto *FormatExpr = + TheCall->getArg(FormatIndex)->IgnoreParenImpCasts(); + + const auto *Format = dyn_cast(FormatExpr); + if (!Format) + return; + + if (!Format->isAscii() && !Format->isUTF8()) + return; + + auto Diagnose = [&](unsigned ArgIndex, unsigned DestSize, + unsigned SourceSize) { + DiagID = diag::warn_fortify_scanf_overflow; + unsigned Index = ArgIndex + DataIndex; + StringRef FunctionName = GetFunctionName(); + DiagRuntimeBehavior(TheCall->getArg(Index)->getBeginLoc(), TheCall, + PDiag(DiagID) << FunctionName << (Index + 1) + << DestSize << SourceSize); + }; + + StringRef FormatStrRef = Format->getString(); + auto ShiftedComputeSizeArgument = [&](unsigned Index) { + return ComputeSizeArgument(Index + DataIndex); + }; + ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose); + const char *FormatBytes = FormatStrRef.data(); + const ConstantArrayType *T = + Context.getAsConstantArrayType(Format->getType()); + assert(T && "String literal not of constant array type!"); + size_t TypeSize = T->getSize().getZExtValue(); + + // In case there's a null byte somewhere. + size_t StrLen = + std::min(std::max(TypeSize, size_t(1)) - 1, FormatStrRef.find(0)); + + analyze_format_string::ParseScanfString(H, FormatBytes, + FormatBytes + StrLen, getLangOpts(), + Context.getTargetInfo()); + + // Unlike the other cases, in this one we have already issued the diagnostic + // here, so no need to continue (because unlike the other cases, here the + // diagnostic refers to the argument number). + return; + } + case Builtin::BIsprintf: case Builtin::BI__builtin___sprintf_chk: { size_t FormatIndex = BuiltinID == Builtin::BIsprintf ? 1 : 3; @@ -771,15 +901,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, SourceSize.getValue().ule(DestinationSize.getValue())) return; - StringRef FunctionName = getASTContext().BuiltinInfo.getName(BuiltinID); - // Skim off the details of whichever builtin was called to produce a better - // diagnostic, as it's unlikely that the user wrote the __builtin explicitly. 
- if (IsChkVariant) { - FunctionName = FunctionName.drop_front(std::strlen("__builtin___")); - FunctionName = FunctionName.drop_back(std::strlen("_chk")); - } else if (FunctionName.startswith("__builtin_")) { - FunctionName = FunctionName.drop_front(std::strlen("__builtin_")); - } + StringRef FunctionName = GetFunctionName(); SmallString<16> DestinationStr; SmallString<16> SourceStr; @@ -2005,6 +2127,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, if (SemaBuiltinElementwiseMath(TheCall)) return ExprError(); break; + case Builtin::BI__builtin_reduce_max: + case Builtin::BI__builtin_reduce_min: + if (SemaBuiltinReduceMath(TheCall)) + return ExprError(); + break; case Builtin::BI__builtin_matrix_transpose: return SemaBuiltinMatrixTranspose(TheCall, TheCallResult); @@ -3402,6 +3529,18 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case PPC::BI__builtin_tabortdci: return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + // According to GCC 'Basic PowerPC Built-in Functions Available on ISA 2.05', + // __builtin_(un)pack_longdouble are available only if long double uses IBM + // extended double representation. + case PPC::BI__builtin_unpack_longdouble: + if (SemaBuiltinConstantArgRange(TheCall, 1, 0, 1)) + return true; + LLVM_FALLTHROUGH; + case PPC::BI__builtin_pack_longdouble: + if (&TI.getLongDoubleFormat() != &llvm::APFloat::PPCDoubleDouble()) + return Diag(TheCall->getBeginLoc(), diag::err_ppc_builtin_requires_abi) + << "ibmlongdouble"; + return false; case PPC::BI__builtin_altivec_dst: case PPC::BI__builtin_altivec_dstt: case PPC::BI__builtin_altivec_dstst: @@ -16735,6 +16874,26 @@ bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) { return false; } +bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) { + if (checkArgCount(*this, TheCall, 1)) + return true; + + ExprResult A = UsualUnaryConversions(TheCall->getArg(0)); + if (A.isInvalid()) + return true; + + TheCall->setArg(0, A.get()); + const VectorType *TyA = A.get()->getType()->getAs(); + if (!TyA) { + SourceLocation ArgLoc = TheCall->getArg(0)->getBeginLoc(); + return Diag(ArgLoc, diag::err_builtin_invalid_arg_type) + << 1 << /* vector ty*/ 4 << A.get()->getType(); + } + + TheCall->setType(TyA->getElementType()); + return false; +} + ExprResult Sema::SemaBuiltinMatrixTranspose(CallExpr *TheCall, ExprResult CallResult) { if (checkArgCount(*this, TheCall, 1)) diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 3d1899a57c72a..323e425585cee 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -53,15 +53,10 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD, SourceLocation KwLoc) { const FunctionProtoType *FnType = FD->getType()->castAs(); const SourceLocation FuncLoc = FD->getLocation(); - // FIXME: Cache std::coroutine_traits once we've found it. 
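As a usage sketch for the reduction builtins newly checked by SemaBuiltinReduceMath above (the vector typedef is illustrative):

  typedef float v4f32 __attribute__((vector_size(16)));

  float max_elem(v4f32 V) {
    // SemaBuiltinReduceMath verifies the single argument is a vector and
    // gives the call the element type, float in this case.
    return __builtin_reduce_max(V);
  }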
- NamespaceDecl *StdExp = S.lookupStdExperimentalNamespace(); - if (!StdExp) { - S.Diag(KwLoc, diag::err_implied_coroutine_type_not_found) - << "std::experimental::coroutine_traits"; - return QualType(); - } - ClassTemplateDecl *CoroTraits = S.lookupCoroutineTraits(KwLoc, FuncLoc); + NamespaceDecl *CoroNamespace = nullptr; + ClassTemplateDecl *CoroTraits = + S.lookupCoroutineTraits(KwLoc, FuncLoc, CoroNamespace); if (!CoroTraits) { return QualType(); } @@ -122,7 +117,7 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD, QualType PromiseType = S.Context.getTypeDeclType(Promise); auto buildElaboratedType = [&]() { - auto *NNS = NestedNameSpecifier::Create(S.Context, nullptr, StdExp); + auto *NNS = NestedNameSpecifier::Create(S.Context, nullptr, CoroNamespace); NNS = NestedNameSpecifier::Create(S.Context, NNS, false, CoroTrait.getTypePtr()); return S.Context.getElaboratedType(ETK_None, NNS, PromiseType); @@ -141,20 +136,20 @@ static QualType lookupPromiseType(Sema &S, const FunctionDecl *FD, return PromiseType; } -/// Look up the std::experimental::coroutine_handle. +/// Look up the std::coroutine_handle. static QualType lookupCoroutineHandleType(Sema &S, QualType PromiseType, SourceLocation Loc) { if (PromiseType.isNull()) return QualType(); - NamespaceDecl *StdExp = S.lookupStdExperimentalNamespace(); - assert(StdExp && "Should already be diagnosed"); + NamespaceDecl *CoroNamespace = S.getCachedCoroNamespace(); + assert(CoroNamespace && "Should already be diagnosed"); LookupResult Result(S, &S.PP.getIdentifierTable().get("coroutine_handle"), Loc, Sema::LookupOrdinaryName); - if (!S.LookupQualifiedName(Result, StdExp)) { + if (!S.LookupQualifiedName(Result, CoroNamespace)) { S.Diag(Loc, diag::err_implied_coroutine_type_not_found) - << "std::experimental::coroutine_handle"; + << "std::coroutine_handle"; return QualType(); } @@ -1000,7 +995,7 @@ static Expr *buildStdNoThrowDeclRef(Sema &S, SourceLocation Loc) { LookupResult Result(S, &S.PP.getIdentifierTable().get("nothrow"), Loc, Sema::LookupOrdinaryName); if (!S.LookupQualifiedName(Result, Std)) { - // FIXME: <new> should have been included already. + // FIXME: <new> should have been included already. // If we require it to include <new> then this diagnostic is no longer // needed. S.Diag(Loc, diag::err_implicit_coroutine_std_nothrow_type_not_found); @@ -1663,25 +1658,47 @@ StmtResult Sema::BuildCoroutineBodyStmt(CoroutineBodyStmt::CtorArgs Args) { } ClassTemplateDecl *Sema::lookupCoroutineTraits(SourceLocation KwLoc, - SourceLocation FuncLoc) { + SourceLocation FuncLoc, + NamespaceDecl *&Namespace) { if (!StdCoroutineTraitsCache) { - if (auto StdExp = lookupStdExperimentalNamespace()) { - LookupResult Result(*this, - &PP.getIdentifierTable().get("coroutine_traits"), - FuncLoc, LookupOrdinaryName); - if (!LookupQualifiedName(Result, StdExp)) { + NamespaceDecl *CoroNamespace = getStdNamespace(); + LookupResult Result(*this, &PP.getIdentifierTable().get("coroutine_traits"), + FuncLoc, LookupOrdinaryName); + + if (!CoroNamespace || !LookupQualifiedName(Result, CoroNamespace)) { + /// Look up in namespace std::experimental, for compatibility. + /// TODO: Remove this extra lookup when <experimental/coroutine> is + /// removed.
+ CoroNamespace = lookupStdExperimentalNamespace(); + if (!CoroNamespace || !LookupQualifiedName(Result, CoroNamespace)) { Diag(KwLoc, diag::err_implied_coroutine_type_not_found) - << "std::experimental::coroutine_traits"; + << "std::coroutine_traits"; return nullptr; } - if (!(StdCoroutineTraitsCache = - Result.getAsSingle())) { - Result.suppressDiagnostics(); - NamedDecl *Found = *Result.begin(); - Diag(Found->getLocation(), diag::err_malformed_std_coroutine_traits); + /// TODO: Add a warning about not including <coroutine> + /// once we update libcxx. + } else { + /// When coroutine_traits is found in the std namespace, make sure there + /// is no misleading definition in the std::experimental namespace. + NamespaceDecl *ExpNamespace = lookupStdExperimentalNamespace(); + LookupResult ExpResult(*this, + &PP.getIdentifierTable().get("coroutine_traits"), + FuncLoc, LookupOrdinaryName); + if (ExpNamespace && LookupQualifiedName(ExpResult, ExpNamespace)) { + Diag(KwLoc, + diag::err_mixed_use_std_and_experimental_namespace_for_coroutine); + return nullptr; + } + } + + if (!(StdCoroutineTraitsCache = Result.getAsSingle())) { + Result.suppressDiagnostics(); + NamedDecl *Found = *Result.begin(); + Diag(Found->getLocation(), diag::err_malformed_std_coroutine_traits); + return nullptr; + } + CoroTraitsNamespaceCache = CoroNamespace; } + Namespace = CoroTraitsNamespaceCache; return StdCoroutineTraitsCache; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index a21cb3bdc1747..e988223432dee 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1944,6 +1944,12 @@ void Sema::DiagnoseUnusedButSetDecl(const VarDecl *VD) { } } + // Don't warn about __block Objective-C pointer variables, as they might + // be assigned in the block but not used elsewhere for the purpose of lifetime + // extension. + if (VD->hasAttr() && Ty->isObjCObjectPointerType()) + return; + auto iter = RefsMinusAssignments.find(VD); if (iter == RefsMinusAssignments.end()) return; @@ -9632,8 +9638,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, } } - checkTypeSupport(NewFD->getType(), D.getBeginLoc(), NewFD); - if (!getLangOpts().CPlusPlus) { // Perform semantic checking on the function declaration.
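A sketch of the lookup order the rewritten Sema::lookupCoroutineTraits above implements (declarations abbreviated and illustrative):

  namespace std {
  // Found first: the C++20 location.
  template <class Ret, class... Args> struct coroutine_traits;
  namespace experimental {
  // Fallback for pre-C++20 code; if coroutine_traits exists in *both*
  // namespaces, the new mixed-use diagnostic fires instead.
  template <class Ret, class... Args> struct coroutine_traits;
  } // namespace experimental
  } // namespace std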
if (!NewFD->isInvalidDecl() && NewFD->isMain()) @@ -14930,6 +14934,9 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, DeclsToCheckForDeferredDiags.insert(FD); } + if (FD && !FD->isDeleted()) + checkTypeSupport(FD->getType(), FD->getLocation(), FD); + return dcl; } @@ -17886,7 +17893,8 @@ EnumConstantDecl *Sema::CheckEnumConstant(EnumDecl *Enum, Val = DefaultLvalueConversion(Val).get(); if (Val) { - if (Enum->isDependentType() || Val->isTypeDependent()) + if (Enum->isDependentType() || Val->isTypeDependent() || + Val->containsErrors()) EltTy = Context.DependentTy; else { // FIXME: We don't allow folding in C++11 mode for an enum with a fixed diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 05d146cb24944..562d10313ec93 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -984,9 +984,9 @@ static std::string printTemplateArgs(const PrintingPolicy &PrintingPolicy, for (auto &Arg : Args.arguments()) { if (!First) OS << ", "; - Arg.getArgument().print( - PrintingPolicy, OS, - TemplateParameterList::shouldIncludeTypeForArgument(Params, I)); + Arg.getArgument().print(PrintingPolicy, OS, + TemplateParameterList::shouldIncludeTypeForArgument( + PrintingPolicy, Params, I)); First = false; I++; } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 2b5335904b969..57ea77ad8ecb1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -3229,9 +3229,8 @@ ExprResult Sema::BuildDeclarationNameExpr(const CXXScopeSpec &SS, return ULE; } -static void -diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, - ValueDecl *var, DeclContext *DC); +static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, + ValueDecl *var); /// Complete semantic analysis for a reference to the given declaration. 
ExprResult Sema::BuildDeclarationNameExpr( @@ -3395,7 +3394,7 @@ ExprResult Sema::BuildDeclarationNameExpr( if (BD->getDeclContext() != CurContext) { auto *DD = dyn_cast_or_null(BD->getDecomposedDecl()); if (DD && DD->hasLocalStorage()) - diagnoseUncapturableValueReference(*this, Loc, BD, CurContext); + diagnoseUncapturableValueReference(*this, Loc, BD); } break; } @@ -17483,9 +17482,8 @@ void Sema::MarkCaptureUsedInEnclosingContext(VarDecl *Capture, MarkVarDeclODRUsed(Capture, Loc, *this, &CapturingScopeIndex); } -static void -diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, - ValueDecl *var, DeclContext *DC) { +static void diagnoseUncapturableValueReference(Sema &S, SourceLocation loc, + ValueDecl *var) { DeclContext *VarDC = var->getDeclContext(); // If the parameter still belongs to the translation unit, then @@ -17564,7 +17562,7 @@ static DeclContext *getParentOfCapturingContextOrNull(DeclContext *DC, VarDecl * return getLambdaAwareParentOfDeclContext(DC); else if (Var->hasLocalStorage()) { if (Diagnose) - diagnoseUncapturableValueReference(S, Loc, Var, DC); + diagnoseUncapturableValueReference(S, Loc, Var); } return nullptr; } @@ -18038,7 +18036,7 @@ bool Sema::tryCaptureVariable( Diag(LSI->Lambda->getBeginLoc(), diag::note_lambda_decl); buildLambdaCaptureFixit(*this, LSI, Var); } else - diagnoseUncapturableValueReference(*this, ExprLoc, Var, DC); + diagnoseUncapturableValueReference(*this, ExprLoc, Var); } return true; } diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index eac2d1d72db12..0505cedca0087 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4689,6 +4689,7 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack, OpenMPDirectiveKind CurrentRegion, const DeclarationNameInfo &CurrentName, OpenMPDirectiveKind CancelRegion, + OpenMPBindClauseKind BindKind, SourceLocation StartLoc) { if (Stack->getCurScope()) { OpenMPDirectiveKind ParentRegion = Stack->getParentDirective(); @@ -4899,6 +4900,16 @@ static bool checkNestingOfRegions(Sema &SemaRef, const DSAStackTy *Stack, CurrentRegion != OMPD_loop; Recommend = ShouldBeInParallelRegion; } + if (!NestingProhibited && CurrentRegion == OMPD_loop) { + // OpenMP [5.1, 2.11.7, loop Construct, Restrictions] + // If the bind clause is present on the loop construct and binding is + // teams then the corresponding loop region must be strictly nested inside + // a teams region. + NestingProhibited = BindKind == OMPC_BIND_teams && + ParentRegion != OMPD_teams && + ParentRegion != OMPD_target_teams; + Recommend = ShouldBeInTeamsRegion; + } if (!NestingProhibited && isOpenMPNestingDistributeDirective(CurrentRegion)) { // OpenMP 4.5 [2.17 Nesting of Regions] @@ -5772,10 +5783,14 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( OpenMPDirectiveKind CancelRegion, ArrayRef Clauses, Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) { StmtResult Res = StmtError(); + OpenMPBindClauseKind BindKind = OMPC_BIND_unknown; + if (const OMPBindClause *BC = + OMPExecutableDirective::getSingleClause(Clauses)) + BindKind = BC->getBindKind(); // First check CancelRegion which is then used in checkNestingOfRegions. 
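Illustrative source for the clause being wired up here; with bind(teams) the loop region must be strictly nested inside a teams region, which checkNestingOfRegions now enforces:

  void scale(int *A, int N) {
  #pragma omp teams
  #pragma omp loop bind(teams)
    for (int I = 0; I < N; ++I)
      A[I] *= 2;
  }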
if (checkCancelRegion(*this, Kind, CancelRegion, StartLoc) || checkNestingOfRegions(*this, DSAStack, Kind, DirName, CancelRegion, - StartLoc)) + BindKind, StartLoc)) return StmtError(); llvm::SmallVector ClausesWithImplicit; @@ -6354,6 +6369,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPC_exclusive: case OMPC_uses_allocators: case OMPC_affinity: + case OMPC_bind: continue; case OMPC_allocator: case OMPC_flush: @@ -13462,6 +13478,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_uses_allocators: case OMPC_affinity: case OMPC_when: + case OMPC_bind: default: llvm_unreachable("Clause is not allowed."); } @@ -14292,6 +14309,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_exclusive: case OMPC_uses_allocators: case OMPC_affinity: + case OMPC_bind: default: llvm_unreachable("Unexpected OpenMP clause."); } @@ -14683,6 +14701,10 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( Res = ActOnOpenMPUpdateClause(static_cast(Argument), ArgumentLoc, StartLoc, LParenLoc, EndLoc); break; + case OMPC_bind: + Res = ActOnOpenMPBindClause(static_cast(Argument), + ArgumentLoc, StartLoc, LParenLoc, EndLoc); + break; case OMPC_if: case OMPC_final: case OMPC_num_threads: @@ -15049,6 +15071,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_uses_allocators: case OMPC_affinity: case OMPC_when: + case OMPC_bind: default: llvm_unreachable("Clause is not allowed."); } @@ -15842,6 +15865,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( case OMPC_detach: case OMPC_uses_allocators: case OMPC_when: + case OMPC_bind: default: llvm_unreachable("Clause is not allowed."); } @@ -21523,3 +21547,20 @@ OMPClause *Sema::ActOnOpenMPAffinityClause( return OMPAffinityClause::Create(Context, StartLoc, LParenLoc, ColonLoc, EndLoc, Modifier, Vars); } + +OMPClause *Sema::ActOnOpenMPBindClause(OpenMPBindClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + if (Kind == OMPC_BIND_unknown) { + Diag(KindLoc, diag::err_omp_unexpected_clause_value) + << getListOfPossibleValues(OMPC_bind, /*First=*/0, + /*Last=*/unsigned(OMPC_BIND_unknown)) + << getOpenMPClauseName(OMPC_bind); + return nullptr; + } + + return OMPBindClause::Create(Context, Kind, KindLoc, StartLoc, LParenLoc, + EndLoc); +} diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 068b74f0f8ba0..6ff87eb61bf79 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -7407,7 +7407,9 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, // C++1z [temp.arg.template]p3: (DR 150) // A template-argument matches a template template-parameter P when P // is at least as specialized as the template-argument A. - // FIXME: RelaxedTemplateTemplateArgs is deprecated, should be always on. + // FIXME: We should enable RelaxedTemplateTemplateArgs by default as it is a + // defect report resolution from C++17 and shouldn't be introduced by + // concepts. if (getLangOpts().RelaxedTemplateTemplateArgs) { // Quick check for the common case: // If P contains a parameter pack, then A [...] 
matches P if each of A's @@ -10917,9 +10919,9 @@ Sema::getTemplateArgumentBindingsText(const TemplateParameterList *Params, } Out << " = "; - Args[I].print( - getPrintingPolicy(), Out, - TemplateParameterList::shouldIncludeTypeForArgument(Params, I)); + Args[I].print(getPrintingPolicy(), Out, + TemplateParameterList::shouldIncludeTypeForArgument( + getPrintingPolicy(), Params, I)); } Out << ']'; diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 725bfd76117c6..a5557a0b1c5ee 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -2137,9 +2137,7 @@ TemplateDeclInstantiator::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { CXXRecordDecl *PrevDecl = nullptr; - if (D->isInjectedClassName()) - PrevDecl = cast(Owner); - else if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) { + if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) { NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(), PatternPrev, TemplateArgs); @@ -2148,6 +2146,7 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { } CXXRecordDecl *Record = nullptr; + bool IsInjectedClassName = D->isInjectedClassName(); if (D->isLambda()) Record = CXXRecordDecl::CreateLambda( SemaRef.Context, Owner, D->getLambdaTypeInfo(), D->getLocation(), @@ -2156,7 +2155,11 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { else Record = CXXRecordDecl::Create(SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(), D->getLocation(), - D->getIdentifier(), PrevDecl); + D->getIdentifier(), PrevDecl, + /*DelayTypeCreation=*/IsInjectedClassName); + // Link the type of the injected-class-name to that of the outer class. + if (IsInjectedClassName) + (void)SemaRef.Context.getTypeDeclType(Record, cast(Owner)); // Substitute the nested name specifier, if any. if (SubstQualifier(D, Record)) @@ -2171,7 +2174,7 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { // specifier. Remove once this area of the code gets sorted out. 
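For readers less familiar with the term, a plain ISO C++ reminder of the injected-class-name that VisitCXXRecordDecl now links up explicitly (no Clang internals involved):

  template <class T> struct Node {
    Node *Next;    // `Node` here is the injected-class-name ...
    Node<T> *Prev; // ... shorthand for this equivalent spelling.
  };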
if (D->getAccess() != AS_none) Record->setAccess(D->getAccess()); - if (!D->isInjectedClassName()) + if (!IsInjectedClassName) Record->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation); // If the original function was part of a friend declaration, @@ -2224,6 +2227,9 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { SemaRef.DiagnoseUnusedNestedTypedefs(Record); + if (IsInjectedClassName) + assert(Record->isInjectedClassName() && "Broken injected-class-name"); + return Record; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index e3f2da6e4fc39..e85100136f138 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -5922,6 +5922,9 @@ namespace { void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) { Visit(TL.getUnqualifiedLoc()); } + // Allows filling the pointee's type locations, e.g., + // int __attr * __attr * __attr *p; + void VisitPointerTypeLoc(PointerTypeLoc TL) { Visit(TL.getNextTypeLoc()); } void VisitTypedefTypeLoc(TypedefTypeLoc TL) { TL.setNameLoc(DS.getTypeSpecTypeLoc()); } @@ -6522,6 +6525,34 @@ QualType Sema::BuildAddressSpaceAttr(QualType &T, Expr *AddrSpace, return BuildAddressSpaceAttr(T, ASIdx, AddrSpace, AttrLoc); } +static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr, + TypeProcessingState &State) { + Sema &S = State.getSema(); + + // Check the number of attribute arguments. + if (Attr.getNumArgs() != 1) { + S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) + << Attr << 1; + Attr.setInvalid(); + return; + } + + // Ensure the argument is a string. + auto *StrLiteral = dyn_cast(Attr.getArgAsExpr(0)); + if (!StrLiteral) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) + << Attr << AANT_ArgumentString; + Attr.setInvalid(); + return; + } + + ASTContext &Ctx = S.Context; + StringRef BTFTypeTag = StrLiteral->getString(); + Type = State.getAttributedType( + ::new (Ctx) BTFTypeTagAttr(Ctx, Attr, BTFTypeTag), Type, Type); + return; +} + /// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the /// specified type. The attribute contains 1 argument, the id of the address /// space for the type. @@ -8214,6 +8245,11 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type, case ParsedAttr::IgnoredAttribute: break; + case ParsedAttr::AT_BTFTypeTag: + HandleBTFTypeTagAttribute(type, attr, state); + attr.setUsedAsTypeAttr(); + break; + case ParsedAttr::AT_MayAlias: // FIXME: This attribute needs to actually be handled, but if we ignore // it, it breaks large amounts of Linux software. diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index dc3b881b01419..8f31fab934668 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -2260,6 +2260,19 @@ class TreeTransform { EndLoc); } + /// Build a new OpenMP 'bind' clause. + /// + /// By default, performs semantic analysis to build the new OpenMP clause. + /// Subclasses may override this routine to provide different behavior. + OMPClause *RebuildOMPBindClause(OpenMPBindClauseKind Kind, + SourceLocation KindLoc, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().ActOnOpenMPBindClause(Kind, KindLoc, StartLoc, LParenLoc, + EndLoc); + } + /// Rebuild the operand to an Objective-C \@synchronized statement. /// /// By default, performs semantic analysis to build the new statement.
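Returning to the HandleBTFTypeTagAttribute hunk above, a hypothetical use of the new type attribute (macro name and tag string are illustrative):

  #define __tag_user __attribute__((btf_type_tag("user")))

  // Each pointer level can carry its own tag; the string is free-form and
  // is carried through to BTF/debug-info consumers such as the Linux kernel.
  int __tag_user *user_ptr;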
@@ -3870,8 +3883,10 @@ ExprResult TreeTransform::TransformInitializer(Expr *Init, if (auto *FE = dyn_cast(Init)) Init = FE->getSubExpr(); - if (auto *AIL = dyn_cast(Init)) - Init = AIL->getCommonExpr(); + if (auto *AIL = dyn_cast(Init)) { + OpaqueValueExpr *OVE = AIL->getCommonExpr(); + Init = OVE->getSourceExpr(); + } if (MaterializeTemporaryExpr *MTE = dyn_cast(Init)) Init = MTE->getSubExpr(); @@ -10275,6 +10290,13 @@ OMPClause *TreeTransform::TransformOMPOrderClause(OMPOrderClause *C) { C->getEndLoc()); } +template +OMPClause *TreeTransform::TransformOMPBindClause(OMPBindClause *C) { + return getDerived().RebuildOMPBindClause( + C->getBindKind(), C->getBindKindLoc(), C->getBeginLoc(), + C->getLParenLoc(), C->getEndLoc()); +} + //===----------------------------------------------------------------------===// // Expression transformation //===----------------------------------------------------------------------===// diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 4058ccf30ebde..39d931259d4ef 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -8158,13 +8158,16 @@ namespace serialization { if (Reader.DeserializationListener) Reader.DeserializationListener->SelectorRead(Data.ID, Sel); - InstanceMethods.append(Data.Instance.begin(), Data.Instance.end()); - FactoryMethods.append(Data.Factory.begin(), Data.Factory.end()); + // Append methods in the reverse order, so that later we can process them + // in the order they appear in the source code by iterating through + // the vector in the reverse order. + InstanceMethods.append(Data.Instance.rbegin(), Data.Instance.rend()); + FactoryMethods.append(Data.Factory.rbegin(), Data.Factory.rend()); InstanceBits = Data.InstanceBits; FactoryBits = Data.FactoryBits; InstanceHasMoreThanOneDecl = Data.InstanceHasMoreThanOneDecl; FactoryHasMoreThanOneDecl = Data.FactoryHasMoreThanOneDecl; - return true; + return false; } /// Retrieve the instance methods found by this visitor. @@ -8193,9 +8196,8 @@ namespace serialization { /// Add the given set of methods to the method list. 
static void addMethodsToPool(Sema &S, ArrayRef Methods, ObjCMethodList &List) { - for (unsigned I = 0, N = Methods.size(); I != N; ++I) { - S.addMethodToGlobalList(&List, Methods[I]); - } + for (auto I = Methods.rbegin(), E = Methods.rend(); I != E; ++I) + S.addMethodToGlobalList(&List, *I); } void ASTReader::ReadMethodPool(Selector Sel) { @@ -11976,6 +11978,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_filter: C = new (Context) OMPFilterClause(); break; + case llvm::omp::OMPC_bind: + C = OMPBindClause::CreateEmpty(Context); + break; #define OMP_CLAUSE_NO_CLASS(Enum, Str) \ case llvm::omp::Enum: \ break; @@ -12960,6 +12965,12 @@ void OMPClauseReader::VisitOMPFilterClause(OMPFilterClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPBindClause(OMPBindClause *C) { + C->setBindKind(Record.readEnum()); + C->setLParenLoc(Record.readSourceLocation()); + C->setBindKindLoc(Record.readSourceLocation()); +} + OMPTraitInfo *ASTRecordReader::readOMPTraitInfo() { OMPTraitInfo &TI = getContext().getNewOMPTraitInfo(); TI.Sets.resize(readUInt32()); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 7c500f30e271e..4845dc70469fd 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3045,11 +3045,11 @@ class ASTMethodPoolTrait { unsigned DataLen = 4 + 2 + 2; // 2 bytes for each of the method counts for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) - if (Method->getMethod()) + if (ShouldWriteMethodListNode(Method)) DataLen += 4; for (const ObjCMethodList *Method = &Methods.Factory; Method; Method = Method->getNext()) - if (Method->getMethod()) + if (ShouldWriteMethodListNode(Method)) DataLen += 4; return emitULEBKeyDataLength(KeyLen, DataLen, Out); } @@ -3080,13 +3080,13 @@ class ASTMethodPoolTrait { unsigned NumInstanceMethods = 0; for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) - if (Method->getMethod()) + if (ShouldWriteMethodListNode(Method)) ++NumInstanceMethods; unsigned NumFactoryMethods = 0; for (const ObjCMethodList *Method = &Methods.Factory; Method; Method = Method->getNext()) - if (Method->getMethod()) + if (ShouldWriteMethodListNode(Method)) ++NumFactoryMethods; unsigned InstanceBits = Methods.Instance.getBits(); @@ -3107,15 +3107,20 @@ class ASTMethodPoolTrait { LE.write(FullFactoryBits); for (const ObjCMethodList *Method = &Methods.Instance; Method; Method = Method->getNext()) - if (Method->getMethod()) + if (ShouldWriteMethodListNode(Method)) LE.write(Writer.getDeclID(Method->getMethod())); for (const ObjCMethodList *Method = &Methods.Factory; Method; Method = Method->getNext()) - if (Method->getMethod()) + if (ShouldWriteMethodListNode(Method)) LE.write(Writer.getDeclID(Method->getMethod())); assert(Out.tell() - Start == DataLen && "Data length is wrong"); } + +private: + static bool ShouldWriteMethodListNode(const ObjCMethodList *Node) { + return (Node->getMethod() && !Node->getMethod()->isFromASTFile()); + } }; } // namespace @@ -3158,15 +3163,21 @@ void ASTWriter::WriteSelectors(Sema &SemaRef) { if (Chain && ID < FirstSelectorID) { // Selector already exists. Did it change? 
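A minimal, standalone model of the ordering trick used by ReadMethodPool and addMethodsToPool above: append each batch reversed, then walk the vector in reverse to recover source order (names illustrative):

  #include "llvm/ADT/SmallVector.h"
  #include <iterator>

  void demo() {
    llvm::SmallVector<int, 8> Pool;
    int Methods[] = {1, 2, 3}; // source order
    Pool.append(std::rbegin(Methods), std::rend(Methods)); // Pool = {3, 2, 1}
    for (auto I = Pool.rbegin(), E = Pool.rend(); I != E; ++I)
      (void)*I; // visits 1, 2, 3 -- the original source order
  }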
bool changed = false; - for (ObjCMethodList *M = &Data.Instance; - !changed && M && M->getMethod(); M = M->getNext()) { - if (!M->getMethod()->isFromASTFile()) + for (ObjCMethodList *M = &Data.Instance; M && M->getMethod(); + M = M->getNext()) { + if (!M->getMethod()->isFromASTFile()) { changed = true; + Data.Instance = *M; + break; + } } - for (ObjCMethodList *M = &Data.Factory; !changed && M && M->getMethod(); + for (ObjCMethodList *M = &Data.Factory; M && M->getMethod(); M = M->getNext()) { - if (!M->getMethod()->isFromASTFile()) + if (!M->getMethod()->isFromASTFile()) { changed = true; + Data.Factory = *M; + break; + } } if (!changed) continue; @@ -6721,6 +6732,12 @@ void OMPClauseWriter::VisitOMPAffinityClause(OMPAffinityClause *C) { Record.AddStmt(E); } +void OMPClauseWriter::VisitOMPBindClause(OMPBindClause *C) { + Record.writeEnum(C->getBindKind()); + Record.AddSourceLocation(C->getLParenLoc()); + Record.AddSourceLocation(C->getBindKindLoc()); +} + void ASTRecordWriter::writeOMPTraitInfo(const OMPTraitInfo *TI) { writeUInt32(TI->Sets.size()); for (const auto &Set : TI->Sets) { diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index 180fa970a3543..43ffcc8f13174 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -411,7 +411,6 @@ void CallEvent::dump(raw_ostream &Out) const { ASTContext &Ctx = getState()->getStateManager().getContext(); if (const Expr *E = getOriginExpr()) { E->printPretty(Out, nullptr, Ctx.getPrintingPolicy()); - Out << "\n"; return; } diff --git a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp index e09399a83589e..94287b7992dd9 100644 --- a/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/CheckerManager.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" #include #include @@ -655,7 +656,7 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst, ExprEngine &Eng, const EvalCallOptions &CallOpts) { for (auto *const Pred : Src) { - bool anyEvaluated = false; + Optional evaluatorChecker; ExplodedNodeSet checkDst; NodeBuilder B(Pred, checkDst, Eng.getBuilderContext()); @@ -674,10 +675,26 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst, CheckerContext C(B, Eng, Pred, L); evaluated = EvalCallChecker(Call, C); } - assert(!(evaluated && anyEvaluated) - && "There are more than one checkers evaluating the call"); +#ifndef NDEBUG + if (evaluated && evaluatorChecker) { + const auto toString = [](const CallEvent &Call) -> std::string { + std::string Buf; + llvm::raw_string_ostream OS(Buf); + Call.dump(OS); + OS.flush(); + return Buf; + }; + std::string AssertionMessage = llvm::formatv( + "The '{0}' call has already been evaluated by the {1} checker, " + "while the {2} checker also tried to evaluate the same call. At " + "most one checker is supposed to evaluate a call.", + toString(Call), evaluatorChecker->getName(), + EvalCallChecker.Checker->getCheckerName()); + llvm_unreachable(AssertionMessage.c_str()); + } +#endif if (evaluated) { - anyEvaluated = true; + evaluatorChecker = EvalCallChecker.Checker->getCheckerName(); Dst.insert(checkDst); #ifdef NDEBUG break; // on release don't check that no other checker also evals.
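The assertion text above is built with llvm::formatv; a minimal sketch of that API under illustrative names:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/FormatVariadic.h"
  #include <string>

  std::string describeClash(llvm::StringRef Call, llvm::StringRef First,
                            llvm::StringRef Second) {
    // {0}, {1}, ... are positional; the result converts to std::string.
    return llvm::formatv("The '{0}' call was evaluated by both {1} and {2}.",
                         Call, First, Second);
  }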
@@ -686,7 +703,7 @@ void CheckerManager::runCheckersForEvalCall(ExplodedNodeSet &Dst, } // If none of the checkers evaluated the call, ask ExprEngine to handle it. - if (!anyEvaluated) { + if (!evaluatorChecker) { NodeBuilder B(Pred, Dst, Eng.getBuilderContext()); Eng.defaultEvalCall(B, Pred, Call, CallOpts); } diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 79d2fc76a42f1..135130b35ba70 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -437,10 +437,13 @@ class RegionStoreManager : public StoreManager { RegionBindingsRef removeSubRegionBindings(RegionBindingsConstRef B, const SubRegion *R); - Optional getConstantValFromConstArrayInitializer( - RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R); - Optional getSValFromInitListExpr(const InitListExpr *ILE, - uint64_t Offset, QualType ElemT); + Optional + getConstantValFromConstArrayInitializer(RegionBindingsConstRef B, + const ElementRegion *R); + Optional + getSValFromInitListExpr(const InitListExpr *ILE, + const SmallVector &ConcreteOffsets, + QualType ElemT); SVal getSValFromStringLiteral(const StringLiteral *SL, uint64_t Offset, QualType ElemT); @@ -1631,9 +1634,127 @@ RegionStoreManager::findLazyBinding(RegionBindingsConstRef B, return Result; } +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Return an array of extents of the declared array type. +/// +/// E.g. for `int x[1][2][3];` returns { 1, 2, 3 }. +static SmallVector +getConstantArrayExtents(const ConstantArrayType *CAT) { + assert(CAT && "ConstantArrayType should not be null"); + CAT = cast(CAT->getCanonicalTypeInternal()); + SmallVector Extents; + do { + Extents.push_back(CAT->getSize().getZExtValue()); + } while ((CAT = dyn_cast(CAT->getElementType()))); + return Extents; } + +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Return an array of offsets from nested ElementRegions and a root base +/// region. The array is never empty and a base region is never null. +/// +/// E.g. for `Element{Element{Element{VarRegion,1},2},3}` returns { 3, 2, 1 }. +/// This represents an access through indirection: `arr[1][2][3];` +/// +/// \param ER The given (possibly nested) ElementRegion. +/// +/// \note The result array is in the reverse order of the indirection +/// expression: arr[1][2][3] -> { 3, 2, 1 }. This helps to provide complexity +/// O(n), where n is the number of indirections. It may not affect performance +/// in real-life code, though. +static std::pair, const MemRegion *> +getElementRegionOffsetsWithBase(const ElementRegion *ER) { + assert(ER && "ElementRegion should not be null"); + const MemRegion *Base; + SmallVector SValOffsets; + do { + SValOffsets.push_back(ER->getIndex()); + Base = ER->getSuperRegion(); + ER = dyn_cast(Base); + } while (ER); + return {SValOffsets, Base}; } + +/// This is a helper function for `getConstantValFromConstArrayInitializer`. +/// +/// Convert an array of offsets from `SVal` to `uint64_t`, taking the +/// respective array extents into account. +/// \param SrcOffsets [in] The array of offsets of type `SVal` in reversed +/// order (expectedly received from `getElementRegionOffsetsWithBase`). +/// \param ArrayExtents [in] The array of extents. +/// \param DstOffsets [out] The array of offsets of type `uint64_t`. +/// \returns: +/// - `None` for a successful conversion. +/// - `UndefinedVal` or `UnknownVal` otherwise.
It's expected that this SVal +/// will be returned as a suitable value of the access operation. +/// +/// \example: +/// const int arr[10][20][30] = {}; // ArrayExtents { 10, 20, 30 } +/// int x1 = arr[4][5][6]; // SrcOffsets { NonLoc(6), NonLoc(5), NonLoc(4) } +/// // DstOffsets { 4, 5, 6 } +/// // returns None +/// int x2 = arr[42][5][-6]; // returns UndefinedVal +/// int x3 = arr[4][5][x2]; // returns UnknownVal +static Optional +convertOffsetsFromSvalToUnsigneds(const SmallVector &SrcOffsets, + const SmallVector ArrayExtents, + SmallVector &DstOffsets) { + // Check offsets for being out of bounds. + // C++20 [expr.add] 7.6.6.4 (excerpt): + // If P points to an array element i of an array object x with n + // elements, where i < 0 or i > n, the behavior is undefined. + // Dereferencing is not allowed on the "one past the last + // element", when i == n. + // Example: + // const int arr[3][2] = {{1, 2}, {3, 4}}; + // arr[0][0]; // 1 + // arr[0][1]; // 2 + // arr[0][2]; // UB + // arr[1][0]; // 3 + // arr[1][1]; // 4 + // arr[1][-1]; // UB + // arr[2][0]; // 0 + // arr[2][1]; // 0 + // arr[-2][0]; // UB + DstOffsets.resize(SrcOffsets.size()); + auto ExtentIt = ArrayExtents.begin(); + auto OffsetIt = DstOffsets.begin(); + // Reverse `SValOffsets` to make it consistent with `ArrayExtents`. + for (SVal V : llvm::reverse(SrcOffsets)) { + if (auto CI = V.getAs()) { + // When offset is out of array's bounds, result is UB. + const llvm::APSInt &Offset = CI->getValue(); + if (Offset.isNegative() || Offset.uge(*(ExtentIt++))) + return UndefinedVal(); + // Store the index in reversed order. + *(OffsetIt++) = Offset.getZExtValue(); + continue; + } + // A symbolic index is present, so return an unknown value. + // FIXME: We also need to take ElementRegions with symbolic indexes into + // account. + return UnknownVal(); + } + return None; +} + Optional RegionStoreManager::getConstantValFromConstArrayInitializer( - RegionBindingsConstRef B, const VarRegion *VR, const ElementRegion *R) { - assert(R && VR && "Regions should not be null"); + RegionBindingsConstRef B, const ElementRegion *R) { + assert(R && "ElementRegion should not be null"); + + // Treat an n-dimensional array. + SmallVector SValOffsets; + const MemRegion *Base; + std::tie(SValOffsets, Base) = getElementRegionOffsetsWithBase(R); + const VarRegion *VR = dyn_cast(Base); + if (!VR) + return None; + + assert(!SValOffsets.empty() && "getElementRegionOffsets guarantees the " + "offsets vector is not empty."); // Check if the containing array has an initialized value that we can trust. // We can trust a const value or a value of a global initializer in main(). @@ -1664,85 +1785,91 @@ Optional RegionStoreManager::getConstantValFromConstArrayInitializer( if (!CAT) return None; - // Array should be one-dimensional. - // TODO: Support multidimensional array. - if (isa(CAT->getElementType())) // is multidimensional - return None; + // Get array extents. + SmallVector Extents = getConstantArrayExtents(CAT); - // Array's offset should be a concrete value. - // Return Unknown value if symbolic index presented. - // FIXME: We also need to take ElementRegions with symbolic - // indexes into account. - const auto OffsetVal = R->getIndex().getAs(); - if (!OffsetVal.hasValue()) - return UnknownVal(); + // The number of offsets should equal the number of extents; + // otherwise, wrong type punning occurred.
For instance: + // int arr[1][2][3]; + // auto ptr = (int(*)[42])arr; + // auto x = ptr[4][2]; // UB + // FIXME: Should return UndefinedVal. + if (SValOffsets.size() != Extents.size()) + return None; - // Check offset for being out of bounds. - // C++20 [expr.add] 7.6.6.4 (excerpt): - // If P points to an array element i of an array object x with n - // elements, where i < 0 or i > n, the behavior is undefined. - // Dereferencing is not allowed on the "one past the last - // element", when i == n. - // Example: - // const int arr[4] = {1, 2}; - // const int *ptr = arr; - // int x0 = ptr[0]; // 1 - // int x1 = ptr[1]; // 2 - // int x2 = ptr[2]; // 0 - // int x3 = ptr[3]; // 0 - // int x4 = ptr[4]; // UB - // int x5 = ptr[-1]; // UB - const llvm::APSInt &OffsetInt = OffsetVal->getValue(); - const auto Offset = static_cast(OffsetInt.getExtValue()); - // Use `getZExtValue` because array extent can not be negative. - const uint64_t Extent = CAT->getSize().getZExtValue(); - // Check for `OffsetInt < 0` but NOT for `Offset < 0`, because `OffsetInt` - // CAN be negative, but `Offset` can NOT, because `Offset` is an uint64_t. - if (OffsetInt < 0 || Offset >= Extent) - return UndefinedVal(); - // From here `Offset` is in the bounds. + SmallVector ConcreteOffsets; + if (Optional V = convertOffsetsFromSvalToUnsigneds(SValOffsets, Extents, + ConcreteOffsets)) + return *V; // Handle InitListExpr. // Example: - // const char arr[] = { 1, 2, 3 }; + // const char arr[4][2] = { { 1, 2 }, { 3 }, 4, 5 }; if (const auto *ILE = dyn_cast(Init)) - return getSValFromInitListExpr(ILE, Offset, R->getElementType()); + return getSValFromInitListExpr(ILE, ConcreteOffsets, R->getElementType()); // Handle StringLiteral. // Example: // const char arr[] = "abc"; if (const auto *SL = dyn_cast(Init)) - return getSValFromStringLiteral(SL, Offset, R->getElementType()); + return getSValFromStringLiteral(SL, ConcreteOffsets.front(), + R->getElementType()); // FIXME: Handle CompoundLiteralExpr. return None; } -Optional -RegionStoreManager::getSValFromInitListExpr(const InitListExpr *ILE, - uint64_t Offset, QualType ElemT) { +/// Returns an SVal, if possible, for the specified position of an +/// initialization list. +/// +/// \param ILE The given initialization list. +/// \param Offsets The array of unsigned offsets. E.g. for the expression +/// `int x = arr[1][2][3];` the array should be { 1, 2, 3 }. +/// \param ElemT The type of the result SVal expression. +/// \return Optional SVal for the particular position in the initialization +/// list. E.g. for the list `{{1, 2},{3, 4},{5, 6}, {}}` offsets: +/// - {1, 1} returns SVal{4}, because it's the second position in the second +/// sublist; +/// - {3, 0} returns SVal{0}, because there's no explicit value at this +/// position in the sublist. +/// +/// NOTE: In order to get a valid SVal, the caller must guarantee valid offsets +/// into the given initialization list. Otherwise the SVal can be equivalent to +/// 0 or trigger an assertion. +Optional RegionStoreManager::getSValFromInitListExpr( + const InitListExpr *ILE, const SmallVector &Offsets, + QualType ElemT) { assert(ILE && "InitListExpr should not be null"); - // C++20 [dcl.init.string] 9.4.2.1: - // An array of ordinary character type [...] can be initialized by [...] - // an appropriately-typed string-literal enclosed in braces.
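The kind of source the extended constant-array folding is meant to handle, written out as analyzer-visible code (values follow the C++ rules quoted above):

  const int Arr[2][3] = {{1, 2, 3}, {4}};
  const int X = Arr[0][2]; // 3: read directly from the initializer list
  const int Y = Arr[1][1]; // 0: value-initialized, sublist shorter than extent
  // Arr[2][0] would be out of bounds: the helpers above yield UndefinedVal.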
- // Example: - // const char arr[] = { "abc" }; - if (ILE->isStringLiteralInit()) - if (const auto *SL = dyn_cast(ILE->getInit(0))) - return getSValFromStringLiteral(SL, Offset, ElemT); + for (uint64_t Offset : Offsets) { + // C++20 [dcl.init.string] 9.4.2.1: + // An array of ordinary character type [...] can be initialized by [...] + // an appropriately-typed string-literal enclosed in braces. + // Example: + // const char arr[] = { "abc" }; + if (ILE->isStringLiteralInit()) + if (const auto *SL = dyn_cast(ILE->getInit(0))) + return getSValFromStringLiteral(SL, Offset, ElemT); - // C++20 [expr.add] 9.4.17.5 (excerpt): - // i-th array element is value-initialized for each k < i ≤ n, - // where k is an expression-list size and n is an array extent. - if (Offset >= ILE->getNumInits()) - return svalBuilder.makeZeroVal(ElemT); + // C++20 [expr.add] 9.4.17.5 (excerpt): + // i-th array element is value-initialized for each k < i ≤ n, + // where k is an expression-list size and n is an array extent. + if (Offset >= ILE->getNumInits()) + return svalBuilder.makeZeroVal(ElemT); - // Return a constant value, if it is presented. - // FIXME: Support other SVals. - const Expr *E = ILE->getInit(Offset); - return svalBuilder.getConstantVal(E); + const Expr *E = ILE->getInit(Offset); + const auto *IL = dyn_cast(E); + if (!IL) + // Return a constant value, if one is present. + // FIXME: Support other SVals. + return svalBuilder.getConstantVal(E); + + // Go to the nested initializer list. + ILE = IL; + } + llvm_unreachable( + "Unhandled InitListExpr sub-expressions or invalid offsets."); } /// Returns an SVal, if possible, for the specified position in a string @@ -1804,8 +1931,8 @@ SVal RegionStoreManager::getBindingForElement(RegionBindingsConstRef B, const StringLiteral *SL = StrR->getStringLiteral(); return getSValFromStringLiteral(SL, Idx.getZExtValue(), T); } - } else if (const VarRegion *VR = dyn_cast(superR)) { - if (Optional V = getConstantValFromConstArrayInitializer(B, VR, R)) + } else if (isa(superR)) { + if (Optional V = getConstantValFromConstArrayInitializer(B, R)) return *V; } diff --git a/clang/test/ARCMT/whitelisted/Inputs/header1.h b/clang/test/ARCMT/allowlisted/Inputs/header1.h similarity index 100% rename from clang/test/ARCMT/whitelisted/Inputs/header1.h rename to clang/test/ARCMT/allowlisted/Inputs/header1.h diff --git a/clang/test/ARCMT/whitelisted/header1.h b/clang/test/ARCMT/allowlisted/header1.h similarity index 100% rename from clang/test/ARCMT/whitelisted/header1.h rename to clang/test/ARCMT/allowlisted/header1.h diff --git a/clang/test/ARCMT/whitelisted/header1.h.result b/clang/test/ARCMT/allowlisted/header1.h.result similarity index 100% rename from clang/test/ARCMT/whitelisted/header1.h.result rename to clang/test/ARCMT/allowlisted/header1.h.result diff --git a/clang/test/ARCMT/whitelisted/header2.h b/clang/test/ARCMT/allowlisted/header2.h similarity index 100% rename from clang/test/ARCMT/whitelisted/header2.h rename to clang/test/ARCMT/allowlisted/header2.h diff --git a/clang/test/ARCMT/whitelisted/header2.h.result b/clang/test/ARCMT/allowlisted/header2.h.result similarity index 100% rename from clang/test/ARCMT/whitelisted/header2.h.result rename to clang/test/ARCMT/allowlisted/header2.h.result diff --git a/clang/test/ARCMT/whitelisted/objcmt-with-whitelist-impl.m b/clang/test/ARCMT/allowlisted/objcmt-with-allowlist-impl.m similarity index 75% rename from clang/test/ARCMT/whitelisted/objcmt-with-whitelist-impl.m rename to
clang/test/ARCMT/allowlisted/objcmt-with-allowlist-impl.m index d734eaa10bdfd..36797eb1c843b 100644 --- a/clang/test/ARCMT/whitelisted/objcmt-with-whitelist-impl.m +++ b/clang/test/ARCMT/allowlisted/objcmt-with-allowlist-impl.m @@ -1,5 +1,5 @@ // RUN: rm -rf %t -// RUN: %clang_cc1 -objcmt-migrate-readwrite-property -objcmt-migrate-instancetype -objcmt-white-list-dir-path=%S/Inputs %s -triple x86_64-apple-darwin11 -migrate -o %t.remap +// RUN: %clang_cc1 -objcmt-migrate-readwrite-property -objcmt-migrate-instancetype -objcmt-allowlist-dir-path=%S/Inputs %s -triple x86_64-apple-darwin11 -migrate -o %t.remap // RUN: c-arcmt-test %t.remap | arcmt-test -verify-transformed-files %S/header1.h.result %s.result @interface NSObject diff --git a/clang/test/ARCMT/whitelisted/objcmt-with-whitelist-impl.m.result b/clang/test/ARCMT/allowlisted/objcmt-with-allowlist-impl.m.result similarity index 75% rename from clang/test/ARCMT/whitelisted/objcmt-with-whitelist-impl.m.result rename to clang/test/ARCMT/allowlisted/objcmt-with-allowlist-impl.m.result index b936b52b729b1..69e7b767ef2b7 100644 --- a/clang/test/ARCMT/whitelisted/objcmt-with-whitelist-impl.m.result +++ b/clang/test/ARCMT/allowlisted/objcmt-with-allowlist-impl.m.result @@ -1,5 +1,5 @@ // RUN: rm -rf %t -// RUN: %clang_cc1 -objcmt-migrate-readwrite-property -objcmt-migrate-instancetype -objcmt-white-list-dir-path=%S/Inputs %s -triple x86_64-apple-darwin11 -migrate -o %t.remap +// RUN: %clang_cc1 -objcmt-migrate-readwrite-property -objcmt-migrate-instancetype -objcmt-allowlist-dir-path=%S/Inputs %s -triple x86_64-apple-darwin11 -migrate -o %t.remap // RUN: c-arcmt-test %t.remap | arcmt-test -verify-transformed-files %S/header1.h.result %s.result @interface NSObject diff --git a/clang/test/ARCMT/whitelisted/objcmt-with-whitelist.m b/clang/test/ARCMT/allowlisted/objcmt-with-allowlist.m similarity index 79% rename from clang/test/ARCMT/whitelisted/objcmt-with-whitelist.m rename to clang/test/ARCMT/allowlisted/objcmt-with-allowlist.m index 0ea714fe59374..c22c02d3bf2e4 100644 --- a/clang/test/ARCMT/whitelisted/objcmt-with-whitelist.m +++ b/clang/test/ARCMT/allowlisted/objcmt-with-allowlist.m @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: %clang_cc1 -objcmt-migrate-readwrite-property -objcmt-migrate-instancetype -objcmt-migrate-ns-macros %s -triple x86_64-apple-darwin11 -migrate -o %t.remap // RUN: c-arcmt-test %t.remap | arcmt-test -verify-transformed-files %S/header1.h.result %S/header2.h.result -// RUN: %clang_cc1 -objcmt-migrate-readwrite-property -objcmt-migrate-instancetype -objcmt-migrate-ns-macros -objcmt-white-list-dir-path=%S/Inputs %s -triple x86_64-apple-darwin11 -migrate -o %t.remap +// RUN: %clang_cc1 -objcmt-migrate-readwrite-property -objcmt-migrate-instancetype -objcmt-migrate-ns-macros -objcmt-allowlist-dir-path=%S/Inputs %s -triple x86_64-apple-darwin11 -migrate -o %t.remap // RUN: c-arcmt-test %t.remap | arcmt-test -verify-transformed-files %S/header1.h.result @interface NSObject diff --git a/clang/test/AST/Inputs/std-coroutine-exp-namespace.h b/clang/test/AST/Inputs/std-coroutine-exp-namespace.h new file mode 100644 index 0000000000000..8234a19cba139 --- /dev/null +++ b/clang/test/AST/Inputs/std-coroutine-exp-namespace.h @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wno-unreachable-code -Wno-unused-value +#ifndef STD_COROUTINE_H +#define STD_COROUTINE_H + +namespace std { +namespace experimental { +template 
struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + void operator()() { resume(); } + void *address() const noexcept { return ptr; } + void resume() const { __builtin_coro_resume(ptr); } + void destroy() const { __builtin_coro_destroy(ptr); } + bool done() const { return __builtin_coro_done(ptr); } + coroutine_handle &operator=(decltype(nullptr)) { + ptr = nullptr; + return *this; + } + coroutine_handle(decltype(nullptr)) : ptr(nullptr) {} + coroutine_handle() : ptr(nullptr) {} + // void reset() { ptr = nullptr; } // add to P0057? + explicit operator bool() const { return ptr; } + +protected: + void *ptr; +}; + +template struct coroutine_handle : coroutine_handle<> { + using coroutine_handle<>::operator=; + + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + + Promise &promise() const { + return *reinterpret_cast( + __builtin_coro_promise(ptr, alignof(Promise), false)); + } + static coroutine_handle from_promise(Promise &promise) { + coroutine_handle p; + p.ptr = __builtin_coro_promise(&promise, alignof(Promise), true); + return p; + } +}; + +struct suspend_always { + bool await_ready() { return false; } + void await_suspend(coroutine_handle<>) {} + void await_resume() {} +}; + +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; +} // namespace experimental +} // namespace std + +#endif // STD_COROUTINE_H diff --git a/clang/test/AST/Inputs/std-coroutine.h b/clang/test/AST/Inputs/std-coroutine.h index 5a1498f00494c..98ddb91a60082 100644 --- a/clang/test/AST/Inputs/std-coroutine.h +++ b/clang/test/AST/Inputs/std-coroutine.h @@ -1,9 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wno-unreachable-code -Wno-unused-value +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 -fcoroutines-ts -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wno-unreachable-code -Wno-unused-value #ifndef STD_COROUTINE_H #define STD_COROUTINE_H namespace std { -namespace experimental { template struct coroutine_traits { using promise_type = typename R::promise_type; @@ -67,7 +66,6 @@ struct suspend_never { void await_resume() noexcept {} }; -} // namespace experimental } // namespace std #endif // STD_COROUTINE_H diff --git a/clang/test/AST/ast-dump-decl.cpp b/clang/test/AST/ast-dump-decl.cpp index 0ff947ae3b483..acbe5207f174b 100644 --- a/clang/test/AST/ast-dump-decl.cpp +++ b/clang/test/AST/ast-dump-decl.cpp @@ -321,7 +321,7 @@ namespace testClassTemplateDecl { // CHECK-NEXT: | |-TemplateArgument type 'testClassTemplateDecl::A' // CHECK-NEXT: | | `-RecordType 0{{.+}} 'testClassTemplateDecl::A' // CHECK-NEXT: | | `-CXXRecord 0x{{.+}} 'A' -// CHECK-NEXT: | |-CXXRecordDecl 0x{{.+}} prev 0x{{.+}} col:30 implicit class TestClassTemplate +// CHECK-NEXT: | |-CXXRecordDecl 0x{{.+}} col:30 implicit class TestClassTemplate // CHECK-NEXT: | |-AccessSpecDecl 0x{{.+}} col:3 public // CHECK-NEXT: | |-CXXConstructorDecl 0x{{.+}} col:5 used TestClassTemplate 'void ()' // CHECK-NEXT: | |-CXXDestructorDecl 0x{{.+}} col:5 used ~TestClassTemplate 'void () noexcept' @@ -358,7 +358,7 @@ 
 // CHECK-NEXT: |-TemplateArgument type 'testClassTemplateDecl::C'
 // CHECK-NEXT: | `-RecordType 0{{.+}} 'testClassTemplateDecl::C'
 // CHECK-NEXT: | `-CXXRecord 0x{{.+}} 'C'
-// CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} prev 0x{{.+}} col:30 implicit class TestClassTemplate
+// CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} col:30 implicit class TestClassTemplate
 // CHECK-NEXT: |-AccessSpecDecl 0x{{.+}} col:3 public
 // CHECK-NEXT: |-CXXConstructorDecl 0x{{.+}} col:5 TestClassTemplate 'void ()'
 // CHECK-NEXT: |-CXXDestructorDecl 0x{{.+}} col:5 ~TestClassTemplate 'void ()' noexcept-unevaluated 0x{{.+}}
@@ -376,7 +376,7 @@ namespace testClassTemplateDecl {
 // CHECK-NEXT: |-TemplateArgument type 'testClassTemplateDecl::D'
 // CHECK-NEXT: | `-RecordType 0{{.+}} 'testClassTemplateDecl::D'
 // CHECK-NEXT: | `-CXXRecord 0x{{.+}} 'D'
-// CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} prev 0x{{.+}} col:30 implicit class TestClassTemplate
+// CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} col:30 implicit class TestClassTemplate
 // CHECK-NEXT: |-AccessSpecDecl 0x{{.+}} col:3 public
 // CHECK-NEXT: |-CXXConstructorDecl 0x{{.+}} col:5 TestClassTemplate 'void ()'
 // CHECK-NEXT: |-CXXDestructorDecl 0x{{.+}} col:5 ~TestClassTemplate 'void ()' noexcept-unevaluated 0x{{.+}}
@@ -519,7 +519,7 @@ namespace testCanonicalTemplate {
  // CHECK-NEXT: |-TemplateArgument type 'testCanonicalTemplate::A'
  // CHECK-NEXT: | `-RecordType 0x{{.+}} 'testCanonicalTemplate::A'
  // CHECK-NEXT: | `-CXXRecord 0x{{.+}} 'A'
- // CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} prev 0x{{.+}} col:31 implicit class TestClassTemplate
+ // CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} col:31 implicit class TestClassTemplate
  // CHECK-NEXT: |-FriendDecl 0x{{.+}} col:40
  // CHECK-NEXT: | `-ClassTemplateDecl 0x{{.+}} parent 0x{{.+}} prev 0x{{.+}} col:40 TestClassTemplate
  // CHECK-NEXT: | |-TemplateTypeParmDecl 0x{{.+}} col:23 typename depth 0 index 0 T2
@@ -552,7 +552,7 @@ namespace testCanonicalTemplate {
  // CHECK-NEXT: |-TemplateArgument type 'testCanonicalTemplate::A'
  // CHECK-NEXT: | `-RecordType 0x{{.+}} 'testCanonicalTemplate::A'
  // CHECK-NEXT: | `-CXXRecord 0x{{.+}} 'A'
- // CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} prev 0x{{.+}} col:31 implicit class TestClassTemplate2
+ // CHECK-NEXT: |-CXXRecordDecl 0x{{.+}} col:31 implicit class TestClassTemplate2
  // CHECK-NEXT: |-CXXConstructorDecl 0x{{.+}} col:31 implicit used constexpr TestClassTemplate2 'void () noexcept' inline default trivial
  // CHECK-NEXT: | `-CompoundStmt 0x{{.+}}
  // CHECK-NEXT: |-CXXConstructorDecl 0x{{.+}} col:31 implicit constexpr TestClassTemplate2 'void (const testCanonicalTemplate::TestClassTemplate2 &)' inline default trivial noexcept-unevaluated 0x{{.+}}
diff --git a/clang/test/AST/ast-dump-decl.mm b/clang/test/AST/ast-dump-decl.mm
index 16ca27e3b139c..69ff46101da1e 100644
--- a/clang/test/AST/ast-dump-decl.mm
+++ b/clang/test/AST/ast-dump-decl.mm
@@ -1,6 +1,8 @@
 // Test without serialization:
 // RUN: %clang_cc1 -Wno-unused -fblocks -ast-dump -ast-dump-filter Test %s \
 // RUN: | FileCheck --strict-whitespace %s
+// RUN: %clang_cc1 -Wno-unused -fblocks -ast-dump -triple i386-windows-pc -ast-dump-filter Test %s \
+// RUN: | FileCheck --strict-whitespace %s
 //
 // Test with serialization:
 // RUN: %clang_cc1 -Wno-unused -fblocks -emit-pch -o %t %s
@@ -55,7 +57,7 @@ void f() {
 // CHECK-NEXT: CXXThisExpr {{.*}} 'Test *' this
   }
   void yada();
-  // CHECK: CXXMethodDecl {{.*}} col:8 used yada 'void ()'
+  // CHECK: CXXMethodDecl {{.*}} col:8 used yada 'void (){{.*}}'
 };
 
 @protocol P
diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp
index 6fa8cdc59bb40..35ec1cbfd8f7e 100644
--- a/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp
+++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_reference.cpp
@@ -120,7 +120,7 @@ int test(float &&f, short &&s) {
 // CHECK-NEXT: | | |-TemplateArgument type 'float &'
 // CHECK-NEXT: | | | `-LValueReferenceType [[ADDR_7:0x[a-z0-9]*]] 'float &'
 // CHECK-NEXT: | | | `-BuiltinType [[ADDR_8:0x[a-z0-9]*]] 'float'
-// CHECK-NEXT: | | |-CXXRecordDecl [[ADDR_9:0x[a-z0-9]*]] prev [[ADDR_6]] col:29 implicit struct remove_reference
+// CHECK-NEXT: | | |-CXXRecordDecl [[ADDR_9:0x[a-z0-9]*]] col:29 implicit struct remove_reference
 // CHECK-NEXT: | | `-TypedefDecl [[ADDR_10:0x[a-z0-9]*]] col:67 referenced type 'float':'float'
 // CHECK-NEXT: | | `-SubstTemplateTypeParmType [[ADDR_11:0x[a-z0-9]*]] 'float' sugar
 // CHECK-NEXT: | | |-TemplateTypeParmType [[ADDR_12:0x[a-z0-9]*]] '_Tp' dependent depth 0 index 0
@@ -137,7 +137,7 @@ int test(float &&f, short &&s) {
 // CHECK-NEXT: | |-TemplateArgument type 'short &'
 // CHECK-NEXT: | | `-LValueReferenceType [[ADDR_15:0x[a-z0-9]*]] 'short &'
 // CHECK-NEXT: | | `-BuiltinType [[ADDR_16:0x[a-z0-9]*]] 'short'
-// CHECK-NEXT: | |-CXXRecordDecl [[ADDR_17:0x[a-z0-9]*]] prev [[ADDR_14]] col:29 implicit struct remove_reference
+// CHECK-NEXT: | |-CXXRecordDecl [[ADDR_17:0x[a-z0-9]*]] col:29 implicit struct remove_reference
 // CHECK-NEXT: | `-TypedefDecl [[ADDR_18:0x[a-z0-9]*]] col:67 referenced type 'short':'short'
 // CHECK-NEXT: | `-SubstTemplateTypeParmType [[ADDR_19:0x[a-z0-9]*]] 'short' sugar
 // CHECK-NEXT: | |-TemplateTypeParmType [[ADDR_12]] '_Tp' dependent depth 0 index 0
diff --git a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp
index 153764490c0dd..88c47b213cfbf 100644
--- a/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp
+++ b/clang/test/AST/ast-dump-openmp-begin-declare-variant_template_3.cpp
@@ -69,7 +69,7 @@ int test() {
 // CHECK-NEXT: | | | `-Destructor simple irrelevant trivial
 // CHECK-NEXT: | | |-TemplateArgument type 'int'
 // CHECK-NEXT: | | | `-BuiltinType [[ADDR_9:0x[a-z0-9]*]] 'int'
-// CHECK-NEXT: | | |-CXXRecordDecl [[ADDR_10:0x[a-z0-9]*]] prev [[ADDR_8]] col:30 implicit struct S
+// CHECK-NEXT: | | |-CXXRecordDecl [[ADDR_10:0x[a-z0-9]*]] col:30 implicit struct S
 // CHECK-NEXT: | | |-CXXConstructorDecl [[ADDR_11:0x[a-z0-9]*]] col:3 used S 'void (int, int *)'
 // CHECK-NEXT: | | | |-ParmVarDecl [[ADDR_12:0x[a-z0-9]*]] col:8 'int'
 // CHECK-NEXT: | | | |-ParmVarDecl [[ADDR_13:0x[a-z0-9]*]] col:13 'int *'
diff --git a/clang/test/AST/coroutine-locals-cleanup-exp-namespace.cpp b/clang/test/AST/coroutine-locals-cleanup-exp-namespace.cpp
new file mode 100644
index 0000000000000..048c6778bd05a
--- /dev/null
+++ b/clang/test/AST/coroutine-locals-cleanup-exp-namespace.cpp
@@ -0,0 +1,107 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -fsyntax-only -ast-dump %s | FileCheck %s
+
+#include "Inputs/std-coroutine-exp-namespace.h"
+
+using namespace std::experimental;
+
+struct Task {
+  struct promise_type {
+    Task get_return_object() noexcept {
+      return Task{coroutine_handle<promise_type>::from_promise(*this)};
+    }
+
+    void return_void() noexcept {}
+
+    struct final_awaiter {
+      bool await_ready() noexcept { return false; }
+      coroutine_handle<> await_suspend(coroutine_handle<promise_type> h) noexcept {
+        h.destroy();
+        return {};
+      }
+      void await_resume() noexcept {}
+    };
+
+    void unhandled_exception() noexcept {}
+
+    final_awaiter final_suspend() noexcept { return {}; }
+
+    suspend_always initial_suspend() noexcept { return {}; }
+
+    template <typename Awaitable>
+    auto await_transform(Awaitable &&awaitable) {
+      return awaitable.co_viaIfAsync();
+    }
+  };
+
+  using handle_t = coroutine_handle<promise_type>;
+
+  class Awaiter {
+  public:
+    explicit Awaiter(handle_t coro) noexcept;
+    Awaiter(Awaiter &&other) noexcept;
+    Awaiter(const Awaiter &) = delete;
+    ~Awaiter();
+
+    bool await_ready() noexcept { return false; }
+    handle_t await_suspend(coroutine_handle<> continuation) noexcept;
+    void await_resume();
+
+  private:
+    handle_t coro_;
+  };
+
+  Task(handle_t coro) noexcept : coro_(coro) {}
+
+  handle_t coro_;
+
+  Task(const Task &t) = delete;
+  Task(Task &&t) noexcept;
+  ~Task();
+  Task &operator=(Task t) noexcept;
+
+  Awaiter co_viaIfAsync();
+};
+
+static Task foo() {
+  co_return;
+}
+
+Task bar() {
+  auto mode = 2;
+  switch (mode) {
+  case 1:
+    co_await foo();
+    break;
+  case 2:
+    co_await foo();
+    break;
+  default:
+    break;
+  }
+}
+
+// CHECK-LABEL: FunctionDecl {{.*}} bar 'Task ()'
+// CHECK: SwitchStmt
+// CHECK: CaseStmt
+// CHECK: ExprWithCleanups {{.*}} 'void'
+// CHECK-NEXT: CoawaitExpr
+// CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'Task::Awaiter':'Task::Awaiter'
+// CHECK: ExprWithCleanups {{.*}} 'bool'
+// CHECK-NEXT: CXXMemberCallExpr {{.*}} 'bool'
+// CHECK-NEXT: MemberExpr {{.*}} .await_ready
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(void *)'
+// CHECK-NEXT: DeclRefExpr {{.*}} '__builtin_coro_resume' 'void (void *)'
+// CHECK-NEXT: ExprWithCleanups {{.*}} 'void *'
+
+// CHECK: CaseStmt
+// CHECK: ExprWithCleanups {{.*}} 'void'
+// CHECK-NEXT: CoawaitExpr
+// CHECK-NEXT: MaterializeTemporaryExpr {{.*}} 'Task::Awaiter':'Task::Awaiter'
+// CHECK: ExprWithCleanups {{.*}} 'bool'
+// CHECK-NEXT: CXXMemberCallExpr {{.*}} 'bool'
+// CHECK-NEXT: MemberExpr {{.*}} .await_ready
+// CHECK: CallExpr {{.*}} 'void'
+// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(void *)'
+// CHECK-NEXT: DeclRefExpr {{.*}} '__builtin_coro_resume' 'void (void *)'
+// CHECK-NEXT: ExprWithCleanups {{.*}} 'void *'
diff --git a/clang/test/AST/coroutine-locals-cleanup.cpp b/clang/test/AST/coroutine-locals-cleanup.cpp
index 5e993ad323f87..4e2fe6275de7d 100644
--- a/clang/test/AST/coroutine-locals-cleanup.cpp
+++ b/clang/test/AST/coroutine-locals-cleanup.cpp
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -fsyntax-only -ast-dump %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fsyntax-only -ast-dump %s | FileCheck %s
 
 #include "Inputs/std-coroutine.h"
 
-using namespace std::experimental;
+using namespace std;
 
 struct Task {
   struct promise_type {
diff --git a/clang/test/AST/coroutine-source-location-crash-exp-namespace.cpp b/clang/test/AST/coroutine-source-location-crash-exp-namespace.cpp
new file mode 100644
index 0000000000000..9995dee542e9c
--- /dev/null
+++ b/clang/test/AST/coroutine-source-location-crash-exp-namespace.cpp
@@ -0,0 +1,42 @@
+// Test without serialization:
+// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \
+// RUN:   -fsyntax-only -ast-dump | FileCheck %s
+//
+// Test with serialization:
+// RUN: %clang_cc1 -triple x86_64-apple-darwin9 -std=c++14 -fcoroutines-ts -emit-pch -o %t %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-apple-darwin9 -std=c++14 -fcoroutines-ts -include-pch %t \
+// RUN:   -ast-dump-all /dev/null \
+// RUN:   | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
+// RUN:   | FileCheck %s
+
+#include "Inputs/std-coroutine-exp-namespace.h"
+
+using namespace std::experimental;
+
+struct A {
+  bool await_ready();
+  void await_resume();
+  template <typename F>
+  void await_suspend(F);
+};
+
+struct coro_t {
+  struct promise_type {
+    coro_t get_return_object();
+    suspend_never initial_suspend();
+    suspend_never final_suspend() noexcept;
+    void return_void();
+    static void unhandled_exception();
+  };
+};
+
+// {{0x[0-9a-fA-F]+}}
+// CHECK-LABEL: FunctionDecl {{.*}} f 'coro_t (int)'
+coro_t f(int n) {
+  A a{};
+  // CHECK: CoawaitExpr {{0x[0-9a-fA-F]+}}
+  // CHECK-NEXT: DeclRefExpr {{0x[0-9a-fA-F]+}}
+  // CHECK-NEXT: CXXMemberCallExpr {{0x[0-9a-fA-F]+}}
+  // CHECK-NEXT: MemberExpr {{0x[0-9a-fA-F]+}}
+  co_await a;
+}
diff --git a/clang/test/AST/coroutine-source-location-crash.cpp b/clang/test/AST/coroutine-source-location-crash.cpp
index 6c0184d2076d4..9b18dc817fb5b 100644
--- a/clang/test/AST/coroutine-source-location-crash.cpp
+++ b/clang/test/AST/coroutine-source-location-crash.cpp
@@ -1,17 +1,17 @@
 // Test without serialization:
-// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \
+// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 \
 // RUN:   -fsyntax-only -ast-dump | FileCheck %s
 //
 // Test with serialization:
-// RUN: %clang_cc1 -triple x86_64-apple-darwin9 -std=c++14 -fcoroutines-ts -emit-pch -o %t %s
-// RUN: %clang_cc1 -x c++ -triple x86_64-apple-darwin9 -std=c++14 -fcoroutines-ts -include-pch %t \
+// RUN: %clang_cc1 -triple x86_64-apple-darwin9 -std=c++20 -emit-pch -o %t %s
+// RUN: %clang_cc1 -x c++ -triple x86_64-apple-darwin9 -std=c++20 -include-pch %t \
 // RUN:   -ast-dump-all /dev/null \
 // RUN:   | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" \
 // RUN:   | FileCheck %s
 
 #include "Inputs/std-coroutine.h"
 
-using namespace std::experimental;
+using namespace std;
 
 struct A {
   bool await_ready();
diff --git a/clang/test/Analysis/initialization.c b/clang/test/Analysis/initialization.c
index 9015113f86404..5900388b55de2 100644
--- a/clang/test/Analysis/initialization.c
+++ b/clang/test/Analysis/initialization.c
@@ -58,44 +58,35 @@ void glob_invalid_index2() {
   int res = ptr[x]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 const int glob_arr2[3][3] = {[0][0] = 1, [1][1] = 5, [2][0] = 7};
 void glob_arr_index3() {
-  // FIXME: These all should be TRUE.
-  clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{UNKNOWN}}
-  clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{UNKNOWN}}
-}
-
-// TODO: Support multidimensional array.
+  clang_analyzer_eval(glob_arr2[0][0] == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[0][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[0][2] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[1][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[1][1] == 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[1][2] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[2][0] == 7); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[2][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr2[2][2] == 0); // expected-warning{{TRUE}}
+}
+
 void negative_index() {
   int x = 2, y = -2;
-  // FIXME: Should be UNDEFINED.
-  clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr2[x][y] == 5); // expected-warning{{UNDEFINED}}
   x = 3;
   y = -3;
-  // FIXME: Should be UNDEFINED.
-  clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr2[x][y] == 7); // expected-warning{{UNDEFINED}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index3() {
   int x = -1, y = -1;
-  // FIXME: Should warn {{garbage or undefined}}.
-  int res = glob_arr2[x][y]; // no-warning
+  int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index4() {
   int x = 3, y = 2;
-  // FIXME: Should warn {{garbage or undefined}}.
-  int res = glob_arr2[x][y]; // no-warning
+  int res = glob_arr2[x][y]; // expected-warning{{garbage or undefined}}
 }
 
 const int glob_arr_no_init[10];
diff --git a/clang/test/Analysis/initialization.cpp b/clang/test/Analysis/initialization.cpp
index 21b48f55054d4..0883678c8e908 100644
--- a/clang/test/Analysis/initialization.cpp
+++ b/clang/test/Analysis/initialization.cpp
@@ -14,13 +14,6 @@ void definit() {
   clang_analyzer_eval(sarr[i].a); // expected-warning{{UNKNOWN}}
 }
 
-int const arr[2][2] = {};
-void arr2init() {
-  int i = 1;
-  // FIXME: Should recognize that it is 0.
-  clang_analyzer_eval(arr[i][0]); // expected-warning{{UNKNOWN}}
-}
-
 int const glob_arr1[3] = {};
 void glob_array_index1() {
   clang_analyzer_eval(glob_arr1[0] == 0); // expected-warning{{TRUE}}
@@ -60,23 +53,18 @@ float no_warn_garbage_value() {
   return glob_arr3[0]; // no-warning (garbage or undefined)
 }
 
-// TODO: Support multidimensional array.
 int const glob_arr4[4][2] = {};
 void glob_array_index2() {
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr4[0][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr4[1][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr4[1][1] == 0); // expected-warning{{TRUE}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index3() {
   int idx = -42;
-  // FIXME: Should warn {{garbage or undefined}}.
-  auto x = glob_arr4[1][idx]; // no-warning
+  auto x = glob_arr4[1][idx]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index4() {
   const int *ptr = glob_arr4[1];
   int idx = -42;
@@ -84,28 +72,18 @@ void glob_invalid_index4() {
   auto x = ptr[idx]; // no-warning
 }
 
-// TODO: Support multidimensional array.
 int const glob_arr5[4][2] = {{1}, 3, 4, 5};
 void glob_array_index3() {
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{UNKNOWN}}
-  // FIXME: Should be TRUE.
-  clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{UNKNOWN}}
+  clang_analyzer_eval(glob_arr5[0][0] == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[0][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[1][0] == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[1][1] == 4); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[2][0] == 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[2][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[3][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr5[3][1] == 0); // expected-warning{{TRUE}}
 }
 
-// TODO: Support multidimensional array.
 void glob_ptr_index2() {
   int const *ptr = glob_arr5[1];
   // FIXME: Should be TRUE.
@@ -120,19 +98,16 @@ void glob_ptr_index2() {
   clang_analyzer_eval(ptr[4] == 0); // expected-warning{{UNKNOWN}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index5() {
   int idx = -42;
-  // FIXME: Should warn {{garbage or undefined}}.
-  auto x = glob_arr5[1][idx]; // no-warning
+  auto x = glob_arr5[1][idx]; // expected-warning{{garbage or undefined}}
 }
 
-// TODO: Support multidimensional array.
 void glob_invalid_index6() {
   int const *ptr = &glob_arr5[1][0];
   int idx = 42;
   // FIXME: Should warn {{garbage or undefined}}.
-  auto x = ptr[idx]; // // no-warning
+  auto x = ptr[idx]; // no-warning
 }
 
 extern const int glob_arr_no_init[10];
@@ -253,3 +228,31 @@ void glob_ptr_index8() {
   clang_analyzer_eval(glob_ptr12[2] == 'c');  // expected-warning{{TRUE}}
   clang_analyzer_eval(glob_ptr12[3] == '\0'); // expected-warning{{TRUE}}
 }
+
+typedef int Int;
+typedef Int const CInt;
+typedef CInt Arr[2];
+typedef Arr Arr2[4];
+Arr2 glob_arr8 = {{1}, 3, 4, 5}; // const int[4][2]
+void glob_array_typedef1() {
+  clang_analyzer_eval(glob_arr8[0][0] == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr8[0][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr8[1][0] == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr8[1][1] == 4); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr8[2][0] == 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr8[2][1] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr8[3][0] == 0); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr8[3][1] == 0); // expected-warning{{TRUE}}
+}
+
+const int glob_arr9[2][4] = {{(1), 2, ((3)), 4}, 5, 6, (((7)))};
+void glob_array_parentheses1() {
+  clang_analyzer_eval(glob_arr9[0][0] == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr9[0][1] == 2); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr9[0][2] == 3); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr9[0][3] == 4); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr9[1][0] == 5); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr9[1][1] == 6); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr9[1][2] == 7); // expected-warning{{TRUE}}
+  clang_analyzer_eval(glob_arr9[1][3] == 0); // expected-warning{{TRUE}}
+}
diff --git a/clang/test/Analysis/more-dtors-cfg-output.cpp b/clang/test/Analysis/more-dtors-cfg-output.cpp
index 964c6d94d378d..21824506c73c8 100644
--- a/clang/test/Analysis/more-dtors-cfg-output.cpp
+++ b/clang/test/Analysis/more-dtors-cfg-output.cpp
@@ -275,32 +275,32 @@ void new_default_ctor_with_default_arg(long count) {
 #if CXX2A
 // Boilerplate needed to test co_return:
 
-namespace std::experimental {
-  template <typename... T>
-  struct coroutine_handle {
-    static coroutine_handle from_address(void *) noexcept;
-  };
-}
+namespace std {
+template <typename... T>
+struct coroutine_handle {
+  static coroutine_handle from_address(void *) noexcept;
+};
+} // namespace std
 
 struct TestPromise {
   TestPromise initial_suspend();
   TestPromise final_suspend() noexcept;
   bool await_ready() noexcept;
-  void await_suspend(const std::experimental::coroutine_handle<TestPromise> &) noexcept;
+  void await_suspend(const std::coroutine_handle<TestPromise> &) noexcept;
   void await_resume() noexcept;
   Foo return_value(const Bar &);
   Bar get_return_object();
   void unhandled_exception();
 };
 
-namespace std::experimental {
-  template <typename Ret, typename... Args>
-  struct coroutine_traits;
-  template <>
-  struct coroutine_traits<Bar> {
-    using promise_type = TestPromise;
-  };
-}
+namespace std {
+template <typename Ret, typename... Args>
+struct coroutine_traits;
+template <>
+struct coroutine_traits<Bar> {
+  using promise_type = TestPromise;
+};
+} // namespace std
 
 Bar coreturn() {
   co_return get_bar();
diff --git a/clang/test/CXX/over/over.built/ast-20.cpp b/clang/test/CXX/over/over.built/ast-20.cpp
new file mode 100644
index 0000000000000..4fd1c6edb34f6
--- /dev/null
+++ b/clang/test/CXX/over/over.built/ast-20.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -std=c++20 -ast-dump %s -ast-dump-filter Test | FileCheck %s
+
+namespace std {
+  struct strong_ordering {
+    int n;
+    constexpr operator int() const { return n; }
+    static const strong_ordering less, equal, greater;
+  };
+  constexpr strong_ordering strong_ordering::less{-1},
+      strong_ordering::equal{0}, strong_ordering::greater{1};
+}
+
+template <typename T, typename U>
+auto Test(T* pt, U* pu) {
+  // CHECK: BinaryOperator {{.*}} '<dependent type>' '<=>'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'U *' lvalue ParmVar {{.*}} 'pu' 'U *'
+  (void)(pt <=> pu);
+
+}
+
+
diff --git a/clang/test/CXX/over/over.built/ast.cpp b/clang/test/CXX/over/over.built/ast.cpp
new file mode 100644
index 0000000000000..f76606b1f9869
--- /dev/null
+++ b/clang/test/CXX/over/over.built/ast.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -std=c++17 -ast-dump %s -ast-dump-filter Test | FileCheck %s
+
+struct A{};
+
+template <typename T, typename U>
+auto Test(T* pt, U* pu) {
+  // CHECK: UnaryOperator {{.*}} '<dependent type>' prefix '*'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  (void)*pt;
+
+  // CHECK: UnaryOperator {{.*}} '<dependent type>' prefix '++'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  (void)(++pt);
+
+  // CHECK: UnaryOperator {{.*}} '<dependent type>' prefix '+'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  (void)(+pt);
+
+  // CHECK: BinaryOperator {{.*}} '<dependent type>' '+'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  // CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3
+  (void)(pt + 3);
+
+  // CHECK: BinaryOperator {{.*}} '<dependent type>' '-'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  (void)(pt - pt);
+
+  // CHECK: BinaryOperator {{.*}} '<dependent type>' '-'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'U *' lvalue ParmVar {{.*}} 'pu' 'U *'
+  (void)(pt - pu);
+
+  // CHECK: BinaryOperator {{.*}} '<dependent type>' '=='
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'T *' lvalue ParmVar {{.*}} 'pt' 'T *'
+  // CHECK-NEXT: DeclRefExpr {{.*}} 'U *' lvalue ParmVar {{.*}} 'pu' 'U *'
+  (void)(pt == pu);
+
+}
+
+
diff --git a/clang/test/CXX/over/over.built/p10.cpp b/clang/test/CXX/over/over.built/p10.cpp
new file mode 100644
index 0000000000000..678056da58205
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p10.cpp
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+struct A{};
+
+template <typename T>
+void f(int i, float f, bool b, char c, int* pi, A* pa, T* pt) {
+  (void)+i;
+  (void)-i;
+  (void)+f;
+  (void)-f;
+  (void)+b;
+  (void)-b;
+  (void)+c;
+  (void)-c;
+
+  (void)-pi; // expected-error {{invalid argument type}}
+  (void)-pa; // expected-error {{invalid argument type}}
+  (void)-pt; // FIXME: we should be able to give an error here.
+}
+
diff --git a/clang/test/CXX/over/over.built/p11.cpp b/clang/test/CXX/over/over.built/p11.cpp
new file mode 100644
index 0000000000000..7ebf16b95439f
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p11.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T>
+void f(int i, float f, bool b, char c, int* pi, T* pt) {
+  (void)~i;
+  (void)~f; // expected-error {{invalid argument type}}
+  (void)~b;
+  (void)~c;
+  (void)~pi; // expected-error {{invalid argument type}}
+  (void)~pt; // FIXME: we should be able to give an error here.
+}
+
diff --git a/clang/test/CXX/over/over.built/p13.cpp b/clang/test/CXX/over/over.built/p13.cpp
new file mode 100644
index 0000000000000..de57130386e88
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p13.cpp
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T>
+void f(int i, float f, bool b, char c, int* pi, T* pt) {
+  (void)(i*i);
+  (void)(i*f);
+  (void)(i*b);
+  (void)(i*c);
+  (void)(i*pi); // expected-error {{invalid operands to binary expression}}
+  (void)(i*pt); // FIXME
+
+  (void)(i/i);
+  (void)(i/f);
+  (void)(i/b);
+  (void)(i/c);
+  (void)(i/pi); // expected-error {{invalid operands to binary expression}}
+  (void)(i/pt); // FIXME
+
+  (void)(i-i);
+  (void)(i-f);
+  (void)(i-b);
+  (void)(i-c);
+  (void)(i-pi); // expected-error {{invalid operands to binary expression}}
+  (void)(i-pt); // FIXME
+
+  (void)(i
+void f(int* pi, T* pt) {
+  (void)(pi+3);
+  (void)(3+pi);
+  (void)(pi-3);
+  (void)(pi[3]);
+  (void)(3[pi]);
+
+  (void)(pt+3);
+  (void)(3+pt);
+  (void)(pt-3);
+  (void)(pt[3]);
+  (void)(3[pt]);
+}
+// expected-no-diagnostics
diff --git a/clang/test/CXX/over/over.built/p15.cpp b/clang/test/CXX/over/over.built/p15.cpp
index 9b223bcbc24d5..680ffa9a3dcb7 100644
--- a/clang/test/CXX/over/over.built/p15.cpp
+++ b/clang/test/CXX/over/over.built/p15.cpp
@@ -1,75 +1,12 @@
 // RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
 
-struct A { operator decltype(nullptr)(); }; // expected-note 16{{implicitly converted}}
-struct B { operator const int *(); }; // expected-note 8{{implicitly converted}}
-void f(A a, B b, volatile int *pi) {
-  (void)(a == a);
-  (void)(a != a);
-  (void)(a < a); // expected-error {{invalid operands}}
-  (void)(a > a); // expected-error {{invalid operands}}
-  (void)(a <= a); // expected-error {{invalid operands}}
-  (void)(a >= a); // expected-error {{invalid operands}}
-
-  (void)(a == b);
-  (void)(a != b);
-  (void)(a < b); // expected-error {{invalid operands}}
-  (void)(a > b); // expected-error {{invalid operands}}
-  (void)(a <= b); // expected-error {{invalid operands}}
-  (void)(a >= b); // expected-error {{invalid operands}}
-
-  (void)(b == a);
-  (void)(b != a);
-  (void)(b < a); // expected-error {{invalid operands}}
-  (void)(b > a); // expected-error {{invalid operands}}
-  (void)(b <= a); // expected-error {{invalid operands}}
-  (void)(b >= a); // expected-error {{invalid operands}}
-
-  (void)(a == pi);
-  (void)(a != pi);
-  (void)(a < pi); // expected-error {{invalid operands}}
-  (void)(a > pi); // expected-error {{invalid operands}}
-  (void)(a <= pi); // expected-error {{invalid operands}}
-  (void)(a >= pi); // expected-error {{invalid operands}}
-
-  (void)(pi == a);
-  (void)(pi != a);
-  (void)(pi < a); // expected-error {{invalid operands}}
-  (void)(pi > a); // expected-error {{invalid operands}}
-  (void)(pi <= a); // expected-error {{invalid operands}}
-  (void)(pi >= a); // expected-error {{invalid operands}}
-
-  (void)(b == pi);
-  (void)(b != pi);
-  (void)(b < pi);
-  (void)(b > pi);
-  (void)(b <= pi);
-  (void)(b >= pi);
-
-  (void)(pi == b);
-  (void)(pi != b);
-  (void)(pi < b);
-  (void)(pi > b);
-  (void)(pi <= b);
-  (void)(pi >= b);
-
-  (void)(b == b);
-  (void)(b != b);
-  (void)(b < b);
-  (void)(b > b);
-  (void)(b <= b);
-  (void)(b >= b);
-
-  (void)(pi == pi);
-  (void)(pi != pi);
-  (void)(pi < pi);
-  (void)(pi > pi);
-  (void)(pi <= pi);
-  (void)(pi >= pi);
-}
-
-// FIXME: This is wrong: the type T = 'const volatile int * const * const *'
-// would work here, and there exists a builtin candidate for that type.
-struct C { operator const int ***(); };
-void g(C c, volatile int ***p) {
-  (void)(c < p); // expected-error {{invalid operands}}
+template <typename T, typename U>
+void f(int* pi, float* pf, T* pt, U* pu, T t) {
+  (void)(pi - pi);
+  (void)(pi - pf); // expected-error {{not pointers to compatible types}}
+  (void)(pi - pt);
+  (void)(pu - pi);
+  (void)(pu - pt);
+  (void)(pu - t);
+  (void)(pi - t);
 }
diff --git a/clang/test/CXX/over/over.built/p16.cpp b/clang/test/CXX/over/over.built/p16.cpp
index 139e864475244..9b223bcbc24d5 100644
--- a/clang/test/CXX/over/over.built/p16.cpp
+++ b/clang/test/CXX/over/over.built/p16.cpp
@@ -1,8 +1,8 @@
 // RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
 
-struct A { operator decltype(nullptr)(); };
-struct B { operator int A::*(); };
-void f(A a, B b, int A::*pi) {
+struct A { operator decltype(nullptr)(); }; // expected-note 16{{implicitly converted}}
+struct B { operator const int *(); }; // expected-note 8{{implicitly converted}}
+void f(A a, B b, volatile int *pi) {
   (void)(a == a);
   (void)(a != a);
   (void)(a < a); // expected-error {{invalid operands}}
@@ -40,36 +40,36 @@ void f(A a, B b, int A::*pi) {
 
   (void)(b == pi);
   (void)(b != pi);
-  (void)(b < pi); // expected-error {{invalid operands}}
-  (void)(b > pi); // expected-error {{invalid operands}}
-  (void)(b <= pi); // expected-error {{invalid operands}}
-  (void)(b >= pi); // expected-error {{invalid operands}}
+  (void)(b < pi);
+  (void)(b > pi);
+  (void)(b <= pi);
+  (void)(b >= pi);
 
   (void)(pi == b);
   (void)(pi != b);
-  (void)(pi < b); // expected-error {{invalid operands}}
-  (void)(pi > b); // expected-error {{invalid operands}}
-  (void)(pi <= b); // expected-error {{invalid operands}}
-  (void)(pi >= b); // expected-error {{invalid operands}}
+  (void)(pi < b);
+  (void)(pi > b);
+  (void)(pi <= b);
+  (void)(pi >= b);
 
   (void)(b == b);
   (void)(b != b);
-  (void)(b < b); // expected-error {{invalid operands}}
-  (void)(b > b); // expected-error {{invalid operands}}
-  (void)(b <= b); // expected-error {{invalid operands}}
-  (void)(b >= b); // expected-error {{invalid operands}}
+  (void)(b < b);
+  (void)(b > b);
+  (void)(b <= b);
+  (void)(b >= b);
 
   (void)(pi == pi);
   (void)(pi != pi);
-  (void)(pi < pi); // expected-error {{invalid operands}}
-  (void)(pi > pi); // expected-error {{invalid operands}}
-  (void)(pi <= pi); // expected-error {{invalid operands}}
-  (void)(pi >= pi); // expected-error {{invalid operands}}
+  (void)(pi < pi);
+  (void)(pi > pi);
+  (void)(pi <= pi);
+  (void)(pi >= pi);
 }
 
-// FIXME: This is wrong: type T = 'const volatile int * const A::* const B::*'
+// FIXME: This is wrong: the type T = 'const volatile int * const * const *'
 // would work here, and there exists a builtin candidate for that type.
-struct C { operator const int *A::*B::*(); };
-void g(C c, volatile int *A::*B::*p) {
-  (void)(c == p); // expected-error {{invalid operands}}
+struct C { operator const int ***(); };
+void g(C c, volatile int ***p) {
+  (void)(c < p); // expected-error {{invalid operands}}
 }
diff --git a/clang/test/CXX/over/over.built/p17.cpp b/clang/test/CXX/over/over.built/p17.cpp
new file mode 100644
index 0000000000000..139e864475244
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p17.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+struct A { operator decltype(nullptr)(); };
+struct B { operator int A::*(); };
+void f(A a, B b, int A::*pi) {
+  (void)(a == a);
+  (void)(a != a);
+  (void)(a < a); // expected-error {{invalid operands}}
+  (void)(a > a); // expected-error {{invalid operands}}
+  (void)(a <= a); // expected-error {{invalid operands}}
+  (void)(a >= a); // expected-error {{invalid operands}}
+
+  (void)(a == b);
+  (void)(a != b);
+  (void)(a < b); // expected-error {{invalid operands}}
+  (void)(a > b); // expected-error {{invalid operands}}
+  (void)(a <= b); // expected-error {{invalid operands}}
+  (void)(a >= b); // expected-error {{invalid operands}}
+
+  (void)(b == a);
+  (void)(b != a);
+  (void)(b < a); // expected-error {{invalid operands}}
+  (void)(b > a); // expected-error {{invalid operands}}
+  (void)(b <= a); // expected-error {{invalid operands}}
+  (void)(b >= a); // expected-error {{invalid operands}}
+
+  (void)(a == pi);
+  (void)(a != pi);
+  (void)(a < pi); // expected-error {{invalid operands}}
+  (void)(a > pi); // expected-error {{invalid operands}}
+  (void)(a <= pi); // expected-error {{invalid operands}}
+  (void)(a >= pi); // expected-error {{invalid operands}}
+
+  (void)(pi == a);
+  (void)(pi != a);
+  (void)(pi < a); // expected-error {{invalid operands}}
+  (void)(pi > a); // expected-error {{invalid operands}}
+  (void)(pi <= a); // expected-error {{invalid operands}}
+  (void)(pi >= a); // expected-error {{invalid operands}}
+
+  (void)(b == pi);
+  (void)(b != pi);
+  (void)(b < pi); // expected-error {{invalid operands}}
+  (void)(b > pi); // expected-error {{invalid operands}}
+  (void)(b <= pi); // expected-error {{invalid operands}}
+  (void)(b >= pi); // expected-error {{invalid operands}}
+
+  (void)(pi == b);
+  (void)(pi != b);
+  (void)(pi < b); // expected-error {{invalid operands}}
+  (void)(pi > b); // expected-error {{invalid operands}}
+  (void)(pi <= b); // expected-error {{invalid operands}}
+  (void)(pi >= b); // expected-error {{invalid operands}}
+
+  (void)(b == b);
+  (void)(b != b);
+  (void)(b < b); // expected-error {{invalid operands}}
+  (void)(b > b); // expected-error {{invalid operands}}
+  (void)(b <= b); // expected-error {{invalid operands}}
+  (void)(b >= b); // expected-error {{invalid operands}}
+
+  (void)(pi == pi);
+  (void)(pi != pi);
+  (void)(pi < pi); // expected-error {{invalid operands}}
+  (void)(pi > pi); // expected-error {{invalid operands}}
+  (void)(pi <= pi); // expected-error {{invalid operands}}
+  (void)(pi >= pi); // expected-error {{invalid operands}}
+}
+
+// FIXME: This is wrong: type T = 'const volatile int * const A::* const B::*'
+// would work here, and there exists a builtin candidate for that type.
+struct C { operator const int *A::*B::*(); };
+void g(C c, volatile int *A::*B::*p) {
+  (void)(c == p); // expected-error {{invalid operands}}
+}
diff --git a/clang/test/CXX/over/over.built/p18.cpp b/clang/test/CXX/over/over.built/p18.cpp
new file mode 100644
index 0000000000000..698a943cdab93
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p18.cpp
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T>
+void f(int i, float f, bool b, int* pi, T* pt, T t) {
+  (void)(i % 3);
+  (void)(f % 3); // expected-error {{invalid operands}}
+  (void)(b % 3);
+  (void)(pi % 3); // expected-error {{invalid operands}}
+  (void)(pt % 3); // FIXME
+  (void)(t % 3);
+  (void)(3 % i);
+  (void)(3 % f); // expected-error {{invalid operands}}
+  (void)(3 % b);
+  (void)(3 % pi); // expected-error {{invalid operands}}
+  (void)(3 % pt); // FIXME
+  (void)(3 % t);
+
+  (void)(i & 3);
+  (void)(f & 3); // expected-error {{invalid operands}}
+  (void)(b & 3);
+  (void)(pi & 3); // expected-error {{invalid operands}}
+  (void)(pt & 3); // FIXME
+  (void)(t & 3);
+  (void)(3 & i);
+  (void)(3 & f); // expected-error {{invalid operands}}
+  (void)(3 & b);
+  (void)(3 & pi); // expected-error {{invalid operands}}
+  (void)(3 & pt); // FIXME
+  (void)(3 & t);
+
+  (void)(i ^ 3);
+  (void)(f ^ 3); // expected-error {{invalid operands}}
+  (void)(b ^ 3);
+  (void)(pi ^ 3); // expected-error {{invalid operands}}
+  (void)(pt ^ 3); // FIXME
+  (void)(t ^ 3);
+  (void)(3 ^ i);
+  (void)(3 ^ f); // expected-error {{invalid operands}}
+  (void)(3 ^ b);
+  (void)(3 ^ pi); // expected-error {{invalid operands}}
+  (void)(3 ^ pt); // FIXME
+  (void)(3 ^ t);
+
+  (void)(i | 3);
+  (void)(f | 3); // expected-error {{invalid operands}}
+  (void)(b | 3);
+  (void)(pi | 3); // expected-error {{invalid operands}}
+  (void)(pt | 3); // FIXME
+  (void)(t | 3);
+  (void)(3 | i);
+  (void)(3 | f); // expected-error {{invalid operands}}
+  (void)(3 | b);
+  (void)(3 | pi); // expected-error {{invalid operands}}
+  (void)(3 | pt); // FIXME
+  (void)(3 | t);
+
+  (void)(i << 3);
+  (void)(f << 3); // expected-error {{invalid operands}}
+  (void)(b << 3);
+  (void)(pi << 3); // expected-error {{invalid operands}}
+  (void)(pt << 3); // FIXME
+  (void)(t << 3);
+  (void)(3 << i);
+  (void)(3 << f); // expected-error {{invalid operands}}
+  (void)(3 << b);
+  (void)(3 << pi); // expected-error {{invalid operands}}
+  (void)(3 << pt); // FIXME
+  (void)(3 << t);
+
+  (void)(i >> 3);
+  (void)(f >> 3); // expected-error {{invalid operands}}
+  (void)(b >> 3);
+  (void)(pi >> 3); // expected-error {{invalid operands}}
+  (void)(pt >> 3); // FIXME
+  (void)(t >> 3);
+  (void)(3 >> i);
+  (void)(3 >> f); // expected-error {{invalid operands}}
+  (void)(3 >> b);
+  (void)(3 >> pi); // expected-error {{invalid operands}}
+  (void)(3 >> pt); // FIXME
+  (void)(3 >> t);
+}
diff --git a/clang/test/CXX/over/over.built/p19.cpp b/clang/test/CXX/over/over.built/p19.cpp
new file mode 100644
index 0000000000000..0317f6b45a58d
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p19.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T>
+void f(int i, float f, int* pi, T* pt, T t) {
+  i = i;
+  i *= i;
+  i /= i;
+  i += i;
+  i -= i;
+  i -= f;
+  i -= pi; // expected-error {{invalid operands}}
+  i -= pt; // FIXME
+  i -= t;
+
+  f = f;
+  f *= f;
+  f /= f;
+  f += f;
+  f -= f;
+  f -= i;
+  f -= pi; // expected-error {{invalid operands}}
+  f -= pt; // FIXME
+  f -= t;
+}
diff --git a/clang/test/CXX/over/over.built/p20.cpp b/clang/test/CXX/over/over.built/p20.cpp
new file mode 100644
index 0000000000000..92ba118ee7345
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p20.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T, typename U>
+void f(int* pi, float* pf, T* pt, U* pu, T t) {
+  pi = pi;
+  pi = pf; // expected-error {{incompatible pointer types}}
+  pi = pt;
+  pu = pi;
+  pu = pt;
+  pi = t;
+  pu = t;
+}
diff --git a/clang/test/CXX/over/over.built/p21.cpp b/clang/test/CXX/over/over.built/p21.cpp
new file mode 100644
index 0000000000000..33db6647627b9
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p21.cpp
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+struct A {
+};
+
+template <typename T>
+void f(int A::* pi, float A::* pf, int T::* pt, T A::* pu, T t) {
+  pi = pi;
+  pi = pf; // expected-error {{assigning to 'int A::*' from incompatible type 'float A::*'}}
+  pi = pt;
+  pi = pu;
+  pi = t;
+}
diff --git a/clang/test/CXX/over/over.built/p22.cpp b/clang/test/CXX/over/over.built/p22.cpp
new file mode 100644
index 0000000000000..330f1c9231407
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p22.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T>
+void f(int* pi, T* pt, T t) {
+  pi += 3;
+  pi += pi; // expected-error {{invalid operands}}
+  pt += 3;
+  pi += t;
+  pi += pt; // FIXME
+  pt += pi; //FIXME
+  pt += pt; //FIXME
+}
diff --git a/clang/test/CXX/over/over.built/p23.cpp b/clang/test/CXX/over/over.built/p23.cpp
index a1c0d4f3f612a..dfea85906f305 100644
--- a/clang/test/CXX/over/over.built/p23.cpp
+++ b/clang/test/CXX/over/over.built/p23.cpp
@@ -1,26 +1,46 @@
-// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s
-// expected-no-diagnostics
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
 
-struct Variant {
-  template <typename T> operator T();
-};
+template <typename T>
+void f(int i, float f, bool b, int* pi, T* pt, T t) {
+  i %= 3;
+  f %= 3; // expected-error {{invalid operands}}
+  b %= 3;
+  pi %= 3; // expected-error {{invalid operands}}
+  pt %= 3; // FIXME
+  t %= 3;
 
-Variant getValue();
+  i &= 3;
+  f &= 3; // expected-error {{invalid operands}}
+  b &= 3;
+  pi &= 3; // expected-error {{invalid operands}}
+  pt &= 3; // FIXME
+  t &= 3;
 
-void testVariant() {
-  bool ret1 = getValue() || getValue();
-  bool ret2 = getValue() && getValue();
-  bool ret3 = !getValue();
-}
+  i ^= 3;
+  f ^= 3; // expected-error {{invalid operands}}
+  b ^= 3;
+  pi ^= 3; // expected-error {{invalid operands}}
+  pt ^= 3; // FIXME
+  t ^= 3;
 
-struct ExplicitVariant {
-  template <typename T> explicit operator T();
-};
+  i |= 3;
+  f |= 3; // expected-error {{invalid operands}}
+  b |= 3;
+  pi |= 3; // expected-error {{invalid operands}}
+  pt |= 3; // FIXME
+  t |= 3;
 
-ExplicitVariant getExplicitValue();
+  i <<= 3;
+  f <<= 3; // expected-error {{invalid operands}}
+  b <<= 3;
+  pi <<= 3; // expected-error {{invalid operands}}
+  pt <<= 3; // FIXME
+  t <<= 3;
 
-void testExplicitVariant() {
-  bool ret1 = getExplicitValue() || getExplicitValue();
-  bool ret2 = getExplicitValue() && getExplicitValue();
-  bool ret3 = !getExplicitValue();
+  i >>= 3;
+  f >>= 3; // expected-error {{invalid operands}}
+  b >>= 3;
+  pi >>= 3; // expected-error {{invalid operands}}
+  pt >>= 3; // FIXME
+  t >>= 3;
 }
diff --git a/clang/test/CXX/over/over.built/p24.cpp b/clang/test/CXX/over/over.built/p24.cpp
new file mode 100644
index 0000000000000..a1c0d4f3f612a
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p24.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -verify %s
+// expected-no-diagnostics
+
+struct Variant {
+  template <typename T> operator T();
+};
+
+Variant getValue();
+
+void testVariant() {
+  bool ret1 = getValue() || getValue();
+  bool ret2 = getValue() && getValue();
+  bool ret3 = !getValue();
+}
+
+struct ExplicitVariant {
+  template <typename T> explicit operator T();
+};
+
+ExplicitVariant getExplicitValue();
+
+void testExplicitVariant() {
+  bool ret1 = getExplicitValue() || getExplicitValue();
+  bool ret2 = getExplicitValue() && getExplicitValue();
+  bool ret3 = !getExplicitValue();
+}
diff --git a/clang/test/CXX/over/over.built/p25.cpp b/clang/test/CXX/over/over.built/p25.cpp
index 09e550ddc0ec3..3c48dcd9aa673 100644
--- a/clang/test/CXX/over/over.built/p25.cpp
+++ b/clang/test/CXX/over/over.built/p25.cpp
@@ -1,16 +1,12 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
-// expected-no-diagnostics
-
-enum class Color { Red, Green, Blue };
-
-struct ConvertsToColorA {
-  operator Color();
-};
-
-struct ConvertsToColorB {
-  operator Color();
-};
-
-Color foo(bool cond, ConvertsToColorA ca, ConvertsToColorB cb) {
-  return cond? ca : cb;
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T>
+void f(int i, float f, bool b, char c, int* pi, T* pt) {
+  (void)!i;
+  (void)!f;
+  (void)!b;
+  (void)!c;
+  (void)!pi;
+  (void)!pt;
 }
+// expected-no-diagnostics
diff --git a/clang/test/CXX/over/over.built/p26.cpp b/clang/test/CXX/over/over.built/p26.cpp
new file mode 100644
index 0000000000000..09e550ddc0ec3
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p26.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
+// expected-no-diagnostics
+
+enum class Color { Red, Green, Blue };
+
+struct ConvertsToColorA {
+  operator Color();
+};
+
+struct ConvertsToColorB {
+  operator Color();
+};
+
+Color foo(bool cond, ConvertsToColorA ca, ConvertsToColorB cb) {
+  return cond? ca : cb;
+}
diff --git a/clang/test/CXX/over/over.built/p4.cpp b/clang/test/CXX/over/over.built/p4.cpp
new file mode 100644
index 0000000000000..d7cd99c68d6a2
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p4.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -std=c++17 -verify %s -Wno-tautological-compare
+
+void f(int i, bool b) {
+  (void)++i;
+  (void)i++;
+
+  (void)++b; // expected-error {{ISO C++17 does not allow incrementing expression of type bool}}
+  (void)b++; // expected-error {{ISO C++17 does not allow incrementing expression of type bool}}
+}
+
diff --git a/clang/test/CXX/over/over.built/p5.cpp b/clang/test/CXX/over/over.built/p5.cpp
new file mode 100644
index 0000000000000..4ba32564e9ad8
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p5.cpp
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+void f(int i, bool b) {
+  (void)--i;
+  (void)i--;
+
+  (void)--b; // expected-error {{cannot decrement expression of type bool}}
+  (void)b--; // expected-error {{cannot decrement expression of type bool}}
+}
+
diff --git a/clang/test/CXX/over/over.built/p6.cpp b/clang/test/CXX/over/over.built/p6.cpp
new file mode 100644
index 0000000000000..ca81c9aecce86
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p6.cpp
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+struct A{};
+
+template <typename T>
+void f(int* pi, A* pa, T* pt) {
+  (void)++pi;
+  (void)pi++;
+  (void)--pi;
+  (void)pi--;
+
+  (void)++pa;
+  (void)pa++;
+  (void)--pa;
+  (void)pa--;
+
+  (void)++pt;
+  (void)pt++;
+  (void)--pt;
+  (void)pt--;
+}
+// expected-no-diagnostics
+
diff --git a/clang/test/CXX/over/over.built/p7.cpp b/clang/test/CXX/over/over.built/p7.cpp
new file mode 100644
index 0000000000000..348c4cdf37830
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p7.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+struct A{};
+
+template <typename T>
+void f(int* pi, A* pa, T* pt) {
+  (void)*pi;
+  (void)*pa;
+  (void)*pt;
+}
+// expected-no-diagnostics
+
diff --git a/clang/test/CXX/over/over.built/p8.cpp b/clang/test/CXX/over/over.built/p8.cpp
new file mode 100644
index 0000000000000..7e34ea3a217f4
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p8.cpp
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+template <typename T>
+void f(void(*pf)(), T(*ptf)(T)) {
+  (void)*pf;
+  (void)*ptf;
+}
+// expected-no-diagnostics
+
diff --git a/clang/test/CXX/over/over.built/p9.cpp b/clang/test/CXX/over/over.built/p9.cpp
new file mode 100644
index 0000000000000..7c981e73d4cd3
--- /dev/null
+++ b/clang/test/CXX/over/over.built/p9.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -verify %s -Wno-tautological-compare
+
+struct A{};
+
+template <typename T>
+void f(int* pi, A* pa, T* pt) {
+  (void)+pi;
+  (void)+pa;
+  (void)+pt;
+}
+// expected-no-diagnostics
+
diff --git a/clang/test/CXX/over/over.built/spaceship.cpp b/clang/test/CXX/over/over.built/spaceship.cpp
new file mode 100644
index 0000000000000..510d601578b5a
--- /dev/null
+++ b/clang/test/CXX/over/over.built/spaceship.cpp
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -std=c++20 -verify %s -Wno-tautological-compare
+
+namespace std {
+  struct strong_ordering {
+    int n;
+    constexpr operator int() const { return n; }
+    static const strong_ordering less, equal, greater;
+  };
+  constexpr strong_ordering strong_ordering::less{-1},
+      strong_ordering::equal{0}, strong_ordering::greater{1};
+}
+
+template <typename T>
+void f(int i, int* pi, T* pt, T t) {
+  (void)(i <=> i);
+  (void)(i <=> pi); // expected-error {{comparison between pointer and integer}}
+  (void)(i <=> pt);
+  (void)(pi <=> pt);
+  (void)(pi <=> t);
+}
+
diff --git a/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-2a.cpp b/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-2a.cpp
index 2acdc2ca96b8e..8e69f134a3d14 100644
--- a/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-2a.cpp
+++ b/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-2a.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -std=c++2a -verify %s
+// RUN: %clang_cc1 -std=c++2a -frelaxed-template-template-args -verify %s
 
 template<typename T> concept C = T::f();
 // expected-note@-1{{similar constraint}}
diff --git a/clang/test/ClangScanDeps/Inputs/preprocess_minimized_pragmas.h b/clang/test/ClangScanDeps/Inputs/preprocess_minimized_pragmas.h
new file mode 100644
index 0000000000000..f0d6b090991b3
--- /dev/null
+++ b/clang/test/ClangScanDeps/Inputs/preprocess_minimized_pragmas.h
@@ -0,0 +1,27 @@
+// #pragma push_macro/pop_macro
+#define INCLUDE_A
+#pragma push_macro("INCLUDE_A")
+#undef INCLUDE_A
+#pragma pop_macro("INCLUDE_A")
+
+#ifdef INCLUDE_A
+#include "a.h"
+#endif
+
+// #pragma push_macro/pop_macro with argument macro expansion
+#define INCLUDE_B
+#define MACRO_NAME "INCLUDE_B"
+
+#pragma push_macro(MACRO_NAME)
+#undef INCLUDE_B
+#pragma pop_macro(MACRO_NAME)
+
+#ifdef INCLUDE_B
+#include "b.h"
+#endif
+
+// #pragma include_alias (MS specific)
+// When compiling without MS Extensions, the pragma is not recognized,
+// and the file c_alias.h is included instead of c.h
+#pragma include_alias("c_alias.h", "c.h")
+#include "c_alias.h"
diff --git a/clang/test/ClangScanDeps/Inputs/preprocess_minimized_pragmas_cdb.json b/clang/test/ClangScanDeps/Inputs/preprocess_minimized_pragmas_cdb.json
new file mode 100644
index 0000000000000..f642b94c1d9bc
--- /dev/null
+++ b/clang/test/ClangScanDeps/Inputs/preprocess_minimized_pragmas_cdb.json
@@ -0,0 +1,12 @@
+[
+{
+  "directory": "DIR",
+  "command": "clang -E DIR/preprocess_minimized_pragmas_basic.cpp -IInputs -target x86_64-linux",
+  "file": "DIR/preprocess_minimized_pragmas_basic.cpp"
+},
+{
+  "directory": "DIR",
+  "command": "clang -E DIR/preprocess_minimized_pragmas_ms.cpp -IInputs -target x86_64-windows",
+  "file": "DIR/preprocess_minimized_pragmas_ms.cpp"
+}
+]
diff --git a/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp b/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp
new file mode 100644
index 0000000000000..fa906f94df9ea
--- /dev/null
+++ b/clang/test/ClangScanDeps/preprocess_minimized_pragmas.cpp
@@ -0,0 +1,32 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/preprocess_minimized_pragmas_basic.cpp
+// RUN: cp %s %t.dir/preprocess_minimized_pragmas_ms.cpp
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/preprocess_minimized_pragmas.h %t.dir/Inputs/preprocess_minimized_pragmas.h
+// RUN: touch %t.dir/Inputs/a.h
+// RUN: touch %t.dir/Inputs/b.h
+// RUN: touch %t.dir/Inputs/c.h
+// RUN: touch %t.dir/Inputs/c_alias.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/preprocess_minimized_pragmas_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess-minimized-sources | \
+// RUN:   FileCheck %s
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 -mode preprocess | \
+// RUN:   FileCheck %s
+
+#include "preprocess_minimized_pragmas.h"
+
+// CHECK: preprocess_minimized_pragmas_basic.cpp
+// CHECK-NEXT: Inputs{{/|\\}}preprocess_minimized_pragmas.h
+// CHECK-NEXT: Inputs{{/|\\}}a.h
+// CHECK-NEXT: Inputs{{/|\\}}b.h
+// Expect include aliasing "c_alias.h" -> "c.h" to fail when Microsoft extensions are off.
alias "c_alias.h" -> "c.h" to fail when Microsoft extensions are off. +// CHECK-NEXT: Inputs{{/|\\}}c_alias.h + +// CHECK: preprocess_minimized_pragmas_ms.cpp +// CHECK-NEXT: Inputs{{/|\\}}preprocess_minimized_pragmas.h +// CHECK-NEXT: Inputs{{/|\\}}a.h +// CHECK-NEXT: Inputs{{/|\\}}b.h +// CHECK-NEXT: Inputs{{/|\\}}c.h diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c new file mode 100644 index 0000000000000..ba29104b15afc --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c @@ -0,0 +1,131 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: @test_vcpop_m_b1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv64i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b1(vbool1_t op1, size_t vl) { + return vcpop(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv32i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b2(vbool2_t op1, size_t vl) { + return vcpop(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv16i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b4(vbool4_t op1, size_t vl) { + return vcpop(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv8i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b8(vbool8_t op1, size_t vl) { + return vcpop(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv4i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b16(vbool16_t op1, size_t vl) { + return vcpop(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv2i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b32(vbool32_t op1, size_t vl) { + return vcpop(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b64( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv1i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b64(vbool64_t op1, size_t vl) { + return vcpop(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv64i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b1_m(vbool1_t mask, vbool1_t op1, size_t vl) { + return vcpop(mask, op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv32i1.i64( [[OP1:%.*]], 
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long test_vcpop_m_b2_m(vbool2_t mask, vbool2_t op1, size_t vl) {
+  return vcpop(mask, op1, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vcpop_m_b4_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv16i1.i64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long test_vcpop_m_b4_m(vbool4_t mask, vbool4_t op1, size_t vl) {
+  return vcpop(mask, op1, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vcpop_m_b8_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv8i1.i64(<vscale x 8 x i1> [[OP1:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long test_vcpop_m_b8_m(vbool8_t mask, vbool8_t op1, size_t vl) {
+  return vcpop(mask, op1, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vcpop_m_b16_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv4i1.i64(<vscale x 4 x i1> [[OP1:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long test_vcpop_m_b16_m(vbool16_t mask, vbool16_t op1, size_t vl) {
+  return vcpop(mask, op1, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vcpop_m_b32_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv2i1.i64(<vscale x 2 x i1> [[OP1:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long test_vcpop_m_b32_m(vbool32_t mask, vbool32_t op1, size_t vl) {
+  return vcpop(mask, op1, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vcpop_m_b64_m(
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv1i1.i64(<vscale x 1 x i1> [[OP1:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: ret i64 [[TMP0]]
+//
+unsigned long test_vcpop_m_b64_m(vbool64_t mask, vbool64_t op1, size_t vl) {
+  return vcpop(mask, op1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c
index bebab3fcbea57..6e966a8040c8c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c
@@ -67,65 +67,65 @@ vbool64_t test_vmand_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) {
   return vmand(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmandnot_mm_b1(
+// CHECK-RV64-LABEL: @test_vmandn_mm_b1(
 // CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i1> @llvm.riscv.vmandnot.nxv64i1.i64(<vscale x 64 x i1> [[OP1:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 64 x i1> @llvm.riscv.vmandn.nxv64i1.i64(<vscale x 64 x i1> [[OP1:%.*]], <vscale x 64 x i1> [[OP2:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT: ret <vscale x 64 x i1> [[TMP0]]
 //
-vbool1_t test_vmandnot_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) {
-  return vmandnot(op1, op2, vl);
+vbool1_t test_vmandn_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) {
+  return vmandn(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmandnot_mm_b2(
+// CHECK-RV64-LABEL: @test_vmandn_mm_b2(
 // CHECK-RV64-NEXT: entry:
-// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i1> @llvm.riscv.vmandnot.nxv32i1.i64(<vscale x 32 x i1> [[OP1:%.*]], <vscale x 32 x i1> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x i1> @llvm.riscv.vmandn.nxv32i1.i64(<vscale x 32 x i1> [[OP1:%.*]], <vscale x 32 x i1> [[OP2:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT: ret <vscale x 32 x i1> [[TMP0]]
 //
-vbool2_t test_vmandnot_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) {
-  return vmandnot(op1, op2, vl);
+vbool2_t test_vmandn_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) {
+  return vmandn(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmandnot_mm_b4(
@test_vmandnot_mm_b4( +// CHECK-RV64-LABEL: @test_vmandn_mm_b4( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool4_t test_vmandnot_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { - return vmandnot(op1, op2, vl); +vbool4_t test_vmandn_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { + return vmandn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b8( +// CHECK-RV64-LABEL: @test_vmandn_mm_b8( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool8_t test_vmandnot_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { - return vmandnot(op1, op2, vl); +vbool8_t test_vmandn_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { + return vmandn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b16( +// CHECK-RV64-LABEL: @test_vmandn_mm_b16( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool16_t test_vmandnot_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { - return vmandnot(op1, op2, vl); +vbool16_t test_vmandn_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { + return vmandn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b32( +// CHECK-RV64-LABEL: @test_vmandn_mm_b32( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool32_t test_vmandnot_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { - return vmandnot(op1, op2, vl); +vbool32_t test_vmandn_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { + return vmandn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b64( +// CHECK-RV64-LABEL: @test_vmandn_mm_b64( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool64_t test_vmandnot_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { - return vmandnot(op1, op2, vl); +vbool64_t test_vmandn_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { + return vmandn(op1, op2, vl); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c index 202ac20b7fc5c..ecb662c329265 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c @@ -67,65 +67,65 @@ vbool64_t test_vmor_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { return vmor(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b1( +// CHECK-RV64-LABEL: @test_vmorn_mm_b1( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv64i1.i64( [[OP1:%.*]], 
[[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv64i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool1_t test_vmornot_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) { - return vmornot(op1, op2, vl); +vbool1_t test_vmorn_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) { + return vmorn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b2( +// CHECK-RV64-LABEL: @test_vmorn_mm_b2( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv32i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv32i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool2_t test_vmornot_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) { - return vmornot(op1, op2, vl); +vbool2_t test_vmorn_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) { + return vmorn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b4( +// CHECK-RV64-LABEL: @test_vmorn_mm_b4( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool4_t test_vmornot_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { - return vmornot(op1, op2, vl); +vbool4_t test_vmorn_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { + return vmorn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b8( +// CHECK-RV64-LABEL: @test_vmorn_mm_b8( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool8_t test_vmornot_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { - return vmornot(op1, op2, vl); +vbool8_t test_vmorn_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { + return vmorn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b16( +// CHECK-RV64-LABEL: @test_vmorn_mm_b16( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool16_t test_vmornot_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { - return vmornot(op1, op2, vl); +vbool16_t test_vmorn_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { + return vmorn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b32( +// CHECK-RV64-LABEL: @test_vmorn_mm_b32( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool32_t test_vmornot_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { - return vmornot(op1, op2, vl); +vbool32_t test_vmorn_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { + return vmorn(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b64( +// CHECK-RV64-LABEL: @test_vmorn_mm_b64( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = 
call @llvm.riscv.vmorn.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool64_t test_vmornot_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { - return vmornot(op1, op2, vl); +vbool64_t test_vmorn_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { + return vmorn(op1, op2, vl); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vpopc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vpopc.c deleted file mode 100644 index 2e3d92c93aba0..0000000000000 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vpopc.c +++ /dev/null @@ -1,131 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s - -#include - -// CHECK-RV64-LABEL: @test_vpopc_m_b1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv64i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b1(vbool1_t op1, size_t vl) { - return vpopc(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv32i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b2(vbool2_t op1, size_t vl) { - return vpopc(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv16i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b4(vbool4_t op1, size_t vl) { - return vpopc(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv8i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b8(vbool8_t op1, size_t vl) { - return vpopc(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b16( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv4i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b16(vbool16_t op1, size_t vl) { - return vpopc(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b32( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv2i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b32(vbool32_t op1, size_t vl) { - return vpopc(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b64( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv1i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b64(vbool64_t op1, size_t vl) { - return vpopc(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv64i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b1_m(vbool1_t mask, vbool1_t op1, size_t vl) { - return vpopc(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv32i1.i64( [[OP1:%.*]], 
[[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b2_m(vbool2_t mask, vbool2_t op1, size_t vl) { - return vpopc(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv16i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b4_m(vbool4_t mask, vbool4_t op1, size_t vl) { - return vpopc(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv8i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b8_m(vbool8_t mask, vbool8_t op1, size_t vl) { - return vpopc(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b16_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv4i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b16_m(vbool16_t mask, vbool16_t op1, size_t vl) { - return vpopc(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b32_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv2i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b32_m(vbool32_t mask, vbool32_t op1, size_t vl) { - return vpopc(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b64_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv1i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b64_m(vbool64_t mask, vbool64_t op1, size_t vl) { - return vpopc(mask, op1, vl); -} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c new file mode 100644 index 0000000000000..78a8f11c5aaff --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c @@ -0,0 +1,131 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: @test_vcpop_m_b1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv64i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b1(vbool1_t op1, size_t vl) { + return vcpop_m_b1(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv32i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b2(vbool2_t op1, size_t vl) { + return vcpop_m_b2(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv16i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b4(vbool4_t op1, size_t vl) { + return vcpop_m_b4(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv8i1.i64( [[OP1:%.*]], 
i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b8(vbool8_t op1, size_t vl) { + return vcpop_m_b8(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b16( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv4i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b16(vbool16_t op1, size_t vl) { + return vcpop_m_b16(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b32( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv2i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b32(vbool32_t op1, size_t vl) { + return vcpop_m_b32(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b64( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.nxv1i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b64(vbool64_t op1, size_t vl) { + return vcpop_m_b64(op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv64i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b1_m(vbool1_t mask, vbool1_t op1, size_t vl) { + return vcpop_m_b1_m(mask, op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b2_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv32i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b2_m(vbool2_t mask, vbool2_t op1, size_t vl) { + return vcpop_m_b2_m(mask, op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b4_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv16i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b4_m(vbool4_t mask, vbool4_t op1, size_t vl) { + return vcpop_m_b4_m(mask, op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b8_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv8i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b8_m(vbool8_t mask, vbool8_t op1, size_t vl) { + return vcpop_m_b8_m(mask, op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b16_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv4i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b16_m(vbool16_t mask, vbool16_t op1, size_t vl) { + return vcpop_m_b16_m(mask, op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b32_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv2i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b32_m(vbool32_t mask, vbool32_t op1, size_t vl) { + return vcpop_m_b32_m(mask, op1, vl); +} + +// CHECK-RV64-LABEL: @test_vcpop_m_b64_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vcpop.mask.nxv1i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret i64 [[TMP0]] +// +unsigned long test_vcpop_m_b64_m(vbool64_t mask, vbool64_t op1, size_t vl) { + return 
vcpop_m_b64_m(mask, op1, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredosum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c similarity index 50% rename from clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredosum.c rename to clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c index 590cd741d155d..7c060e1829445 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredosum.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c @@ -223,3 +223,221 @@ vfloat32m1_t test_vfwredosum_vs_f16m4_f32m1_m (vbool4_t mask, vfloat32m1_t dest, vfloat32m1_t test_vfwredosum_vs_f16m8_f32m1_m (vbool2_t mask, vfloat32m1_t dest, vfloat16m8_t vector, vfloat32m1_t scalar, size_t vl) { return vfwredosum_vs_f16m8_f32m1_m(mask, dest, vector, scalar, vl); } + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32mf2_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv1f64.nxv1f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32mf2_f64m1(vfloat64m1_t dst, + vfloat32mf2_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32mf2_f64m1(dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m1_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv1f64.nxv2f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m1_f64m1(vfloat64m1_t dst, + vfloat32m1_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m1_f64m1(dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m2_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv1f64.nxv4f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m2_f64m1(vfloat64m1_t dst, + vfloat32m2_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m2_f64m1(dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m4_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv1f64.nxv8f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m4_f64m1(vfloat64m1_t dst, + vfloat32m4_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m4_f64m1(dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m8_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv1f64.nxv16f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m8_f64m1(vfloat64m1_t dst, + vfloat32m8_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m8_f64m1(dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32mf2_f64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32mf2_f64m1_m(vbool64_t mask, vfloat64m1_t dst, + vfloat32mf2_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32mf2_f64m1_m(mask, dst, vector, scalar, vl); +} 
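For context on the rename exercised by these hunks: vfwredosum is the ordered (strictly sequential) widening floating-point sum reduction, while the renamed vfwredusum is the unordered form, which may reassociate the additions and can therefore round differently. A minimal sketch of the two call sites, assuming <riscv_vector.h> and the f32m1-to-f64m1 variants exercised by the tests in this file (the helper names sum_ordered and sum_unordered are hypothetical, not part of the patch):

#include <riscv_vector.h>

// Ordered widening reduction: adds elements strictly left to right.
vfloat64m1_t sum_ordered(vfloat64m1_t dst, vfloat32m1_t vec,
                         vfloat64m1_t scalar, size_t vl) {
  return vfwredosum_vs_f32m1_f64m1(dst, vec, scalar, vl);
}

// Unordered widening reduction (renamed in this patch from vfwredsum):
// the additions may be reassociated, so the result can differ from the
// ordered form in the final rounding.
vfloat64m1_t sum_unordered(vfloat64m1_t dst, vfloat32m1_t vec,
                           vfloat64m1_t scalar, size_t vl) {
  return vfwredusum_vs_f32m1_f64m1(dst, vec, scalar, vl);
}

The tests below pin this naming scheme down across the remaining mask and element-width combinations.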
+ +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m1_f64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m1_f64m1_m(vbool32_t mask, vfloat64m1_t dst, + vfloat32m1_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m1_f64m1_m(mask, dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m2_f64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m2_f64m1_m(vbool16_t mask, vfloat64m1_t dst, + vfloat32m2_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m2_f64m1_m(mask, dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m4_f64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m4_f64m1_m(vbool8_t mask, vfloat64m1_t dst, + vfloat32m4_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m4_f64m1_m(mask, dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f32m8_f64m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32.i64( [[DST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vfwredusum_vs_f32m8_f64m1_m(vbool4_t mask, vfloat64m1_t dst, + vfloat32m8_t vector, + vfloat64m1_t scalar, size_t vl) { + return vfwredusum_vs_f32m8_f64m1_m(mask, dst, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16mf4_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv2f32.nxv1f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16mf4_f32m1 (vfloat32m1_t dest, vfloat16mf4_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16mf4_f32m1(dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16mf2_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv2f32.nxv2f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16mf2_f32m1 (vfloat32m1_t dest, vfloat16mf2_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16mf2_f32m1(dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m1_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv2f32.nxv4f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m1_f32m1 (vfloat32m1_t dest, vfloat16m1_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m1_f32m1(dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m2_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv2f32.nxv8f16.i64( 
[[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m2_f32m1 (vfloat32m1_t dest, vfloat16m2_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m2_f32m1(dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m4_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv2f32.nxv16f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m4_f32m1 (vfloat32m1_t dest, vfloat16m4_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m4_f32m1(dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m8_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.nxv2f32.nxv32f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m8_f32m1 (vfloat32m1_t dest, vfloat16m8_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m8_f32m1(dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16mf4_f32m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16mf4_f32m1_m (vbool64_t mask, vfloat32m1_t dest, vfloat16mf4_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16mf4_f32m1_m(mask, dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16mf2_f32m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16mf2_f32m1_m (vbool32_t mask, vfloat32m1_t dest, vfloat16mf2_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16mf2_f32m1_m(mask, dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m1_f32m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m1_f32m1_m (vbool16_t mask, vfloat32m1_t dest, vfloat16m1_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m1_f32m1_m(mask, dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m2_f32m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m2_f32m1_m (vbool8_t mask, vfloat32m1_t dest, vfloat16m2_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m2_f32m1_m(mask, dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m4_f32m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m4_f32m1_m 
(vbool4_t mask, vfloat32m1_t dest, vfloat16m4_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m4_f32m1_m(mask, dest, vector, scalar, vl); +} + +// CHECK-RV64-LABEL: @test_vfwredusum_vs_f16m8_f32m1_m( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16.i64( [[DEST:%.*]], [[VECTOR:%.*]], [[SCALAR:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vfwredusum_vs_f16m8_f32m1_m (vbool2_t mask, vfloat32m1_t dest, vfloat16m8_t vector, vfloat32m1_t scalar, size_t vl) { + return vfwredusum_vs_f16m8_f32m1_m(mask, dest, vector, scalar, vl); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c index d9d3caae762b8..402280d7a8ca2 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c @@ -67,65 +67,65 @@ vbool64_t test_vmand_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { return vmand_mm_b64(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b1( +// CHECK-RV64-LABEL: @test_vmandn_mm_b1( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv64i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv64i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool1_t test_vmandnot_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) { - return vmandnot_mm_b1(op1, op2, vl); +vbool1_t test_vmandn_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) { + return vmandn_mm_b1(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b2( +// CHECK-RV64-LABEL: @test_vmandn_mm_b2( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv32i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv32i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool2_t test_vmandnot_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) { - return vmandnot_mm_b2(op1, op2, vl); +vbool2_t test_vmandn_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) { + return vmandn_mm_b2(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b4( +// CHECK-RV64-LABEL: @test_vmandn_mm_b4( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool4_t test_vmandnot_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { - return vmandnot_mm_b4(op1, op2, vl); +vbool4_t test_vmandn_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { + return vmandn_mm_b4(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b8( +// CHECK-RV64-LABEL: @test_vmandn_mm_b8( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool8_t test_vmandnot_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { - return vmandnot_mm_b8(op1, op2, vl); +vbool8_t test_vmandn_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { + return vmandn_mm_b8(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b16( +// CHECK-RV64-LABEL: @test_vmandn_mm_b16( // 
CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool16_t test_vmandnot_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { - return vmandnot_mm_b16(op1, op2, vl); +vbool16_t test_vmandn_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { + return vmandn_mm_b16(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b32( +// CHECK-RV64-LABEL: @test_vmandn_mm_b32( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool32_t test_vmandnot_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { - return vmandnot_mm_b32(op1, op2, vl); +vbool32_t test_vmandn_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { + return vmandn_mm_b32(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmandnot_mm_b64( +// CHECK-RV64-LABEL: @test_vmandn_mm_b64( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandnot.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmandn.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool64_t test_vmandnot_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { - return vmandnot_mm_b64(op1, op2, vl); +vbool64_t test_vmandn_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { + return vmandn_mm_b64(op1, op2, vl); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c index 5403ad16f9075..416de0408ffa1 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c @@ -67,65 +67,65 @@ vbool64_t test_vmor_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { return vmor_mm_b64(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b1( +// CHECK-RV64-LABEL: @test_vmorn_mm_b1( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv64i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv64i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool1_t test_vmornot_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) { - return vmornot_mm_b1(op1, op2, vl); +vbool1_t test_vmorn_mm_b1(vbool1_t op1, vbool1_t op2, size_t vl) { + return vmorn_mm_b1(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b2( +// CHECK-RV64-LABEL: @test_vmorn_mm_b2( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv32i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv32i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool2_t test_vmornot_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) { - return vmornot_mm_b2(op1, op2, vl); +vbool2_t test_vmorn_mm_b2(vbool2_t op1, vbool2_t op2, size_t vl) { + return vmorn_mm_b2(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b4( +// CHECK-RV64-LABEL: @test_vmorn_mm_b4( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv16i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool4_t test_vmornot_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { - return vmornot_mm_b4(op1, op2, vl); +vbool4_t test_vmorn_mm_b4(vbool4_t op1, vbool4_t op2, size_t vl) { + return vmorn_mm_b4(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b8( +// CHECK-RV64-LABEL: @test_vmorn_mm_b8( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv8i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool8_t test_vmornot_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { - return vmornot_mm_b8(op1, op2, vl); +vbool8_t test_vmorn_mm_b8(vbool8_t op1, vbool8_t op2, size_t vl) { + return vmorn_mm_b8(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b16( +// CHECK-RV64-LABEL: @test_vmorn_mm_b16( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv4i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool16_t test_vmornot_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { - return vmornot_mm_b16(op1, op2, vl); +vbool16_t test_vmorn_mm_b16(vbool16_t op1, vbool16_t op2, size_t vl) { + return vmorn_mm_b16(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b32( +// CHECK-RV64-LABEL: @test_vmorn_mm_b32( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv2i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool32_t test_vmornot_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { - return vmornot_mm_b32(op1, op2, vl); +vbool32_t test_vmorn_mm_b32(vbool32_t op1, vbool32_t op2, size_t vl) { + return vmorn_mm_b32(op1, op2, vl); } -// CHECK-RV64-LABEL: @test_vmornot_mm_b64( +// CHECK-RV64-LABEL: @test_vmorn_mm_b64( // CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmornot.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.riscv.vmorn.nxv1i1.i64( [[OP1:%.*]], [[OP2:%.*]], i64 [[VL:%.*]]) // CHECK-RV64-NEXT: ret [[TMP0]] // -vbool64_t test_vmornot_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { - return vmornot_mm_b64(op1, op2, vl); +vbool64_t test_vmorn_mm_b64(vbool64_t op1, vbool64_t op2, size_t vl) { + return vmorn_mm_b64(op1, op2, vl); } diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vpopc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vpopc.c deleted file mode 100644 index e155e82301657..0000000000000 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vpopc.c +++ /dev/null @@ -1,131 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s - -#include - -// CHECK-RV64-LABEL: @test_vpopc_m_b1( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv64i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b1(vbool1_t op1, 
size_t vl) { - return vpopc_m_b1(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b2( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv32i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b2(vbool2_t op1, size_t vl) { - return vpopc_m_b2(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b4( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv16i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b4(vbool4_t op1, size_t vl) { - return vpopc_m_b4(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b8( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv8i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b8(vbool8_t op1, size_t vl) { - return vpopc_m_b8(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b16( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv4i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b16(vbool16_t op1, size_t vl) { - return vpopc_m_b16(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b32( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv2i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b32(vbool32_t op1, size_t vl) { - return vpopc_m_b32(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b64( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.nxv1i1.i64( [[OP1:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b64(vbool64_t op1, size_t vl) { - return vpopc_m_b64(op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b1_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv64i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b1_m(vbool1_t mask, vbool1_t op1, size_t vl) { - return vpopc_m_b1_m(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b2_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv32i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b2_m(vbool2_t mask, vbool2_t op1, size_t vl) { - return vpopc_m_b2_m(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b4_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv16i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b4_m(vbool4_t mask, vbool4_t op1, size_t vl) { - return vpopc_m_b4_m(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b8_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv8i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b8_m(vbool8_t mask, vbool8_t op1, size_t vl) { - return vpopc_m_b8_m(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b16_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv4i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// 
-unsigned long test_vpopc_m_b16_m(vbool16_t mask, vbool16_t op1, size_t vl) { - return vpopc_m_b16_m(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b32_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv2i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b32_m(vbool32_t mask, vbool32_t op1, size_t vl) { - return vpopc_m_b32_m(mask, op1, vl); -} - -// CHECK-RV64-LABEL: @test_vpopc_m_b64_m( -// CHECK-RV64-NEXT: entry: -// CHECK-RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.vpopc.mask.nxv1i1.i64( [[OP1:%.*]], [[MASK:%.*]], i64 [[VL:%.*]]) -// CHECK-RV64-NEXT: ret i64 [[TMP0]] -// -unsigned long test_vpopc_m_b64_m(vbool64_t mask, vbool64_t op1, size_t vl) { - return vpopc_m_b64_m(mask, op1, vl); -} diff --git a/clang/test/CodeGen/X86/ms_fmul.c b/clang/test/CodeGen/X86/ms_fmul.c index a0a1be9e217c5..d1cfcef814625 100644 --- a/clang/test/CodeGen/X86/ms_fmul.c +++ b/clang/test/CodeGen/X86/ms_fmul.c @@ -18,4 +18,4 @@ void __attribute__ ((naked)) foo(void) }} // CHECK-LABEL: foo -// CHECK: call void asm sideeffect inteldialect "fmul qword ptr static_const_table[edx + $$240]\0A\09ret" +// CHECK: call void asm sideeffect inteldialect "fmul qword ptr $0[edx + $$240]\0A\09ret" diff --git a/clang/test/CodeGen/attr-btf_type_tag-func.c b/clang/test/CodeGen/attr-btf_type_tag-func.c new file mode 100644 index 0000000000000..016e1064eb861 --- /dev/null +++ b/clang/test/CodeGen/attr-btf_type_tag-func.c @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -triple %itanium_abi_triple -debug-info-kind=limited -S -emit-llvm -o - %s | FileCheck %s + +#define __tag1 __attribute__((btf_type_tag("tag1"))) +#define __tag2 __attribute__((btf_type_tag("tag2"))) +#define __tag3 __attribute__((btf_type_tag("tag3"))) +#define __tag4 __attribute__((btf_type_tag("tag4"))) + +int __tag1 * __tag2 *foo(int __tag1 * __tag2 *arg) { return arg; } + +// CHECK: distinct !DISubprogram(name: "foo", scope: ![[#]], file: ![[#]], line: [[#]], type: ![[L9:[0-9]+]] +// CHECK: ![[L9]] = !DISubroutineType(types: ![[L10:[0-9]+]] +// CHECK: ![[L10]] = !{![[L11:[0-9]+]], ![[L11]]} +// CHECK: ![[L11]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L12:[0-9]+]], size: [[#]], annotations: ![[L16:[0-9]+]] +// CHECK: ![[L12]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L13:[0-9]+]], size: [[#]], annotations: ![[L14:[0-9]+]] +// CHECK: ![[L13]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed +// CHECK: ![[L14]] = !{![[L15:[0-9]+]]} +// CHECK: ![[L15]] = !{!"btf_type_tag", !"tag1"} +// CHECK: ![[L16]] = !{![[L17:[0-9]+]]} +// CHECK: ![[L17]] = !{!"btf_type_tag", !"tag2"} +// CHECK: !DILocalVariable(name: "arg", arg: 1, scope: ![[#]], file: ![[#]], line: [[#]], type: ![[L11]]) diff --git a/clang/test/CodeGen/attr-btf_type_tag-typedef-field.c b/clang/test/CodeGen/attr-btf_type_tag-typedef-field.c new file mode 100644 index 0000000000000..c80c7e9b45d96 --- /dev/null +++ b/clang/test/CodeGen/attr-btf_type_tag-typedef-field.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple %itanium_abi_triple -debug-info-kind=limited -S -emit-llvm -o - %s | FileCheck %s + +#define __tag1 __attribute__((btf_type_tag("tag1"))) +#define __tag2 __attribute__((btf_type_tag("tag2"))) + +typedef void __fn_t(int); +typedef __fn_t __tag1 __tag2 *__fn2_t; +struct t { + int __tag1 * __tag2 *a; + __fn2_t b; + long c; +}; +int *foo1(struct t *a1) { + return (int *)a1->c; +} + +// CHECK: ![[L4:[0-9]+]] = !DIBasicType(name: "int", size: 32, 
encoding: DW_ATE_signed) +// CHECK: distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t", file: ![[#]], line: [[#]], size: [[#]], elements: ![[L16:[0-9]+]]) +// CHECK: ![[L16]] = !{![[L17:[0-9]+]], ![[L24:[0-9]+]], ![[L31:[0-9]+]]} +// CHECK: ![[L17]] = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: ![[#]], file: ![[#]], line: [[#]], baseType: ![[L18:[0-9]+]], size: [[#]]) +// CHECK: ![[L18]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L19:[0-9]+]], size: [[#]], annotations: ![[L22:[0-9]+]]) +// CHECK: ![[L19]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L4]], size: [[#]], annotations: ![[L20:[0-9]+]]) +// CHECK: ![[L20]] = !{![[L21:[0-9]+]]} +// CHECK: ![[L21]] = !{!"btf_type_tag", !"tag1"} +// CHECK: ![[L22]] = !{![[L23:[0-9]+]]} +// CHECK: ![[L23]] = !{!"btf_type_tag", !"tag2"} +// CHECK: ![[L24]] = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: ![[#]], file: ![[#]], line: [[#]], baseType: ![[L25:[0-9]+]] +// CHECK: ![[L25]] = !DIDerivedType(tag: DW_TAG_typedef, name: "__fn2_t", file: ![[#]], line: [[#]], baseType: ![[L26:[0-9]+]]) +// CHECK: ![[L26]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L27:[0-9]+]], size: [[#]], annotations: ![[L30:[0-9]+]]) +// CHECK: ![[L27]] = !DIDerivedType(tag: DW_TAG_typedef, name: "__fn_t", file: ![[#]], line: [[#]], baseType: ![[L28:[0-9]+]]) +// CHECK: ![[L28]] = !DISubroutineType(types: ![[L29:[0-9]+]]) +// CHECK: ![[L29]] = !{null, ![[L4]]} +// CHECK: ![[L30]] = !{![[L21]], ![[L23]]} +// CHECK: ![[L31]] = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: ![[#]], file: ![[#]], line: [[#]]1, baseType: ![[L32:[0-9]+]] +// CHECK: ![[L32]] = !DIBasicType(name: "long", size: [[#]], encoding: DW_ATE_signed) diff --git a/clang/test/CodeGen/attr-btf_type_tag-var.c b/clang/test/CodeGen/attr-btf_type_tag-var.c new file mode 100644 index 0000000000000..3493d8f6d3666 --- /dev/null +++ b/clang/test/CodeGen/attr-btf_type_tag-var.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -triple %itanium_abi_triple -debug-info-kind=limited -S -emit-llvm -o - %s | FileCheck %s + +#define __tag1 __attribute__((btf_type_tag("tag1"))) +#define __tag2 __attribute__((btf_type_tag("tag2"))) +#define __tag3 __attribute__((btf_type_tag("tag3"))) +#define __tag4 __attribute__((btf_type_tag("tag4"))) +#define __tag5 __attribute__((btf_type_tag("tag5"))) +#define __tag6 __attribute__((btf_type_tag("tag6"))) + +const int __tag1 __tag2 volatile * const __tag3 __tag4 volatile * __tag5 __tag6 const volatile * g; + +// CHECK: distinct !DIGlobalVariable(name: "g", scope: ![[#]], file: ![[#]], line: [[#]], type: ![[L6:[0-9]+]] +// CHECK: ![[L6]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L7:[0-9]+]], size: [[#]], annotations: ![[L22:[0-9]+]] +// CHECK: ![[L7]] = !DIDerivedType(tag: DW_TAG_const_type, baseType: ![[L8:[0-9]+]] +// CHECK: ![[L8]] = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: ![[L9:[0-9]+]] +// CHECK: ![[L9]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L10:[0-9]+]], size: [[#]], annotations: ![[L19:[0-9]+]] +// CHECK: ![[L10]] = !DIDerivedType(tag: DW_TAG_const_type, baseType: ![[L11:[0-9]+]] +// CHECK: ![[L11]] = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: ![[L12:[0-9]+]] +// CHECK: ![[L12]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: ![[L13:[0-9]+]], size: [[#]], annotations: ![[L16:[0-9]+]] +// CHECK: ![[L13]] = !DIDerivedType(tag: DW_TAG_const_type, baseType: ![[L14:[0-9]+]] +// CHECK: ![[L14]] = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: ![[L15:[0-9]+]] +// CHECK: 
![[L15]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed +// CHECK: ![[L16]] = !{![[L17:[0-9]+]], ![[L18:[0-9]+]]} +// CHECK: ![[L17]] = !{!"btf_type_tag", !"tag1"} +// CHECK: ![[L18]] = !{!"btf_type_tag", !"tag2"} +// CHECK: ![[L19]] = !{![[L20:[0-9]+]], ![[L21:[0-9]+]]} +// CHECK: ![[L20]] = !{!"btf_type_tag", !"tag3"} +// CHECK: ![[L21]] = !{!"btf_type_tag", !"tag4"} +// CHECK: ![[L22]] = !{![[L23:[0-9]+]], ![[L24:[0-9]+]]} +// CHECK: ![[L23]] = !{!"btf_type_tag", !"tag5"} +// CHECK: ![[L24]] = !{!"btf_type_tag", !"tag6"} diff --git a/clang/test/CodeGen/builtins-ppc.c b/clang/test/CodeGen/builtins-ppc.c index 89c2df45a9f56..cbd53346d4b0f 100644 --- a/clang/test/CodeGen/builtins-ppc.c +++ b/clang/test/CodeGen/builtins-ppc.c @@ -36,3 +36,13 @@ void test_builtin_ppc_flm() { // CHECK: call double @llvm.ppc.setflm(double %1) res = __builtin_setflm(res); } + +double test_builtin_unpack_ldbl(long double x) { + // CHECK: call double @llvm.ppc.unpack.longdouble(ppc_fp128 %0, i32 1) + return __builtin_unpack_longdouble(x, 1); +} + +long double test_builtin_pack_ldbl(double x, double y) { + // CHECK: call ppc_fp128 @llvm.ppc.pack.longdouble(double %0, double %1) + return __builtin_pack_longdouble(x, y); +} diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c new file mode 100644 index 0000000000000..417caed494d90 --- /dev/null +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef short int si8 __attribute__((ext_vector_type(8))); +typedef unsigned int u4 __attribute__((ext_vector_type(4))); + +__attribute__((address_space(1))) float4 vf1_as_one; + +void test_builtin_reduce_max(float4 vf1, si8 vi1, u4 vu1) { + // CHECK-LABEL: define void @test_builtin_reduce_max( + // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16 + // CHECK-NEXT: call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1]]) + float r1 = __builtin_reduce_max(vf1); + + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[VI1]]) + short r2 = __builtin_reduce_max(vi1); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16 + // CHECK-NEXT: call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[VU1]]) + unsigned r3 = __builtin_reduce_max(vu1); + + // CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16 + // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[VF1_AS1]]) + // CHECK-NEXT: fpext float [[RDX1]] to double + const double r4 = __builtin_reduce_max(vf1_as_one); + + // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16 + // CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> [[CVI1]]) + // CHECK-NEXT: sext i16 [[RDX2]] to i64 + const si8 cvi1 = vi1; + unsigned long long r5 = __builtin_reduce_max(cvi1); +} + +void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) { + // CHECK-LABEL: define void @test_builtin_reduce_min( + // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16 + // CHECK-NEXT: call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1]]) + float r1 = __builtin_reduce_min(vf1); + + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.smin.v8i16(<8 
x i16> [[VI1]])
+  short r2 = __builtin_reduce_min(vi1);
+
+  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_min(vu1);
+
+  // CHECK: [[VF1_AS1:%.+]] = load <4 x float>, <4 x float> addrspace(1)* @vf1_as_one, align 16
+  // CHECK-NEXT: [[RDX1:%.+]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[VF1_AS1]])
+  // CHECK-NEXT: fpext float [[RDX1]] to double
+  const double r4 = __builtin_reduce_min(vf1_as_one);
+
+  // CHECK: [[CVI1:%.+]] = load <8 x i16>, <8 x i16>* %cvi1, align 16
+  // CHECK-NEXT: [[RDX2:%.+]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[CVI1]])
+  // CHECK-NEXT: sext i16 [[RDX2]] to i64
+  const si8 cvi1 = vi1;
+  unsigned long long r5 = __builtin_reduce_min(cvi1);
+}
diff --git a/clang/test/CodeGen/ifunc.c b/clang/test/CodeGen/ifunc.c
index a88bb1878f265..fee9cc3dee99e 100644
--- a/clang/test/CodeGen/ifunc.c
+++ b/clang/test/CodeGen/ifunc.c
@@ -34,8 +34,8 @@ extern void goo(void) __attribute__ ((ifunc("goo_ifunc")));
 void* goo_ifunc(void) {
   return 0;
 }
-// CHECK: @foo = ifunc i32 (i32), bitcast (i32 (i32)* ()* @foo_ifunc to i32 (i32)*)
-// CHECK: @goo = ifunc void (), bitcast (i8* ()* @goo_ifunc to void ()*)
+// CHECK: @foo = ifunc i32 (i32), i32 (i32)* ()* @foo_ifunc
+// CHECK: @goo = ifunc void (), bitcast (i8* ()* @goo_ifunc to void ()* ()*)
 // CHECK: call i32 @foo(i32
 // CHECK: call void @goo()
diff --git a/clang/test/CodeGen/ms-inline-asm-static-variable.c b/clang/test/CodeGen/ms-inline-asm-static-variable.c
new file mode 100644
index 0000000000000..fb80bf7b2f74f
--- /dev/null
+++ b/clang/test/CodeGen/ms-inline-asm-static-variable.c
@@ -0,0 +1,10 @@
+// REQUIRES: x86-registered-target
+// Check the constraint "*m" of operand arr, and that the definition of arr is not removed by the FE
+// RUN: %clang_cc1 %s -fasm-blocks -triple i386-apple-darwin10 -emit-llvm -o - | FileCheck %s
+
+static int arr[10];
+void t1() {
+  // CHECK: @arr = internal global [10 x i32]
+  // CHECK: call void asm sideeffect inteldialect "mov dword ptr $0[edx * $$4],edx", "=*m,{{.*}}([10 x i32]* @arr)
+  __asm mov dword ptr arr[edx*4],edx
+}
diff --git a/clang/test/CodeGen/ms-inline-asm-variables.c b/clang/test/CodeGen/ms-inline-asm-variables.c
index f8fd227610b64..7d0cb3fbcc8f8 100644
--- a/clang/test/CodeGen/ms-inline-asm-variables.c
+++ b/clang/test/CodeGen/ms-inline-asm-variables.c
@@ -3,19 +3,19 @@
 int gVar;
 void t1() {
-  // CHECK: add eax, dword ptr gVar[eax]
+  // CHECK: add eax, dword ptr ${{[0-9]}}[eax]
   __asm add eax, dword ptr gVar[eax]
-  // CHECK: add dword ptr gVar[eax], eax
+  // CHECK: add dword ptr ${{[0-9]}}[eax], eax
   __asm add dword ptr [eax+gVar], eax
-  // CHECK: add ebx, dword ptr gVar[ebx + $$270]
+  // CHECK: add ebx, dword ptr ${{[0-9]}}[ebx + $$270]
   __asm add ebx, dword ptr gVar[271 - 82 + 81 + ebx]
-  // CHECK: add dword ptr gVar[ebx + $$828], ebx
+  // CHECK: add dword ptr ${{[0-9]}}[ebx + $$828], ebx
   __asm add dword ptr [ebx + gVar + 828], ebx
-  // CHECK: add ecx, dword ptr gVar[ecx + ecx * $$4 + $$4590]
+  // CHECK: add ecx, dword ptr ${{[0-9]}}[ecx + ecx * $$4 + $$4590]
   __asm add ecx, dword ptr gVar[4590 + ecx + ecx*4]
-  // CHECK: add dword ptr gVar[ecx + ecx * $$8 + $$73], ecx
+  // CHECK: add dword ptr ${{[0-9]}}[ecx + ecx * $$8 + $$73], ecx
   __asm add dword ptr [gVar + ecx + 45 + 23 - 53 + 60 - 2 + ecx*8], ecx
-  // CHECK: add gVar[ecx + ebx + $$7], eax
+  // CHECK: add ${{[0-9]}}[ecx + ebx + $$7], eax
   __asm add 1 + 1 + 2 + 3[gVar + ecx + ebx], eax
 }
@@ -32,4 +32,3 @@ void t2() {
   // CHECK: mov ${{[0-9]}}[ebx + $$47], eax
   __asm mov 5 + 8 + 13 + 21[lVar + ebx], eax
 }
-
diff --git a/clang/test/CodeGen/pr52382.c b/clang/test/CodeGen/pr52382.c
new file mode 100644
index 0000000000000..6150c936f6bbd
--- /dev/null
+++ b/clang/test/CodeGen/pr52382.c
@@ -0,0 +1,19 @@
+// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -o - -fsanitize=address %s | FileCheck %s
+
+// Ensure that ASan properly instruments a load into a global where the index
+// happens to be within the padding after the global, which is used for the
+// redzone.
+
+// This global is 400 bytes long, but gets padded with 112 bytes for redzones,
+// rounding the total size after instrumentation to 512.
+int global_array[100] = {-1};
+
+// This access is 412 bytes after the start of the global: past the end of the
+// uninstrumented array, but within the bounds of the extended instrumented
+// array. We should ensure this is still instrumented.
+int main(void) { return global_array[103]; }
+
+// CHECK: @main
+// CHECK-NEXT: entry:
+// CHECK: call void @__asan_report_load4
+// CHECK: }
diff --git a/clang/test/CodeGen/semantic-interposition.c b/clang/test/CodeGen/semantic-interposition.c
index 22923e1d494ed..3581312b8e27b 100644
--- a/clang/test/CodeGen/semantic-interposition.c
+++ b/clang/test/CodeGen/semantic-interposition.c
@@ -10,13 +10,13 @@
 // CHECK: @var = global i32 0, align 4
 // CHECK: @ext_var = external global i32, align 4
-// CHECK: @ifunc = ifunc i32 (), bitcast (i8* ()* @ifunc_resolver to i32 ()*)
+// CHECK: @ifunc = ifunc i32 (), bitcast (i8* ()* @ifunc_resolver to i32 ()* ()*)
 // CHECK: define dso_local i32 @func()
 // CHECK: declare i32 @ext()
 // PREEMPT: @var = global i32 0, align 4
 // PREEMPT: @ext_var = external global i32, align 4
-// PREEMPT: @ifunc = ifunc i32 (), bitcast (i8* ()* @ifunc_resolver to i32 ()*)
+// PREEMPT: @ifunc = ifunc i32 (), bitcast (i8* ()* @ifunc_resolver to i32 ()* ()*)
 // PREEMPT: define i32 @func()
 // PREEMPT: declare i32 @ext()
diff --git a/clang/test/CodeGen/strlen-inline-builtin-redecl.c b/clang/test/CodeGen/strlen-inline-builtin-redecl.c
new file mode 100644
index 0000000000000..c89f843a71a3e
--- /dev/null
+++ b/clang/test/CodeGen/strlen-inline-builtin-redecl.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+//
+// Verifies that clang-generated *.inline functions are removed when shadowed by an external definition
+
+// CHECK-NOT: strlen.inline
+
+unsigned long strnlen(const char *, unsigned long);
+void fortify_panic(const char *);
+
+extern inline __attribute__((always_inline)) __attribute__((gnu_inline)) unsigned long strlen(const char *p) {
+  return 1;
+}
+unsigned long mystrlen(char const *s) {
+  return strlen(s);
+}
+unsigned long strlen(const char *s) {
+  return 2;
+}
+unsigned long yourstrlen(char const *s) {
+  return strlen(s);
+}
diff --git a/clang/test/CodeGen/user-func-gnu-inline-redecl.c b/clang/test/CodeGen/user-func-gnu-inline-redecl.c
new file mode 100644
index 0000000000000..0415cbe1e6c70
--- /dev/null
+++ b/clang/test/CodeGen/user-func-gnu-inline-redecl.c
@@ -0,0 +1,20 @@
+// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -O1 -o - %s | FileCheck %s
+//
+// Verifies that the gnu_inline version is ignored in favor of the redecl
+
+extern inline __attribute__((gnu_inline)) unsigned long some_size(int c) {
+  return 1;
+}
+unsigned long mycall(int s) {
+  // CHECK-LABEL: i64 @mycall
+  // CHECK: ret i64 2
+  return some_size(s);
+}
+unsigned long some_size(int c) {
+ return 2; +} +unsigned long yourcall(int s) { + // CHECK-LABEL: i64 @yourcall + // CHECK: ret i64 2 + return some_size(s); +} diff --git a/clang/test/CodeGenCXX/complex128.cpp b/clang/test/CodeGenCXX/complex128.cpp new file mode 100644 index 0000000000000..71746314b9d39 --- /dev/null +++ b/clang/test/CodeGenCXX/complex128.cpp @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -emit-llvm -triple x86_64-unknown-linux-gnu %s -o - | FileCheck %s + +// Define __complex128 type corresponding to __float128 (as in GCC headers). +typedef _Complex float __attribute__((mode(TC))) __complex128; + +void check() { + // CHECK: alloca { fp128, fp128 } + __complex128 tmp; +} diff --git a/clang/test/CodeGenCXX/cxx20-consteval-crash.cpp b/clang/test/CodeGenCXX/cxx20-consteval-crash.cpp index ef868fa85749f..19f02c4cf41d0 100644 --- a/clang/test/CodeGenCXX/cxx20-consteval-crash.cpp +++ b/clang/test/CodeGenCXX/cxx20-consteval-crash.cpp @@ -12,3 +12,15 @@ auto x2 = X(); // CHECK-NEXT: @_ZN7PR507872x2E = global i32* @_ZN7PR507872x_E, align 4 } +namespace PR51484 { +// This code would previously cause a crash. +struct X { int val; }; +consteval X g() { return {0}; } +void f() { g(); } + +// CHECK: define dso_local void @_ZN7PR514841fEv() #0 { +// CHECK: entry: +// CHECK-NOT: call i32 @_ZN7PR514841gEv() +// CHECK: ret void +// CHECK: } +} diff --git a/clang/test/CodeGenCXX/debug-info-template.cpp b/clang/test/CodeGenCXX/debug-info-template.cpp index ba25e2136b221..4843f89943564 100644 --- a/clang/test/CodeGenCXX/debug-info-template.cpp +++ b/clang/test/CodeGenCXX/debug-info-template.cpp @@ -30,7 +30,7 @@ void func(); // CHECK: ![[TCNESTED]] ={{.*}}!DICompositeType(tag: DW_TAG_structure_type, name: "nested", // CHECK-SAME: scope: ![[TC:[0-9]+]], -// CHECK: ![[TC]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TC" +// CHECK: ![[TC]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "TC" // CHECK-SAME: templateParams: [[TCARGS:![0-9]*]] TC // CHECK: [[EMPTY:![0-9]*]] = !{} diff --git a/clang/test/CodeGenCXX/pr45964-decomp-transform.cpp b/clang/test/CodeGenCXX/pr45964-decomp-transform.cpp new file mode 100644 index 0000000000000..927624ca6e370 --- /dev/null +++ b/clang/test/CodeGenCXX/pr45964-decomp-transform.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -std=c++17 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s + +int a[1]; +// CHECK: @a = global [1 x i32] zeroinitializer +template <int> +void test_transform() { + auto [b] = a; +} +void (*d)(){test_transform<0>}; +// CHECK-LABEL: define {{.*}} @_Z14test_transformILi0EEvv +// CHECK: [[ENTRY:.*]]: +// CHECK-NEXT: [[ARR:%.*]] = alloca [1 x i32] +// CHECK-NEXT: [[BEGIN:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* [[ARR]], i64 0, i64 0 +// CHECK-NEXT: br label %[[BODY:.*]] +// CHECK-EMPTY: +// CHECK-NEXT: [[BODY]]: +// CHECK-NEXT: [[CUR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[NEXT:%.*]], %[[BODY]] ] +// CHECK-NEXT: [[DEST:%.*]] = getelementptr inbounds i32, i32* [[BEGIN]], i64 [[CUR]] +// CHECK-NEXT: [[SRC:%.*]] = getelementptr inbounds [1 x i32], [1 x i32]* @a, i64 0, i64 [[CUR]] +// CHECK-NEXT: [[X:%.*]] = load i32, i32* [[SRC]] +// CHECK-NEXT: store i32 [[X]], i32* [[DEST]] +// CHECK-NEXT: [[NEXT]] = add nuw i64 [[CUR]], 1 +// CHECK-NEXT: [[EQ:%.*]] = icmp eq i64 [[NEXT]], 1 +// CHECK-NEXT: br i1 [[EQ]], label %[[FIN:.*]], label %[[BODY]] +// CHECK-EMPTY: +// CHECK-NEXT: [[FIN]]: +// CHECK-NEXT: ret void diff --git a/clang/test/CodeGenCXX/ubsan-coroutines.cpp b/clang/test/CodeGenCXX/ubsan-coroutines.cpp index 8ce15ee6ae734..04ab0505f1401 100644
--- a/clang/test/CodeGenCXX/ubsan-coroutines.cpp +++ b/clang/test/CodeGenCXX/ubsan-coroutines.cpp @@ -2,7 +2,7 @@ // crash when the LLVM coroutines passes are run. // RUN: %clang_cc1 -emit-obj -std=c++2a -fsanitize=null %s -o %t.o -namespace std::experimental { +namespace std { template <typename R, typename... T> struct coroutine_traits { using promise_type = typename R::promise_type; }; @@ -18,11 +18,11 @@ template <typename Promise> struct coroutine_handle : coroutine_handle<> { coroutine_handle() = default; static coroutine_handle from_address(void *) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; @@ -39,7 +39,7 @@ struct task { struct awaitable { task await() { (void)co_await *this; } bool await_ready() { return false; } - bool await_suspend(std::experimental::coroutine_handle<> awaiter) { return false; } + bool await_suspend(std::coroutine_handle<> awaiter) { return false; } bool await_resume() { return false; } }; diff --git a/clang/test/CodeGenCoroutines/Inputs/coroutine-exp-namespace.h b/clang/test/CodeGenCoroutines/Inputs/coroutine-exp-namespace.h new file mode 100644 index 0000000000000..2e8b949ca1485 --- /dev/null +++ b/clang/test/CodeGenCoroutines/Inputs/coroutine-exp-namespace.h @@ -0,0 +1,82 @@ +#pragma once + +namespace std { +namespace experimental { +inline namespace coroutines_v1 { + +template <typename R, typename... T> struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template <typename Promise = void> struct coroutine_handle; + +template <> struct coroutine_handle<void> { + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + void operator()() { resume(); } + void *address() const noexcept { return ptr; } + void resume() const { __builtin_coro_resume(ptr); } + void destroy() const { __builtin_coro_destroy(ptr); } + bool done() const { return __builtin_coro_done(ptr); } + coroutine_handle &operator=(decltype(nullptr)) { + ptr = nullptr; + return *this; + } + coroutine_handle(decltype(nullptr)) : ptr(nullptr) {} + coroutine_handle() : ptr(nullptr) {} + // void reset() { ptr = nullptr; } // add to P0057?
+ explicit operator bool() const { return ptr; } + +protected: + void *ptr; +}; + +template <typename Promise> struct coroutine_handle : coroutine_handle<> { + using coroutine_handle<>::operator=; + + static coroutine_handle from_address(void *addr) noexcept { + coroutine_handle me; + me.ptr = addr; + return me; + } + + Promise &promise() const { + return *reinterpret_cast<Promise *>( + __builtin_coro_promise(ptr, alignof(Promise), false)); + } + static coroutine_handle from_promise(Promise &promise) { + coroutine_handle p; + p.ptr = __builtin_coro_promise(&promise, alignof(Promise), true); + return p; + } +}; + +template <typename _PromiseT> +bool operator==(coroutine_handle<_PromiseT> const &_Left, + coroutine_handle<_PromiseT> const &_Right) noexcept { + return _Left.address() == _Right.address(); +} + +template <typename _PromiseT> +bool operator!=(coroutine_handle<_PromiseT> const &_Left, + coroutine_handle<_PromiseT> const &_Right) noexcept { + return !(_Left == _Right); +} + +struct suspend_always { + bool await_ready() { return false; } + void await_suspend(coroutine_handle<>) {} + void await_resume() {} +}; +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +} // namespace coroutines_v1 +} // namespace experimental +} // namespace std diff --git a/clang/test/CodeGenCoroutines/Inputs/coroutine.h b/clang/test/CodeGenCoroutines/Inputs/coroutine.h index 2dd1ce7e97351..581c7166e8427 100644 --- a/clang/test/CodeGenCoroutines/Inputs/coroutine.h +++ b/clang/test/CodeGenCoroutines/Inputs/coroutine.h @@ -1,6 +1,6 @@ #pragma once -namespace std { namespace experimental { inline namespace coroutines_v1 { +namespace std { template <typename R, typename... T> struct coroutine_traits { using promise_type = typename R::promise_type; @@ -77,4 +77,4 @@ struct suspend_never { void await_resume() noexcept {} }; -}}} +} // namespace std diff --git a/clang/test/CodeGenCoroutines/coro-alloc-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-alloc-exp-namespace.cpp new file mode 100644 index 0000000000000..39223c38613ac --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-alloc-exp-namespace.cpp @@ -0,0 +1,255 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 \ +// RUN: -Wno-coroutine-missing-unhandled-exception -emit-llvm %s -o - -disable-llvm-passes \ +// RUN: | FileCheck %s + +namespace std { +namespace experimental { +template <typename... T> +struct coroutine_traits; // expected-note {{declared here}} + +template <typename Promise = void> +struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept { return {}; } +}; + +template <> +struct coroutine_handle<void> { + static coroutine_handle from_address(void *) { return {}; } + coroutine_handle() = default; + template <typename PromiseType> + coroutine_handle(coroutine_handle<PromiseType>) noexcept {} +}; + +} // end namespace experimental + +struct nothrow_t {}; +constexpr nothrow_t nothrow = {}; + +} // end namespace std + +// Required when get_return_object_on_allocation_failure() is defined by +// the promise.
+using SizeT = decltype(sizeof(int)); +void *operator new(SizeT __sz, const std::nothrow_t &) noexcept; +void operator delete(void *__p, const std::nothrow_t &)noexcept; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(std::experimental::coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +struct global_new_delete_tag {}; + +template <> +struct std::experimental::coroutine_traits<void, global_new_delete_tag> { + struct promise_type { + void get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + }; +}; + +// CHECK-LABEL: f0( +extern "C" void f0(global_new_delete_tag) { + // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: %[[NeedAlloc:.+]] = call i1 @llvm.coro.alloc(token %[[ID]]) + // CHECK: br i1 %[[NeedAlloc]], label %[[AllocBB:.+]], label %[[InitBB:.+]] + + // CHECK: [[AllocBB]]: + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[MEM:.+]] = call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + // CHECK: br label %[[InitBB]] + + // CHECK: [[InitBB]]: + // CHECK: %[[PHI:.+]] = phi i8* [ null, %{{.+}} ], [ %call, %[[AllocBB]] ] + // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(token %[[ID]], i8* %[[PHI]]) + + // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) + // CHECK: %[[NeedDealloc:.+]] = icmp ne i8* %[[MEM]], null + // CHECK: br i1 %[[NeedDealloc]], label %[[FreeBB:.+]], label %[[Afterwards:.+]] + + // CHECK: [[FreeBB]]: + // CHECK: call void @_ZdlPv(i8* %[[MEM]]) + // CHECK: br label %[[Afterwards]] + + // CHECK: [[Afterwards]]: + // CHECK: ret void + co_return; +} + +struct promise_new_tag {}; + +template <> +struct std::experimental::coroutine_traits<void, promise_new_tag> { + struct promise_type { + void *operator new(unsigned long); + void get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + }; +}; + +// CHECK-LABEL: f1( +extern "C" void f1(promise_new_tag) { + // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]]) + + // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( + // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) + // CHECK: call void @_ZdlPv(i8* %[[MEM]]) + co_return; +} + +struct promise_matching_placement_new_tag {}; + +template <> +struct std::experimental::coroutine_traits<void, promise_matching_placement_new_tag, int, float, double> { + struct promise_type { + void *operator new(unsigned long, promise_matching_placement_new_tag, + int, float, double); + void get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + }; +}; + +// CHECK-LABEL: f1a( +extern "C" void f1a(promise_matching_placement_new_tag, int x, float y, double z) { + // CHECK: store i32 %x, i32* %x.addr, align 4 + // CHECK: store float %y, float* %y.addr, align 4 + // CHECK: store double %z, double* %z.addr, align 8 + // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4 + // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4 + // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8 + // CHECK: call i8*
@_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) + co_return; +} + +// Declare a placement form operator new, such as the one described in +// C++ 18.6.1.3.1, which takes a void* argument. +void *operator new(SizeT __sz, void *__p) noexcept; + +struct promise_matching_global_placement_new_tag {}; +struct dummy {}; +template <> +struct std::experimental::coroutine_traits<void, promise_matching_global_placement_new_tag, dummy *> { + struct promise_type { + void get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + }; +}; + +// A coroutine that takes a single pointer argument should not invoke this +// placement form operator. [dcl.fct.def.coroutine]/7 dictates that lookup for +// allocation functions matching the coroutine function's signature be done +// within the scope of the promise type's class. +// CHECK-LABEL: f1b( +extern "C" void f1b(promise_matching_global_placement_new_tag, dummy *) { + // CHECK: call noalias nonnull i8* @_Znwm(i64 + co_return; +} + +struct promise_delete_tag {}; + +template <> +struct std::experimental::coroutine_traits<void, promise_delete_tag> { + struct promise_type { + void operator delete(void *); + void get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + }; +}; + +// CHECK-LABEL: f2( +extern "C" void f2(promise_delete_tag) { + // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + + // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( + // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]]) + co_return; +} + +struct promise_sized_delete_tag {}; + +template <> +struct std::experimental::coroutine_traits<void, promise_sized_delete_tag> { + struct promise_type { + void operator delete(void *, unsigned long); + void get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + }; +}; + +// CHECK-LABEL: f3( +extern "C" void f3(promise_sized_delete_tag) { + // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[SIZE]]) + + // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( + // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) + // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) + co_return; +} + +struct promise_on_alloc_failure_tag {}; + +template <> +struct std::experimental::coroutine_traits<int, promise_on_alloc_failure_tag> { + struct promise_type { + int get_return_object() { return 0; } + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + static int get_return_object_on_allocation_failure() { return -1; } + }; +}; + +// CHECK-LABEL: f4( +extern "C" int f4(promise_on_alloc_failure_tag) { + // CHECK: %[[RetVal:.+]] = alloca i32 + // CHECK: %[[Gro:.+]] = alloca i32 + // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 + // CHECK: %[[SIZE:.+]] = call i64
@llvm.coro.size.i64() + // CHECK: %[[MEM:.+]] = call noalias i8* @_ZnwmRKSt9nothrow_t(i64 %[[SIZE]], %"struct.std::nothrow_t"* nonnull align 1 dereferenceable(1) @_ZStL7nothrow) + // CHECK: %[[OK:.+]] = icmp ne i8* %[[MEM]], null + // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] + + // CHECK: [[ERRBB]]: + // CHECK: %[[FailRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type39get_return_object_on_allocation_failureEv( + // CHECK: store i32 %[[FailRet]], i32* %[[RetVal]] + // CHECK: br label %[[RetBB:.+]] + + // CHECK: [[OKBB]]: + // CHECK: %[[OkRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv( + // CHECK: store i32 %[[OkRet]], i32* %[[Gro]] + + // CHECK: %[[Tmp1:.*]] = load i32, i32* %[[Gro]] + // CHECK-NEXT: store i32 %[[Tmp1]], i32* %[[RetVal]] + // CHECK-NEXT: %[[Gro_CAST:.+]] = bitcast i32* %[[Gro]] to i8* + // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* %[[Gro_CAST]]) #2 + // CHECK-NEXT: br label %[[RetBB]] + + // CHECK: [[RetBB]]: + // CHECK: %[[LoadRet:.+]] = load i32, i32* %[[RetVal]], align 4 + // CHECK: ret i32 %[[LoadRet]] + co_return; +} diff --git a/clang/test/CodeGenCoroutines/coro-alloc.cpp b/clang/test/CodeGenCoroutines/coro-alloc.cpp index c60ca5a83d484..b1a2e290b4fa3 100644 --- a/clang/test/CodeGenCoroutines/coro-alloc.cpp +++ b/clang/test/CodeGenCoroutines/coro-alloc.cpp @@ -1,9 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 \ // RUN: -Wno-coroutine-missing-unhandled-exception -emit-llvm %s -o - -disable-llvm-passes \ // RUN: | FileCheck %s namespace std { -namespace experimental { template struct coroutine_traits; // expected-note {{declared here}} @@ -21,8 +20,6 @@ struct coroutine_handle { coroutine_handle(coroutine_handle) noexcept {} }; -} // end namespace experimental - struct nothrow_t {}; constexpr nothrow_t nothrow = {}; @@ -37,14 +34,14 @@ void operator delete(void* __p, const std::nothrow_t&) noexcept; struct suspend_always { bool await_ready() noexcept { return false; } - void await_suspend(std::experimental::coroutine_handle<>) noexcept {} + void await_suspend(std::coroutine_handle<>) noexcept {} void await_resume() noexcept {} }; struct global_new_delete_tag {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void get_return_object() {} suspend_always initial_suspend() { return {}; } @@ -83,8 +80,8 @@ extern "C" void f0(global_new_delete_tag) { struct promise_new_tag {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void *operator new(unsigned long); void get_return_object() {} @@ -98,7 +95,7 @@ struct std::experimental::coroutine_traits { extern "C" void f1(promise_new_tag ) { // CHECK: %[[ID:.+]] = call token @llvm.coro.id(i32 16 // CHECK: %[[SIZE:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]]) + // CHECK: call i8* @_ZNSt16coroutine_traitsIJv15promise_new_tagEE12promise_typenwEm(i64 %[[SIZE]]) // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) @@ -108,8 +105,8 @@ extern "C" void f1(promise_new_tag ) { struct promise_matching_placement_new_tag {}; 
-template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void *operator new(unsigned long, promise_matching_placement_new_tag, int, float, double); @@ -130,7 +127,7 @@ extern "C" void f1a(promise_matching_placement_new_tag, int x, float y , double // CHECK: %[[INT:.+]] = load i32, i32* %x.addr, align 4 // CHECK: %[[FLOAT:.+]] = load float, float* %y.addr, align 4 // CHECK: %[[DOUBLE:.+]] = load double, double* %z.addr, align 8 - // CHECK: call i8* @_ZNSt12experimental16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS1_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) + // CHECK: call i8* @_ZNSt16coroutine_traitsIJv34promise_matching_placement_new_tagifdEE12promise_typenwEmS0_ifd(i64 %[[SIZE]], i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) co_return; } @@ -140,8 +137,8 @@ void* operator new(SizeT __sz, void *__p) noexcept; struct promise_matching_global_placement_new_tag {}; struct dummy {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void get_return_object() {} suspend_always initial_suspend() { return {}; } @@ -162,8 +159,8 @@ extern "C" void f1b(promise_matching_global_placement_new_tag, dummy *) { struct promise_delete_tag {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void operator delete(void*); void get_return_object() {} @@ -181,14 +178,14 @@ extern "C" void f2(promise_delete_tag) { // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]]) + // CHECK: call void @_ZNSt16coroutine_traitsIJv18promise_delete_tagEE12promise_typedlEPv(i8* %[[MEM]]) co_return; } struct promise_sized_delete_tag {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void operator delete(void*, unsigned long); void get_return_object() {} @@ -207,14 +204,14 @@ extern "C" void f3(promise_sized_delete_tag) { // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( // CHECK: %[[MEM:.+]] = call i8* @llvm.coro.free(token %[[ID]], i8* %[[FRAME]]) // CHECK: %[[SIZE2:.+]] = call i64 @llvm.coro.size.i64() - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) + // CHECK: call void @_ZNSt16coroutine_traitsIJv24promise_sized_delete_tagEE12promise_typedlEPvm(i8* %[[MEM]], i64 %[[SIZE2]]) co_return; } struct promise_on_alloc_failure_tag {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { int get_return_object() { return 0; } suspend_always initial_suspend() { return {}; } @@ -235,12 +232,12 @@ extern "C" int f4(promise_on_alloc_failure_tag) { // CHECK: br i1 %[[OK]], label %[[OKBB:.+]], label %[[ERRBB:.+]] // CHECK: [[ERRBB]]: - // CHECK: %[[FailRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type39get_return_object_on_allocation_failureEv( + // CHECK: %[[FailRet:.+]] = call i32 @_ZNSt16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type39get_return_object_on_allocation_failureEv( // CHECK: store i32 %[[FailRet]], i32* %[[RetVal]] // CHECK: br label 
%[[RetBB:.+]] // CHECK: [[OKBB]]: - // CHECK: %[[OkRet:.+]] = call i32 @_ZNSt12experimental16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv( + // CHECK: %[[OkRet:.+]] = call i32 @_ZNSt16coroutine_traitsIJi28promise_on_alloc_failure_tagEE12promise_type17get_return_objectEv( // CHECK: store i32 %[[OkRet]], i32* %[[Gro]] // CHECK: %[[Tmp1:.*]] = load i32, i32* %[[Gro]] diff --git a/clang/test/CodeGenCoroutines/coro-always-inline-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-always-inline-exp-namespace.cpp new file mode 100644 index 0000000000000..ef7183b9642d5 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-always-inline-exp-namespace.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: -fexperimental-new-pass-manager -O0 %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: -fexperimental-new-pass-manager -fno-inline -O0 %s -o - | FileCheck %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: -O0 %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: -fno-inline -O0 %s -o - | FileCheck %s + +namespace std { +namespace experimental { + +struct handle {}; + +struct awaitable { + bool await_ready() noexcept { return true; } + // CHECK-NOT: await_suspend + inline void __attribute__((__always_inline__)) await_suspend(handle) noexcept {} + bool await_resume() noexcept { return true; } +}; + +template +struct coroutine_handle { + static handle from_address(void *address) noexcept { return {}; } +}; + +template +struct coroutine_traits { + struct promise_type { + awaitable initial_suspend() { return {}; } + awaitable final_suspend() noexcept { return {}; } + void return_void() {} + T get_return_object() { return T(); } + void unhandled_exception() {} + }; +}; +} // namespace experimental +} // namespace std + +// CHECK-LABEL: @_Z3foov +// CHECK-LABEL: entry: +// CHECK: [[CAST0:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[CAST0]]) +// CHECK: [[CAST1:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[CAST1]]) + +// CHECK: [[CAST2:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[CAST2]]) +// CHECK: [[CAST3:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[CAST3]]) +void foo() { co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-always-inline.cpp b/clang/test/CodeGenCoroutines/coro-always-inline.cpp index ef7183b9642d5..39a82960e8db9 100644 --- a/clang/test/CodeGenCoroutines/coro-always-inline.cpp +++ b/clang/test/CodeGenCoroutines/coro-always-inline.cpp @@ -1,15 +1,14 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -std=c++20 \ // RUN: -fexperimental-new-pass-manager -O0 %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -std=c++20 \ // RUN: -fexperimental-new-pass-manager -fno-inline -O0 %s -o - | FileCheck %s -// RUN: 
%clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -std=c++20 \ // RUN: -O0 %s -o - | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fcoroutines-ts \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -std=c++20 \ // RUN: -fno-inline -O0 %s -o - | FileCheck %s namespace std { -namespace experimental { struct handle {}; @@ -35,18 +34,17 @@ struct coroutine_traits { void unhandled_exception() {} }; }; -} // namespace experimental } // namespace std // CHECK-LABEL: @_Z3foov // CHECK-LABEL: entry: -// CHECK: [[CAST0:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK: [[CAST0:%[0-9]+]] = bitcast %"struct.std::awaitable"* %ref.tmp{{.*}} to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[CAST0]]) -// CHECK: [[CAST1:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK: [[CAST1:%[0-9]+]] = bitcast %"struct.std::awaitable"* %ref.tmp{{.*}} to i8* // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[CAST1]]) -// CHECK: [[CAST2:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK: [[CAST2:%[0-9]+]] = bitcast %"struct.std::awaitable"* %ref.tmp{{.*}} to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[CAST2]]) -// CHECK: [[CAST3:%[0-9]+]] = bitcast %"struct.std::experimental::awaitable"* %ref.tmp{{.*}} to i8* +// CHECK: [[CAST3:%[0-9]+]] = bitcast %"struct.std::awaitable"* %ref.tmp{{.*}} to i8* // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 1, i8* [[CAST3]]) void foo() { co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-await-domination-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-await-domination-exp-namespace.cpp new file mode 100644 index 0000000000000..008867eb4bcf1 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-await-domination-exp-namespace.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - | FileCheck %s +#include "Inputs/coroutine-exp-namespace.h" + +using namespace std::experimental; + +struct coro { + struct promise_type { + coro get_return_object(); + suspend_never initial_suspend(); + suspend_never final_suspend() noexcept; + void return_void(); + static void unhandled_exception(); + }; +}; + +struct A { + ~A(); + bool await_ready(); + int await_resume() { return 8; } + template void await_suspend(F); +}; + +extern "C" void consume(int); + +// Verifies that domination is properly built during cleanup. +// Without CGCleanup.cpp fix verifier was reporting: +// Instruction does not dominate all uses! 
+// %tmp.exprcleanup = alloca i32*, align 8 +// store i32* %x, i32** %tmp.exprcleanup, align 8 + +// CHECK-LABEL: f( +extern "C" coro f(int) { + int x = 42; + x = co_await A{}; + consume(x); +} diff --git a/clang/test/CodeGenCoroutines/coro-await-domination.cpp b/clang/test/CodeGenCoroutines/coro-await-domination.cpp index 3ce8cd7a6227f..61082170fc5a6 100644 --- a/clang/test/CodeGenCoroutines/coro-await-domination.cpp +++ b/clang/test/CodeGenCoroutines/coro-await-domination.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o - | FileCheck %s #include "Inputs/coroutine.h" -using namespace std::experimental; +using namespace std; struct coro { struct promise_type { diff --git a/clang/test/CodeGenCoroutines/coro-await-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-await-exp-namespace.cpp new file mode 100644 index 0000000000000..c7a8c7eafc032 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-await-exp-namespace.cpp @@ -0,0 +1,355 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 \ +// RUN: -emit-llvm %s -o - -disable-llvm-passes -Wno-coroutine -Wno-unused | FileCheck %s + +namespace std { +namespace experimental { +template +struct coroutine_traits; + +template struct coroutine_handle; + +template <> +struct coroutine_handle { + void *ptr; + static coroutine_handle from_address(void *); + void *address(); +}; + +template +struct coroutine_handle : coroutine_handle<> { + static coroutine_handle from_address(void *) noexcept; +}; + +} // namespace experimental +} // namespace std + +struct init_susp { + bool await_ready(); + void await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; +struct final_susp { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +struct suspend_always { + int stuff; + bool await_ready(); + void await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; + +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object(); + init_susp initial_suspend(); + final_susp final_suspend() noexcept; + void return_void(); + }; +}; + +// CHECK-LABEL: f0( +extern "C" void f0() { + // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( + + // See if initial_suspend was issued: + // ---------------------------------- + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type15initial_suspendEv( + // CHECK-NEXT: call zeroext i1 @_ZN9init_susp11await_readyEv(%struct.init_susp* + // CHECK: %[[INITSP_ID:.+]] = call token @llvm.coro.save( + // CHECK: call i8 @llvm.coro.suspend(token %[[INITSP_ID]], i1 false) + + co_await suspend_always{}; + // See if we need to suspend: + // -------------------------- + // CHECK: %[[READY:.+]] = call zeroext i1 @_ZN14suspend_always11await_readyEv(%struct.suspend_always* {{[^,]*}} %[[AWAITABLE:.+]]) + // CHECK: br i1 %[[READY]], label %[[READY_BB:.+]], label %[[SUSPEND_BB:.+]] + + // If we are suspending: + // --------------------- + // CHECK: [[SUSPEND_BB]]: + // CHECK: %[[SUSPEND_ID:.+]] = call token @llvm.coro.save( + // --------------------------- + // Build the coroutine handle and pass it to await_suspend + // --------------------------- + // CHECK: call i8* 
@_ZNSt12experimental16coroutine_handleINS_16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(i8* %[[FRAME]]) + // ... many lines of code to coerce coroutine_handle into an i8* scalar + // CHECK: %[[CH:.+]] = load i8*, i8** %{{.+}} + // CHECK: call void @_ZN14suspend_always13await_suspendENSt12experimental16coroutine_handleIvEE(%struct.suspend_always* {{[^,]*}} %[[AWAITABLE]], i8* %[[CH]]) + // ------------------------- + // Generate a suspend point: + // ------------------------- + // CHECK: %[[OUTCOME:.+]] = call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false) + // CHECK: switch i8 %[[OUTCOME]], label %[[RET_BB:.+]] [ + // CHECK: i8 0, label %[[READY_BB]] + // CHECK: i8 1, label %[[CLEANUP_BB:.+]] + // CHECK: ] + + // Cleanup code goes here: + // ----------------------- + // CHECK: [[CLEANUP_BB]]: + + // When coroutine is resumed, call await_resume + // -------------------------- + // CHECK: [[READY_BB]]: + // CHECK: call void @_ZN14suspend_always12await_resumeEv(%struct.suspend_always* {{[^,]*}} %[[AWAITABLE]]) + + // See if final_suspend was issued: + // ---------------------------------- + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type13final_suspendEv( + // CHECK-NEXT: call zeroext i1 @_ZN10final_susp11await_readyEv(%struct.final_susp* + // CHECK: %[[FINALSP_ID:.+]] = call token @llvm.coro.save( + // CHECK: call i8 @llvm.coro.suspend(token %[[FINALSP_ID]], i1 true) +} + +struct suspend_maybe { + float stuff; + ~suspend_maybe(); + bool await_ready(); + bool await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; + +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object(); + init_susp initial_suspend(); + final_susp final_suspend() noexcept; + void return_void(); + suspend_maybe yield_value(int); + }; +}; + +// CHECK-LABEL: f1( +extern "C" void f1(int) { + // CHECK: %[[PROMISE:.+]] = alloca %"struct.std::experimental::coroutine_traits::promise_type" + // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( + co_yield 42; + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJviEE12promise_type11yield_valueEi(%struct.suspend_maybe* sret(%struct.suspend_maybe) align 4 %[[AWAITER:.+]], %"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %[[PROMISE]], i32 42) + + // See if we need to suspend: + // -------------------------- + // CHECK: %[[READY:.+]] = call zeroext i1 @_ZN13suspend_maybe11await_readyEv(%struct.suspend_maybe* {{[^,]*}} %[[AWAITABLE]]) + // CHECK: br i1 %[[READY]], label %[[READY_BB:.+]], label %[[SUSPEND_BB:.+]] + + // If we are suspending: + // --------------------- + // CHECK: [[SUSPEND_BB]]: + // CHECK: %[[SUSPEND_ID:.+]] = call token @llvm.coro.save( + // --------------------------- + // Build the coroutine handle and pass it to await_suspend + // --------------------------- + // CHECK: call i8* @_ZNSt12experimental16coroutine_handleINS_16coroutine_traitsIJviEE12promise_typeEE12from_addressEPv(i8* %[[FRAME]]) + // ... 
many lines of code to coerce coroutine_handle into an i8* scalar + // CHECK: %[[CH:.+]] = load i8*, i8** %{{.+}} + // CHECK: %[[YES:.+]] = call zeroext i1 @_ZN13suspend_maybe13await_suspendENSt12experimental16coroutine_handleIvEE(%struct.suspend_maybe* {{[^,]*}} %[[AWAITABLE]], i8* %[[CH]]) + // ------------------------------------------- + // See if await_suspend decided not to suspend + // ------------------------------------------- + // CHECK: br i1 %[[YES]], label %[[SUSPEND_PLEASE:.+]], label %[[READY_BB]] + + // CHECK: [[SUSPEND_PLEASE]]: + // CHECK: call i8 @llvm.coro.suspend(token %[[SUSPEND_ID]], i1 false) + + // CHECK: [[READY_BB]]: + // CHECK: call void @_ZN13suspend_maybe12await_resumeEv(%struct.suspend_maybe* {{[^,]*}} %[[AWAITABLE]]) +} + +struct ComplexAwaiter { + template void await_suspend(F); + bool await_ready(); + _Complex float await_resume(); +}; +extern "C" void UseComplex(_Complex float); + +// CHECK-LABEL: @TestComplex( +extern "C" void TestComplex() { + UseComplex(co_await ComplexAwaiter{}); + // CHECK: call <2 x float> @_ZN14ComplexAwaiter12await_resumeEv(%struct.ComplexAwaiter* + // CHECK: call void @UseComplex(<2 x float> %{{.+}}) + + co_await ComplexAwaiter{}; + // CHECK: call <2 x float> @_ZN14ComplexAwaiter12await_resumeEv(%struct.ComplexAwaiter* + + _Complex float Val = co_await ComplexAwaiter{}; + // CHECK: call <2 x float> @_ZN14ComplexAwaiter12await_resumeEv(%struct.ComplexAwaiter* +} + +struct Aggr { + int X, Y, Z; + ~Aggr(); +}; +struct AggrAwaiter { + template void await_suspend(F); + bool await_ready(); + Aggr await_resume(); +}; + +extern "C" void Whatever(); +extern "C" void UseAggr(Aggr &&); + +// FIXME: Once the cleanup code is in, add testing that destructors for Aggr +// are invoked properly on the cleanup branches. 
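+// As a rough source-level model of TestAggr's first statement below (an illustration only, not something FileCheck verifies): the Aggr temporary produced by await_resume() lives until the end of the full-expression, roughly +// Aggr tmp = awaiter.await_resume(); UseAggr(static_cast<Aggr &&>(tmp)); tmp.~Aggr(); +// so ~Aggr must run after UseAggr() returns but before the following statement, which is exactly the ordering the CHECK lines enforce.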
+ +// CHECK-LABEL: @TestAggr( +extern "C" void TestAggr() { + UseAggr(co_await AggrAwaiter{}); + Whatever(); + // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret(%struct.Aggr) align 4 %[[AwaitResume:.+]], + // CHECK: call void @UseAggr(%struct.Aggr* nonnull align 4 dereferenceable(12) %[[AwaitResume]]) + // CHECK: call void @_ZN4AggrD1Ev(%struct.Aggr* {{[^,]*}} %[[AwaitResume]]) + // CHECK: call void @Whatever() + + co_await AggrAwaiter{}; + Whatever(); + // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret(%struct.Aggr) align 4 %[[AwaitResume2:.+]], + // CHECK: call void @_ZN4AggrD1Ev(%struct.Aggr* {{[^,]*}} %[[AwaitResume2]]) + // CHECK: call void @Whatever() + + Aggr Val = co_await AggrAwaiter{}; + Whatever(); + // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret(%struct.Aggr) align 4 %[[AwaitResume3:.+]], + // CHECK: call void @Whatever() + // CHECK: call void @_ZN4AggrD1Ev(%struct.Aggr* {{[^,]*}} %[[AwaitResume3]]) +} + +struct ScalarAwaiter { + template void await_suspend(F); + bool await_ready(); + int await_resume(); +}; + +extern "C" void UseScalar(int); + +// CHECK-LABEL: @TestScalar( +extern "C" void TestScalar() { + UseScalar(co_await ScalarAwaiter{}); + // CHECK: %[[Result:.+]] = call i32 @_ZN13ScalarAwaiter12await_resumeEv(%struct.ScalarAwaiter* + // CHECK: call void @UseScalar(i32 %[[Result]]) + + int Val = co_await ScalarAwaiter{}; + // CHECK: %[[Result2:.+]] = call i32 @_ZN13ScalarAwaiter12await_resumeEv(%struct.ScalarAwaiter* + // CHECK: store i32 %[[Result2]], i32* %[[TMP_EXPRCLEANUP:.+]], + // CHECK: %[[TMP:.+]] = load i32, i32* %[[TMP_EXPRCLEANUP]], + // CHECK: store i32 %[[TMP]], i32* %Val, + + co_await ScalarAwaiter{}; + // CHECK: call i32 @_ZN13ScalarAwaiter12await_resumeEv(%struct.ScalarAwaiter* +} + +// Test operator co_await codegen. +enum class MyInt : int {}; +ScalarAwaiter operator co_await(MyInt); + +struct MyAgg { + AggrAwaiter operator co_await(); +}; + +// CHECK-LABEL: @TestOpAwait( +extern "C" void TestOpAwait() { + co_await MyInt(42); + // CHECK: call void @_Zaw5MyInt(i32 42) + // CHECK: call i32 @_ZN13ScalarAwaiter12await_resumeEv(%struct.ScalarAwaiter* {{[^,]*}} % + + co_await MyAgg{}; + // CHECK: call void @_ZN5MyAggawEv(%struct.MyAgg* {{[^,]*}} % + // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret(%struct.Aggr) align 4 % +} + +// CHECK-LABEL: EndlessLoop( +extern "C" void EndlessLoop() { + // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( + + // See if initial_suspend was issued: + // ---------------------------------- + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type15initial_suspendEv( + // CHECK-NEXT: call zeroext i1 @_ZN9init_susp11await_readyEv(%struct.init_susp* + + for (;;) + co_await suspend_always{}; + + // Verify that final_suspend was NOT issued: + // ---------------------------------- + // CHECK-NOT: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type13final_suspendEv( + // CHECK-NOT: call zeroext i1 @_ZN10final_susp11await_readyEv(%struct.final_susp* +} + +// Verifies that we don't crash when awaiting on an lvalue. 
+// CHECK-LABEL: @_Z11AwaitLValuev( +void AwaitLValue() { + suspend_always lval; + co_await lval; +} + +struct RefTag {}; + +struct AwaitResumeReturnsLValue { + bool await_ready(); + void await_suspend(std::experimental::coroutine_handle<>); + RefTag &await_resume(); +}; + +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object(); + init_susp initial_suspend(); + final_susp final_suspend() noexcept; + void return_void(); + AwaitResumeReturnsLValue yield_value(int); + }; +}; + +// Verifies that we don't crash when returning an lvalue from an await_resume() +// expression. +// CHECK-LABEL: define{{.*}} void @_Z18AwaitReturnsLValued(double %0) +void AwaitReturnsLValue(double) { + AwaitResumeReturnsLValue a; + // CHECK: %[[AVAR:.+]] = alloca %struct.AwaitResumeReturnsLValue, + // CHECK: %[[XVAR:.+]] = alloca %struct.RefTag*, + + // CHECK: %[[YVAR:.+]] = alloca %struct.RefTag*, + // CHECK-NEXT: %[[TMP1:.+]] = alloca %struct.AwaitResumeReturnsLValue, + + // CHECK: %[[TMP_EXPRCLEANUP1:.+]] = alloca %struct.RefTag*, + // CHECK: %[[ZVAR:.+]] = alloca %struct.RefTag*, + // CHECK-NEXT: %[[TMP2:.+]] = alloca %struct.AwaitResumeReturnsLValue, + // CHECK: %[[TMP_EXPRCLEANUP2:.+]] = alloca %struct.RefTag*, + + // CHECK: %[[RES1:.+]] = call nonnull align 1 dereferenceable({{.*}}) %struct.RefTag* @_ZN24AwaitResumeReturnsLValue12await_resumeEv(%struct.AwaitResumeReturnsLValue* {{[^,]*}} %[[AVAR]]) + // CHECK-NEXT: store %struct.RefTag* %[[RES1]], %struct.RefTag** %[[XVAR]], + RefTag &x = co_await a; + + // CHECK: %[[RES2:.+]] = call nonnull align 1 dereferenceable({{.*}}) %struct.RefTag* @_ZN24AwaitResumeReturnsLValue12await_resumeEv(%struct.AwaitResumeReturnsLValue* {{[^,]*}} %[[TMP1]]) + // CHECK-NEXT: store %struct.RefTag* %[[RES2]], %struct.RefTag** %[[TMP_EXPRCLEANUP1]], + // CHECK: %[[LOAD_TMP1:.+]] = load %struct.RefTag*, %struct.RefTag** %[[TMP_EXPRCLEANUP1]], + // CHECK: store %struct.RefTag* %[[LOAD_TMP1]], %struct.RefTag** %[[YVAR]], + + RefTag &y = co_await AwaitResumeReturnsLValue{}; + // CHECK: %[[RES3:.+]] = call nonnull align 1 dereferenceable({{.*}}) %struct.RefTag* @_ZN24AwaitResumeReturnsLValue12await_resumeEv(%struct.AwaitResumeReturnsLValue* {{[^,]*}} %[[TMP2]]) + // CHECK-NEXT: store %struct.RefTag* %[[RES3]], %struct.RefTag** %[[TMP_EXPRCLEANUP2]], + // CHECK: %[[LOAD_TMP2:.+]] = load %struct.RefTag*, %struct.RefTag** %[[TMP_EXPRCLEANUP2]], + // CHECK: store %struct.RefTag* %[[LOAD_TMP2]], %struct.RefTag** %[[ZVAR]], + RefTag &z = co_yield 42; +} + +struct TailCallAwait { + bool await_ready(); + std::experimental::coroutine_handle<> await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; + +// CHECK-LABEL: @TestTailcall( +extern "C" void TestTailcall() { + co_await TailCallAwait{}; + + // CHECK: %[[RESULT:.+]] = call i8* @_ZN13TailCallAwait13await_suspendENSt12experimental16coroutine_handleIvEE(%struct.TailCallAwait* + // CHECK: %[[COERCE:.+]] = getelementptr inbounds %"struct.std::experimental::coroutine_handle", %"struct.std::experimental::coroutine_handle"* %[[TMP:.+]], i32 0, i32 0 + // CHECK: store i8* %[[RESULT]], i8** %[[COERCE]] + // CHECK: %[[ADDR:.+]] = call i8* @_ZNSt12experimental16coroutine_handleIvE7addressEv(%"struct.std::experimental::coroutine_handle"* {{[^,]*}} %[[TMP]]) + // CHECK: call void @llvm.coro.resume(i8* %[[ADDR]]) +} diff --git a/clang/test/CodeGenCoroutines/coro-await-resume-eh-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-await-resume-eh-exp-namespace.cpp new file mode 
100644 index 0000000000000..87c021ff2d056 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-await-resume-eh-exp-namespace.cpp @@ -0,0 +1,114 @@ +// Test the behavior of http://wg21.link/P0664, a proposal to catch any +// exceptions thrown after the initial suspend point of a coroutine by +// executing the handler specified by the promise type's 'unhandled_exception' +// member function. +// +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts \ +// RUN: -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s \ +// RUN: -fexceptions -fcxx-exceptions -disable-llvm-passes \ +// RUN: | FileCheck %s + +#include "Inputs/coroutine-exp-namespace.h" + +namespace coro = std::experimental::coroutines_v1; + +struct throwing_awaitable { + bool await_ready() { return true; } + void await_suspend(coro::coroutine_handle<>) {} + void await_resume() { throw 42; } +}; + +struct throwing_task { + struct promise_type { + auto get_return_object() { return throwing_task{}; } + auto initial_suspend() { return throwing_awaitable{}; } + auto final_suspend() noexcept { return coro::suspend_never{}; } + void return_void() {} + void unhandled_exception() {} + }; +}; + +// CHECK-LABEL: define{{.*}} void @_Z1fv() +throwing_task f() { + // A variable RESUMETHREW is used to keep track of whether the body + // of 'await_resume' threw an exception. Exceptions thrown in + // 'await_resume' are unwound to RESUMELPAD. + // CHECK: init.ready: + // CHECK-NEXT: store i1 true, i1* %[[RESUMETHREW:.+]], align 1 + // CHECK-NEXT: invoke void @_ZN18throwing_awaitable12await_resumeEv + // CHECK-NEXT: to label %[[RESUMECONT:.+]] unwind label %[[RESUMELPAD:.+]] + + // If 'await_resume' does not throw an exception, 'false' is stored in + // variable RESUMETHREW. + // CHECK: [[RESUMECONT]]: + // CHECK-NEXT: store i1 false, i1* %[[RESUMETHREW]] + // CHECK-NEXT: br label %[[RESUMETRYCONT:.+]] + + // 'unhandled_exception' is called for the exception thrown in + // 'await_resume'. The variable RESUMETHREW is never set to false, + // and a jump is made to RESUMETRYCONT. + // CHECK: [[RESUMELPAD]]: + // CHECK: br label %[[RESUMECATCH:.+]] + // CHECK: [[RESUMECATCH]]: + // CHECK: invoke void @_ZN13throwing_task12promise_type19unhandled_exceptionEv + // CHECK-NEXT: to label %[[RESUMEENDCATCH:.+]] unwind label + // CHECK: [[RESUMEENDCATCH]]: + // CHECK-NEXT: invoke void @__cxa_end_catch() + // CHECK-NEXT: to label %[[RESUMEENDCATCHCONT:.+]] unwind label + // CHECK: [[RESUMEENDCATCHCONT]]: + // CHECK-NEXT: br label %[[RESUMETRYCONT]] + // CHECK: [[RESUMETRYCONT]]: + // CHECK-NEXT: br label %[[CLEANUP:.+]] + // CHECK: [[CLEANUP]]: + // CHECK: switch i32 %{{.+}}, label %{{.+}} [ + // CHECK-NEXT: i32 0, label %[[CLEANUPCONT:.+]] + // CHECK-NEXT: ] + + // The variable RESUMETHREW is loaded and if true, then 'await_resume' + // threw an exception and the coroutine body is skipped, and the final + // suspend is executed immediately. Otherwise, the coroutine body is + // executed, and then the final suspend. 
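+ // As a source-level sketch of this lowering (illustrative only; the names are invented and this is not the emitted IR): + // bool resume_threw = true; + // try { init_awaitable.await_resume(); resume_threw = false; } + // catch (...) { promise.unhandled_exception(); } + // if (!resume_threw) { /* coroutine body */ promise.return_void(); } + // /* final suspend */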
+ // CHECK: [[CLEANUPCONT]]: + // CHECK-NEXT: %[[RESUMETHREWLOAD:.+]] = load i1, i1* %[[RESUMETHREW]] + // CHECK-NEXT: br i1 %[[RESUMETHREWLOAD]], label %[[RESUMEDCONT:.+]], label %[[RESUMEDBODY:.+]] + + // CHECK: [[RESUMEDBODY]]: + // CHECK: invoke void @_ZN13throwing_task12promise_type11return_voidEv + // CHECK-NEXT: to label %[[RESUMEDBODYCONT:.+]] unwind label + // CHECK: [[RESUMEDBODYCONT]]: + // CHECK-NEXT: br label %[[COROFINAL:.+]] + + // CHECK: [[RESUMEDCONT]]: + // CHECK-NEXT: br label %[[COROFINAL]] + + // CHECK: [[COROFINAL]]: + // CHECK: call void @_ZN13throwing_task12promise_type13final_suspendEv + co_return; +} + +struct noexcept_awaitable { + bool await_ready() { return true; } + void await_suspend(coro::coroutine_handle<>) {} + void await_resume() noexcept {} +}; + +struct noexcept_task { + struct promise_type { + auto get_return_object() { return noexcept_task{}; } + auto initial_suspend() { return noexcept_awaitable{}; } + auto final_suspend() noexcept { return coro::suspend_never{}; } + void return_void() {} + void unhandled_exception() {} + }; +}; + +// CHECK-LABEL: define{{.*}} void @_Z1gv() +noexcept_task g() { + // If the await_resume function is marked as noexcept, none of the additional + // conditions that are present in f() above are added to the IR. + // This means that no i1 values are stored before or after calling await_resume: + // CHECK: init.ready: + // CHECK-NEXT: call void @_ZN18noexcept_awaitable12await_resumeEv + // CHECK-NOT: store i1 false, i1* + co_return; +} diff --git a/clang/test/CodeGenCoroutines/coro-await-resume-eh.cpp b/clang/test/CodeGenCoroutines/coro-await-resume-eh.cpp index 1b0c3a1c5c577..25616dcc62d91 100644 --- a/clang/test/CodeGenCoroutines/coro-await-resume-eh.cpp +++ b/clang/test/CodeGenCoroutines/coro-await-resume-eh.cpp @@ -3,18 +3,16 @@ // executing the handler specified by the promise type's 'unhandled_exception' // member function.
// -// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts \ +// RUN: %clang_cc1 -std=c++20 \ // RUN: -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s \ // RUN: -fexceptions -fcxx-exceptions -disable-llvm-passes \ // RUN: | FileCheck %s #include "Inputs/coroutine.h" -namespace coro = std::experimental::coroutines_v1; - struct throwing_awaitable { bool await_ready() { return true; } - void await_suspend(coro::coroutine_handle<>) {} + void await_suspend(std::coroutine_handle<>) {} void await_resume() { throw 42; } }; @@ -22,7 +20,7 @@ struct throwing_task { struct promise_type { auto get_return_object() { return throwing_task{}; } auto initial_suspend() { return throwing_awaitable{}; } - auto final_suspend() noexcept { return coro::suspend_never{}; } + auto final_suspend() noexcept { return std::suspend_never{}; } void return_void() {} void unhandled_exception() {} }; @@ -88,7 +86,7 @@ throwing_task f() { struct noexcept_awaitable { bool await_ready() { return true; } - void await_suspend(coro::coroutine_handle<>) {} + void await_suspend(std::coroutine_handle<>) {} void await_resume() noexcept {} }; @@ -96,7 +94,7 @@ struct noexcept_task { struct promise_type { auto get_return_object() { return noexcept_task{}; } auto initial_suspend() { return noexcept_awaitable{}; } - auto final_suspend() noexcept { return coro::suspend_never{}; } + auto final_suspend() noexcept { return std::suspend_never{}; } void return_void() {} void unhandled_exception() {} }; diff --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp index 3fa45d5f9ab68..158d553233778 100644 --- a/clang/test/CodeGenCoroutines/coro-await.cpp +++ b/clang/test/CodeGenCoroutines/coro-await.cpp @@ -1,8 +1,7 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 \ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 \ // RUN: -emit-llvm %s -o - -disable-llvm-passes -Wno-coroutine -Wno-unused | FileCheck %s namespace std { -namespace experimental { template struct coroutine_traits; @@ -20,29 +19,28 @@ struct coroutine_handle : coroutine_handle<> { static coroutine_handle from_address(void *) noexcept; }; -} -} +} // namespace std struct init_susp { bool await_ready(); - void await_suspend(std::experimental::coroutine_handle<>); + void await_suspend(std::coroutine_handle<>); void await_resume(); }; struct final_susp { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; struct suspend_always { int stuff; bool await_ready(); - void await_suspend(std::experimental::coroutine_handle<>); + void await_suspend(std::coroutine_handle<>); void await_resume(); }; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void get_return_object(); init_susp initial_suspend(); @@ -57,7 +55,7 @@ extern "C" void f0() { // See if initial_suspend was issued: // ---------------------------------- - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type15initial_suspendEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJvEE12promise_type15initial_suspendEv( // CHECK-NEXT: call zeroext i1 @_ZN9init_susp11await_readyEv(%struct.init_susp* // CHECK: %[[INITSP_ID:.+]] = call token @llvm.coro.save( // CHECK: call i8 @llvm.coro.suspend(token %[[INITSP_ID]], i1 false) @@ -75,10 +73,10 @@ extern "C" void f0() { // --------------------------- // Build the coroutine handle 
and pass it to await_suspend // --------------------------- - // CHECK: call i8* @_ZNSt12experimental16coroutine_handleINS_16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(i8* %[[FRAME]]) + // CHECK: call i8* @_ZNSt16coroutine_handleINSt16coroutine_traitsIJvEE12promise_typeEE12from_addressEPv(i8* %[[FRAME]]) // ... many lines of code to coerce coroutine_handle into an i8* scalar // CHECK: %[[CH:.+]] = load i8*, i8** %{{.+}} - // CHECK: call void @_ZN14suspend_always13await_suspendENSt12experimental16coroutine_handleIvEE(%struct.suspend_always* {{[^,]*}} %[[AWAITABLE]], i8* %[[CH]]) + // CHECK: call void @_ZN14suspend_always13await_suspendESt16coroutine_handleIvE(%struct.suspend_always* {{[^,]*}} %[[AWAITABLE]], i8* %[[CH]]) // ------------------------- // Generate a suspend point: // ------------------------- @@ -99,7 +97,7 @@ extern "C" void f0() { // See if final_suspend was issued: // ---------------------------------- - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type13final_suspendEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJvEE12promise_type13final_suspendEv( // CHECK-NEXT: call zeroext i1 @_ZN10final_susp11await_readyEv(%struct.final_susp* // CHECK: %[[FINALSP_ID:.+]] = call token @llvm.coro.save( // CHECK: call i8 @llvm.coro.suspend(token %[[FINALSP_ID]], i1 true) @@ -109,13 +107,12 @@ struct suspend_maybe { float stuff; ~suspend_maybe(); bool await_ready(); - bool await_suspend(std::experimental::coroutine_handle<>); + bool await_suspend(std::coroutine_handle<>); void await_resume(); }; - -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void get_return_object(); init_susp initial_suspend(); @@ -127,10 +124,10 @@ struct std::experimental::coroutine_traits { // CHECK-LABEL: f1( extern "C" void f1(int) { - // CHECK: %[[PROMISE:.+]] = alloca %"struct.std::experimental::coroutine_traits::promise_type" + // CHECK: %[[PROMISE:.+]] = alloca %"struct.std::coroutine_traits::promise_type" // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin( co_yield 42; - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJviEE12promise_type11yield_valueEi(%struct.suspend_maybe* sret(%struct.suspend_maybe) align 4 %[[AWAITER:.+]], %"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %[[PROMISE]], i32 42) + // CHECK: call void @_ZNSt16coroutine_traitsIJviEE12promise_type11yield_valueEi(%struct.suspend_maybe* sret(%struct.suspend_maybe) align 4 %[[AWAITER:.+]], %"struct.std::coroutine_traits::promise_type"* {{[^,]*}} %[[PROMISE]], i32 42) // See if we need to suspend: // -------------------------- @@ -144,10 +141,10 @@ extern "C" void f1(int) { // --------------------------- // Build the coroutine handle and pass it to await_suspend // --------------------------- - // CHECK: call i8* @_ZNSt12experimental16coroutine_handleINS_16coroutine_traitsIJviEE12promise_typeEE12from_addressEPv(i8* %[[FRAME]]) + // CHECK: call i8* @_ZNSt16coroutine_handleINSt16coroutine_traitsIJviEE12promise_typeEE12from_addressEPv(i8* %[[FRAME]]) // ... 
many lines of code to coerce coroutine_handle into an i8* scalar // CHECK: %[[CH:.+]] = load i8*, i8** %{{.+}} - // CHECK: %[[YES:.+]] = call zeroext i1 @_ZN13suspend_maybe13await_suspendENSt12experimental16coroutine_handleIvEE(%struct.suspend_maybe* {{[^,]*}} %[[AWAITABLE]], i8* %[[CH]]) + // CHECK: %[[YES:.+]] = call zeroext i1 @_ZN13suspend_maybe13await_suspendESt16coroutine_handleIvE(%struct.suspend_maybe* {{[^,]*}} %[[AWAITABLE]], i8* %[[CH]]) // ------------------------------------------- // See if await_suspend decided not to suspend // ------------------------------------------- @@ -264,7 +261,7 @@ extern "C" void EndlessLoop() { // See if initial_suspend was issued: // ---------------------------------- - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type15initial_suspendEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJvEE12promise_type15initial_suspendEv( // CHECK-NEXT: call zeroext i1 @_ZN9init_susp11await_readyEv(%struct.init_susp* for (;;) @@ -272,7 +269,7 @@ extern "C" void EndlessLoop() { // Verify that final_suspend was NOT issued: // ---------------------------------- - // CHECK-NOT: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type13final_suspendEv( + // CHECK-NOT: call void @_ZNSt16coroutine_traitsIJvEE12promise_type13final_suspendEv( // CHECK-NOT: call zeroext i1 @_ZN10final_susp11await_readyEv(%struct.final_susp* } @@ -287,13 +284,12 @@ struct RefTag { }; struct AwaitResumeReturnsLValue { bool await_ready(); - void await_suspend(std::experimental::coroutine_handle<>); + void await_suspend(std::coroutine_handle<>); RefTag& await_resume(); }; - -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void get_return_object(); init_susp initial_suspend(); @@ -338,7 +334,7 @@ void AwaitReturnsLValue(double) { struct TailCallAwait { bool await_ready(); - std::experimental::coroutine_handle<> await_suspend(std::experimental::coroutine_handle<>); + std::coroutine_handle<> await_suspend(std::coroutine_handle<>); void await_resume(); }; @@ -346,9 +342,9 @@ struct TailCallAwait { extern "C" void TestTailcall() { co_await TailCallAwait{}; - // CHECK: %[[RESULT:.+]] = call i8* @_ZN13TailCallAwait13await_suspendENSt12experimental16coroutine_handleIvEE(%struct.TailCallAwait* - // CHECK: %[[COERCE:.+]] = getelementptr inbounds %"struct.std::experimental::coroutine_handle", %"struct.std::experimental::coroutine_handle"* %[[TMP:.+]], i32 0, i32 0 + // CHECK: %[[RESULT:.+]] = call i8* @_ZN13TailCallAwait13await_suspendESt16coroutine_handleIvE(%struct.TailCallAwait* + // CHECK: %[[COERCE:.+]] = getelementptr inbounds %"struct.std::coroutine_handle", %"struct.std::coroutine_handle"* %[[TMP:.+]], i32 0, i32 0 // CHECK: store i8* %[[RESULT]], i8** %[[COERCE]] - // CHECK: %[[ADDR:.+]] = call i8* @_ZNSt12experimental16coroutine_handleIvE7addressEv(%"struct.std::experimental::coroutine_handle"* {{[^,]*}} %[[TMP]]) + // CHECK: %[[ADDR:.+]] = call i8* @_ZNSt16coroutine_handleIvE7addressEv(%"struct.std::coroutine_handle"* {{[^,]*}} %[[TMP]]) // CHECK: call void @llvm.coro.resume(i8* %[[ADDR]]) -} +} \ No newline at end of file diff --git a/clang/test/CodeGenCoroutines/coro-cleanup-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-cleanup-exp-namespace.cpp new file mode 100644 index 0000000000000..0f80a98273149 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-cleanup-exp-namespace.cpp @@ -0,0 +1,99 @@ +// Verify that coroutine promise and allocated memory are freed up on 
exception. +// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s + +namespace std::experimental { +template struct coroutine_traits; + +template struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; +}; +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +template <> struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + promise_type(); + ~promise_type(); + void unhandled_exception() noexcept; + }; +}; + +struct Cleanup { + ~Cleanup(); +}; +void may_throw(); + +// CHECK-LABEL: define{{.*}} void @_Z1fv( +void f() { + // CHECK: call noalias nonnull i8* @_Znwm(i64 + + // If promise constructor throws, check that we free the memory. + + // CHECK: invoke void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeC1Ev( + // CHECK-NEXT: to label %{{.+}} unwind label %[[DeallocPad:.+]] + + // CHECK: [[DeallocPad]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: br label %[[Dealloc:.+]] + + Cleanup cleanup; + may_throw(); + + // if may_throw throws, check that we destroy the promise and free the memory. + + // CHECK: invoke void @_Z9may_throwv( + // CHECK-NEXT: to label %{{.+}} unwind label %[[CatchPad:.+]] + + // CHECK: [[CatchPad]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: catch i8* null + // CHECK: call void @_ZN7CleanupD1Ev( + // CHECK: br label %[[Catch:.+]] + + // CHECK: [[Catch]]: + // CHECK: call i8* @__cxa_begin_catch( + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type19unhandled_exceptionEv( + // CHECK: invoke void @__cxa_end_catch() + // CHECK-NEXT: to label %[[Cont:.+]] unwind + + // CHECK: [[Cont]]: + // CHECK-NEXT: br label %[[Cont2:.+]] + // CHECK: [[Cont2]]: + // CHECK-NEXT: br label %[[Cleanup:.+]] + + // CHECK: [[Cleanup]]: + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeD1Ev( + // CHECK: %[[Mem0:.+]] = call i8* @llvm.coro.free( + // CHECK: call void @_ZdlPv(i8* %[[Mem0]] + + // CHECK: [[Dealloc]]: + // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( + // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + + co_return; +} + +// CHECK-LABEL: define{{.*}} void @_Z1gv( +void g() { + for (;;) + co_await suspend_always{}; + // Since this is the endless loop there should be no fallthrough handler (call to 'return_void'). + // CHECK-NOT: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type11return_voidEv +} diff --git a/clang/test/CodeGenCoroutines/coro-cleanup.cpp b/clang/test/CodeGenCoroutines/coro-cleanup.cpp index 7ef614e817d2e..8535d51051717 100644 --- a/clang/test/CodeGenCoroutines/coro-cleanup.cpp +++ b/clang/test/CodeGenCoroutines/coro-cleanup.cpp @@ -1,7 +1,7 @@ // Verify that coroutine promise and allocated memory are freed up on exception. 
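The bulk of the CHECK-line churn throughout these test updates is mechanical: under the Itanium C++ ABI each nested-name component is length-prefixed, so dropping the experimental namespace rewrites the prefix of every mangled symbol the tests match. A worked pair taken from this patch, with demangled forms added for orientation (c++filt is a convenient way to spot-check any updated CHECK line):

  _ZNSt12experimental16coroutine_traitsIJvEE12promise_type13final_suspendEv
      std::experimental::coroutine_traits<void>::promise_type::final_suspend()
  _ZNSt16coroutine_traitsIJvEE12promise_type13final_suspendEv
      std::coroutine_traits<void>::promise_type::final_suspend()

  $ echo _ZNSt16coroutine_handleIvE7addressEv | c++filt
  std::coroutine_handle<void>::address()

Here St abbreviates std, and 12experimental / 16coroutine_traits are the length-prefixed components; removing one component is why every symbol in these tests changes shape the same way.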
-// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s -namespace std::experimental { +namespace std { template struct coroutine_traits; template struct coroutine_handle { @@ -14,15 +14,15 @@ template <> struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; -template <> struct std::experimental::coroutine_traits { +template <> struct std::coroutine_traits { struct promise_type { void get_return_object() noexcept; suspend_always initial_suspend() noexcept; @@ -43,7 +43,7 @@ void f() { // If promise constructor throws, check that we free the memory. - // CHECK: invoke void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeC1Ev( + // CHECK: invoke void @_ZNSt16coroutine_traitsIJvEE12promise_typeC1Ev( // CHECK-NEXT: to label %{{.+}} unwind label %[[DeallocPad:.+]] // CHECK: [[DeallocPad]]: @@ -67,7 +67,7 @@ void f() { // CHECK: [[Catch]]: // CHECK: call i8* @__cxa_begin_catch( - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type19unhandled_exceptionEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJvEE12promise_type19unhandled_exceptionEv( // CHECK: invoke void @__cxa_end_catch() // CHECK-NEXT: to label %[[Cont:.+]] unwind @@ -77,7 +77,7 @@ void f() { // CHECK-NEXT: br label %[[Cleanup:.+]] // CHECK: [[Cleanup]]: - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_typeD1Ev( + // CHECK: call void @_ZNSt16coroutine_traitsIJvEE12promise_typeD1Ev( // CHECK: %[[Mem0:.+]] = call i8* @llvm.coro.free( // CHECK: call void @_ZdlPv(i8* %[[Mem0]] @@ -93,5 +93,5 @@ void g() { for (;;) co_await suspend_always{}; // Since this is the endless loop there should be no fallthrough handler (call to 'return_void'). - // CHECK-NOT: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type11return_voidEv + // CHECK-NOT: call void @_ZNSt16coroutine_traitsIJvEE12promise_type11return_voidEv } diff --git a/clang/test/CodeGenCoroutines/coro-dest-slot-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-dest-slot-exp-namespace.cpp new file mode 100644 index 0000000000000..6ea24b0be1a5f --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-dest-slot-exp-namespace.cpp @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s + +#include "Inputs/coroutine-exp-namespace.h" + +using namespace std::experimental; + +struct coro { + struct promise_type { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_never final_suspend() noexcept; + void return_void(); + static void unhandled_exception(); + }; +}; + +extern "C" coro f(int) { co_return; } +// Verify that cleanup.dest.slot is eliminated in a coroutine. 
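For context on the slot being tested: when control exits a scope through cleanup code, Clang records the eventual branch destination as a small integer, normally stored to a stack slot named cleanup.dest.slot. In a coroutine that slot would have to live in the coroutine frame across suspend points, so CodeGen is expected to carry the index in SSA form instead. An illustrative contrast (hand-written IR shapes, not output of this test):

  // Shape the test guards against: the index round-trips through memory,
  // forcing a coroutine-frame slot.
  //   store i32 2, i32* %cleanup.dest.slot
  //   ; ...suspend point...
  //   %dest = load i32, i32* %cleanup.dest.slot
  //
  // Shape the CHECK lines accept: the index flows through phis, e.g.
  //   %cleanup.dest = phi i32 [ 0, %init.ready ], [ 2, %init.cleanup ]
  // where 0 selects normal fallthrough and 2 selects the cleanup destination.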
+// CHECK-LABEL: f( +// CHECK: %[[INIT_SUSPEND:.+]] = call i8 @llvm.coro.suspend( +// CHECK-NEXT: switch i8 %[[INIT_SUSPEND]], label +// CHECK-NEXT: i8 0, label %[[INIT_READY:.+]] +// CHECK-NEXT: i8 1, label %[[INIT_CLEANUP:.+]] +// CHECK-NEXT: ] +// CHECK: %[[CLEANUP_DEST0:.+]] = phi i32 [ 0, %[[INIT_READY]] ], [ 2, %[[INIT_CLEANUP]] ] + +// CHECK: %[[FINAL_SUSPEND:.+]] = call i8 @llvm.coro.suspend( +// CHECK-NEXT: switch i8 %{{.*}}, label %coro.ret [ +// CHECK-NEXT: i8 0, label %[[FINAL_READY:.+]] +// CHECK-NEXT: i8 1, label %[[FINAL_CLEANUP:.+]] +// CHECK-NEXT: ] + +// CHECK: call void @_ZNSt12experimental13coroutines_v113suspend_never12await_resumeEv( +// CHECK: %[[CLEANUP_DEST1:.+]] = phi i32 [ 0, %[[FINAL_READY]] ], [ 2, %[[FINAL_CLEANUP]] ] +// CHECK: %[[CLEANUP_DEST2:.+]] = phi i32 [ %[[CLEANUP_DEST0]], %{{.+}} ], [ %[[CLEANUP_DEST1]], %{{.+}} ], [ 0, %{{.+}} ] +// CHECK: call i8* @llvm.coro.free( +// CHECK: switch i32 %[[CLEANUP_DEST2]], label %{{.+}} [ +// CHECK-NEXT: i32 0 +// CHECK-NEXT: i32 2 +// CHECK-NEXT: ] diff --git a/clang/test/CodeGenCoroutines/coro-dest-slot.cpp b/clang/test/CodeGenCoroutines/coro-dest-slot.cpp index c7129df115261..c986130bc3bae 100644 --- a/clang/test/CodeGenCoroutines/coro-dest-slot.cpp +++ b/clang/test/CodeGenCoroutines/coro-dest-slot.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s #include "Inputs/coroutine.h" -using namespace std::experimental; +using namespace std; struct coro { struct promise_type { @@ -30,7 +30,7 @@ extern "C" coro f(int) { co_return; } // CHECK-NEXT: i8 1, label %[[FINAL_CLEANUP:.+]] // CHECK-NEXT: ] -// CHECK: call void @_ZNSt12experimental13coroutines_v113suspend_never12await_resumeEv( +// CHECK: call void @_ZNSt13suspend_never12await_resumeEv( // CHECK: %[[CLEANUP_DEST1:.+]] = phi i32 [ 0, %[[FINAL_READY]] ], [ 2, %[[FINAL_CLEANUP]] ] // CHECK: %[[CLEANUP_DEST2:.+]] = phi i32 [ %[[CLEANUP_DEST0]], %{{.+}} ], [ %[[CLEANUP_DEST1]], %{{.+}} ], [ 0, %{{.+}} ] // CHECK: call i8* @llvm.coro.free( diff --git a/clang/test/CodeGenCoroutines/coro-dwarf-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-dwarf-exp-namespace.cpp new file mode 100644 index 0000000000000..2849f8d80f795 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-dwarf-exp-namespace.cpp @@ -0,0 +1,72 @@ +// RUN: %clang_cc1 -disable-llvm-optzns -std=c++2a -fcoroutines-ts \ +// RUN: -triple=x86_64 -dwarf-version=4 -debug-info-kind=limited \ +// RUN: -emit-llvm -o - %s | \ +// RUN: FileCheck %s --implicit-check-not=DILocalVariable + +namespace std::experimental { +template struct coroutine_traits; + +template struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; +}; +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +template struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + 
promise_type(); + ~promise_type() noexcept; + void unhandled_exception() noexcept; + }; +}; + +// TODO: Not supported yet +struct CopyOnly { + int val; + CopyOnly(const CopyOnly &) noexcept; + CopyOnly(CopyOnly &&) = delete; + ~CopyOnly(); +}; + +struct MoveOnly { + int val; + MoveOnly(const MoveOnly &) = delete; + MoveOnly(MoveOnly &&) noexcept; + ~MoveOnly(); +}; + +struct MoveAndCopy { + int val; + MoveAndCopy(const MoveAndCopy &) noexcept; + MoveAndCopy(MoveAndCopy &&) noexcept; + ~MoveAndCopy(); +}; + +void consume(int, int, int) noexcept; + +void f_coro(int val, MoveOnly moParam, MoveAndCopy mcParam) { + consume(val, moParam.val, mcParam.val); + co_return; +} + +// CHECK: ![[SP:[0-9]+]] = distinct !DISubprogram(name: "f_coro", linkageName: "_Z6f_coroi8MoveOnly11MoveAndCopy" +// CHECK: !{{[0-9]+}} = !DILocalVariable(name: "val", arg: 1, scope: ![[SP]], file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) +// CHECK: !{{[0-9]+}} = !DILocalVariable(name: "moParam", arg: 2, scope: ![[SP]], file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) +// CHECK: !{{[0-9]+}} = !DILocalVariable(name: "mcParam", arg: 3, scope: ![[SP]], file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) +// CHECK: !{{[0-9]+}} = !DILocalVariable(name: "__promise", diff --git a/clang/test/CodeGenCoroutines/coro-dwarf.cpp b/clang/test/CodeGenCoroutines/coro-dwarf.cpp index 2849f8d80f795..7914babe5483a 100644 --- a/clang/test/CodeGenCoroutines/coro-dwarf.cpp +++ b/clang/test/CodeGenCoroutines/coro-dwarf.cpp @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -disable-llvm-optzns -std=c++2a -fcoroutines-ts \ +// RUN: %clang_cc1 -disable-llvm-optzns -std=c++20 \ // RUN: -triple=x86_64 -dwarf-version=4 -debug-info-kind=limited \ // RUN: -emit-llvm -o - %s | \ // RUN: FileCheck %s --implicit-check-not=DILocalVariable -namespace std::experimental { +namespace std { template struct coroutine_traits; template struct coroutine_handle { @@ -16,15 +16,15 @@ template <> struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept; }; -} // namespace std::experimental +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; -template struct std::experimental::coroutine_traits { +template struct std::coroutine_traits { struct promise_type { void get_return_object() noexcept; suspend_always initial_suspend() noexcept; diff --git a/clang/test/CodeGenCoroutines/coro-eh-cleanup-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-eh-cleanup-exp-namespace.cpp new file mode 100644 index 0000000000000..4cd81929f5473 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-eh-cleanup-exp-namespace.cpp @@ -0,0 +1,88 @@ +// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm %s -o - -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck --check-prefix=CHECK-LPAD %s + +namespace std::experimental { +template struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; +template struct coroutine_handle : 
coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +struct coro_t { + struct promise_type { + coro_t get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + void unhandled_exception() noexcept; + }; +}; + +struct Cleanup { + ~Cleanup(); +}; +void may_throw(); + +coro_t f() { + Cleanup x; + may_throw(); + co_return; +} + +// CHECK: @"?f@@YA?AUcoro_t@@XZ"( +// CHECK: invoke void @"?may_throw@@YAXXZ"() +// CHECK: to label %[[CONT:.+]] unwind label %[[EHCLEANUP:.+]] +// CHECK: [[EHCLEANUP]]: +// CHECK: %[[INNERPAD:.+]] = cleanuppad within none [] +// CHECK: call void @"??1Cleanup@@QEAA@XZ"( +// CHECK: cleanupret from %{{.+}} unwind label %[[CATCHDISPATCH:.+]] + +// CHECK: [[CATCHDISPATCH]]: +// CHECK: catchswitch within none [label %[[CATCHPAD:.+]]] unwind label %[[COROENDBB:.+]] +// CHECK: [[CATCHPAD]]: +// CHECK: call void @"?unhandled_exception@promise_type@coro_t@@QEAAXXZ" + +// CHECK: [[COROENDBB]]: +// CHECK-NEXT: %[[CLPAD:.+]] = cleanuppad within none +// CHECK-NEXT: call i1 @llvm.coro.end(i8* null, i1 true) [ "funclet"(token %[[CLPAD]]) ] +// CHECK-NEXT: cleanupret from %[[CLPAD]] unwind label + +// CHECK-LPAD: @_Z1fv( +// CHECK-LPAD: invoke void @_Z9may_throwv() +// CHECK-LPAD: to label %[[CONT:.+]] unwind label %[[EHCLEANUP:.+]] +// CHECK-LPAD: [[EHCLEANUP]]: +// CHECK-LPAD: landingpad { i8*, i32 } +// CHECK-LPAD: catch +// CHECK-LPAD: call void @_ZN7CleanupD1Ev( +// CHECK-LPAD: call i8* @__cxa_begin_catch +// CHECK-LPAD: call void @_ZN6coro_t12promise_type19unhandled_exceptionEv +// CHECK-LPAD: invoke void @__cxa_end_catch() +// CHECK-LPAD: to label %{{.+}} unwind label %[[UNWINDBB:.+]] + +// CHECK-LPAD: [[UNWINDBB]]: +// CHECK-LPAD: %[[I1RESUME:.+]] = call i1 @llvm.coro.end(i8* null, i1 true) +// CHECK-LPAD: br i1 %[[I1RESUME]], label %[[EHRESUME:.+]], label +// CHECK-LPAD: [[EHRESUME]]: +// CHECK-LPAD-NEXT: %[[exn:.+]] = load i8*, i8** %exn.slot, align 8 +// CHECK-LPAD-NEXT: %[[sel:.+]] = load i32, i32* %ehselector.slot, align 4 +// CHECK-LPAD-NEXT: %[[val1:.+]] = insertvalue { i8*, i32 } undef, i8* %[[exn]], 0 +// CHECK-LPAD-NEXT: %[[val2:.+]] = insertvalue { i8*, i32 } %[[val1]], i32 %[[sel]], 1 +// CHECK-LPAD-NEXT: resume { i8*, i32 } %[[val2]] diff --git a/clang/test/CodeGenCoroutines/coro-eh-cleanup.cpp b/clang/test/CodeGenCoroutines/coro-eh-cleanup.cpp index 9801151b4f140..85777bb88fad1 100644 --- a/clang/test/CodeGenCoroutines/coro-eh-cleanup.cpp +++ b/clang/test/CodeGenCoroutines/coro-eh-cleanup.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm %s -o - -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s -// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck --check-prefix=CHECK-LPAD %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm %s -o - -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck --check-prefix=CHECK-LPAD %s -namespace std::experimental { 
+namespace std { template struct coroutine_traits { using promise_type = typename R::promise_type; }; @@ -18,11 +18,11 @@ template struct coroutine_handle: coroutine_handle { coroutine_handle() = default; static coroutine_handle from_address(void *) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; diff --git a/clang/test/CodeGenCoroutines/coro-gro-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-gro-exp-namespace.cpp new file mode 100644 index 0000000000000..90ab5fe350041 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-gro-exp-namespace.cpp @@ -0,0 +1,88 @@ +// Verifies lifetime of __gro local variable +// Verify that coroutine promise and allocated memory are freed up on exception. +// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes | FileCheck %s + +namespace std::experimental { +template struct coroutine_traits; + +template struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; +}; +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +struct GroType { + ~GroType(); + operator int() noexcept; +}; + +template <> struct std::experimental::coroutine_traits { + struct promise_type { + GroType get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + promise_type(); + ~promise_type(); + void unhandled_exception() noexcept; + }; +}; + +struct Cleanup { + ~Cleanup(); +}; +void doSomething() noexcept; + +// CHECK: define{{.*}} i32 @_Z1fv( +int f() { + // CHECK: %[[RetVal:.+]] = alloca i32 + // CHECK: %[[GroActive:.+]] = alloca i1 + + // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64() + // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]]) + // CHECK: store i1 false, i1* %[[GroActive]] + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev( + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv( + // CHECK: store i1 true, i1* %[[GroActive]] + + Cleanup cleanup; + doSomething(); + co_return; + + // CHECK: call void @_Z11doSomethingv( + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type11return_voidEv( + // CHECK: call void @_ZN7CleanupD1Ev( + + // Destroy promise and free the memory. 
+ + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeD1Ev( + // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( + // CHECK: call void @_ZdlPv(i8* %[[Mem]]) + + // Initialize retval from Gro and destroy Gro + + // CHECK: %[[Conv:.+]] = call i32 @_ZN7GroTypecviEv( + // CHECK: store i32 %[[Conv]], i32* %[[RetVal]] + // CHECK: %[[IsActive:.+]] = load i1, i1* %[[GroActive]] + // CHECK: br i1 %[[IsActive]], label %[[CleanupGro:.+]], label %[[Done:.+]] + + // CHECK: [[CleanupGro]]: + // CHECK: call void @_ZN7GroTypeD1Ev( + // CHECK: br label %[[Done]] + + // CHECK: [[Done]]: + // CHECK: %[[LoadRet:.+]] = load i32, i32* %[[RetVal]] + // CHECK: ret i32 %[[LoadRet]] +} diff --git a/clang/test/CodeGenCoroutines/coro-gro-nrvo-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-gro-nrvo-exp-namespace.cpp new file mode 100644 index 0000000000000..32e95a6528111 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-gro-nrvo-exp-namespace.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s + +#include "Inputs/coroutine-exp-namespace.h" + +using namespace std::experimental; + +namespace std { + +struct nothrow_t {}; +constexpr nothrow_t nothrow = {}; + +} // end namespace std + +// Required when get_return_object_on_allocation_failure() is defined by +// the promise. +void *operator new(__SIZE_TYPE__ __sz, const std::nothrow_t &) noexcept; +void operator delete(void *__p, const std::nothrow_t &)noexcept; + +template +struct promise_type { + RetObject get_return_object(); + suspend_always initial_suspend(); + suspend_never final_suspend() noexcept; + void return_void(); + static void unhandled_exception(); +}; + +struct coro { + using promise_type = promise_type; + coro(coro const &); + struct Impl; + Impl *impl; +}; + +// Verify that the NRVO is applied to the Gro object. +// CHECK-LABEL: define{{.*}} void @_Z1fi(%struct.coro* noalias sret(%struct.coro) align 8 %agg.result, i32 %0) +coro f(int) { + // CHECK: %call = call noalias nonnull i8* @_Znwm( + // CHECK-NEXT: br label %[[CoroInit:.*]] + + // CHECK: {{.*}}[[CoroInit]]: + // CHECK: store i1 false, i1* %gro.active + // CHECK: call void @{{.*get_return_objectEv}}(%struct.coro* sret(%struct.coro) align 8 %agg.result + // CHECK-NEXT: store i1 true, i1* %gro.active + co_return; +} + +template +struct promise_type_with_on_alloc_failure { + static RetObject get_return_object_on_allocation_failure(); + RetObject get_return_object(); + suspend_always initial_suspend(); + suspend_never final_suspend() noexcept; + void return_void(); + static void unhandled_exception(); +}; + +struct coro_two { + using promise_type = promise_type_with_on_alloc_failure; + coro_two(coro_two const &); + struct Impl; + Impl *impl; +}; + +// Verify that the NRVO is applied to the Gro object. 
+// CHECK-LABEL: define{{.*}} void @_Z1hi(%struct.coro_two* noalias sret(%struct.coro_two) align 8 %agg.result, i32 %0) +coro_two h(int) { + + // CHECK: %call = call noalias i8* @_ZnwmRKSt9nothrow_t + // CHECK-NEXT: %[[CheckNull:.*]] = icmp ne i8* %call, null + // CHECK-NEXT: br i1 %[[CheckNull]], label %[[InitOnSuccess:.*]], label %[[InitOnFailure:.*]] + + // CHECK: {{.*}}[[InitOnFailure]]: + // CHECK-NEXT: call void @{{.*get_return_object_on_allocation_failureEv}}(%struct.coro_two* sret(%struct.coro_two) align 8 %agg.result + // CHECK-NEXT: br label %[[RetLabel:.*]] + + // CHECK: {{.*}}[[InitOnSuccess]]: + // CHECK: store i1 false, i1* %gro.active + // CHECK: call void @{{.*get_return_objectEv}}(%struct.coro_two* sret(%struct.coro_two) align 8 %agg.result + // CHECK-NEXT: store i1 true, i1* %gro.active + + // CHECK: [[RetLabel]]: + // CHECK-NEXT: ret void + co_return; +} diff --git a/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp b/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp index 940617197bc95..0a8366bf45b9c 100644 --- a/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++20 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s #include "Inputs/coroutine.h" -using namespace std::experimental; +using namespace std; namespace std { diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp index dd07185e3f848..bcf474cc22c36 100644 --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -1,8 +1,8 @@ // Verifies lifetime of __gro local variable // Verify that coroutine promise and allocated memory are freed up on exception. 
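Both gro tests pin down the same protocol: the result of promise.get_return_object() (the "gro") is created before the coroutine body runs, is converted to the declared return type only when the ramp function returns, and a boolean flag tracks whether it still needs destruction on early exits. A compilable model of the sequence the CHECK lines encode (GroModel, make_gro, and ramp_model are hypothetical stand-ins, not CodeGen output):

  struct GroModel {             // stands in for GroType in these tests
    ~GroModel();
    operator int() noexcept;
  };
  GroModel make_gro();          // stands in for promise.get_return_object()

  int ramp_model() {
    bool gro_active = false;    // %[[GroActive]] in the CHECK lines
    GroModel gro = make_gro();  // built eagerly, before the body runs
    gro_active = true;          // from here on, gro must be destroyed
    // ...coroutine body, suspends, promise destruction...
    int retval = gro;           // operator int() fills the return slot
    (void)gro_active;           // real CodeGen branches on this flag so the
    return retval;              // gro destructor runs exactly once
  }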
-// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes | FileCheck %s -namespace std::experimental { +namespace std { template struct coroutine_traits; template struct coroutine_handle { @@ -15,11 +15,11 @@ template <> struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; @@ -28,7 +28,7 @@ struct GroType { operator int() noexcept; }; -template <> struct std::experimental::coroutine_traits { +template <> struct std::coroutine_traits { struct promise_type { GroType get_return_object() noexcept; suspend_always initial_suspend() noexcept; @@ -51,8 +51,8 @@ int f() { // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64() // CHECK: call noalias nonnull i8* @_Znwm(i64 %[[Size]]) // CHECK: store i1 false, i1* %[[GroActive]] - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeC1Ev( - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type17get_return_objectEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_typeC1Ev( + // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type17get_return_objectEv( // CHECK: store i1 true, i1* %[[GroActive]] Cleanup cleanup; @@ -60,12 +60,12 @@ int f() { co_return; // CHECK: call void @_Z11doSomethingv( - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type11return_voidEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type11return_voidEv( // CHECK: call void @_ZN7CleanupD1Ev( // Destroy promise and free the memory. - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_typeD1Ev( + // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_typeD1Ev( // CHECK: %[[Mem:.+]] = call i8* @llvm.coro.free( // CHECK: call void @_ZdlPv(i8* %[[Mem]]) diff --git a/clang/test/CodeGenCoroutines/coro-lambda-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-lambda-exp-namespace.cpp new file mode 100644 index 0000000000000..2c9c446be0806 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-lambda-exp-namespace.cpp @@ -0,0 +1,61 @@ +// Verify that we synthesized the coroutine for a lambda inside of a function template. 
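One detail these lambda tests rely on: when co_await appears inside a lambda, the lambda's call operator, not the enclosing function template, becomes the coroutine, which is why the synthesized symbol checked below is operator() of a closure nested inside SyncAwait (the ...UlvE_clEv suffix). In outline, condensed from the test body that follows (exception handling elided):

  template <typename _AwrT> auto SyncAwait(_AwrT &&A) {
    if (!A.await_ready()) {
      auto AwaitAsync = [&]() -> Task {  // operator() is the coroutine
        (void)(co_await A);              // suspension happens in here
      };
      Task t = AwaitAsync();             // SyncAwait itself never suspends
    }
    return A.await_resume();
  }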
+// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s + +namespace std::experimental { +template struct coroutine_traits { + using promise_type = typename R::promise_type; +}; + +template struct coroutine_handle; +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; +template struct coroutine_handle : coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +struct Task { + struct promise_type { + Task get_return_object(); + void return_void() {} + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void unhandled_exception() noexcept; + }; +}; + +template auto SyncAwait(_AwrT &&A) { + if (!A.await_ready()) { + auto AwaitAsync = [&]() -> Task { + try { + (void)(co_await A); + } catch (...) { + } + }; + Task t = AwaitAsync(); + } + return A.await_resume(); +} + +void f() { + suspend_always test; + SyncAwait(test); +} + +// Verify that we synthesized the coroutine for a lambda inside SyncAwait +// CHECK-LABEL: define linkonce_odr void @_ZZ9SyncAwaitIR14suspend_alwaysEDaOT_ENKUlvE_clEv( +// CHECK: alloca %"struct.Task::promise_type" +// CHECK: call token @llvm.coro.id( +// CHECK: call i8 @llvm.coro.suspend( +// CHECK: call i1 @llvm.coro.end( diff --git a/clang/test/CodeGenCoroutines/coro-lambda.cpp b/clang/test/CodeGenCoroutines/coro-lambda.cpp index cd3256dc07eff..26c51070f9e2d 100644 --- a/clang/test/CodeGenCoroutines/coro-lambda.cpp +++ b/clang/test/CodeGenCoroutines/coro-lambda.cpp @@ -1,7 +1,7 @@ // Verify that we synthesized the coroutine for a lambda inside of a function template. -// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s -namespace std::experimental { +namespace std { template struct coroutine_traits { using promise_type = typename R::promise_type; }; @@ -17,11 +17,11 @@ template struct coroutine_handle : coroutine_handle { coroutine_handle() = default; static coroutine_handle from_address(void *) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; diff --git a/clang/test/CodeGenCoroutines/coro-newpm-pipeline-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-newpm-pipeline-exp-namespace.cpp new file mode 100644 index 0000000000000..869e98ecdb9ec --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-newpm-pipeline-exp-namespace.cpp @@ -0,0 +1,45 @@ +// Tests that coroutine passes are added to and run by the new pass manager +// pipeline, at -O0 and above. 
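These pipeline tests assert ordering rather than generated code: -fdebug-pass-manager makes the new pass manager print one "Running pass" line per pass, and FileCheck verifies that the coroutine passes run in sequence. The checked order, with one-line descriptions (the descriptions are a paraphrased summary, not part of the test):

  // CoroEarlyPass   - lowers the coroutine intrinsics that can be handled
  //                   early and prepares coroutines for the later passes
  // CoroSplitPass   - splits each coroutine (here _Z3foov) into ramp,
  //                   resume, and destroy functions
  // CoroElidePass   - checked only at -O1 and above (CHECK-OPT); elides the
  //                   heap allocation when the coroutine's lifetime is
  //                   provably contained in its caller
  // CoroCleanupPass - lowers whatever coroutine intrinsics remain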
+ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o /dev/null \ +// RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -fcoroutines-ts \ +// RUN: -O0 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o /dev/null \ +// RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -fcoroutines-ts \ +// RUN: -O1 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT +// +// CHECK-ALL: Running pass:{{.*}}CoroEarlyPass +// +// CHECK-ALL: Running pass: CoroSplitPass on (_Z3foov) +// CHECK-OPT: Running pass:{{.*}}CoroElidePass{{.*}} on {{.*}}_Z3foov{{.*}} +// +// CHECK-ALL: Running pass:{{.*}}CoroCleanupPass + +namespace std { +namespace experimental { + +struct handle {}; + +struct awaitable { + bool await_ready() noexcept { return false; } + void await_suspend(handle) noexcept {} + bool await_resume() noexcept { return true; } +}; + +template struct coroutine_handle { + static handle from_address(void *address) noexcept { return {}; } +}; + +template struct coroutine_traits { + struct promise_type { + awaitable initial_suspend() { return {}; } + awaitable final_suspend() noexcept { return {}; } + void return_void() {} + T get_return_object() { return T(); } + void unhandled_exception() {} + }; +}; +} // namespace experimental +} // namespace std + +void foo() { co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp index 869e98ecdb9ec..0aad12e5af4bf 100644 --- a/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp +++ b/clang/test/CodeGenCoroutines/coro-newpm-pipeline.cpp @@ -2,10 +2,10 @@ // pipeline, at -O0 and above. // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o /dev/null \ -// RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -fcoroutines-ts \ +// RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -std=c++20 \ // RUN: -O0 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm-bc -o /dev/null \ -// RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -fcoroutines-ts \ +// RUN: -fexperimental-new-pass-manager -fdebug-pass-manager -std=c++20 \ // RUN: -O1 %s 2>&1 | FileCheck %s --check-prefixes=CHECK-ALL,CHECK-OPT // // CHECK-ALL: Running pass:{{.*}}CoroEarlyPass @@ -16,7 +16,6 @@ // CHECK-ALL: Running pass:{{.*}}CoroCleanupPass namespace std { -namespace experimental { struct handle {}; @@ -39,7 +38,6 @@ template struct coroutine_traits { void unhandled_exception() {} }; }; -} // namespace experimental } // namespace std void foo() { co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-params-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-params-exp-namespace.cpp new file mode 100644 index 0000000000000..d8aa8472a83bf --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-params-exp-namespace.cpp @@ -0,0 +1,205 @@ +// Verifies that parameters are copied with move constructors +// Verifies that parameter copies are destroyed +// Verifies that parameter copies are used in the body of the coroutine +// Verifies that parameter copies are used to construct the promise type, if that type has a matching constructor +// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s + +namespace std::experimental { +template struct coroutine_traits; + +template struct coroutine_handle { + coroutine_handle() = default; + static
coroutine_handle from_address(void *) noexcept; +}; +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +template struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + promise_type(); + ~promise_type() noexcept; + void unhandled_exception() noexcept; + }; +}; + +// TODO: Not supported yet +struct CopyOnly { + int val; + CopyOnly(const CopyOnly &) noexcept; + CopyOnly(CopyOnly &&) = delete; + ~CopyOnly(); +}; + +struct MoveOnly { + int val; + MoveOnly(const MoveOnly &) = delete; + MoveOnly(MoveOnly &&) noexcept; + ~MoveOnly(); +}; + +struct MoveAndCopy { + int val; + MoveAndCopy(const MoveAndCopy &) noexcept; + MoveAndCopy(MoveAndCopy &&) noexcept; + ~MoveAndCopy(); +}; + +void consume(int, int, int) noexcept; + +// TODO: Add support for CopyOnly params +// CHECK: define{{.*}} void @_Z1fi8MoveOnly11MoveAndCopy(i32 %val, %struct.MoveOnly* %[[MoParam:.+]], %struct.MoveAndCopy* %[[McParam:.+]]) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8* +void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { + // CHECK: %[[MoCopy:.+]] = alloca %struct.MoveOnly + // CHECK: %[[McCopy:.+]] = alloca %struct.MoveAndCopy + // CHECK: store i32 %val, i32* %[[ValAddr:.+]] + + // CHECK: call i8* @llvm.coro.begin( + // CHECK: call void @_ZN8MoveOnlyC1EOS_(%struct.MoveOnly* {{[^,]*}} %[[MoCopy]], %struct.MoveOnly* nonnull align 4 dereferenceable(4) %[[MoParam]]) + // CHECK-NEXT: bitcast %struct.MoveAndCopy* %[[McCopy]] to i8* + // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( + // CHECK-NEXT: call void @_ZN11MoveAndCopyC1EOS_(%struct.MoveAndCopy* {{[^,]*}} %[[McCopy]], %struct.MoveAndCopy* nonnull align 4 dereferenceable(4) %[[McParam]]) # + // CHECK-NEXT: bitcast %"struct.std::experimental::coroutine_traits::promise_type"* %__promise to i8* + // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( + // CHECK-NEXT: invoke void @_ZNSt12experimental16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev( + + // CHECK: call void @_ZN14suspend_always12await_resumeEv( + // CHECK: %[[IntParam:.+]] = load i32, i32* %{{.*}} + // CHECK: %[[MoGep:.+]] = getelementptr inbounds %struct.MoveOnly, %struct.MoveOnly* %[[MoCopy]], i32 0, i32 0 + // CHECK: %[[MoVal:.+]] = load i32, i32* %[[MoGep]] + // CHECK: %[[McGep:.+]] = getelementptr inbounds %struct.MoveAndCopy, %struct.MoveAndCopy* %[[McCopy]], i32 0, i32 0 + // CHECK: %[[McVal:.+]] = load i32, i32* %[[McGep]] + // CHECK: call void @_Z7consumeiii(i32 %[[IntParam]], i32 %[[MoVal]], i32 %[[McVal]]) + + consume(val, moParam.val, mcParam.val); + co_return; + + // Skip to final suspend: + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_type13final_suspendEv( + // CHECK: call void @_ZN14suspend_always12await_resumeEv( + + // Destroy promise, then parameter copies: + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeD1Ev(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise) + // CHECK-NEXT: bitcast 
%"struct.std::experimental::coroutine_traits::promise_type"* %__promise to i8* + // CHECK-NEXT: call void @llvm.lifetime.end.p0i8( + // CHECK-NEXT: call void @_ZN11MoveAndCopyD1Ev(%struct.MoveAndCopy* {{[^,]*}} %[[McCopy]]) + // CHECK-NEXT: bitcast %struct.MoveAndCopy* %[[McCopy]] to i8* + // CHECK-NEXT: call void @llvm.lifetime.end.p0i8( + // CHECK-NEXT: call void @_ZN8MoveOnlyD1Ev(%struct.MoveOnly* {{[^,]*}} %[[MoCopy]] + // CHECK-NEXT: bitcast %struct.MoveOnly* %[[MoCopy]] to i8* + // CHECK-NEXT: call void @llvm.lifetime.end.p0i8( + // CHECK-NEXT: bitcast i32* %{{.+}} to i8* + // CHECK-NEXT: call void @llvm.lifetime.end.p0i8( + // CHECK-NEXT: call i8* @llvm.coro.free( +} + +// CHECK-LABEL: void @_Z16dependent_paramsI1A1BEvT_T0_S3_(%struct.A* %x, %struct.B* %0, %struct.B* %y) +template +void dependent_params(T x, U, U y) { + // CHECK: %[[x_copy:.+]] = alloca %struct.A + // CHECK-NEXT: %[[unnamed_copy:.+]] = alloca %struct.B + // CHECK-NEXT: %[[y_copy:.+]] = alloca %struct.B + + // CHECK: call i8* @llvm.coro.begin + // CHECK-NEXT: bitcast %struct.A* %[[x_copy]] to i8* + // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( + // CHECK-NEXT: call void @_ZN1AC1EOS_(%struct.A* {{[^,]*}} %[[x_copy]], %struct.A* nonnull align 4 dereferenceable(512) %x) + // CHECK-NEXT: bitcast %struct.B* %[[unnamed_copy]] to i8* + // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( + // CHECK-NEXT: call void @_ZN1BC1EOS_(%struct.B* {{[^,]*}} %[[unnamed_copy]], %struct.B* nonnull align 4 dereferenceable(512) %0) + // CHECK-NEXT: bitcast %struct.B* %[[y_copy]] to i8* + // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( + // CHECK-NEXT: call void @_ZN1BC1EOS_(%struct.B* {{[^,]*}} %[[y_copy]], %struct.B* nonnull align 4 dereferenceable(512) %y) + // CHECK-NEXT: bitcast %"struct.std::experimental::coroutine_traits::promise_type"* %__promise to i8* + // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( + // CHECK-NEXT: invoke void @_ZNSt12experimental16coroutine_traitsIJv1A1BS2_EE12promise_typeC1Ev( + + co_return; +} + +struct A { + int WontFitIntoRegisterForSure[128]; + A(); + A(A &&) + noexcept; + ~A(); +}; + +struct B { + int WontFitIntoRegisterForSure[128]; + B(); + B(B &&) + noexcept; + ~B(); +}; + +void call_dependent_params() { + dependent_params(A{}, B{}, B{}); +} + +// Test that, when the promise type has a constructor whose signature matches +// that of the coroutine function, that constructor is used. This is an +// experimental feature that will be proposed for the Coroutines TS. 
+ +struct promise_matching_constructor {}; + +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + promise_type(promise_matching_constructor, int, float, double) {} + promise_type() = delete; + void get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception() {} + }; +}; + +// CHECK-LABEL: void @_Z38coroutine_matching_promise_constructor28promise_matching_constructorifd(i32 %0, float %1, double %2) +void coroutine_matching_promise_constructor(promise_matching_constructor, int, float, double) { + // CHECK: %[[INT:.+]] = load i32, i32* %5, align 4 + // CHECK: %[[FLOAT:.+]] = load float, float* %6, align 4 + // CHECK: %[[DOUBLE:.+]] = load double, double* %7, align 8 + // CHECK: invoke void @_ZNSt12experimental16coroutine_traitsIJv28promise_matching_constructorifdEE12promise_typeC1ES1_ifd(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise, i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) + co_return; +} + +struct some_class; + +struct method {}; + +template struct std::experimental::coroutine_traits { + struct promise_type { + promise_type(some_class &, float); + method get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); + }; +}; + +struct some_class { + method good_coroutine_calls_custom_constructor(float); +}; + +// CHECK-LABEL: define{{.*}} void @_ZN10some_class39good_coroutine_calls_custom_constructorEf(%struct.some_class* +method some_class::good_coroutine_calls_custom_constructor(float) { + // CHECK: invoke void @_ZNSt12experimental16coroutine_traitsIJ6methodR10some_classfEE12promise_typeC1ES3_f(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise, %struct.some_class* nonnull align 1 dereferenceable(1) %{{.+}}, float + co_return; +} diff --git a/clang/test/CodeGenCoroutines/coro-params.cpp b/clang/test/CodeGenCoroutines/coro-params.cpp index 28753d524df28..b5f23245bd58d 100644 --- a/clang/test/CodeGenCoroutines/coro-params.cpp +++ b/clang/test/CodeGenCoroutines/coro-params.cpp @@ -2,9 +2,9 @@ // Verifies that parameter copies are destroyed // Vefifies that parameter copies are used in the body of the coroutine // Verifies that parameter copies are used to construct the promise type, if that type has a matching constructor -// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes -fexceptions | FileCheck %s -namespace std::experimental { +namespace std { template struct coroutine_traits; template struct coroutine_handle { @@ -17,15 +17,15 @@ template <> struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; -template struct std::experimental::coroutine_traits { +template struct std::coroutine_traits { struct promise_type { void get_return_object() noexcept; suspend_always initial_suspend() noexcept; @@ -73,9 +73,9 @@ void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { // CHECK-NEXT: bitcast %struct.MoveAndCopy* %[[McCopy]] 
to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( // CHECK-NEXT: call void @_ZN11MoveAndCopyC1EOS_(%struct.MoveAndCopy* {{[^,]*}} %[[McCopy]], %struct.MoveAndCopy* nonnull align 4 dereferenceable(4) %[[McParam]]) # - // CHECK-NEXT: bitcast %"struct.std::experimental::coroutine_traits::promise_type"* %__promise to i8* + // CHECK-NEXT: bitcast %"struct.std::coroutine_traits::promise_type"* %__promise to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( - // CHECK-NEXT: invoke void @_ZNSt12experimental16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev( + // CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeC1Ev( // CHECK: call void @_ZN14suspend_always12await_resumeEv( // CHECK: %[[IntParam:.+]] = load i32, i32* %{{.*}} @@ -89,12 +89,12 @@ void f(int val, MoveOnly moParam, MoveAndCopy mcParam) { co_return; // Skip to final suspend: - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_type13final_suspendEv( + // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_type13final_suspendEv( // CHECK: call void @_ZN14suspend_always12await_resumeEv( // Destroy promise, then parameter copies: - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeD1Ev(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise) - // CHECK-NEXT: bitcast %"struct.std::experimental::coroutine_traits::promise_type"* %__promise to i8* + // CHECK: call void @_ZNSt16coroutine_traitsIJvi8MoveOnly11MoveAndCopyEE12promise_typeD1Ev(%"struct.std::coroutine_traits::promise_type"* {{[^,]*}} %__promise) + // CHECK-NEXT: bitcast %"struct.std::coroutine_traits::promise_type"* %__promise to i8* // CHECK-NEXT: call void @llvm.lifetime.end.p0i8( // CHECK-NEXT: call void @_ZN11MoveAndCopyD1Ev(%struct.MoveAndCopy* {{[^,]*}} %[[McCopy]]) // CHECK-NEXT: bitcast %struct.MoveAndCopy* %[[McCopy]] to i8* @@ -124,9 +124,9 @@ void dependent_params(T x, U, U y) { // CHECK-NEXT: bitcast %struct.B* %[[y_copy]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( // CHECK-NEXT: call void @_ZN1BC1EOS_(%struct.B* {{[^,]*}} %[[y_copy]], %struct.B* nonnull align 4 dereferenceable(512) %y) - // CHECK-NEXT: bitcast %"struct.std::experimental::coroutine_traits::promise_type"* %__promise to i8* + // CHECK-NEXT: bitcast %"struct.std::coroutine_traits::promise_type"* %__promise to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( - // CHECK-NEXT: invoke void @_ZNSt12experimental16coroutine_traitsIJv1A1BS2_EE12promise_typeC1Ev( + // CHECK-NEXT: invoke void @_ZNSt16coroutine_traitsIJv1A1BS1_EE12promise_typeC1Ev( co_return; } @@ -155,8 +155,8 @@ void call_dependent_params() { struct promise_matching_constructor {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { promise_type(promise_matching_constructor, int, float, double) {} promise_type() = delete; @@ -173,7 +173,7 @@ void coroutine_matching_promise_constructor(promise_matching_constructor, int, f // CHECK: %[[INT:.+]] = load i32, i32* %5, align 4 // CHECK: %[[FLOAT:.+]] = load float, float* %6, align 4 // CHECK: %[[DOUBLE:.+]] = load double, double* %7, align 8 - // CHECK: invoke void @_ZNSt12experimental16coroutine_traitsIJv28promise_matching_constructorifdEE12promise_typeC1ES1_ifd(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise, i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) + 
// CHECK: invoke void @_ZNSt16coroutine_traitsIJv28promise_matching_constructorifdEE12promise_typeC1ES0_ifd(%"struct.std::coroutine_traits::promise_type"* {{[^,]*}} %__promise, i32 %[[INT]], float %[[FLOAT]], double %[[DOUBLE]]) co_return; } @@ -181,7 +181,7 @@ struct some_class; struct method {}; -template struct std::experimental::coroutine_traits { +template struct std::coroutine_traits { struct promise_type { promise_type(some_class&, float); method get_return_object(); @@ -198,6 +198,6 @@ struct some_class { // CHECK-LABEL: define{{.*}} void @_ZN10some_class39good_coroutine_calls_custom_constructorEf(%struct.some_class* method some_class::good_coroutine_calls_custom_constructor(float) { - // CHECK: invoke void @_ZNSt12experimental16coroutine_traitsIJ6methodR10some_classfEE12promise_typeC1ES3_f(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise, %struct.some_class* nonnull align 1 dereferenceable(1) %{{.+}}, float + // CHECK: invoke void @_ZNSt16coroutine_traitsIJ6methodR10some_classfEE12promise_typeC1ES2_f(%"struct.std::coroutine_traits::promise_type"* {{[^,]*}} %__promise, %struct.some_class* nonnull align 1 dereferenceable(1) %{{.+}}, float co_return; } diff --git a/clang/test/CodeGenCoroutines/coro-promise-dtor-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-promise-dtor-exp-namespace.cpp new file mode 100644 index 0000000000000..72831a77d3e0a --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-promise-dtor-exp-namespace.cpp @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// -triple=x86_64-unknown-linux-gnu + +#include "Inputs/coroutine-exp-namespace.h" + +namespace coro = std::experimental::coroutines_v1; + +struct coro_t { + void *p; + ~coro_t(); + struct promise_type { + coro_t get_return_object(); + coro::suspend_never initial_suspend(); + coro::suspend_never final_suspend() noexcept; + void return_void(); + promise_type(); + ~promise_type(); + void unhandled_exception(); + }; +}; + +struct Cleanup { + ~Cleanup(); +}; +void may_throw(); + +coro_t f() { + Cleanup cleanup; + may_throw(); + co_return; +} + +// CHECK-LABEL: define dso_local void @"?f@@YA?AUcoro_t@@XZ"( +// CHECK: %gro.active = alloca i1 +// CHECK: store i1 false, i1* %gro.active + +// CHECK: invoke %"struct.coro_t::promise_type"* @"??0promise_type@coro_t@@QEAA@XZ"( +// CHECK: invoke void @"?get_return_object@promise_type@coro_t@@QEAA?AU2@XZ"( +// CHECK: store i1 true, i1* %gro.active + +// CHECK: %[[IS_ACTIVE:.+]] = load i1, i1* %gro.active +// CHECK: br i1 %[[IS_ACTIVE]], label %[[CLEANUP1:.+]], label + +// CHECK: [[CLEANUP1]]: +// CHECK: %[[NRVO:.+]] = load i1, i1* %nrvo +// CHECK: br i1 %[[NRVO]], label %{{.+}}, label %[[DTOR:.+]] + +// CHECK: [[DTOR]]: +// CHECK: call void @"??1coro_t@@QEAA@XZ"( diff --git a/clang/test/CodeGenCoroutines/coro-promise-dtor.cpp b/clang/test/CodeGenCoroutines/coro-promise-dtor.cpp index 4fb0f0fef7940..41ffd1a470979 100644 --- a/clang/test/CodeGenCoroutines/coro-promise-dtor.cpp +++ b/clang/test/CodeGenCoroutines/coro-promise-dtor.cpp @@ -1,17 +1,15 @@ -// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s // -triple=x86_64-unknown-linux-gnu 
#include "Inputs/coroutine.h" -namespace coro = std::experimental::coroutines_v1; - struct coro_t { void* p; ~coro_t(); struct promise_type { coro_t get_return_object(); - coro::suspend_never initial_suspend(); - coro::suspend_never final_suspend() noexcept; + std::suspend_never initial_suspend(); + std::suspend_never final_suspend() noexcept; void return_void(); promise_type(); ~promise_type(); diff --git a/clang/test/CodeGenCoroutines/coro-ret-void-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-ret-void-exp-namespace.cpp new file mode 100644 index 0000000000000..7438e7f7019e1 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-ret-void-exp-namespace.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s + +#include "Inputs/coroutine-exp-namespace.h" + +namespace coro = std::experimental::coroutines_v1; + +struct coro1 { + struct promise_type { + coro1 get_return_object(); + coro::suspend_never initial_suspend(); + coro::suspend_never final_suspend() noexcept; + void return_void(); + }; +}; + +coro1 f() { + co_await coro::suspend_never{}; +} + +// CHECK-LABEL: define{{.*}} void @_Z1fv( +// CHECK: call void @_ZNSt12experimental13coroutines_v113suspend_never12await_resumeEv(%"struct.std::experimental::coroutines_v1::suspend_never"* +// CHECK: call void @_ZN5coro112promise_type11return_voidEv(%"struct.coro1::promise_type"* {{[^,]*}} %__promise) + +struct A { + A(); + ~A(); +}; + +coro1 f2() { + co_return(void) A{}; +} + +// CHECK-LABEL: define{{.*}} void @_Z2f2v( +// CHECK: call void @_ZN1AC1Ev(%struct.A* {{[^,]*}} %[[AVar:.*]]) +// CHECK-NEXT: call void @_ZN1AD1Ev(%struct.A* {{[^,]*}} %[[AVar]]) +// CHECK-NEXT: call void @_ZN5coro112promise_type11return_voidEv(%"struct.coro1::promise_type"* + +struct coro2 { + struct promise_type { + coro2 get_return_object(); + coro::suspend_never initial_suspend(); + coro::suspend_never final_suspend() noexcept; + void return_value(int); + }; +}; + +coro2 g() { + co_return 42; +} + +// CHECK-LABEL: define{{.*}} void @_Z1gv( +// CHECK: call void @_ZNSt12experimental13coroutines_v113suspend_never12await_resumeEv(%"struct.std::experimental::coroutines_v1::suspend_never"* +// CHECK: call void @_ZN5coro212promise_type12return_valueEi(%"struct.coro2::promise_type"* {{[^,]*}} %__promise, i32 42) diff --git a/clang/test/CodeGenCoroutines/coro-ret-void.cpp b/clang/test/CodeGenCoroutines/coro-ret-void.cpp index 1ef2950dd020b..53b200df127b1 100644 --- a/clang/test/CodeGenCoroutines/coro-ret-void.cpp +++ b/clang/test/CodeGenCoroutines/coro-ret-void.cpp @@ -1,24 +1,22 @@ -// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s #include "Inputs/coroutine.h" -namespace coro = std::experimental::coroutines_v1; - struct coro1 { struct promise_type { coro1 get_return_object(); - coro::suspend_never initial_suspend(); - coro::suspend_never final_suspend() noexcept; + std::suspend_never initial_suspend(); + std::suspend_never final_suspend() noexcept; void return_void(); }; }; coro1 f() { - co_await coro::suspend_never{}; + co_await std::suspend_never{}; } // CHECK-LABEL: define{{.*}} void @_Z1fv( -// CHECK: call void @_ZNSt12experimental13coroutines_v113suspend_never12await_resumeEv(%"struct.std::experimental::coroutines_v1::suspend_never"* +// CHECK: call void 
@_ZNSt13suspend_never12await_resumeEv(%"struct.std::suspend_never"* // CHECK: call void @_ZN5coro112promise_type11return_voidEv(%"struct.coro1::promise_type"* {{[^,]*}} %__promise) struct A { @@ -38,8 +36,8 @@ coro1 f2() { struct coro2 { struct promise_type { coro2 get_return_object(); - coro::suspend_never initial_suspend(); - coro::suspend_never final_suspend() noexcept; + std::suspend_never initial_suspend(); + std::suspend_never final_suspend() noexcept; void return_value(int); }; }; @@ -49,5 +47,5 @@ coro2 g() { } // CHECK-LABEL: define{{.*}} void @_Z1gv( -// CHECK: call void @_ZNSt12experimental13coroutines_v113suspend_never12await_resumeEv(%"struct.std::experimental::coroutines_v1::suspend_never"* -// CHECK: call void @_ZN5coro212promise_type12return_valueEi(%"struct.coro2::promise_type"* {{[^,]*}} %__promise, i32 42) +// CHECK: call void @_ZNSt13suspend_never12await_resumeEv(%"struct.std::suspend_never"* +// CHECK: call void @_ZN5coro212promise_type12return_valueEi(%"struct.coro2::promise_type"* {{[^,]*}} %__promise, i32 42) \ No newline at end of file diff --git a/clang/test/CodeGenCoroutines/coro-return-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-return-exp-namespace.cpp new file mode 100644 index 0000000000000..5a5b56f327d1c --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-return-exp-namespace.cpp @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++1z -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s + +namespace std::experimental { +template struct coroutine_traits; + +template struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept { return {}; } +}; +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) { return {}; } + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept {} +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +template <> struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + }; +}; + +// CHECK-LABEL: f0( +extern "C" void f0() { + // CHECK: %__promise = alloca %"struct.std::experimental::coroutine_traits::promise_type" + // CHECK: %call = call noalias nonnull i8* @_Znwm( + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type11return_voidEv(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise) + // CHECK: call void @_ZdlPv + co_return; +} + +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + int get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_value(int); + }; +}; + +// CHECK-LABEL: f1( +extern "C" int f1() { + // CHECK: %__promise = alloca %"struct.std::experimental::coroutine_traits::promise_type" + // CHECK: %call = call noalias nonnull i8* @_Znwm( + // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type12return_valueEi(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise, i32 42) + // CHECK: call void @_ZdlPv + co_return 42; +} diff --git a/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist-exp-namespace.cpp 
b/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist-exp-namespace.cpp new file mode 100644 index 0000000000000..4057358ef7aba --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist-exp-namespace.cpp @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++1z -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s + +namespace std { +template +struct b { b(int, a); }; +template +struct c {}; +namespace experimental { +template +struct coroutine_traits : d {}; +template +struct coroutine_handle; +template <> +struct coroutine_handle<> {}; +template +struct coroutine_handle : coroutine_handle<> { + static coroutine_handle from_address(void *) noexcept; +}; +struct e { + int await_ready(); + void await_suspend(coroutine_handle<>); + void await_resume(); +}; +} // namespace experimental +} // namespace std +template +auto ah(ag) { return ag().ah(0); } +template +struct f; +struct g { + struct h { + int await_ready() noexcept; + template + void await_suspend(std::experimental::coroutine_handle) noexcept; + void await_resume() noexcept; + }; + std::experimental::e initial_suspend(); + h final_suspend() noexcept; + template + auto await_transform(ag) { return ah(ag()); } +}; +struct j : g { + f>> get_return_object(); + void return_value(std::b>); + void unhandled_exception(); +}; +struct k { + k(std::experimental::coroutine_handle<>); + int await_ready(); +}; +template +struct f { + using promise_type = j; + std::experimental::coroutine_handle<> ar; + struct l : k { + using at = k; + l(std::experimental::coroutine_handle<> m) : at(m) {} + void await_suspend(std::experimental::coroutine_handle<>); + }; + struct n : l { + n(std::experimental::coroutine_handle<> m) : l(m) {} + am await_resume(); + }; + auto ah(int) { return n(ar); } +}; +template +auto ax(std::c, aw) -> f>; +template +struct J { static f>> bo(); }; +// CHECK-LABEL: _ZN1JIiE2boEv( +template +f>> J::bo() { + std::c bu; + int bw(0); + // CHECK: void @_ZN1j12return_valueESt1bISt1cIiiEE(%struct.j* {{[^,]*}} %__promise) + co_return {0, co_await ax(bu, bw)}; +} +void bh() { + auto cn = [] { J::bo; }; + cn(); +} diff --git a/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp b/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp index 5f638644291ec..4058afa93cdf6 100644 --- a/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp +++ b/clang/test/CodeGenCoroutines/coro-return-voidtype-initlist.cpp @@ -1,11 +1,10 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++1z -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s namespace std { template struct b { b(int, a); }; template struct c {}; -namespace experimental { template struct coroutine_traits : d {}; template @@ -21,7 +20,6 @@ struct e { void await_suspend(coroutine_handle<>); void await_resume(); }; -} // namespace experimental } // namespace std template auto ah(ag) { return ag().ah(0); } @@ -31,10 +29,10 @@ struct g { struct h { int await_ready() noexcept; template - void await_suspend(std::experimental::coroutine_handle) noexcept; + void await_suspend(std::coroutine_handle) noexcept; void await_resume() noexcept; }; - std::experimental::e initial_suspend(); + std::e initial_suspend(); h final_suspend() noexcept; template auto await_transform(ag) { return ah(ag()); } @@ -45,20 +43,20 @@ struct j : g { void unhandled_exception(); }; struct k 
{ - k(std::experimental::coroutine_handle<>); + k(std::coroutine_handle<>); int await_ready(); }; template struct f { using promise_type = j; - std::experimental::coroutine_handle<> ar; + std::coroutine_handle<> ar; struct l : k { using at = k; - l(std::experimental::coroutine_handle<> m) : at(m) {} - void await_suspend(std::experimental::coroutine_handle<>); + l(std::coroutine_handle<> m) : at(m) {} + void await_suspend(std::coroutine_handle<>); }; struct n : l { - n(std::experimental::coroutine_handle<> m) : l(m) {} + n(std::coroutine_handle<> m) : l(m) {} am await_resume(); }; auto ah(int) { return n(ar); } diff --git a/clang/test/CodeGenCoroutines/coro-return.cpp b/clang/test/CodeGenCoroutines/coro-return.cpp index d35954ed7a861..0ce426ad59cf6 100644 --- a/clang/test/CodeGenCoroutines/coro-return.cpp +++ b/clang/test/CodeGenCoroutines/coro-return.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++1z -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s -namespace std::experimental { +namespace std { template struct coroutine_traits; template struct coroutine_handle { @@ -13,15 +13,15 @@ template <> struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept {} }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; -template <> struct std::experimental::coroutine_traits { +template <> struct std::coroutine_traits { struct promise_type { void get_return_object(); suspend_always initial_suspend(); @@ -32,15 +32,15 @@ template <> struct std::experimental::coroutine_traits { // CHECK-LABEL: f0( extern "C" void f0() { - // CHECK: %__promise = alloca %"struct.std::experimental::coroutine_traits::promise_type" + // CHECK: %__promise = alloca %"struct.std::coroutine_traits::promise_type" // CHECK: %call = call noalias nonnull i8* @_Znwm( - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJvEE12promise_type11return_voidEv(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise) + // CHECK: call void @_ZNSt16coroutine_traitsIJvEE12promise_type11return_voidEv(%"struct.std::coroutine_traits::promise_type"* {{[^,]*}} %__promise) // CHECK: call void @_ZdlPv co_return; } -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { int get_return_object(); suspend_always initial_suspend(); @@ -51,9 +51,9 @@ struct std::experimental::coroutine_traits { // CHECK-LABEL: f1( extern "C" int f1() { - // CHECK: %__promise = alloca %"struct.std::experimental::coroutine_traits::promise_type" + // CHECK: %__promise = alloca %"struct.std::coroutine_traits::promise_type" // CHECK: %call = call noalias nonnull i8* @_Znwm( - // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJiEE12promise_type12return_valueEi(%"struct.std::experimental::coroutine_traits::promise_type"* {{[^,]*}} %__promise, i32 42) + // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type12return_valueEi(%"struct.std::coroutine_traits::promise_type"* {{[^,]*}} %__promise, i32 42) // CHECK: call void @_ZdlPv co_return 42; } diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01-exp-namespace.cpp 
b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01-exp-namespace.cpp new file mode 100644 index 0000000000000..1b6f8ad2a1615 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01-exp-namespace.cpp @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O0 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang -fcoroutines-ts -std=c++14 -O0 -emit-llvm -c %s -o %t -Xclang -disable-llvm-passes && %clang -c %t + +#include "Inputs/coroutine-exp-namespace.h" + +namespace coro = std::experimental::coroutines_v1; + +struct detached_task { + struct promise_type { + detached_task get_return_object() noexcept { + return detached_task{coro::coroutine_handle::from_promise(*this)}; + } + + void return_void() noexcept {} + + struct final_awaiter { + bool await_ready() noexcept { return false; } + coro::coroutine_handle<> await_suspend(coro::coroutine_handle h) noexcept { + h.destroy(); + return {}; + } + void await_resume() noexcept {} + }; + + void unhandled_exception() noexcept {} + + final_awaiter final_suspend() noexcept { return {}; } + + coro::suspend_always initial_suspend() noexcept { return {}; } + }; + + ~detached_task() { + if (coro_) { + coro_.destroy(); + coro_ = {}; + } + } + + void start() && { + auto tmp = coro_; + coro_ = {}; + tmp.resume(); + } + + coro::coroutine_handle coro_; +}; + +detached_task foo() { + co_return; +} + +// check that the lifetime of the coroutine handle used to obtain the address is contained within a single basic block, and hence does not live across suspension points. +// CHECK-LABEL: final.suspend: +// CHECK: %{{.+}} = call token @llvm.coro.save(i8* null) +// CHECK: %[[HDL_CAST1:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[HDL:.+]] to i8* +// CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HDL_CAST1]]) +// CHECK: %[[CALL:.+]] = call i8* @_ZN13detached_task12promise_type13final_awaiter13await_suspendENSt12experimental13coroutines_v116coroutine_handleIS0_EE( +// CHECK: %[[HDL_CAST2:.+]] = getelementptr inbounds %"struct.std::experimental::coroutines_v1::coroutine_handle.0", %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[HDL]], i32 0, i32 0 +// CHECK: store i8* %[[CALL]], i8** %[[HDL_CAST2]], align 8 +// CHECK: %[[HDL_TRANSFER:.+]] = call i8* @_ZNKSt12experimental13coroutines_v116coroutine_handleIvE7addressEv(%"struct.std::experimental::coroutines_v1::coroutine_handle.0"* nonnull align 8 dereferenceable(8) %[[HDL]]) +// CHECK: %[[HDL_CAST3:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[HDL]] to i8* +// CHECK: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HDL_CAST3]]) +// CHECK: call void @llvm.coro.resume(i8* %[[HDL_TRANSFER]]) diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp index e9f09c8da038b..b5a1bb05343bc 100644 --- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp +++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-01.cpp @@ -1,21 +1,19 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O0 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s -// RUN: %clang -fcoroutines-ts -std=c++14 -O0 -emit-llvm -c %s -o %t -Xclang -disable-llvm-passes && %clang -c %t +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O0 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang -std=c++20 -O0 -emit-llvm -c %s -o %t -Xclang 
-disable-llvm-passes && %clang -c %t #include "Inputs/coroutine.h" -namespace coro = std::experimental::coroutines_v1; - struct detached_task { struct promise_type { detached_task get_return_object() noexcept { - return detached_task{coro::coroutine_handle::from_promise(*this)}; + return detached_task{std::coroutine_handle::from_promise(*this)}; } void return_void() noexcept {} struct final_awaiter { bool await_ready() noexcept { return false; } - coro::coroutine_handle<> await_suspend(coro::coroutine_handle h) noexcept { + std::coroutine_handle<> await_suspend(std::coroutine_handle h) noexcept { h.destroy(); return {}; } @@ -26,7 +24,7 @@ struct detached_task { final_awaiter final_suspend() noexcept { return {}; } - coro::suspend_always initial_suspend() noexcept { return {}; } + std::suspend_always initial_suspend() noexcept { return {}; } }; ~detached_task() { @@ -42,7 +40,7 @@ struct detached_task { tmp.resume(); } - coro::coroutine_handle coro_; + std::coroutine_handle coro_; }; detached_task foo() { @@ -52,12 +50,12 @@ detached_task foo() { // check that the lifetime of the coroutine handle used to obtain the address is contained within a single basic block, and hence does not live across suspension points. // CHECK-LABEL: final.suspend: // CHECK: %{{.+}} = call token @llvm.coro.save(i8* null) -// CHECK: %[[HDL_CAST1:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[HDL:.+]] to i8* +// CHECK: %[[HDL_CAST1:.+]] = bitcast %"struct.std::coroutine_handle.0"* %[[HDL:.+]] to i8* // CHECK: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HDL_CAST1]]) -// CHECK: %[[CALL:.+]] = call i8* @_ZN13detached_task12promise_type13final_awaiter13await_suspendENSt12experimental13coroutines_v116coroutine_handleIS0_EE( -// CHECK: %[[HDL_CAST2:.+]] = getelementptr inbounds %"struct.std::experimental::coroutines_v1::coroutine_handle.0", %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[HDL]], i32 0, i32 0 +// CHECK: %[[CALL:.+]] = call i8* @_ZN13detached_task12promise_type13final_awaiter13await_suspendESt16coroutine_handleIS0_E( +// CHECK: %[[HDL_CAST2:.+]] = getelementptr inbounds %"struct.std::coroutine_handle.0", %"struct.std::coroutine_handle.0"* %[[HDL]], i32 0, i32 0 // CHECK: store i8* %[[CALL]], i8** %[[HDL_CAST2]], align 8 -// CHECK: %[[HDL_TRANSFER:.+]] = call i8* @_ZNKSt12experimental13coroutines_v116coroutine_handleIvE7addressEv(%"struct.std::experimental::coroutines_v1::coroutine_handle.0"* nonnull align 8 dereferenceable(8) %[[HDL]]) -// CHECK: %[[HDL_CAST3:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle.0"* %[[HDL]] to i8* +// CHECK: %[[HDL_TRANSFER:.+]] = call i8* @_ZNKSt16coroutine_handleIvE7addressEv(%"struct.std::coroutine_handle.0"* {{.*}}%[[HDL]]) +// CHECK: %[[HDL_CAST3:.+]] = bitcast %"struct.std::coroutine_handle.0"* %[[HDL]] to i8* // CHECK: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HDL_CAST3]]) // CHECK: call void @llvm.coro.resume(i8* %[[HDL_TRANSFER]]) diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02-exp-namespace.cpp new file mode 100644 index 0000000000000..c3e745d659d4e --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02-exp-namespace.cpp @@ -0,0 +1,126 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O1 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s + +#include "Inputs/coroutine-exp-namespace.h" + +namespace coro = 
std::experimental::coroutines_v1; + +struct Task { + struct promise_type { + Task get_return_object() noexcept { + return Task{coro::coroutine_handle::from_promise(*this)}; + } + + void return_void() noexcept {} + + struct final_awaiter { + bool await_ready() noexcept { return false; } + coro::coroutine_handle<> await_suspend(coro::coroutine_handle h) noexcept { + h.destroy(); + return {}; + } + void await_resume() noexcept {} + }; + + void unhandled_exception() noexcept {} + + final_awaiter final_suspend() noexcept { return {}; } + + coro::suspend_always initial_suspend() noexcept { return {}; } + + template + auto await_transform(Awaitable &&awaitable) { + return awaitable.co_viaIfAsync(); + } + }; + + using handle_t = coro::coroutine_handle; + + class Awaiter { + public: + explicit Awaiter(handle_t coro) noexcept; + Awaiter(Awaiter &&other) noexcept; + Awaiter(const Awaiter &) = delete; + ~Awaiter(); + + bool await_ready() noexcept { return false; } + handle_t await_suspend(coro::coroutine_handle<> continuation) noexcept; + void await_resume(); + + private: + handle_t coro_; + }; + + Task(handle_t coro) noexcept : coro_(coro) {} + + handle_t coro_; + + Task(const Task &t) = delete; + Task(Task &&t) noexcept; + ~Task(); + Task &operator=(Task t) noexcept; + + Awaiter co_viaIfAsync(); +}; + +static Task foo() { + co_return; +} + +Task bar() { + auto mode = 2; + switch (mode) { + case 1: + co_await foo(); + break; + case 2: + co_await foo(); + break; + default: + break; + } +} + +// CHECK-LABEL: define{{.*}} void @_Z3barv +// CHECK: %[[MODE:.+]] = load i32, i32* %mode +// CHECK-NEXT: switch i32 %[[MODE]], label %{{.+}} [ +// CHECK-NEXT: i32 1, label %[[CASE1:.+]] +// CHECK-NEXT: i32 2, label %[[CASE2:.+]] +// CHECK-NEXT: ] + +// CHECK: [[CASE1]]: +// CHECK: br i1 %{{.+}}, label %[[CASE1_AWAIT_READY:.+]], label %[[CASE1_AWAIT_SUSPEND:.+]] +// CHECK: [[CASE1_AWAIT_SUSPEND]]: +// CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(i8* null) +// CHECK-NEXT: %[[HANDLE11:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP1:.+]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HANDLE11]]) + +// CHECK: %[[HANDLE12:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP1]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HANDLE12]]) +// CHECK-NEXT: call void @llvm.coro.resume +// CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend +// CHECK-NEXT: switch i8 %{{.+}}, label %coro.ret [ +// CHECK-NEXT: i8 0, label %[[CASE1_AWAIT_READY]] +// CHECK-NEXT: i8 1, label %[[CASE1_AWAIT_CLEANUP:.+]] +// CHECK-NEXT: ] +// CHECK: [[CASE1_AWAIT_CLEANUP]]: +// make sure that the awaiter eventually gets cleaned up. 
+// CHECK: call void @{{.+Awaiter.+}} + +// CHECK: [[CASE2]]: +// CHECK: br i1 %{{.+}}, label %[[CASE2_AWAIT_READY:.+]], label %[[CASE2_AWAIT_SUSPEND:.+]] +// CHECK: [[CASE2_AWAIT_SUSPEND]]: +// CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(i8* null) +// CHECK-NEXT: %[[HANDLE21:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP2:.+]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HANDLE21]]) + +// CHECK: %[[HANDLE22:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP2]] to i8* +// CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HANDLE22]]) +// CHECK-NEXT: call void @llvm.coro.resume +// CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend +// CHECK-NEXT: switch i8 %{{.+}}, label %coro.ret [ +// CHECK-NEXT: i8 0, label %[[CASE2_AWAIT_READY]] +// CHECK-NEXT: i8 1, label %[[CASE2_AWAIT_CLEANUP:.+]] +// CHECK-NEXT: ] +// CHECK: [[CASE2_AWAIT_CLEANUP]]: +// make sure that the awaiter eventually gets cleaned up. +// CHECK: call void @{{.+Awaiter.+}} diff --git a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp index 885c2db3fe822..de02765fc0c86 100644 --- a/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp +++ b/clang/test/CodeGenCoroutines/coro-symmetric-transfer-02.cpp @@ -1,20 +1,18 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O1 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O1 -emit-llvm %s -o - -disable-llvm-passes | FileCheck %s #include "Inputs/coroutine.h" -namespace coro = std::experimental::coroutines_v1; - struct Task { struct promise_type { Task get_return_object() noexcept { - return Task{coro::coroutine_handle::from_promise(*this)}; + return Task{std::coroutine_handle::from_promise(*this)}; } void return_void() noexcept {} struct final_awaiter { bool await_ready() noexcept { return false; } - coro::coroutine_handle<> await_suspend(coro::coroutine_handle h) noexcept { + std::coroutine_handle<> await_suspend(std::coroutine_handle h) noexcept { h.destroy(); return {}; } @@ -25,7 +23,7 @@ struct Task { final_awaiter final_suspend() noexcept { return {}; } - coro::suspend_always initial_suspend() noexcept { return {}; } + std::suspend_always initial_suspend() noexcept { return {}; } template auto await_transform(Awaitable &&awaitable) { @@ -33,7 +31,7 @@ struct Task { } }; - using handle_t = coro::coroutine_handle; + using handle_t = std::coroutine_handle; class Awaiter { public: @@ -43,7 +41,7 @@ struct Task { ~Awaiter(); bool await_ready() noexcept { return false; } - handle_t await_suspend(coro::coroutine_handle<> continuation) noexcept; + handle_t await_suspend(std::coroutine_handle<> continuation) noexcept; void await_resume(); private: @@ -91,10 +89,10 @@ Task bar() { // CHECK: br i1 %{{.+}}, label %[[CASE1_AWAIT_READY:.+]], label %[[CASE1_AWAIT_SUSPEND:.+]] // CHECK: [[CASE1_AWAIT_SUSPEND]]: // CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(i8* null) -// CHECK-NEXT: %[[HANDLE11:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP1:.+]] to i8* +// CHECK-NEXT: %[[HANDLE11:.+]] = bitcast %"struct.std::coroutine_handle"* %[[TMP1:.+]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HANDLE11]]) -// CHECK: %[[HANDLE12:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP1]] to i8* +// CHECK: %[[HANDLE12:.+]] 
= bitcast %"struct.std::coroutine_handle"* %[[TMP1]] to i8* // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HANDLE12]]) // CHECK-NEXT: call void @llvm.coro.resume // CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend @@ -110,10 +108,10 @@ Task bar() { // CHECK: br i1 %{{.+}}, label %[[CASE2_AWAIT_READY:.+]], label %[[CASE2_AWAIT_SUSPEND:.+]] // CHECK: [[CASE2_AWAIT_SUSPEND]]: // CHECK-NEXT: %{{.+}} = call token @llvm.coro.save(i8* null) -// CHECK-NEXT: %[[HANDLE21:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP2:.+]] to i8* +// CHECK-NEXT: %[[HANDLE21:.+]] = bitcast %"struct.std::coroutine_handle"* %[[TMP2:.+]] to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* %[[HANDLE21]]) -// CHECK: %[[HANDLE22:.+]] = bitcast %"struct.std::experimental::coroutines_v1::coroutine_handle"* %[[TMP2]] to i8* +// CHECK: %[[HANDLE22:.+]] = bitcast %"struct.std::coroutine_handle"* %[[TMP2]] to i8* // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* %[[HANDLE22]]) // CHECK-NEXT: call void @llvm.coro.resume // CHECK-NEXT: %{{.+}} = call i8 @llvm.coro.suspend diff --git a/clang/test/CodeGenCoroutines/coro-unhandled-exception-exp-namespace.cpp b/clang/test/CodeGenCoroutines/coro-unhandled-exception-exp-namespace.cpp new file mode 100644 index 0000000000000..96d7211b99b83 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-unhandled-exception-exp-namespace.cpp @@ -0,0 +1,78 @@ +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm %s -o - -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck --check-prefix=CHECK-LPAD %s + +#include "Inputs/coroutine-exp-namespace.h" + +namespace coro = std::experimental::coroutines_v1; + +namespace std { +using exception_ptr = int; +exception_ptr current_exception(); +} // namespace std + +struct coro_t { + struct promise_type { + coro_t get_return_object() { + coro::coroutine_handle{}; + return {}; + } + coro::suspend_never initial_suspend() { return {}; } + coro::suspend_never final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception() noexcept; + }; +}; + +struct Cleanup { + ~Cleanup(); +}; +void may_throw(); + +coro_t f() { + Cleanup x; + may_throw(); + co_return; +} + +// CHECK: @"?f@@YA?AUcoro_t@@XZ"( +// CHECK: invoke void @"?may_throw@@YAXXZ"() +// CHECK: to label %{{.+}} unwind label %[[EHCLEANUP:.+]] +// CHECK: [[EHCLEANUP]]: +// CHECK: %[[INNERPAD:.+]] = cleanuppad within none [] +// CHECK: call void @"??1Cleanup@@QEAA@XZ"( +// CHECK: cleanupret from %[[INNERPAD]] unwind label %[[CATCHSW:.+]] +// CHECK: [[CATCHSW]]: +// CHECK: %[[CATCHSWTOK:.+]] = catchswitch within none [label %[[CATCH:.+]]] unwind label +// CHECK: [[CATCH]]: +// CHECK: %[[CATCHTOK:.+]] = catchpad within [[CATCHSWTOK:.+]] +// CHECK: call void @"?unhandled_exception@promise_type@coro_t@@QEAAXXZ" +// CHECK: catchret from %[[CATCHTOK]] to label %[[CATCHRETDEST:.+]] +// CHECK: [[CATCHRETDEST]]: +// CHECK-NEXT: br label %[[TRYCONT:.+]] +// CHECK: [[TRYCONT]]: +// CHECK-NEXT: br label %[[COROFIN:.+]] +// CHECK: [[COROFIN]]: +// CHECK-NEXT: bitcast %"struct.std::experimental::coroutines_v1::suspend_never"* %{{.+}} to i8* +// CHECK-NEXT: call void @llvm.lifetime.start.p0i8( +// CHECK-NEXT: call void @"?final_suspend@promise_type@coro_t@@QEAA?AUsuspend_never@coroutines_v1@experimental@std@@XZ"( 
+ +// CHECK-LPAD: @_Z1fv( +// CHECK-LPAD: invoke void @_Z9may_throwv() +// CHECK-LPAD: to label %[[CONT:.+]] unwind label %[[CLEANUP:.+]] +// CHECK-LPAD: [[CLEANUP]]: +// CHECK-LPAD: call void @_ZN7CleanupD1Ev(%struct.Cleanup* {{[^,]*}} %x) #2 +// CHECK-LPAD: br label %[[CATCH:.+]] + +// CHECK-LPAD: [[CATCH]]: +// CHECK-LPAD: call i8* @__cxa_begin_catch +// CHECK-LPAD: call void @_ZN6coro_t12promise_type19unhandled_exceptionEv(%"struct.coro_t::promise_type"* {{[^,]*}} %__promise) #2 +// CHECK-LPAD: invoke void @__cxa_end_catch() +// CHECK-LPAD-NEXT: to label %[[CATCHRETDEST:.+]] unwind label +// CHECK-LPAD: [[CATCHRETDEST]]: +// CHECK-LPAD-NEXT: br label %[[TRYCONT:.+]] +// CHECK-LPAD: [[TRYCONT]]: +// CHECK-LPAD: br label %[[COROFIN:.+]] +// CHECK-LPAD: [[COROFIN]]: +// CHECK-LPAD-NEXT: bitcast %"struct.std::experimental::coroutines_v1::suspend_never"* %{{.+}} to i8* +// CHECK-LPAD-NEXT: call void @llvm.lifetime.start.p0i8( +// CHECK-LPAD-NEXT: call void @_ZN6coro_t12promise_type13final_suspendEv( diff --git a/clang/test/CodeGenCoroutines/coro-unhandled-exception.cpp b/clang/test/CodeGenCoroutines/coro-unhandled-exception.cpp index f038c5b3a9138..8ad0e6bc4379d 100644 --- a/clang/test/CodeGenCoroutines/coro-unhandled-exception.cpp +++ b/clang/test/CodeGenCoroutines/coro-unhandled-exception.cpp @@ -1,10 +1,8 @@ -// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm %s -o - -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s -// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck --check-prefix=CHECK-LPAD %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-pc-windows-msvc18.0.0 -emit-llvm %s -o - -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -fexceptions -fcxx-exceptions -disable-llvm-passes | FileCheck --check-prefix=CHECK-LPAD %s #include "Inputs/coroutine.h" -namespace coro = std::experimental::coroutines_v1; - namespace std { using exception_ptr = int; exception_ptr current_exception(); @@ -13,11 +11,11 @@ namespace std { struct coro_t { struct promise_type { coro_t get_return_object() { - coro::coroutine_handle{}; + std::coroutine_handle{}; return {}; } - coro::suspend_never initial_suspend() { return {}; } - coro::suspend_never final_suspend() noexcept { return {}; } + std::suspend_never initial_suspend() { return {}; } + std::suspend_never final_suspend() noexcept { return {}; } void return_void(){} void unhandled_exception() noexcept; }; @@ -50,9 +48,9 @@ coro_t f() { // CHECK: [[TRYCONT]]: // CHECK-NEXT: br label %[[COROFIN:.+]] // CHECK: [[COROFIN]]: -// CHECK-NEXT: bitcast %"struct.std::experimental::coroutines_v1::suspend_never"* %{{.+}} to i8* +// CHECK-NEXT: bitcast %"struct.std::suspend_never"* %{{.+}} to i8* // CHECK-NEXT: call void @llvm.lifetime.start.p0i8( -// CHECK-NEXT: call void @"?final_suspend@promise_type@coro_t@@QEAA?AUsuspend_never@coroutines_v1@experimental@std@@XZ"( +// CHECK-NEXT: call void @"?final_suspend@promise_type@coro_t@@QEAA?AUsuspend_never@std@@XZ"( // CHECK-LPAD: @_Z1fv( // CHECK-LPAD: invoke void @_Z9may_throwv() @@ -71,6 +69,6 @@ coro_t f() { // CHECK-LPAD: [[TRYCONT]]: // CHECK-LPAD: br label %[[COROFIN:.+]] // CHECK-LPAD: [[COROFIN]]: -// CHECK-LPAD-NEXT: bitcast %"struct.std::experimental::coroutines_v1::suspend_never"* %{{.+}} to i8* +// CHECK-LPAD-NEXT: bitcast %"struct.std::suspend_never"* 
%{{.+}} to i8* // CHECK-LPAD-NEXT: call void @llvm.lifetime.start.p0i8( // CHECK-LPAD-NEXT: call void @_ZN6coro_t12promise_type13final_suspendEv( diff --git a/clang/test/CodeGenCoroutines/microsoft-abi-operator-coawait-exp-namespace.cpp b/clang/test/CodeGenCoroutines/microsoft-abi-operator-coawait-exp-namespace.cpp new file mode 100644 index 0000000000000..33818c5d2fd3a --- /dev/null +++ b/clang/test/CodeGenCoroutines/microsoft-abi-operator-coawait-exp-namespace.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc18.0.0 -fcoroutines-ts -emit-llvm %s -o - -std=c++14 -disable-llvm-passes | FileCheck %s +struct no_suspend { + bool await_ready() { return true; } + template void await_suspend(F) {} + void await_resume() {} +}; + +struct A { + no_suspend operator co_await() { return {}; } +}; + +struct B {}; + +no_suspend operator co_await(B const &) { return {}; } + +// CHECK-LABEL: f( +extern "C" void f() { + A a; + B b; + // CHECK: call void @"??__LA@@QEAA?AUno_suspend@@XZ"( + a.operator co_await(); + // CHECK-NEXT: call i8 @"??__L@YA?AUno_suspend@@AEBUB@@@Z"( + operator co_await(b); +} diff --git a/clang/test/CodeGenCoroutines/microsoft-abi-operator-coawait.cpp b/clang/test/CodeGenCoroutines/microsoft-abi-operator-coawait.cpp index 26e3794930882..6abb5334bcc9b 100644 --- a/clang/test/CodeGenCoroutines/microsoft-abi-operator-coawait.cpp +++ b/clang/test/CodeGenCoroutines/microsoft-abi-operator-coawait.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc18.0.0 -fcoroutines-ts -emit-llvm %s -o - -std=c++14 -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc18.0.0 -emit-llvm %s -o - -std=c++20 -disable-llvm-passes | FileCheck %s struct no_suspend { bool await_ready() { return true; } template void await_suspend(F) {} diff --git a/clang/test/CodeGenOpenCL/spirv_target.cl b/clang/test/CodeGenOpenCL/spirv_target.cl new file mode 100644 index 0000000000000..07c4bde78a1a7 --- /dev/null +++ b/clang/test/CodeGenOpenCL/spirv_target.cl @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 %s -triple "spirv32-unknown-unknown" -verify -emit-llvm -o - | FileCheck %s -check-prefix=SPIRV32 +// RUN: %clang_cc1 %s -triple "spirv64-unknown-unknown" -verify -emit-llvm -o - | FileCheck %s -check-prefix=SPIRV64 + +// SPIRV32: target triple = "spirv32-unknown-unknown" +// SPIRV64: target triple = "spirv64-unknown-unknown" + +typedef struct { + char c; + void *v; + void *v2; +} my_st; + +kernel void foo(global long *arg) { +#if __SPIRV32__ == 1 + int res1[sizeof(my_st) == 12 ? 1 : -1]; // expected-no-diagnostics + int res2[sizeof(void *) == 4 ? 1 : -1]; // expected-no-diagnostics + int res3[sizeof(arg) == 4 ? 1 : -1]; // expected-no-diagnostics +#elif __SPIRV64__ == 1 + int res1[sizeof(my_st) == 24 ? 1 : -1]; // expected-no-diagnostics + int res2[sizeof(void *) == 8 ? 1 : -1]; // expected-no-diagnostics + int res3[sizeof(arg) == 8 ? 
1 : -1]; // expected-no-diagnostics +#endif + my_st *tmp = 0; + + // SPIRV32: store i64 4, i64 addrspace(1)* + // SPIRV64: store i64 8, i64 addrspace(1)* + arg[0] = (long)(&tmp->v); + // SPIRV32: store i64 8, i64 addrspace(1)* + // SPIRV64: store i64 16, i64 addrspace(1)* + arg[1] = (long)(&tmp->v2); +} diff --git a/clang/test/CoverageMapping/coroutine.cpp b/clang/test/CoverageMapping/coroutine.cpp index 0f7559849fb15..c9de301f81757 100644 --- a/clang/test/CoverageMapping/coroutine.cpp +++ b/clang/test/CoverageMapping/coroutine.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -mllvm -emptyline-comment-coverage=false -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -emit-llvm -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping %s -o - | FileCheck %s -namespace std::experimental { +namespace std { template struct coroutine_traits; @@ -16,16 +16,16 @@ struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept {} }; -} // namespace std::experimental +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; template <> -struct std::experimental::coroutine_traits { +struct std::coroutine_traits { struct promise_type { int get_return_object(); suspend_always initial_suspend(); diff --git a/clang/test/Driver/aarch64-cpus.c b/clang/test/Driver/aarch64-cpus.c index 4f049c79dac1b..1c64e34608377 100644 --- a/clang/test/Driver/aarch64-cpus.c +++ b/clang/test/Driver/aarch64-cpus.c @@ -404,6 +404,15 @@ // RUN: %clang -target aarch64 -mcpu=cortex-a510+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A510-CRYPTO %s // CORTEX-A510-CRYPTO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+sm4" "-target-feature" "+sha3" "-target-feature" "+sha2" "-target-feature" "+aes" +// RUN: %clang -target aarch64 -mcpu=cortex-x2 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-X2 %s +// CORTEX-X2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x2" +// CORTEX-X2-NOT: "-target-feature" "{{[+-]}}sm4" +// CORTEX-X2-NOT: "-target-feature" "{{[+-]}}sha3" +// CORTEX-X2-NOT: "-target-feature" "{{[+-]}}aes" +// CORTEX-X2-SAME: {{$}} +// RUN: %clang -target aarch64 -mcpu=cortex-x2+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-X2-CRYPTO %s +// CORTEX-X2-CRYPTO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+sm4" "-target-feature" "+sha3" "-target-feature" "+sha2" "-target-feature" "+aes" + // RUN: %clang -target aarch64_be -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s // RUN: %clang -target aarch64 -mbig-endian -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s // RUN: %clang -target aarch64_be -mbig-endian -mcpu=cortex-a57 -### -c %s 2>&1 | FileCheck -check-prefix=CA57-BE %s diff --git a/clang/test/Driver/frame-pointer-elim.c b/clang/test/Driver/frame-pointer-elim.c index 83dbf3816b684..beb14a9a671a6 100644 --- a/clang/test/Driver/frame-pointer-elim.c +++ b/clang/test/Driver/frame-pointer-elim.c @@ -90,7 +90,8 @@ // WARN-OMIT-LEAF-7S-NOT: warning: optimization flag '-momit-leaf-frame-pointer' is not supported for target 'armv7s' // WARN-OMIT-LEAF-7S: "-mframe-pointer=non-leaf" -// On AArch64 and PS4, default to omitting the frame pointer on leaf functions +// On AArch64, PS4, and VE, default to omitting the frame pointer on leaf +// functions // RUN: %clang -### -target aarch64 -S %s 2>&1 | \ // RUN: FileCheck 
--check-prefix=KEEP-NON-LEAF %s // RUN: %clang -### -target x86_64-scei-ps4 -S %s 2>&1 | \ @@ -99,6 +100,8 @@ // RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s // RUN: %clang -### -target aarch64-apple-darwin -arch arm64_32 -S %s 2>&1 | \ // RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s +// RUN: %clang -### -target ve-unknown-linux-gnu -S %s 2>&1 | \ +// RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s // RUN: %clang -### -target powerpc64 -S %s 2>&1 | \ // RUN: FileCheck --check-prefix=KEEP-ALL %s diff --git a/clang/test/Driver/frelaxed-template-template-args.cpp b/clang/test/Driver/frelaxed-template-template-args.cpp deleted file mode 100644 index dd6265ba8375e..0000000000000 --- a/clang/test/Driver/frelaxed-template-template-args.cpp +++ /dev/null @@ -1,5 +0,0 @@ -// RUN: %clang -fsyntax-only -frelaxed-template-template-args %s 2>&1 | FileCheck --check-prefix=CHECK-ON %s -// RUN: %clang -fsyntax-only -fno-relaxed-template-template-args %s 2>&1 | FileCheck --check-prefix=CHECK-OFF %s - -// CHECK-ON: warning: argument '-frelaxed-template-template-args' is deprecated [-Wdeprecated] -// CHECK-OFF: warning: argument '-fno-relaxed-template-template-args' is deprecated [-Wdeprecated] diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 4634cb7e9c9fb..bbbc0f3ded78a 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -426,6 +426,15 @@ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-EXPERIMENTAL-ZFH %s // RV32-EXPERIMENTAL-ZFH: "-target-feature" "+experimental-zfh" +// RUN: %clang -target riscv32-unknown-elf -march=rv32izfhmin -### %s \ +// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-EXPERIMENTAL-ZFHMIN-NOFLAG %s +// RV32-EXPERIMENTAL-ZFHMIN-NOFLAG: error: invalid arch name 'rv32izfhmin' +// RV32-EXPERIMENTAL-ZFHMIN-NOFLAG: requires '-menable-experimental-extensions' + +// RUN: %clang -target riscv32-unknown-elf -march=rv32izfhmin0p1 -menable-experimental-extensions -### %s \ +// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-EXPERIMENTAL-ZFHMIN %s +// RV32-EXPERIMENTAL-ZFHMIN: "-target-feature" "+experimental-zfhmin" + // RUN: %clang -target riscv32-unknown-elf -march=rv32izvamo -### %s -c 2>&1 | \ // RUN: FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVAMO-NOFLAG %s // RV32-EXPERIMENTAL-ZVAMO-NOFLAG: error: invalid arch name 'rv32izvamo' diff --git a/clang/test/Headers/opencl-c-header.cl b/clang/test/Headers/opencl-c-header.cl index bcce9623fafcb..be185ff8dcf17 100644 --- a/clang/test/Headers/opencl-c-header.cl +++ b/clang/test/Headers/opencl-c-header.cl @@ -5,6 +5,9 @@ // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=CL3.0 | FileCheck %s // RUN: %clang_cc1 -O0 -triple spir-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify -cl-std=clc++2021 | FileCheck %s +// RUN: %clang_cc1 -O0 -triple spirv32-unknown-unknown -internal-isystem ../../lib/Headers -include opencl-c.h -emit-llvm -o - %s -verify | FileCheck %s + + // Test including the default header as a module. // The module should be compiled only once and loaded from cache afterwards. // Change the directory mode to read only to make sure no new modules are created. @@ -91,7 +94,7 @@ global atomic_int z = ATOMIC_VAR_INIT(99); // Check that extension macros are defined correctly. // For SPIR all extensions are supported. 
-#if defined(__SPIR__) +#if defined(__SPIR__) || defined(__SPIRV__) // Verify that cl_intel_planar_yuv extension is defined from OpenCL 1.2 onwards. #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_1_2) diff --git a/clang/test/Index/coroutines.cpp b/clang/test/Index/coroutines.cpp index 000327ffaec49..9adda03ebc187 100644 --- a/clang/test/Index/coroutines.cpp +++ b/clang/test/Index/coroutines.cpp @@ -1,8 +1,8 @@ // RUN: c-index-test -test-load-source all -c %s -fsyntax-only -target x86_64-apple-darwin9 -fcoroutines-ts -std=c++1z -I%S/../SemaCXX/Inputs | FileCheck %s #include "std-coroutine.h" -using std::experimental::suspend_always; -using std::experimental::suspend_never; +using std::suspend_always; +using std::suspend_never; struct promise_void { void get_return_object(); @@ -13,7 +13,7 @@ struct promise_void { }; template <> -struct std::experimental::coroutine_traits { using promise_type = promise_void; }; +struct std::coroutine_traits { using promise_type = promise_void; }; void CoroutineTestRet() { co_return; diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index a36986c2551b2..2847273d1d3af 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -5,7 +5,7 @@ // RUN: %clang_cc1 -std=c++20 -fcxx-exceptions -fsized-deallocation -verify %s // RUN: %clang_cc1 -std=c++2b -fcxx-exceptions -fsized-deallocation -verify %s // -// RUN: %clang_cc1 -std=c++17 -fcxx-exceptions -fsized-deallocation -fno-relaxed-template-template-args -DNO_RELAXED_TEMPLATE_TEMPLATE_ARGS=1 -verify %s +// RUN: %clang_cc1 -std=c++17 -fcxx-exceptions -fsized-deallocation -frelaxed-template-template-args -DRELAXED_TEMPLATE_TEMPLATE_ARGS=1 -verify %s // RUN: %clang_cc1 -std=c++17 -fcxx-exceptions -fsized-deallocation -DCONCEPTS_TS=1 -verify %s // RUN: %clang_cc1 -std=c++14 -fno-rtti -fno-threadsafe-statics -verify %s -DNO_EXCEPTIONS -DNO_RTTI -DNO_THREADSAFE_STATICS -fsized-deallocation // RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -DNO_EXCEPTIONS -DCOROUTINES -verify -fsized-deallocation %s @@ -95,6 +95,10 @@ #error "wrong value for __cpp_impl_three_way_comparison" #endif +#if check(impl_coroutine, 0, 0, 0, 0, 201902L, 201902L) +#error "wrong value for __cpp_impl_coroutine" +#endif + // init_captures checked below #if check(modules, 0, 0, 0, 0, 0, 0) @@ -195,9 +199,9 @@ #error "wrong value for __cpp_nontype_template_args" #endif -#if defined(NO_RELAXED_TEMPLATE_TEMPLATE_ARGS) \ - ? check(template_template_args, 0, 0, 0, 0, 0, 0) \ - : check(template_template_args, 201611, 201611, 201611, 201611, 201611, 201611) +#if defined(RELAXED_TEMPLATE_TEMPLATE_ARGS) \ + ? check(template_template_args, 0, 0, 0, 201611, 201611, 201611) \ + : check(template_template_args, 0, 0, 0, 0, 0, 0) #error "wrong value for __cpp_template_template_args" #endif diff --git a/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c new file mode 100644 index 0000000000000..98b1cc88e7c18 --- /dev/null +++ b/clang/test/Lexer/minimize_source_to_dependency_directives_pragmas.c @@ -0,0 +1,21 @@ +// Test that the required #pragma directives are minimized +// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s 2>&1 | FileCheck %s + +#pragma once + +// some pragmas not needed in minimized source. +#pragma region TestRegion +#pragma endregion +#pragma warning "message" + +// pragmas required in the minimized source. 
+#pragma push_macro( "MYMACRO" ) +#pragma pop_macro("MYMACRO") +#pragma clang module import mymodule +#pragma include_alias(, "mystring.h") + +// CHECK: #pragma once +// CHECK-NEXT: #pragma push_macro( "MYMACRO" ) +// CHECK-NEXT: #pragma pop_macro("MYMACRO") +// CHECK-NEXT: #pragma clang module import mymodule +// CHECK-NEXT: #pragma include_alias(, "mystring.h") diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index efdc92263e7e9..62aabab678172 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -5,11 +5,11 @@ // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 // AARCH64: error: unknown target CPU 'not-a-cpu' -// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-r82, cortex-x1, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}} +// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-r82, cortex-x1, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}} // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' -// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-r82, cortex-x1, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}} +// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-r82, cortex-x1, cortex-x2, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-m1, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel{{$}} // RUN: not %clang_cc1 -triple 
i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/Modules/lookup.m b/clang/test/Modules/lookup.m index b22e41f845942..0e09bfdd7fd95 100644 --- a/clang/test/Modules/lookup.m +++ b/clang/test/Modules/lookup.m @@ -10,8 +10,8 @@ void test(id x) { [x method]; // expected-warning@-1{{multiple methods named 'method' found}} -// expected-note@Inputs/lookup_left.h:2{{using}} -// expected-note@Inputs/lookup_right.h:3{{also found}} +// expected-note@Inputs/lookup_right.h:3{{using}} +// expected-note@Inputs/lookup_left.h:2{{also found}} } // CHECK-PRINT: - (int)method; diff --git a/clang/test/Modules/lsv-debuginfo.cpp b/clang/test/Modules/lsv-debuginfo.cpp index 30d3c2583fbd2..d4c646146b37a 100644 --- a/clang/test/Modules/lsv-debuginfo.cpp +++ b/clang/test/Modules/lsv-debuginfo.cpp @@ -26,14 +26,14 @@ // CHECK: @__clang_ast = // This type isn't anchored anywhere, expect a full definition. -// CHECK: !DICompositeType({{.*}}, name: "AlignedCharArray<4, 16>", +// CHECK: !DICompositeType({{.*}}, name: "AlignedCharArray<4U, 16U>", // CHECK-SAME: elements: // C // CHECK: @__clang_ast = // Here, too. -// CHECK: !DICompositeType({{.*}}, name: "AlignedCharArray<4, 16>", +// CHECK: !DICompositeType({{.*}}, name: "AlignedCharArray<4U, 16U>", // CHECK-SAME: elements: #include diff --git a/clang/test/Modules/merge-objc-interface-visibility.m b/clang/test/Modules/merge-objc-interface-visibility.m index 181a2c716c6b3..a075611a420c8 100644 --- a/clang/test/Modules/merge-objc-interface-visibility.m +++ b/clang/test/Modules/merge-objc-interface-visibility.m @@ -4,6 +4,7 @@ // RUN: -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache // RUN: %clang_cc1 -emit-llvm -o %t/test.bc -F%t/Frameworks %t/test.m -DHIDDEN_FIRST=0 \ // RUN: -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules.cache +// UNSUPPORTED: -zos, -aix // Test a case when Objective-C interface is imported both as hidden and as visible. diff --git a/clang/test/Modules/method_pool_transitive.m b/clang/test/Modules/method_pool_transitive.m new file mode 100644 index 0000000000000..40c4330b75009 --- /dev/null +++ b/clang/test/Modules/method_pool_transitive.m @@ -0,0 +1,40 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -Wobjc-multiple-method-names -fsyntax-only -fmodules-cache-path=%t/modules.cache -fmodules -fimplicit-module-maps -F %t/Frameworks %t/test.m -verify + +// Verify we are handling methods from transitive modules, not just from immediate ones. 
+ +//--- Frameworks/Indirect.framework/Headers/Indirect.h +@interface NSObject +@end + +@interface Indirect : NSObject +- (int)method; +@end + +//--- Frameworks/Indirect.framework/Modules/module.modulemap +framework module Indirect { + header "Indirect.h" + export * +} + +//--- Frameworks/Immediate.framework/Headers/Immediate.h +#import +@interface Immediate : NSObject +- (void)method; +@end + +//--- Frameworks/Immediate.framework/Modules/module.modulemap +framework module Immediate { + header "Immediate.h" + export * +} + +//--- test.m +#import + +void test(id obj) { + [obj method]; // expected-warning{{multiple methods named 'method' found}} + // expected-note@Frameworks/Indirect.framework/Headers/Indirect.h:5{{using}} + // expected-note@Frameworks/Immediate.framework/Headers/Immediate.h:3{{also found}} +} diff --git a/clang/test/OpenMP/aarch64_vfabi_NarrowestDataSize.c b/clang/test/OpenMP/aarch64_vfabi_NarrowestDataSize.c index d65c4edaeea70..4186ec8e07301 100644 --- a/clang/test/OpenMP/aarch64_vfabi_NarrowestDataSize.c +++ b/clang/test/OpenMP/aarch64_vfabi_NarrowestDataSize.c @@ -5,7 +5,7 @@ // Note: -fopenmp and -fopenmp-simd behavior are expected to be the same. // This test checks the values of Narrowest Data Size (NDS), as defined in -// https://github.com/ARM-software/abi-aa/tree/master/vfabia64 +// https://github.com/ARM-software/abi-aa/tree/main/vfabia64 // // NDS is used to compute the token in the name of AdvSIMD // vector functions when no `simdlen` is specified, with the rule: diff --git a/clang/test/OpenMP/aarch64_vfabi_WidestDataSize.c b/clang/test/OpenMP/aarch64_vfabi_WidestDataSize.c index 841a64053e5e3..9d3689a57ce94 100644 --- a/clang/test/OpenMP/aarch64_vfabi_WidestDataSize.c +++ b/clang/test/OpenMP/aarch64_vfabi_WidestDataSize.c @@ -5,7 +5,7 @@ // Note: -fopenmp and -fopenmp-simd behavior are expected to be the same. // This test checks the values of Widest Data Size (WDS), as defined -// in https://github.com/ARM-software/abi-aa/tree/master/vfabia64 +// in https://github.com/ARM-software/abi-aa/tree/main/vfabia64 // // WDS is used to check the accepted values of `simdlen()` when // targeting fixed-length SVE vector function names. 
The values of diff --git a/clang/test/OpenMP/generic_loop_ast_print.cpp b/clang/test/OpenMP/generic_loop_ast_print.cpp index 7f5f53003e40c..e887a969aac45 100644 --- a/clang/test/OpenMP/generic_loop_ast_print.cpp +++ b/clang/test/OpenMP/generic_loop_ast_print.cpp @@ -23,7 +23,7 @@ //PRINT: template void templ_foo(T t) { //PRINT: T j, z; -//PRINT: #pragma omp loop collapse(C) reduction(+: z) lastprivate(j) +//PRINT: #pragma omp loop collapse(C) reduction(+: z) lastprivate(j) bind(thread) //PRINT: for (T i = 0; i < t; ++i) //PRINT: for (j = 0; j < t; ++j) //PRINT: z += i + j; @@ -38,12 +38,13 @@ //DUMP: DeclRefExpr{{.*}}'z' 'T' //DUMP: OMPLastprivateClause //DUMP: DeclRefExpr{{.*}}'j' 'T' +//DUMP: OMPBindClause //DUMP: ForStmt //DUMP: ForStmt //PRINT: template<> void templ_foo(int t) { //PRINT: int j, z; -//PRINT: #pragma omp loop collapse(2) reduction(+: z) lastprivate(j) +//PRINT: #pragma omp loop collapse(2) reduction(+: z) lastprivate(j) bind(thread) //PRINT: for (int i = 0; i < t; ++i) //PRINT: for (j = 0; j < t; ++j) //PRINT: z += i + j; @@ -60,12 +61,13 @@ //DUMP: DeclRefExpr{{.*}}'z' 'int':'int' //DUMP: OMPLastprivateClause //DUMP: DeclRefExpr{{.*}}'j' 'int':'int' +//DUMP: OMPBindClause //DUMP: ForStmt template void templ_foo(T t) { T j,z; - #pragma omp loop collapse(C) reduction(+:z) lastprivate(j) + #pragma omp loop collapse(C) reduction(+:z) lastprivate(j) bind(thread) for (T i = 0; i struct coroutine_traits; template struct coroutine_handle { @@ -21,15 +21,15 @@ template <> struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; -template struct std::experimental::coroutine_traits { +template struct std::coroutine_traits { struct promise_type { void get_return_object() noexcept; suspend_always initial_suspend() noexcept; @@ -42,7 +42,7 @@ template struct std::experimental::coroutine_traits struct std::experimental::coroutine_traits { +template struct std::coroutine_traits { struct promise_type { int get_return_object() noexcept; suspend_always initial_suspend() noexcept; diff --git a/clang/test/Preprocessor/_Pragma-newline.c b/clang/test/Preprocessor/_Pragma-newline.c new file mode 100644 index 0000000000000..43628eaef4674 --- /dev/null +++ b/clang/test/Preprocessor/_Pragma-newline.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -E -o - %s | FileCheck %s +// RUN: %clang_cc1 -E -P -o - %s | FileCheck %s +// RUN: %clang_cc1 -E -fminimize-whitespace -o - %s | FileCheck %s +// RUN: %clang_cc1 -E -fminimize-whitespace -P -o - %s | FileCheck %s + +// The PragmaAssumeNonNullHandler (and maybe others) passes an invalid +// SourceLocation when inside a _Pragma. Ensure we still emit semantic +// newlines. 
+// See report at https://reviews.llvm.org/D104601#3105044 + +_Pragma("clang assume_nonnull begin") test _Pragma("clang assume_nonnull end") + +// CHECK: {{^}}#pragma clang assume_nonnull begin{{$}} +// CHECK: test +// CHECK: {{^}}#pragma clang assume_nonnull end{{$}} diff --git a/clang/test/Preprocessor/predefined-macros.c b/clang/test/Preprocessor/predefined-macros.c index d03651a582dbd..4c81ad204ee0d 100644 --- a/clang/test/Preprocessor/predefined-macros.c +++ b/clang/test/Preprocessor/predefined-macros.c @@ -210,7 +210,21 @@ // CHECK-SPIR64-DAG: #define __SPIR64__ 1 // CHECK-SPIR64-NOT: #define __SPIR32__ 1 -// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \ +// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spirv32-unknown-unknown \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIRV32 +// CHECK-SPIRV32-DAG: #define __IMAGE_SUPPORT__ 1 +// CHECK-SPIRV32-DAG: #define __SPIRV__ 1 +// CHECK-SPIRV32-DAG: #define __SPIRV32__ 1 +// CHECK-SPIRV32-NOT: #define __SPIRV64__ 1 + +// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spirv64-unknown-unknown \ +// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIRV64 +// CHECK-SPIRV64-DAG: #define __IMAGE_SUPPORT__ 1 +// CHECK-SPIRV64-DAG: #define __SPIRV__ 1 +// CHECK-SPIRV64-DAG: #define __SPIRV64__ 1 +// CHECK-SPIRV64-NOT: #define __SPIRV32__ 1 + +// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple amdgcn-amd-amdhsa \ // RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP // CHECK-HIP-NOT: #define __CUDA_ARCH__ // CHECK-HIP: #define __HIPCC__ 1 diff --git a/clang/test/Sema/aix-attr-align.c b/clang/test/Sema/aix-attr-align.c index ac70aab669004..0fd6af4ee4c13 100644 --- a/clang/test/Sema/aix-attr-align.c +++ b/clang/test/Sema/aix-attr-align.c @@ -10,11 +10,11 @@ struct S { }; struct T { - int a[4] __attribute__((aligned(16))); // expected-warning {{requesting an alignment of 16 bytes or greater for struct members is not binary compatible with AIX XL 16.1 and older}} + int a[4] __attribute__((aligned(16))); // expected-warning {{requesting an alignment of 16 bytes or greater for struct members is not binary compatible with IBM XL C/C++ for AIX 16.1.0 and older}} }; struct U { - int a[2] __attribute__((aligned(32))); // expected-warning {{requesting an alignment of 16 bytes or greater for struct members is not binary compatible with AIX XL 16.1 and older}} + int a[2] __attribute__((aligned(32))); // expected-warning {{requesting an alignment of 16 bytes or greater for struct members is not binary compatible with IBM XL C/C++ for AIX 16.1.0 and older}} }; int a[8] __attribute__((aligned(8))); // no-warning diff --git a/clang/test/Sema/attr-btf_type_tag.c b/clang/test/Sema/attr-btf_type_tag.c new file mode 100644 index 0000000000000..b40d0e4f88107 --- /dev/null +++ b/clang/test/Sema/attr-btf_type_tag.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -x c -triple x86_64-pc-linux-gnu -dwarf-version=4 -fsyntax-only -verify %s + +#define __tag1 __attribute__((btf_type_tag("tag1"))) +#define __tag2 __attribute__((btf_type_tag("tag2"))) +#define __tag3 __attribute__((btf_type_tag("tag3"))) +#define __tag4 __attribute__((btf_type_tag("tag4"))) +#define __tag5 __attribute__((btf_type_tag("tag5"))) +#define __tag6 __attribute__((btf_type_tag("tag6"))) + +int __attribute__((btf_type_tag("tag1", "tag2"))) *invalid1; // expected-error {{'btf_type_tag' attribute takes one argument}} +int __attribute__((btf_type_tag(2))) *invalid2; // expected-error {{'btf_type_tag' attribute requires a string}} + +int * __tag1 __tag2 * __tag3 
__tag4 * __tag5 __tag6 *g; + +typedef void __fn_t(int); +typedef __fn_t __tag1 __tag2 * __tag3 __tag4 *__fn2_t; +struct t { + int __tag1 * __tag2 * __tag3 *a; + int __tag1 __tag2 __tag3 *b; + __fn2_t c; + long d; +}; +int __tag4 * __tag5 * __tag6 *foo1(struct t __tag1 * __tag2 * __tag3 *a1) { + return (int __tag4 * __tag5 * __tag6 *)a1[0][0]->d; +} diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c new file mode 100644 index 0000000000000..0d1aecaa99c30 --- /dev/null +++ b/clang/test/Sema/builtins-reduction-math.c @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 %s -pedantic -verify -triple=x86_64-apple-darwin9 + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef int int3 __attribute__((ext_vector_type(3))); +typedef unsigned unsigned4 __attribute__((ext_vector_type(4))); + +struct Foo { + char *p; +}; + +void test_builtin_reduce_max(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_max(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_max(v, v); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_max(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_max(i); + // expected-error@-1 {{1st argument must be a vector type (was 'int')}} +} + +void test_builtin_reduce_min(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_min(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_min(v, v); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_min(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_min(i); + // expected-error@-1 {{1st argument must be a vector type (was 'int')}} +} diff --git a/clang/test/Sema/warn-fortify-scanf.c b/clang/test/Sema/warn-fortify-scanf.c new file mode 100644 index 0000000000000..ec6e9220a7a3f --- /dev/null +++ b/clang/test/Sema/warn-fortify-scanf.c @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify + +typedef struct _FILE FILE; +extern int scanf(const char *format, ...); +extern int fscanf(FILE *f, const char *format, ...); +extern int sscanf(const char *input, const char *format, ...); + +void call_scanf() { + char buf10[10]; + char buf20[20]; + char buf30[30]; + scanf("%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 11}} + scanf("%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 12}} + scanf("%4s %5s %9s", buf20, buf30, buf10); + scanf("%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 21}} + scanf("%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 22}} + scanf("%19s %5s %9s", buf20, buf30, buf10); + scanf("%19s %29s %9s", buf20, buf30, buf10); + + scanf("%*21s %*30s %10s", buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 10, but the 
corresponding specifier may require size 11}} + scanf("%*21s %5s", buf10); + scanf("%10s %*30s", buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 10, but the corresponding specifier may require size 11}} + scanf("%9s %*30s", buf10); + + scanf("%4[a] %5[a] %10[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 11}} + scanf("%4[a] %5[a] %11[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 12}} + scanf("%4[a] %5[a] %9[a]", buf20, buf30, buf10); + scanf("%20[a] %5[a] %9[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 21}} + scanf("%21[a] %5[a] %9[a]", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 22}} + scanf("%19[a] %5[a] %9[a]", buf20, buf30, buf10); + scanf("%19[a] %29[a] %9[a]", buf20, buf30, buf10); + + scanf("%4c %5c %10c", buf20, buf30, buf10); + scanf("%4c %5c %11c", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 4 has size 10, but the corresponding specifier may require size 11}} + scanf("%4c %5c %9c", buf20, buf30, buf10); + scanf("%20c %5c %9c", buf20, buf30, buf10); + scanf("%21c %5c %9c", buf20, buf30, buf10); // expected-warning {{'scanf' may overflow; destination buffer in argument 2 has size 20, but the corresponding specifier may require size 21}} + + // Don't warn for other specifiers. 
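+ // A conversion such as %d stores into a fixed-size object through its pointer argument, so there is no destination buffer width for the fortify check to compare against.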
+ int x; + scanf("%12d", &x); +} + +void call_sscanf() { + char buf10[10]; + char buf20[20]; + char buf30[30]; + sscanf("a b c", "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 11}} + sscanf("a b c", "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 12}} + sscanf("a b c", "%4s %5s %9s", buf20, buf30, buf10); + sscanf("a b c", "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 21}} + sscanf("a b c", "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'sscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 22}} + sscanf("a b c", "%19s %5s %9s", buf20, buf30, buf10); + sscanf("a b c", "%19s %29s %9s", buf20, buf30, buf10); +} + +void call_fscanf() { + char buf10[10]; + char buf20[20]; + char buf30[30]; + fscanf(0, "%4s %5s %10s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 11}} + fscanf(0, "%4s %5s %11s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 5 has size 10, but the corresponding specifier may require size 12}} + fscanf(0, "%4s %5s %9s", buf20, buf30, buf10); + fscanf(0, "%20s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 21}} + fscanf(0, "%21s %5s %9s", buf20, buf30, buf10); // expected-warning {{'fscanf' may overflow; destination buffer in argument 3 has size 20, but the corresponding specifier may require size 22}} + fscanf(0, "%19s %5s %9s", buf20, buf30, buf10); + fscanf(0, "%19s %29s %9s", buf20, buf30, buf10); +} diff --git a/clang/test/Sema/x86-no-x87.cpp b/clang/test/Sema/x86-no-x87.cpp new file mode 100644 index 0000000000000..112f6bff7e1c8 --- /dev/null +++ b/clang/test/Sema/x86-no-x87.cpp @@ -0,0 +1,164 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple i686-linux-gnu -target-feature -x87 -DRET_ERROR +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple i686-linux-gnu -DNOERROR + +#ifdef NOERROR +// expected-no-diagnostics +#endif + +typedef long double long_double; + +// Declaration is fine, unless it is called or defined. 
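+// (These diagnostics are deferred: they fire only where code using the unsupported type would have to be emitted, which is why the bare prototypes and deleted overloads below are accepted.)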
+double decl(long_double x, long_double y); + +template +T decl_ld_del(T); + +// No code is generated for deleted functions +long_double decl_ld_del(long_double) = delete; +double decl_ld_del(double) = delete; +float decl_ld_del(float) = delete; + +#ifndef NOERROR +// expected-error@+4{{'def' requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +// expected-note@+3{{'def' defined here}} +// expected-note@+2{{'x' defined here}} +#endif +int def(long_double x) { +#ifndef NOERROR +// expected-error@+2{{'x' requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + return (int)x; +} + +#ifndef NOERROR +// expected-note@+3{{'ld_args' defined here}} +// expected-note@+2{{'ld_args' defined here}} +#endif +int ld_args(long_double x, long_double y); + +int call1(float x, float y) { +#ifndef NOERROR + // expected-error@+2 2{{'ld_args' requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + return ld_args(x, y); +} + +#ifndef NOERROR +// expected-note@+2{{'ld_ret' defined here}} +#endif +long_double ld_ret(double x, double y); + +int call2(float x, float y) { +#ifndef NOERROR + // expected-error@+2{{'ld_ret' requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + return (int)ld_ret(x, y); +} + +int binop(double x, double y) { +#ifndef NOERROR + // expected-error@+2 2{{expression requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + double z = (long_double)x * (long_double)y; + return (int)z; +} + +void assign1(long_double *ret, double x) { +#ifndef NOERROR + // expected-error@+2{{expression requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + *ret = x; +} + +struct st_long_double1 { +#ifndef NOERROR + // expected-note@+2{{'ld' defined here}} +#endif + long_double ld; +}; + +struct st_long_double2 { +#ifndef NOERROR + // expected-note@+2{{'ld' defined here}} +#endif + long_double ld; +}; + +struct st_long_double3 { +#ifndef NOERROR + // expected-note@+2{{'ld' defined here}} +#endif + long_double ld; +}; + +void assign2() { + struct st_long_double1 st; +#ifndef NOERROR + // expected-error@+3{{expression requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} + // expected-error@+2{{'ld' requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + st.ld = 0.42; +} + +void assign3() { + struct st_long_double2 st; +#ifndef NOERROR + // expected-error@+3{{expression requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} + // expected-error@+2{{'ld' requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + st.ld = 42; +} + +void assign4(double d) { + struct st_long_double3 st; +#ifndef NOERROR + // expected-error@+3{{expression requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} + // expected-error@+2{{'ld' requires 'long_double' (aka 'long double') type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + st.ld = d; +} + +void assign5() { + // unused variable declaration is 
fine + long_double ld = 0.42; +} + +#ifndef NOERROR +// expected-note@+3{{'d_ret1' defined here}} +// expected-error@+2{{'d_ret1' requires 'double' return type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif +double d_ret1(float x) { + return 0.0; +} + +#ifndef NOERROR +// expected-note@+2{{'d_ret2' defined here}} +#endif +double d_ret2(float x); + +int d_ret3(float x) { +#ifndef NOERROR + // expected-error@+2{{'d_ret2' requires 'double' return type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + return (int)d_ret2(x); +} + +#ifndef NOERROR +// expected-note@+3{{'f_ret1' defined here}} +// expected-error@+2{{'f_ret1' requires 'float' return type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif +float f_ret1(float x) { + return 0.0f; +} + +#ifndef NOERROR +// expected-note@+2{{'f_ret2' defined here}} +#endif +float f_ret2(float x); + +int f_ret3(float x) { +#ifndef NOERROR + // expected-error@+2{{'f_ret2' requires 'float' return type support, but target 'i686-unknown-linux-gnu' does not support it}} +#endif + return (int)f_ret2(x); +} diff --git a/clang/test/Sema/x86_64-no-x87.cpp b/clang/test/Sema/x86_64-no-x87.cpp new file mode 100644 index 0000000000000..b47e69e4b350c --- /dev/null +++ b/clang/test/Sema/x86_64-no-x87.cpp @@ -0,0 +1,145 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -target-feature -x87 +// RUN: %clang_cc1 -fsyntax-only -verify %s -triple x86_64-linux-gnu -DNOERROR + +#ifdef NOERROR +// expected-no-diagnostics +#endif + +typedef long double long_double; + +// Declaration is fine, unless it is called or defined. +double decl(long_double x, long_double y); + +template +T decl_ld_del(T); + +// No code is generated for deleted functions +long_double decl_ld_del(long_double) = delete; +double decl_ld_del(double) = delete; +float decl_ld_del(float) = delete; + +#ifndef NOERROR +// expected-error@+4{{'def' requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +// expected-note@+3{{'def' defined here}} +// expected-note@+2{{'x' defined here}} +#endif +int def(long_double x) { +#ifndef NOERROR +// expected-error@+2{{'x' requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + return (int)x; +} + +#ifndef NOERROR +// expected-note@+3{{'ld_args' defined here}} +// expected-note@+2{{'ld_args' defined here}} +#endif +int ld_args(long_double x, long_double y); + +int call1(float x, float y) { +#ifndef NOERROR + // expected-error@+2 2{{'ld_args' requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + return ld_args(x, y); +} + +#ifndef NOERROR +// expected-note@+2{{'ld_ret' defined here}} +#endif +long_double ld_ret(double x, double y); + +int call2(float x, float y) { +#ifndef NOERROR + // expected-error@+2{{'ld_ret' requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + return (int)ld_ret(x, y); +} + +int binop(double x, double y) { +#ifndef NOERROR + // expected-error@+2 2{{expression requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + double z = (long_double)x * (long_double)y; + return (int)z; +} + +void assign1(long_double *ret, double x) { +#ifndef NOERROR + // expected-error@+2{{expression requires 'long_double' (aka 'long 
double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + *ret = x; +} + +struct st_long_double1 { +#ifndef NOERROR + // expected-note@+2{{'ld' defined here}} +#endif + long_double ld; +}; + +struct st_long_double2 { +#ifndef NOERROR + // expected-note@+2{{'ld' defined here}} +#endif + long_double ld; +}; + +struct st_long_double3 { +#ifndef NOERROR + // expected-note@+2{{'ld' defined here}} +#endif + long_double ld; +}; + +void assign2() { + struct st_long_double1 st; +#ifndef NOERROR + // expected-error@+3{{expression requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} + // expected-error@+2{{'ld' requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + st.ld = 0.42; +} + +void assign3() { + struct st_long_double2 st; +#ifndef NOERROR + // expected-error@+3{{expression requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} + // expected-error@+2{{'ld' requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + st.ld = 42; +} + +void assign4(double d) { + struct st_long_double3 st; +#ifndef NOERROR + // expected-error@+3{{expression requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} + // expected-error@+2{{'ld' requires 'long_double' (aka 'long double') type support, but target 'x86_64-unknown-linux-gnu' does not support it}} +#endif + st.ld = d; +} + +void assign5() { + // unused variable declaration is fine + long_double ld = 0.42; +} + +// Double and float return types on x86_64 do not use x87 registers +double d_ret1(float x) { + return 0.0; +} + +double d_ret2(float x); + +int d_ret3(float x) { + return (int)d_ret2(x); +} + +float f_ret1(float x) { + return 0.0f; +} + +float f_ret2(float x); + +int f_ret3(float x) { + return (int)f_ret2(x); +} diff --git a/clang/test/SemaCXX/Inputs/std-coroutine-exp-namespace.h b/clang/test/SemaCXX/Inputs/std-coroutine-exp-namespace.h new file mode 100644 index 0000000000000..27702b516fafc --- /dev/null +++ b/clang/test/SemaCXX/Inputs/std-coroutine-exp-namespace.h @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wno-unreachable-code -Wno-unused-value +#ifndef STD_COROUTINE_EXPERIMENTAL_H +#define STD_COROUTINE_EXPERIMENTAL_H + +namespace std { +namespace experimental { +template +struct coroutine_traits { using promise_type = typename Ret::promise_type; }; + +template +struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; +}; +template <> +struct coroutine_handle { + template + coroutine_handle(coroutine_handle) noexcept; + static coroutine_handle from_address(void *); +}; + +struct suspend_always { + bool await_ready() noexcept { return false; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; + +struct suspend_never { + bool await_ready() noexcept { return true; } + void await_suspend(coroutine_handle<>) noexcept {} + void await_resume() noexcept {} +}; +} // namespace experimental +} // namespace std + +#endif // STD_COROUTINE_EXPERIMENTAL_H diff --git a/clang/test/SemaCXX/Inputs/std-coroutine.h b/clang/test/SemaCXX/Inputs/std-coroutine.h index e9af21aa51945..9809cd7cde7d8 100644 ---
a/clang/test/SemaCXX/Inputs/std-coroutine.h +++ b/clang/test/SemaCXX/Inputs/std-coroutine.h @@ -1,9 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wno-unreachable-code -Wno-unused-value +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wno-unreachable-code -Wno-unused-value #ifndef STD_COROUTINE_H #define STD_COROUTINE_H namespace std { -namespace experimental { template struct coroutine_traits { using promise_type = typename Ret::promise_type; }; @@ -31,7 +30,6 @@ struct suspend_never { void await_resume() noexcept {} }; -} // namespace experimental } // namespace std #endif // STD_COROUTINE_H diff --git a/clang/test/SemaCXX/co_await-range-for-exp-namespace.cpp b/clang/test/SemaCXX/co_await-range-for-exp-namespace.cpp new file mode 100644 index 0000000000000..daf78b02ca87a --- /dev/null +++ b/clang/test/SemaCXX/co_await-range-for-exp-namespace.cpp @@ -0,0 +1,171 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \ +// RUN: -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify \ +// RUN: -fblocks +#include "Inputs/std-coroutine-exp-namespace.h" + +using namespace std::experimental; + +template +struct Awaiter { + bool await_ready(); + void await_suspend(coroutine_handle<>); + Begin await_resume(); +}; + +template struct BeginTag { BeginTag() = delete; }; +template struct IncTag { IncTag() = delete; }; + +template +struct CoawaitTag { CoawaitTag() = delete; }; + +template +struct Iter { + using value_type = T; + using reference = T &; + using pointer = T *; + + IncTag operator++(); + reference operator*(); + pointer operator->(); +}; +template bool operator==(Iter, Iter); +template bool operator!=(Iter, Iter); + +template +struct Range { + BeginTag> begin(); + Iter end(); +}; + +struct MyForLoopArrayAwaiter { + struct promise_type { + MyForLoopArrayAwaiter get_return_object() { return {}; } + void return_void(); + void unhandled_exception(); + suspend_never initial_suspend(); + suspend_never final_suspend() noexcept; + template + Awaiter await_transform(T *) = delete; // expected-note {{explicitly deleted}} + }; +}; +MyForLoopArrayAwaiter g() { + int arr[10] = {0}; + for + co_await(auto i : arr) {} + // expected-error@-1 {{call to deleted member function 'await_transform'}} + // expected-note@-2 {{'await_transform' implicitly required by 'co_await' here}} +} + +struct ForLoopAwaiterBadBeginTransform { + struct promise_type { + ForLoopAwaiterBadBeginTransform get_return_object(); + void return_void(); + void unhandled_exception(); + suspend_never initial_suspend(); + suspend_never final_suspend() noexcept; + + template + Awaiter await_transform(BeginTag) = delete; // expected-note 1+ {{explicitly deleted}} + + template + CoawaitTag await_transform(IncTag); // expected-note 1+ {{candidate}} + }; +}; +ForLoopAwaiterBadBeginTransform bad_begin() { + Range R; + for + co_await(auto i : R) {} + // expected-error@-1 {{call to deleted member function 'await_transform'}} + // expected-note@-2 {{'await_transform' implicitly required by 'co_await' here}} +} +template +ForLoopAwaiterBadBeginTransform bad_begin_template(Dummy) { + Range R; + for + co_await(auto i : R) {} + // expected-error@-1 {{call to deleted member function 'await_transform'}} + // expected-note@-2 {{'await_transform' implicitly required by 'co_await' here}} +} +template 
ForLoopAwaiterBadBeginTransform bad_begin_template(int); // expected-note {{requested here}} + +template +Awaiter operator co_await(CoawaitTag) = delete; +// expected-note@-1 1+ {{explicitly deleted}} + +struct ForLoopAwaiterBadIncTransform { + struct promise_type { + ForLoopAwaiterBadIncTransform get_return_object(); + void return_void(); + void unhandled_exception(); + suspend_never initial_suspend(); + suspend_never final_suspend() noexcept; + + template + Awaiter await_transform(BeginTag e); + + template + CoawaitTag await_transform(IncTag); + }; +}; +ForLoopAwaiterBadIncTransform bad_inc_transform() { + Range R; + for + co_await(auto i : R) {} + // expected-error@-1 {{overload resolution selected deleted operator 'co_await'}} + // expected-note@-2 {{in implicit call to 'operator++' for iterator of type 'Range'}} +} + +template +ForLoopAwaiterBadIncTransform bad_inc_transform_template(Dummy) { + Range R; + for + co_await(auto i : R) {} + // expected-error@-1 {{overload resolution selected deleted operator 'co_await'}} + // expected-note@-2 {{in implicit call to 'operator++' for iterator of type 'Range'}} +} +template ForLoopAwaiterBadIncTransform bad_inc_transform_template(long); // expected-note {{requested here}} + +// Ensure we mark and check the function as a coroutine even if it's +// never instantiated. +template +constexpr void never_instant(T) { + static_assert(sizeof(T) != sizeof(T), "function should not be instantiated"); + for + co_await(auto i : foo(T{})) {} + // expected-error@-1 {{'co_await' cannot be used in a constexpr function}} +} + +namespace NS { +struct ForLoopAwaiterCoawaitLookup { + struct promise_type { + ForLoopAwaiterCoawaitLookup get_return_object(); + void return_void(); + void unhandled_exception(); + suspend_never initial_suspend(); + suspend_never final_suspend() noexcept; + template + CoawaitTag await_transform(BeginTag e); + template + Awaiter await_transform(IncTag); + }; +}; +} // namespace NS +using NS::ForLoopAwaiterCoawaitLookup; + +template +ForLoopAwaiterCoawaitLookup test_coawait_lookup(T) { + Range R; + for + co_await(auto i : R) {} + // expected-error@-1 {{no member named 'await_ready' in 'CoawaitTag, false>'}} +} +template ForLoopAwaiterCoawaitLookup test_coawait_lookup(int); // expected-note {{requested here}} + +// FIXME: This test should fail as well since the newly declared operator co_await +// should not be found by lookup. 
+namespace NS2 { +template +Awaiter operator co_await(CoawaitTag); +} +using NS2::operator co_await; +template ForLoopAwaiterCoawaitLookup test_coawait_lookup(long); diff --git a/clang/test/SemaCXX/co_await-range-for.cpp b/clang/test/SemaCXX/co_await-range-for.cpp index b6c6e6c40f977..e43f55a3434d1 100644 --- a/clang/test/SemaCXX/co_await-range-for.cpp +++ b/clang/test/SemaCXX/co_await-range-for.cpp @@ -1,10 +1,9 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 \ // RUN: -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify \ // RUN: -fblocks #include "Inputs/std-coroutine.h" -using namespace std::experimental; - +using namespace std; template struct Awaiter { diff --git a/clang/test/SemaCXX/coreturn-eh-exp-namespace.cpp b/clang/test/SemaCXX/coreturn-eh-exp-namespace.cpp new file mode 100644 index 0000000000000..d49e23ad31e23 --- /dev/null +++ b/clang/test/SemaCXX/coreturn-eh-exp-namespace.cpp @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fcxx-exceptions -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wall -Wextra -Wno-error=unreachable-code +// expected-no-diagnostics + +#include "Inputs/std-coroutine-exp-namespace.h" + +using std::experimental::suspend_always; +using std::experimental::suspend_never; + +struct awaitable { + bool await_ready(); + void await_suspend(std::experimental::coroutine_handle<>); // FIXME: coroutine_handle + void await_resume(); +} a; + +struct object { + ~object() {} +}; + +struct promise_void_return_value { + void get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_value(object); +}; + +struct VoidTagReturnValue { + struct promise_type { + VoidTagReturnValue get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_value(object); + }; +}; + +template +struct std::experimental::coroutine_traits { using promise_type = promise_void_return_value; }; + +VoidTagReturnValue test() { + object x = {}; + try { + co_return {}; + } catch (...) 
{ + throw; + } +} diff --git a/clang/test/SemaCXX/coreturn-eh.cpp b/clang/test/SemaCXX/coreturn-eh.cpp index 591ab8ec5c5e4..0d409b9b99bb6 100644 --- a/clang/test/SemaCXX/coreturn-eh.cpp +++ b/clang/test/SemaCXX/coreturn-eh.cpp @@ -1,14 +1,14 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fcxx-exceptions -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wall -Wextra -Wno-error=unreachable-code +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 -fcxx-exceptions -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wall -Wextra -Wno-error=unreachable-code // expected-no-diagnostics #include "Inputs/std-coroutine.h" -using std::experimental::suspend_always; -using std::experimental::suspend_never; +using std::suspend_always; +using std::suspend_never; struct awaitable { bool await_ready(); - void await_suspend(std::experimental::coroutine_handle<>); // FIXME: coroutine_handle + void await_suspend(std::coroutine_handle<>); // FIXME: coroutine_handle void await_resume(); } a; @@ -33,7 +33,7 @@ struct VoidTagReturnValue { }; template -struct std::experimental::coroutine_traits { using promise_type = promise_void_return_value; }; +struct std::coroutine_traits { using promise_type = promise_void_return_value; }; VoidTagReturnValue test() { object x = {}; diff --git a/clang/test/SemaCXX/coreturn-exp-namespace.cpp b/clang/test/SemaCXX/coreturn-exp-namespace.cpp new file mode 100644 index 0000000000000..c7fe212daed35 --- /dev/null +++ b/clang/test/SemaCXX/coreturn-exp-namespace.cpp @@ -0,0 +1,140 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wall -Wextra -Wno-error=unreachable-code +#include "Inputs/std-coroutine-exp-namespace.h" + +using std::experimental::suspend_always; +using std::experimental::suspend_never; + +struct awaitable { + bool await_ready(); + void await_suspend(std::experimental::coroutine_handle<>); // FIXME: coroutine_handle + void await_resume(); +} a; + +struct promise_void { + void get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +struct promise_void_return_value { + void get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_value(int); +}; + +struct VoidTagNoReturn { + struct promise_type { + VoidTagNoReturn get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + }; +}; + +struct VoidTagReturnValue { + struct promise_type { + VoidTagReturnValue get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_value(int); + }; +}; + +struct VoidTagReturnVoid { + struct promise_type { + VoidTagReturnVoid get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_void(); + }; +}; + +struct promise_float { + float get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +struct promise_int { + int get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_value(int); + void unhandled_exception(); 
+}; + +template <> +struct std::experimental::coroutine_traits { using promise_type = promise_void; }; + +template +struct std::experimental::coroutine_traits { using promise_type = promise_void_return_value; }; + +template +struct std::experimental::coroutine_traits { using promise_type = promise_float; }; + +template +struct std::experimental::coroutine_traits { using promise_type = promise_int; }; + +void test0() { co_await a; } +float test1() { co_await a; } + +int test2() { + co_await a; +} // expected-warning {{non-void coroutine does not return a value}} + +int test2a(bool b) { + if (b) + co_return 42; +} // expected-warning {{non-void coroutine does not return a value in all control paths}} + +int test3() { + co_await a; +b: + goto b; +} + +int test4() { + co_return 42; +} + +void test5(int) { + co_await a; +} // expected-warning {{non-void coroutine does not return a value}} + +void test6(int x) { + if (x) + co_return 42; +} // expected-warning {{non-void coroutine does not return a value in all control paths}} + +void test7(int y) { + if (y) + co_return 42; + else + co_return 101; +} + +VoidTagReturnVoid test8() { + co_await a; +} + +VoidTagReturnVoid test9(bool b) { + if (b) + co_return; +} + +VoidTagReturnValue test10() { + co_await a; +} // expected-warning {{non-void coroutine does not return a value}} + +VoidTagReturnValue test11(bool b) { + if (b) + co_return 42; +} // expected-warning {{non-void coroutine does not return a value in all control paths}} diff --git a/clang/test/SemaCXX/coreturn.cpp b/clang/test/SemaCXX/coreturn.cpp index eaa462016de55..7069a1040db23 100644 --- a/clang/test/SemaCXX/coreturn.cpp +++ b/clang/test/SemaCXX/coreturn.cpp @@ -1,12 +1,12 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wall -Wextra -Wno-error=unreachable-code +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify -fblocks -Wall -Wextra -Wno-error=unreachable-code #include "Inputs/std-coroutine.h" -using std::experimental::suspend_always; -using std::experimental::suspend_never; +using std::suspend_always; +using std::suspend_never; struct awaitable { bool await_ready(); - void await_suspend(std::experimental::coroutine_handle<>); // FIXME: coroutine_handle + void await_suspend(std::coroutine_handle<>); // FIXME: coroutine_handle void await_resume(); } a; @@ -72,16 +72,16 @@ struct promise_int { }; template <> -struct std::experimental::coroutine_traits { using promise_type = promise_void; }; +struct std::coroutine_traits { using promise_type = promise_void; }; template -struct std::experimental::coroutine_traits { using promise_type = promise_void_return_value; }; +struct std::coroutine_traits { using promise_type = promise_void_return_value; }; template -struct std::experimental::coroutine_traits { using promise_type = promise_float; }; +struct std::coroutine_traits { using promise_type = promise_float; }; template -struct std::experimental::coroutine_traits { using promise_type = promise_int; }; +struct std::coroutine_traits { using promise_type = promise_int; }; void test0() { co_await a; } float test1() { co_await a; } diff --git a/clang/test/SemaCXX/coroutine-builtins.cpp b/clang/test/SemaCXX/coroutine-builtins.cpp index 927faf6974b9b..7fa1b49172c2f 100644 --- a/clang/test/SemaCXX/coroutine-builtins.cpp +++ b/clang/test/SemaCXX/coroutine-builtins.cpp @@ -1,7 +1,8 @@ // RUN: %clang_cc1 -fsyntax-only 
-verify -fcoroutines-ts %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++20 %s // RUN: %clang_cc1 -fsyntax-only -verify -DERRORS %s -// Check that we don't crash when using __builtin_coro_* without the fcoroutine-ts option +// Check that we don't crash when using __builtin_coro_* without the -fcoroutines-ts or -std=c++20 option #ifdef ERRORS // expected-error@#A{{use of undeclared identifier '__builtin_coro_done'}} diff --git a/clang/test/SemaCXX/coroutine-final-suspend-noexcept-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-final-suspend-noexcept-exp-namespace.cpp new file mode 100644 index 0000000000000..92bb91be93a17 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-final-suspend-noexcept-exp-namespace.cpp @@ -0,0 +1,70 @@ +// This file contains references to sections of the Coroutines TS, which can be +// found at http://wg21.link/coroutines. + +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -verify %s -fcxx-exceptions -fexceptions -Wunused-result + +namespace std { +namespace experimental { +template +struct coroutine_traits { using promise_type = typename Ret::promise_type; }; + +template +struct coroutine_handle { + static coroutine_handle from_address(void *); // expected-note 2 {{must be declared with 'noexcept'}} +}; +template <> +struct coroutine_handle { + template + coroutine_handle(coroutine_handle); // expected-note 2 {{must be declared with 'noexcept'}} +}; + +struct suspend_never { + bool await_ready() { return true; } // expected-note 2 {{must be declared with 'noexcept'}} + void await_suspend(coroutine_handle<>) {} // expected-note 2 {{must be declared with 'noexcept'}} + void await_resume() {} // expected-note 2 {{must be declared with 'noexcept'}} + ~suspend_never() noexcept(false); // expected-note 2 {{must be declared with 'noexcept'}} +}; + +struct suspend_always { + bool await_ready() { return false; } + void await_suspend(coroutine_handle<>) {} + void await_resume() {} + suspend_never operator co_await(); // expected-note 2 {{must be declared with 'noexcept'}} + ~suspend_always() noexcept(false); // expected-note 2 {{must be declared with 'noexcept'}} +}; +} // namespace experimental +} // namespace std + +using namespace std; + +struct A { + bool await_ready(); + void await_resume(); + template + void await_suspend(F); +}; + +struct coro_t { + struct promise_type { + coro_t get_return_object(); + std::experimental::suspend_never initial_suspend(); + std::experimental::suspend_always final_suspend(); // expected-note 2 {{must be declared with 'noexcept'}} + void return_void(); + static void unhandled_exception(); + }; +}; + +coro_t f(int n) { // expected-error {{the expression 'co_await __promise.final_suspend()' is required to be non-throwing}} + A a{}; + co_await a; +} + +template +coro_t f_dep(T n) { // expected-error {{the expression 'co_await __promise.final_suspend()' is required to be non-throwing}} + A a{}; + co_await a; +} + +void foo() { + f_dep(5); // expected-note {{in instantiation of function template specialization 'f_dep' requested here}} +} diff --git a/clang/test/SemaCXX/coroutine-final-suspend-noexcept.cpp b/clang/test/SemaCXX/coroutine-final-suspend-noexcept.cpp index 48c65f8afb951..8635e4156a419 100644 --- a/clang/test/SemaCXX/coroutine-final-suspend-noexcept.cpp +++ b/clang/test/SemaCXX/coroutine-final-suspend-noexcept.cpp @@ -1,10 +1,9 @@ // This file contains references to sections of the Coroutines TS, which can be // found at http://wg21.link/coroutines.
-// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -verify %s -fcxx-exceptions -fexceptions -Wunused-result +// RUN: %clang_cc1 -std=c++20 -verify %s -fcxx-exceptions -fexceptions -Wunused-result namespace std { -namespace experimental { template struct coroutine_traits { using promise_type = typename Ret::promise_type; }; @@ -34,10 +33,9 @@ struct suspend_always { ~suspend_always() noexcept(false); // expected-note 2 {{must be declared with 'noexcept'}} }; -} // namespace experimental } // namespace std -using namespace std::experimental; +using namespace std; struct A { bool await_ready(); diff --git a/clang/test/SemaCXX/coroutine-mixed-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-mixed-exp-namespace.cpp new file mode 100644 index 0000000000000..5d1e687d5bd8f --- /dev/null +++ b/clang/test/SemaCXX/coroutine-mixed-exp-namespace.cpp @@ -0,0 +1,27 @@ +// This file tests that the mixed use of `std::experimental::coroutine*` and `std::coroutine*` +// doesn't make the compiler crash and that the diagnostic message is emitted correctly. +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s + +#include "Inputs/std-coroutine-exp-namespace.h" +#include "Inputs/std-coroutine.h" + +struct my_awaitable { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<> coro) noexcept; + void await_resume() noexcept; +}; + +struct promise_void { + void get_return_object(); + my_awaitable initial_suspend(); + my_awaitable final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +template <> +struct std::coroutine_traits { using promise_type = promise_void; }; + +void test() { + co_return; // expected-error {{Found mixed use of std namespace and std::experimental namespace for coroutine, which is disallowed. The coroutine components in std::experimental namespace is deprecated. Please use coroutine components under std namespace.}} +} diff --git a/clang/test/SemaCXX/coroutine-mixed2-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-mixed2-exp-namespace.cpp new file mode 100644 index 0000000000000..dcccd2b62e0c1 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-mixed2-exp-namespace.cpp @@ -0,0 +1,28 @@ +// This file tests the mixed use of `std::experimental::coroutine*` and `std::coroutine*`, +// similar to coroutine-mixed-exp-namespace. It checks that the relative order of +// the included headers doesn't affect the diagnostic messages. +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s + +#include "Inputs/std-coroutine-exp-namespace.h" +#include "Inputs/std-coroutine.h" + +struct my_awaitable { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<> coro) noexcept; + void await_resume() noexcept; +}; + +struct promise_void { + void get_return_object(); + my_awaitable initial_suspend(); + my_awaitable final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +template <> +struct std::coroutine_traits { using promise_type = promise_void; }; + +void test() { + co_return; // expected-error {{Found mixed use of std namespace and std::experimental namespace for coroutine, which is disallowed. The coroutine components in std::experimental namespace is deprecated.
Please use coroutine components under std namespace.}} +} diff --git a/clang/test/SemaCXX/coroutine-rvo-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-rvo-exp-namespace.cpp new file mode 100644 index 0000000000000..c65094c2e5130 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-rvo-exp-namespace.cpp @@ -0,0 +1,148 @@ +// RUN: %clang_cc1 -verify -std=c++17 -fcoroutines-ts -fsyntax-only %s + +namespace std::experimental { +template struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; +}; + +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; + +template +struct void_t_imp { + using type = void; +}; +template +using void_t = typename void_t_imp::type; + +template +struct traits_sfinae_base {}; + +template +struct traits_sfinae_base> { + using promise_type = typename T::promise_type; +}; + +template +struct coroutine_traits : public traits_sfinae_base {}; +} // namespace std::experimental + +struct suspend_never { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +struct MoveOnly { + MoveOnly() = default; + MoveOnly(const MoveOnly &) = delete; + MoveOnly(MoveOnly &&) = default; +}; + +struct NoCopyNoMove { + NoCopyNoMove() = default; + NoCopyNoMove(const NoCopyNoMove &) = delete; +}; + +template +struct task { + struct promise_type { + auto initial_suspend() { return suspend_never{}; } + auto final_suspend() noexcept { return suspend_never{}; } + auto get_return_object() { return task{}; } + static void unhandled_exception() {} + void return_value(T &&value) {} // expected-note 4{{passing argument}} + }; +}; + +task local2val() { + NoCopyNoMove value; + co_return value; +} + +task local2ref() { + NoCopyNoMove value; + co_return value; // expected-error {{non-const lvalue reference to type 'NoCopyNoMove' cannot bind to a temporary of type 'NoCopyNoMove'}} +} + +// We need the move constructor for construction of the coroutine. 
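+// (Coroutine parameters are moved into the coroutine frame when the coroutine is created, so a by-value MoveOnly parameter needs a usable move constructor even though it is only co_returned.)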
+task param2val(MoveOnly value) { + co_return value; +} + +task lvalue2val(NoCopyNoMove &value) { + co_return value; // expected-error {{rvalue reference to type 'NoCopyNoMove' cannot bind to lvalue of type 'NoCopyNoMove'}} +} + +task rvalue2val(NoCopyNoMove &&value) { + co_return value; +} + +task lvalue2ref(NoCopyNoMove &value) { + co_return value; +} + +task rvalue2ref(NoCopyNoMove &&value) { + co_return value; // expected-error {{non-const lvalue reference to type 'NoCopyNoMove' cannot bind to a temporary of type 'NoCopyNoMove'}} +} + +struct To { + operator MoveOnly() &&; +}; +task conversion_operator() { + To t; + co_return t; +} + +struct Construct { + Construct(MoveOnly); +}; +task converting_constructor() { + MoveOnly w; + co_return w; +} + +struct Derived : MoveOnly {}; +task derived2base() { + Derived result; + co_return result; +} + +struct RetThis { + task foo() && { + co_return *this; // expected-error {{rvalue reference to type 'RetThis' cannot bind to lvalue of type 'RetThis'}} + } +}; + +template +struct is_same { static constexpr bool value = false; }; + +template +struct is_same { static constexpr bool value = true; }; + +template +struct generic_task { + struct promise_type { + auto initial_suspend() { return suspend_never{}; } + auto final_suspend() noexcept { return suspend_never{}; } + auto get_return_object() { return generic_task{}; } + static void unhandled_exception(); + template + void return_value(U &&value) { + static_assert(is_same::value); + } + }; +}; + +generic_task param2template(MoveOnly value) { + co_return value; // We should deduce U = MoveOnly. +} + +generic_task lvalue2template(NoCopyNoMove &value) { + co_return value; // We should deduce U = NoCopyNoMove&. +} diff --git a/clang/test/SemaCXX/coroutine-rvo.cpp b/clang/test/SemaCXX/coroutine-rvo.cpp index 2c4bb0792cea0..6bf1dee67557c 100644 --- a/clang/test/SemaCXX/coroutine-rvo.cpp +++ b/clang/test/SemaCXX/coroutine-rvo.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -verify -std=c++17 -fcoroutines-ts -fsyntax-only %s +// RUN: %clang_cc1 -verify -std=c++20 -fsyntax-only %s -namespace std::experimental { +namespace std { template struct coroutine_handle { coroutine_handle() = default; static coroutine_handle from_address(void *) noexcept; @@ -30,11 +30,11 @@ struct traits_sfinae_base> { template struct coroutine_traits : public traits_sfinae_base {}; -} +} // namespace std struct suspend_never { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; diff --git a/clang/test/SemaCXX/coroutine-seh-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-seh-exp-namespace.cpp new file mode 100644 index 0000000000000..cc5fce9fae7f6 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-seh-exp-namespace.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -verify %s -fcxx-exceptions -fexceptions -triple x86_64-windows-msvc -fms-extensions +namespace std::experimental { +template struct coroutine_traits; + +template struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; +}; +template <> struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; + coroutine_handle() = default; + template + coroutine_handle(coroutine_handle) noexcept; +}; +} // namespace std::experimental + +struct suspend_always { + bool await_ready() noexcept; + void await_suspend(std::experimental::coroutine_handle<>) noexcept; + 
void await_resume() noexcept; +}; + +template <> struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object() noexcept; + suspend_always initial_suspend() noexcept; + suspend_always final_suspend() noexcept; + void return_void() noexcept; + void unhandled_exception() noexcept; + }; +}; + +void SEH_used() { + __try { // expected-error {{cannot use SEH '__try' in a coroutine when C++ exceptions are enabled}} + co_return; // expected-note {{function is a coroutine due to use of 'co_return' here}} + } __except (0) { + } +} diff --git a/clang/test/SemaCXX/coroutine-seh.cpp b/clang/test/SemaCXX/coroutine-seh.cpp index 647bb68b31857..6e778d9988810 100644 --- a/clang/test/SemaCXX/coroutine-seh.cpp +++ b/clang/test/SemaCXX/coroutine-seh.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -std=c++1z -fcoroutines-ts -verify %s -fcxx-exceptions -fexceptions -triple x86_64-windows-msvc -fms-extensions -namespace std::experimental { +// RUN: %clang_cc1 -std=c++20 -verify %s -fcxx-exceptions -fexceptions -triple x86_64-windows-msvc -fms-extensions +namespace std { template struct coroutine_traits; template struct coroutine_handle { @@ -12,15 +12,15 @@ template <> struct coroutine_handle { template coroutine_handle(coroutine_handle) noexcept; }; -} +} // namespace std struct suspend_always { bool await_ready() noexcept; - void await_suspend(std::experimental::coroutine_handle<>) noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; void await_resume() noexcept; }; -template <> struct std::experimental::coroutine_traits { +template <> struct std::coroutine_traits { struct promise_type { void get_return_object() noexcept; suspend_always initial_suspend() noexcept; diff --git a/clang/test/SemaCXX/coroutine-traits-undefined-template-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-traits-undefined-template-exp-namespace.cpp new file mode 100644 index 0000000000000..4179d20f44918 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-traits-undefined-template-exp-namespace.cpp @@ -0,0 +1,19 @@ +// This file contains references to sections of the Coroutines TS, which can be +// found at http://wg21.link/coroutines. + +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -verify %s -fcxx-exceptions -fexceptions -Wunused-result + +namespace std { +namespace experimental { +template +struct coroutine_traits { + struct promise_type {}; +}; + +template <> struct coroutine_traits; // expected-note {{forward declaration of 'std::experimental::coroutine_traits'}} +} // namespace experimental +} // namespace std + +void uses_forward_declaration() { + co_return; // expected-error {{this function cannot be a coroutine: missing definition of specialization 'coroutine_traits'}} +} diff --git a/clang/test/SemaCXX/coroutine-traits-undefined-template.cpp b/clang/test/SemaCXX/coroutine-traits-undefined-template.cpp index 4687ed245ac84..ea25cea365b94 100644 --- a/clang/test/SemaCXX/coroutine-traits-undefined-template.cpp +++ b/clang/test/SemaCXX/coroutine-traits-undefined-template.cpp @@ -1,18 +1,17 @@ // This file contains references to sections of the Coroutines TS, which can be // found at http://wg21.link/coroutines. 
-// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -verify %s -fcxx-exceptions -fexceptions -Wunused-result +// RUN: %clang_cc1 -std=c++20 -verify %s -fcxx-exceptions -fexceptions -Wunused-result namespace std { -namespace experimental { template struct coroutine_traits { struct promise_type {}; }; -template<> struct coroutine_traits; // expected-note {{forward declaration of 'std::experimental::coroutine_traits'}} -}} // namespace std::experimental +template <> struct coroutine_traits; // expected-note {{forward declaration of 'std::coroutine_traits'}} +} // namespace std void uses_forward_declaration() { co_return; // expected-error {{this function cannot be a coroutine: missing definition of specialization 'coroutine_traits'}} diff --git a/clang/test/SemaCXX/coroutine-unhandled_exception-warning-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-unhandled_exception-warning-exp-namespace.cpp new file mode 100644 index 0000000000000..5394ae78d0984 --- /dev/null +++ b/clang/test/SemaCXX/coroutine-unhandled_exception-warning-exp-namespace.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \ +// RUN: -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify \ +// RUN: -fblocks -Wno-unreachable-code -Wno-unused-value + +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \ +// RUN: -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify \ +// RUN: -fblocks -Wno-unreachable-code -Wno-unused-value \ +// RUN: -DDISABLE_WARNING -Wno-coroutine-missing-unhandled-exception + +#if __has_feature(cxx_exceptions) +#error This test requires exceptions be disabled +#endif + +#include "Inputs/std-coroutine-exp-namespace.h" + +using std::experimental::suspend_always; +using std::experimental::suspend_never; + +#ifndef DISABLE_WARNING +struct promise_void { // expected-note {{defined here}} +#else +struct promise_void { +#endif + void get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); +}; + +template +struct std::experimental::coroutine_traits { using promise_type = promise_void; }; + +#ifndef DISABLE_WARNING +void test0() { // expected-warning {{'promise_void' is required to declare the member 'unhandled_exception()' when exceptions are enabled}} + co_return; +} +#else +void test0() { // expected-no-diagnostics + co_return; +} +#endif diff --git a/clang/test/SemaCXX/coroutine-unhandled_exception-warning.cpp b/clang/test/SemaCXX/coroutine-unhandled_exception-warning.cpp index 88fae2e8acb26..5ea1e5d672442 100644 --- a/clang/test/SemaCXX/coroutine-unhandled_exception-warning.cpp +++ b/clang/test/SemaCXX/coroutine-unhandled_exception-warning.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 \ // RUN: -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify \ // RUN: -fblocks -Wno-unreachable-code -Wno-unused-value -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts \ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 \ // RUN: -fsyntax-only -Wignored-qualifiers -Wno-error=return-type -verify \ // RUN: -fblocks -Wno-unreachable-code -Wno-unused-value \ // RUN: -DDISABLE_WARNING -Wno-coroutine-missing-unhandled-exception @@ -13,8 +13,8 @@ #include "Inputs/std-coroutine.h" -using std::experimental::suspend_always; -using std::experimental::suspend_never; +using std::suspend_always; +using 
std::suspend_never; #ifndef DISABLE_WARNING struct promise_void { // expected-note {{defined here}} @@ -28,7 +28,7 @@ struct promise_void { }; template -struct std::experimental::coroutine_traits { using promise_type = promise_void; }; +struct std::coroutine_traits { using promise_type = promise_void; }; #ifndef DISABLE_WARNING void test0() { // expected-warning {{'promise_void' is required to declare the member 'unhandled_exception()' when exceptions are enabled}} diff --git a/clang/test/SemaCXX/coroutine-uninitialized-warning-crash-exp-namespace.cpp b/clang/test/SemaCXX/coroutine-uninitialized-warning-crash-exp-namespace.cpp new file mode 100644 index 0000000000000..3374084edda9d --- /dev/null +++ b/clang/test/SemaCXX/coroutine-uninitialized-warning-crash-exp-namespace.cpp @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wall -Wextra -Wuninitialized -fblocks +#include "Inputs/std-coroutine-exp-namespace.h" + +using namespace std::experimental; + +struct A { + bool await_ready() { return true; } + int await_resume() { return 42; } + template + void await_suspend(F) {} +}; + +struct coro_t { + struct promise_type { + coro_t get_return_object() { return {}; } + suspend_never initial_suspend() { return {}; } + suspend_never final_suspend() noexcept { return {}; } + A yield_value(int) { return {}; } + void return_void() {} + static void unhandled_exception() {} + }; +}; + +coro_t f(int n) { + if (n == 0) + co_return; + co_yield 42; + int x = co_await A{}; +} + +template +coro_t g(int n) { + if (n == 0) + co_return; + co_yield 42; + int x = co_await Await{}; +} + +int main() { + f(0); + g(0); +} diff --git a/clang/test/SemaCXX/coroutine-uninitialized-warning-crash.cpp b/clang/test/SemaCXX/coroutine-uninitialized-warning-crash.cpp index 391f64d37e5bf..8b471206d09fe 100644 --- a/clang/test/SemaCXX/coroutine-uninitialized-warning-crash.cpp +++ b/clang/test/SemaCXX/coroutine-uninitialized-warning-crash.cpp @@ -1,8 +1,7 @@ -// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++14 -fcoroutines-ts -fsyntax-only -Wall -Wextra -Wuninitialized -fblocks +// RUN: %clang_cc1 -triple x86_64-apple-darwin9 %s -std=c++20 -fsyntax-only -Wall -Wextra -Wuninitialized -fblocks #include "Inputs/std-coroutine.h" -using namespace std::experimental; - +using namespace std; struct A { bool await_ready() { return true; } diff --git a/clang/test/SemaCXX/coroutine_handle-addres-return-type.cpp b/clang/test/SemaCXX/coroutine_handle-address-return-type-exp-namespace.cpp similarity index 100% rename from clang/test/SemaCXX/coroutine_handle-addres-return-type.cpp rename to clang/test/SemaCXX/coroutine_handle-address-return-type-exp-namespace.cpp diff --git a/clang/test/SemaCXX/coroutine_handle-address-return-type.cpp b/clang/test/SemaCXX/coroutine_handle-address-return-type.cpp new file mode 100644 index 0000000000000..884ff3680e1a1 --- /dev/null +++ b/clang/test/SemaCXX/coroutine_handle-address-return-type.cpp @@ -0,0 +1,75 @@ +// RUN: %clang_cc1 -verify %s -stdlib=libc++ -std=c++20 -fsyntax-only + +namespace std { +template +struct coroutine_handle; + +template <> +struct coroutine_handle { + coroutine_handle() = default; + static coroutine_handle from_address(void *) noexcept; + void *address() const; +}; + +template +struct coroutine_handle : public coroutine_handle<> { +}; + +template +struct void_t_imp { + using type = void; +}; +template +using void_t = typename void_t_imp::type; + +template +struct traits_sfinae_base {}; + +template +struct 
traits_sfinae_base> { using promise_type = typename T::promise_type; }; + +template +struct coroutine_traits : public traits_sfinae_base {}; +} // namespace std + +struct suspend_never { + bool await_ready() noexcept; + void await_suspend(std::coroutine_handle<>) noexcept; + void await_resume() noexcept; +}; + +struct task { + struct promise_type { + auto initial_suspend() { return suspend_never{}; } + auto final_suspend() noexcept { return suspend_never{}; } + auto get_return_object() { return task{}; } + static void unhandled_exception() {} + void return_void() {} + }; +}; + +namespace std { +template <> +struct coroutine_handle : public coroutine_handle<> { + coroutine_handle *address() const; // expected-warning {{return type of 'coroutine_handle<>::address should be 'void*'}} +}; +} // namespace std + +struct awaitable { + bool await_ready(); + + std::coroutine_handle + await_suspend(std::coroutine_handle<> handle); + void await_resume(); +} a; + +task f() { + co_await a; +} + +int main() { + f(); + return 0; +} diff --git a/clang/test/SemaCXX/coroutines-exp-namespace.cpp b/clang/test/SemaCXX/coroutines-exp-namespace.cpp new file mode 100644 index 0000000000000..4ea0c66c5b343 --- /dev/null +++ b/clang/test/SemaCXX/coroutines-exp-namespace.cpp @@ -0,0 +1,1450 @@ +// This file is the same as coroutines.cpp, except the components are defined in namespace std::experimental. +// The intent of this test is to make sure the std::experimental implementation still works. +// TODO: Remove this test once we drop support for <experimental/coroutine>. + +// RUN: %clang_cc1 -std=c++2b -fsyntax-only -verify=expected,cxx20_2b,cxx2b %s -fcxx-exceptions -fexceptions -Wunused-result +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx14_20,cxx20_2b %s -fcxx-exceptions -fexceptions -Wunused-result +// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -fsyntax-only -verify=expected,cxx14_20 %s -fcxx-exceptions -fexceptions -Wunused-result + +void no_coroutine_traits_bad_arg_await() { + co_await a; // expected-error {{include }} + // expected-error@-1 {{use of undeclared identifier 'a'}} +} + +void no_coroutine_traits_bad_arg_yield() { + co_yield a; // expected-error {{include }} + // expected-error@-1 {{use of undeclared identifier 'a'}} +} + +void no_coroutine_traits_bad_arg_return() { + co_return a; // expected-error {{include }} + // expected-error@-1 {{use of undeclared identifier 'a'}} +} + +void no_coroutine_traits() { + co_await 4; // expected-error {{std::coroutine_traits type was not found; include <coroutine> before defining a coroutine; include <experimental/coroutine> if your version of libcxx is less than 14.0}} +} + +namespace std { +namespace experimental { + +template +struct void_t_imp { + using type = void; +}; +template +using void_t = typename void_t_imp::type; + +template +struct traits_sfinae_base {}; + +template +struct traits_sfinae_base> { + using promise_type = typename T::promise_type; +}; + +template +struct coroutine_traits : public traits_sfinae_base {}; +} // namespace experimental +} // namespace std + +template struct coro {}; +template +struct std::experimental::coroutine_traits, Ps...> { + using promise_type = Promise; +}; + +struct awaitable { + bool await_ready() noexcept; + template + void await_suspend(F) noexcept; + void await_resume() noexcept; +} a; + +struct suspend_always { + bool await_ready() noexcept { return false; } + template + void await_suspend(F) noexcept; + void await_resume() noexcept {} +}; + +struct suspend_never { + bool await_ready() noexcept { return true; } + template + void await_suspend(F)
noexcept; + void await_resume() noexcept {} +}; + +struct auto_await_suspend { + bool await_ready(); + template auto await_suspend(F) {} + void await_resume(); +}; + +struct DummyVoidTag {}; +DummyVoidTag no_specialization() { // expected-error {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + co_await a; +} + +template +struct std::experimental::coroutine_traits {}; + +int no_promise_type() { // expected-error {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + co_await a; +} + +int no_promise_type_multiple_awaits(int) { // expected-error {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + co_await a; + co_await a; +} + +template <> +struct std::experimental::coroutine_traits { typedef int promise_type; }; +double bad_promise_type(double) { // expected-error {{this function cannot be a coroutine: 'experimental::coroutine_traits::promise_type' (aka 'int') is not a class}} + co_await a; +} + +template <> +struct std::experimental::coroutine_traits { + struct promise_type {}; +}; +double bad_promise_type_2(int) { // expected-error {{no member named 'initial_suspend'}} + co_yield 0; // expected-error {{no member named 'yield_value' in 'std::experimental::coroutine_traits::promise_type'}} +} + +struct promise; // expected-note {{forward declaration}} +struct promise_void; +struct void_tag {}; +template +struct std::experimental::coroutine_traits { using promise_type = promise; }; +template +struct std::experimental::coroutine_traits { using promise_type = promise_void; }; + +// FIXME: This diagnostic is terrible. +void undefined_promise() { // expected-error {{this function cannot be a coroutine: 'experimental::coroutine_traits::promise_type' (aka 'promise') is an incomplete type}} + co_await a; +} + +struct yielded_thing { + const char *p; + short a, b; +}; + +struct not_awaitable {}; + +struct promise { + void get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + awaitable yield_value(int); // expected-note 2{{candidate}} + awaitable yield_value(yielded_thing); // expected-note 2{{candidate}} + not_awaitable yield_value(void()); // expected-note 2{{candidate}} + void return_value(int); // expected-note 2{{here}} + void unhandled_exception(); +}; + +struct promise_void { + void get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +void no_coroutine_handle() { // expected-error {{std::coroutine_handle type was not found; include <coroutine> before defining a coroutine; include <experimental/coroutine> if your version of libcxx is less than 14.0}} + //expected-note@-1 {{call to 'initial_suspend' implicitly required by the initial suspend point}} + co_return 5; //expected-note {{function is a coroutine due to use of 'co_return' here}} +} + +namespace std { +namespace experimental { +template +struct coroutine_handle { + static coroutine_handle from_address(void *) noexcept; +}; +template <> +struct coroutine_handle { + template + coroutine_handle(coroutine_handle) noexcept; + static coroutine_handle from_address(void *) noexcept; +}; +} // namespace experimental +} // namespace std + +void yield() { + co_yield 0; + co_yield {"foo", 1, 2}; + co_yield {1e100}; // expected-error {{cannot be narrowed}} expected-note {{explicit cast}} expected-warning {{implicit conversion}} expected-warning
{{braces around scalar}} + co_yield {"foo", __LONG_LONG_MAX__}; // expected-error {{cannot be narrowed}} expected-note {{explicit cast}} expected-warning {{changes value}} + co_yield {"foo"}; + co_yield "foo"; // expected-error {{no matching}} + co_yield 1.0; + co_yield yield; // expected-error {{no member named 'await_ready' in 'not_awaitable'}} +} + +void check_auto_await_suspend() { + co_await auto_await_suspend{}; // Should compile successfully. +} + +void coreturn(int n) { + co_await a; + if (n == 0) + co_return 3; + if (n == 1) + co_return {4}; // expected-warning {{braces around scalar initializer}} + if (n == 2) + co_return "foo"; // expected-error {{cannot initialize a parameter of type 'int' with an lvalue of type 'const char[4]'}} + co_return 42; +} + +template +void co_await_non_dependent_arg(T) { + co_await a; +} +template void co_await_non_dependent_arg(int); + +void mixed_yield() { + co_yield 0; // expected-note {{use of 'co_yield'}} + return; // expected-error {{not allowed in coroutine}} +} + +void mixed_yield_invalid() { + co_yield blah; // expected-error {{use of undeclared identifier}} + // expected-note@-1 {{function is a coroutine due to use of 'co_yield'}} + return; // expected-error {{return statement not allowed in coroutine}} +} + +template +void mixed_yield_template(T) { + co_yield blah; // expected-error {{use of undeclared identifier}} + // expected-note@-1 {{function is a coroutine due to use of 'co_yield'}} + return; // expected-error {{return statement not allowed in coroutine}} +} + +template +void mixed_yield_template2(T) { + co_yield 42; + // expected-note@-1 {{function is a coroutine due to use of 'co_yield'}} + return; // expected-error {{return statement not allowed in coroutine}} +} + +template +void mixed_yield_template3(T v) { + co_yield blah(v); + // expected-note@-1 {{function is a coroutine due to use of 'co_yield'}} + return; // expected-error {{return statement not allowed in coroutine}} +} + +void mixed_await() { + co_await a; // expected-note {{use of 'co_await'}} + return; // expected-error {{not allowed in coroutine}} +} + +void mixed_await_invalid() { + co_await 42; // expected-error {{'int' is not a structure or union}} + // expected-note@-1 {{function is a coroutine due to use of 'co_await'}} + return; // expected-error {{not allowed in coroutine}} +} + +template +void mixed_await_template(T) { + co_await 42; + // expected-note@-1 {{function is a coroutine due to use of 'co_await'}} + return; // expected-error {{not allowed in coroutine}} +} + +template +void mixed_await_template2(T v) { + co_await v; // expected-error {{'long' is not a structure or union}} + // expected-note@-1 {{function is a coroutine due to use of 'co_await'}} + return; // expected-error {{not allowed in coroutine}} +} +template void mixed_await_template2(long); // expected-note {{requested here}} + +void only_coreturn(void_tag) { + co_return; // OK +} + +void mixed_coreturn(void_tag, bool b) { + if (b) + co_return; // expected-note {{use of 'co_return'}} + else + return; // expected-error {{not allowed in coroutine}} +} + +void mixed_coreturn_invalid(bool b) { + if (b) + co_return; // expected-note {{use of 'co_return'}} + // expected-error@-1 {{no member named 'return_void' in 'promise'}} + else + return; // expected-error {{not allowed in coroutine}} +} + +template +void mixed_coreturn_template(void_tag, bool b, T v) { + if (b) + co_return v; // expected-note {{use of 'co_return'}} + // expected-error@-1 {{no member named 'return_value' in 'promise_void'}} + else 
+ return; // expected-error {{not allowed in coroutine}} +} +template void mixed_coreturn_template(void_tag, bool, int); // expected-note {{requested here}} + +template +void mixed_coreturn_template2(bool b, T) { + if (b) + co_return v; // expected-note {{use of 'co_return'}} + // expected-error@-1 {{use of undeclared identifier 'v'}} + else + return; // expected-error {{not allowed in coroutine}} +} + +struct CtorDtor { + CtorDtor() { + co_yield 0; // expected-error {{'co_yield' cannot be used in a constructor}} + } + CtorDtor(awaitable a) { + // The spec doesn't say this is ill-formed, but it must be. + co_await a; // expected-error {{'co_await' cannot be used in a constructor}} + } + ~CtorDtor() { + co_return 0; // expected-error {{'co_return' cannot be used in a destructor}} + } + void operator=(CtorDtor &) { + co_yield 0; // OK. + } + void operator=(CtorDtor const &) { + co_yield 0; // OK. + } + void operator=(CtorDtor &&) { + co_await a; // OK. + } + void operator=(CtorDtor const &&) { + co_await a; // OK. + } + void operator=(int) { + co_await a; // OK. Not a special member + } +}; + +namespace std { +class type_info; +} + +void unevaluated() { + decltype(co_await a); // expected-error {{'co_await' cannot be used in an unevaluated context}} + // expected-warning@-1 {{declaration does not declare anything}} + sizeof(co_await a); // expected-error {{'co_await' cannot be used in an unevaluated context}} + // expected-error@-1 {{invalid application of 'sizeof' to an incomplete type 'void'}} + // expected-warning@-2 {{expression with side effects has no effect in an unevaluated context}} + typeid(co_await a); // expected-error {{'co_await' cannot be used in an unevaluated context}} + // expected-warning@-1 {{expression with side effects has no effect in an unevaluated context}} + // expected-warning@-2 {{expression result unused}} + decltype(co_yield 1); // expected-error {{'co_yield' cannot be used in an unevaluated context}} + // expected-warning@-1 {{declaration does not declare anything}} + sizeof(co_yield 2); // expected-error {{'co_yield' cannot be used in an unevaluated context}} + // expected-error@-1 {{invalid application of 'sizeof' to an incomplete type 'void'}} + // expected-warning@-2 {{expression with side effects has no effect in an unevaluated context}} + typeid(co_yield 3); // expected-error {{'co_yield' cannot be used in an unevaluated context}} + // expected-warning@-1 {{expression with side effects has no effect in an unevaluated context}} + // expected-warning@-2 {{expression result unused}} +} + +// [expr.await]p2: "An await-expression shall not appear in a default argument." +// FIXME: A better diagnostic would explicitly state that default arguments are +// not allowed. A user may not understand that this is "outside a function." +void default_argument(int arg = co_await 0) {} // expected-error {{'co_await' cannot be used outside a function}} + +void await_in_catch_coroutine() { + try { + } catch (...) { // FIXME: Emit a note diagnostic pointing out the try handler on this line. + []() -> void { co_await a; }(); // OK + co_await a; // expected-error {{'co_await' cannot be used in the handler of a try block}} + } +} + +void await_nested_in_catch_coroutine() { + try { + } catch (...) { // FIXME: Emit a note diagnostic pointing out the try handler on this line. + try { + co_await a; // expected-error {{'co_await' cannot be used in the handler of a try block}} + []() -> void { co_await a; }(); // OK + } catch (...) 
{ + co_return 123; + } + } +} + +void await_in_lambda_in_catch_coroutine() { + try { + } catch (...) { + []() -> void { co_await a; }(); // OK + } +} + +void yield_in_catch_coroutine() { + try { + } catch (...) { + co_yield 1; // expected-error {{'co_yield' cannot be used in the handler of a try block}} + } +} + +void return_in_catch_coroutine() { + try { + } catch (...) { + co_return 123; // OK + } +} + +constexpr auto constexpr_deduced_return_coroutine() { + co_yield 0; // expected-error {{'co_yield' cannot be used in a constexpr function}} + // expected-error@-1 {{'co_yield' cannot be used in a function with a deduced return type}} +} + +void varargs_coroutine(const char *, ...) { + co_await a; // expected-error {{'co_await' cannot be used in a varargs function}} +} + +auto deduced_return_coroutine() { + co_await a; // expected-error {{'co_await' cannot be used in a function with a deduced return type}} +} + +struct outer {}; +struct await_arg_1 {}; +struct await_arg_2 {}; + +namespace adl_ns { +struct coawait_arg_type {}; +awaitable operator co_await(coawait_arg_type) noexcept; +} // namespace adl_ns + +namespace dependent_operator_co_await_lookup { +template void await_template(T t) { + // no unqualified lookup results + co_await t; // expected-error {{no member named 'await_ready' in 'dependent_operator_co_await_lookup::not_awaitable'}} + // expected-error@-1 {{call to function 'operator co_await' that is neither visible in the template definition nor found by argument-dependent lookup}} +}; +template void await_template(awaitable); + +struct indirectly_awaitable { + indirectly_awaitable(outer); +}; +awaitable operator co_await(indirectly_awaitable); // expected-note {{should be declared prior to}} +template void await_template(indirectly_awaitable); + +struct not_awaitable {}; +template void await_template(not_awaitable); // expected-note {{instantiation}} + +template void await_template_2(T t) { + // one unqualified lookup result + co_await t; +}; +template void await_template(outer); // expected-note {{instantiation}} +template void await_template_2(outer); + +struct transform_awaitable {}; +struct transformed {}; + +struct transform_promise { + typedef transform_awaitable await_arg; + coro get_return_object(); + transformed initial_suspend(); + ::adl_ns::coawait_arg_type final_suspend() noexcept; + transformed await_transform(transform_awaitable); + void unhandled_exception(); + void return_void(); +}; +template +struct basic_promise { + typedef AwaitArg await_arg; + coro get_return_object(); + awaitable initial_suspend(); + awaitable final_suspend() noexcept; + void unhandled_exception(); + void return_void(); +}; + +awaitable operator co_await(await_arg_1); + +template +coro await_template_3(U t) { + co_await t; +} + +template coro> await_template_3>(await_arg_1); + +template +struct dependent_member { + coro mem_fn() const { + co_await typename T::await_arg{}; // expected-error {{call to function 'operator co_await'}}} + } + template + coro dep_mem_fn(U t) { + co_await t; + } +}; + +template <> +struct dependent_member { + // FIXME this diagnostic is terrible + coro mem_fn() const { // expected-error {{no member named 'await_ready' in 'dependent_operator_co_await_lookup::transformed'}} + // expected-note@-1 {{call to 'initial_suspend' implicitly required by the initial suspend point}} + // expected-note@+1 {{function is a coroutine due to use of 'co_await' here}} + co_await transform_awaitable{}; + // expected-error@-1 {{no member named 'await_ready'}} + } + template + coro 
dep_mem_fn(U u) { co_await u; } +}; + +awaitable operator co_await(await_arg_2); // expected-note {{'operator co_await' should be declared prior to the call site}} + +template struct dependent_member, 0>; +template struct dependent_member, 0>; // expected-note {{in instantiation}} + +template <> +coro +// FIXME this diagnostic is terrible +dependent_member::dep_mem_fn(int) { // expected-error {{no member named 'await_ready' in 'dependent_operator_co_await_lookup::transformed'}} + //expected-note@-1 {{call to 'initial_suspend' implicitly required by the initial suspend point}} + //expected-note@+1 {{function is a coroutine due to use of 'co_await' here}} + co_await transform_awaitable{}; + // expected-error@-1 {{no member named 'await_ready'}} +} + +void operator co_await(transform_awaitable) = delete; +awaitable operator co_await(transformed); + +template coro + dependent_member::dep_mem_fn(transform_awaitable); + +template <> +coro dependent_member::dep_mem_fn(long) { + co_await transform_awaitable{}; +} + +template <> +struct dependent_member { + coro mem_fn() const { + co_await transform_awaitable{}; + } +}; + +template coro await_template_3(transform_awaitable); +template struct dependent_member; +template coro dependent_member::dep_mem_fn(transform_awaitable); +} // namespace dependent_operator_co_await_lookup + +struct yield_fn_tag {}; +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + // FIXME: add an await_transform overload for functions + awaitable yield_value(int()); + void return_value(int()); + + suspend_never initial_suspend(); + suspend_never final_suspend() noexcept; + void get_return_object(); + void unhandled_exception(); + }; +}; + +namespace placeholder { +awaitable f(), f(int); // expected-note 4{{possible target}} +int g(), g(int); // expected-note 2{{candidate}} +void x() { + co_await f; // expected-error {{reference to overloaded function}} +} +void y() { + co_yield g; // expected-error {{no matching member function for call to 'yield_value'}} +} +void z() { + co_await a; + co_return g; // expected-error {{address of overloaded function 'g' does not match required type 'int'}} +} + +void x(yield_fn_tag) { + co_await f; // expected-error {{reference to overloaded function}} +} +void y(yield_fn_tag) { + co_yield g; +} +void z(yield_fn_tag) { + co_await a; + co_return g; +} +} // namespace placeholder + +struct bad_promise_1 { + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_void(); +}; +coro missing_get_return_object() { // expected-error {{no member named 'get_return_object' in 'bad_promise_1'}} + co_await a; +} + +struct bad_promise_2 { + coro get_return_object(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_void(); +}; +// FIXME: This shouldn't happen twice +coro missing_initial_suspend() { // expected-error {{no member named 'initial_suspend' in 'bad_promise_2'}} + co_await a; +} + +struct bad_promise_3 { + coro get_return_object(); + suspend_always initial_suspend(); + void unhandled_exception(); + void return_void(); +}; +coro missing_final_suspend() noexcept { // expected-error {{no member named 'final_suspend' in 'bad_promise_3'}} + co_await a; +} + +struct bad_promise_4 { + coro get_return_object(); + not_awaitable initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); +}; +// FIXME: This diagnostic is terrible. 
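// For reference, an illustrative sketch (hypothetical type, not part of the
// original test) of the awaiter interface that the results of
// 'initial_suspend' and 'final_suspend' must provide; 'not_awaitable' above
// fails precisely because it lacks all three of these members:
struct sketch_awaiter {
  bool await_ready() noexcept { return false; } // false: suspend at this point
  void await_suspend(std::experimental::coroutine_handle<>) noexcept {}
  void await_resume() noexcept {} // produces the result of the co_await
};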
+coro bad_initial_suspend() { // expected-error {{no member named 'await_ready' in 'not_awaitable'}} + // expected-note@-1 {{call to 'initial_suspend' implicitly required by the initial suspend point}} + co_await a; // expected-note {{function is a coroutine due to use of 'co_await' here}} +} + +struct bad_promise_5 { + coro get_return_object(); + suspend_always initial_suspend(); + not_awaitable final_suspend() noexcept; + void return_void(); +}; +// FIXME: This diagnostic is terrible. +coro bad_final_suspend() { // expected-error {{no member named 'await_ready' in 'not_awaitable'}} + // expected-note@-1 {{call to 'final_suspend' implicitly required by the final suspend point}} + co_await a; // expected-note {{function is a coroutine due to use of 'co_await' here}} +} + +struct bad_promise_6 { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_void(); // expected-note 2 {{member 'return_void' first declared here}} + void return_value(int) const; // expected-note 2 {{member 'return_value' first declared here}} + void return_value(int); +}; +coro bad_implicit_return() { // expected-error {{'bad_promise_6' declares both 'return_value' and 'return_void'}} + co_await a; +} + +template +coro bad_implicit_return_dependent(T) { // expected-error {{'bad_promise_6' declares both 'return_value' and 'return_void'}} + co_await a; +} +template coro bad_implicit_return_dependent(bad_promise_6); // expected-note {{in instantiation}} + +struct bad_promise_7 { // expected-note 2 {{defined here}} + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); +}; +coro no_unhandled_exception() { // expected-error {{'bad_promise_7' is required to declare the member 'unhandled_exception()'}} + co_await a; +} + +template +coro no_unhandled_exception_dependent(T) { // expected-error {{'bad_promise_7' is required to declare the member 'unhandled_exception()'}} + co_await a; +} +template coro no_unhandled_exception_dependent(bad_promise_7); // expected-note {{in instantiation}} + +struct bad_promise_base { +private: + void return_void(); // expected-note 2 {{declared private here}} +}; +struct bad_promise_8 : bad_promise_base { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception() __attribute__((unavailable)); // expected-note 2 {{marked unavailable here}} + void unhandled_exception() const; + void unhandled_exception(void *) const; +}; +coro calls_unhandled_exception() { + // expected-error@-1 {{'unhandled_exception' is unavailable}} + // expected-error@-2 {{'return_void' is a private member}} + co_await a; +} + +template +coro calls_unhandled_exception_dependent(T) { + // expected-error@-1 {{'unhandled_exception' is unavailable}} + // expected-error@-2 {{'return_void' is a private member}} + co_await a; +} +template coro calls_unhandled_exception_dependent(bad_promise_8); // expected-note {{in instantiation}} + +struct bad_promise_9 { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void await_transform(void *); + awaitable await_transform(int) __attribute__((unavailable)); // expected-note {{explicitly marked unavailable}} + void return_void(); + void unhandled_exception(); +}; +coro calls_await_transform() { + co_await 42; // expected-error {{'await_transform' is unavailable}} +} + +struct bad_promise_10 { + 
coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + int await_transform; + void return_void(); + void unhandled_exception(); +}; +coro bad_coawait() { + // FIXME this diagnostic is terrible + co_await 42; // expected-error {{called object type 'int' is not a function or function pointer}} + // expected-note@-1 {{call to 'await_transform' implicitly required by 'co_await' here}} +} + +struct call_operator { + template + awaitable operator()(Args...) const { return a; } +}; +void ret_void(); +struct good_promise_1 { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + static const call_operator await_transform; + using Fn = void (*)(); + Fn return_void = ret_void; +}; +const call_operator good_promise_1::await_transform; +coro ok_static_coawait() { + // FIXME this diagnostic is terrible + co_await 42; +} + +template void ok_generic_lambda_coawait_PR41909() { + [](auto &arg) -> coro { // expected-warning {{expression result unused}} + co_await 12; + }; + [](auto &arg) -> coro { + co_await 24; + }("argument"); + [](auto &arg) -> coro { // expected-warning {{expression result unused}} + []() -> coro { + co_await 36; + }; + co_await 48; + }; +} +template void ok_generic_lambda_coawait_PR41909(); // expected-note {{in instantiation of function template specialization 'ok_generic_lambda_coawait_PR41909' requested here}} + +template <> struct std::experimental::coroutine_traits { using promise_type = promise; }; + +int main(int, const char **) { + co_await a; // expected-error {{'co_await' cannot be used in the 'main' function}} +} + +struct good_promise_2 { + float get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; +template <> struct std::experimental::coroutine_handle {}; + +template <> struct std::experimental::coroutine_traits { using promise_type = good_promise_2; }; + +float badly_specialized_coro_handle() { // expected-error {{std::coroutine_handle must have a member named 'from_address'}} + //expected-note@-1 {{call to 'initial_suspend' implicitly required by the initial suspend point}} + co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} +} + +namespace std { +struct nothrow_t {}; +constexpr nothrow_t nothrow = {}; +} // namespace std + +using SizeT = decltype(sizeof(int)); + +void *operator new(SizeT __sz, const std::nothrow_t &) noexcept; +void operator delete(void *__p, const std::nothrow_t &)noexcept; + +struct promise_on_alloc_failure_tag {}; + +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + int get_return_object() {} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + int get_return_object_on_allocation_failure(); // expected-error{{'promise_type': 'get_return_object_on_allocation_failure()' must be a static member function}} + void unhandled_exception(); + }; +}; + +extern "C" int f(promise_on_alloc_failure_tag) { + co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} +} + +struct bad_promise_11 { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_void(); + +private: + static coro get_return_object_on_allocation_failure(); // expected-note 2 {{declared private 
here}} +}; +coro private_alloc_failure_handler() { + // expected-error@-1 {{'get_return_object_on_allocation_failure' is a private member of 'bad_promise_11'}} + co_return; // FIXME: Add a "declared coroutine here" note. +} + +template +coro dependent_private_alloc_failure_handler(T) { + // expected-error@-1 {{'get_return_object_on_allocation_failure' is a private member of 'bad_promise_11'}} + co_return; // FIXME: Add a "declared coroutine here" note. +} +template coro dependent_private_alloc_failure_handler(bad_promise_11); +// expected-note@-1 {{requested here}} + +struct bad_promise_12 { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_void(); + static coro get_return_object_on_allocation_failure(); + + static void *operator new(SizeT); + // expected-error@-1 2 {{'operator new' is required to have a non-throwing noexcept specification when the promise type declares 'get_return_object_on_allocation_failure()'}} +}; +coro throwing_in_class_new() { // expected-note {{call to 'operator new' implicitly required by coroutine function here}} + co_return; +} + +template +coro dependent_throwing_in_class_new(T) { // expected-note {{call to 'operator new' implicitly required by coroutine function here}} + co_return; +} +template coro dependent_throwing_in_class_new(bad_promise_12); // expected-note {{requested here}} + +struct good_promise_13 { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); + void return_void(); + static coro get_return_object_on_allocation_failure(); +}; +coro uses_nothrow_new() { + co_return; +} + +template +coro dependent_uses_nothrow_new(T) { + co_return; +} +template coro dependent_uses_nothrow_new(good_promise_13); + +struct good_promise_custom_new_operator { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); + void *operator new(SizeT, double, float, int); +}; + +coro +good_coroutine_calls_custom_new_operator(double, float, int) { + co_return; +} + +struct coroutine_nonstatic_member_struct; + +struct good_promise_nonstatic_member_custom_new_operator { + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); + void *operator new(SizeT, coroutine_nonstatic_member_struct &, double); +}; + +struct good_promise_noexcept_custom_new_operator { + static coro get_return_object_on_allocation_failure(); + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); + void *operator new(SizeT, double, float, int) noexcept; +}; + +coro +good_coroutine_calls_noexcept_custom_new_operator(double, float, int) { + co_return; +} + +struct mismatch_gro_type_tag1 {}; +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + void get_return_object() {} //expected-note {{member 'get_return_object' declared here}} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception(); + }; +}; + +extern "C" int f(mismatch_gro_type_tag1) { + // expected-error@-1 {{cannot initialize return object of type 'int' with an rvalue of type 'void'}} + co_return; //expected-note {{function is a coroutine 
due to use of 'co_return' here}} +} + +struct mismatch_gro_type_tag2 {}; +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + void *get_return_object() {} //expected-note {{member 'get_return_object' declared here}} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception(); + }; +}; + +extern "C" int f(mismatch_gro_type_tag2) { + // cxx2b-error@-1 {{cannot initialize return object of type 'int' with an rvalue of type 'void *'}} + // cxx14_20-error@-2 {{cannot initialize return object of type 'int' with an lvalue of type 'void *'}} + co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} +} + +struct mismatch_gro_type_tag3 {}; +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + int get_return_object() {} + static void get_return_object_on_allocation_failure() {} //expected-note {{member 'get_return_object_on_allocation_failure' declared here}} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception(); + }; +}; + +extern "C" int f(mismatch_gro_type_tag3) { + // expected-error@-1 {{cannot initialize return object of type 'int' with an rvalue of type 'void'}} + co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} +} + +struct mismatch_gro_type_tag4 {}; +template <> +struct std::experimental::coroutine_traits { + struct promise_type { + int get_return_object() {} + static char *get_return_object_on_allocation_failure() {} //expected-note {{member 'get_return_object_on_allocation_failure' declared}} + suspend_always initial_suspend() { return {}; } + suspend_always final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception(); + }; +}; + +extern "C" int f(mismatch_gro_type_tag4) { + // expected-error@-1 {{cannot initialize return object of type 'int' with an rvalue of type 'char *'}} + co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} +} + +struct bad_promise_no_return_func { // expected-note {{'bad_promise_no_return_func' defined here}} + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void unhandled_exception(); +}; +// FIXME: The PDTS currently specifies this as UB, technically forbidding a +// diagnostic. 
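// Illustrative sketch (hypothetical type, not part of the original test): a
// conforming promise declares exactly one of 'return_void'/'return_value';
// 'co_return;' requires return_void(), 'co_return expr;' requires
// return_value(expr), and declaring neither (as below) is diagnosed.
struct sketch_returning_promise {
  coro<sketch_returning_promise> get_return_object();
  suspend_always initial_suspend();
  suspend_always final_suspend() noexcept;
  void return_void(); // omit this and 'co_return;' becomes ill-formed
  void unhandled_exception();
};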
+coro no_return_value_or_return_void() { + // expected-error@-1 {{'bad_promise_no_return_func' must declare either 'return_value' or 'return_void'}} + co_await a; +} + +struct bad_await_suspend_return { + bool await_ready(); + // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'char')}} + char await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; +struct bad_await_ready_return { + // expected-note@+1 {{return type of 'await_ready' is required to be contextually convertible to 'bool'}} + void await_ready(); + bool await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; +struct await_ready_explicit_bool { + struct BoolT { + explicit operator bool() const; + }; + BoolT await_ready(); + void await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; +template +struct await_suspend_type_test { + bool await_ready(); + // expected-error@+2 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'bool &')}} + // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'bool &&')}} + SuspendTy await_suspend(std::experimental::coroutine_handle<>); + // cxx20_2b-warning@-1 {{volatile-qualified return type 'const volatile bool' is deprecated}} + void await_resume(); +}; +void test_bad_suspend() { + { + // FIXME: The actual error emitted here is terrible, and no number of notes can save it. + bad_await_ready_return a; + // expected-error@+1 {{value of type 'void' is not contextually convertible to 'bool'}} + co_await a; // expected-note {{call to 'await_ready' implicitly required by coroutine function here}} + } + { + bad_await_suspend_return b; + co_await b; // expected-note {{call to 'await_suspend' implicitly required by coroutine function here}} + } + { + await_ready_explicit_bool c; + co_await c; // OK + } + { + await_suspend_type_test a; + await_suspend_type_test b; + await_suspend_type_test c; + await_suspend_type_test d; // cxx20_2b-note {{in instantiation of template class}} + co_await a; // expected-note {{call to 'await_suspend' implicitly required by coroutine function here}} + co_await b; // expected-note {{call to 'await_suspend' implicitly required by coroutine function here}} + co_await c; // OK + co_await d; // OK + } +} + +template +struct NoCopy { + NoCopy(NoCopy const &) = delete; // expected-note 2 {{deleted here}} +}; +template +void test_dependent_param(T t, U) { + // expected-error@-1 {{call to deleted constructor of 'NoCopy<0>'}} + // expected-error@-2 {{call to deleted constructor of 'NoCopy<1>'}} + ((void)t); + co_return 42; +} +template void test_dependent_param(NoCopy<0>, NoCopy<1>); // expected-note {{requested here}} + +namespace CoroHandleMemberFunctionTest { +struct CoroMemberTag {}; +struct BadCoroMemberTag {}; + +template +constexpr bool IsSameV = false; +template +constexpr bool IsSameV = true; + +template +struct TypeTest { + template + static constexpr bool IsSame = IsSameV; + + template + static constexpr bool MatchesArgs = IsSameV>; +}; + +template +struct AwaitReturnsType { + bool await_ready() const; + void await_suspend(...) 
const; + T await_resume() const; +}; + +template +struct CoroMemberPromise { + using TraitsT = std::experimental::coroutine_traits; + using TypeTestT = TypeTest; + using AwaitTestT = AwaitReturnsType; + + CoroMemberTag get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + + AwaitTestT yield_value(int); + + void return_void(); + void unhandled_exception(); +}; + +} // namespace CoroHandleMemberFunctionTest + +template +struct ::std::experimental::coroutine_traits { + using promise_type = CoroHandleMemberFunctionTest::CoroMemberPromise; +}; + +namespace CoroHandleMemberFunctionTest { +struct TestType { + + CoroMemberTag test_qual() { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + static_assert(!TC.MatchesArgs, ""); + static_assert(!TC.MatchesArgs, ""); + } + + CoroMemberTag test_sanity(int *) const { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); // expected-error {{static_assert failed}} + static_assert(TC.MatchesArgs, ""); // expected-error {{static_assert failed}} + static_assert(TC.MatchesArgs, ""); + } + + CoroMemberTag test_qual(int *, const float &&, volatile void *volatile) const { + // cxx20_2b-warning@-1 {{volatile-qualified parameter type}} + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + + CoroMemberTag test_qual() const volatile { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + + CoroMemberTag test_ref_qual() & { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + CoroMemberTag test_ref_qual() const & { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + CoroMemberTag test_ref_qual() && { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + CoroMemberTag test_ref_qual(const char *&) const volatile && { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + + CoroMemberTag test_args(int) { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + CoroMemberTag test_args(int, long &, void *) const { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + + template + CoroMemberTag test_member_template(Args...) 
const && { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + + static CoroMemberTag test_static() { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs<>, ""); + static_assert(!TC.MatchesArgs, ""); + static_assert(!TC.MatchesArgs, ""); + static_assert(!TC.MatchesArgs, ""); + } + + static CoroMemberTag test_static(volatile void *const, char &&) { + auto TC = co_yield 0; + static_assert(TC.MatchesArgs, ""); + } + + template + static CoroMemberTag test_static_template(const char *volatile &, unsigned) { + auto TC = co_yield 0; + using TCT = decltype(TC); + static_assert(TCT::MatchesArgs, ""); + static_assert(!TCT::MatchesArgs, ""); + } + + BadCoroMemberTag test_diagnostics() { + // expected-error@-1 {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + co_return; + } + BadCoroMemberTag test_diagnostics(int) const && { + // expected-error@-1 {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + co_return; + } + + static BadCoroMemberTag test_static_diagnostics(long *) { + // expected-error@-1 {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + co_return; + } +}; + +template CoroMemberTag TestType::test_member_template(long, const char *) const &&; +template CoroMemberTag TestType::test_static_template(const char *volatile &, unsigned); + +template +struct DepTestType { + + CoroMemberTag test_sanity(int *) const { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); // expected-error {{static_assert failed}} + static_assert(TC.template MatchesArgs<>, ""); // expected-error {{static_assert failed}} + static_assert(TC.template MatchesArgs, ""); + } + + CoroMemberTag test_qual() { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + static_assert(!TC.template MatchesArgs, ""); + static_assert(!TC.template MatchesArgs, ""); + } + + CoroMemberTag test_qual(int *, const float &&, volatile void *volatile) const { + // cxx20_2b-warning@-1 {{volatile-qualified parameter type}} + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + + CoroMemberTag test_qual() const volatile { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + + CoroMemberTag test_ref_qual() & { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + CoroMemberTag test_ref_qual() const & { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + CoroMemberTag test_ref_qual() && { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + CoroMemberTag test_ref_qual(const char *&) const volatile && { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + + CoroMemberTag test_args(int) { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + CoroMemberTag test_args(int, long &, void *) const { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + + template + CoroMemberTag test_member_template(UArgs...) 
const && { + auto TC = co_yield 0; + static_assert(TC.template MatchesArgs, ""); + } + + static CoroMemberTag test_static() { + auto TC = co_yield 0; + using TCT = decltype(TC); + static_assert(TCT::MatchesArgs<>, ""); + static_assert(!TCT::MatchesArgs, ""); + static_assert(!TCT::MatchesArgs, ""); + static_assert(!TCT::MatchesArgs, ""); + + // Ensure diagnostics are actually being generated here + static_assert(TCT::MatchesArgs, ""); // expected-error {{static_assert failed}} + } + + static CoroMemberTag test_static(volatile void *const, char &&) { + auto TC = co_yield 0; + using TCT = decltype(TC); + static_assert(TCT::MatchesArgs, ""); + } + + template + static CoroMemberTag test_static_template(const char *volatile &, unsigned) { + auto TC = co_yield 0; + using TCT = decltype(TC); + static_assert(TCT::MatchesArgs, ""); + static_assert(!TCT::MatchesArgs, ""); + } +}; + +template struct DepTestType; // expected-note {{requested here}} +template CoroMemberTag DepTestType::test_member_template(long, const char *) const &&; + +template CoroMemberTag DepTestType::test_static_template(const char *volatile &, unsigned); + +struct bad_promise_deleted_constructor { + // expected-note@+1 {{'bad_promise_deleted_constructor' has been explicitly marked deleted here}} + bad_promise_deleted_constructor() = delete; + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +coro +bad_coroutine_calls_deleted_promise_constructor() { + // expected-error@-1 {{call to deleted constructor of 'std::experimental::coroutine_traits>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_deleted_constructor')}} + co_return; +} + +// Test that, when the promise type has a constructor whose signature matches +// that of the coroutine function, that constructor is used. If no matching +// constructor exists, the default constructor is used as a fallback. If no +// matching constructors exist at all, an error is emitted. This is an +// experimental feature that will be proposed for the Coroutines TS. 
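// Illustrative sketch (hypothetical type, not part of the original test) of
// the selection rule described above: for a coroutine declared as
// 'coro<sketch_ctor_promise> f(double, float, int)', the promise is
// constructed from copies of the coroutine's arguments when a matching
// constructor is viable, and default-constructed otherwise.
struct sketch_ctor_promise {
  sketch_ctor_promise(double, float, int); // chosen for a coroutine taking (double, float, int)
  sketch_ctor_promise() = default;         // fallback when no parameter match exists
  coro<sketch_ctor_promise> get_return_object();
  suspend_always initial_suspend();
  suspend_always final_suspend() noexcept;
  void return_void();
  void unhandled_exception();
};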
+ +struct good_promise_default_constructor { + good_promise_default_constructor(double, float, int); + good_promise_default_constructor() = default; + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +coro +good_coroutine_calls_default_constructor() { + co_return; +} + +struct some_class; + +struct good_promise_custom_constructor { + good_promise_custom_constructor(some_class &, float, int); + good_promise_custom_constructor(double, float, int); + good_promise_custom_constructor() = delete; + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +coro +good_coroutine_calls_custom_constructor(double, float, int) { + co_return; +} + +struct some_class { + coro + good_coroutine_calls_custom_constructor(float, int) { + co_return; + } + coro static good_coroutine_calls_custom_constructor(double, float, int) { + co_return; + } +}; + +struct bad_promise_no_matching_constructor { + bad_promise_no_matching_constructor(int, int, int); + // expected-note@+1 2 {{'bad_promise_no_matching_constructor' has been explicitly marked deleted here}} + bad_promise_no_matching_constructor() = delete; + coro get_return_object(); + suspend_always initial_suspend(); + suspend_always final_suspend() noexcept; + void return_void(); + void unhandled_exception(); +}; + +coro +bad_coroutine_calls_with_no_matching_constructor(int, int) { + // expected-error@-1 {{call to deleted constructor of 'std::experimental::coroutine_traits, int, int>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_no_matching_constructor')}} + co_return; +} + +struct some_class2 { + coro + bad_coroutine_calls_with_no_matching_constructor(int, int, int) { + // expected-error@-1 {{call to deleted constructor}} + co_return; + } +}; + +} // namespace CoroHandleMemberFunctionTest + +class awaitable_no_unused_warn { +public: + using handle_type = std::experimental::coroutine_handle<>; + constexpr bool await_ready() noexcept { return false; } + void await_suspend(handle_type) noexcept {} + int await_resume() noexcept { return 1; } +}; + +class awaitable_unused_warn { +public: + using handle_type = std::experimental::coroutine_handle<>; + constexpr bool await_ready() noexcept { return false; } + void await_suspend(handle_type) noexcept {} + [[nodiscard]] int await_resume() noexcept { return 1; } +}; + +template +struct check_warning_promise { + coro get_return_object(); + Await initial_suspend(); + Await final_suspend() noexcept; + Await yield_value(int); + void return_void(); + void unhandled_exception(); +}; + +coro> +test_no_unused_warning() { + co_await awaitable_no_unused_warn(); + co_yield 42; +} + +coro> +test_unused_warning() { + co_await awaitable_unused_warn(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + co_yield 42; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} +} + +struct missing_await_ready { + void await_suspend(std::experimental::coroutine_handle<>); + void await_resume(); +}; +struct missing_await_suspend { + bool await_ready(); + void await_resume(); +}; +struct missing_await_resume { + bool await_ready(); + void await_suspend(std::experimental::coroutine_handle<>); +}; + +void test_missing_awaitable_members() { + co_await missing_await_ready{}; // expected-error {{no member named 'await_ready' in 
'missing_await_ready'}} + co_await missing_await_suspend{}; // expected-error {{no member named 'await_suspend' in 'missing_await_suspend'}} + co_await missing_await_resume{}; // expected-error {{no member named 'await_resume' in 'missing_await_resume'}} +} diff --git a/clang/test/SemaCXX/coroutines.cpp b/clang/test/SemaCXX/coroutines.cpp index 9222066e87938..26415be800608 100644 --- a/clang/test/SemaCXX/coroutines.cpp +++ b/clang/test/SemaCXX/coroutines.cpp @@ -1,32 +1,30 @@ // This file contains references to sections of the Coroutines TS, which can be // found at http://wg21.link/coroutines. -// RUN: %clang_cc1 -std=c++2b -fsyntax-only -verify=expected,cxx20_2b,cxx2b %s -fcxx-exceptions -fexceptions -Wunused-result -// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx14_20,cxx20_2b %s -fcxx-exceptions -fexceptions -Wunused-result -// RUN: %clang_cc1 -std=c++14 -fcoroutines-ts -fsyntax-only -verify=expected,cxx14_20 %s -fcxx-exceptions -fexceptions -Wunused-result +// RUN: %clang_cc1 -std=c++2b -fsyntax-only -verify=expected,cxx20_2b,cxx2b %s -fcxx-exceptions -fexceptions -Wunused-result +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify=expected,cxx14_20,cxx20_2b %s -fcxx-exceptions -fexceptions -Wunused-result void no_coroutine_traits_bad_arg_await() { - co_await a; // expected-error {{include }} + co_await a; // expected-error {{include }} // expected-error@-1 {{use of undeclared identifier 'a'}} } void no_coroutine_traits_bad_arg_yield() { - co_yield a; // expected-error {{include }} + co_yield a; // expected-error {{include }} // expected-error@-1 {{use of undeclared identifier 'a'}} } void no_coroutine_traits_bad_arg_return() { - co_return a; // expected-error {{include }} + co_return a; // expected-error {{include }} // expected-error@-1 {{use of undeclared identifier 'a'}} } void no_coroutine_traits() { - co_await 4; // expected-error {{std::experimental::coroutine_traits type was not found; include }} + co_await 4; // expected-error {{std::coroutine_traits type was not found; include }} } namespace std { -namespace experimental { template struct void_t_imp { @@ -45,11 +43,11 @@ struct traits_sfinae_base> { template struct coroutine_traits : public traits_sfinae_base {}; -}} // namespace std::experimental +} // end of namespace std template struct coro {}; template -struct std::experimental::coroutine_traits, Ps...> { +struct std::coroutine_traits, Ps...> { using promise_type = Promise; }; @@ -81,47 +79,46 @@ struct auto_await_suspend { }; struct DummyVoidTag {}; -DummyVoidTag no_specialization() { // expected-error {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} +DummyVoidTag no_specialization() { // expected-error {{this function cannot be a coroutine: 'std::coroutine_traits' has no member named 'promise_type'}} co_await a; } template -struct std::experimental::coroutine_traits {}; +struct std::coroutine_traits {}; -int no_promise_type() { // expected-error {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} +int no_promise_type() { // expected-error {{this function cannot be a coroutine: 'std::coroutine_traits' has no member named 'promise_type'}} co_await a; } -int no_promise_type_multiple_awaits(int) { // expected-error {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} +int no_promise_type_multiple_awaits(int) { // expected-error {{this function cannot be a coroutine: 
'std::coroutine_traits' has no member named 'promise_type'}} co_await a; co_await a; } template <> -struct std::experimental::coroutine_traits { typedef int promise_type; }; -double bad_promise_type(double) { // expected-error {{this function cannot be a coroutine: 'experimental::coroutine_traits::promise_type' (aka 'int') is not a class}} +struct std::coroutine_traits { typedef int promise_type; }; +double bad_promise_type(double) { // expected-error {{this function cannot be a coroutine: 'std::coroutine_traits::promise_type' (aka 'int') is not a class}} co_await a; } template <> -struct std::experimental::coroutine_traits { +struct std::coroutine_traits { struct promise_type {}; }; double bad_promise_type_2(int) { // expected-error {{no member named 'initial_suspend'}} - co_yield 0; // expected-error {{no member named 'yield_value' in 'std::experimental::coroutine_traits::promise_type'}} + co_yield 0; // expected-error {{no member named 'yield_value' in 'std::coroutine_traits::promise_type'}} } struct promise; // expected-note {{forward declaration}} struct promise_void; struct void_tag {}; template -struct std::experimental::coroutine_traits { using promise_type = promise; }; +struct std::coroutine_traits { using promise_type = promise; }; template -struct std::experimental::coroutine_traits -{ using promise_type = promise_void; }; +struct std::coroutine_traits { using promise_type = promise_void; }; // FIXME: This diagnostic is terrible. -void undefined_promise() { // expected-error {{this function cannot be a coroutine: 'experimental::coroutine_traits::promise_type' (aka 'promise') is an incomplete type}} +void undefined_promise() { // expected-error {{this function cannot be a coroutine: 'std::coroutine_traits::promise_type' (aka 'promise') is an incomplete type}} co_await a; } @@ -148,13 +145,12 @@ struct promise_void { void unhandled_exception(); }; -void no_coroutine_handle() { // expected-error {{std::experimental::coroutine_handle type was not found; include before defining a coroutine}} +void no_coroutine_handle() { // expected-error {{std::coroutine_handle type was not found; include before defining a coroutine}} //expected-note@-1 {{call to 'initial_suspend' implicitly required by the initial suspend point}} co_return 5; //expected-note {{function is a coroutine due to use of 'co_return' here}} } namespace std { -namespace experimental { template struct coroutine_handle { static coroutine_handle from_address(void *) noexcept; @@ -165,7 +161,7 @@ struct coroutine_handle { coroutine_handle(coroutine_handle) noexcept; static coroutine_handle from_address(void *) noexcept; }; -}} // namespace std::experimental +} // namespace std void yield() { co_yield 0; @@ -529,7 +525,7 @@ namespace dependent_operator_co_await_lookup { struct yield_fn_tag {}; template <> -struct std::experimental::coroutine_traits { +struct std::coroutine_traits { struct promise_type { // FIXME: add an await_transform overload for functions awaitable yield_value(int()); @@ -747,8 +743,7 @@ template void ok_generic_lambda_coawait_PR41909() { } template void ok_generic_lambda_coawait_PR41909(); // expected-note {{in instantiation of function template specialization 'ok_generic_lambda_coawait_PR41909' requested here}} -template<> struct std::experimental::coroutine_traits -{ using promise_type = promise; }; +template <> struct std::coroutine_traits { using promise_type = promise; }; int main(int, const char**) { co_await a; // expected-error {{'co_await' cannot be used in the 'main' function}} @@ -761,12 +756,11 
@@ struct good_promise_2 { void return_void(); void unhandled_exception(); }; -template<> struct std::experimental::coroutine_handle {}; +template <> struct std::coroutine_handle {}; -template<> struct std::experimental::coroutine_traits -{ using promise_type = good_promise_2; }; +template <> struct std::coroutine_traits { using promise_type = good_promise_2; }; -float badly_specialized_coro_handle() { // expected-error {{std::experimental::coroutine_handle missing a member named 'from_address'}} +float badly_specialized_coro_handle() { // expected-error {{std::coroutine_handle must have a member named 'from_address'}} //expected-note@-1 {{call to 'initial_suspend' implicitly required by the initial suspend point}} co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} } @@ -785,8 +779,8 @@ void operator delete(void* __p, const std::nothrow_t&) noexcept; struct promise_on_alloc_failure_tag {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { int get_return_object() {} suspend_always initial_suspend() { return {}; } @@ -905,8 +899,8 @@ good_coroutine_calls_noexcept_custom_new_operator(double, float, int) { } struct mismatch_gro_type_tag1 {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void get_return_object() {} //expected-note {{member 'get_return_object' declared here}} suspend_always initial_suspend() { return {}; } @@ -922,8 +916,8 @@ extern "C" int f(mismatch_gro_type_tag1) { } struct mismatch_gro_type_tag2 {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { void *get_return_object() {} //expected-note {{member 'get_return_object' declared here}} suspend_always initial_suspend() { return {}; } @@ -940,8 +934,8 @@ extern "C" int f(mismatch_gro_type_tag2) { } struct mismatch_gro_type_tag3 {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { int get_return_object() {} static void get_return_object_on_allocation_failure() {} //expected-note {{member 'get_return_object_on_allocation_failure' declared here}} @@ -959,8 +953,8 @@ extern "C" int f(mismatch_gro_type_tag3) { struct mismatch_gro_type_tag4 {}; -template<> -struct std::experimental::coroutine_traits { +template <> +struct std::coroutine_traits { struct promise_type { int get_return_object() {} static char *get_return_object_on_allocation_failure() {} //expected-note {{member 'get_return_object_on_allocation_failure' declared}} @@ -992,13 +986,13 @@ coro no_return_value_or_return_void() { struct bad_await_suspend_return { bool await_ready(); // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'char')}} - char await_suspend(std::experimental::coroutine_handle<>); + char await_suspend(std::coroutine_handle<>); void await_resume(); }; struct bad_await_ready_return { // expected-note@+1 {{return type of 'await_ready' is required to be contextually convertible to 'bool'}} void await_ready(); - bool await_suspend(std::experimental::coroutine_handle<>); + bool await_suspend(std::coroutine_handle<>); void await_resume(); }; struct await_ready_explicit_bool { @@ -1006,7 +1000,7 @@ struct await_ready_explicit_bool { explicit operator bool() const; }; BoolT await_ready(); - void await_suspend(std::experimental::coroutine_handle<>); + void 
await_suspend(std::coroutine_handle<>); void await_resume(); }; template @@ -1014,7 +1008,7 @@ struct await_suspend_type_test { bool await_ready(); // expected-error@+2 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'bool &')}} // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'bool &&')}} - SuspendTy await_suspend(std::experimental::coroutine_handle<>); + SuspendTy await_suspend(std::coroutine_handle<>); // cxx20_2b-warning@-1 {{volatile-qualified return type 'const volatile bool' is deprecated}} void await_resume(); }; @@ -1074,7 +1068,7 @@ struct TypeTest { template static constexpr bool MatchesArgs = IsSameV>; + std::coroutine_traits>; }; template @@ -1086,7 +1080,7 @@ struct AwaitReturnsType { template struct CoroMemberPromise { - using TraitsT = std::experimental::coroutine_traits; + using TraitsT = std::coroutine_traits; using TypeTestT = TypeTest; using AwaitTestT = AwaitReturnsType; @@ -1103,7 +1097,7 @@ struct CoroMemberPromise { } // namespace CoroHandleMemberFunctionTest template -struct ::std::experimental::coroutine_traits { +struct ::std::coroutine_traits { using promise_type = CoroHandleMemberFunctionTest::CoroMemberPromise; }; @@ -1189,16 +1183,16 @@ struct TestType { } BadCoroMemberTag test_diagnostics() { - // expected-error@-1 {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + // expected-error@-1 {{this function cannot be a coroutine: 'std::coroutine_traits' has no member named 'promise_type'}} co_return; } BadCoroMemberTag test_diagnostics(int) const && { - // expected-error@-1 {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + // expected-error@-1 {{this function cannot be a coroutine: 'std::coroutine_traits' has no member named 'promise_type'}} co_return; } static BadCoroMemberTag test_static_diagnostics(long *) { - // expected-error@-1 {{this function cannot be a coroutine: 'std::experimental::coroutine_traits' has no member named 'promise_type'}} + // expected-error@-1 {{this function cannot be a coroutine: 'std::coroutine_traits' has no member named 'promise_type'}} co_return; } }; @@ -1310,7 +1304,7 @@ struct bad_promise_deleted_constructor { coro bad_coroutine_calls_deleted_promise_constructor() { - // expected-error@-1 {{call to deleted constructor of 'std::experimental::coroutine_traits>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_deleted_constructor')}} + // expected-error@-1 {{call to deleted constructor of 'std::coroutine_traits>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_deleted_constructor')}} co_return; } @@ -1377,7 +1371,7 @@ struct bad_promise_no_matching_constructor { coro bad_coroutine_calls_with_no_matching_constructor(int, int) { - // expected-error@-1 {{call to deleted constructor of 'std::experimental::coroutine_traits, int, int>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_no_matching_constructor')}} + // expected-error@-1 {{call to deleted constructor of 'std::coroutine_traits, int, int>::promise_type' (aka 'CoroHandleMemberFunctionTest::bad_promise_no_matching_constructor')}} co_return; } @@ -1393,7 +1387,7 @@ bad_coroutine_calls_with_no_matching_constructor(int, int, int) { class awaitable_no_unused_warn { public: - using handle_type = std::experimental::coroutine_handle<>; + using handle_type = std::coroutine_handle<>; constexpr bool await_ready() noexcept { return false; } void 
await_suspend(handle_type) noexcept {} int await_resume() noexcept { return 1; } @@ -1402,7 +1396,7 @@ class awaitable_no_unused_warn { class awaitable_unused_warn { public: - using handle_type = std::experimental::coroutine_handle<>; + using handle_type = std::coroutine_handle<>; constexpr bool await_ready() noexcept { return false; } void await_suspend(handle_type) noexcept {} [[nodiscard]] int await_resume() noexcept { return 1; } @@ -1432,7 +1426,7 @@ test_unused_warning() { } struct missing_await_ready { - void await_suspend(std::experimental::coroutine_handle<>); + void await_suspend(std::coroutine_handle<>); void await_resume(); }; struct missing_await_suspend { @@ -1441,7 +1435,7 @@ struct missing_await_suspend { }; struct missing_await_resume { bool await_ready(); - void await_suspend(std::experimental::coroutine_handle<>); + void await_suspend(std::coroutine_handle<>); }; void test_missing_awaitable_members() { diff --git a/clang/test/SemaCXX/recovery-expr-type.cpp b/clang/test/SemaCXX/recovery-expr-type.cpp index 15b83e50387f7..2fdbd0d3b6c30 100644 --- a/clang/test/SemaCXX/recovery-expr-type.cpp +++ b/clang/test/SemaCXX/recovery-expr-type.cpp @@ -143,3 +143,11 @@ int fun(int *foo = no_such_function()); // expected-error {{undeclared identifie void crash1() { fun(); } void crash2() { constexpr int s = fun(); } } // namespace test12 + +namespace test13 { +enum Circular { // expected-note {{not complete until the closing '}'}} + Circular_A = Circular(1), // expected-error {{'test13::Circular' is an incomplete type}} +}; +// Enumerators can be evaluated (they evaluate as zero, but we don't care). +static_assert(Circular_A == 0 && Circular_A != 0, ""); // expected-error {{static_assert failed}} +} diff --git a/clang/test/SemaObjC/block-capture-unused-variable.m b/clang/test/SemaObjC/block-capture-unused-variable.m new file mode 100644 index 0000000000000..1d40d9fb106b0 --- /dev/null +++ b/clang/test/SemaObjC/block-capture-unused-variable.m @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple x86_64-apple-macos11 -fsyntax-only -fobjc-arc -fblocks -verify -Wunused-but-set-variable -Wno-objc-root-class %s + +typedef struct dispatch_queue_s *dispatch_queue_t; + +typedef void (^dispatch_block_t)(void); + +void dispatch_async(dispatch_queue_t queue, dispatch_block_t block); + +extern __attribute__((visibility("default"))) struct dispatch_queue_s _dispatch_main_q; + +id getFoo(); + +@protocol P + +@end + +@interface I + +@end + +void test() { + // no diagnostics + __block id x = getFoo(); + __block id

y = x; + __block I *z = (I *)x; + // diagnose non-block variables + id x2 = getFoo(); // expected-warning {{variable 'x2' set but not used}} + dispatch_async(&_dispatch_main_q, ^{ + x = ((void *)0); + y = x; + z = ((void *)0); + }); + x2 = getFoo(); +} diff --git a/clang/test/SemaObjC/method-conflict-1.m b/clang/test/SemaObjC/method-conflict-1.m index 654cd0166fb50..03178fcada3ec 100644 --- a/clang/test/SemaObjC/method-conflict-1.m +++ b/clang/test/SemaObjC/method-conflict-1.m @@ -73,7 +73,7 @@ - (B*) test1 { return 0; } - (A*) test2 { return 0; } // broken-warning {{conflicting return type in implementation of 'test2': 'B *' vs 'A *'}} @end -// The particular case of overriding with an id return is white-listed. +// The particular case of overriding with an id return is permitted. @interface Test4 {} - (id) test1; - (A*) test2; diff --git a/clang/test/SemaObjC/method-conflict-2.m b/clang/test/SemaObjC/method-conflict-2.m index 47c3d6c2c23ca..65f0d5534e076 100644 --- a/clang/test/SemaObjC/method-conflict-2.m +++ b/clang/test/SemaObjC/method-conflict-2.m @@ -34,7 +34,7 @@ - (B*) test1 { return 0; } - (A*) test2 { return 0; } // expected-warning {{conflicting return type in implementation of 'test2': 'B *' vs 'A *'}} @end -// The particular case of overriding with an id return is white-listed. +// The particular case of overriding with an id return is permitted. @interface Test4 {} - (id) test1; - (A*) test2; diff --git a/clang/test/SemaObjC/method-typecheck-3.m b/clang/test/SemaObjC/method-typecheck-3.m index 1999b7d47702e..a9e1f7a4f80b9 100644 --- a/clang/test/SemaObjC/method-typecheck-3.m +++ b/clang/test/SemaObjC/method-typecheck-3.m @@ -13,7 +13,7 @@ @interface B : A @end @implementation B -- (id)obj {return self;} // 'id' overrides are white-listed? +- (id)obj {return self;} // 'id' overrides are permitted? - (A*)a { return self;} // expected-warning {{conflicting return type in implementation of 'a'}} - (void)takesA: (B*)a // expected-warning {{conflicting parameter types in implementation of 'takesA:'}} {} diff --git a/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp b/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp index fe67dda27ec5f..e945d41eb4ba2 100644 --- a/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp +++ b/clang/test/SemaSYCL/deferred-diagnostics-emit.cpp @@ -44,14 +44,14 @@ typedef const __uint128_t megeType; typedef const __float128 trickyFloatType; typedef const __int128 tricky128Type; -//templated type (late) -// expected-note@+6 {{'bar' defined here}} -// expected-note@+5 {{'bar' defined here}} -// expected-note@+4 2{{'bar<__int128>' defined here}} -// expected-note@+3 {{'bar' defined here}} -// expected-note@+2 {{'bar<__float128>' defined here}} +// templated type (late) +// expected-note@+6 2{{'bar' defined here}} +// expected-note@+5 2{{'bar' defined here}} +// expected-note@+4 4{{'bar<__int128>' defined here}} +// expected-note@+3 2{{'bar' defined here}} +// expected-note@+2 2{{'bar<__float128>' defined here}} template -T bar() { return T(); }; +T bar() { return T(); }; //#TemplatedType //false positive. 
early incorrectly catches template @@ -86,9 +86,13 @@ void setup_sycl_operation(const T VA[]) { // expected-error@+2 {{'malFloat' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__float128' is not supported on this target}} auto whatFloat = malFloat; + // expected-error@#TemplatedType {{'bar<__float128>' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'operator()'}} // expected-error@+2 {{'bar<__float128>' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__float128' is not supported on this target}} auto malAutoTemp5 = bar<__float128>(); + // expected-error@#TemplatedType {{'bar' requires 128 bit size 'const __float128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'operator()'}} // expected-error@+2 {{'bar' requires 128 bit size 'const __float128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__float128' is not supported on this target}} auto malAutoTemp6 = bar(); @@ -109,9 +113,13 @@ void setup_sycl_operation(const T VA[]) { // expected-error@+2 {{'malIntent' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto whatInt128 = malIntent; + // expected-error@#TemplatedType {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'operator()'}} // expected-error@+2 {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto malAutoTemp = bar<__int128>(); + // expected-error@#TemplatedType {{'bar' requires 128 bit size 'const __int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'operator()'}} // expected-error@+2 {{'bar' requires 128 bit size 'const __int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto malAutoTemp2 = bar(); @@ -130,9 +138,13 @@ void setup_sycl_operation(const T VA[]) { // expected-error@+2 {{'malUInt128' requires 128 bit size '__uint128_t' (aka 'unsigned __int128') type support, but target 'spir64' does not support it}} // expected-error@+1 {{'unsigned __int128' is not supported on this target}} auto whatUInt = malUInt128; + // expected-error@#TemplatedType {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'operator()'}} // expected-error@+2 {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto malAutoTemp3 = bar<__int128_t>(); + // expected-error@#TemplatedType {{'bar' requires 128 bit size 'const unsigned __int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'operator()'}} // expected-error@+2 {{'bar' requires 128 bit size 'const unsigned __int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'unsigned __int128' is not supported on this target}} auto malAutoTemp4 = bar(); @@ -156,7 +168,7 @@ int main(int argc, char **argv) { // --- direct lambda testing --- 
deviceQueue.submit([&](sycl::handler &h) { - // expected-note@#KernelSingleTaskKernelFuncCall 2 {{called by 'kernel_single_task([]() { // expected-error@+1 {{zero-length arrays are not permitted in C++}} int BadArray[0]; diff --git a/clang/test/SemaSYCL/float128.cpp b/clang/test/SemaSYCL/float128.cpp index 13173d849fed1..f530a3983d005 100644 --- a/clang/test/SemaSYCL/float128.cpp +++ b/clang/test/SemaSYCL/float128.cpp @@ -29,6 +29,8 @@ void host_ok(void) { C.field1 = A; } +long double ld_func(long double arg) { return 0; }; + void usage() { // expected-note@+2 3{{'A' defined here}} // expected-error@+1 {{'__float128' is not supported on this target}} @@ -60,6 +62,9 @@ void usage() { float F2 = 0.1f; // expected-error@+1 3{{expression requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} float F3 = ((__float128)F1 * (__float128)F2) / 2.0f; + + // assume that long double is supported + float F4 = ld_func(F3); }; // expected-note@+1 {{called by 'usage'}} diff --git a/clang/test/SemaSYCL/sycl-restrict.cpp b/clang/test/SemaSYCL/sycl-restrict.cpp index 6cbcef1905e6b..d20a0dce3f140 100644 --- a/clang/test/SemaSYCL/sycl-restrict.cpp +++ b/clang/test/SemaSYCL/sycl-restrict.cpp @@ -130,13 +130,13 @@ typedef __float128 trickyFloatType; typedef __int128 tricky128Type; typedef long double trickyLDType; -//templated return type -// expected-note@+5 2{{'bar' defined here}} -// expected-note@+4 {{'bar' defined here}} -// expected-note@+3 3{{'bar<__int128>' defined here}} -// expected-note@+2 2{{'bar<__float128>' defined here}} +// templated return type +// expected-note@+5 4{{'bar' defined here}} +// expected-note@+4 2{{'bar' defined here}} +// expected-note@+3 6{{'bar<__int128>' defined here}} +// expected-note@+2 4{{'bar<__float128>' defined here}} template -T bar() { return T(); }; +T bar() { return T(); }; //#TemplatedType //variable template // expected-note@+5 2{{'solutionToEverything' defined here}} @@ -240,9 +240,13 @@ void usage(myFuncDef functionPtr) { // expected-error@+2 {{'malFloat' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__float128' is not supported on this target}} auto whatFloat = malFloat; + // expected-error@#TemplatedType {{'bar<__float128>' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // expected-error@+2 {{'bar<__float128>' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__float128' is not supported on this target}} auto malAutoTemp5 = bar<__float128>(); + // expected-error@#TemplatedType {{'bar<__float128>' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // expected-error@+2 {{'bar<__float128>' requires 128 bit size '__float128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__float128' is not supported on this target}} auto malAutoTemp6 = bar(); @@ -272,9 +276,13 @@ void usage(myFuncDef functionPtr) { // expected-error@+2 {{'malLD' requires 128 bit size 'long double' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'long double' is not supported on this target}} auto whatLD = malLD; + // expected-error@#TemplatedType {{'bar' requires 128 bit size 'long double' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // 
expected-error@+2 {{'bar' requires 128 bit size 'long double' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'long double' is not supported on this target}} auto malAutoLD = bar(); + // expected-error@#TemplatedType {{'bar' requires 128 bit size 'long double' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // expected-error@+2{{'bar' requires 128 bit size 'long double' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'long double' is not supported on this target}} auto malAutoLD2 = bar(); @@ -313,9 +321,13 @@ void usage(myFuncDef functionPtr) { // expected-error@+2 {{'malIntent' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto whatInt128 = malIntent; + // expected-error@#TemplatedType {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // expected-error@+2 {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto malAutoTemp = bar<__int128>(); + // expected-error@#TemplatedType {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // expected-error@+2 {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto malAutoTemp2 = bar(); @@ -342,9 +354,13 @@ void usage(myFuncDef functionPtr) { // expected-error@+2 {{'malUInt128' requires 128 bit size '__uint128_t' (aka 'unsigned __int128') type support, but target 'spir64' does not support it}} // expected-error@+1 {{'unsigned __int128' is not supported on this target}} auto whatUInt = malUInt128; + // expected-error@#TemplatedType {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // expected-error@+2 {{'bar<__int128>' requires 128 bit size '__int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'__int128' is not supported on this target}} auto malAutoTemp3 = bar<__int128_t>(); + // expected-error@#TemplatedType {{'bar' requires 128 bit size 'unsigned __int128' type support, but target 'spir64' does not support it}} + // expected-note@+3 {{called by 'usage'}} // expected-error@+2 {{'bar' requires 128 bit size 'unsigned __int128' type support, but target 'spir64' does not support it}} // expected-error@+1 {{'unsigned __int128' is not supported on this target}} auto malAutoTemp4 = bar(); @@ -416,7 +432,7 @@ int use2(a_type ab, a_type *abp) { template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { - kernelFunc(); //#call_kernelFunc // expected-note 3{{called by 'kernel_single_task([=]() { - usage(&addInt); //#call_usage // expected-note {{called by 'operator()'}} + usage(&addInt); //#call_usage // expected-note 9{{called by 'operator()'}} a_type *p; use2(ab, p); // expected-note 2{{called by 'operator()'}} }); diff --git a/clang/test/SemaTemplate/deduction.cpp b/clang/test/SemaTemplate/deduction.cpp index 964f8b4896e7a..9c75595bc74a5 100644 --- a/clang/test/SemaTemplate/deduction.cpp +++ b/clang/test/SemaTemplate/deduction.cpp @@ 
-60,14 +60,12 @@ struct Replace { // Replacement of templates template class TT, typename T1, typename Arg1, typename Arg2> struct Replace, Arg1, Arg2> { -// expected-note@-1 2 {{partial specialization matches}} typedef TT::type> type; }; template class TT, typename T1, typename T2, typename Arg1, typename Arg2> struct Replace, Arg1, Arg2> { -// expected-note@-1 2 {{partial specialization matches}} typedef TT::type, typename Replace::type> type; }; @@ -81,19 +79,8 @@ struct Replace, Arg1, Arg2> { int array0[is_same::type, int>::value? 1 : -1]; int array1[is_same::type, const int>::value? 1 : -1]; - int array2[is_same, int, float>::type, vector >::value? 1 : -1]; -// expected-error@-1 {{ambiguous partial specializations of 'Replace, int, float>'}} -// FIXME: Some bad error recovery from the parser here: -// expected-error@-3 {{expected '(' for function-style cast or type construction}} -// expected-error@-4 {{no member named 'value' in the global namespace}} - int array3[is_same, int, float>::type, vector >::value? 1 : -1]; -// expected-error@-1 {{ambiguous partial specializations of 'Replace, int, float>'}} -// FIXME: Some bad error recovery from the parser here: -// expected-error@-3 {{expected '(' for function-style cast or type construction}} -// expected-error@-4 {{no member named 'value' in the global namespace}} - int array4[is_same, double, float>::type, vector >::value? 1 : -1]; // PR5911 diff --git a/clang/test/SemaTemplate/default-arguments.cpp b/clang/test/SemaTemplate/default-arguments.cpp index 84178b7e26d14..882b279de1d08 100644 --- a/clang/test/SemaTemplate/default-arguments.cpp +++ b/clang/test/SemaTemplate/default-arguments.cpp @@ -112,14 +112,15 @@ template class X = T::template apply> int array4[is_same, X4 >::value? 1 : -1]; -template struct X5 {}; +template struct X5 {}; // expected-note{{has a different type 'int'}} template struct X5b {}; template class B = X5> + template class B = X5> // expected-error{{template template argument has different}} \ + // expected-note{{previous non-type template parameter}} struct X6 {}; X6 x6a; -X6 x6b; +X6 x6b; // expected-note{{while checking a default template argument}} X6 x6c; diff --git a/clang/test/SemaTemplate/instantiate-template-template-parm.cpp b/clang/test/SemaTemplate/instantiate-template-template-parm.cpp index f97e5b7c548e8..a70c7e8b081a4 100644 --- a/clang/test/SemaTemplate/instantiate-template-template-parm.cpp +++ b/clang/test/SemaTemplate/instantiate-template-template-parm.cpp @@ -20,29 +20,30 @@ apply::type ir = i; apply::type fr = i; // expected-error{{non-const lvalue reference to type 'float' cannot bind to a value of unrelated type 'int'}} // Template template parameters -template struct B; +template struct B; // expected-note{{has a different type 'int'}} template class X> // expected-error{{cannot have type 'float'}} + template class X> // expected-error{{cannot have type 'float'}} \ + // expected-note{{with type 'long'}} struct X0 { }; X0 x0b1; X0 x0b2; // expected-note{{while substituting}} -X0 x0b3; +X0 x0b3; // expected-error{{template template argument has different template parameters}} -template class TT> +template class TT> // expected-note{{parameter with type 'int'}} struct X1 { }; template class TT> struct X2 { - X1 x1; + X1 x1; // expected-error{{has different template parameters}} }; template struct X3i { }; -template struct X3l { }; +template struct X3l { }; // expected-note{{different type 'long'}} X2 x2okay; -X2 x2bad; +X2 x2bad; // expected-note{{instantiation}} template class TT, class R = 
TT<1, 2> > struct Comp { diff --git a/clang/test/SemaTemplate/nested-template.cpp b/clang/test/SemaTemplate/nested-template.cpp index 5bd388d4dff3d..efbde2076b9fa 100644 --- a/clang/test/SemaTemplate/nested-template.cpp +++ b/clang/test/SemaTemplate/nested-template.cpp @@ -112,16 +112,18 @@ template struct X1::B; // Template template parameters template struct X2 { - template class> // expected-error{{cannot have type 'float'}} + template class> // expected-error{{cannot have type 'float'}} \ + // expected-note{{previous non-type template}} struct Inner { }; }; -template +template // expected-note{{template non-type parameter}} struct X2_arg; X2::Inner x2i1; X2 x2a; // expected-note{{instantiation}} -X2::Inner x2i3; +X2::Inner x2i3; // expected-error{{template template argument has different}} namespace PR10896 { template diff --git a/clang/test/SemaTemplate/temp_arg_template.cpp b/clang/test/SemaTemplate/temp_arg_template.cpp index 42926fa368929..37e1e52521263 100644 --- a/clang/test/SemaTemplate/temp_arg_template.cpp +++ b/clang/test/SemaTemplate/temp_arg_template.cpp @@ -6,11 +6,11 @@ template class X> struct A; // expected-note 2{{previous te template class X> struct B; // expected-note{{previous template template parameter is here}} -template class X> struct C; // expected-note {{previous non-type template parameter with type 'int' is here}} +template class X> struct C; // expected-note 2{{previous non-type template parameter with type 'int' is here}} template struct X; // expected-note{{too few template parameters in template template argument}} template struct Y; // expected-note{{template parameter has a different kind in template argument}} -template struct Ylong; +template struct Ylong; // expected-note{{template non-type parameter has a different type 'long' in template argument}} template struct Yref; // expected-note{{template non-type parameter has a different type 'const int &' in template argument}} namespace N { @@ -27,7 +27,7 @@ A *a4; // expected-error{{template template argument has different template p A *a5; // expected-error{{template template argument has different template parameters than its corresponding template template parameter}} B *a6; // expected-error{{template template argument has different template parameters than its corresponding template template parameter}} C *a7; -C *a8; +C *a8; // expected-error{{template template argument has different template parameters than its corresponding template template parameter}} C *a9; // expected-error{{template template argument has different template parameters than its corresponding template template parameter}} template void f(int); diff --git a/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp b/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp index 372a00efc601e..03ef78f8cf14e 100644 --- a/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp +++ b/clang/test/SemaTemplate/temp_arg_template_cxx1z.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -std=c++1z %s +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++1z -frelaxed-template-template-args %s // expected-note@temp_arg_template_cxx1z.cpp:* 1+{{}} diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index 6a7c20948feb5..72a95c802c682 100644 --- a/clang/tools/driver/CMakeLists.txt +++ b/clang/tools/driver/CMakeLists.txt @@ -96,7 +96,7 @@ if(CLANG_ORDER_FILE AND (LLVM_LINKER_IS_LD64 OR LLVM_LINKER_IS_GOLD OR LLVM_LINKER_IS_LLD)) include(LLVMCheckLinkerFlag) - if (LLVM_LINKER_IS_LD64) + if (LLVM_LINKER_IS_LD64 OR 
(LLVM_LINKER_IS_LLD AND APPLE)) set(LINKER_ORDER_FILE_OPTION "-Wl,-order_file,${CLANG_ORDER_FILE}") elseif (LLVM_LINKER_IS_GOLD) set(LINKER_ORDER_FILE_OPTION "-Wl,--section-ordering-file,${CLANG_ORDER_FILE}") diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 0e09fabb28652..b4b7ce98e2b3b 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2586,6 +2586,8 @@ void OMPClauseEnqueue::VisitOMPAffinityClause(const OMPAffinityClause *C) { for (const Expr *E : C->varlists()) Visitor->AddStmt(E); } +void OMPClauseEnqueue::VisitOMPBindClause(const OMPBindClause *C) {} + } // namespace void EnqueueVisitor::EnqueueChildren(const OMPClause *S) { diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp index 877cc6185a4b1..05a01f6735a8c 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp @@ -4445,5 +4445,42 @@ TEST_P(ASTMatchersTest, HasDirectBase) { cxxRecordDecl(hasName("Derived"), hasDirectBase(hasType(cxxRecordDecl(hasName("Base"))))))); } + +TEST_P(ASTMatchersTest, CapturesThis) { + if (!GetParam().isCXX11OrLater()) { + return; + } + auto matcher = lambdaExpr(hasAnyCapture(lambdaCapture(capturesThis()))); + EXPECT_TRUE(matches("class C { int cc; int f() { auto l = [this](){ return " + "cc; }; return l(); } };", + matcher)); + EXPECT_TRUE(matches("class C { int cc; int f() { auto l = [=](){ return cc; " + "}; return l(); } };", + matcher)); + EXPECT_TRUE(matches("class C { int cc; int f() { auto l = [&](){ return cc; " + "}; return l(); } };", + matcher)); + EXPECT_FALSE(matches("class C { int cc; int f() { auto l = [cc](){ return " + "cc; }; return l(); } };", + matcher)); + EXPECT_FALSE(matches("class C { int this; int f() { auto l = [this](){ " + "return this; }; return l(); } };", + matcher)); +} + +TEST_P(ASTMatchersTest, IsImplicit_LambdaCapture) { + if (!GetParam().isCXX11OrLater()) { + return; + } + auto matcher = lambdaExpr(hasAnyCapture( + lambdaCapture(isImplicit(), capturesVar(varDecl(hasName("cc")))))); + EXPECT_TRUE( + matches("int main() { int cc; auto f = [&](){ return cc; }; }", matcher)); + EXPECT_TRUE( + matches("int main() { int cc; auto f = [=](){ return cc; }; }", matcher)); + EXPECT_FALSE(matches("int main() { int cc; auto f = [cc](){ return cc; }; }", + matcher)); +} + } // namespace ast_matchers } // namespace clang diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index 022f6ef8e5772..ed222a1fdf877 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -2237,6 +2237,65 @@ TEST_P(ASTMatchersTest, varDecl(hasName("ss"), hasTypeLoc(elaboratedTypeLoc())))); } +TEST_P(ASTMatchersTest, LambdaCaptureTest) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(matches("int main() { int cc; auto f = [cc](){ return cc; }; }", + lambdaExpr(hasAnyCapture(lambdaCapture())))); +} + +TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureOfVarDecl) { + if (!GetParam().isCXX11OrLater()) { + return; + } + auto matcher = lambdaExpr( + hasAnyCapture(lambdaCapture(capturesVar(varDecl(hasName("cc")))))); + EXPECT_TRUE(matches("int main() { int cc; auto f = [cc](){ return cc; }; }", + matcher)); + EXPECT_TRUE(matches("int main() { int cc; auto f = [&cc](){ return cc; }; }", + matcher)); + EXPECT_TRUE( + matches("int main() { 
int cc; auto f = [=](){ return cc; }; }", matcher)); + EXPECT_TRUE( + matches("int main() { int cc; auto f = [&](){ return cc; }; }", matcher)); +} + +TEST_P(ASTMatchersTest, LambdaCaptureTest_BindsToCaptureWithInitializer) { + if (!GetParam().isCXX14OrLater()) { + return; + } + auto matcher = lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar( + varDecl(hasName("cc"), hasInitializer(integerLiteral(equals(1)))))))); + EXPECT_TRUE( + matches("int main() { auto lambda = [cc = 1] {return cc;}; }", matcher)); + EXPECT_TRUE( + matches("int main() { int cc = 2; auto lambda = [cc = 1] {return cc;}; }", + matcher)); +} + +TEST_P(ASTMatchersTest, LambdaCaptureTest_DoesNotBindToCaptureOfVarDecl) { + if (!GetParam().isCXX11OrLater()) { + return; + } + auto matcher = lambdaExpr( + hasAnyCapture(lambdaCapture(capturesVar(varDecl(hasName("cc")))))); + EXPECT_FALSE(matches("int main() { auto f = [](){ return 5; }; }", matcher)); + EXPECT_FALSE(matches("int main() { int xx; auto f = [xx](){ return xx; }; }", + matcher)); +} + +TEST_P(ASTMatchersTest, + LambdaCaptureTest_DoesNotBindToCaptureWithInitializerAndDifferentName) { + if (!GetParam().isCXX14OrLater()) { + return; + } + EXPECT_FALSE(matches( + "int main() { auto lambda = [xx = 1] {return xx;}; }", + lambdaExpr(hasAnyCapture(lambdaCapture(capturesVar(varDecl( + hasName("cc"), hasInitializer(integerLiteral(equals(1)))))))))); +} + TEST(ASTMatchersTestObjC, ObjCMessageExpr) { // Don't find ObjCMessageExpr where none are present. EXPECT_TRUE(notMatchesObjC("", objcMessageExpr(anything()))); diff --git a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp index f39d5ecbb095e..e1540e9096e63 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersTraversalTest.cpp @@ -563,26 +563,6 @@ TEST(Matcher, HasReceiver) { objcMessageExpr(hasReceiver(declRefExpr(to(varDecl(hasName("x")))))))); } -TEST(Matcher, HasAnyCapture) { - auto HasCaptureX = lambdaExpr(hasAnyCapture(varDecl(hasName("x")))); - EXPECT_TRUE(matches("void f() { int x = 3; [x](){}; }", HasCaptureX)); - EXPECT_TRUE(matches("void f() { int x = 3; [&x](){}; }", HasCaptureX)); - EXPECT_TRUE(notMatches("void f() { [](){}; }", HasCaptureX)); - EXPECT_TRUE(notMatches("void f() { int z = 3; [&z](){}; }", HasCaptureX)); - EXPECT_TRUE( - notMatches("struct a { void f() { [this](){}; }; };", HasCaptureX)); -} - -TEST(Matcher, CapturesThis) { - auto HasCaptureThis = lambdaExpr(hasAnyCapture(cxxThisExpr())); - EXPECT_TRUE( - matches("struct a { void f() { [this](){}; }; };", HasCaptureThis)); - EXPECT_TRUE(notMatches("void f() { [](){}; }", HasCaptureThis)); - EXPECT_TRUE(notMatches("void f() { int x = 3; [x](){}; }", HasCaptureThis)); - EXPECT_TRUE(notMatches("void f() { int x = 3; [&x](){}; }", HasCaptureThis)); - EXPECT_TRUE(notMatches("void f() { int z = 3; [&z](){}; }", HasCaptureThis)); -} - TEST(Matcher, MatchesMethodsOnLambda) { StringRef Code = R"cpp( struct A { @@ -623,7 +603,6 @@ TEST(Matcher, MatchesCoroutine) { FileContentMappings M; M.push_back(std::make_pair("/coro_header", R"cpp( namespace std { -namespace experimental { template struct void_t_imp { @@ -642,7 +621,7 @@ struct traits_sfinae_base> { template struct coroutine_traits : public traits_sfinae_base {}; -}} // namespace std::experimental +} // namespace std struct awaitable { bool await_ready() noexcept; template @@ -658,14 +637,13 @@ struct promise { void unhandled_exception(); }; template -struct 
std::experimental::coroutine_traits { using promise_type = promise; }; +struct std::coroutine_traits { using promise_type = promise; }; namespace std { -namespace experimental { template struct coroutine_handle { static coroutine_handle from_address(void *) noexcept; }; -}} // namespace std::experimental +} // namespace std )cpp")); StringRef CoReturnCode = R"cpp( #include <coro_header> diff --git a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp index 81945cf4618c1..d65c8de7f0e19 100644 --- a/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -63,6 +63,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) { "#import \n" "@import A;\n" "#pragma clang module import A\n" + "#pragma push_macro(A)\n" + "#pragma pop_macro(A)\n" + "#pragma include_alias(<A>, <B>)\n" "export module m;\n" "import m;\n", Out, Tokens)); @@ -82,10 +85,13 @@ EXPECT_EQ(pp_import, Tokens[13].K); EXPECT_EQ(decl_at_import, Tokens[14].K); EXPECT_EQ(pp_pragma_import, Tokens[15].K); - EXPECT_EQ(cxx_export_decl, Tokens[16].K); - EXPECT_EQ(cxx_module_decl, Tokens[17].K); - EXPECT_EQ(cxx_import_decl, Tokens[18].K); - EXPECT_EQ(pp_eof, Tokens[19].K); + EXPECT_EQ(pp_pragma_push_macro, Tokens[16].K); + EXPECT_EQ(pp_pragma_pop_macro, Tokens[17].K); + EXPECT_EQ(pp_pragma_include_alias, Tokens[18].K); + EXPECT_EQ(cxx_export_decl, Tokens[19].K); + EXPECT_EQ(cxx_module_decl, Tokens[20].K); + EXPECT_EQ(cxx_import_decl, Tokens[21].K); + EXPECT_EQ(pp_eof, Tokens[22].K); } TEST(MinimizeSourceToDependencyDirectivesTest, Define) { @@ -406,6 +412,22 @@ TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) { ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out)); EXPECT_STREQ("", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#pragma push_macro(\"MACRO\")\n", Out)); + EXPECT_STREQ("#pragma push_macro(\"MACRO\")\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#pragma pop_macro(\"MACRO\")\n", Out)); + EXPECT_STREQ("#pragma pop_macro(\"MACRO\")\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#pragma include_alias(\"A\", \"B\")\n", Out)); + EXPECT_STREQ("#pragma include_alias(\"A\", \"B\")\n", Out.data()); + + ASSERT_FALSE(minimizeSourceToDependencyDirectives( + "#pragma include_alias(<A>, <B>)\n", Out)); + EXPECT_STREQ("#pragma include_alias(<A>, <B>)\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out)); EXPECT_STREQ("", Out.data()); diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt index 985edf4db3408..810cf75400d79 100644 --- a/clang/unittests/StaticAnalyzer/CMakeLists.txt +++ b/clang/unittests/StaticAnalyzer/CMakeLists.txt @@ -8,6 +8,7 @@ add_clang_unittest(StaticAnalysisTests BugReportInterestingnessTest.cpp CallDescriptionTest.cpp CallEventTest.cpp + ConflictingEvalCallsTest.cpp FalsePositiveRefutationBRVisitorTest.cpp NoStateChangeFuncVisitorTest.cpp ParamRegionTest.cpp diff --git a/clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp b/clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp new file mode 100644 index 0000000000000..405a59ffab1b3 --- /dev/null +++ b/clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// +// Part of the
LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "CheckerRegistration.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h" +#include "gtest/gtest.h" + +using namespace clang; +using namespace ento; + +namespace { +class EvalCallBase : public Checker { + const CallDescription Foo = {"foo", 0}; + +public: + bool evalCall(const CallEvent &Call, CheckerContext &C) const { + return Call.isCalled(Foo); + } +}; + +class EvalCallFoo1 : public EvalCallBase {}; +class EvalCallFoo2 : public EvalCallBase {}; +void addEvalFooCheckers(AnalysisASTConsumer &AnalysisConsumer, + AnalyzerOptions &AnOpts) { + AnOpts.CheckersAndPackages = {{"test.EvalFoo1", true}, + {"test.EvalFoo2", true}}; + AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) { + Registry.addChecker("test.EvalFoo1", "EmptyDescription", + "EmptyDocsUri"); + Registry.addChecker("test.EvalFoo2", "EmptyDescription", + "EmptyDocsUri"); + }); +} +} // namespace + +TEST(EvalCall, DetectConflictingEvalCalls) { +#ifdef NDEBUG + GTEST_SKIP() << "This test is only available for debug builds."; +#endif + const std::string Code = R"code( + void foo(); + void top() { + foo(); // crash + } + )code"; + constexpr auto Regex = + "The 'foo\\(\\)' call has been already evaluated by the test\\.EvalFoo1 " + "checker, while the test\\.EvalFoo2 checker also tried to evaluate the " + "same call\\. At most one checker supposed to evaluate a call\\."; + ASSERT_DEATH(runCheckerOnCode(Code), Regex); +} diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index db30a79a634ad..6894ca1b1ea64 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -596,7 +596,7 @@

C++17 implementation status

You can use Clang in C++17 mode with the -std=c++17 option (use -std=c++1z in Clang 4 and earlier).

-
+

List of features and minimum Clang version with support

@@ -813,8 +813,8 @@ C++17 implementation status
-
-
+
+

@@ -842,10 +842,13 @@ C++17 implementation status
reverse construction order in that ABI. This is not fully supported during constant expression evaluation until Clang 12.
-(10): Prior to Clang 14, this feature is not enabled by -default, but can be enabled with -frelaxed-template-template-args. -Starting from Clang 14, the flag is deprecated and will be removed in a future -version. +(10): Despite being the resolution to a Defect Report, this +feature is disabled by default in all language versions, and can be enabled +explicitly with the flag -frelaxed-template-template-args in Clang 4 +onwards. +The change to the standard lacks a corresponding change for template partial +ordering, resulting in ambiguity errors for reasonable and previously-valid +code. This issue is expected to be rectified soon.
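(Aside, not part of the patch: a minimal sketch of the matching rule this footnote describes, using the hypothetical names 'Vec' and 'Holder'. Under P0522R0, a template with extra defaulted parameters becomes a valid template template argument; Clang accepts this only with -frelaxed-template-template-args.)

// example.cpp -- clang++ -std=c++17 -frelaxed-template-template-args -fsyntax-only example.cpp
// 'Vec' takes two type parameters, the second defaulted.
template <typename T, typename Alloc = int> struct Vec {};
// 'Holder' expects a template taking exactly one type parameter.
template <template <typename> class TT> struct Holder { TT<int> value; };
// Rejected without the flag ("template template argument has different
// template parameters"); accepted with it, since Vec is usable wherever
// TT is required thanks to the defaulted Alloc parameter.
Holder<Vec> h;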

diff --git a/clang/www/related.html b/clang/www/related.html index 2c53c1a7f0d45..d45f533677654 100755 --- a/clang/www/related.html +++ b/clang/www/related.html @@ -39,8 +39,8 @@

Clang Related Projects

Site:
- https://chromium.googlesource.com/chromium/src/+/master/docs/clang.md
+ https://chromium.googlesource.com/chromium/src/+/refs/heads/main/docs/clang.md

Notes on using Clang to build the Chromium web browser. diff --git a/cmake/Modules/CheckLinkerFlag.cmake b/cmake/Modules/CheckLinkerFlag.cmake new file mode 100644 index 0000000000000..722fe5b1b8ead --- /dev/null +++ b/cmake/Modules/CheckLinkerFlag.cmake @@ -0,0 +1,17 @@ +include(CMakePushCheckState) +include(CheckCCompilerFlag) + +function(llvm_check_linker_flag flag dest) + # If testing a flag with check_c_compiler_flag, it gets added to the compile + # command only, but not to the linker command in that test. If the flag + # is vital for linking to succeed, the test would fail even if it would + # have succeeded if it was included on both commands. + # + # Therefore, try adding the flag to CMAKE_REQUIRED_FLAGS, which gets + # added to both compiling and linking commands in the tests. + + cmake_push_check_state() + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}") + check_c_compiler_flag("" ${dest}) + cmake_pop_check_state() +endfunction() diff --git a/cmake/Modules/EnableLanguageNolink.cmake b/cmake/Modules/EnableLanguageNolink.cmake new file mode 100644 index 0000000000000..18668c6d0476f --- /dev/null +++ b/cmake/Modules/EnableLanguageNolink.cmake @@ -0,0 +1,11 @@ +macro(llvm_enable_language_nolink) + # Set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY to disable linking + # in the compiler sanity checks. When bootstrapping the toolchain, + # the toolchain itself is still incomplete and sanity checks that include + # linking may fail. + set(__SAVED_TRY_COMPILE_TARGET_TYPE ${CMAKE_TRY_COMPILE_TARGET_TYPE}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + enable_language(${ARGV}) + set(CMAKE_TRY_COMPILE_TARGET_TYPE ${__SAVED_TRY_COMPILE_TARGET_TYPE}) + unset(__SAVED_TRY_COMPILE_TARGET_TYPE) +endmacro() diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt index 17d7e4ffbdebf..90fe35dfcbe86 100644 --- a/compiler-rt/CMakeLists.txt +++ b/compiler-rt/CMakeLists.txt @@ -443,7 +443,6 @@ append_list_if(COMPILER_RT_HAS_WGNU_FLAG -Wno-gnu SANITIZER_COMMON_CFLAGS) append_list_if(COMPILER_RT_HAS_WVARIADIC_MACROS_FLAG -Wno-variadic-macros SANITIZER_COMMON_CFLAGS) append_list_if(COMPILER_RT_HAS_WC99_EXTENSIONS_FLAG -Wno-c99-extensions SANITIZER_COMMON_CFLAGS) # format-pedantic warns about passing T* for %p, which is not useful. -append_list_if(COMPILER_RT_HAS_WNO_FORMAT_PEDANTIC -Wno-format-pedantic SANITIZER_COMMON_CFLAGS) append_list_if(COMPILER_RT_HAS_WD4146_FLAG /wd4146 SANITIZER_COMMON_CFLAGS) append_list_if(COMPILER_RT_HAS_WD4291_FLAG /wd4291 SANITIZER_COMMON_CFLAGS) append_list_if(COMPILER_RT_HAS_WD4391_FLAG /wd4391 SANITIZER_COMMON_CFLAGS) diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index 76e015368f5db..6a594f92c246d 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -130,11 +130,14 @@ macro(set_output_name output name arch) else() set(triple "${TARGET_TRIPLE}") endif() - # When using arch-suffixed runtime library names, clang only looks for - # libraries named "arm" or "armhf", see getArchNameForCompilerRTLib in - # clang. Therefore, try to inspect both the arch name and the triple - # if it seems like we're building an armhf target. - if ("${arch}" MATCHES "hf$" OR "${triple}" MATCHES "hf$") + # Except for baremetal, when using arch-suffixed runtime library names, + # clang only looks for libraries named "arm" or "armhf", see + # getArchNameForCompilerRTLib in clang. 
Therefore, try to inspect both + # the arch name and the triple if it seems like we're building an armhf + # target. + if (COMPILER_RT_BAREMETAL_BUILD) + set(${output} "${name}-${arch}${COMPILER_RT_OS_SUFFIX}") + elseif ("${arch}" MATCHES "hf$" OR "${triple}" MATCHES "hf$") set(${output} "${name}-armhf${COMPILER_RT_OS_SUFFIX}") else() set(${output} "${name}-arm${COMPILER_RT_OS_SUFFIX}") diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 268feac59ddda..6d7073710bd19 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -522,7 +522,7 @@ struct Allocator { size > max_user_defined_malloc_size) { if (AllocatorMayReturnNull()) { Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n", - (void*)size); + size); return nullptr; } uptr malloc_limit = diff --git a/compiler-rt/lib/asan/asan_descriptions.cpp b/compiler-rt/lib/asan/asan_descriptions.cpp index 2ba8a02f84106..d7d961685793d 100644 --- a/compiler-rt/lib/asan/asan_descriptions.cpp +++ b/compiler-rt/lib/asan/asan_descriptions.cpp @@ -251,7 +251,7 @@ static void PrintAccessAndVarIntersection(const StackVarDescr &var, uptr addr, } str.append("'"); if (var.line > 0) { - str.append(" (line %d)", var.line); + str.append(" (line %zd)", var.line); } if (pos_descr) { Decorator d; @@ -318,7 +318,8 @@ bool DescribeAddressIfGlobal(uptr addr, uptr access_size, } void ShadowAddressDescription::Print() const { - Printf("Address %p is located in the %s area.\n", addr, ShadowNames[kind]); + Printf("Address %p is located in the %s area.\n", (void *)addr, + ShadowNames[kind]); } void GlobalAddressDescription::Print(const char *bug_type) const { @@ -356,7 +357,7 @@ bool GlobalAddressDescription::PointsInsideTheSameVariable( void StackAddressDescription::Print() const { Decorator d; Printf("%s", d.Location()); - Printf("Address %p is located in stack of thread %s", addr, + Printf("Address %p is located in stack of thread %s", (void *)addr, AsanThreadIdAndName(tid).c_str()); if (!frame_descr) { @@ -469,7 +470,7 @@ AddressDescription::AddressDescription(uptr addr, uptr access_size, void WildAddressDescription::Print() const { Printf("Address %p is a wild pointer inside of access range of size %p.\n", - addr, access_size); + (void *)addr, (void *)access_size); } void PrintAddressDescription(uptr addr, uptr access_size, diff --git a/compiler-rt/lib/asan/asan_errors.cpp b/compiler-rt/lib/asan/asan_errors.cpp index 45166c0648773..7cd9fe911afa8 100644 --- a/compiler-rt/lib/asan/asan_errors.cpp +++ b/compiler-rt/lib/asan/asan_errors.cpp @@ -46,10 +46,9 @@ void ErrorDeadlySignal::Print() { void ErrorDoubleFree::Print() { Decorator d; Printf("%s", d.Error()); - Report( - "ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n", - scariness.GetDescription(), addr_description.addr, - AsanThreadIdAndName(tid).c_str()); + Report("ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n", + scariness.GetDescription(), (void *)addr_description.addr, + AsanThreadIdAndName(tid).c_str()); Printf("%s", d.Default()); scariness.Print(); GET_STACK_TRACE_FATAL(second_free_stack->trace[0], @@ -62,10 +61,9 @@ void ErrorDoubleFree::Print() { void ErrorNewDeleteTypeMismatch::Print() { Decorator d; Printf("%s", d.Error()); - Report( - "ERROR: AddressSanitizer: %s on %p in thread %s:\n", - scariness.GetDescription(), addr_description.addr, - AsanThreadIdAndName(tid).c_str()); + Report("ERROR: AddressSanitizer: %s on %p in thread %s:\n", + scariness.GetDescription(), (void 
*)addr_description.addr, + AsanThreadIdAndName(tid).c_str()); Printf("%s object passed to delete has wrong type:\n", d.Default()); if (delete_size != 0) { Printf( @@ -106,7 +104,7 @@ void ErrorFreeNotMalloced::Print() { Report( "ERROR: AddressSanitizer: attempting free on address " "which was not malloc()-ed: %p in thread %s\n", - addr_description.Address(), AsanThreadIdAndName(tid).c_str()); + (void *)addr_description.Address(), AsanThreadIdAndName(tid).c_str()); Printf("%s", d.Default()); CHECK_GT(free_stack->size, 0); scariness.Print(); @@ -126,7 +124,7 @@ void ErrorAllocTypeMismatch::Print() { Printf("%s", d.Error()); Report("ERROR: AddressSanitizer: %s (%s vs %s) on %p\n", scariness.GetDescription(), alloc_names[alloc_type], - dealloc_names[dealloc_type], addr_description.Address()); + dealloc_names[dealloc_type], (void *)addr_description.Address()); Printf("%s", d.Default()); CHECK_GT(dealloc_stack->size, 0); scariness.Print(); @@ -145,7 +143,7 @@ void ErrorMallocUsableSizeNotOwned::Print() { Report( "ERROR: AddressSanitizer: attempting to call malloc_usable_size() for " "pointer which is not owned: %p\n", - addr_description.Address()); + (void *)addr_description.Address()); Printf("%s", d.Default()); stack->Print(); addr_description.Print(); @@ -158,7 +156,7 @@ void ErrorSanitizerGetAllocatedSizeNotOwned::Print() { Report( "ERROR: AddressSanitizer: attempting to call " "__sanitizer_get_allocated_size() for pointer which is not owned: %p\n", - addr_description.Address()); + (void *)addr_description.Address()); Printf("%s", d.Default()); stack->Print(); addr_description.Print(); @@ -298,9 +296,10 @@ void ErrorStringFunctionMemoryRangesOverlap::Print() { Report( "ERROR: AddressSanitizer: %s: memory ranges [%p,%p) and [%p, %p) " "overlap\n", - bug_type, addr1_description.Address(), - addr1_description.Address() + length1, addr2_description.Address(), - addr2_description.Address() + length2); + bug_type, (void *)addr1_description.Address(), + (void *)(addr1_description.Address() + length1), + (void *)addr2_description.Address(), + (void *)(addr2_description.Address() + length2)); Printf("%s", d.Default()); scariness.Print(); stack->Print(); @@ -329,10 +328,10 @@ void ErrorBadParamsToAnnotateContiguousContainer::Print() { " end : %p\n" " old_mid : %p\n" " new_mid : %p\n", - beg, end, old_mid, new_mid); + (void *)beg, (void *)end, (void *)old_mid, (void *)new_mid); uptr granularity = SHADOW_GRANULARITY; if (!IsAligned(beg, granularity)) - Report("ERROR: beg is not aligned by %d\n", granularity); + Report("ERROR: beg is not aligned by %zu\n", granularity); stack->Print(); ReportErrorSummary(scariness.GetDescription(), stack); } @@ -341,7 +340,7 @@ void ErrorODRViolation::Print() { Decorator d; Printf("%s", d.Error()); Report("ERROR: AddressSanitizer: %s (%p):\n", scariness.GetDescription(), - global1.beg); + (void *)global1.beg); Printf("%s", d.Default()); InternalScopedString g1_loc; InternalScopedString g2_loc; @@ -371,7 +370,8 @@ void ErrorInvalidPointerPair::Print() { Decorator d; Printf("%s", d.Error()); Report("ERROR: AddressSanitizer: %s: %p %p\n", scariness.GetDescription(), - addr1_description.Address(), addr2_description.Address()); + (void *)addr1_description.Address(), + (void *)addr2_description.Address()); Printf("%s", d.Default()); GET_STACK_TRACE_FATAL(pc, bp); stack.Print(); @@ -538,7 +538,8 @@ static void PrintLegend(InternalScopedString *str) { static void PrintShadowBytes(InternalScopedString *str, const char *before, u8 *bytes, u8 *guilty, uptr n) { Decorator d; - if 
(before) str->append("%s%p:", before, bytes); + if (before) + str->append("%s%p:", before, (void *)bytes); for (uptr i = 0; i < n; i++) { u8 *p = bytes + i; const char *before = @@ -575,7 +576,7 @@ void ErrorGeneric::Print() { Printf("%s", d.Error()); uptr addr = addr_description.Address(); Report("ERROR: AddressSanitizer: %s on address %p at pc %p bp %p sp %p\n", - bug_descr, (void *)addr, pc, bp, sp); + bug_descr, (void *)addr, (void *)pc, (void *)bp, (void *)sp); Printf("%s", d.Default()); Printf("%s%s of size %zu at %p thread %s%s\n", d.Access(), diff --git a/compiler-rt/lib/asan/asan_fake_stack.cpp b/compiler-rt/lib/asan/asan_fake_stack.cpp index bf5c342ee59db..07681c10de91a 100644 --- a/compiler-rt/lib/asan/asan_fake_stack.cpp +++ b/compiler-rt/lib/asan/asan_fake_stack.cpp @@ -54,10 +54,11 @@ FakeStack *FakeStack::Create(uptr stack_size_log) { : MmapOrDie(size, "FakeStack")); res->stack_size_log_ = stack_size_log; u8 *p = reinterpret_cast(res); - VReport(1, "T%d: FakeStack created: %p -- %p stack_size_log: %zd; " + VReport(1, + "T%d: FakeStack created: %p -- %p stack_size_log: %zd; " "mmapped %zdK, noreserve=%d \n", - GetCurrentTidOrInvalid(), p, - p + FakeStack::RequiredSize(stack_size_log), stack_size_log, + GetCurrentTidOrInvalid(), (void *)p, + (void *)(p + FakeStack::RequiredSize(stack_size_log)), stack_size_log, size >> 10, flags()->uar_noreserve); return res; } diff --git a/compiler-rt/lib/asan/asan_globals.cpp b/compiler-rt/lib/asan/asan_globals.cpp index 9bf378f62071d..5f56fe6f457d1 100644 --- a/compiler-rt/lib/asan/asan_globals.cpp +++ b/compiler-rt/lib/asan/asan_globals.cpp @@ -85,12 +85,12 @@ static void ReportGlobal(const Global &g, const char *prefix) { Report( "%s Global[%p]: beg=%p size=%zu/%zu name=%s module=%s dyn_init=%zu " "odr_indicator=%p\n", - prefix, &g, (void *)g.beg, g.size, g.size_with_redzone, g.name, + prefix, (void *)&g, (void *)g.beg, g.size, g.size_with_redzone, g.name, g.module_name, g.has_dynamic_init, (void *)g.odr_indicator); if (g.location) { - Report(" location (%p): name=%s[%p], %d %d\n", g.location, - g.location->filename, g.location->filename, g.location->line_no, - g.location->column_no); + Report(" location (%p): name=%s[%p], %d %d\n", (void *)g.location, + g.location->filename, (void *)g.location->filename, + g.location->line_no, g.location->column_no); } } @@ -369,7 +369,8 @@ void __asan_register_globals(__asan_global *globals, uptr n) { global_registration_site_vector->push_back(site); if (flags()->report_globals >= 2) { PRINT_CURRENT_STACK(); - Printf("=== ID %d; %p %p\n", stack_id, &globals[0], &globals[n - 1]); + Printf("=== ID %d; %p %p\n", stack_id, (void *)&globals[0], + (void *)&globals[n - 1]); } for (uptr i = 0; i < n; i++) { if (SANITIZER_WINDOWS && globals[i].beg == 0) { diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index 4bcbe5d02e334..ad3693d5e6a2c 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -128,8 +128,8 @@ void AsanCheckIncompatibleRT() {} #else static int FindFirstDSOCallback(struct dl_phdr_info *info, size_t size, void *data) { - VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n", - info->dlpi_name, info->dlpi_addr); + VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n", info->dlpi_name, + (void *)info->dlpi_addr); // Continue until the first dynamic library is found if (!info->dlpi_name || info->dlpi_name[0] == 0) diff --git a/compiler-rt/lib/asan/asan_poisoning.cpp b/compiler-rt/lib/asan/asan_poisoning.cpp index 
5f215fe0f9bbb..d97af91e692dc 100644 --- a/compiler-rt/lib/asan/asan_poisoning.cpp +++ b/compiler-rt/lib/asan/asan_poisoning.cpp @@ -66,7 +66,7 @@ void AsanPoisonOrUnpoisonIntraObjectRedzone(uptr ptr, uptr size, bool poison) { uptr end = ptr + size; if (Verbosity()) { Printf("__asan_%spoison_intra_object_redzone [%p,%p) %zd\n", - poison ? "" : "un", ptr, end, size); + poison ? "" : "un", (void *)ptr, (void *)end, size); if (Verbosity() >= 2) PRINT_CURRENT_STACK(); } diff --git a/compiler-rt/lib/asan/asan_report.cpp b/compiler-rt/lib/asan/asan_report.cpp index 271d896403839..1f266334b3115 100644 --- a/compiler-rt/lib/asan/asan_report.cpp +++ b/compiler-rt/lib/asan/asan_report.cpp @@ -67,14 +67,14 @@ static void PrintZoneForPointer(uptr ptr, uptr zone_ptr, const char *zone_name) { if (zone_ptr) { if (zone_name) { - Printf("malloc_zone_from_ptr(%p) = %p, which is %s\n", - ptr, zone_ptr, zone_name); + Printf("malloc_zone_from_ptr(%p) = %p, which is %s\n", (void *)ptr, + (void *)zone_ptr, zone_name); } else { Printf("malloc_zone_from_ptr(%p) = %p, which doesn't have a name\n", - ptr, zone_ptr); + (void *)ptr, (void *)zone_ptr); } } else { - Printf("malloc_zone_from_ptr(%p) = 0\n", ptr); + Printf("malloc_zone_from_ptr(%p) = 0\n", (void *)ptr); } } @@ -435,9 +435,10 @@ static inline void CheckForInvalidPointerPair(void *p1, void *p2) { void ReportMacMzReallocUnknown(uptr addr, uptr zone_ptr, const char *zone_name, BufferedStackTrace *stack) { ScopedInErrorReport in_report; - Printf("mz_realloc(%p) -- attempting to realloc unallocated memory.\n" - "This is an unrecoverable problem, exiting now.\n", - addr); + Printf( + "mz_realloc(%p) -- attempting to realloc unallocated memory.\n" + "This is an unrecoverable problem, exiting now.\n", + (void *)addr); PrintZoneForPointer(addr, zone_ptr, zone_name); stack->Print(); DescribeAddressIfHeap(addr); diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp index bfaa3bc270274..1b150b393cfe0 100644 --- a/compiler-rt/lib/asan/asan_rtl.cpp +++ b/compiler-rt/lib/asan/asan_rtl.cpp @@ -557,7 +557,8 @@ void UnpoisonStack(uptr bottom, uptr top, const char *type) { "False positive error reports may follow\n" "For details see " "https://github.com/google/sanitizers/issues/189\n", - type, top, bottom, top - bottom, top - bottom); + type, (void *)top, (void *)bottom, (void *)(top - bottom), + top - bottom); return; } PoisonShadow(bottom, RoundUpTo(top - bottom, SHADOW_GRANULARITY), 0); diff --git a/compiler-rt/lib/asan/asan_shadow_setup.cpp b/compiler-rt/lib/asan/asan_shadow_setup.cpp index 6e6260d3413fc..fc6de39622b51 100644 --- a/compiler-rt/lib/asan/asan_shadow_setup.cpp +++ b/compiler-rt/lib/asan/asan_shadow_setup.cpp @@ -33,7 +33,7 @@ static void ProtectGap(uptr addr, uptr size) { "protect_shadow_gap=0:" " not protecting shadow gap, allocating gap's shadow\n" "|| `[%p, %p]` || ShadowGap's shadow ||\n", - GapShadowBeg, GapShadowEnd); + (void*)GapShadowBeg, (void*)GapShadowEnd); ReserveShadowMemoryRange(GapShadowBeg, GapShadowEnd, "unprotected gap shadow"); return; @@ -113,7 +113,7 @@ void InitializeShadowMemory() { "Shadow memory range interleaves with an existing memory mapping. " "ASan cannot proceed correctly. 
ABORTING.\n"); Report("ASan shadow was supposed to be located in the [%p-%p] range.\n", - shadow_start, kHighShadowEnd); + (void*)shadow_start, (void*)kHighShadowEnd); MaybeReportLinuxPIEBug(); DumpProcessMap(); Die(); diff --git a/compiler-rt/lib/asan/asan_thread.cpp b/compiler-rt/lib/asan/asan_thread.cpp index d25e8ee4f45fc..8af74254cdc76 100644 --- a/compiler-rt/lib/asan/asan_thread.cpp +++ b/compiler-rt/lib/asan/asan_thread.cpp @@ -254,7 +254,7 @@ void AsanThread::Init(const InitOptions *options) { int local = 0; VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(), (void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_, - &local); + (void *)&local); } // Fuchsia doesn't use ThreadStart. @@ -443,7 +443,7 @@ AsanThread *GetCurrentThread() { void SetCurrentThread(AsanThread *t) { CHECK(t->context()); - VReport(2, "SetCurrentThread: %p for thread %p\n", t->context(), + VReport(2, "SetCurrentThread: %p for thread %p\n", (void *)t->context(), (void *)GetThreadSelf()); // Make sure we do not reset the current AsanThread. CHECK_EQ(0, AsanTSDGet()); diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp index e22797fb48d8a..95853208f951e 100644 --- a/compiler-rt/lib/cfi/cfi.cpp +++ b/compiler-rt/lib/cfi/cfi.cpp @@ -359,7 +359,7 @@ ALWAYS_INLINE void CfiSlowPathCommon(u64 CallSiteTypeId, void *Ptr, return; } CFICheckFn cfi_check = sv.get_cfi_check(); - VReport(2, "__cfi_check at %p\n", cfi_check); + VReport(2, "__cfi_check at %p\n", (void *)cfi_check); cfi_check(CallSiteTypeId, Ptr, DiagData); } diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp index 825570855ac4a..67f303e02eeab 100644 --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -916,7 +916,7 @@ static bool ProtectMemoryRange(uptr beg, uptr size, const char *name) { // Consider refactoring these into a shared implementation. bool InitShadow(bool init_origins) { // Let user know mapping parameters first. - VPrintf(1, "dfsan_init %p\n", &__dfsan::dfsan_init); + VPrintf(1, "dfsan_init %p\n", (void *)&__dfsan::dfsan_init); for (unsigned i = 0; i < kMemoryLayoutSize; ++i) VPrintf(1, "%s: %zx - %zx\n", kMemoryLayout[i].name, kMemoryLayout[i].start, kMemoryLayout[i].end - 1); diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp index 53e3a69884e46..af8d1ce50f3fb 100644 --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp @@ -391,6 +391,7 @@ void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) { ValueProfileMap.AddValue(PC * 128 + 64 + AbsoluteDistance); } +ATTRIBUTE_NO_SANITIZE_MEMORY static size_t InternalStrnlen(const char *S, size_t MaxLen) { size_t Len = 0; for (; Len < MaxLen && S[Len]; Len++) {} @@ -399,6 +400,7 @@ static size_t InternalStrnlen(const char *S, size_t MaxLen) { // Finds min of (strlen(S1), strlen(S2)). // Needed because one of these strings may actually be non-zero terminated. 
+ATTRIBUTE_NO_SANITIZE_MEMORY static size_t InternalStrnlen2(const char *S1, const char *S2) { size_t Len = 0; for (; S1[Len] && S2[Len]; Len++) {} diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index e8ffbbd6f48de..c2863400d9d90 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -16,6 +16,7 @@ #include "hwasan_checks.h" #include "hwasan_dynamic_shadow.h" #include "hwasan_globals.h" +#include "hwasan_mapping.h" #include "hwasan_poisoning.h" #include "hwasan_report.h" #include "hwasan_thread.h" @@ -391,8 +392,15 @@ void __hwasan_print_shadow(const void *p, uptr sz) { uptr shadow_last = MemToShadow(ptr_raw + sz - 1); Printf("HWASan shadow map for %zx .. %zx (pointer tag %x)\n", ptr_raw, ptr_raw + sz, GetTagFromPointer((uptr)p)); - for (uptr s = shadow_first; s <= shadow_last; ++s) - Printf(" %zx: %x\n", ShadowToMem(s), *(tag_t *)s); + for (uptr s = shadow_first; s <= shadow_last; ++s) { + tag_t mem_tag = *reinterpret_cast(s); + uptr granule_addr = ShadowToMem(s); + if (mem_tag && mem_tag < kShadowAlignment) + Printf(" %zx: %02x(%02x)\n", granule_addr, mem_tag, + *reinterpret_cast(granule_addr + kShadowAlignment - 1)); + else + Printf(" %zx: %02x\n", granule_addr, mem_tag); + } } sptr __hwasan_test_shadow(const void *p, uptr sz) { diff --git a/compiler-rt/lib/hwasan/hwasan_exceptions.cpp b/compiler-rt/lib/hwasan/hwasan_exceptions.cpp index 169e7876cb58a..6ed1da3354280 100644 --- a/compiler-rt/lib/hwasan/hwasan_exceptions.cpp +++ b/compiler-rt/lib/hwasan/hwasan_exceptions.cpp @@ -29,8 +29,8 @@ typedef _Unwind_Reason_Code PersonalityFn(int version, _Unwind_Action actions, // is statically linked and the sanitizer runtime and the program are linked // against different unwinders. The _Unwind_Context data structure is opaque so // it may be incompatible between unwinders. -typedef _Unwind_Word GetGRFn(_Unwind_Context* context, int index); -typedef _Unwind_Word GetCFAFn(_Unwind_Context* context); +typedef uintptr_t GetGRFn(_Unwind_Context* context, int index); +typedef uintptr_t GetCFAFn(_Unwind_Context* context); extern "C" SANITIZER_INTERFACE_ATTRIBUTE _Unwind_Reason_Code __hwasan_personality_wrapper(int version, _Unwind_Action actions, diff --git a/compiler-rt/lib/hwasan/hwasan_report.cpp b/compiler-rt/lib/hwasan/hwasan_report.cpp index d0be6b579b563..66d3d155d4094 100644 --- a/compiler-rt/lib/hwasan/hwasan_report.cpp +++ b/compiler-rt/lib/hwasan/hwasan_report.cpp @@ -518,7 +518,7 @@ static void PrintTagInfoAroundAddr(tag_t *tag_ptr, uptr num_rows, InternalScopedString s; for (tag_t *row = beg_row; row < end_row; row += row_len) { s.append("%s", row == center_row_beg ? "=>" : " "); - s.append("%p:", row); + s.append("%p:", (void *)row); for (uptr i = 0; i < row_len; i++) { s.append("%s", row + i == tag_ptr ? "[" : " "); print_tag(s, &row[i]); @@ -702,17 +702,15 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size, tag_t mem_tag = *tag_ptr; Printf("%s", d.Access()); - Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n", - is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag, - mem_tag, t->unique_id()); if (mem_tag && mem_tag < kShadowAlignment) { tag_t *granule_ptr = reinterpret_cast((untagged_addr + offset) & ~(kShadowAlignment - 1)); // If offset is 0, (untagged_addr + offset) is not aligned to granules. // This is the offset of the leftmost accessed byte within the bad granule. 
u8 in_granule_offset = (untagged_addr + offset) & (kShadowAlignment - 1); + tag_t short_tag = granule_ptr[kShadowAlignment - 1]; // The first mismatch was a short granule that matched the ptr_tag. - if (granule_ptr[kShadowAlignment - 1] == ptr_tag) { + if (short_tag == ptr_tag) { // If the access starts after the end of the short granule, then the first // bad byte is the first byte of the access; otherwise it is the first // byte past the end of the short granule @@ -720,6 +718,14 @@ void ReportTagMismatch(StackTrace *stack, uptr tagged_addr, uptr access_size, offset += mem_tag - in_granule_offset; } } + Printf( + "%s of size %zu at %p tags: %02x/%02x(%02x) (ptr/mem) in thread T%zd\n", + is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag, + mem_tag, short_tag, t->unique_id()); + } else { + Printf("%s of size %zu at %p tags: %02x/%02x (ptr/mem) in thread T%zd\n", + is_store ? "WRITE" : "READ", access_size, untagged_addr, ptr_tag, + mem_tag, t->unique_id()); } if (offset != 0) Printf("Invalid access starting at offset %zu\n", offset); diff --git a/compiler-rt/lib/hwasan/hwasan_thread.cpp b/compiler-rt/lib/hwasan/hwasan_thread.cpp index 5b65718c4d3b2..5f05446ac7a3b 100644 --- a/compiler-rt/lib/hwasan/hwasan_thread.cpp +++ b/compiler-rt/lib/hwasan/hwasan_thread.cpp @@ -108,10 +108,9 @@ void Thread::Destroy() { } void Thread::Print(const char *Prefix) { - Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix, - unique_id_, this, stack_bottom(), stack_top(), - stack_top() - stack_bottom(), - tls_begin(), tls_end()); + Printf("%sT%zd %p stack: [%p,%p) sz: %zd tls: [%p,%p)\n", Prefix, unique_id_, + (void *)this, stack_bottom(), stack_top(), + stack_top() - stack_bottom(), tls_begin(), tls_end()); } static u32 xorshift(u32 state) { diff --git a/compiler-rt/lib/lsan/lsan_common.cpp b/compiler-rt/lib/lsan/lsan_common.cpp index 6ac5019f1f3c1..139abd0775547 100644 --- a/compiler-rt/lib/lsan/lsan_common.cpp +++ b/compiler-rt/lib/lsan/lsan_common.cpp @@ -188,7 +188,8 @@ void ScanRangeForPointers(uptr begin, uptr end, const char *region_type, ChunkTag tag) { CHECK(tag == kReachable || tag == kIndirectlyLeaked); const uptr alignment = flags()->pointer_alignment(); - LOG_POINTERS("Scanning %s range %p-%p.\n", region_type, begin, end); + LOG_POINTERS("Scanning %s range %p-%p.\n", region_type, (void *)begin, + (void *)end); uptr pp = begin; if (pp % alignment) pp = pp + alignment - pp % alignment; @@ -207,13 +208,15 @@ void ScanRangeForPointers(uptr begin, uptr end, LOG_POINTERS( "%p is poisoned: ignoring %p pointing into chunk %p-%p of size " "%zu.\n", - pp, p, chunk, chunk + m.requested_size(), m.requested_size()); + (void *)pp, p, (void *)chunk, (void *)(chunk + m.requested_size()), + m.requested_size()); continue; } m.set_tag(tag); - LOG_POINTERS("%p: found %p pointing into chunk %p-%p of size %zu.\n", pp, p, - chunk, chunk + m.requested_size(), m.requested_size()); + LOG_POINTERS("%p: found %p pointing into chunk %p-%p of size %zu.\n", + (void *)pp, p, (void *)chunk, + (void *)(chunk + m.requested_size()), m.requested_size()); if (frontier) frontier->push_back(chunk); } @@ -281,7 +284,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, InternalMmapVector registers; for (uptr i = 0; i < suspended_threads.ThreadCount(); i++) { tid_t os_id = static_cast(suspended_threads.GetThreadID(i)); - LOG_THREADS("Processing thread %d.\n", os_id); + LOG_THREADS("Processing thread %llu.\n", os_id); uptr stack_begin, stack_end, tls_begin, tls_end, cache_begin, 
cache_end; DTLS *dtls; bool thread_found = GetThreadRangesLocked(os_id, &stack_begin, &stack_end, @@ -290,14 +293,14 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, if (!thread_found) { // If a thread can't be found in the thread registry, it's probably in the // process of destruction. Log this event and move on. - LOG_THREADS("Thread %d not found in registry.\n", os_id); + LOG_THREADS("Thread %llu not found in registry.\n", os_id); continue; } uptr sp; PtraceRegistersStatus have_registers = suspended_threads.GetRegistersAndSP(i, ®isters, &sp); if (have_registers != REGISTERS_AVAILABLE) { - Report("Unable to get registers from thread %d.\n", os_id); + Report("Unable to get registers from thread %llu.\n", os_id); // If unable to get SP, consider the entire stack to be reachable unless // GetRegistersAndSP failed with ESRCH. if (have_registers == REGISTERS_UNAVAILABLE_FATAL) continue; @@ -313,7 +316,8 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, } if (flags()->use_stacks) { - LOG_THREADS("Stack at %p-%p (SP = %p).\n", stack_begin, stack_end, sp); + LOG_THREADS("Stack at %p-%p (SP = %p).\n", (void *)stack_begin, + (void *)stack_end, (void *)sp); if (sp < stack_begin || sp >= stack_end) { // SP is outside the recorded stack range (e.g. the thread is running a // signal handler on alternate stack, or swapcontext was used). @@ -327,7 +331,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, stack_begin += page_size; } LOG_THREADS("Skipped %d guard page(s) to obtain stack %p-%p.\n", - skipped, stack_begin, stack_end); + skipped, (void *)stack_begin, (void *)stack_end); } else { // Shrink the stack range to ignore out-of-scope values. stack_begin = sp; @@ -339,7 +343,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, if (flags()->use_tls) { if (tls_begin) { - LOG_THREADS("TLS at %p-%p.\n", tls_begin, tls_end); + LOG_THREADS("TLS at %p-%p.\n", (void *)tls_begin, (void *)tls_end); // If the tls and cache ranges don't overlap, scan full tls range, // otherwise, only scan the non-overlapping portions if (cache_begin == cache_end || tls_end < cache_begin || @@ -373,7 +377,8 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, uptr dtls_beg = dtv.beg; uptr dtls_end = dtls_beg + dtv.size; if (dtls_beg < dtls_end) { - LOG_THREADS("DTLS %zu at %p-%p.\n", id, dtls_beg, dtls_end); + LOG_THREADS("DTLS %d at %p-%p.\n", id, (void *)dtls_beg, + (void *)dtls_end); ScanRangeForPointers(dtls_beg, dtls_end, frontier, "DTLS", kReachable); } @@ -381,7 +386,7 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, } else { // We are handling a thread with DTLS under destruction. Log about // this and continue. - LOG_THREADS("Thread %d has DTLS under destruction.\n", os_id); + LOG_THREADS("Thread %llu has DTLS under destruction.\n", os_id); } #endif } @@ -399,8 +404,9 @@ void ScanRootRegion(Frontier *frontier, const RootRegion &root_region, uptr intersection_end = Min(region_end, root_region.begin + root_region.size); if (intersection_begin >= intersection_end) return; LOG_POINTERS("Root region %p-%p intersects with mapped region %p-%p (%s)\n", - root_region.begin, root_region.begin + root_region.size, - region_begin, region_end, + (void *)root_region.begin, + (void *)(root_region.begin + root_region.size), + (void *)region_begin, (void *)region_end, is_readable ? 
"readable" : "unreadable"); if (is_readable) ScanRangeForPointers(intersection_begin, intersection_end, frontier, "ROOT", @@ -460,8 +466,8 @@ static void IgnoredSuppressedCb(uptr chunk, void *arg) { if (idx >= suppressed.size() || m.stack_trace_id() != suppressed[idx]) return; - LOG_POINTERS("Suppressed: chunk %p-%p of size %zu.\n", chunk, - chunk + m.requested_size(), m.requested_size()); + LOG_POINTERS("Suppressed: chunk %p-%p of size %zu.\n", (void *)chunk, + (void *)(chunk + m.requested_size()), m.requested_size()); m.set_tag(kIgnored); } @@ -472,8 +478,8 @@ static void CollectIgnoredCb(uptr chunk, void *arg) { chunk = GetUserBegin(chunk); LsanMetadata m(chunk); if (m.allocated() && m.tag() == kIgnored) { - LOG_POINTERS("Ignored: chunk %p-%p of size %zu.\n", - chunk, chunk + m.requested_size(), m.requested_size()); + LOG_POINTERS("Ignored: chunk %p-%p of size %zu.\n", (void *)chunk, + (void *)(chunk + m.requested_size()), m.requested_size()); reinterpret_cast(arg)->push_back(chunk); } } @@ -617,8 +623,9 @@ static void ReportIfNotSuspended(ThreadContextBase *tctx, void *arg) { if (tctx->status == ThreadStatusRunning) { uptr i = InternalLowerBound(suspended_threads, tctx->os_id); if (i >= suspended_threads.size() || suspended_threads[i] != tctx->os_id) - Report("Running thread %d was not suspended. False leaks are possible.\n", - tctx->os_id); + Report( + "Running thread %llu was not suspended. False leaks are possible.\n", + tctx->os_id); } } @@ -871,7 +878,7 @@ void LeakReport::PrintLeakedObjectsForLeak(uptr index) { u32 leak_id = leaks_[index].id; for (uptr j = 0; j < leaked_objects_.size(); j++) { if (leaked_objects_[j].leak_id == leak_id) - Printf("%p (%zu bytes)\n", leaked_objects_[j].addr, + Printf("%p (%zu bytes)\n", (void *)leaked_objects_[j].addr, leaked_objects_[j].size); } } @@ -962,7 +969,7 @@ void __lsan_register_root_region(const void *begin, uptr size) { CHECK(root_regions); RootRegion region = {reinterpret_cast(begin), size}; root_regions->push_back(region); - VReport(1, "Registered root region at %p of size %llu\n", begin, size); + VReport(1, "Registered root region at %p of size %zu\n", begin, size); #endif // CAN_SANITIZE_LEAKS } @@ -979,13 +986,13 @@ void __lsan_unregister_root_region(const void *begin, uptr size) { uptr last_index = root_regions->size() - 1; (*root_regions)[i] = (*root_regions)[last_index]; root_regions->pop_back(); - VReport(1, "Unregistered root region at %p of size %llu\n", begin, size); + VReport(1, "Unregistered root region at %p of size %zu\n", begin, size); break; } } if (!removed) { Report( - "__lsan_unregister_root_region(): region at %p of size %llu has not " + "__lsan_unregister_root_region(): region at %p of size %zu has not " "been registered.\n", begin, size); Die(); diff --git a/compiler-rt/lib/memprof/memprof_thread.cpp b/compiler-rt/lib/memprof/memprof_thread.cpp index 296ae16bcce58..5bac23254e0f1 100644 --- a/compiler-rt/lib/memprof/memprof_thread.cpp +++ b/compiler-rt/lib/memprof/memprof_thread.cpp @@ -131,7 +131,7 @@ void MemprofThread::Init(const InitOptions *options) { int local = 0; VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(), (void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_, - &local); + (void *)&local); } thread_return_t @@ -198,7 +198,7 @@ MemprofThread *GetCurrentThread() { void SetCurrentThread(MemprofThread *t) { CHECK(t->context()); - VReport(2, "SetCurrentThread: %p for thread %p\n", t->context(), + VReport(2, "SetCurrentThread: %p for thread %p\n", (void *)t->context(), (void 
*)GetThreadSelf()); // Make sure we do not reset the current MemprofThread. CHECK_EQ(0, TSDGet()); diff --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp index 2202e34469213..1be4195e03494 100644 --- a/compiler-rt/lib/msan/msan.cpp +++ b/compiler-rt/lib/msan/msan.cpp @@ -515,7 +515,7 @@ void __msan_dump_shadow(const void *x, uptr size) { } unsigned char *s = (unsigned char*)MEM_TO_SHADOW(x); - Printf("%p[%p] ", s, x); + Printf("%p[%p] ", (void *)s, x); for (uptr i = 0; i < size; i++) Printf("%x%x ", s[i] >> 4, s[i] & 0xf); Printf("\n"); diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index 830190916ed70..d485149350ee2 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -33,6 +33,7 @@ #include "sanitizer_common/sanitizer_stackdepot.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_linux.h" +#include "sanitizer_common/sanitizer_glibc_version.h" #include "sanitizer_common/sanitizer_tls_get_addr.h" #include "sanitizer_common/sanitizer_vector.h" diff --git a/compiler-rt/lib/orc/simple_packed_serialization.h b/compiler-rt/lib/orc/simple_packed_serialization.h index 3a6d245ba8227..ec43130a2ef58 100644 --- a/compiler-rt/lib/orc/simple_packed_serialization.h +++ b/compiler-rt/lib/orc/simple_packed_serialization.h @@ -354,6 +354,27 @@ class SPSSerializationTraits, SequenceT, } }; +/// Trivial serialization / deserialization for span +template <> class SPSSerializationTraits, span> { +public: + static size_t size(const span &S) { + return SPSArgList::size(static_cast(S.size())) + + S.size(); + } + static bool serialize(SPSOutputBuffer &OB, const span &S) { + if (!SPSArgList::serialize(OB, static_cast(S.size()))) + return false; + return OB.write(S.data(), S.size()); + } + static bool deserialize(SPSInputBuffer &IB, span &S) { + uint64_t Size; + if (!SPSArgList::deserialize(IB, Size)) + return false; + S = span(IB.data(), Size); + return IB.skip(Size); + } +}; + /// SPSTuple serialization for std::pair. template class SPSSerializationTraits, std::pair> { diff --git a/compiler-rt/lib/orc/unittests/simple_packed_serialization_test.cpp b/compiler-rt/lib/orc/unittests/simple_packed_serialization_test.cpp index 3b55aa9662c73..92aa58aafe3ef 100644 --- a/compiler-rt/lib/orc/unittests/simple_packed_serialization_test.cpp +++ b/compiler-rt/lib/orc/unittests/simple_packed_serialization_test.cpp @@ -130,6 +130,30 @@ TEST(SimplePackedSerializationTest, StringViewCharSequenceSerialization) { blobSerializationRoundTrip(string_view(HW)); } +TEST(SimplePackedSerializationTest, SpanSerialization) { + const char Data[] = {3, 2, 1, 0, 1, 2, 3}; // Span should handle nulls. + span OutS(Data, sizeof(Data)); + + size_t Size = SPSArgList>::size(OutS); + auto Buffer = std::make_unique(Size); + SPSOutputBuffer OB(Buffer.get(), Size); + + EXPECT_TRUE(SPSArgList>::serialize(OB, OutS)); + + SPSInputBuffer IB(Buffer.get(), Size); + + span InS; + + EXPECT_TRUE(SPSArgList>::deserialize(IB, InS)); + + // Check that the serialized and deserialized values match. + EXPECT_EQ(InS.size(), OutS.size()); + EXPECT_EQ(memcmp(OutS.data(), InS.data(), InS.size()), 0); + + // Check that the span points directly to the input buffer. 
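The assertion that follows only holds because of the wire layout the new trait defines: a `uint64_t` element count followed by the raw bytes, which lets the deserialized span alias the input buffer instead of copying. A hedged sketch of that layout (`writeSpanBlob` is a hypothetical helper; the real trait delegates the size prefix to SPS's integer serialization):

```cpp
#include <cstdint>
#include <cstring>

// [u64 size][payload bytes] -- embedded NULs in the payload are fine.
void writeSpanBlob(char *out, const char *data, uint64_t n) {
  std::memcpy(out, &n, sizeof(n));       // 8-byte size prefix (host order here)
  std::memcpy(out + sizeof(n), data, n); // payload follows immediately
}
```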
+ EXPECT_EQ(InS.data(), Buffer.get() + sizeof(uint64_t)); +} + TEST(SimplePackedSerializationTest, StdPairSerialization) { std::pair P(42, "foo"); blobSerializationRoundTrip, diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 066f64a393562..abb38ccfa15d2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -2431,14 +2431,20 @@ static int PosixSpawnImpl(void *ctx, RealSpawnPtr *real_posix_spawn, pid_t *pid, char *const envp[]) { COMMON_INTERCEPTOR_READ_RANGE(ctx, file_or_path, internal_strlen(file_or_path) + 1); - char *const *s = argv; - for (; *s; ++s) - COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1); - COMMON_INTERCEPTOR_READ_RANGE(ctx, argv, (s - argv + 1) / sizeof(*s)); - s = envp; - for (; *s; ++s) - COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1); - COMMON_INTERCEPTOR_READ_RANGE(ctx, s, (s - envp + 1) / sizeof(*s)); + if (argv) { + for (char *const *s = argv; ; ++s) { + COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s)); + if (!*s) break; + COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1); + } + } + if (envp) { + for (char *const *s = envp; ; ++s) { + COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s)); + if (!*s) break; + COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1); + } + } int res = real_posix_spawn(pid, file_or_path, file_actions, attrp, argv, envp); if (res == 0) @@ -2706,17 +2712,20 @@ INTERCEPTOR(int, getnameinfo, void *sockaddr, unsigned salen, char *host, #endif #if SANITIZER_INTERCEPT_GETSOCKNAME -INTERCEPTOR(int, getsockname, int sock_fd, void *addr, int *addrlen) { +INTERCEPTOR(int, getsockname, int sock_fd, void *addr, unsigned *addrlen) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, getsockname, sock_fd, addr, addrlen); - COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen)); - int addrlen_in = *addrlen; + unsigned addr_sz; + if (addrlen) { + COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen)); + addr_sz = *addrlen; + } // FIXME: under ASan the call below may write to freed memory and corrupt // its metadata. See // https://github.com/google/sanitizers/issues/321. int res = REAL(getsockname)(sock_fd, addr, addrlen); - if (res == 0) { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addrlen_in, *addrlen)); + if (!res && addr && addrlen) { + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen)); } return res; } @@ -3221,13 +3230,17 @@ INTERCEPTOR(int, getpeername, int sockfd, void *addr, unsigned *addrlen) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, getpeername, sockfd, addr, addrlen); unsigned addr_sz; - if (addrlen) addr_sz = *addrlen; + if (addrlen) { + COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen)); + addr_sz = *addrlen; + } // FIXME: under ASan the call below may write to freed memory and corrupt // its metadata. See // https://github.com/google/sanitizers/issues/321. 
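Besides tolerating null `argv`/`envp`, the `PosixSpawnImpl` rewrite above retires a length computation that divided where it should have multiplied: `s - argv` already counts elements, so the byte size of the array is obtained by multiplying by `sizeof(*s)`. A hedged sketch of the corrected arithmetic:

```cpp
#include <cstddef>

// Byte size of a null-terminated pointer array, including the final null slot.
// Pointer subtraction yields an element count, so multiply by the element
// size; dividing (as the removed code did) under-reports the range.
size_t PtrArrayBytes(char *const *begin) {
  char *const *end = begin;
  while (*end)
    ++end;
  return static_cast<size_t>(end - begin + 1) * sizeof(*begin);
}
```

The per-slot `READ_RANGE` loop that replaces it goes one step further and validates each pointer cell before dereferencing it.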
int res = REAL(getpeername)(sockfd, addr, addrlen); - if (!res && addr && addrlen) + if (!res && addr && addrlen) { COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen)); + } return res; } #define INIT_GETPEERNAME COMMON_INTERCEPT_FUNCTION(getpeername); @@ -9081,10 +9094,10 @@ INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len, return ret; \ } -SHA2_INTERCEPTORS(224, u32); -SHA2_INTERCEPTORS(256, u32); -SHA2_INTERCEPTORS(384, u64); -SHA2_INTERCEPTORS(512, u64); +SHA2_INTERCEPTORS(224, u32) +SHA2_INTERCEPTORS(256, u32) +SHA2_INTERCEPTORS(384, u64) +SHA2_INTERCEPTORS(512, u64) #define INIT_SHA2_INTECEPTORS(LEN) \ COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \ diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index ea3e5bdbc754a..fcdb891fd1da2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1773,7 +1773,7 @@ struct __sanitizer_esr_context { static bool Aarch64GetESR(ucontext_t *ucontext, u64 *esr) { static const u32 kEsrMagic = 0x45535201; - u8 *aux = ucontext->uc_mcontext.__reserved; + u8 *aux = reinterpret_cast(ucontext->uc_mcontext.__reserved); while (true) { _aarch64_ctx *ctx = (_aarch64_ctx *)aux; if (ctx->size == 0) break; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index e43fe3a3cf97b..14610f2df78df 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -465,9 +465,9 @@ #define SANITIZER_INTERCEPT_STAT \ (SI_FREEBSD || SI_MAC || SI_ANDROID || SI_NETBSD || SI_SOLARIS || \ SI_STAT_LINUX) -#define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD) -#define SANITIZER_INTERCEPT___XSTAT \ - (!SANITIZER_INTERCEPT_STAT && SI_POSIX) || SI_STAT_LINUX +#define SANITIZER_INTERCEPT_LSTAT (SI_NETBSD || SI_FREEBSD || SI_STAT_LINUX) +#define SANITIZER_INTERCEPT___XSTAT \ + ((!SANITIZER_INTERCEPT_STAT && SI_POSIX) || SI_STAT_LINUX) #define SANITIZER_INTERCEPT___XSTAT64 SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT___LXSTAT SANITIZER_INTERCEPT___XSTAT #define SANITIZER_INTERCEPT___LXSTAT64 SI_LINUX_NOT_ANDROID diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp index bf813f235bb7a..e16c4e938cb23 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp @@ -55,7 +55,15 @@ bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) { internal_snprintf(proc_path, sizeof(proc_path), "/proc/self/path/%s", xmapentry->pr_mapname); - internal_readlink(proc_path, segment->filename, segment->filename_size); + ssize_t sz = internal_readlink(proc_path, segment->filename, + segment->filename_size - 1); + + // If readlink failed, the map is anonymous. + if (sz == -1) { + segment->filename[0] = '\0'; + } else if ((size_t)sz < segment->filename_size) + // readlink doesn't NUL-terminate. 
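The Solaris fix above follows the standard `readlink(2)` recipe: the call neither NUL-terminates nor guarantees success, so reserve one byte for the terminator and add it by hand. A hedged, self-contained version of the same pattern (assumes `bufsize >= 1`):

```cpp
#include <unistd.h>

// Read a symlink target into buf, always leaving it NUL-terminated.
// On failure the buffer becomes the empty string (e.g. an anonymous mapping).
void ReadLinkSafe(const char *path, char *buf, size_t bufsize) {
  ssize_t n = readlink(path, buf, bufsize - 1); // leave room for the NUL
  if (n < 0)
    n = 0;
  buf[n] = '\0';
}
```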
+ segment->filename[sz] = '\0'; } data_.current += sizeof(prxmap_t); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h index ea330f36f7d79..11c6154b09ea7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h @@ -209,11 +209,11 @@ static inline bool IsValidFrame(uptr frame, uptr stack_top, uptr stack_bottom) { // StackTrace::GetCurrentPc() faster. #if defined(__x86_64__) # define GET_CURRENT_PC() \ - ({ \ + (__extension__({ \ uptr pc; \ asm("lea 0(%%rip), %0" : "=r"(pc)); \ pc; \ - }) + })) #else # define GET_CURRENT_PC() StackTrace::GetCurrentPc() #endif diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp index ad638a84a5933..c6356dae23c13 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.cpp @@ -198,7 +198,8 @@ void RenderFrame(InternalScopedString *buffer, const char *format, int frame_no, } break; default: - Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p, p); + Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p, + (void *)p); Die(); } } @@ -250,7 +251,7 @@ void RenderData(InternalScopedString *buffer, const char *format, break; default: Report("Unsupported specifier in stack frame format: %c (%p)!\n", *p, - p); + (void *)p); Die(); } } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp index ce5e85df1553e..b13e2dc9e3327 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.cpp @@ -44,7 +44,7 @@ static atomic_uintptr_t number_of_live_dtls; static const uptr kDestroyedThread = -1; static void DTLS_Deallocate(DTLS::DTVBlock *block) { - VReport(2, "__tls_get_addr: DTLS_Deallocate %p\n", block); + VReport(2, "__tls_get_addr: DTLS_Deallocate %p\n", (void *)block); UnmapOrDie(block, sizeof(DTLS::DTVBlock)); atomic_fetch_sub(&number_of_live_dtls, 1, memory_order_relaxed); } @@ -66,12 +66,13 @@ static DTLS::DTVBlock *DTLS_NextBlock(atomic_uintptr_t *cur) { } uptr num_live_dtls = atomic_fetch_add(&number_of_live_dtls, 1, memory_order_relaxed); - VReport(2, "__tls_get_addr: DTLS_NextBlock %p %zd\n", &dtls, num_live_dtls); + VReport(2, "__tls_get_addr: DTLS_NextBlock %p %zd\n", (void *)&dtls, + num_live_dtls); return new_dtv; } static DTLS::DTV *DTLS_Find(uptr id) { - VReport(2, "__tls_get_addr: DTLS_Find %p %zd\n", &dtls, id); + VReport(2, "__tls_get_addr: DTLS_Find %p %zd\n", (void *)&dtls, id); static constexpr uptr kPerBlock = ARRAY_SIZE(DTLS::DTVBlock::dtvs); DTLS::DTVBlock *cur = DTLS_NextBlock(&dtls.dtv_block); if (!cur) @@ -82,7 +83,7 @@ static DTLS::DTV *DTLS_Find(uptr id) { void DTLS_Destroy() { if (!common_flags()->intercept_tls_get_addr) return; - VReport(2, "__tls_get_addr: DTLS_Destroy %p\n", &dtls); + VReport(2, "__tls_get_addr: DTLS_Destroy %p\n", (void *)&dtls); DTLS::DTVBlock *block = (DTLS::DTVBlock *)atomic_exchange( &dtls.dtv_block, kDestroyedThread, memory_order_release); while (block) { @@ -120,7 +121,7 @@ DTLS::DTV *DTLS_on_tls_get_addr(void *arg_void, void *res, VReport(2, "__tls_get_addr: %p {0x%zx,0x%zx} => %p; tls_beg: 0x%zx; sp: %p " "num_live_dtls %zd\n", - arg, arg->dso_id, arg->offset, res, tls_beg, &tls_beg, + (void *)arg, 
arg->dso_id, arg->offset, res, tls_beg, (void *)&tls_beg, atomic_load(&number_of_live_dtls, memory_order_relaxed)); if (dtls.last_memalign_ptr == tls_beg) { tls_size = dtls.last_memalign_size; diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp index b2d06bfea30e0..3835ce26c4d54 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp @@ -106,8 +106,7 @@ TEST_F(StackDepotTest, Print) { "Stack for id .*#0 0x1.*#1 0x2.*#2 0x3.*#3 0x4.*#4 0x8.*#5 0x9.*")); } -// Temporarily disabled to investigate why buildbot fails to show broken tests. -TEST_F(StackDepotTest, DISABLED_PrintNoLock) { +TEST_F(StackDepotTest, PrintNoLock) { u32 n = 2000; std::vector idx2id(n); for (u32 i = 0; i < n; ++i) { diff --git a/compiler-rt/lib/xray/xray_x86_64.cpp b/compiler-rt/lib/xray/xray_x86_64.cpp index c58584b3a14b3..669d2e85bede5 100644 --- a/compiler-rt/lib/xray/xray_x86_64.cpp +++ b/compiler-rt/lib/xray/xray_x86_64.cpp @@ -148,7 +148,8 @@ bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, int64_t TrampolineOffset = reinterpret_cast(Trampoline) - (static_cast(Address) + 11); if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { - Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline, + Report("XRay Entry trampoline (%p) too far from sled (%p)\n", + reinterpret_cast(Trampoline), reinterpret_cast(Address)); return false; } @@ -195,7 +196,8 @@ bool patchFunctionExit(const bool Enable, const uint32_t FuncId, (static_cast(Address) + 11); if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { Report("XRay Exit trampoline (%p) too far from sled (%p)\n", - __xray_FunctionExit, reinterpret_cast(Address)); + reinterpret_cast(__xray_FunctionExit), + reinterpret_cast(Address)); return false; } if (Enable) { @@ -224,7 +226,8 @@ bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, (static_cast(Address) + 11); if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n", - __xray_FunctionTailExit, reinterpret_cast(Address)); + reinterpret_cast(__xray_FunctionTailExit), + reinterpret_cast(Address)); return false; } if (Enable) { diff --git a/compiler-rt/test/asan/TestCases/PR52382.c b/compiler-rt/test/asan/TestCases/PR52382.c new file mode 100644 index 0000000000000..9cab2b9640844 --- /dev/null +++ b/compiler-rt/test/asan/TestCases/PR52382.c @@ -0,0 +1,12 @@ +// RUN: %clang_asan -O0 %s -o %t -w && not %run %t 2>&1 | FileCheck %s +// RUN: %clang_asan -O2 %s -o %t -w && not %run %t 2>&1 | FileCheck %s + +int global_array[100] = {-1}; + +// This access is 412 bytes after the start of the global: past the end of the +// uninstrumented array, but within the bounds of the extended instrumented +// array. We should ensure this is still instrumented. 
+int main(void) { return global_array[103]; } + +// CHECK: AddressSanitizer: global-buffer-overflow on address +// CHECK: is located 12 bytes to the right of global variable 'global_array' diff --git a/compiler-rt/test/asan/TestCases/intercept-rethrow-exception.cpp b/compiler-rt/test/asan/TestCases/intercept-rethrow-exception.cpp index 4c549b6ad5c7e..b7272ad71bf52 100644 --- a/compiler-rt/test/asan/TestCases/intercept-rethrow-exception.cpp +++ b/compiler-rt/test/asan/TestCases/intercept-rethrow-exception.cpp @@ -12,6 +12,9 @@ // FIXME: Bug 42703 // XFAIL: solaris +// https://reviews.llvm.org/D111703 made compiler incompatible with released NDK. +// UNSUPPORTED: android && arm-target-arch + #include #include #include diff --git a/compiler-rt/test/hwasan/TestCases/globals-wrap-tag.c b/compiler-rt/test/hwasan/TestCases/globals-wrap-tag.c new file mode 100644 index 0000000000000..8ae18278af3c1 --- /dev/null +++ b/compiler-rt/test/hwasan/TestCases/globals-wrap-tag.c @@ -0,0 +1,294 @@ +// Tests that global tags wrap around successfully after tag 0xff on aarch64 or +// 0x3f on x86_64. +// RUN: %clang_hwasan %s -o %t +// RUN: %run %t 0 + +#include + +// Create >256 globals to ensure every tag is used. +int x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, + x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, + x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, + x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, + x62, x63, x64, x65, x66, x67, x68, x69, x70, x71, x72, x73, x74, x75, x76, + x77, x78, x79, x80, x81, x82, x83, x84, x85, x86, x87, x88, x89, x90, x91, + x92, x93, x94, x95, x96, x97, x98, x99, x100, x101, x102, x103, x104, x105, + x106, x107, x108, x109, x110, x111, x112, x113, x114, x115, x116, x117, + x118, x119, x120, x121, x122, x123, x124, x125, x126, x127, x128, x129, + x130, x131, x132, x133, x134, x135, x136, x137, x138, x139, x140, x141, + x142, x143, x144, x145, x146, x147, x148, x149, x150, x151, x152, x153, + x154, x155, x156, x157, x158, x159, x160, x161, x162, x163, x164, x165, + x166, x167, x168, x169, x170, x171, x172, x173, x174, x175, x176, x177, + x178, x179, x180, x181, x182, x183, x184, x185, x186, x187, x188, x189, + x190, x191, x192, x193, x194, x195, x196, x197, x198, x199, x200, x201, + x202, x203, x204, x205, x206, x207, x208, x209, x210, x211, x212, x213, + x214, x215, x216, x217, x218, x219, x220, x221, x222, x223, x224, x225, + x226, x227, x228, x229, x230, x231, x232, x233, x234, x235, x236, x237, + x238, x239, x240, x241, x242, x243, x244, x245, x246, x247, x248, x249, + x250, x251, x252, x253, x254, x255, x256; + +int main(int argc, char **argv) { + int i = atoi(argv[1]); + + // Write to every global to make sure tags match even after wrapping around + // to 0. 
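A hedged sketch of the wrap-around behaviour this test exercises, assuming the sequential per-global tag assignment its header comment implies (256 tags on AArch64, 64 in x86_64 aliasing mode):

```cpp
#include <cstdint>

// With num_tags available tags, global number n gets (base + n) % num_tags,
// so once more than num_tags globals exist the sequence wraps to 0 and
// repeats; touching >256 globals proves wrapped tags still match the shadow.
uint8_t TagForGlobal(unsigned base, unsigned n, unsigned num_tags) {
  return static_cast<uint8_t>((base + n) % num_tags); // num_tags: 256 or 64
}
```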
+ (&x0)[i] = 1; + (&x1)[i] = 1; + (&x2)[i] = 1; + (&x3)[i] = 1; + (&x4)[i] = 1; + (&x5)[i] = 1; + (&x6)[i] = 1; + (&x7)[i] = 1; + (&x8)[i] = 1; + (&x9)[i] = 1; + (&x10)[i] = 1; + (&x11)[i] = 1; + (&x12)[i] = 1; + (&x13)[i] = 1; + (&x14)[i] = 1; + (&x15)[i] = 1; + (&x16)[i] = 1; + (&x17)[i] = 1; + (&x18)[i] = 1; + (&x19)[i] = 1; + (&x20)[i] = 1; + (&x21)[i] = 1; + (&x22)[i] = 1; + (&x23)[i] = 1; + (&x24)[i] = 1; + (&x25)[i] = 1; + (&x26)[i] = 1; + (&x27)[i] = 1; + (&x28)[i] = 1; + (&x29)[i] = 1; + (&x30)[i] = 1; + (&x31)[i] = 1; + (&x32)[i] = 1; + (&x33)[i] = 1; + (&x34)[i] = 1; + (&x35)[i] = 1; + (&x36)[i] = 1; + (&x37)[i] = 1; + (&x38)[i] = 1; + (&x39)[i] = 1; + (&x40)[i] = 1; + (&x41)[i] = 1; + (&x42)[i] = 1; + (&x43)[i] = 1; + (&x44)[i] = 1; + (&x45)[i] = 1; + (&x46)[i] = 1; + (&x47)[i] = 1; + (&x48)[i] = 1; + (&x49)[i] = 1; + (&x50)[i] = 1; + (&x51)[i] = 1; + (&x52)[i] = 1; + (&x53)[i] = 1; + (&x54)[i] = 1; + (&x55)[i] = 1; + (&x56)[i] = 1; + (&x57)[i] = 1; + (&x58)[i] = 1; + (&x59)[i] = 1; + (&x60)[i] = 1; + (&x61)[i] = 1; + (&x62)[i] = 1; + (&x63)[i] = 1; + (&x64)[i] = 1; + (&x65)[i] = 1; + (&x66)[i] = 1; + (&x67)[i] = 1; + (&x68)[i] = 1; + (&x69)[i] = 1; + (&x70)[i] = 1; + (&x71)[i] = 1; + (&x72)[i] = 1; + (&x73)[i] = 1; + (&x74)[i] = 1; + (&x75)[i] = 1; + (&x76)[i] = 1; + (&x77)[i] = 1; + (&x78)[i] = 1; + (&x79)[i] = 1; + (&x80)[i] = 1; + (&x81)[i] = 1; + (&x82)[i] = 1; + (&x83)[i] = 1; + (&x84)[i] = 1; + (&x85)[i] = 1; + (&x86)[i] = 1; + (&x87)[i] = 1; + (&x88)[i] = 1; + (&x89)[i] = 1; + (&x90)[i] = 1; + (&x91)[i] = 1; + (&x92)[i] = 1; + (&x93)[i] = 1; + (&x94)[i] = 1; + (&x95)[i] = 1; + (&x96)[i] = 1; + (&x97)[i] = 1; + (&x98)[i] = 1; + (&x99)[i] = 1; + (&x100)[i] = 1; + (&x101)[i] = 1; + (&x102)[i] = 1; + (&x103)[i] = 1; + (&x104)[i] = 1; + (&x105)[i] = 1; + (&x106)[i] = 1; + (&x107)[i] = 1; + (&x108)[i] = 1; + (&x109)[i] = 1; + (&x110)[i] = 1; + (&x111)[i] = 1; + (&x112)[i] = 1; + (&x113)[i] = 1; + (&x114)[i] = 1; + (&x115)[i] = 1; + (&x116)[i] = 1; + (&x117)[i] = 1; + (&x118)[i] = 1; + (&x119)[i] = 1; + (&x120)[i] = 1; + (&x121)[i] = 1; + (&x122)[i] = 1; + (&x123)[i] = 1; + (&x124)[i] = 1; + (&x125)[i] = 1; + (&x126)[i] = 1; + (&x127)[i] = 1; + (&x128)[i] = 1; + (&x129)[i] = 1; + (&x130)[i] = 1; + (&x131)[i] = 1; + (&x132)[i] = 1; + (&x133)[i] = 1; + (&x134)[i] = 1; + (&x135)[i] = 1; + (&x136)[i] = 1; + (&x137)[i] = 1; + (&x138)[i] = 1; + (&x139)[i] = 1; + (&x140)[i] = 1; + (&x141)[i] = 1; + (&x142)[i] = 1; + (&x143)[i] = 1; + (&x144)[i] = 1; + (&x145)[i] = 1; + (&x146)[i] = 1; + (&x147)[i] = 1; + (&x148)[i] = 1; + (&x149)[i] = 1; + (&x150)[i] = 1; + (&x151)[i] = 1; + (&x152)[i] = 1; + (&x153)[i] = 1; + (&x154)[i] = 1; + (&x155)[i] = 1; + (&x156)[i] = 1; + (&x157)[i] = 1; + (&x158)[i] = 1; + (&x159)[i] = 1; + (&x160)[i] = 1; + (&x161)[i] = 1; + (&x162)[i] = 1; + (&x163)[i] = 1; + (&x164)[i] = 1; + (&x165)[i] = 1; + (&x166)[i] = 1; + (&x167)[i] = 1; + (&x168)[i] = 1; + (&x169)[i] = 1; + (&x170)[i] = 1; + (&x171)[i] = 1; + (&x172)[i] = 1; + (&x173)[i] = 1; + (&x174)[i] = 1; + (&x175)[i] = 1; + (&x176)[i] = 1; + (&x177)[i] = 1; + (&x178)[i] = 1; + (&x179)[i] = 1; + (&x180)[i] = 1; + (&x181)[i] = 1; + (&x182)[i] = 1; + (&x183)[i] = 1; + (&x184)[i] = 1; + (&x185)[i] = 1; + (&x186)[i] = 1; + (&x187)[i] = 1; + (&x188)[i] = 1; + (&x189)[i] = 1; + (&x190)[i] = 1; + (&x191)[i] = 1; + (&x192)[i] = 1; + (&x193)[i] = 1; + (&x194)[i] = 1; + (&x195)[i] = 1; + (&x196)[i] = 1; + (&x197)[i] = 1; + (&x198)[i] = 1; + (&x199)[i] = 1; + (&x200)[i] = 1; + (&x201)[i] = 1; + (&x202)[i] = 1; + 
(&x203)[i] = 1; + (&x204)[i] = 1; + (&x205)[i] = 1; + (&x206)[i] = 1; + (&x207)[i] = 1; + (&x208)[i] = 1; + (&x209)[i] = 1; + (&x210)[i] = 1; + (&x211)[i] = 1; + (&x212)[i] = 1; + (&x213)[i] = 1; + (&x214)[i] = 1; + (&x215)[i] = 1; + (&x216)[i] = 1; + (&x217)[i] = 1; + (&x218)[i] = 1; + (&x219)[i] = 1; + (&x220)[i] = 1; + (&x221)[i] = 1; + (&x222)[i] = 1; + (&x223)[i] = 1; + (&x224)[i] = 1; + (&x225)[i] = 1; + (&x226)[i] = 1; + (&x227)[i] = 1; + (&x228)[i] = 1; + (&x229)[i] = 1; + (&x230)[i] = 1; + (&x231)[i] = 1; + (&x232)[i] = 1; + (&x233)[i] = 1; + (&x234)[i] = 1; + (&x235)[i] = 1; + (&x236)[i] = 1; + (&x237)[i] = 1; + (&x238)[i] = 1; + (&x239)[i] = 1; + (&x240)[i] = 1; + (&x241)[i] = 1; + (&x242)[i] = 1; + (&x243)[i] = 1; + (&x244)[i] = 1; + (&x245)[i] = 1; + (&x246)[i] = 1; + (&x247)[i] = 1; + (&x248)[i] = 1; + (&x249)[i] = 1; + (&x250)[i] = 1; + (&x251)[i] = 1; + (&x252)[i] = 1; + (&x253)[i] = 1; + (&x254)[i] = 1; + (&x255)[i] = 1; + (&x256)[i] = 1; + + return 0; +} diff --git a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c index db7d444941752..ff52a4bf298c6 100644 --- a/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c +++ b/compiler-rt/test/hwasan/TestCases/heap-buffer-overflow.c @@ -57,7 +57,7 @@ int main(int argc, char **argv) { // CHECKM: Cause: heap-buffer-overflow // CHECKM: is located 0 bytes to the right of 1000000-byte region // -// CHECK31: tags: [[TAG:..]]/0e (ptr/mem) +// CHECK31: tags: [[TAG:..]]/0e([[TAG]]) (ptr/mem) // CHECK31-NOT: Invalid access starting at offset // CHECK31: Cause: heap-buffer-overflow // CHECK31: is located 1 bytes to the right of 30-byte region diff --git a/compiler-rt/test/hwasan/TestCases/hwasan-print-shadow.cpp b/compiler-rt/test/hwasan/TestCases/hwasan-print-shadow.cpp index 285a321c1f2eb..2be0a0591693c 100644 --- a/compiler-rt/test/hwasan/TestCases/hwasan-print-shadow.cpp +++ b/compiler-rt/test/hwasan/TestCases/hwasan-print-shadow.cpp @@ -8,26 +8,35 @@ #include int main() { - void *alloc = malloc(4096); + char *alloc = (char *)malloc(4096); + + // Simulate short granule tags. + alloc[15] = 0x00; + alloc[31] = 0xbb; + alloc[47] = 0xcc; + alloc[63] = 0xdd; + alloc[79] = 0xee; + alloc[95] = 0xff; // __hwasan_tag_memory expects untagged pointers. char *p = (char *)__hwasan_tag_pointer(alloc, 0); assert(p); + // Write tags to shadow. __hwasan_tag_memory(p, 1, 32); - __hwasan_tag_memory(p + 32, 3, 16); + __hwasan_tag_memory(p + 32, 16, 16); __hwasan_tag_memory(p + 48, 0, 32); __hwasan_tag_memory(p + 80, 4, 16); char *q = (char *)__hwasan_tag_pointer(p, 7); __hwasan_print_shadow(q + 5, 89 - 5); // CHECK: HWASan shadow map for {{.*}}5 .. 
{{.*}}9 (pointer tag 7) - // CHECK-NEXT: {{.*}}0: 1 - // CHECK-NEXT: {{.*}}0: 1 - // CHECK-NEXT: {{.*}}0: 3 - // CHECK-NEXT: {{.*}}0: 0 - // CHECK-NEXT: {{.*}}0: 0 - // CHECK-NEXT: {{.*}}0: 4 + // CHECK-NEXT: {{.*}}0: 01(00) + // CHECK-NEXT: {{.*}}0: 01(bb) + // CHECK-NEXT: {{.*}}0: 10 + // CHECK-NEXT: {{.*}}0: 00 + // CHECK-NEXT: {{.*}}0: 00 + // CHECK-NEXT: {{.*}}0: 04(ff) free(alloc); } diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/get_sock_peer_name.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/get_sock_peer_name.cpp new file mode 100644 index 0000000000000..d4e6189d61703 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/get_sock_peer_name.cpp @@ -0,0 +1,38 @@ +// Test that ASan doesn't raise a false alarm when getsockname and getpeername +// are called with addrlen=nullptr. +// +// RUN: %clangxx %s -o %t && %run %t 2>&1 + +// connect() fails on Android. +// UNSUPPORTED: android + +#include +#include +#include +#include + +int main() { + const int fd = socket(AF_INET, SOCK_DGRAM, 0); + assert(fd >= 0); + + const sockaddr_in sin = { + .sin_family = AF_INET, + .sin_port = 1234, + .sin_addr = + { + .s_addr = INADDR_LOOPBACK, + }, + }; + assert(connect(fd, reinterpret_cast(&sin), sizeof(sin)) == + 0); + + errno = 0; + assert(getsockname(fd, nullptr, nullptr) == -1); + assert(errno == EFAULT); + + errno = 0; + assert(getpeername(fd, nullptr, nullptr) == -1); + assert(errno == EFAULT); + + return 0; +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_spawn.c b/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_spawn.c index 8cd66a81ff66d..ea58b92af6097 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_spawn.c +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/posix_spawn.c @@ -1,9 +1,12 @@ // RUN: %clang %s -o %t && %run %t 2>&1 | FileCheck %s +// +// Older versions of Android do not have certain posix_spawn* functions. 
+// UNSUPPORTED: android #include #include #include -#include +#include int main(int argc, char **argv) { if (argc > 1) { @@ -13,21 +16,20 @@ int main(int argc, char **argv) { return 0; } - int s; + posix_spawnattr_t attr = {0}; + posix_spawn_file_actions_t file_actions = {0}; - posix_spawnattr_t attr; - s = posix_spawnattr_init(&attr); - assert(!s); - - posix_spawn_file_actions_t file_actions; - s = posix_spawn_file_actions_init(&file_actions); - assert(!s); - - char *const args[] = {argv[0], "2", NULL}; - char *const env[] = {"A=B", NULL}; + char *const args[] = { + argv[0], "2", "3", "4", "2", "3", "4", "2", "3", "4", + "2", "3", "4", "2", "3", "4", "2", "3", "4", NULL, + }; + char *const env[] = { + "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", + "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", "A=B", NULL, + }; pid_t pid; - s = posix_spawn(&pid, argv[0], &file_actions, &attr, args, env); + int s = posix_spawn(&pid, argv[0], &file_actions, &attr, args, env); assert(!s); waitpid(pid, &s, WUNTRACED | WCONTINUED); diff --git a/compiler-rt/test/tsan/cxa_guard_acquire.cpp b/compiler-rt/test/tsan/cxa_guard_acquire.cpp index 9d87e4912e3ba..cdbe609003d43 100644 --- a/compiler-rt/test/tsan/cxa_guard_acquire.cpp +++ b/compiler-rt/test/tsan/cxa_guard_acquire.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: darwin // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s #include diff --git a/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp b/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp index 166c372f94c9d..97b09d0ab2f38 100644 --- a/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp +++ b/cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp @@ -1,5 +1,6 @@ -// RUN: %clang %target_itanium_abi_host_triple %s -c -o - -g -Xclang -gsimple-template-names=mangled -Xclang -debug-forward-template-params \ +// RUN: %clang %target_itanium_abi_host_triple %s -c -o - -g -Xclang -gsimple-template-names=mangled -Xclang -debug-forward-template-params -std=c++20 \ // RUN: | llvm-dwarfdump --verify - +#include template struct t1 { }; @@ -67,7 +68,70 @@ struct t6 { operator t1*() { return nullptr; } + template + void operator-(int) { + } + template + void operator*(int) { + } + template + void operator/(int) { + } + template + void operator%(int) { + } + template + void operator^(int) { + } + template + void operator&(int) { + } + template + void operator|(int) { + } + template + void operator~() { + } + template + void operator!() { + } + template + void operator=(int) { + } + template + void operator>(int) { + } + template + void operator,(int) { + } + template + void operator()() { + } + template + void operator[](int) { + } + template + void operator<=>(int) { + } + template + void* operator new(std::size_t, T) { + __builtin_unreachable(); + } + template + void operator delete(void*, T) { + } + template + void* operator new[](std::size_t, T) { + __builtin_unreachable(); + } + template + void operator delete[](void*, T) { + } + template + int operator co_await() { __builtin_unreachable(); } + }; +void operator"" _suff(unsigned long long) {} template class T> void f7() { } template class T, typename T2> void f8() { } template @@ -94,6 +158,11 @@ struct t10 { template t10() { } }; + +template +void operator_not_really() { +} + int main() { struct { } A; auto L = []{}; @@ -116,7 +185,7 @@ int main() { f1(); f1(); f1(); - // f1(); + f1(); 
f1(); f1(); f1(); @@ -127,7 +196,7 @@ int main() { f3(); f3(); f3(); - // t4<3> v2; + t4<3> v2; f3(); f3(); f3(); @@ -169,6 +238,27 @@ int main() { v6.operator< (1); v6.operator<= (1); v6.operator t1*(); + v6.operator- (3); + v6.operator* (3); + v6.operator/ (3); + v6.operator% (3); + v6.operator^ (3); + v6.operator& (3); + v6.operator| (3); + v6.operator~ (); + v6.operator! (); + v6.operator= (3); + v6.operator> (3); + v6.operator, (3); + v6.operator() (); + v6.operator[] (3); + v6.operator<=> (3); + t6::operator new(0, 0); + t6::operator new[](0, 0); + t6::operator delete(nullptr, 0); + t6::operator delete[](nullptr, 0); + v6.operator co_await(); + 42_suff; struct t7 { }; f1(); f1(); @@ -201,12 +291,13 @@ int main() { f1(); f1(); f1(); - // f1[1]>(); + f1[1]>(); f1(); f1(); struct t8 { decltype(A) m; }; f1(); f1(); + operator_not_really(); } void t8::mem() { struct t7 { }; diff --git a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp index 8679628769168..07a1a954c4e2a 100644 --- a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp +++ b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp @@ -24,7 +24,12 @@ llvm::Optional OptionalNone(llvm::None); llvm::SmallVector SmallVector = {10, 11, 12}; llvm::SmallString<5> SmallString("foo"); llvm::StringRef StringRef = "bar"; -llvm::Twine Twine = llvm::Twine(SmallString) + StringRef; +// Should test std::string in Twine too, but it's currently broken because I +// don't know how to add 'str' and 'gdb.LazyString' (can't figure out any way to +// string-ify LazyString). +std::string String = "foo"; +llvm::Twine TempTwine = llvm::Twine(String) + StringRef; +llvm::Twine Twine = TempTwine + "baz"; llvm::PointerIntPair PointerIntPair(IntPtr, 1); struct alignas(8) Z {}; diff --git a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb index c6b334ffa9a80..bf7ec5a47f861 100644 --- a/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb +++ b/cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.gdb @@ -37,7 +37,7 @@ p SmallString # CHECK: "bar" p StringRef -# CHECK: "foobar" +# CHECK: "foobarbaz" p Twine # CHECK: llvm::StringMap with 2 elements = {["foo"] = 123, ["bar"] = 456} diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md index 03f3e244831f3..cf363d19714f1 100644 --- a/flang/docs/FlangDriver.md +++ b/flang/docs/FlangDriver.md @@ -13,24 +13,107 @@ :local: ``` + +> **_NOTE:_** This document assumes that Flang's drivers can already generate code and +> produce executables. However, this is still work-in-progress. By making this +> assumption, we are able to prepare this document ahead-of-time and to provide +> an overview of the design that we are working towards. + There are two main drivers in Flang: * the compiler driver, `flang-new` * the frontend driver, `flang-new -fc1` -The compiler driver will allow you to control all compilation phases (i.e. -preprocessing, frontend code-generation, middlend/backend code-optimisation and -lowering, linking). For frontend specific tasks, the compiler driver creates a -Fortran compilation job and delegates it to `flang-new -fc1`, the frontend driver. +> **_NOTE:_** The diagrams in this document refer to `flang` as opposed to +> `flang-new`. 
This is because the diagrams reflect the final design that we +> are still working towards. See the note on [the flang script](https://github.com/llvm/llvm-project/blob/main/flang/docs/FlangDriver.md#the-flang-script) +> below for more context. + +The **compiler driver** will allow you to control all compilation phases (e.g. +preprocessing, semantic checks, code-generation, code-optimisation, lowering +and linking). For frontend specific tasks, the compiler driver creates a +Fortran compilation job and delegates it to `flang-new -fc1`, the frontend +driver. For linking, it creates a linker job and calls an external linker (e.g. +LLVM's [`lld`](https://lld.llvm.org/)). It can also call other tools such as +external assemblers (e.g. [`as`](https://www.gnu.org/software/binutils/)). In +Clang, the compiler driver can also link the generated binaries with LLVM's +static analysis/sanitizer libraries (e.g. +[MemorySanitizer](https://clang.llvm.org/docs/MemorySanitizer.html)). This is +not yet available in Flang, but will be relatively easy to support once such +libraries become available. Flang's compiler driver is intended for Flang's +end-users - its interface needs to remain stable. Otherwise, Flang's users will +have to adjust their build scripts every time a compiler flag is changed. + +| ![Compiler Driver](compiler_driver.png) | +|:--:| +| *Flang’s compiler driver and the **tools** that it runs* | + +The **frontend driver** glues together and drives all of Flang's frontend +libraries. As such, it provides an easy-to-use and intuitive interface to the +frontend. It uses MLIR and LLVM for code-generation and can be viewed as a +driver for Flang, LLVM and MLIR libraries. Unlike the compiler driver, it +is not capable of calling any external tools (including linkers). It is aware +of all the frontend internals that are "hidden" from the compiler driver. It +accepts many frontend-specific options not available in `flang-new` and as such +it provides finer control over the frontend. Note that this tool is mostly +intended for Flang developers. In particular, there are no guarantees about the +stability of its interface and compiler developers can use it to experiment +with new flags. + +| ![Frontend Driver](frontend_driver.png) | +|:-:| +| *Flang's frontend driver and the **libraries** that it drives* | + +Note that similarly to `-Xclang` in `clang`, you can use `-Xflang` to forward a +frontend specific flag from the _compiler_ directly to the _frontend_ driver, +e.g.: + +```lang=bash +flang-new -Xflang -fdebug-dump-parse-tree input.f95 +``` -The frontend driver glues all of the frontend libraries together and provides -an easy-to-use and intuitive interface to the frontend. It accepts many -frontend-specific options not available in `flang-new` and as such it provides a -finer control over the frontend. Similarly to `-Xclang` in `clang`, you can use -`-Xflang` to forward the frontend specific flags from the compiler directly to -the frontend driver. +In the invocation above, `-fdebug-dump-parse-tree` is forwarded to `flang-new +-fc1`. Without the forwarding flag, `-Xflang`, you would see the following +warning: -## Compiler Driver +```lang=bash +flang-new: warning: argument unused during compilation: +``` + +As `-fdebug-dump-parse-tree` is only supported by `flang-new -fc1`, `flang-new` +will ignore it when used without `-Xflang`. + +## Why Do We Need Two Drivers? +As hinted above, `flang-new` and `flang-new -fc1` are two separate tools. 
The +fact that these tools are accessed through one binary, `flang-new`, is just an +implementation detail. Each tool has a separate list of options, albeit defined +in the same file: `clang/include/clang/Driver/Options.td`. + +The separation helps us split various tasks and allows us to implement more +specialised tools. In particular, `flang-new` is not aware of various +compilation phases within the frontend (e.g. scanning, parsing or semantic +checks). It does not have to be. Conversely, the frontend driver, `flang-new +-fc1`, need not be concerned with linkers or other external tools like +assemblers. Nor does it need to know where to look for various system +libraries, which is usually OS and platform specific. + +One helpful way of differentiating these tools is to keep in mind that: + +* the compiler driver is an end-user tool +* the frontend driver is a compiler developer tool with many additional options. + +Also, since the compiler driver can call external tools, e.g. linkers, it can +be used to generate **executables**. The frontend driver cannot call external +tools and hence can only generate **object files**. A similar model is +implemented in Clang (`clang` vs `clang -cc1` vs `clang -cc1as`), which is +based on the [architecture of +GCC](https://en.wikibooks.org/wiki/GNU_C_Compiler_Internals/GNU_C_Compiler_Architecture). +In fact, Flang needs to adhere to this model in order to be able to re-use +Clang's driver library. If you are more familiar with the [architecture of +GFortran](https://gcc.gnu.org/onlinedocs/gcc-4.7.4/gfortran/About-GNU-Fortran.html) +than Clang, then `flang-new` corresponds to `gfortran` and `flang-new -fc1` to +`f951`. +## Compiler Driver The main entry point for Flang's compiler driver is implemented in `flang/tools/flang-driver/driver.cpp`. Flang's compiler driver is implemented in terms of Clang's driver library, `clangDriver`. This approach allows us to: @@ -92,9 +175,9 @@ You can read more on the design of `clangDriver` in Clang's [Driver Design & Internals](https://clang.llvm.org/docs/DriverInternals.html). ## Frontend Driver -Flang's frontend driver is the main interface between end-users and the Flang -frontend. The high-level design is similar to Clang's frontend driver, `clang --cc1` and consists of the following classes: +Flang's frontend driver is the main interface between compiler developers and +the Flang frontend. The high-level design is similar to Clang's frontend +driver, `clang -cc1` and consists of the following classes: * `CompilerInstance`, which is a helper class that encapsulates and manages various objects that are always required by the frontend (e.g. `AllSources`, `AllCookedSources, `Parsing`, `CompilerInvocation`, etc.). In most cases diff --git a/flang/docs/compiler_driver.png b/flang/docs/compiler_driver.png new file mode 100644 index 0000000000000..6311fb7e453ea Binary files /dev/null and b/flang/docs/compiler_driver.png differ diff --git a/flang/docs/doxygen.cfg.in b/flang/docs/doxygen.cfg.in index 7ede0b0a8b3b7..6ee771c23d2b6 100644 --- a/flang/docs/doxygen.cfg.in +++ b/flang/docs/doxygen.cfg.in @@ -1222,7 +1222,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# YES) or that it should be included in the main .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
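The division of labour that the FlangDriver.md changes above describe can be summarised in a few invocations (a hedged illustration; of these flags, only `-Xflang` and `-fdebug-dump-parse-tree` are confirmed by the document itself):

```lang=bash
flang-new hello.f90 -o hello                          # compiler driver: full pipeline, may call the linker
flang-new -fc1 -fdebug-dump-parse-tree hello.f90      # frontend driver: frontend-only task, never links
flang-new -Xflang -fdebug-dump-parse-tree hello.f90   # same flag, forwarded from the compiler driver
```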
diff --git a/flang/docs/frontend_driver.png b/flang/docs/frontend_driver.png new file mode 100644 index 0000000000000..2218819cc3bd0 Binary files /dev/null and b/flang/docs/frontend_driver.png differ diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp b/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp index f49e72fabee84..32dcef25fedab 100644 --- a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp +++ b/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp @@ -156,11 +156,6 @@ bool OpenMPCounterVisitor::Pre(const OpenMPConstruct &c) { ompWrapperStack.push_back(ow); return true; } -bool OpenMPCounterVisitor::Pre(const OmpEndLoopDirective &c) { return true; } -bool OpenMPCounterVisitor::Pre(const DoConstruct &) { - loopLogRecordStack.push_back(curLoopLogRecord); - return true; -} void OpenMPCounterVisitor::Post(const OpenMPDeclarativeConstruct &) { PostConstructsCommon(); @@ -178,27 +173,11 @@ void OpenMPCounterVisitor::PostConstructsCommon() { clauseStrings[curConstruct]}; constructClauses.push_back(r); - // Keep track of loop log records if it can potentially have the - // nowait clause added on later. - if (const auto *oc = std::get_if(curConstruct)) { - if (const auto *olc = std::get_if(&(*oc)->u)) { - const auto &beginLoopDir{ - std::get(olc->t)}; - const auto &beginDir{ - std::get(beginLoopDir.t)}; - if (beginDir.v == llvm::omp::Directive::OMPD_do || - beginDir.v == llvm::omp::Directive::OMPD_do_simd) { - curLoopLogRecord = &constructClauses.back(); - } - } - } - auto it = clauseStrings.find(curConstruct); clauseStrings.erase(it); ompWrapperStack.pop_back(); delete curConstruct; } -void OpenMPCounterVisitor::Post(const OmpEndLoopDirective &c) {} void OpenMPCounterVisitor::Post(const OmpProcBindClause::Type &c) { clauseDetails += "type=" + OmpProcBindClause::EnumToString(c) + ";"; @@ -242,26 +221,9 @@ void OpenMPCounterVisitor::Post(const OmpClause &c) { clauseDetails.clear(); } void OpenMPCounterVisitor::PostClauseCommon(const ClauseInfo &ci) { - // The end loop construct (!$omp end do) can contain a nowait clause. - // The flang parser does not parse the end loop construct as part of - // the OpenMP construct for the loop construct. So the end loop is left - // hanging as a separate executable construct. If a nowait clause is seen in - // an end loop construct we have to find the associated loop construct and - // add nowait to its list of clauses. Note: This is not a bug in flang, the - // parse tree is corrected during semantic analysis. 
- if (ci.clause == "nowait") { - assert(curLoopLogRecord && - "loop Construct should be visited before a nowait clause"); - curLoopLogRecord->clauses.push_back(ci); - } else { - assert(!ompWrapperStack.empty() && - "Construct should be visited before clause"); - clauseStrings[ompWrapperStack.back()].push_back(ci); - } -} -void OpenMPCounterVisitor::Post(const DoConstruct &) { - curLoopLogRecord = loopLogRecordStack.back(); - loopLogRecordStack.pop_back(); + assert( + !ompWrapperStack.empty() && "Construct should be visited before clause"); + clauseStrings[ompWrapperStack.back()].push_back(ci); } } // namespace parser } // namespace Fortran diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h b/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h index 188e8f9b61f92..d31fa8cc46896 100644 --- a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h +++ b/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h @@ -17,7 +17,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include #include namespace Fortran { @@ -62,13 +61,10 @@ struct OpenMPCounterVisitor { template void Post(const A &) {} bool Pre(const OpenMPDeclarativeConstruct &c); bool Pre(const OpenMPConstruct &c); - bool Pre(const OmpEndLoopDirective &c); - bool Pre(const DoConstruct &); void Post(const OpenMPDeclarativeConstruct &); void Post(const OpenMPConstruct &); void PostConstructsCommon(); - void Post(const OmpEndLoopDirective &c); void Post(const OmpProcBindClause::Type &c); void Post(const OmpDefaultClause::Type &c); @@ -83,20 +79,9 @@ struct OpenMPCounterVisitor { void Post(const OmpCancelType::Type &c); void Post(const OmpClause &c); void PostClauseCommon(const ClauseInfo &ci); - void Post(const DoConstruct &); std::string clauseDetails{""}; - - // curLoopLogRecord and loopLogRecordStack store - // pointers to this datastructure's entries. Hence a - // vector cannot be used since pointers are invalidated - // on resize. Next best option seems to be deque. Also a - // list cannot be used since YAML gen requires a - // datastructure which can be accessed through indices. 
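The rationale in the comment being deleted above rests on a real distinction: growing a `std::vector` may reallocate and dangle outstanding pointers to its elements, whereas `std::deque::push_back` never moves existing elements. A hedged sketch of that difference; once `curLoopLogRecord` no longer holds pointers into the container, the cheaper `llvm::SmallVector` suffices:

```cpp
#include <deque>
#include <vector>

int main() {
  std::vector<int> v{1};
  int *pv = &v.front();
  for (int i = 0; i < 1000; ++i)
    v.push_back(i); // may reallocate: pv can now dangle
  std::deque<int> d{1};
  int *pd = &d.front();
  for (int i = 0; i < 1000; ++i)
    d.push_back(i); // never moves existing elements: pd stays valid
  return *pd;       // fine; dereferencing pv here would be undefined
}
```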
- std::deque constructClauses; - - LogRecord *curLoopLogRecord{nullptr}; - llvm::SmallVector loopLogRecordStack; + llvm::SmallVector constructClauses; llvm::SmallVector ompWrapperStack; llvm::DenseMap> clauseStrings; Parsing *parsing{nullptr}; }; } // namespace parser } // namespace Fortran diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp b/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp index 0654513d1daea..9ee8eb1a80cbd 100644 --- a/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp +++ b/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp @@ -33,10 +33,6 @@ namespace llvm { namespace yaml { using llvm::yaml::IO; using llvm::yaml::MappingTraits; -template -struct SequenceTraits, - std::enable_if_t::flow>::value>> - : SequenceTraitsImpl, SequenceElementTraits::flow> {}; template <> struct MappingTraits { static void mapping(IO &io, ClauseInfo &info) { io.mapRequired("clause", info.clause); diff --git a/flang/examples/flang-omp-report-plugin/requirements.txt b/flang/examples/flang-omp-report-plugin/requirements.txt new file mode 100644 index 0000000000000..ad30971cc3689 --- /dev/null +++ b/flang/examples/flang-omp-report-plugin/requirements.txt @@ -0,0 +1,2 @@ +ruamel.yaml==0.17.16 +ruamel.yaml.clib==0.2.6 diff --git a/flang/examples/flang-omp-report-plugin/yaml_summarizer.py b/flang/examples/flang-omp-report-plugin/yaml_summarizer.py new file mode 100644 index 0000000000000..4c9981dffd015 --- /dev/null +++ b/flang/examples/flang-omp-report-plugin/yaml_summarizer.py @@ -0,0 +1,282 @@ +"""YAML Summariser + +The flang plugin ``flang-omp-report`` takes one Fortran +file in and returns a YAML report file of the input file. +This becomes an issue when you want to analyse an entire project +into one final report. +The purpose of this Python script is to generate a final YAML +summary from all of the files generated by ``flang-omp-report``. + +Currently, it requires ``ruamel.yaml``, +which can be installed with: + + ``pip3 install ruamel.yaml`` + +By default it scans the directory it is run in +for any YAML files and outputs a summary to +stdout. It can be run as: + + ``python3 yaml_summarizer.py`` + +Parameters: + + -d --directory Specify which directory to scan. Multiple directories can be searched by + providing a semicolon separated list of directories. + + -l --log Combine all yaml files into one log (instead of generating a summary) + + -o --output Specify a directory in which to save the summary file + + -r --recursive Recursively search directory for all yaml files + +Examples: + + ``python3 yaml_summarizer.py -d ~/llvm-project/build/ -r`` + + ``python3 yaml_summarizer.py -d "~/llvm-project/build/;~/llvm-project/flang/test/Examples"`` + + ``python3 yaml_summarizer.py -l -o ~/examples/report.yaml`` + +Pseudo-examples: + + Summary: + + $ python3 yaml_summarizer.py file_1.yaml file_2.yaml + + + Constructs are in the form: + - construct: someOMPconstruct + count: 8 + clauses: + - clause: clauseOne + count: 4 + - clause: ClauseTwo + count: 2 + + Log: + + $ python3 yaml_summarizer.py -l file_1.yaml file_2.yaml + file_1.yaml + + file_2.yaml + + + Constructs are in the form: + - construct: someOMPConstruct + line: 12 + clauses: + - clause: clauseOne + details: 'someDetailForClause' +""" + +import sys +import glob +import argparse +from pathlib import Path +from os.path import isdir + +from ruamel.yaml import YAML + +def find_yaml_files(search_directory: Path, search_pattern: str): + """ + Find all '.yaml' files and return an iglob iterator to them. 
+
+    Keyword arguments:
+    search_directory -- Directory to search for '.yaml' files. If this is
+                        'None' (or otherwise falsy), the search pattern is
+                        applied from the filesystem root instead.
+    search_pattern -- Search pattern for 'iglob' to use for finding '.yaml' files.
+                      If this is set to 'None', then it will default to just searching
+                      for all '.yaml' files in the current directory.
+    """
+    # @TODO: Currently *all* yaml files are read - regardless of whether they have
+    # been generated with 'flang-omp-report' or not. This might result in the script
+    # reading files that it should ignore.
+    if search_directory:
+        return glob.iglob(str(search_directory.joinpath(search_pattern)), recursive=True)
+
+    return glob.iglob(str("/" + search_pattern), recursive=True)
+
+def process_log(data, result: dict):
+    """
+    Process the data input as a 'log' into the 'result' dictionary. This
+    essentially just stitches together all of the input '.yaml' files into
+    one result.
+
+    Keyword arguments:
+    data -- Data from yaml.load() for a yaml file. So the type can be 'Any'.
+    result -- Dictionary (keyed by source file) to add the processed data to.
+    """
+    for datum in data:
+        items = result.get(datum['file'], [])
+        items.append({"construct" : datum['construct'],
+                      "line" : datum['line'],
+                      "clauses" : datum['clauses']})
+        result[datum['file']] = items
+
+def add_clause(datum, construct):
+    """
+    Add clauses to the construct if they're missing.
+    Otherwise increment their count by one.
+
+    Keyword arguments:
+    datum -- Data construct containing clauses to check.
+    construct -- Construct to add or increment clause count.
+    """
+    to_check = [i['clause'] for i in construct['clauses']]
+    to_add = [i['clause'] for i in datum['clauses']]
+    clauses = construct["clauses"]
+    for item in to_add:
+        if item in to_check:
+            for clause in clauses:
+                if clause["clause"] == item:
+                    clause["count"] += 1
+        else:
+            clauses.append({"clause" : item,
+                            "count" : 1})
+
+def process_summary(data, result: list):
+    """
+    Process the data input as a 'summary' into the 'result' list.
+
+    Keyword arguments:
+    data -- Data from yaml.load() for a yaml file. So the type can be 'Any'.
+    result -- List to add the processed data to.
+    """
+    for datum in data:
+        construct = next((item for item in result
+                          if item["construct"] == datum["construct"]), None)
+        clauses = []
+        # Add the construct and clauses to the summary if
+        # they haven't been seen before
+        if not construct:
+            for i in datum['clauses']:
+                clauses.append({"clause" : i['clause'],
+                                "count" : 1})
+            result.append({"construct" : datum['construct'],
+                           "count" : 1,
+                           "clauses" : clauses})
+        else:
+            construct["count"] += 1
+
+            add_clause(datum, construct)
+
+def clean_summary(result):
+    """ Cleans the result after processing the yaml files with summary format."""
+    # Remove all "clauses" that are empty to keep things compact
+    for construct in result:
+        if construct["clauses"] == []:
+            construct.pop("clauses")
+
+def clean_log(result):
+    """ Cleans the result after processing the yaml files with log format."""
+    for constructs in result.values():
+        for construct in constructs:
+            if construct["clauses"] == []:
+                construct.pop("clauses")
+
+def output_result(yaml: YAML, result, output_file: Path):
+    """
+    Outputs result to either 'stdout' or to an output file.
+
+    Keyword arguments:
+    result      -- Processed result (summary list or log dictionary) to output.
+    output_file -- File to output result to. If this is 'None' then result will be
+                   outputted to 'stdout'.
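+    yaml        -- 'YAML' instance used to dump 'result'; assumed to be the
+                   same instance that was used to load the input files.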
+    """
+    if output_file:
+        with open(output_file, 'w+', encoding='utf-8') as file:
+            if output_file.suffix == ".yaml":
+                yaml.dump(result, file)
+            else:
+                file.write(result)
+    else:
+        yaml.dump(result, sys.stdout)
+
+def process_yaml(search_directories: list, search_pattern: str,
+                 result_format: str, output_file: Path):
+    """
+    Reads each yaml file, calls the appropriate format function for
+    the file and then outputs the result to either 'stdout' or to an output file.
+
+    Keyword arguments:
+    search_directories -- List of directory paths to search for '.yaml' files in.
+    search_pattern -- String pattern formatted for use with glob.iglob to find all
+                      '.yaml' files.
+    result_format -- String representing the output format. Supported strings
+                     are: 'log' and 'summary'.
+    output_file -- Path to output file (if value is None, then default to outputting to 'stdout').
+    """
+    if result_format == "log":
+        result = {}
+        action = process_log
+        clean_report = clean_log
+    else:
+        result = []
+        action = process_summary
+        clean_report = clean_summary
+
+    yaml = YAML()
+
+    for search_directory in search_directories:
+        for file in find_yaml_files(search_directory, search_pattern):
+            with open(file, "r", encoding='utf-8') as yaml_file:
+                data = yaml.load(yaml_file)
+                action(data, result)
+
+    if clean_report is not None:
+        clean_report(result)
+
+    output_result(yaml, result, output_file)
+
+def create_arg_parser():
+    """ Create and return an argparse.ArgumentParser configured for this script. """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-d", "--directory", help="Specify a directory to scan",
+                        dest="dir", type=str)
+    parser.add_argument("-o", "--output", help="Writes to a file instead of\
+                        stdout", dest="output", type=str)
+    parser.add_argument("-r", "--recursive", help="Recursive search for .yaml files",
+                        dest="recursive", type=bool, nargs='?', const=True, default=False)
+
+    exclusive_parser = parser.add_mutually_exclusive_group()
+    exclusive_parser.add_argument("-l", "--log", help="Modifies report format: "
+                                  "Combines the log '.yaml' files into one file.",
+                                  action='store_true', dest='log')
+    return parser
+
+def parse_arguments():
+    """ Parses arguments given to the script and returns a tuple of processed arguments. """
+    parser = create_arg_parser()
+    args = parser.parse_args()
+
+    if args.dir:
+        search_directory = [Path(path) for path in args.dir.split(";")]
+    else:
+        search_directory = [Path.cwd()]
+
+    if args.recursive:
+        search_pattern = "**/*.yaml"
+    else:
+        search_pattern = "*.yaml"
+
+    if args.log:
+        result_format = "log"
+    else:
+        result_format = "summary"
+
+    # Default to stdout; only honour -o when it names a directory or a path in
+    # an existing directory (this also avoids leaving 'output_file' unbound
+    # when -o is not given).
+    output_file = None
+    if args.output:
+        if isdir(args.output):
+            output_file = Path(args.output).joinpath("summary.yaml")
+        elif isdir(Path(args.output).resolve().parent):
+            output_file = Path(args.output)
+
+    return (search_directory, search_pattern, result_format, output_file)
+
+def main():
+    """ Main function of script.
""" + (search_directory, search_pattern, result_format, output_file) = parse_arguments() + + process_yaml(search_directory, search_pattern, result_format, output_file) + + return 0 + +if __name__ == "__main__": + sys.exit(main()) diff --git a/flang/include/flang/Evaluate/real.h b/flang/include/flang/Evaluate/real.h index bf43e68f67b73..b6c313c0057c5 100644 --- a/flang/include/flang/Evaluate/real.h +++ b/flang/include/flang/Evaluate/real.h @@ -133,7 +133,7 @@ class Real : public common::RealDetails { static constexpr Real EPSILON() { Real epsilon; epsilon.Normalize( - false, exponentBias - binaryPrecision, Fraction::MASKL(1)); + false, exponentBias + 1 - binaryPrecision, Fraction::MASKL(1)); return epsilon; } static constexpr Real HUGE() { diff --git a/flang/include/flang/ISO_Fortran_binding.h b/flang/include/flang/ISO_Fortran_binding.h index bbb958728f4bd..7689eee130248 100644 --- a/flang/include/flang/ISO_Fortran_binding.h +++ b/flang/include/flang/ISO_Fortran_binding.h @@ -84,18 +84,20 @@ typedef signed char CFI_type_t; #define CFI_TYPE_LAST CFI_type_char32_t #define CFI_type_other (-1) // must be negative -/* Error code macros */ +/* Error code macros - skip some of the small values to avoid conflicts with + * other status codes mandated by the standard, e.g. those returned by + * GET_ENVIRONMENT_VARIABLE (16.9.84) */ #define CFI_SUCCESS 0 /* must be zero */ -#define CFI_ERROR_BASE_ADDR_NULL 1 -#define CFI_ERROR_BASE_ADDR_NOT_NULL 2 -#define CFI_INVALID_ELEM_LEN 3 -#define CFI_INVALID_RANK 4 -#define CFI_INVALID_TYPE 5 -#define CFI_INVALID_ATTRIBUTE 6 -#define CFI_INVALID_EXTENT 7 -#define CFI_INVALID_DESCRIPTOR 8 -#define CFI_ERROR_MEM_ALLOCATION 9 -#define CFI_ERROR_OUT_OF_BOUNDS 10 +#define CFI_ERROR_BASE_ADDR_NULL 11 +#define CFI_ERROR_BASE_ADDR_NOT_NULL 12 +#define CFI_INVALID_ELEM_LEN 13 +#define CFI_INVALID_RANK 14 +#define CFI_INVALID_TYPE 15 +#define CFI_INVALID_ATTRIBUTE 16 +#define CFI_INVALID_EXTENT 17 +#define CFI_INVALID_DESCRIPTOR 18 +#define CFI_ERROR_MEM_ALLOCATION 19 +#define CFI_ERROR_OUT_OF_BOUNDS 20 /* 18.5.2 per-dimension information */ typedef struct CFI_dim_t { diff --git a/flang/include/flang/Lower/ComplexExpr.h b/flang/include/flang/Lower/ComplexExpr.h index d3600a0cda6a5..337294587cf56 100644 --- a/flang/include/flang/Lower/ComplexExpr.h +++ b/flang/include/flang/Lower/ComplexExpr.h @@ -57,14 +57,18 @@ class ComplexExprHelper { protected: template mlir::Value extract(mlir::Value cplx) { - return builder.create(loc, getComplexPartType(cplx), - cplx, createPartId()); + return builder.create( + loc, getComplexPartType(cplx), cplx, + builder.getArrayAttr({builder.getIntegerAttr( + builder.getIndexType(), static_cast(partId))})); } template mlir::Value insert(mlir::Value cplx, mlir::Value part) { - return builder.create(loc, cplx.getType(), cplx, part, - createPartId()); + return builder.create( + loc, cplx.getType(), cplx, part, + builder.getArrayAttr({builder.getIntegerAttr( + builder.getIndexType(), static_cast(partId))})); } template diff --git a/flang/include/flang/Optimizer/CodeGen/CGOps.td b/flang/include/flang/Optimizer/CodeGen/CGOps.td index 9ebda32825a63..f76c9664a4b48 100644 --- a/flang/include/flang/Optimizer/CodeGen/CGOps.td +++ b/flang/include/flang/Optimizer/CodeGen/CGOps.td @@ -42,6 +42,7 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", [AttrSizedOperandSegments]> { The default is a vector of the value 1. - slice: A vector of triples that describe an array slice. - subcomponent: A vector of indices for subobject slicing. 
+  - substring: A substring operator (offset, length) for CHARACTER.
   - LEN type parameters: A vector of runtime LEN type parameters that
     describe and correspond to the elemental derived type.
 
@@ -54,14 +55,15 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", [AttrSizedOperandSegments]> {
     Variadic:$shift,
     Variadic:$slice,
     Variadic:$subcomponent,
+    Variadic:$substr,
     Variadic:$lenParams
   );
   let results = (outs fir_BoxType);
   let assemblyFormat = [{
     $memref (`(`$shape^`)`)? (`origin` $shift^)? (`[`$slice^`]`)?
-    (`path` $subcomponent^)? (`typeparams` $lenParams^)? attr-dict
-    `:` functional-type(operands, results)
+    (`path` $subcomponent^)? (`substr` $substr^)? (`typeparams` $lenParams^)?
+    attr-dict `:` functional-type(operands, results)
   }];
 
   let extraClassDeclaration = [{
@@ -76,9 +78,10 @@ def fircg_XEmboxOp : fircg_Op<"ext_embox", [AttrSizedOperandSegments]> {
     unsigned shiftOffset() { return shapeOffset() + shape().size(); }
     unsigned sliceOffset() { return shiftOffset() + shift().size(); }
     unsigned subcomponentOffset() { return sliceOffset() + slice().size(); }
-    unsigned lenParamOffset() {
-      return subcomponentOffset() + subcomponent().size();
+    unsigned substrOffset() {
+      return subcomponentOffset() + subcomponent().size();
     }
+    unsigned lenParamOffset() { return substrOffset() + substr().size(); }
   }];
 }
 
@@ -97,6 +100,7 @@ def fircg_XReboxOp : fircg_Op<"ext_rebox", [AttrSizedOperandSegments]> {
       The default is a vector of the value 1.
   - slice: A vector of triples that describe an array slice.
   - subcomponent: A vector of indices for subobject slicing.
+  - substring: A substring operator (offset, length) for CHARACTER.
 
   The box argument is mandatory, the other arguments are optional. There
   must not be both a shape and slice/subcomponent arguments
@@ -107,14 +111,15 @@ def fircg_XReboxOp : fircg_Op<"ext_rebox", [AttrSizedOperandSegments]> {
     Variadic:$shape,
     Variadic:$shift,
     Variadic:$slice,
-    Variadic:$subcomponent
+    Variadic:$subcomponent,
+    Variadic:$substr
   );
   let results = (outs fir_BoxType);
   let assemblyFormat = [{
     $box (`(`$shape^`)`)? (`origin` $shift^)? (`[`$slice^`]`)?
-    (`path` $subcomponent^) ? attr-dict
-    `:` functional-type(operands, results)
+    (`path` $subcomponent^)? (`substr` $substr^)? attr-dict `:`
+    functional-type(operands, results)
   }];
 
   let extraClassDeclaration = [{
diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 103783b7ae668..dd8921319d43b 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -23,10 +23,7 @@ def FIRToLLVMLowering : Pass<"fir-to-llvm-ir", "mlir::ModuleOp"> {
     will also convert ops in the standard and FIRCG dialects.
}]; let constructor = "::fir::createFIRToLLVMPass()"; - let dependentDialects = [ - "fir::FIROpsDialect", "fir::FIRCodeGenDialect", "mlir::BuiltinDialect", - "mlir::LLVM::LLVMDialect", "mlir::omp::OpenMPDialect" - ]; + let dependentDialects = ["mlir::LLVM::LLVMDialect"]; } def CodeGenRewrite : Pass<"cg-rewrite"> { diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 6a9fbcdf28a4e..6d6f79c2d7adf 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -480,7 +480,7 @@ class fir_SwitchTerminatorOp traits = []> : // The number of destination conditions that may be tested unsigned getNumConditions() { - return (*this)->getAttrOfType(getCasesAttr()).size(); + return getCases().size(); } // The selector is the value being tested to determine the destination @@ -488,6 +488,9 @@ class fir_SwitchTerminatorOp traits = []> : mlir::Value getSelector(llvm::ArrayRef operands) { return operands[0]; } + mlir::Value getSelector(mlir::ValueRange operands) { + return operands.front(); + } // The number of blocks that may be branched to unsigned getNumDest() { return (*this)->getNumSuccessors(); } @@ -498,6 +501,8 @@ class fir_SwitchTerminatorOp traits = []> : llvm::Optional> getSuccessorOperands( llvm::ArrayRef operands, unsigned cond); + llvm::Optional getSuccessorOperands( + mlir::ValueRange operands, unsigned cond); using BranchOpInterfaceTrait::getSuccessorOperands; // Helper function to deal with Optional operand forms @@ -510,6 +515,10 @@ class fir_SwitchTerminatorOp traits = []> : p.printSuccessor(succ); } + mlir::ArrayAttr getCases() { + return (*this)->getAttrOfType(getCasesAttr()); + } + unsigned targetOffsetSize(); }]; } @@ -1725,11 +1734,6 @@ def fir_ExtractValueOp : fir_OneResultOp<"extract_value", [NoSideEffect]> { let assemblyFormat = [{ $adt `,` $coor attr-dict `:` functional-type(operands, results) }]; - - let builders = [ - OpBuilder<(ins "mlir::Type":$rty, "mlir::Value":$adt, - "llvm::ArrayRef":$vcoor)> - ]; } def fir_FieldIndexOp : fir_OneResultOp<"field_index", [NoSideEffect]> { @@ -1976,11 +1980,6 @@ def fir_InsertValueOp : fir_OneResultOp<"insert_value", [NoSideEffect]> { $adt `,` $val `,` $coor attr-dict `:` functional-type(operands, results) }]; - let builders = [ - OpBuilder<(ins "mlir::Type":$rty, "mlir::Value":$adt, "mlir::Value":$val, - "llvm::ArrayRef":$vcoor)> - ]; - let hasCanonicalizer = 1; } @@ -1988,7 +1987,8 @@ def fir_InsertOnRangeOp : fir_OneResultOp<"insert_on_range", [NoSideEffect]> { let summary = "insert sub-value into a range on an existing sequence"; let description = [{ - Insert copies of a value into an entity with an array type. + Insert copies of a value into an entity with an array type of constant shape + and size. Returns a new ssa value with the same type as the original entity. 
The values are inserted at a contiguous range of indices in Fortran row-to-column element order as specified by lower and upper bound @@ -2011,11 +2011,6 @@ def fir_InsertOnRangeOp : fir_OneResultOp<"insert_on_range", [NoSideEffect]> { $seq `,` $val `,` $coor attr-dict `:` functional-type(operands, results) }]; - let builders = [ - OpBuilder<(ins "mlir::Type":$rty, "mlir::Value":$adt, "mlir::Value":$val, - "llvm::ArrayRef":$vcoor)> - ]; - let verifier = "return ::verify(*this);"; } diff --git a/flang/include/flang/Runtime/command.h b/flang/include/flang/Runtime/command.h index f664599e7385a..67d7e7cb0b9c3 100644 --- a/flang/include/flang/Runtime/command.h +++ b/flang/include/flang/Runtime/command.h @@ -44,7 +44,8 @@ std::int64_t RTNAME(ArgumentLength)(std::int32_t n); // Returns a STATUS as described in the standard. std::int32_t RTNAME(EnvVariableValue)(const Descriptor &name, const Descriptor *value = nullptr, bool trim_name = true, - const Descriptor *errmsg = nullptr); + const Descriptor *errmsg = nullptr, const char *sourceFile = nullptr, + int line = 0); // Try to get the significant length of the environment variable specified by // NAME. Returns 0 if it doesn't manage. diff --git a/flang/include/flang/Runtime/magic-numbers.h b/flang/include/flang/Runtime/magic-numbers.h index b2c6accdc357e..e883637b8c4b8 100644 --- a/flang/include/flang/Runtime/magic-numbers.h +++ b/flang/include/flang/Runtime/magic-numbers.h @@ -46,4 +46,10 @@ to be -1, the others must be positive. #define FORTRAN_RUNTIME_STAT_INVALID_ARG_NUMBER 107 #define FORTRAN_RUNTIME_STAT_MISSING_ARG 108 #define FORTRAN_RUNTIME_STAT_VALUE_TOO_SHORT -1 + +#if 0 +Status codes for GET_ENVIRONMENT_VARIABLE. Values mandated by the standard. +#endif +#define FORTRAN_RUNTIME_STAT_MISSING_ENV_VAR 1 +#define FORTRAN_RUNTIME_STAT_ENV_VARS_UNSUPPORTED 2 #endif diff --git a/flang/lib/Lower/CharacterExpr.cpp b/flang/lib/Lower/CharacterExpr.cpp index 551051d7987d4..0974d2f40d702 100644 --- a/flang/lib/Lower/CharacterExpr.cpp +++ b/flang/lib/Lower/CharacterExpr.cpp @@ -217,9 +217,10 @@ void Fortran::lower::CharacterExprHelper::createLengthOneAssign( auto valTy = val.getType(); // Precondition is rhs is size 1, but it may be wrapped in a fir.array. 
if (auto seqTy = valTy.dyn_cast()) { - auto zero = builder.createIntegerConstant(loc, builder.getIndexType(), 0); + auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); valTy = seqTy.getEleTy(); - val = builder.create(loc, valTy, val, zero); + val = builder.create(loc, valTy, val, + builder.getArrayAttr(zero)); } auto addrTy = fir::ReferenceType::get(valTy); addr = builder.createConvert(loc, addrTy, addr); diff --git a/flang/lib/Optimizer/Builder/Character.cpp b/flang/lib/Optimizer/Builder/Character.cpp index 7cd2b11f8cc4f..b306d54313006 100644 --- a/flang/lib/Optimizer/Builder/Character.cpp +++ b/flang/lib/Optimizer/Builder/Character.cpp @@ -680,8 +680,9 @@ fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code, auto intType = builder.getIntegerType(bits); auto cast = builder.createConvert(loc, intType, code); auto undef = builder.create(loc, charType); - auto zero = builder.createIntegerConstant(loc, builder.getIndexType(), 0); - return builder.create(loc, charType, undef, cast, zero); + auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); + return builder.create(loc, charType, undef, cast, + builder.getArrayAttr(zero)); } mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton( @@ -690,8 +691,9 @@ mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton( assert(type.getLen() == 1); auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind()); auto intType = builder.getIntegerType(bits); - auto zero = builder.createIntegerConstant(loc, builder.getIndexType(), 0); - return builder.create(loc, intType, singleton, zero); + auto zero = builder.getIntegerAttr(builder.getIndexType(), 0); + return builder.create(loc, intType, singleton, + builder.getArrayAttr(zero)); } mlir::Value diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 867d05a7914ba..4a0a7266a2f53 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -19,6 +19,7 @@ #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Matchers.h" #include "mlir/Pass/Pass.h" #include "llvm/ADT/ArrayRef.h" @@ -44,9 +45,27 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern { return *static_cast(this->getTypeConverter()); } }; -} // namespace -namespace { +/// FIR conversion pattern template +template +class FIROpAndTypeConversion : public FIROpConversion { +public: + using FIROpConversion::FIROpConversion; + using OpAdaptor = typename FromOp::Adaptor; + + mlir::LogicalResult + matchAndRewrite(FromOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const final { + mlir::Type ty = this->convertType(op.getType()); + return doRewrite(op, ty, adaptor, rewriter); + } + + virtual mlir::LogicalResult + doRewrite(FromOp addr, mlir::Type ty, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const = 0; +}; + +// Lower `fir.address_of` operation to `llvm.address_of` operation. 
 struct AddrOfOpConversion : public FIROpConversion {
   using FIROpConversion::FIROpConversion;
 
@@ -60,6 +79,23 @@ struct AddrOfOpConversion : public FIROpConversion {
   }
 };
 
+// `fir.call` -> `llvm.call`
+struct CallOpConversion : public FIROpConversion {
+  using FIROpConversion::FIROpConversion;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::CallOp call, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    SmallVector resultTys;
+    for (auto r : call.getResults())
+      resultTys.push_back(convertType(r.getType()));
+    rewriter.replaceOpWithNewOp(
+        call, resultTys, adaptor.getOperands(), call->getAttrs());
+    return success();
+  }
+};
+
+/// Lower `fir.has_value` operation to `llvm.return` operation.
 struct HasValueOpConversion : public FIROpConversion {
   using FIROpConversion::FIROpConversion;
 
@@ -71,6 +107,9 @@ struct HasValueOpConversion : public FIROpConversion {
   }
 };
 
+/// Lower `fir.global` operation to `llvm.global` operation.
+/// `fir.insert_on_range` operations are replaced with a constant dense
+/// attribute if they are applied on the full range.
 struct GlobalOpConversion : public FIROpConversion {
   using FIROpConversion::FIROpConversion;
 
@@ -133,6 +172,8 @@ struct GlobalOpConversion : public FIROpConversion {
     return true;
   }
 
+  // TODO: String comparison should be avoided. Replace linkName with an
+  // enumeration.
   mlir::LLVM::Linkage convertLinkage(Optional optLinkage) const {
     if (optLinkage.hasValue()) {
       auto name = optLinkage.getValue();
@@ -149,6 +190,78 @@
   }
 };
 
+template 
+void selectMatchAndRewrite(fir::LLVMTypeConverter &lowering, OP select,
+                           typename OP::Adaptor adaptor,
+                           mlir::ConversionPatternRewriter &rewriter) {
+  unsigned conds = select.getNumConditions();
+  auto cases = select.getCases().getValue();
+  mlir::Value selector = adaptor.selector();
+  auto loc = select.getLoc();
+  assert(conds > 0 && "select must have cases");
+
+  llvm::SmallVector destinations;
+  llvm::SmallVector destinationsOperands;
+  mlir::Block *defaultDestination;
+  mlir::ValueRange defaultOperands;
+  llvm::SmallVector caseValues;
+
+  for (unsigned t = 0; t != conds; ++t) {
+    mlir::Block *dest = select.getSuccessor(t);
+    auto destOps = select.getSuccessorOperands(adaptor.getOperands(), t);
+    const mlir::Attribute &attr = cases[t];
+    if (auto intAttr = attr.template dyn_cast()) {
+      destinations.push_back(dest);
+      destinationsOperands.push_back(destOps.hasValue() ? *destOps
+                                                        : ValueRange());
+      caseValues.push_back(intAttr.getInt());
+      continue;
+    }
+    assert(attr.template dyn_cast_or_null());
+    assert((t + 1 == conds) && "unit must be last");
+    defaultDestination = dest;
+    defaultOperands = destOps.hasValue() ? *destOps : ValueRange();
+  }
+
+  // LLVM::SwitchOp takes an i32 type for the selector.
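+  // E.g. an i8 or i16 selector value is zero-extended to i32 here before
+  // feeding the generated llvm.switch.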
+  if (select.getSelector().getType() != rewriter.getI32Type())
+    selector =
+        rewriter.create(loc, rewriter.getI32Type(), selector);
+
+  rewriter.replaceOpWithNewOp(
+      select, selector,
+      /*defaultDestination=*/defaultDestination,
+      /*defaultOperands=*/defaultOperands,
+      /*caseValues=*/caseValues,
+      /*caseDestinations=*/destinations,
+      /*caseOperands=*/destinationsOperands,
+      /*branchWeights=*/ArrayRef());
+}
+
+/// Lower `fir.select` to `llvm.switch`.
+struct SelectOpConversion : public FIROpConversion {
+  using FIROpConversion::FIROpConversion;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::SelectOp op, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    selectMatchAndRewrite(lowerTy(), op, adaptor, rewriter);
+    return success();
+  }
+};
+
+/// Lower `fir.select_rank` to `llvm.switch`.
+struct SelectRankOpConversion : public FIROpConversion {
+  using FIROpConversion::FIROpConversion;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::SelectRankOp op, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    selectMatchAndRewrite(lowerTy(), op, adaptor, rewriter);
+    return success();
+  }
+};
+
 // convert to LLVM IR dialect `undef`
 struct UndefOpConversion : public FIROpConversion {
   using FIROpConversion::FIROpConversion;
@@ -161,6 +274,219 @@
     return success();
   }
 };
+
+// convert to LLVM IR dialect `unreachable`
+struct UnreachableOpConversion : public FIROpConversion {
+  using FIROpConversion::FIROpConversion;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::UnreachableOp unreach, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp(unreach);
+    return success();
+  }
+};
+
+struct ZeroOpConversion : public FIROpConversion {
+  using FIROpConversion::FIROpConversion;
+
+  mlir::LogicalResult
+  matchAndRewrite(fir::ZeroOp zero, OpAdaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    auto ty = convertType(zero.getType());
+    if (ty.isa()) {
+      rewriter.replaceOpWithNewOp(zero, ty);
+    } else if (ty.isa()) {
+      rewriter.replaceOpWithNewOp(
+          zero, ty, mlir::IntegerAttr::get(zero.getType(), 0));
+    } else if (mlir::LLVM::isCompatibleFloatingPointType(ty)) {
+      rewriter.replaceOpWithNewOp(
+          zero, ty, mlir::FloatAttr::get(zero.getType(), 0.0));
+    } else {
+      // TODO: create ConstantAggregateZero for FIR aggregate/array types.
+      return rewriter.notifyMatchFailure(
+          zero,
+          "conversion of fir.zero with aggregate type not implemented yet");
+    }
+    return success();
+  }
+};
+
+// Code shared between insert_value and extract_value Ops.
+struct ValueOpCommon {
+  // Translate the arguments pertaining to any multidimensional array to
+  // row-major order for LLVM-IR.
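+  // For example, Fortran-order coordinates (i, j) into a two-dimensional
+  // array become the LLVM-IR aggregate indices [j, i], since LLVM array
+  // types nest row major.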
+  static void toRowMajor(SmallVectorImpl &attrs,
+                         mlir::Type ty) {
+    assert(ty && "type is null");
+    const auto end = attrs.size();
+    for (std::remove_const_t i = 0; i < end; ++i) {
+      if (auto seq = ty.dyn_cast()) {
+        const auto dim = getDimension(seq);
+        if (dim > 1) {
+          auto ub = std::min(i + dim, end);
+          std::reverse(attrs.begin() + i, attrs.begin() + ub);
+          i += dim - 1;
+        }
+        ty = getArrayElementType(seq);
+      } else if (auto st = ty.dyn_cast()) {
+        ty = st.getBody()[attrs[i].cast().getInt()];
+      } else {
+        llvm_unreachable("index into invalid type");
+      }
+    }
+  }
+
+  static llvm::SmallVector
+  collectIndices(mlir::ConversionPatternRewriter &rewriter,
+                 mlir::ArrayAttr arrAttr) {
+    llvm::SmallVector attrs;
+    for (auto i = arrAttr.begin(), e = arrAttr.end(); i != e; ++i) {
+      if (i->isa()) {
+        attrs.push_back(*i);
+      } else {
+        auto fieldName = i->cast().getValue();
+        ++i;
+        auto ty = i->cast().getValue();
+        auto index = ty.cast().getFieldIndex(fieldName);
+        attrs.push_back(mlir::IntegerAttr::get(rewriter.getI32Type(), index));
+      }
+    }
+    return attrs;
+  }
+
+private:
+  static unsigned getDimension(mlir::LLVM::LLVMArrayType ty) {
+    unsigned result = 1;
+    for (auto eleTy = ty.getElementType().dyn_cast();
+         eleTy;
+         eleTy = eleTy.getElementType().dyn_cast())
+      ++result;
+    return result;
+  }
+
+  static mlir::Type getArrayElementType(mlir::LLVM::LLVMArrayType ty) {
+    auto eleTy = ty.getElementType();
+    while (auto arrTy = eleTy.dyn_cast())
+      eleTy = arrTy.getElementType();
+    return eleTy;
+  }
+};
+
+/// Extract a subobject value from an SSA value of aggregate type
+struct ExtractValueOpConversion
+    : public FIROpAndTypeConversion,
+      public ValueOpCommon {
+  using FIROpAndTypeConversion::FIROpAndTypeConversion;
+
+  mlir::LogicalResult
+  doRewrite(fir::ExtractValueOp extractVal, mlir::Type ty, OpAdaptor adaptor,
+            mlir::ConversionPatternRewriter &rewriter) const override {
+    auto attrs = collectIndices(rewriter, extractVal.coor());
+    toRowMajor(attrs, adaptor.getOperands()[0].getType());
+    auto position = mlir::ArrayAttr::get(extractVal.getContext(), attrs);
+    rewriter.replaceOpWithNewOp(
+        extractVal, ty, adaptor.getOperands()[0], position);
+    return success();
+  }
+};
+
+/// InsertValue is the generalized instruction for the composition of new
+/// aggregate type values.
+struct InsertValueOpConversion
+    : public FIROpAndTypeConversion,
+      public ValueOpCommon {
+  using FIROpAndTypeConversion::FIROpAndTypeConversion;
+
+  mlir::LogicalResult
+  doRewrite(fir::InsertValueOp insertVal, mlir::Type ty, OpAdaptor adaptor,
+            mlir::ConversionPatternRewriter &rewriter) const override {
+    auto attrs = collectIndices(rewriter, insertVal.coor());
+    toRowMajor(attrs, adaptor.getOperands()[0].getType());
+    auto position = mlir::ArrayAttr::get(insertVal.getContext(), attrs);
+    rewriter.replaceOpWithNewOp(
+        insertVal, ty, adaptor.getOperands()[0], adaptor.getOperands()[1],
+        position);
+    return success();
+  }
+};
+
+/// InsertOnRange inserts a value into a sequence over a range of offsets.
+struct InsertOnRangeOpConversion
+    : public FIROpAndTypeConversion {
+  using FIROpAndTypeConversion::FIROpAndTypeConversion;
+
+  // Increments an array of subscripts in a row-major fashion.
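+  // E.g. with dims = {2, 3}, repeated calls starting from subscripts
+  // {0, 0} visit {0, 1}, {0, 2}, {1, 0}, {1, 1}, {1, 2}: the last
+  // subscript varies fastest, matching LLVM-IR's row-major nesting.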
+ void incrementSubscripts(const SmallVector &dims, + SmallVector &subscripts) const { + for (size_t i = dims.size(); i > 0; --i) { + if (++subscripts[i - 1] < dims[i - 1]) { + return; + } + subscripts[i - 1] = 0; + } + } + + mlir::LogicalResult + doRewrite(fir::InsertOnRangeOp range, mlir::Type ty, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + + llvm::SmallVector dims; + auto type = adaptor.getOperands()[0].getType(); + + // Iteratively extract the array dimensions from the type. + while (auto t = type.dyn_cast()) { + dims.push_back(t.getNumElements()); + type = t.getElementType(); + } + + SmallVector lBounds; + SmallVector uBounds; + + // Extract integer value from the attribute + SmallVector coordinates = llvm::to_vector<4>( + llvm::map_range(range.coor(), [](Attribute a) -> int64_t { + return a.cast().getInt(); + })); + + // Unzip the upper and lower bound and convert to a row major format. + for (auto i = coordinates.rbegin(), e = coordinates.rend(); i != e; ++i) { + uBounds.push_back(*i++); + lBounds.push_back(*i); + } + + auto &subscripts = lBounds; + auto loc = range.getLoc(); + mlir::Value lastOp = adaptor.getOperands()[0]; + mlir::Value insertVal = adaptor.getOperands()[1]; + + auto i64Ty = rewriter.getI64Type(); + while (subscripts != uBounds) { + // Convert uint64_t's to Attribute's. + SmallVector subscriptAttrs; + for (const auto &subscript : subscripts) + subscriptAttrs.push_back(IntegerAttr::get(i64Ty, subscript)); + lastOp = rewriter.create( + loc, ty, lastOp, insertVal, + ArrayAttr::get(range.getContext(), subscriptAttrs)); + + incrementSubscripts(dims, subscripts); + } + + // Convert uint64_t's to Attribute's. + SmallVector subscriptAttrs; + for (const auto &subscript : subscripts) + subscriptAttrs.push_back( + IntegerAttr::get(rewriter.getI64Type(), subscript)); + mlir::ArrayRef arrayRef(subscriptAttrs); + + rewriter.replaceOpWithNewOp( + range, ty, lastOp, insertVal, + ArrayAttr::get(range.getContext(), arrayRef)); + + return success(); + } +}; } // namespace namespace { @@ -177,10 +503,13 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase { void runOnOperation() override final { auto *context = getModule().getContext(); fir::LLVMTypeConverter typeConverter{getModule()}; - auto loc = mlir::UnknownLoc::get(context); mlir::OwningRewritePatternList pattern(context); - pattern.insert(typeConverter); + pattern.insert< + AddrOfOpConversion, CallOpConversion, ExtractValueOpConversion, + HasValueOpConversion, GlobalOpConversion, InsertOnRangeOpConversion, + InsertValueOpConversion, SelectOpConversion, SelectRankOpConversion, + UndefOpConversion, UnreachableOpConversion, ZeroOpConversion>( + typeConverter); mlir::populateStdToLLVMConversionPatterns(typeConverter, pattern); mlir::arith::populateArithmeticToLLVMConversionPatterns(typeConverter, pattern); @@ -193,7 +522,6 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase { // apply the patterns if (mlir::failed(mlir::applyFullConversion(getModule(), target, std::move(pattern)))) { - mlir::emitError(loc, "error in converting to LLVM-IR dialect\n"); signalPassFailure(); } } diff --git a/flang/lib/Optimizer/CodeGen/DescriptorModel.h b/flang/lib/Optimizer/CodeGen/DescriptorModel.h new file mode 100644 index 0000000000000..0592fea62d153 --- /dev/null +++ b/flang/lib/Optimizer/CodeGen/DescriptorModel.h @@ -0,0 +1,140 @@ +//===-- DescriptorModel.h -- model of descriptors for codegen ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// LLVM IR dialect models of C++ types. +// +// This supplies a set of model builders to decompose the C declaration of a +// descriptor (as encoded in ISO_Fortran_binding.h and elsewhere) and +// reconstruct that type in the LLVM IR dialect. +// +// TODO: It is understood that this is deeply incorrect as far as building a +// portability layer for cross-compilation as these reflected types are those of +// the build machine and not necessarily that of either the host or the target. +// This assumption that build == host == target is actually pervasive across the +// compiler (https://llvm.org/PR52418). +// +//===----------------------------------------------------------------------===// + +#ifndef OPTIMIZER_DESCRIPTOR_MODEL_H +#define OPTIMIZER_DESCRIPTOR_MODEL_H + +#include "flang/ISO_Fortran_binding.h" +#include "flang/Runtime/descriptor.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include + +namespace fir { + +using TypeBuilderFunc = mlir::Type (*)(mlir::MLIRContext *); + +/// Get the LLVM IR dialect model for building a particular C++ type, `T`. +template +TypeBuilderFunc getModel(); + +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::LLVM::LLVMPointerType::get(mlir::IntegerType::get(context, 8)); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, sizeof(unsigned) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, sizeof(int) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, sizeof(unsigned long) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, sizeof(unsigned long long) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, sizeof(long long) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, + sizeof(Fortran::ISO::CFI_rank_t) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, + sizeof(Fortran::ISO::CFI_type_t) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + return mlir::IntegerType::get(context, sizeof(long) * 8); + }; +} +template <> +TypeBuilderFunc getModel() { + return [](mlir::MLIRContext *context) -> mlir::Type { + auto indexTy = getModel()(context); + return mlir::LLVM::LLVMArrayType::get(indexTy, 3); + }; +} +template <> +TypeBuilderFunc +getModel>() { + return getModel(); +} + +//===----------------------------------------------------------------------===// +// Descriptor reflection +//===----------------------------------------------------------------------===// + +/// Get the type model of the field number `Field` in an ISO CFI descriptor. 
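+/// For example, getDescFieldTypeModel<0>() is expected to produce the model
+/// for the base_addr member (a void*), and getDescFieldTypeModel<3>() the
+/// one for the rank member (CFI_rank_t), following the member order of
+/// CFI_cdesc_t.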
+template +static constexpr TypeBuilderFunc getDescFieldTypeModel() { + Fortran::ISO::Fortran_2018::CFI_cdesc_t dummyDesc{}; + // check that the descriptor is exactly 8 fields as specified in CFI_cdesc_t + // in flang/include/flang/ISO_Fortran_binding.h. + auto [a, b, c, d, e, f, g, h] = dummyDesc; + auto tup = std::tie(a, b, c, d, e, f, g, h); + auto field = std::get(tup); + return getModel(); +} + +/// An extended descriptor is defined by a class in runtime/descriptor.h. The +/// three fields in the class are hard-coded here, unlike the reflection used on +/// the ISO parts, which are a POD. +template +static constexpr TypeBuilderFunc getExtendedDescFieldTypeModel() { + if constexpr (Field == 8) { + return getModel(); + } else if constexpr (Field == 9) { + return getModel(); + } else { + llvm_unreachable("extended ISO descriptor only has 10 fields"); + } +} + +} // namespace fir + +#endif // OPTIMIZER_DESCRIPTOR_MODEL_H diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp index 56f299f16c268..712b643e1e05a 100644 --- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp @@ -102,7 +102,7 @@ class EmboxConversion : public mlir::OpRewritePattern { } auto xbox = rewriter.create( loc, embox.getType(), embox.memref(), shapeOpers, llvm::None, - llvm::None, llvm::None, embox.typeparams()); + llvm::None, llvm::None, llvm::None, embox.typeparams()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << embox << " to " << xbox << '\n'); rewriter.replaceOp(embox, xbox.getOperation()->getResults()); return mlir::success(); @@ -124,14 +124,16 @@ class EmboxConversion : public mlir::OpRewritePattern { } llvm::SmallVector sliceOpers; llvm::SmallVector subcompOpers; + llvm::SmallVector substrOpers; if (auto s = embox.getSlice()) if (auto sliceOp = dyn_cast_or_null(s.getDefiningOp())) { - sliceOpers.append(sliceOp.triples().begin(), sliceOp.triples().end()); - subcompOpers.append(sliceOp.fields().begin(), sliceOp.fields().end()); + sliceOpers.assign(sliceOp.triples().begin(), sliceOp.triples().end()); + subcompOpers.assign(sliceOp.fields().begin(), sliceOp.fields().end()); + substrOpers.assign(sliceOp.substr().begin(), sliceOp.substr().end()); } auto xbox = rewriter.create( loc, embox.getType(), embox.memref(), shapeOpers, shiftOpers, - sliceOpers, subcompOpers, embox.typeparams()); + sliceOpers, subcompOpers, substrOpers, embox.typeparams()); LLVM_DEBUG(llvm::dbgs() << "rewriting " << embox << " to " << xbox << '\n'); rewriter.replaceOp(embox, xbox.getOperation()->getResults()); return mlir::success(); @@ -172,15 +174,17 @@ class ReboxConversion : public mlir::OpRewritePattern { } llvm::SmallVector sliceOpers; llvm::SmallVector subcompOpers; + llvm::SmallVector substrOpers; if (auto s = rebox.slice()) if (auto sliceOp = dyn_cast_or_null(s.getDefiningOp())) { sliceOpers.append(sliceOp.triples().begin(), sliceOp.triples().end()); subcompOpers.append(sliceOp.fields().begin(), sliceOp.fields().end()); + substrOpers.append(sliceOp.substr().begin(), sliceOp.substr().end()); } auto xRebox = rewriter.create( loc, rebox.getType(), rebox.box(), shapeOpers, shiftOpers, sliceOpers, - subcompOpers); + subcompOpers, substrOpers); LLVM_DEBUG(llvm::dbgs() << "rewriting " << rebox << " to " << xRebox << '\n'); rewriter.replaceOp(rebox, xRebox.getOperation()->getResults()); @@ -227,6 +231,9 @@ class ArrayCoorConversion : public mlir::OpRewritePattern { if (auto sliceOp = dyn_cast_or_null(s.getDefiningOp())) { 
sliceOpers.append(sliceOp.triples().begin(), sliceOp.triples().end()); subcompOpers.append(sliceOp.fields().begin(), sliceOp.fields().end()); + assert(sliceOp.substr().empty() && + "Don't allow substring operations on array_coor. This " + "restriction may be lifted in the future."); } auto xArrCoor = rewriter.create( loc, arrCoor.getType(), arrCoor.memref(), shapeOpers, shiftOpers, diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.h b/flang/lib/Optimizer/CodeGen/TypeConverter.h index fe63da90b8ecb..f8d651808b02e 100644 --- a/flang/lib/Optimizer/CodeGen/TypeConverter.h +++ b/flang/lib/Optimizer/CodeGen/TypeConverter.h @@ -13,6 +13,11 @@ #ifndef FORTRAN_OPTIMIZER_CODEGEN_TYPECONVERTER_H #define FORTRAN_OPTIMIZER_CODEGEN_TYPECONVERTER_H +#include "DescriptorModel.h" +#include "flang/Lower/Todo.h" // remove when TODO's are done +#include "flang/Optimizer/Support/FIRContext.h" +#include "flang/Optimizer/Support/KindMapping.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/Debug.h" namespace fir { @@ -22,14 +27,117 @@ namespace fir { class LLVMTypeConverter : public mlir::LLVMTypeConverter { public: LLVMTypeConverter(mlir::ModuleOp module) - : mlir::LLVMTypeConverter(module.getContext()) { + : mlir::LLVMTypeConverter(module.getContext()), + kindMapping(getKindMapping(module)) { LLVM_DEBUG(llvm::dbgs() << "FIR type converter\n"); // Each conversion should return a value of type mlir::Type. + addConversion([&](BoxType box) { return convertBoxType(box); }); + addConversion([&](fir::LogicalType boolTy) { + return mlir::IntegerType::get( + &getContext(), kindMapping.getLogicalBitsize(boolTy.getFKind())); + }); + addConversion( + [&](fir::RecordType derived) { return convertRecordType(derived); }); addConversion( [&](fir::ReferenceType ref) { return convertPointerLike(ref); }); addConversion( [&](SequenceType sequence) { return convertSequenceType(sequence); }); + addConversion([&](mlir::TupleType tuple) { + LLVM_DEBUG(llvm::dbgs() << "type convert: " << tuple << '\n'); + llvm::SmallVector inMembers; + tuple.getFlattenedTypes(inMembers); + llvm::SmallVector members; + for (auto mem : inMembers) + members.push_back(convertType(mem).cast()); + return mlir::LLVM::LLVMStructType::getLiteral(&getContext(), members, + /*isPacked=*/false); + }); + } + + // fir.type --> llvm<"%name = { ty... }"> + mlir::Type convertRecordType(fir::RecordType derived) { + auto name = derived.getName(); + auto st = mlir::LLVM::LLVMStructType::getIdentified(&getContext(), name); + llvm::SmallVector members; + for (auto mem : derived.getTypeList()) { + members.push_back(convertType(mem.second).cast()); + } + if (mlir::succeeded(st.setBody(members, /*isPacked=*/false))) + return st; + return mlir::Type(); + } + + // Is an extended descriptor needed given the element type of a fir.box type ? + // Extended descriptors are required for derived types. + bool requiresExtendedDesc(mlir::Type boxElementType) { + auto eleTy = fir::unwrapSequenceType(boxElementType); + return eleTy.isa(); + } + + // Magic value to indicate we do not know the rank of an entity, either + // because it is assumed rank or because we have not determined it yet. + static constexpr int unknownRank() { return -1; } + + // This corresponds to the descriptor as defined in ISO_Fortran_binding.h and + // the addendum defined in descriptor.h. 
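+  // For a rank-1 box of f32 on a typical 64-bit build machine, this is
+  // expected to come out roughly as
+  //   !llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8,
+  //                     array<1 x array<3 x i64>>)>>
+  // with integer widths taken from the C types modeled in DescriptorModel.h.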
+ mlir::Type convertBoxType(BoxType box, int rank = unknownRank()) { + // (base_addr*, elem_len, version, rank, type, attribute, f18Addendum, [dim] + SmallVector dataDescFields; + mlir::Type ele = box.getEleTy(); + // remove fir.heap/fir.ref/fir.ptr + if (auto removeIndirection = fir::dyn_cast_ptrEleTy(ele)) + ele = removeIndirection; + auto eleTy = convertType(ele); + // base_addr* + if (ele.isa() && eleTy.isa()) + dataDescFields.push_back(eleTy); + else + dataDescFields.push_back(mlir::LLVM::LLVMPointerType::get(eleTy)); + // elem_len + dataDescFields.push_back(getDescFieldTypeModel<1>()(&getContext())); + // version + dataDescFields.push_back(getDescFieldTypeModel<2>()(&getContext())); + // rank + dataDescFields.push_back(getDescFieldTypeModel<3>()(&getContext())); + // type + dataDescFields.push_back(getDescFieldTypeModel<4>()(&getContext())); + // attribute + dataDescFields.push_back(getDescFieldTypeModel<5>()(&getContext())); + // f18Addendum + dataDescFields.push_back(getDescFieldTypeModel<6>()(&getContext())); + // [dims] + if (rank == unknownRank()) { + if (auto seqTy = ele.dyn_cast()) + rank = seqTy.getDimension(); + else + rank = 0; + } + if (rank > 0) { + auto rowTy = getDescFieldTypeModel<7>()(&getContext()); + dataDescFields.push_back(mlir::LLVM::LLVMArrayType::get(rowTy, rank)); + } + // opt-type-ptr: i8* (see fir.tdesc) + if (requiresExtendedDesc(ele)) { + dataDescFields.push_back( + getExtendedDescFieldTypeModel<8>()(&getContext())); + auto rowTy = getExtendedDescFieldTypeModel<9>()(&getContext()); + dataDescFields.push_back(mlir::LLVM::LLVMArrayType::get(rowTy, 1)); + if (auto recTy = fir::unwrapSequenceType(ele).dyn_cast()) + if (recTy.getNumLenParams() > 0) { + // The descriptor design needs to be clarified regarding the number of + // length parameters in the addendum. Since it can change for + // polymorphic allocatables, it seems all length parameters cannot + // always possibly be placed in the addendum. 
+ TODO_NOLOC("extended descriptor derived with length parameters"); + unsigned numLenParams = recTy.getNumLenParams(); + dataDescFields.push_back( + mlir::LLVM::LLVMArrayType::get(rowTy, numLenParams)); + } + } + return mlir::LLVM::LLVMPointerType::get( + mlir::LLVM::LLVMStructType::getLiteral(&getContext(), dataDescFields, + /*isPacked=*/false)); } template @@ -78,6 +186,9 @@ class LLVMTypeConverter : public mlir::LLVMTypeConverter { } return mlir::LLVM::LLVMPointerType::get(baseTy); } + +private: + KindMapping kindMapping; }; } // namespace fir diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 83494d18aef5b..62d04b30c6941 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -1285,39 +1285,6 @@ mlir::ParseResult fir::GlobalOp::verifyValidLinkage(StringRef linkage) { return mlir::success(llvm::is_contained(validNames, linkage)); } -template -static void appendAsAttribute(llvm::SmallVectorImpl &attrs, - mlir::Value val) { - if (auto *op = val.getDefiningOp()) { - if (auto cop = mlir::dyn_cast(op)) { - // append the integer constant value - if (auto iattr = cop.value().dyn_cast()) { - attrs.push_back(iattr); - return; - } - } else if (auto fld = mlir::dyn_cast(op)) { - if constexpr (AllowFields) { - // append the field name and the record type - attrs.push_back(fld.field_idAttr()); - attrs.push_back(fld.on_typeAttr()); - return; - } - } - } - llvm::report_fatal_error("cannot build Op with these arguments"); -} - -template -static mlir::ArrayAttr collectAsAttributes(mlir::MLIRContext *ctxt, - OperationState &result, - llvm::ArrayRef inds) { - llvm::SmallVector attrs; - for (auto v : inds) - appendAsAttribute(attrs, v); - assert(!attrs.empty()); - return mlir::ArrayAttr::get(ctxt, attrs); -} - //===----------------------------------------------------------------------===// // GlobalLenOp //===----------------------------------------------------------------------===// @@ -1351,18 +1318,6 @@ static void print(mlir::OpAsmPrinter &p, fir::GlobalLenOp &op) { << ", " << op.getOperation()->getAttr(fir::GlobalLenOp::intAttrName()); } -//===----------------------------------------------------------------------===// -// ExtractValueOp -//===----------------------------------------------------------------------===// - -void fir::ExtractValueOp::build(mlir::OpBuilder &builder, - OperationState &result, mlir::Type resTy, - mlir::Value aggVal, - llvm::ArrayRef inds) { - auto aa = collectAsAttributes<>(builder.getContext(), result, inds); - build(builder, result, resTy, aggVal, aa); -} - //===----------------------------------------------------------------------===// // FieldIndexOp //===----------------------------------------------------------------------===// @@ -1430,16 +1385,10 @@ void fir::FieldIndexOp::build(mlir::OpBuilder &builder, // InsertOnRangeOp //===----------------------------------------------------------------------===// -void fir::InsertOnRangeOp::build(mlir::OpBuilder &builder, - OperationState &result, mlir::Type resTy, - mlir::Value aggVal, mlir::Value eleVal, - llvm::ArrayRef inds) { - auto aa = collectAsAttributes(builder.getContext(), result, inds); - build(builder, result, resTy, aggVal, eleVal, aa); -} - /// Range bounds must be nonnegative, and the range must not be empty. 
static mlir::LogicalResult verify(fir::InsertOnRangeOp op) { + if (fir::hasDynamicSize(op.seq().getType())) + return op.emitOpError("must have constant shape and size"); if (op.coor().size() < 2 || op.coor().size() % 2 != 0) return op.emitOpError("has uneven number of values in ranges"); bool rangeIsKnownToBeNonempty = false; @@ -1461,14 +1410,6 @@ static mlir::LogicalResult verify(fir::InsertOnRangeOp op) { // InsertValueOp //===----------------------------------------------------------------------===// -void fir::InsertValueOp::build(mlir::OpBuilder &builder, OperationState &result, - mlir::Type resTy, mlir::Value aggVal, - mlir::Value eleVal, - llvm::ArrayRef inds) { - auto aa = collectAsAttributes<>(builder.getContext(), result, inds); - build(builder, result, resTy, aggVal, eleVal, aa); -} - static bool checkIsIntegerConstant(mlir::Attribute attr, int64_t conVal) { if (auto iattr = attr.dyn_cast()) return iattr.getInt() == conVal; @@ -2323,6 +2264,15 @@ fir::SelectOp::getSuccessorOperands(llvm::ArrayRef operands, return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; } +llvm::Optional +fir::SelectOp::getSuccessorOperands(mlir::ValueRange operands, unsigned oper) { + auto a = + (*this)->getAttrOfType(getTargetOffsetAttr()); + auto segments = (*this)->getAttrOfType( + getOperandSegmentSizeAttr()); + return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; +} + unsigned fir::SelectOp::targetOffsetSize() { return denseElementsSize((*this)->getAttrOfType( getTargetOffsetAttr())); @@ -2616,6 +2566,16 @@ fir::SelectRankOp::getSuccessorOperands(llvm::ArrayRef operands, return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; } +llvm::Optional +fir::SelectRankOp::getSuccessorOperands(mlir::ValueRange operands, + unsigned oper) { + auto a = + (*this)->getAttrOfType(getTargetOffsetAttr()); + auto segments = (*this)->getAttrOfType( + getOperandSegmentSizeAttr()); + return {getSubOperands(oper, getSubOperands(2, operands, segments), a)}; +} + unsigned fir::SelectRankOp::targetOffsetSize() { return denseElementsSize((*this)->getAttrOfType( getTargetOffsetAttr())); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 919853792944a..5a06cce481741 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1480,6 +1480,7 @@ CHECK_SIMPLE_CLAUSE(When, OMPC_when) CHECK_SIMPLE_CLAUSE(AdjustArgs, OMPC_adjust_args) CHECK_SIMPLE_CLAUSE(AppendArgs, OMPC_append_args) CHECK_SIMPLE_CLAUSE(MemoryOrder, OMPC_memory_order) +CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 331b9b2cf5bc3..8ee8c9a9c9cea 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -1916,7 +1916,7 @@ auto ExpressionAnalyzer::AnalyzeProcedureComponentRef( "Base of procedure component reference is not a derived-type object"_err_en_US); } } - CHECK(!GetContextualMessages().empty()); + CHECK(context_.AnyFatalError()); return std::nullopt; } diff --git a/flang/lib/Semantics/pointer-assignment.cpp b/flang/lib/Semantics/pointer-assignment.cpp index 7003242304d5f..0fe864308fdcf 100644 --- a/flang/lib/Semantics/pointer-assignment.cpp +++ b/flang/lib/Semantics/pointer-assignment.cpp @@ -383,7 +383,7 @@ bool CheckPointerAssignment(evaluate::FoldingContext &context, if (!pointer) 
{ return false; // error was reported } - if (!IsPointer(*pointer)) { + if (!IsPointer(pointer->GetUltimate())) { evaluate::SayWithDeclaration(context.messages(), *pointer, "'%s' is not a pointer"_err_en_US, pointer->name()); return false; diff --git a/flang/lib/Semantics/resolve-labels.cpp b/flang/lib/Semantics/resolve-labels.cpp index ff6105aa3a768..2363c832e1ec3 100644 --- a/flang/lib/Semantics/resolve-labels.cpp +++ b/flang/lib/Semantics/resolve-labels.cpp @@ -28,6 +28,12 @@ using IndexList = std::vector>; // A ProxyForScope is an integral proxy for a Fortran scope. This is required // because the parse tree does not actually have the scopes required. using ProxyForScope = unsigned; +// Minimal scope information +struct ScopeInfo { + ProxyForScope parent{}; + bool isExteriorGotoFatal{false}; + int depth{0}; +}; struct LabeledStatementInfoTuplePOD { ProxyForScope proxyForScope; parser::CharBlock parserCharBlock; @@ -153,14 +159,14 @@ static unsigned SayLabel(parser::Label label) { } struct UnitAnalysis { - UnitAnalysis() { scopeModel.push_back(0); } + UnitAnalysis() { scopeModel.emplace_back(); } SourceStmtList doStmtSources; SourceStmtList formatStmtSources; SourceStmtList otherStmtSources; SourceStmtList assignStmtSources; TargetStmtMap targetStmts; - std::vector scopeModel; + std::vector scopeModel; }; // Some parse tree record for statements simply wrap construct names; @@ -532,25 +538,34 @@ class ParseTreeAnalyzer { SemanticsContext &ErrorHandler() { return context_; } private: - bool PushSubscope() { - programUnits_.back().scopeModel.push_back(currentScope_); - currentScope_ = programUnits_.back().scopeModel.size() - 1; - return true; + ScopeInfo &PushScope() { + auto &model{programUnits_.back().scopeModel}; + int newDepth{model.empty() ? 1 : model[currentScope_].depth + 1}; + ScopeInfo &result{model.emplace_back()}; + result.parent = currentScope_; + result.depth = newDepth; + currentScope_ = model.size() - 1; + return result; } bool InitializeNewScopeContext() { programUnits_.emplace_back(UnitAnalysis{}); currentScope_ = 0u; - return PushSubscope(); + PushScope(); + return true; } - void PopScope() { - currentScope_ = programUnits_.back().scopeModel[currentScope_]; + ScopeInfo &PopScope() { + ScopeInfo &result{programUnits_.back().scopeModel[currentScope_]}; + currentScope_ = result.parent; + return result; } ProxyForScope ParentScope() { - return programUnits_.back().scopeModel[currentScope_]; + return programUnits_.back().scopeModel[currentScope_].parent; } bool SwitchToNewScope() { - PopScope(); - return PushSubscope(); + ScopeInfo &oldScope{PopScope()}; + bool isExteriorGotoFatal{oldScope.isExteriorGotoFatal}; + PushScope().isExteriorGotoFatal = isExteriorGotoFatal; + return true; } template bool PushConstructName(const A &a) { @@ -558,7 +573,13 @@ class ParseTreeAnalyzer { if (optionalName) { constructNames_.emplace_back(optionalName->ToString()); } - return PushSubscope(); + // Gotos into this construct from outside it are diagnosed, and + // are fatal unless the construct is a DO, IF, or SELECT CASE. 
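+    // For example, a GOTO from outside into a construct such as ASSOCIATE
+    // or CRITICAL is reported as a hard error, while a branch into the body
+    // of a DO, IF, or SELECT CASE construct only produces a warning.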
+ PushScope().isExteriorGotoFatal = + !(std::is_same_v || + std::is_same_v || + std::is_same_v); + return true; } bool PushConstructName(const parser::BlockConstruct &blockConstruct) { const auto &optionalName{ @@ -567,7 +588,8 @@ class ParseTreeAnalyzer { if (optionalName) { constructNames_.emplace_back(optionalName->ToString()); } - return PushSubscope(); + PushScope().isExteriorGotoFatal = true; + return true; } template void PopConstructNameIfPresent(const A &a) { const auto &optionalName{std::get<0>(std::get<0>(a.t).statement.t)}; @@ -796,9 +818,9 @@ class ParseTreeAnalyzer { std::vector constructNames_; }; -bool InInclusiveScope(const std::vector &scopes, - ProxyForScope tail, ProxyForScope head) { - for (; tail != head; tail = scopes[tail]) { +bool InInclusiveScope(const std::vector &scopes, ProxyForScope tail, + ProxyForScope head) { + for (; tail != head; tail = scopes[tail].parent) { if (!HasScope(tail)) { return false; } @@ -881,13 +903,13 @@ parser::CharBlock SkipLabel(const parser::CharBlock &position) { } ProxyForScope ParentScope( - const std::vector &scopes, ProxyForScope scope) { - return scopes[scope]; + const std::vector &scopes, ProxyForScope scope) { + return scopes[scope].parent; } void CheckLabelDoConstraints(const SourceStmtList &dos, const SourceStmtList &branches, const TargetStmtMap &labels, - const std::vector &scopes, SemanticsContext &context) { + const std::vector &scopes, SemanticsContext &context) { IndexList loopBodies; for (const auto &stmt : dos) { const auto &label{stmt.parserLabel}; @@ -936,7 +958,7 @@ void CheckLabelDoConstraints(const SourceStmtList &dos, // 6.2.5 void CheckScopeConstraints(const SourceStmtList &stmts, - const TargetStmtMap &labels, const std::vector &scopes, + const TargetStmtMap &labels, const std::vector &scopes, SemanticsContext &context) { for (const auto &stmt : stmts) { const auto &label{stmt.parserLabel}; @@ -955,8 +977,22 @@ void CheckScopeConstraints(const SourceStmtList &stmts, TargetStatementEnum::Format)) { continue; } + bool isFatal{false}; + ProxyForScope fromScope{scope}; + for (ProxyForScope toScope{target.proxyForScope}; fromScope != toScope; + toScope = scopes[toScope].parent) { + if (scopes[toScope].isExteriorGotoFatal) { + isFatal = true; + break; + } + if (scopes[toScope].depth == scopes[fromScope].depth) { + fromScope = scopes[fromScope].parent; + } + } context.Say(position, - "Label '%u' is in a construct that prevents its use as a branch target here"_en_US, + isFatal + ? 
"Label '%u' is in a construct that prevents its use as a branch target here"_err_en_US + : "Label '%u' is in a construct that prevents its use as a branch target here"_en_US, SayLabel(label)); } } @@ -990,7 +1026,7 @@ void CheckBranchTargetConstraints(const SourceStmtList &stmts, } void CheckBranchConstraints(const SourceStmtList &branches, - const TargetStmtMap &labels, const std::vector &scopes, + const TargetStmtMap &labels, const std::vector &scopes, SemanticsContext &context) { CheckScopeConstraints(branches, labels, scopes, context); CheckBranchTargetConstraints(branches, labels, context); @@ -1015,7 +1051,7 @@ void CheckDataXferTargetConstraints(const SourceStmtList &stmts, } void CheckDataTransferConstraints(const SourceStmtList &dataTransfers, - const TargetStmtMap &labels, const std::vector &scopes, + const TargetStmtMap &labels, const std::vector &scopes, SemanticsContext &context) { CheckScopeConstraints(dataTransfers, labels, scopes, context); CheckDataXferTargetConstraints(dataTransfers, labels, context); @@ -1045,7 +1081,7 @@ void CheckAssignTargetConstraints(const SourceStmtList &stmts, } void CheckAssignConstraints(const SourceStmtList &assigns, - const TargetStmtMap &labels, const std::vector &scopes, + const TargetStmtMap &labels, const std::vector &scopes, SemanticsContext &context) { CheckScopeConstraints(assigns, labels, scopes, context); CheckAssignTargetConstraints(assigns, labels, context); diff --git a/flang/runtime/assign.cpp b/flang/runtime/assign.cpp index 0455980ddd4d3..8d792011a6c8a 100644 --- a/flang/runtime/assign.cpp +++ b/flang/runtime/assign.cpp @@ -193,9 +193,9 @@ void Assign(Descriptor &to, const Descriptor &from, Terminator &terminator) { Descriptor &fromCompDesc{statDesc[1].descriptor()}; for (std::size_t j{0}; j < toElements; ++j, to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) { - comp.CreatePointerDescriptor(toCompDesc, to, toAt, terminator); + comp.CreatePointerDescriptor(toCompDesc, to, terminator, toAt); comp.CreatePointerDescriptor( - fromCompDesc, from, fromAt, terminator); + fromCompDesc, from, terminator, fromAt); Assign(toCompDesc, fromCompDesc, terminator); } } else { // Component has intrinsic type; simply copy raw bytes diff --git a/flang/runtime/command.cpp b/flang/runtime/command.cpp index 325fc274e3254..70276363d9001 100644 --- a/flang/runtime/command.cpp +++ b/flang/runtime/command.cpp @@ -24,9 +24,9 @@ std::int32_t RTNAME(ArgumentCount)() { return 0; } -// Returns the length of the \p n'th argument. Assumes \p n is valid. -static std::int64_t ArgumentLength(std::int32_t n) { - std::size_t length{std::strlen(executionEnvironment.argv[n])}; +// Returns the length of the \p string. Assumes \p string is valid. 
+static std::int64_t StringLength(const char *string) { + std::size_t length{std::strlen(string)}; if constexpr (sizeof(std::size_t) <= sizeof(std::int64_t)) { return static_cast(length); } else { @@ -37,11 +37,12 @@ static std::int64_t ArgumentLength(std::int32_t n) { } std::int64_t RTNAME(ArgumentLength)(std::int32_t n) { - if (n < 0 || n >= executionEnvironment.argc) { + if (n < 0 || n >= executionEnvironment.argc || + !executionEnvironment.argv[n]) { return 0; } - return ArgumentLength(n); + return StringLength(executionEnvironment.argv[n]); } static bool IsValidCharDescriptor(const Descriptor *value) { @@ -54,6 +55,20 @@ static void FillWithSpaces(const Descriptor *value) { std::memset(value->OffsetElement(), ' ', value->ElementBytes()); } +static std::int32_t CopyToDescriptor(const Descriptor &value, + const char *rawValue, std::int64_t rawValueLength, + const Descriptor *errmsg) { + std::int64_t toCopy{std::min( + rawValueLength, static_cast(value.ElementBytes()))}; + std::memcpy(value.OffsetElement(), rawValue, toCopy); + + if (rawValueLength > toCopy) { + return ToErrmsg(errmsg, StatValueTooShort); + } + + return StatOk; +} + std::int32_t RTNAME(ArgumentValue)( std::int32_t n, const Descriptor *value, const Descriptor *errmsg) { if (IsValidCharDescriptor(value)) { @@ -65,18 +80,13 @@ std::int32_t RTNAME(ArgumentValue)( } if (IsValidCharDescriptor(value)) { - std::int64_t argLen{ArgumentLength(n)}; + const char *arg{executionEnvironment.argv[n]}; + std::int64_t argLen{StringLength(arg)}; if (argLen <= 0) { return ToErrmsg(errmsg, StatMissingArgument); } - std::int64_t toCopy{ - std::min(argLen, static_cast(value->ElementBytes()))}; - std::memcpy(value->OffsetElement(), executionEnvironment.argv[n], toCopy); - - if (argLen > toCopy) { - return ToErrmsg(errmsg, StatValueTooShort); - } + return CopyToDescriptor(*value, arg, argLen, errmsg); } return StatOk; @@ -90,20 +100,45 @@ static std::size_t LengthWithoutTrailingSpaces(const Descriptor &d) { return s + 1; } -std::int64_t RTNAME(EnvVariableLength)( +static const char *GetEnvVariableValue( const Descriptor &name, bool trim_name, const char *sourceFile, int line) { std::size_t nameLength{ trim_name ? 
LengthWithoutTrailingSpaces(name) : name.ElementBytes()}; if (nameLength == 0) { - return 0; + return nullptr; } Terminator terminator{sourceFile, line}; const char *value{executionEnvironment.GetEnv( name.OffsetElement(), nameLength, terminator)}; + return value; +} + +std::int32_t RTNAME(EnvVariableValue)(const Descriptor &name, + const Descriptor *value, bool trim_name, const Descriptor *errmsg, + const char *sourceFile, int line) { + if (IsValidCharDescriptor(value)) { + FillWithSpaces(value); + } + + const char *rawValue{GetEnvVariableValue(name, trim_name, sourceFile, line)}; + if (!rawValue) { + return ToErrmsg(errmsg, StatMissingEnvVariable); + } + + if (IsValidCharDescriptor(value)) { + return CopyToDescriptor(*value, rawValue, StringLength(rawValue), errmsg); + } + + return StatOk; +} + +std::int64_t RTNAME(EnvVariableLength)( + const Descriptor &name, bool trim_name, const char *sourceFile, int line) { + const char *value{GetEnvVariableValue(name, trim_name, sourceFile, line)}; if (!value) { return 0; } - return std::strlen(value); + return StringLength(value); } } // namespace Fortran::runtime diff --git a/flang/runtime/connection.cpp b/flang/runtime/connection.cpp index 29d3220f43fc3..765ce90520c2a 100644 --- a/flang/runtime/connection.cpp +++ b/flang/runtime/connection.cpp @@ -8,6 +8,7 @@ #include "connection.h" #include "environment.h" +#include "io-stmt.h" #include namespace Fortran::runtime::io { @@ -33,4 +34,21 @@ void ConnectionState::HandleAbsolutePosition(std::int64_t n) { void ConnectionState::HandleRelativePosition(std::int64_t n) { positionInRecord = std::max(leftTabLimit.value_or(0), positionInRecord + n); } + +SavedPosition::SavedPosition(IoStatementState &io) : io_{io} { + ConnectionState &conn{io_.GetConnectionState()}; + saved_ = conn; + conn.pinnedFrame = true; +} + +SavedPosition::~SavedPosition() { + ConnectionState &conn{io_.GetConnectionState()}; + while (conn.currentRecordNumber > saved_.currentRecordNumber) { + io_.BackspaceRecord(); + } + conn.leftTabLimit = saved_.leftTabLimit; + conn.furthestPositionInRecord = saved_.furthestPositionInRecord; + conn.positionInRecord = saved_.positionInRecord; + conn.pinnedFrame = saved_.pinnedFrame; +} } // namespace Fortran::runtime::io diff --git a/flang/runtime/connection.h b/flang/runtime/connection.h index dfeebeb522dcd..0eb2038500b24 100644 --- a/flang/runtime/connection.h +++ b/flang/runtime/connection.h @@ -17,6 +17,8 @@ namespace Fortran::runtime::io { +class IoStatementState; + enum class Direction { Output, Input }; enum class Access { Sequential, Direct, Stream }; @@ -57,40 +59,25 @@ struct ConnectionState : public ConnectionAttributes { // or an end-of-file READ condition on a sequential access file std::optional endfileRecordNumber; + // Mutable modes set at OPEN() that can be overridden in READ/WRITE & FORMAT + MutableModes modes; // BLANK=, DECIMAL=, SIGN=, ROUND=, PAD=, DELIM=, kP + // Set when processing repeated items during list-directed & NAMELIST input // in order to keep a span of records in frame on a non-positionable file, // so that backspacing to the beginning of the repeated item doesn't require // repositioning the external storage medium when that's impossible. - std::optional resumptionRecordNumber; - - // Mutable modes set at OPEN() that can be overridden in READ/WRITE & FORMAT - MutableModes modes; // BLANK=, DECIMAL=, SIGN=, ROUND=, PAD=, DELIM=, kP + bool pinnedFrame{false}; }; // Utility class for capturing and restoring a position in an input stream. 
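// The constructor now takes the whole IoStatementState rather than just the
// ConnectionState: it copies the full ConnectionState and sets pinnedFrame,
// and restoring may have to rewind entire records, which is done with
// IoStatementState::BackspaceRecord() (see connection.cpp above). A minimal
// sketch of the underlying copy-on-entry/restore-on-exit RAII pattern, with
// illustrative names only, not the runtime's own class:

template <typename T> class ScopedValueRestore {
public:
  explicit ScopedValueRestore(T &target) : target_{target}, saved_{target} {}
  ~ScopedValueRestore() { target_ = saved_; }
  ScopedValueRestore(const ScopedValueRestore &) = delete;
  ScopedValueRestore &operator=(const ScopedValueRestore &) = delete;

private:
  T &target_;
  T saved_; // copy taken on entry, written back on scope exit
};

// ListDirectedStatementState uses the same idea by holding its guard in a
// std::optional (repeatPosition_): emplace() pins the current position and
// reset() restores it, as the io-stmt.cpp hunks below show.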
class SavedPosition { public: - explicit SavedPosition(ConnectionState &c) - : connection_{c}, positionInRecord_{c.positionInRecord}, - furthestPositionInRecord_{c.furthestPositionInRecord}, - leftTabLimit_{c.leftTabLimit}, previousResumptionRecordNumber_{ - c.resumptionRecordNumber} { - c.resumptionRecordNumber = c.currentRecordNumber; - } - ~SavedPosition() { - connection_.currentRecordNumber = *connection_.resumptionRecordNumber; - connection_.resumptionRecordNumber = previousResumptionRecordNumber_; - connection_.leftTabLimit = leftTabLimit_; - connection_.furthestPositionInRecord = furthestPositionInRecord_; - connection_.positionInRecord = positionInRecord_; - } + explicit SavedPosition(IoStatementState &); + ~SavedPosition(); private: - ConnectionState &connection_; - std::int64_t positionInRecord_; - std::int64_t furthestPositionInRecord_; - std::optional leftTabLimit_; - std::optional previousResumptionRecordNumber_; + IoStatementState &io_; + ConnectionState saved_; }; } // namespace Fortran::runtime::io diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h index f0d3deb69d09b..03b7e798af431 100644 --- a/flang/runtime/descriptor-io.h +++ b/flang/runtime/descriptor-io.h @@ -249,7 +249,7 @@ static bool DefaultFormattedComponentIO(IoStatementState &io, StaticDescriptor statDesc; Descriptor &desc{statDesc.descriptor()}; component.CreatePointerDescriptor( - desc, origDescriptor, origSubscripts, terminator); + desc, origDescriptor, terminator, origSubscripts); return DescriptorIO
<DIR>
(io, desc); } else { // Component is itself a descriptor diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp index fcb1c977f0d81..a5524bd55b228 100644 --- a/flang/runtime/descriptor.cpp +++ b/flang/runtime/descriptor.cpp @@ -241,6 +241,13 @@ bool Descriptor::EstablishPointerSection(const Descriptor &source, } } raw_.rank = newRank; + if (const auto *sourceAddendum = source.Addendum()) { + if (auto *addendum{Addendum()}) { + *addendum = *sourceAddendum; + } else { + return false; + } + } return CFI_section(&raw_, &source.raw_, lower, upper, stride) == CFI_SUCCESS; } diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 44fc7aefea6e7..784d14496aac0 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -680,14 +680,8 @@ ListDirectedStatementState::GetNextDataEdit( comma = ';'; } if (remaining_ > 0 && !realPart_) { // "r*c" repetition in progress - RUNTIME_CHECK( - io.GetIoErrorHandler(), connection.resumptionRecordNumber.has_value()); - while (connection.currentRecordNumber > - connection.resumptionRecordNumber.value_or( - connection.currentRecordNumber)) { - io.BackspaceRecord(); - } - connection.HandleAbsolutePosition(repeatPositionInRecord_); + RUNTIME_CHECK(io.GetIoErrorHandler(), repeatPosition_.has_value()); + repeatPosition_.reset(); // restores the saved position if (!imaginaryPart_) { edit.repeat = std::min(remaining_, maxRepeat); auto ch{io.GetCurrentChar()}; @@ -697,8 +691,8 @@ ListDirectedStatementState::GetNextDataEdit( } } remaining_ -= edit.repeat; - if (remaining_ <= 0) { - connection.resumptionRecordNumber.reset(); + if (remaining_ > 0) { + repeatPosition_.emplace(io); } return edit; } @@ -761,11 +755,8 @@ ListDirectedStatementState::GetNextDataEdit( edit.repeat = std::min(r, maxRepeat); remaining_ = r - edit.repeat; if (remaining_ > 0) { - connection.resumptionRecordNumber = connection.currentRecordNumber; - } else { - connection.resumptionRecordNumber.reset(); + repeatPosition_.emplace(io); } - repeatPositionInRecord_ = connection.positionInRecord; } else { // not a repetition count, just an integer value; rewind connection.positionInRecord = start; } diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 7e683b146eb1c..0006cab2b2ae1 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -241,7 +241,7 @@ class ListDirectedStatementState private: int remaining_{0}; // for "r*" repetition - std::int64_t repeatPositionInRecord_; + std::optional repeatPosition_; bool eatComma_{false}; // consume comma after previously read item bool hitSlash_{false}; // once '/' is seen, nullify further items bool realPart_{false}; diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp index 03fddd61f24da..0afdee5cfe94e 100644 --- a/flang/runtime/namelist.cpp +++ b/flang/runtime/namelist.cpp @@ -236,7 +236,7 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc, type{addendum ? 
addendum->derivedType() : nullptr}) { if (const typeInfo::Component * comp{type->FindDataComponent(compName, std::strlen(compName))}) { - comp->CreatePointerDescriptor(desc, source, nullptr, handler); + comp->CreatePointerDescriptor(desc, source, handler); return true; } else { handler.SignalError( @@ -244,6 +244,10 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc, "a component of its derived type", compName, name); } + } else if (source.type().IsDerived()) { + handler.Crash("Derived type object '%s' in NAMELIST is missing its " + "derived type information!", + name); } else { handler.SignalError("NAMELIST component reference '%%%s' of input group " "item %s for non-derived type", @@ -320,9 +324,14 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { Descriptor &mutableDescriptor{staticDesc[whichStaticDesc].descriptor()}; whichStaticDesc ^= 1; if (*next == '(') { - HandleSubscripts(io, mutableDescriptor, *useDescriptor, name); + if (!(HandleSubscripts( + io, mutableDescriptor, *useDescriptor, name))) { + return false; + } } else { - HandleComponent(io, mutableDescriptor, *useDescriptor, name); + if (!HandleComponent(io, mutableDescriptor, *useDescriptor, name)) { + return false; + } } useDescriptor = &mutableDescriptor; next = io.GetCurrentChar(); @@ -359,7 +368,7 @@ bool IsNamelistName(IoStatementState &io) { if (io.get_if>()) { ConnectionState &connection{io.GetConnectionState()}; if (connection.modes.inNamelist) { - SavedPosition savedPosition{connection}; + SavedPosition savedPosition{io}; if (auto ch{io.GetNextNonBlank()}) { if (IsLegalIdStart(*ch)) { do { @@ -368,7 +377,7 @@ bool IsNamelistName(IoStatementState &io) { } while (ch && IsLegalIdChar(*ch)); ch = io.GetNextNonBlank(); // TODO: how to deal with NaN(...) ambiguity? 
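// (The ambiguity: in NAMELIST input a real item may be given the value
// NaN(...), so an identifier followed by '(' could be either a subscripted
// group-item name or that NaN literal; this lookahead currently treats it
// as a name.)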
- return ch && (ch == '=' || ch == '(' || ch == '%'); + return ch && (*ch == '=' || *ch == '(' || *ch == '%'); } } } diff --git a/flang/runtime/stat.cpp b/flang/runtime/stat.cpp index d28187c1a4264..3ddcb2ba7c8d3 100644 --- a/flang/runtime/stat.cpp +++ b/flang/runtime/stat.cpp @@ -57,6 +57,9 @@ const char *StatErrorString(int stat) { case StatValueTooShort: return "Value too short"; + case StatMissingEnvVariable: + return "Missing environment variable"; + default: return nullptr; } diff --git a/flang/runtime/stat.h b/flang/runtime/stat.h index 5042f4bc4263c..a030784090746 100644 --- a/flang/runtime/stat.h +++ b/flang/runtime/stat.h @@ -39,6 +39,7 @@ enum Stat { StatFailedImage = FORTRAN_RUNTIME_STAT_FAILED_IMAGE, StatLocked = FORTRAN_RUNTIME_STAT_LOCKED, StatLockedOtherImage = FORTRAN_RUNTIME_STAT_LOCKED_OTHER_IMAGE, + StatMissingEnvVariable = FORTRAN_RUNTIME_STAT_MISSING_ENV_VAR, StatStoppedImage = FORTRAN_RUNTIME_STAT_STOPPED_IMAGE, StatUnlocked = FORTRAN_RUNTIME_STAT_UNLOCKED, StatUnlockedFailedImage = FORTRAN_RUNTIME_STAT_UNLOCKED_FAILED_IMAGE, diff --git a/flang/runtime/type-info.cpp b/flang/runtime/type-info.cpp index 8e26a53aac0be..37c3c1f5ab861 100644 --- a/flang/runtime/type-info.cpp +++ b/flang/runtime/type-info.cpp @@ -116,11 +116,15 @@ void Component::EstablishDescriptor(Descriptor &descriptor, } void Component::CreatePointerDescriptor(Descriptor &descriptor, - const Descriptor &container, const SubscriptValue subscripts[], - Terminator &terminator) const { + const Descriptor &container, Terminator &terminator, + const SubscriptValue *subscripts) const { RUNTIME_CHECK(terminator, genre_ == Genre::Data); EstablishDescriptor(descriptor, container, terminator); - descriptor.set_base_addr(container.Element(subscripts) + offset_); + if (subscripts) { + descriptor.set_base_addr(container.Element(subscripts) + offset_); + } else { + descriptor.set_base_addr(container.OffsetElement() + offset_); + } descriptor.raw().attribute = CFI_attribute_pointer; } @@ -167,12 +171,11 @@ static void DumpScalarCharacter( } FILE *DerivedType::Dump(FILE *f) const { - std::fprintf( - f, "DerivedType @ 0x%p:\n", reinterpret_cast(this)); + std::fprintf(f, "DerivedType @ %p:\n", reinterpret_cast(this)); const std::uint64_t *uints{reinterpret_cast(this)}; for (int j{0}; j < 64; ++j) { int offset{j * static_cast(sizeof *uints)}; - std::fprintf(f, " [+%3d](0x%p) 0x%016jx", offset, + std::fprintf(f, " [+%3d](%p) 0x%016jx", offset, reinterpret_cast(&uints[j]), static_cast(uints[j])); if (offset == offsetof(DerivedType, binding_)) { @@ -235,7 +238,7 @@ FILE *DerivedType::Dump(FILE *f) const { } FILE *Component::Dump(FILE *f) const { - std::fprintf(f, "Component @ 0x%p:\n", reinterpret_cast(this)); + std::fprintf(f, "Component @ %p:\n", reinterpret_cast(this)); std::fputs(" name: ", f); DumpScalarCharacter(f, name(), "Component::name"); if (genre_ == Genre::Data) { @@ -252,7 +255,7 @@ FILE *Component::Dump(FILE *f) const { std::fprintf(f, " category %d kind %d rank %d offset 0x%zx\n", category_, kind_, rank_, static_cast(offset_)); if (initialization_) { - std::fprintf(f, " initialization @ 0x%p:\n", + std::fprintf(f, " initialization @ %p:\n", reinterpret_cast(initialization_)); for (int j{0}; j < 128; j += sizeof(std::uint64_t)) { std::fprintf(f, " [%3d] 0x%016jx\n", j, @@ -265,7 +268,7 @@ FILE *Component::Dump(FILE *f) const { FILE *SpecialBinding::Dump(FILE *f) const { std::fprintf( - f, "SpecialBinding @ 0x%p:\n", reinterpret_cast(this)); + f, "SpecialBinding @ %p:\n", reinterpret_cast(this)); switch 
(which_) { case Which::ScalarAssignment: std::fputs(" ScalarAssignment", f); @@ -297,7 +300,7 @@ FILE *SpecialBinding::Dump(FILE *f) const { break; } std::fprintf(f, " isArgDescriptorSet: 0x%x\n", isArgDescriptorSet_); - std::fprintf(f, " proc: 0x%p\n", reinterpret_cast(proc_)); + std::fprintf(f, " proc: %p\n", reinterpret_cast(proc_)); return f; } diff --git a/flang/runtime/type-info.h b/flang/runtime/type-info.h index 9f9bfb6337ddb..62b8c6c3cbae2 100644 --- a/flang/runtime/type-info.h +++ b/flang/runtime/type-info.h @@ -86,9 +86,10 @@ class Component { void EstablishDescriptor( Descriptor &, const Descriptor &container, Terminator &) const; - // Creates a pointer descriptor from this component description. + // Creates a pointer descriptor from this component description, possibly + // with subscripts void CreatePointerDescriptor(Descriptor &, const Descriptor &container, - const SubscriptValue[], Terminator &) const; + Terminator &, const SubscriptValue * = nullptr) const; FILE *Dump(FILE * = stdout) const; diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 575af15513d70..829b471f3424a 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -406,30 +406,30 @@ void ExternalFileUnit::FinishReadingRecord(IoErrorHandler &handler) { // avoid bogus crashes in END/ERR circumstances } else if (access == Access::Sequential) { RUNTIME_CHECK(handler, recordLength.has_value()); + recordOffsetInFrame_ += *recordLength; if (isFixedRecordLength && access == Access::Direct) { - frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; + frameOffsetInFile_ += recordOffsetInFrame_; recordOffsetInFrame_ = 0; } else { RUNTIME_CHECK(handler, isUnformatted.has_value()); + recordLength.reset(); if (isUnformatted.value_or(false)) { // Retain footer in frame for more efficient BACKSPACE - frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; + frameOffsetInFile_ += recordOffsetInFrame_; recordOffsetInFrame_ = sizeof(std::uint32_t); - recordLength.reset(); } else { // formatted - if (FrameLength() > recordOffsetInFrame_ + *recordLength && - Frame()[recordOffsetInFrame_ + *recordLength] == '\r') { + if (FrameLength() > recordOffsetInFrame_ && + Frame()[recordOffsetInFrame_] == '\r') { ++recordOffsetInFrame_; } if (FrameLength() >= recordOffsetInFrame_ && - Frame()[recordOffsetInFrame_ + *recordLength] == '\n') { + Frame()[recordOffsetInFrame_] == '\n') { ++recordOffsetInFrame_; } - if (!resumptionRecordNumber || mayPosition()) { - frameOffsetInFile_ += recordOffsetInFrame_ + *recordLength; + if (!pinnedFrame || mayPosition()) { + frameOffsetInFile_ += recordOffsetInFrame_; recordOffsetInFrame_ = 0; } - recordLength.reset(); } } } diff --git a/flang/test/Evaluate/folding07.f90 b/flang/test/Evaluate/folding07.f90 index 6848a1e142f3f..9d9d4e9ee6b5d 100644 --- a/flang/test/Evaluate/folding07.f90 +++ b/flang/test/Evaluate/folding07.f90 @@ -16,22 +16,22 @@ module m logical, parameter :: test_bit_size_16 = bs16 == 128 real(2), parameter :: & - eps2 = epsilon(0._2), zeps2 = real(z'1000', kind=2), deps2 = 4.8828125e-4_2 + eps2 = epsilon(0._2), zeps2 = real(z'1400', kind=2), deps2 = 9.765625e-4_2 real(3), parameter :: & - eps3 = epsilon(0._3), zeps3 = real(z'3b80', kind=3), deps3 = 3.90625e-3_3 + eps3 = epsilon(0._3), zeps3 = real(z'3c00', kind=3), deps3 = 7.8135e-3_3 real(4), parameter :: & - eps4 = epsilon(0._4), zeps4 = real(z'33800000', kind=4), & - deps4 = 5.9604644775390625e-8_4 + eps4 = epsilon(0._4), zeps4 = real(z'34000000', kind=4), & + deps4 = 1.1920928955078125e-07_4 real(8), 
parameter :: & - eps8 = epsilon(0._8), zeps8 = real(z'3ca0000000000000', kind=8), & - deps8 = 1.1102230246251565404236316680908203125e-16_8 + eps8 = epsilon(0._8), zeps8 = real(z'3cb0000000000000', kind=8), & + deps8 = 2.2204460492503130808472633361816406250e-16_8 real(10), parameter :: & - eps10 = epsilon(0._10), zeps10 = real(z'3fbf8000000000000000', kind=10), & - deps10 = 5.42101086242752217003726400434970855712890625e-20_10 + eps10 = epsilon(0._10), zeps10 = real(z'3fc08000000000000000', kind=10), & + deps10 = 1.08420217248550443400745280086994171142578125e-19_10 real(16), parameter :: & eps16 = epsilon(0._16), & - zeps16 = real(z'3f8e0000000000000000000000000000', kind=16), & - deps16 = 9.629649721936179265279889712924636592690508241076940976199693977832794189453125e-35_16 + zeps16 = real(z'3f8f0000000000000000000000000000', kind=16), & + deps16 = 1.9259299443872358530559779425849273185381016482153881952399387955665588378906250e-34_16 logical, parameter :: test_eps2 = eps2 == zeps2 .and. eps2 == deps2 logical, parameter :: test_eps3 = eps3 == zeps3 .and. eps3 == deps3 logical, parameter :: test_eps4 = eps4 == zeps4 .and. eps4 == deps4 diff --git a/flang/test/Examples/omp-atomic.f90 b/flang/test/Examples/omp-atomic.f90 index 7f4927092a32c..dcca34b633a3e 100644 --- a/flang/test/Examples/omp-atomic.f90 +++ b/flang/test/Examples/omp-atomic.f90 @@ -1,5 +1,3 @@ -! Check the flang-omp-report plugin for omp-atomic.f90 - ! REQUIRES: plugins, examples, shell ! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s @@ -27,29 +25,29 @@ ! CHECK:--- ! CHECK-NEXT:- file: '{{[^"]*}}omp-atomic.f90' -! CHECK-NEXT: line: 11 +! CHECK-NEXT: line: 9 ! CHECK-NEXT: construct: atomic-read ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: seq_cst ! CHECK-NEXT: details: '' ! CHECK-NEXT:- file: '{{[^"]*}}omp-atomic.f90' -! CHECK-NEXT: line: 14 +! CHECK-NEXT: line: 12 ! CHECK-NEXT: construct: atomic-write ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: seq_cst ! CHECK-NEXT: details: '' ! CHECK-NEXT:- file: '{{[^"]*}}omp-atomic.f90' -! CHECK-NEXT: line: 18 +! CHECK-NEXT: line: 16 ! CHECK-NEXT: construct: atomic-capture ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: seq_cst ! CHECK-NEXT: details: '' ! CHECK-NEXT:- file: '{{[^"]*}}omp-atomic.f90' -! CHECK-NEXT: line: 23 +! CHECK-NEXT: line: 21 ! CHECK-NEXT: construct: atomic-atomic ! CHECK-NEXT: clauses: [] ! CHECK-NEXT:- file: '{{[^"]*}}omp-atomic.f90' -! CHECK-NEXT: line: 10 +! CHECK-NEXT: line: 8 ! CHECK-NEXT: construct: parallel ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: num_threads diff --git a/flang/test/Examples/omp-declarative-directive.f90 b/flang/test/Examples/omp-declarative-directive.f90 index 2606d0ae6f7ae..632ebcec17885 100644 --- a/flang/test/Examples/omp-declarative-directive.f90 +++ b/flang/test/Examples/omp-declarative-directive.f90 @@ -1,5 +1,3 @@ -! Check the flang-omp-report plugin for omp-declarative-directive.f90 - ! REQUIRES: plugins, examples, shell ! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s @@ -31,13 +29,13 @@ end module m2 ! CHECK:--- ! CHECK-NEXT:- file: '{{[^"]*}}omp-declarative-directive.f90' -! CHECK-NEXT: line: 13 +! CHECK-NEXT: line: 11 ! CHECK-NEXT: construct: declare simd ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: aligned ! CHECK-NEXT: details: a ! CHECK-NEXT:- file: '{{[^"]*}}omp-declarative-directive.f90' -! CHECK-NEXT: line: 23 +! CHECK-NEXT: line: 21 ! CHECK-NEXT: construct: declare target ! 
CHECK-NEXT: clauses: [] ! CHECK-NEXT:... diff --git a/flang/test/Examples/omp-device-constructs.f90 b/flang/test/Examples/omp-device-constructs.f90 index 08ee1acca0d9b..916f7c936f1ae 100644 --- a/flang/test/Examples/omp-device-constructs.f90 +++ b/flang/test/Examples/omp-device-constructs.f90 @@ -1,5 +1,3 @@ -! Check flang-omp-report --femit-yaml for omp-device-constructs.f90 - ! REQUIRES: plugins, examples, shell !RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s @@ -55,25 +53,25 @@ end program main ! CHECK: --- ! CHECK-NEXT: - file: '{{[^"]*}}omp-device-constructs.f90' -! CHECK-NEXT: line: 18 +! CHECK-NEXT: line: 16 ! CHECK-NEXT: construct: target ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: map ! CHECK-NEXT: details: arraya ! CHECK-NEXT: - file: '{{[^"]*}}omp-device-constructs.f90' -! CHECK-NEXT: line: 24 +! CHECK-NEXT: line: 22 ! CHECK-NEXT: construct: target ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: device ! CHECK-NEXT: details: '0' ! CHECK-NEXT: - file: '{{[^"]*}}omp-device-constructs.f90' -! CHECK-NEXT: line: 30 +! CHECK-NEXT: line: 28 ! CHECK-NEXT: construct: target ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: defaultmap ! CHECK-NEXT: details: 'tofrom:scalar' ! CHECK-NEXT: - file: '{{[^"]*}}omp-device-constructs.f90' -! CHECK-NEXT: line: 36 +! CHECK-NEXT: line: 34 ! CHECK-NEXT: construct: teams ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: default @@ -87,13 +85,13 @@ end program main ! CHECK-NEXT: - clause: thread_limit ! CHECK-NEXT: details: '10' ! CHECK-NEXT: - file: '{{[^"]*}}omp-device-constructs.f90' -! CHECK-NEXT: line: 42 +! CHECK-NEXT: line: 40 ! CHECK-NEXT: construct: target ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: map ! CHECK-NEXT: details: 'tofrom:a' ! CHECK-NEXT: - file: '{{[^"]*}}omp-device-constructs.f90' -! CHECK-NEXT: line: 48 +! CHECK-NEXT: line: 46 ! CHECK-NEXT: construct: target data ! CHECK-NEXT: clauses: ! CHECK-NEXT: - clause: device diff --git a/flang/test/Examples/omp-nowait.f90 b/flang/test/Examples/omp-nowait.f90 new file mode 100644 index 0000000000000..091a952ae9106 --- /dev/null +++ b/flang/test/Examples/omp-nowait.f90 @@ -0,0 +1,297 @@ +! REQUIRES: plugins, examples, shell + +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s + +subroutine sb(n) +implicit none + +integer :: n +integer :: arr(n,n), brr(n,n), crr(n,n) +integer :: arr_single(n),arr_quad(n,n,n,n) +integer :: i,j,k,l,tmp,tmp1,tmp2 + +! CHECK:--- + +!Simple check with nowait +!$omp do +do i = 1, n + arr_single(i) = i +end do +!$omp end do nowait +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-6]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' + +!Check for no effects on loop without nowait +!$omp do +do i = 1, n + arr_single(i) = i +end do +!$omp end do +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-6]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: [] + +!Check with another construct nested inside loop with nowait +!$omp parallel shared(arr) +!$omp do +do i = 1, n +!$omp critical + arr_single(i) = i +!$omp end critical +end do +!$omp end do nowait +!$omp end parallel +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-7]] +! CHECK-NEXT: construct: critical +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-13]] +! CHECK-NEXT: construct: do +! 
CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-20]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: shared +! CHECK-NEXT: details: arr + +!Check with back to back loops (one with nowait) inside a parallel construct +!$omp parallel shared(arr) +!$omp do +do i=1,10 + arr(i,j) = i+j +end do +!$omp end do nowait +!$omp do schedule(guided) +do j=1,10 +end do +!$omp end do +!$omp end parallel +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-11]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-12]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: schedule +! CHECK-NEXT: details: guided +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-24]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: shared +! CHECK-NEXT: details: arr + + +!Check nested parallel do loops with a nowait outside +!$omp parallel shared(arr) +!$omp do +do i=1,10 + arr_single(i)=i + !$omp parallel + !$omp do + do j=1,10 + !$omp critical + arr(i,j) = i+j + !$omp end critical + end do + !$omp end do + !$omp end parallel +end do +!$omp end do nowait +!$omp end parallel +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-10]] +! CHECK-NEXT: construct: critical +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-16]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-21]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-28]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-35]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: shared +! CHECK-NEXT: details: arr + +!Check nested parallel do loops with a nowait inside +!$omp parallel shared(arr) +!$omp do +do i=1,10 + arr_single(i)=i + !$omp parallel + !$omp do + do j=1,10 + !$omp critical + arr(i,j) = i+j + !$omp end critical + end do + !$omp end do nowait + !$omp end parallel +end do +!$omp end do +!$omp end parallel +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-10]] +! CHECK-NEXT: construct: critical +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-16]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-23]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-30]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-35]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: shared +! 
CHECK-NEXT: details: arr + +!Check nested parallel do loops with a nowait inside +!$omp parallel +!$omp do +do i=1,10 + arr_single(i)=i + !$omp parallel shared(arr_quad) + !$omp do schedule(dynamic) + do j=1,10 + !$omp parallel + !$omp do + do k=1,10 + !$omp parallel + !$omp do + do l=1,10 + arr_quad(i,j,k,l) = i+j+k+l + end do + !$omp end do nowait + !$omp end parallel + end do + !$omp end do + !$omp end parallel + end do + !$omp end do nowait + !$omp end parallel +end do +!$omp end do +!$omp end parallel +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-16]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-23]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-29]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-34]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-40]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT: - clause: schedule +! CHECK-NEXT: details: dynamic +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-49]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: shared +! CHECK-NEXT: details: arr_quad +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-58]] +! CHECK-NEXT: construct: do +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-63]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: [] + + +!Check a do simd with nowait +!$omp do simd private(tmp) +do j = 1,n + do i = 1,n + tmp = arr(i,j) + brr(i,j) + crr(i,j) = tmp + end do +end do +!$omp end do simd nowait +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-9]] +! CHECK-NEXT: construct: do simd +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT: - clause: private +! CHECK-NEXT: details: tmp + + +!test nowait on non-do construct +!$omp parallel +!$omp single +tmp1 = i+j +!$omp end single + +!$omp single +tmp2 = i-j +!$omp end single nowait +!$omp end parallel +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-9]] +! CHECK-NEXT: construct: single +! CHECK-NEXT: clauses: [] +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-9]] +! CHECK-NEXT: construct: single +! CHECK-NEXT: clauses: +! CHECK-NEXT: - clause: nowait +! CHECK-NEXT: details: '' +! CHECK-NEXT:- file: '{{[^"]*}}omp-nowait.f90' +! CHECK-NEXT: line: [[@LINE-20]] +! CHECK-NEXT: construct: parallel +! CHECK-NEXT: clauses: [] + +end subroutine + +! CHECK-NEXT:... 
diff --git a/flang/test/Fir/cg-ops.fir b/flang/test/Fir/cg-ops.fir index ba28d7faf8cca..6285937ce08c5 100644 --- a/flang/test/Fir/cg-ops.fir +++ b/flang/test/Fir/cg-ops.fir @@ -1,4 +1,4 @@ -// RUN: fir-opt --pass-pipeline="builtin.func(cg-rewrite),fir.global(cg-rewrite),cse" %s | FileCheck %s +// RUN: fir-opt --split-input-file --pass-pipeline="cg-rewrite,cse" %s | FileCheck %s // CHECK-LABEL: func @codegen( // CHECK-SAME: %[[arg:.*]]: !fir @@ -16,6 +16,8 @@ func @codegen(%addr : !fir.ref>) { return } +// ----- + // CHECK-LABEL: fir.global @box_global fir.global @box_global : !fir.box> { // CHECK: %[[arr:.*]] = fir.zero_bits !fir.ref @@ -28,3 +30,37 @@ fir.global @box_global : !fir.box> { %3 = fir.embox %arr (%1) [%2] : (!fir.ref>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box> fir.has_value %3 : !fir.box> } + +// ----- + +// fir.embox with slice with substr + +// CHECK-LABEL: func @codegen( +// CHECK-SAME: %[[arg:.*]]: !fir +func @codegen(%addr : !fir.ref>) { + // CHECK: %[[zero:.*]] = arith.constant 0 : index + %0 = arith.constant 0 : index + %1 = fir.shape_shift %0, %0 : (index, index) -> !fir.shapeshift<1> + %2 = fir.slice %0, %0, %0 substr %0, %0: (index, index, index, index, index) -> !fir.slice<1> + // CHECK: %[[box:.*]] = fircg.ext_embox %[[arg]](%[[zero]]) origin %[[zero]][%[[zero]], %[[zero]], %[[zero]]] substr %[[zero]], %[[zero]] : (!fir.ref>, index, index, index, index, index, index, index) -> !fir.box> + %3 = fir.embox %addr (%1) [%2] : (!fir.ref>, !fir.shapeshift<1>, !fir.slice<1>) -> !fir.box> + return +} + +// ----- + +// fir.rebox with slice with substr + +// CHECK-LABEL: func @codegen( +// CHECK-SAME: %[[arg:.*]]: !fir +func @codegen(%addr : !fir.box>) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : index + %0 = fir.slice %c10, %c1, %c1 substr %c1, %c1: (index, index, index, index, index) -> !fir.slice<1> + %1 = fir.shift %c0 : (index) -> !fir.shift<1> + %2 = fir.rebox %addr(%1) [%0] : (!fir.box>, !fir.shift<1>, !fir.slice<1>) -> !fir.box> + return +} + +// CHECK: %{{.*}} = fircg.ext_rebox %[[arg]] origin %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] substr %{{.*}}, %{{.*}} : (!fir.box>, index, index, index, index, index, index) -> !fir.box> diff --git a/flang/test/Fir/convert-to-llvm-invalid.fir b/flang/test/Fir/convert-to-llvm-invalid.fir new file mode 100644 index 0000000000000..3ee008860ba7e --- /dev/null +++ b/flang/test/Fir/convert-to-llvm-invalid.fir @@ -0,0 +1,10 @@ +// Test FIR to LLVM IR conversion invalid cases and diagnostics. + +// RUN: fir-opt --split-input-file --fir-to-llvm-ir --verify-diagnostics %s + +func @zero_aggregate() { + // expected-error@+1{{failed to legalize operation 'fir.zero_bits'}} + %a = fir.zero_bits !fir.array<10xf32> + return +} + diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 9e1b02590f193..9d8b9bb1a7a74 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -81,3 +81,298 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> { // CHECK: %[[CST:.*]] = llvm.mlir.constant(dense<1> : vector<32x32xi32>) : !llvm.array<32 x array<32 x i32>> // CHECK: llvm.return %[[CST]] : !llvm.array<32 x array<32 x i32>> // CHECK: } + +// ----- + +// Test global with insert_on_range operation not covering the full array +// in initializer region. 
+ +fir.global internal @_QEmultiarray : !fir.array<32xi32> { + %c0_i32 = arith.constant 1 : i32 + %0 = fir.undefined !fir.array<32xi32> + %2 = fir.insert_on_range %0, %c0_i32, [5 : index, 31 : index] : (!fir.array<32xi32>, i32) -> !fir.array<32xi32> + fir.has_value %2 : !fir.array<32xi32> +} + +// CHECK: llvm.mlir.global internal @_QEmultiarray() : !llvm.array<32 x i32> { +// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %{{.*}} = llvm.mlir.undef : !llvm.array<32 x i32> +// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[5] : !llvm.array<32 x i32> +// CHECK-COUNT-24: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[{{.*}}] : !llvm.array<32 x i32> +// CHECK: %{{.*}} = llvm.insertvalue %[[CST]], %{{.*}}[31] : !llvm.array<32 x i32> +// CHECK-NOT: llvm.insertvalue +// CHECK: llvm.return %{{.*}} : !llvm.array<32 x i32> +// CHECK: } + +// ----- + +// Test fir.zero_bits operation with LLVM ptr type + +func @zero_test_ptr() { + %z = fir.zero_bits !llvm.ptr + return +} + +// CHECK: %{{.*}} = llvm.mlir.null : !llvm.ptr +// CHECK-NOT: fir.zero_bits + +// ----- + +// Test fir.zero_bits operation with integer type. + +func @zero_test_integer() { + %z0 = fir.zero_bits i8 + %z1 = fir.zero_bits i16 + %z2 = fir.zero_bits i32 + %z3 = fir.zero_bits i64 + return +} + +// CHECK: %{{.*}} = llvm.mlir.constant(0 : i8) : i8 +// CHECK: %{{.*}} = llvm.mlir.constant(0 : i16) : i16 +// CHECK: %{{.*}} = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %{{.*}} = llvm.mlir.constant(0 : i64) : i64 +// CHECK-NOT: fir.zero_bits + +// ----- + +// Test fir.zero_bits operation with floating points types. + +func @zero_test_float() { + %z0 = fir.zero_bits f16 + %z1 = fir.zero_bits bf16 + %z2 = fir.zero_bits f32 + %z3 = fir.zero_bits f64 + %z4 = fir.zero_bits f80 + %z5 = fir.zero_bits f128 + return +} + +// CHECK: %{{.*}} = llvm.mlir.constant(0.000000e+00 : f16) : f16 +// CHECK: %{{.*}} = llvm.mlir.constant(0.000000e+00 : bf16) : bf16 +// CHECK: %{{.*}} = llvm.mlir.constant(0.000000e+00 : f32) : f32 +// CHECK: %{{.*}} = llvm.mlir.constant(0.000000e+00 : f64) : f64 +// CHECK: %{{.*}} = llvm.mlir.constant(0.000000e+00 : f80) : f80 +// CHECK: %{{.*}} = llvm.mlir.constant(0.000000e+00 : f128) : f128 +// CHECK-NOT: fir.zero_bits + +// ----- + +// Verify that fir.unreachable is transformed to llvm.unreachable + +// CHECK: llvm.func @test_unreachable() { +// CHECK-NEXT: llvm.unreachable +// CHECK-NEXT: } + +func @test_unreachable() { + fir.unreachable +} + +// ----- + +// Test `fir.select` operation conversion pattern. +// Check that the if-then-else ladder is correctly constructed and that we +// branch to the correct block. 
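+// Two details worth noting in the CHECKs: the index-typed selector is first
+// truncated to i32, matching the width of the case values, and the `unit`
+// alternative of fir.select becomes the switch's default destination ^bb5.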
+ +func @select(%arg : index, %arg2 : i32) -> i32 { + %0 = arith.constant 1 : i32 + %1 = arith.constant 2 : i32 + %2 = arith.constant 3 : i32 + %3 = arith.constant 4 : i32 + fir.select %arg:index [ 1, ^bb1(%0:i32), + 2, ^bb2(%2,%arg,%arg2:i32,index,i32), + 3, ^bb3(%arg2,%2:i32,i32), + 4, ^bb4(%1:i32), + unit, ^bb5 ] + ^bb1(%a : i32) : + return %a : i32 + ^bb2(%b : i32, %b2 : index, %b3:i32) : + %castidx = arith.index_cast %b2 : index to i32 + %4 = arith.addi %b, %castidx : i32 + %5 = arith.addi %4, %b3 : i32 + return %5 : i32 + ^bb3(%c:i32, %c2:i32) : + %6 = arith.addi %c, %c2 : i32 + return %6 : i32 + ^bb4(%d : i32) : + return %d : i32 + ^bb5 : + %zero = arith.constant 0 : i32 + return %zero : i32 +} + +// CHECK-LABEL: func @select( +// CHECK-SAME: %[[SELECTVALUE:.*]]: [[IDX:.*]], +// CHECK-SAME: %[[ARG1:.*]]: i32) +// CHECK: %[[C0:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[C1:.*]] = llvm.mlir.constant(2 : i32) : i32 +// CHECK: %[[C2:.*]] = llvm.mlir.constant(3 : i32) : i32 +// CHECK: %[[SELECTOR:.*]] = llvm.trunc %[[SELECTVALUE]] : i{{.*}} to i32 +// CHECK: llvm.switch %[[SELECTOR]], ^bb5 [ +// CHECK: 1: ^bb1(%[[C0]] : i32), +// CHECK: 2: ^bb2(%[[C2]], %[[SELECTVALUE]], %[[ARG1]] : i32, [[IDX]], i32), +// CHECK: 3: ^bb3(%[[ARG1]], %[[C2]] : i32, i32), +// CHECK: 4: ^bb4(%[[C1]] : i32) +// CHECK: ] + +// ----- + +// Test `fir.select_rank` operation conversion pattern. +// Check that the if-then-else ladder is correctly constructed and that we +// branch to the correct block. + +func @select_rank(%arg : i32, %arg2 : i32) -> i32 { + %0 = arith.constant 1 : i32 + %1 = arith.constant 2 : i32 + %2 = arith.constant 3 : i32 + %3 = arith.constant 4 : i32 + fir.select_rank %arg:i32 [ 1, ^bb1(%0:i32), + 2, ^bb2(%2,%arg,%arg2:i32,i32,i32), + 3, ^bb3(%arg2,%2:i32,i32), + 4, ^bb4(%1:i32), + unit, ^bb5 ] + ^bb1(%a : i32) : + return %a : i32 + ^bb2(%b : i32, %b2 : i32, %b3:i32) : + %4 = arith.addi %b, %b2 : i32 + %5 = arith.addi %4, %b3 : i32 + return %5 : i32 + ^bb3(%c:i32, %c2:i32) : + %6 = arith.addi %c, %c2 : i32 + return %6 : i32 + ^bb4(%d : i32) : + return %d : i32 + ^bb5 : + %zero = arith.constant 0 : i32 + return %zero : i32 +} + +// CHECK-LABEL: func @select_rank( +// CHECK-SAME: %[[SELECTVALUE:.*]]: i32, +// CHECK-SAME: %[[ARG1:.*]]: i32) +// CHECK: %[[C0:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[C1:.*]] = llvm.mlir.constant(2 : i32) : i32 +// CHECK: %[[C2:.*]] = llvm.mlir.constant(3 : i32) : i32 +// CHECK: llvm.switch %[[SELECTVALUE]], ^bb5 [ +// CHECK: 1: ^bb1(%[[C0]] : i32), +// CHECK: 2: ^bb2(%[[C2]], %[[SELECTVALUE]], %[[ARG1]] : i32, i32, i32), +// CHECK: 3: ^bb3(%[[ARG1]], %[[C2]] : i32, i32), +// CHECK: 4: ^bb4(%[[C1]] : i32) +// CHECK: ] + +// ----- + +// Test fir.extract_value operation conversion with derived type. + +func @extract_derived_type() -> f32 { + %0 = fir.undefined !fir.type + %1 = fir.extract_value %0, ["f", !fir.type] : (!fir.type) -> f32 + return %1 : f32 +} + +// CHECK-LABEL: llvm.func @extract_derived_type +// CHECK: %[[STRUCT:.*]] = llvm.mlir.undef : !llvm.struct<"derived", (f32)> +// CHECK: %[[VALUE:.*]] = llvm.extractvalue %[[STRUCT]][0 : i32] : !llvm.struct<"derived", (f32)> +// CHECK: llvm.return %[[VALUE]] : f32 + +// ----- + +// Test fir.extract_value operation conversion with a multi-dimensional array +// of tuple. 
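+// Note the coordinate order: the FIR indices [5, 4, 1] become [4, 5, 1] on
+// llvm.extractvalue, i.e. the array dimensions are reversed (column-major
+// Fortran order mapped onto the nested row-major LLVM array type) while the
+// trailing struct-field index is left in place.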
+ +func @extract_array(%a : !fir.array<10x10xtuple>) -> f32 { + %0 = fir.extract_value %a, [5 : index, 4 : index, 1 : index] : (!fir.array<10x10xtuple>) -> f32 + return %0 : f32 +} + +// CHECK-LABEL: llvm.func @extract_array( +// CHECK-SAME: %[[ARR:.*]]: !llvm.array<10 x array<10 x struct<(i32, f32)>>> +// CHECK: %[[VALUE:.*]] = llvm.extractvalue %[[ARR]][4 : index, 5 : index, 1 : index] : !llvm.array<10 x array<10 x struct<(i32, f32)>>> +// CHECK: llvm.return %[[VALUE]] : f32 + +// ----- + +// Test fir.insert_value operation conversion with a multi-dimensional array +// of tuple. + +func @extract_array(%a : !fir.array<10x10xtuple>) { + %f = arith.constant 2.0 : f32 + %i = arith.constant 1 : i32 + %0 = fir.insert_value %a, %i, [5 : index, 4 : index, 0 : index] : (!fir.array<10x10xtuple>, i32) -> !fir.array<10x10xtuple> + %1 = fir.insert_value %a, %f, [5 : index, 4 : index, 1 : index] : (!fir.array<10x10xtuple>, f32) -> !fir.array<10x10xtuple> + return +} + +// CHECK-LABEL: llvm.func @extract_array( +// CHECK-SAME: %[[ARR:.*]]: !llvm.array<10 x array<10 x struct<(i32, f32)>>> +// CHECK: %{{.*}} = llvm.insertvalue %{{.*}}, %[[ARR]][4 : index, 5 : index, 0 : index] : !llvm.array<10 x array<10 x struct<(i32, f32)>>> +// CHECK: %{{.*}} = llvm.insertvalue %{{.*}}, %[[ARR]][4 : index, 5 : index, 1 : index] : !llvm.array<10 x array<10 x struct<(i32, f32)>>> +// CHECK: llvm.return + +// ----- + +// Test fir.insert_value operation conversion with derived type. + +func @insert_tuple(%a : tuple) { + %f = arith.constant 2.0 : f32 + %1 = fir.insert_value %a, %f, [1 : index] : (tuple, f32) -> tuple + return +} + +// CHECK-LABEL: func @insert_tuple( +// CHECK-SAME: %[[TUPLE:.*]]: !llvm.struct<(i32, f32)> +// CHECK: %{{.*}} = llvm.insertvalue %{{.*}}, %[[TUPLE]][1 : index] : !llvm.struct<(i32, f32)> +// CHECK: llvm.return + +// ----- +// Test `fir.call` -> `llvm.call` conversion for functions that take no arguments +// and return nothing + +func @dummy_basic() { + return +} + +func @test_call_basic() { + fir.call @dummy_basic() : () -> () + return +} + +// CHECK-LABEL: func @test_call_basic() { +// CHECK-NEXT: llvm.call @dummy_basic() : () -> () +// CHECK-NEXT: return +// CHECK-NEXT: } + +// Test `fir.call` -> `llvm.call` conversion for functions that take one +// argument and return nothing + +func @dummy_with_arg(%arg0 : i32) { + return +} + +func @test_call_with_arg(%arg0 : i32) { + fir.call @dummy_with_arg(%arg0) : (i32) -> () + return +} + +// CHECK-LABEL: llvm.func @test_call_with_arg(%arg0: i32) { +// CHECK-NEXT: llvm.call @dummy_with_arg(%arg0) : (i32) -> () +// CHECK-NEXT: llvm.return +// CHECK-NEXT: } + +// Test `fir.call` -> `llvm.call` conversion for functions that take no +// arguments, but return a value + +func @dummy_return_val() -> i32 { + %1 = arith.constant 123 : i32 + return %1 : i32 +} + +func @test_call_return_val() -> i32 { + %1 = fir.call @dummy_return_val() : () -> (i32) + return %1 : i32 +} + +// CHECK-LABEL: llvm.func @test_call_return_val() -> i32 { +// CHECK-NEXT: %0 = llvm.call @dummy_return_val() : () -> i32 +// CHECK-NEXT: llvm.return %0 : i32 +// CHECK-NEXT: } diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index d25322245c598..8bc2ac6793e8c 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -464,6 +464,26 @@ fir.global internal @_QEmultiarray : !fir.array<32x32xi32> { // ----- +fir.global internal @_QEmultiarray : !fir.array { + %c0_i32 = arith.constant 1 : i32 + %0 = fir.undefined !fir.array + // expected-error@+1 
{{'fir.insert_on_range' op must have constant shape and size}} + %2 = fir.insert_on_range %0, %c0_i32, [0 : index, 10 : index] : (!fir.array, i32) -> !fir.array + fir.has_value %2 : !fir.array +} + +// ----- + +fir.global internal @_QEmultiarray : !fir.array<*:i32> { + %c0_i32 = arith.constant 1 : i32 + %0 = fir.undefined !fir.array<*:i32> + // expected-error@+1 {{'fir.insert_on_range' op must have constant shape and size}} + %2 = fir.insert_on_range %0, %c0_i32, [0 : index, 10 : index] : (!fir.array<*:i32>, i32) -> !fir.array<*:i32> + fir.has_value %2 : !fir.array<*:i32> +} + +// ----- + func @bad_save_result(%buffer : !fir.ref>, %n :index) { %res = fir.call @array_func() : () -> !fir.array %shape = fir.shape %n : (index) -> !fir.shape<1> diff --git a/flang/test/Fir/types-to-llvm.fir b/flang/test/Fir/types-to-llvm.fir new file mode 100644 index 0000000000000..409e6dae043ca --- /dev/null +++ b/flang/test/Fir/types-to-llvm.fir @@ -0,0 +1,74 @@ +// Test FIR types conversion. + +// RUN: fir-opt --split-input-file --fir-to-llvm-ir %s | FileCheck %s + + +// Test sequence types `!fir.array` + +func private @foo0(%arg0: !fir.array<10x10xi64>) +// CHECK-LABEL: foo0 +// CHECK-SAME: !llvm.array<10 x array<10 x i64>> +func private @foo1(%arg0: !fir.array) +// CHECK-LABEL: foo1 +// CHECK-SAME: !llvm.ptr +func private @foo2(%arg0: !fir.array) +// CHECK-LABEL: foo2 +// CHECK-SAME: !llvm.ptr +func private @foo3(%arg0: !fir.array<*:i32>) +// CHECK-LABEL: foo3 +// CHECK-SAME: !llvm.ptr + +// ----- + +// Test reference types `!fir.ref` + +func private @foo0(%arg0: !fir.ref) +// CHECK-LABEL: foo0 +// CHECK-SAME: !llvm.ptr +func private @foo1(%arg0: !fir.ref>) +// CHECK-LABEL: foo1 +// CHECK-SAME: !llvm.ptr> + +// ----- + +// Test box types `!fir.box` + +func private @foo0(%arg0: !fir.box>) +// CHECK-LABEL: foo0 +// CHECK-SAME: !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>)>> + +func private @foo1(%arg0: !fir.box>) +// CHECK-LABEL: foo1 +// CHECK-SAME: !llvm.ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>)>> + +func private @foo2(%arg0: !fir.box>) +// CHECK-LABEL: foo2 +// CHECK-SAME: !llvm.ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>> + +func private @foo3(%arg0: !fir.box>) +// CHECK-LABEL: foo3 +// CHECK-SAME: !llvm.ptr>, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>> + +// ----- + +// Test `!fir.logical` conversion. + +func private @foo0(%arg0: !fir.logical<1>) +// CHECK-LABEL: foo0 +// CHECK-SAME: i8 + +func private @foo1(%arg0: !fir.logical<2>) +// CHECK-LABEL: foo1 +// CHECK-SAME: i16 + +func private @foo2(%arg0: !fir.logical<4>) +// CHECK-LABEL: foo2 +// CHECK-SAME: i32 + +func private @foo3(%arg0: !fir.logical<8>) +// CHECK-LABEL: foo3 +// CHECK-SAME: i64 + +func private @foo4(%arg0: !fir.logical<16>) +// CHECK-LABEL: foo4 +// CHECK-SAME: i128 diff --git a/flang/test/Semantics/label05.f90 b/flang/test/Semantics/label05.f90 index 7084574a5790c..51fd90263911c 100644 --- a/flang/test/Semantics/label05.f90 +++ b/flang/test/Semantics/label05.f90 @@ -1,12 +1,13 @@ ! RUN: not %flang_fc1 -fdebug-unparse-with-symbols %s 2>&1 | FileCheck %s ! CHECK: Label '50' was not found +! CHECK-NOT: error: Label '55' is in a construct that prevents its use as a branch target here ! CHECK: Label '55' is in a construct that prevents its use as a branch target here ! CHECK: Label '70' is not a branch target ! CHECK: Control flow use of '70' -! 
CHECK: Label '80' is in a construct that prevents its use as a branch target here -! CHECK: Label '90' is in a construct that prevents its use as a branch target here -! CHECK: Label '91' is in a construct that prevents its use as a branch target here -! CHECK: Label '92' is in a construct that prevents its use as a branch target here +! CHECK: error: Label '80' is in a construct that prevents its use as a branch target here +! CHECK: error: Label '90' is in a construct that prevents its use as a branch target here +! CHECK: error: Label '91' is in a construct that prevents its use as a branch target here +! CHECK: error: Label '92' is in a construct that prevents its use as a branch target here subroutine sub00(a,b,n,m) real a(n,m) diff --git a/flang/test/Semantics/label14.f90 b/flang/test/Semantics/label14.f90 index f310913f26283..7f03d829d88aa 100644 --- a/flang/test/Semantics/label14.f90 +++ b/flang/test/Semantics/label14.f90 @@ -1,8 +1,8 @@ ! Tests implemented for this standard -! 11.1.4 - 4 It is permissible to branch to and end-block-stmt only withinh its +! 11.1.4 - 4 It is permissible to branch to an end-block-stmt only within its ! Block Construct -! RUN: %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s +! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s ! CHECK: Label '20' is in a construct that prevents its use as a branch target here subroutine s1 diff --git a/flang/test/Semantics/omp-private-is-pointer-check.f90 b/flang/test/Semantics/omp-private-is-pointer-check.f90 new file mode 100644 index 0000000000000..c2b7244e46485 --- /dev/null +++ b/flang/test/Semantics/omp-private-is-pointer-check.f90 @@ -0,0 +1,10 @@ +! RUN: %flang_fc1 -fopenmp -fsyntax-only %s + +subroutine s + integer, pointer :: p + integer, target :: t + + !$omp parallel private(p) + p=>t + !$omp end parallel +end subroutine diff --git a/flang/unittests/Runtime/CommandTest.cpp b/flang/unittests/Runtime/CommandTest.cpp index cfbbd8a53383c..4ff07d88953d5 100644 --- a/flang/unittests/Runtime/CommandTest.cpp +++ b/flang/unittests/Runtime/CommandTest.cpp @@ -53,17 +53,66 @@ class CommandFixture : public ::testing::Test { const Descriptor *value, const std::string &expected) const { EXPECT_EQ(std::strncmp(value->OffsetElement(), expected.c_str(), value->ElementBytes()), - 0); + 0) + << "expected: " << expected << "\n" + << "value: " + << std::string{value->OffsetElement(), value->ElementBytes()}; } - void CheckArgumentValue(int n, const char *argv) const { + template + void CheckValue(RuntimeCall F, const char *expectedValue, + std::int32_t expectedStatus = 0, + const char *expectedErrMsg = "shouldn't change") const { OwningPtr value{CreateEmptyCharDescriptor()}; ASSERT_NE(value, nullptr); - std::string expected{GetPaddedStr(argv, value->ElementBytes())}; + OwningPtr errmsg{CharDescriptor(expectedErrMsg)}; - EXPECT_EQ(RTNAME(ArgumentValue)(n, value.get(), nullptr), 0); - CheckDescriptorEqStr(value.get(), expected); + std::string expectedValueStr{ + GetPaddedStr(expectedValue, value->ElementBytes())}; + + EXPECT_EQ(F(value.get(), errmsg.get()), expectedStatus); + CheckDescriptorEqStr(value.get(), expectedValueStr); + CheckDescriptorEqStr(errmsg.get(), expectedErrMsg); + } + + void CheckArgumentValue(const char *expectedValue, int n) const { + SCOPED_TRACE(n); + SCOPED_TRACE("Checking argument:"); + CheckValue( + [&](const Descriptor *value, const Descriptor *errmsg) { + return RTNAME(ArgumentValue)(n, value, errmsg); + }, + expectedValue); + } + + void CheckEnvVarValue( + const char *expectedValue, const char 
*name, bool trimName = true) const { + SCOPED_TRACE(name); + SCOPED_TRACE("Checking environment variable"); + CheckValue( + [&](const Descriptor *value, const Descriptor *errmsg) { + return RTNAME(EnvVariableValue)(*CharDescriptor(name), value, + trimName, errmsg, /*sourceFile=*/nullptr, /*line=*/0); + }, + expectedValue); + } + + void CheckMissingEnvVarValue(const char *name, bool trimName = true) const { + SCOPED_TRACE(name); + SCOPED_TRACE("Checking missing environment variable"); + + ASSERT_EQ(nullptr, std::getenv(name)) + << "Environment variable " << name << " not expected to exist"; + + OwningPtr nameDescriptor{CharDescriptor(name)}; + EXPECT_EQ(0, RTNAME(EnvVariableLength)(*nameDescriptor, trimName)); + CheckValue( + [&](const Descriptor *value, const Descriptor *errmsg) { + return RTNAME(EnvVariableValue)(*nameDescriptor, value, trimName, + errmsg, /*sourceFile=*/nullptr, /*line=*/0); + }, + "", 1, "Missing environment variable"); } void CheckMissingArgumentValue(int n, const char *errStr = nullptr) const { @@ -99,7 +148,7 @@ TEST_F(ZeroArguments, ArgumentLength) { } TEST_F(ZeroArguments, ArgumentValue) { - CheckArgumentValue(0, commandOnlyArgv[0]); + CheckArgumentValue(commandOnlyArgv[0], 0); } static const char *oneArgArgv[]{"aProgram", "anArgumentOfLength20"}; @@ -118,8 +167,8 @@ TEST_F(OneArgument, ArgumentLength) { } TEST_F(OneArgument, ArgumentValue) { - CheckArgumentValue(0, oneArgArgv[0]); - CheckArgumentValue(1, oneArgArgv[1]); + CheckArgumentValue(oneArgArgv[0], 0); + CheckArgumentValue(oneArgArgv[1], 1); } static const char *severalArgsArgv[]{ @@ -146,10 +195,10 @@ TEST_F(SeveralArguments, ArgumentLength) { } TEST_F(SeveralArguments, ArgumentValue) { - CheckArgumentValue(0, severalArgsArgv[0]); - CheckArgumentValue(1, severalArgsArgv[1]); - CheckArgumentValue(3, severalArgsArgv[3]); - CheckArgumentValue(4, severalArgsArgv[4]); + CheckArgumentValue(severalArgsArgv[0], 0); + CheckArgumentValue(severalArgsArgv[1], 1); + CheckArgumentValue(severalArgsArgv[3], 3); + CheckArgumentValue(severalArgsArgv[4], 4); } TEST_F(SeveralArguments, NoArgumentValue) { @@ -192,6 +241,7 @@ class EnvironmentVariables : public CommandFixture { protected: EnvironmentVariables() : CommandFixture(0, nullptr) { SetEnv("NAME", "VALUE"); + SetEnv("EMPTY", ""); } // If we have access to setenv, we can run some more fine-grained tests. @@ -211,23 +261,79 @@ class EnvironmentVariables : public CommandFixture { bool canSetEnv{false}; }; -TEST_F(EnvironmentVariables, Length) { - EXPECT_EQ(0, RTNAME(EnvVariableLength)(*CharDescriptor("DOESNT_EXIST"))); +TEST_F(EnvironmentVariables, Nonexistent) { + CheckMissingEnvVarValue("DOESNT_EXIST"); - EXPECT_EQ(0, RTNAME(EnvVariableLength)(*CharDescriptor(" "))); - EXPECT_EQ(0, RTNAME(EnvVariableLength)(*CharDescriptor(""))); + CheckMissingEnvVarValue(" "); + CheckMissingEnvVarValue(""); +} +TEST_F(EnvironmentVariables, Basic) { // Test a variable that's expected to exist in the environment. 
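  // Note that EnvVariableLength is expected to agree with std::strlen,
  // i.e. the reported length excludes the terminating null byte.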
char *path{std::getenv("PATH")}; auto expectedLen{static_cast(std::strlen(path))}; EXPECT_EQ(expectedLen, RTNAME(EnvVariableLength)(*CharDescriptor("PATH"))); +} +TEST_F(EnvironmentVariables, Trim) { if (EnableFineGrainedTests()) { - EXPECT_EQ(5, RTNAME(EnvVariableLength)(*CharDescriptor("NAME"))); + EXPECT_EQ(5, RTNAME(EnvVariableLength)(*CharDescriptor("NAME "))); + CheckEnvVarValue("VALUE", "NAME "); + } +} - EXPECT_EQ(5, RTNAME(EnvVariableLength)(*CharDescriptor("NAME "))); - EXPECT_EQ(0, - RTNAME(EnvVariableLength)( - *CharDescriptor("NAME "), /*trim_name=*/false)); +TEST_F(EnvironmentVariables, NoTrim) { + if (EnableFineGrainedTests()) { + CheckMissingEnvVarValue("NAME ", /*trim_name=*/false); } } + +TEST_F(EnvironmentVariables, Empty) { + if (EnableFineGrainedTests()) { + EXPECT_EQ(0, RTNAME(EnvVariableLength)(*CharDescriptor("EMPTY"))); + CheckEnvVarValue("", "EMPTY"); + } +} + +TEST_F(EnvironmentVariables, NoValueOrErrmsg) { + ASSERT_EQ(std::getenv("DOESNT_EXIST"), nullptr) + << "Environment variable DOESNT_EXIST actually exists"; + EXPECT_EQ(RTNAME(EnvVariableValue)(*CharDescriptor("DOESNT_EXIST")), 1); + + if (EnableFineGrainedTests()) { + EXPECT_EQ(RTNAME(EnvVariableValue)(*CharDescriptor("NAME")), 0); + } +} + +TEST_F(EnvironmentVariables, ValueTooShort) { + if (EnableFineGrainedTests()) { + OwningPtr tooShort{CreateEmptyCharDescriptor<2>()}; + ASSERT_NE(tooShort, nullptr); + EXPECT_EQ(RTNAME(EnvVariableValue)(*CharDescriptor("NAME"), tooShort.get(), + /*trim_name=*/true, nullptr), + -1); + CheckDescriptorEqStr(tooShort.get(), "VALUE"); + + OwningPtr errMsg{CreateEmptyCharDescriptor()}; + ASSERT_NE(errMsg, nullptr); + + EXPECT_EQ(RTNAME(EnvVariableValue)(*CharDescriptor("NAME"), tooShort.get(), + /*trim_name=*/true, errMsg.get()), + -1); + + std::string expectedErrMsg{ + GetPaddedStr("Value too short", errMsg->ElementBytes())}; + CheckDescriptorEqStr(errMsg.get(), expectedErrMsg); + } +} + +TEST_F(EnvironmentVariables, ErrMsgTooShort) { + ASSERT_EQ(std::getenv("DOESNT_EXIST"), nullptr) + << "Environment variable DOESNT_EXIST actually exists"; + + OwningPtr errMsg{CreateEmptyCharDescriptor<3>()}; + EXPECT_EQ(RTNAME(EnvVariableValue)(*CharDescriptor("DOESNT_EXIST"), nullptr, + /*trim_name=*/true, errMsg.get()), + 1); + CheckDescriptorEqStr(errMsg.get(), "Mis"); +} diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 9f25e9e8efd52..a757dc35b5d22 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -31,6 +31,8 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.mempcpy libc.src.string.memrchr libc.src.string.memset + libc.src.string.stpcpy + libc.src.string.stpncpy libc.src.string.strcat libc.src.string.strchr libc.src.string.strcmp @@ -239,6 +241,7 @@ if(LLVM_LIBC_INCLUDE_SCUDO) # string.h entrypoints that depend on malloc libc.src.string.strdup + libc.src.string.strndup ) endif() diff --git a/libc/spec/posix.td b/libc/spec/posix.td index 45d12fa239922..e1fc63367c6a1 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -235,6 +235,19 @@ def POSIX : StandardSpec<"POSIX"> { ArgSpec, ArgSpec] >, + FunctionSpec< + "stpcpy", + RetValSpec, + [ArgSpec, + ArgSpec] + >, + FunctionSpec< + "stpncpy", + RetValSpec, + [ArgSpec, + ArgSpec, + ArgSpec] + >, FunctionSpec< "strnlen", RetValSpec, diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index c997d4364fb15..2218bfca283f5 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -284,6 +284,11 @@ def StdC : StandardSpec<"stdc"> { 
RetValSpec, [ArgSpec] >, + FunctionSpec< + "strndup", + RetValSpec, + [ArgSpec,ArgSpec] + >, FunctionSpec< "strpbrk", RetValSpec, diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 871c3a447b80a..567ed069b82c5 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -3,6 +3,7 @@ add_subdirectory(CPP) add_header_library( common HDRS + architectures.h common.h endian.h sanitizer.h @@ -22,11 +23,22 @@ add_header_library( ) add_header_library( - str_conv_utils + str_to_integer + HDRS + str_to_integer.h + DEPENDS + .ctype_utils + libc.include.errno + libc.src.errno.__errno_location + libc.src.__support.CPP.standalone_cpp +) + +add_header_library( + str_to_float HDRS - str_conv_utils.h str_to_float.h DEPENDS + .str_to_integer .ctype_utils .high_precision_decimal libc.include.errno diff --git a/libc/src/__support/FPUtil/FEnvUtils.h b/libc/src/__support/FPUtil/FEnvUtils.h index a9d11ceb6535d..2cb62e558c80f 100644 --- a/libc/src/__support/FPUtil/FEnvUtils.h +++ b/libc/src/__support/FPUtil/FEnvUtils.h @@ -9,9 +9,11 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_FENVUTILS_H #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_FENVUTILS_H -#ifdef __x86_64__ +#include "src/__support/architectures.h" + +#if defined(LLVM_LIBC_ARCH_X86_64) #include "x86_64/FEnvImpl.h" -#elif defined(__aarch64__) +#elif defined(LLVM_LIBC_ARCH_AARCH64) #include "aarch64/FEnvImpl.h" #else #include "DummyFEnvImpl.h" diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h index c109b3470bd3e..5f35bec644ebb 100644 --- a/libc/src/__support/FPUtil/FMA.h +++ b/libc/src/__support/FPUtil/FMA.h @@ -10,10 +10,11 @@ #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_FMA_H #include "src/__support/CPP/TypeTraits.h" +#include "src/__support/architectures.h" -#ifdef __x86_64__ +#if defined(LLVM_LIBC_ARCH_X86_64) #include "x86_64/FMA.h" -#elif defined(__aarch64__) +#elif defined(LLVM_LIBC_ARCH_AARCH64) #include "aarch64/FMA.h" #else #include "generic/FMA.h" diff --git a/libc/src/__support/FPUtil/PlatformDefs.h b/libc/src/__support/FPUtil/PlatformDefs.h index d9964d11106c4..61af0dae49723 100644 --- a/libc/src/__support/FPUtil/PlatformDefs.h +++ b/libc/src/__support/FPUtil/PlatformDefs.h @@ -9,7 +9,9 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_PLATFORM_DEFS_H #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_PLATFORM_DEFS_H -#if defined(__x86_64__) || defined(__i386__) +#include "src/__support/architectures.h" + +#if defined(LLVM_LIBC_ARCH_X86) #define X87_FPU #endif diff --git a/libc/src/__support/FPUtil/PolyEval.h b/libc/src/__support/FPUtil/PolyEval.h index ccf4d60c1043a..10d406d05262b 100644 --- a/libc/src/__support/FPUtil/PolyEval.h +++ b/libc/src/__support/FPUtil/PolyEval.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_POLYEVAL_H #include "src/__support/CPP/TypeTraits.h" +#include "src/__support/architectures.h" // Evaluate polynomial using Horner's Scheme: // With polyeval(x, a_0, a_1, ..., a_n) = a_n * x^n + ... 
+ a_1 * x + a_0, we @@ -18,7 +19,7 @@ // Example: to evaluate x^3 + 2*x^2 + 3*x + 4, call // polyeval( x, 4.0, 3.0, 2.0, 1.0 ) -#if defined(__x86_64__) || defined(__aarch64__) +#if defined(LLVM_LIBC_ARCH_X86_64) || defined(LLVM_LIBC_ARCH_AARCH64) #include "FMA.h" namespace __llvm_libc { diff --git a/libc/src/__support/architectures.h b/libc/src/__support/architectures.h new file mode 100644 index 0000000000000..485faae47c7e3 --- /dev/null +++ b/libc/src/__support/architectures.h @@ -0,0 +1,35 @@ +//===-- Compile time architecture detection -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if defined(__pnacl__) || defined(__CLR_VER) +#define LLVM_LIBC_ARCH_VM +#endif + +#if (defined(_M_IX86) || defined(__i386__)) && !defined(LLVM_LIBC_ARCH_VM) +#define LLVM_LIBC_ARCH_X86_32 +#endif + +#if (defined(_M_X64) || defined(__x86_64__)) && !defined(LLVM_LIBC_ARCH_VM) +#define LLVM_LIBC_ARCH_X86_64 +#endif + +#if defined(LLVM_LIBC_ARCH_X86_32) || defined(LLVM_LIBC_ARCH_X86_64) +#define LLVM_LIBC_ARCH_X86 +#endif + +#if (defined(__arm__) || defined(_M_ARM)) +#define LLVM_LIBC_ARCH_ARM +#endif + +#if defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) +#define LLVM_LIBC_ARCH_AARCH64 +#endif + +#if (defined(LLVM_LIBC_ARCH_AARCH64) || defined(LLVM_LIBC_ARCH_ARM)) +#define LLVM_LIBC_ARCH_ANY_ARM +#endif diff --git a/libc/src/__support/detailed_powers_of_ten.h b/libc/src/__support/detailed_powers_of_ten.h index e168b95b8998e..9a58ba5dff782 100644 --- a/libc/src/__support/detailed_powers_of_ten.h +++ b/libc/src/__support/detailed_powers_of_ten.h @@ -21,7 +21,7 @@ namespace internal { // and contains the 128 bit mantissa approximations of the powers of 10 from // -348 to 347. The exponents are implied by a linear expression with slope // 217706.0/65536.0 ≈ log(10)/log(2). This is used by the Eisel-Lemire algorithm -// in str_conv_utils.h. +// in str_to_float.h. 
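// As an illustrative sketch only (this helper is hypothetical, not part of
// the table below): with the slope 217706/65536 ~= log2(10), the binary
// exponent implied for a decimal exponent exp10 can be recovered with pure
// integer arithmetic:
//
//   constexpr int32_t implied_binary_exponent(int32_t exp10) {
//     return (217706 * exp10) >> 16; // approximates floor(exp10 * log2(10))
//   }
//
// (Assumes the usual arithmetic right shift for negative exp10.)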
constexpr int32_t DETAILED_POWERS_OF_TEN_MIN_EXP_10 = -348; constexpr int32_t DETAILED_POWERS_OF_TEN_MAX_EXP_10 = 347; diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h index ae8aac37605ad..352de581a8f72 100644 --- a/libc/src/__support/high_precision_decimal.h +++ b/libc/src/__support/high_precision_decimal.h @@ -10,7 +10,7 @@ #define LIBC_SRC_SUPPORT_HIGH_PRECISION_DECIMAL_H #include "src/__support/ctype_utils.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" #include namespace __llvm_libc { diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index b62e8ff4c4dc3..59bd1ec8e5da4 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -14,7 +14,7 @@ #include "src/__support/ctype_utils.h" #include "src/__support/detailed_powers_of_ten.h" #include "src/__support/high_precision_decimal.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" #include namespace __llvm_libc { diff --git a/libc/src/__support/str_conv_utils.h b/libc/src/__support/str_to_integer.h similarity index 96% rename from libc/src/__support/str_conv_utils.h rename to libc/src/__support/str_to_integer.h index 13c800aac6578..ec7f6f54a88f1 100644 --- a/libc/src/__support/str_conv_utils.h +++ b/libc/src/__support/str_to_integer.h @@ -1,4 +1,4 @@ -//===-- Stdlib utils --------------------------------------------*- C++ -*-===// +//===-- String to integer conversion utils ----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LIBC_SRC_STDLIB_STDLIB_UTILS_H -#define LIBC_SRC_STDLIB_STDLIB_UTILS_H +#ifndef LIBC_SRC_SUPPORT_STR_TO_INTEGER_H +#define LIBC_SRC_SUPPORT_STR_TO_INTEGER_H #include "src/__support/CPP/Limits.h" #include "src/__support/ctype_utils.h" @@ -148,4 +148,4 @@ static inline T strtointeger(const char *__restrict src, } // namespace internal } // namespace __llvm_libc -#endif // LIBC_SRC_STDLIB_STDLIB_UTILS_H +#endif // LIBC_SRC_SUPPORT_STR_TO_INTEGER_H diff --git a/libc/src/inttypes/CMakeLists.txt b/libc/src/inttypes/CMakeLists.txt index e0e0f4d57f2b9..00fd791320fb2 100644 --- a/libc/src/inttypes/CMakeLists.txt +++ b/libc/src/inttypes/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS strtoimax.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -15,7 +15,7 @@ add_entrypoint_object( HDRS strtoumax.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( diff --git a/libc/src/inttypes/strtoimax.cpp b/libc/src/inttypes/strtoimax.cpp index ecd59ff717383..ef5e84e2034a7 100644 --- a/libc/src/inttypes/strtoimax.cpp +++ b/libc/src/inttypes/strtoimax.cpp @@ -8,7 +8,7 @@ #include "src/inttypes/strtoimax.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/inttypes/strtoumax.cpp b/libc/src/inttypes/strtoumax.cpp index cc3f1451fdae3..edf8b65f161a2 100644 --- a/libc/src/inttypes/strtoumax.cpp +++ b/libc/src/inttypes/strtoumax.cpp @@ -8,7 +8,7 @@ #include "src/inttypes/strtoumax.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include 
"src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index 3700d9a240952..f5f339080e82f 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS atoi.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -15,7 +15,7 @@ add_entrypoint_object( HDRS atof.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_float ) add_entrypoint_object( @@ -25,7 +25,7 @@ add_entrypoint_object( HDRS atol.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -35,7 +35,7 @@ add_entrypoint_object( HDRS atoll.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -45,7 +45,7 @@ add_entrypoint_object( HDRS strtof.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_float ) add_entrypoint_object( @@ -55,7 +55,7 @@ add_entrypoint_object( HDRS strtod.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_float ) add_entrypoint_object( @@ -65,7 +65,7 @@ add_entrypoint_object( HDRS strtol.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -75,7 +75,7 @@ add_entrypoint_object( HDRS strtoll.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -85,7 +85,7 @@ add_entrypoint_object( HDRS strtoul.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( @@ -95,7 +95,7 @@ add_entrypoint_object( HDRS strtoull.h DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_integer ) add_entrypoint_object( diff --git a/libc/src/stdlib/atoi.cpp b/libc/src/stdlib/atoi.cpp index f0e57caf743d5..37cfab14443c0 100644 --- a/libc/src/stdlib/atoi.cpp +++ b/libc/src/stdlib/atoi.cpp @@ -8,7 +8,7 @@ #include "src/stdlib/atoi.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/stdlib/atol.cpp b/libc/src/stdlib/atol.cpp index 8f0ed885a9c97..6a1da4c7007d9 100644 --- a/libc/src/stdlib/atol.cpp +++ b/libc/src/stdlib/atol.cpp @@ -8,7 +8,7 @@ #include "src/stdlib/atol.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/stdlib/atoll.cpp b/libc/src/stdlib/atoll.cpp index c75e521382190..ffa8105d4cc6f 100644 --- a/libc/src/stdlib/atoll.cpp +++ b/libc/src/stdlib/atoll.cpp @@ -8,7 +8,7 @@ #include "src/stdlib/atoll.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/stdlib/strtol.cpp b/libc/src/stdlib/strtol.cpp index 1c744c929e283..33038b51c41ee 100644 --- a/libc/src/stdlib/strtol.cpp +++ b/libc/src/stdlib/strtol.cpp @@ -8,7 +8,7 @@ #include "src/stdlib/strtol.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/stdlib/strtoll.cpp b/libc/src/stdlib/strtoll.cpp index e2fc37f8bf822..e2f0aac546688 100644 --- a/libc/src/stdlib/strtoll.cpp +++ b/libc/src/stdlib/strtoll.cpp @@ -8,7 +8,7 @@ #include "src/stdlib/strtoll.h" #include "src/__support/common.h" 
-#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/stdlib/strtoul.cpp b/libc/src/stdlib/strtoul.cpp index eab264f33347f..00696799872c4 100644 --- a/libc/src/stdlib/strtoul.cpp +++ b/libc/src/stdlib/strtoul.cpp @@ -8,7 +8,7 @@ #include "src/stdlib/strtoul.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/stdlib/strtoull.cpp b/libc/src/stdlib/strtoull.cpp index bece2787ba7ea..db6c83872b55e 100644 --- a/libc/src/stdlib/strtoull.cpp +++ b/libc/src/stdlib/strtoull.cpp @@ -8,7 +8,7 @@ #include "src/stdlib/strtoull.h" #include "src/__support/common.h" -#include "src/__support/str_conv_utils.h" +#include "src/__support/str_to_integer.h" namespace __llvm_libc { diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index 923ba09f3c64a..d5cfb1742018f 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -56,6 +56,27 @@ add_entrypoint_object( memrchr.h ) +add_entrypoint_object( + stpcpy + SRCS + stpcpy.cpp + HDRS + stpcpy.h + DEPENDS + .mempcpy + .string_utils +) + +add_entrypoint_object( + stpncpy + SRCS + stpncpy.cpp + HDRS + stpncpy.h + DEPENDS + .bzero +) + add_entrypoint_object( strcat SRCS @@ -153,6 +174,18 @@ add_entrypoint_object( strncpy.h ) +add_entrypoint_object( + strndup + SRCS + strndup.cpp + HDRS + strndup.h + DEPENDS + .memcpy + .string_utils + libc.include.stdlib +) + add_entrypoint_object( strnlen SRCS @@ -310,7 +343,7 @@ endif() function(add_memcmp memcmp_name) add_implementation(memcmp ${memcmp_name} - SRCS ${LIBC_MEMCMP_SRC} + SRCS ${LIBC_SOURCE_DIR}/src/string/memcmp.cpp HDRS ${LIBC_SOURCE_DIR}/src/string/memcmp.h DEPENDS .memory_utils.memory_utils @@ -322,7 +355,6 @@ function(add_memcmp memcmp_name) endfunction() if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) - set(LIBC_MEMCMP_SRC ${LIBC_SOURCE_DIR}/src/string/memcmp.cpp) add_memcmp(memcmp_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memcmp(memcmp_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memcmp(memcmp_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) @@ -330,11 +362,9 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memcmp(memcmp) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) - set(LIBC_MEMCMP_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memcmp.cpp) - add_memcmp(memcmp) add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) + add_memcmp(memcmp) else() - set(LIBC_MEMCMP_SRC ${LIBC_SOURCE_DIR}/src/string/memcmp.cpp) add_memcmp(memcmp_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memcmp(memcmp) endif() @@ -345,7 +375,7 @@ endif() function(add_memcpy memcpy_name) add_implementation(memcpy ${memcpy_name} - SRCS ${MEMCPY_SRC} + SRCS ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp HDRS ${LIBC_SOURCE_DIR}/src/string/memcpy.h DEPENDS .memory_utils.memory_utils @@ -357,7 +387,6 @@ function(add_memcpy memcpy_name) endfunction() if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) - set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/x86_64/memcpy.cpp) add_memcpy(memcpy_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memcpy(memcpy_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memcpy(memcpy_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) @@ -365,14 +394,12 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) add_memcpy(memcpy_opt_host 
COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memcpy(memcpy) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) - set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memcpy.cpp) # Disable tail merging as it leads to lower performance. # Note that '-mllvm' needs to be prefixed with 'SHELL:' to prevent CMake flag deduplication. add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") add_memcpy(memcpy COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") else() - set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp) add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memcpy(memcpy) endif() @@ -383,7 +410,7 @@ endif() function(add_memset memset_name) add_implementation(memset ${memset_name} - SRCS ${MEMSET_SRC} + SRCS ${LIBC_SOURCE_DIR}/src/string/memset.cpp HDRS ${LIBC_SOURCE_DIR}/src/string/memset.h DEPENDS .memory_utils.memory_utils @@ -395,7 +422,6 @@ function(add_memset memset_name) endfunction() if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) - set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2) add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2) add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2) @@ -403,12 +429,10 @@ if(${LIBC_TARGET_ARCHITECTURE_IS_X86}) add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memset(memset) elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64}) - set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memset.cpp) add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE} COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") add_memset(memset COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0") else() - set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp) add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}) add_memset(memset) endif() diff --git a/libc/src/string/aarch64/memcmp.cpp b/libc/src/string/aarch64/memcmp.cpp deleted file mode 100644 index 7ef3004a06150..0000000000000 --- a/libc/src/string/aarch64/memcmp.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===-- Implementation of memcmp ------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/string/memcmp.h" -#include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" -#include // size_t - -namespace __llvm_libc { - -static int memcmp_aarch64(const char *lhs, const char *rhs, size_t count) { - // Use aarch64 strategies (_1, _2, _3 ...) 
- using namespace __llvm_libc::aarch64; - - if (count == 0) // [0, 0] - return 0; - if (count == 1) // [1, 1] - return ThreeWayCompare<_1>(lhs, rhs); - if (count == 2) // [2, 2] - return ThreeWayCompare<_2>(lhs, rhs); - if (count == 3) // [3, 3] - return ThreeWayCompare<_3>(lhs, rhs); - if (count < 8) // [4, 7] - return ThreeWayCompare>(lhs, rhs, count); - if (count < 16) // [8, 15] - return ThreeWayCompare>(lhs, rhs, count); - if (unlikely(count >= 128)) // [128, ∞] - return ThreeWayCompare::Then>>(lhs, rhs, count); - if (!Equals<_16>(lhs, rhs)) // [16, 16] - return ThreeWayCompare<_16>(lhs, rhs); - if (count < 32) // [17, 31] - return ThreeWayCompare>(lhs, rhs, count); - if (!Equals::Then<_16>>(lhs, rhs)) // [32, 32] - return ThreeWayCompare::Then<_16>>(lhs, rhs); - if (count < 64) // [33, 63] - return ThreeWayCompare>(lhs, rhs, count); - // [64, 127] - return ThreeWayCompare::Then>>(lhs, rhs, count); -} - -LLVM_LIBC_FUNCTION(int, memcmp, - (const void *lhs, const void *rhs, size_t count)) { - return memcmp_aarch64(reinterpret_cast(lhs), - reinterpret_cast(rhs), count); -} - -} // namespace __llvm_libc diff --git a/libc/src/string/aarch64/memcpy.cpp b/libc/src/string/aarch64/memcpy.cpp deleted file mode 100644 index 1a1fbbc026a73..0000000000000 --- a/libc/src/string/aarch64/memcpy.cpp +++ /dev/null @@ -1,77 +0,0 @@ -//===-- Implementation of memcpy ------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/string/memcpy.h" -#include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" - -namespace __llvm_libc { - -using _1 = scalar::UINT8; -using _2 = scalar::UINT16; -using _3 = Chained; -using _4 = scalar::UINT32; -using _8 = scalar::UINT64; -using _16 = Repeated; -using _32 = Repeated; -using _64 = Repeated; - -// Design rationale -// ================ -// -// Using a profiler to observe size distributions for calls into libc -// functions, it was found most operations act on a small number of bytes. -// This makes it important to favor small sizes. -// -// We have used __builtin_expect to tell the compiler to favour lower sizes as -// that will reduce the branching overhead where that would hurt most -// proportional to total cost of copying. -// -// The function is written in C++ for several reasons: -// - The compiler can __see__ the code, this is useful when performing Profile -// Guided Optimization as the optimized code can take advantage of branching -// probabilities. -// - It also allows for easier customization and favors testing multiple -// implementation parameters. -// - As compilers and processors get better, the generated code is improved -// with little change on the code side. -// This implementation has been tuned for Neoverse-N1. 
-static void memcpy_aarch64(char *__restrict dst, const char *__restrict src, - size_t count) { - if (count == 0) - return; - if (count == 1) - return Copy<_1>(dst, src); - if (count == 2) - return Copy<_2>(dst, src); - if (count == 3) - return Copy<_3>(dst, src); - if (count == 4) - return Copy<_4>(dst, src); - if (count < 8) - return Copy>(dst, src, count); - if (count < 16) - return Copy>(dst, src, count); - if (count < 32) - return Copy>(dst, src, count); - if (count < 64) - return Copy>(dst, src, count); - if (count < 128) - return Copy>(dst, src, count); - return Copy::Then>>(dst, src, count); -} - -LLVM_LIBC_FUNCTION(void *, memcpy, - (void *__restrict dst, const void *__restrict src, - size_t size)) { - memcpy_aarch64(reinterpret_cast(dst), - reinterpret_cast(src), size); - return dst; -} - -} // namespace __llvm_libc diff --git a/libc/src/string/aarch64/memset.cpp b/libc/src/string/aarch64/memset.cpp deleted file mode 100644 index fa66ffe1cc993..0000000000000 --- a/libc/src/string/aarch64/memset.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===-- Implementation of memset ------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/string/memset.h" -#include "src/__support/common.h" -#include "src/string/memory_utils/memset_utils.h" - -namespace __llvm_libc { - -using namespace __llvm_libc::aarch64_memset; - -inline static void AArch64Memset(char *dst, int value, size_t count) { - if (count == 0) - return; - if (count <= 3) { - SplatSet<_1>(dst, value); - if (count > 1) - SplatSet>(dst, value, count); - return; - } - if (count <= 8) - return SplatSet>(dst, value, count); - if (count <= 16) - return SplatSet>(dst, value, count); - if (count <= 32) - return SplatSet>(dst, value, count); - if (count <= 96) { - SplatSet<_32>(dst, value); - if (count <= 64) - return SplatSet>(dst, value, count); - SplatSet::Then<_32>>(dst, value); - SplatSet>(dst, value, count); - return; - } - if (count < 448 || value != 0 || !AArch64ZVA(dst, count)) - return SplatSet::Then>>(dst, value, count); -} - -LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) { - AArch64Memset((char *)dst, value, count); - return dst; -} - -} // namespace __llvm_libc diff --git a/libc/src/string/bzero.cpp b/libc/src/string/bzero.cpp index 3c76ef64eed5b..c57c922f6eff6 100644 --- a/libc/src/string/bzero.cpp +++ b/libc/src/string/bzero.cpp @@ -8,12 +8,12 @@ #include "src/string/bzero.h" #include "src/__support/common.h" -#include "src/string/memory_utils/memset_utils.h" +#include "src/string/memory_utils/memset_implementations.h" namespace __llvm_libc { LLVM_LIBC_FUNCTION(void, bzero, (void *ptr, size_t count)) { - GeneralPurposeMemset(reinterpret_cast(ptr), 0, count); + inline_memset(reinterpret_cast(ptr), 0, count); } } // namespace __llvm_libc diff --git a/libc/src/string/memcmp.cpp b/libc/src/string/memcmp.cpp index bb2b5e2f37791..292525e17dad0 100644 --- a/libc/src/string/memcmp.cpp +++ b/libc/src/string/memcmp.cpp @@ -7,45 +7,16 @@ //===----------------------------------------------------------------------===// #include "src/string/memcmp.h" -#include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" +#include "src/string/memory_utils/memcmp_implementations.h" #include // size_t namespace 
__llvm_libc { -static int memcmp_impl(const char *lhs, const char *rhs, size_t count) { -#if defined(__i386__) || defined(__x86_64__) - using namespace ::__llvm_libc::x86; -#else - using namespace ::__llvm_libc::scalar; -#endif - - if (count == 0) - return 0; - if (count == 1) - return ThreeWayCompare<_1>(lhs, rhs); - if (count == 2) - return ThreeWayCompare<_2>(lhs, rhs); - if (count == 3) - return ThreeWayCompare<_3>(lhs, rhs); - if (count <= 8) - return ThreeWayCompare>(lhs, rhs, count); - if (count <= 16) - return ThreeWayCompare>(lhs, rhs, count); - if (count <= 32) - return ThreeWayCompare>(lhs, rhs, count); - if (count <= 64) - return ThreeWayCompare>(lhs, rhs, count); - if (count <= 128) - return ThreeWayCompare>(lhs, rhs, count); - return ThreeWayCompare::Then>>(lhs, rhs, count); -} - LLVM_LIBC_FUNCTION(int, memcmp, (const void *lhs, const void *rhs, size_t count)) { - return memcmp_impl(static_cast(lhs), - static_cast(rhs), count); + return inline_memcmp(static_cast(lhs), + static_cast(rhs), count); } } // namespace __llvm_libc diff --git a/libc/src/string/memcpy.cpp b/libc/src/string/memcpy.cpp index 5e70e00db1b91..ff990f48a20bc 100644 --- a/libc/src/string/memcpy.cpp +++ b/libc/src/string/memcpy.cpp @@ -8,61 +8,15 @@ #include "src/string/memcpy.h" #include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" +#include "src/string/memory_utils/memcpy_implementations.h" namespace __llvm_libc { -// Design rationale -// ================ -// -// Using a profiler to observe size distributions for calls into libc -// functions, it was found most operations act on a small number of bytes. -// This makes it important to favor small sizes. -// -// The tests for `count` are in ascending order so the cost of branching is -// proportional to the cost of copying. -// -// The function is written in C++ for several reasons: -// - The compiler can __see__ the code, this is useful when performing Profile -// Guided Optimization as the optimized code can take advantage of branching -// probabilities. -// - It also allows for easier customization and favors testing multiple -// implementation parameters. -// - As compilers and processors get better, the generated code is improved -// with little change on the code side. -static void memcpy_impl(char *__restrict dst, const char *__restrict src, - size_t count) { - // Use scalar strategies (_1, _2, _3 ...) 
- using namespace __llvm_libc::scalar; - - if (count == 0) - return; - if (count == 1) - return Copy<_1>(dst, src); - if (count == 2) - return Copy<_2>(dst, src); - if (count == 3) - return Copy<_3>(dst, src); - if (count == 4) - return Copy<_4>(dst, src); - if (count < 8) - return Copy>(dst, src, count); - if (count < 16) - return Copy>(dst, src, count); - if (count < 32) - return Copy>(dst, src, count); - if (count < 64) - return Copy>(dst, src, count); - if (count < 128) - return Copy>(dst, src, count); - return Copy::Then>>(dst, src, count); -} - LLVM_LIBC_FUNCTION(void *, memcpy, (void *__restrict dst, const void *__restrict src, size_t size)) { - memcpy_impl(reinterpret_cast(dst), - reinterpret_cast(src), size); + inline_memcpy(reinterpret_cast(dst), + reinterpret_cast(src), size); return dst; } diff --git a/libc/src/string/memory_utils/elements_aarch64.h b/libc/src/string/memory_utils/elements_aarch64.h index 36d3074bac5bf..0c8990cd6f054 100644 --- a/libc/src/string/memory_utils/elements_aarch64.h +++ b/libc/src/string/memory_utils/elements_aarch64.h @@ -9,7 +9,9 @@ #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H -#if defined(__arm__) || defined(__aarch64__) +#include "src/__support/architectures.h" + +#if defined(LLVM_LIBC_ARCH_AARCH64) #include #include // size_t @@ -115,6 +117,6 @@ using _32 = __llvm_libc::scalar::_32; } // namespace aarch64 } // namespace __llvm_libc -#endif // defined(__arm__) || defined(__aarch64__) +#endif // defined(LLVM_LIBC_ARCH_AARCH64) #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_AARCH64_H diff --git a/libc/src/string/memory_utils/elements_x86.h b/libc/src/string/memory_utils/elements_x86.h index 9b32b427f76e4..e8be55b510e20 100644 --- a/libc/src/string/memory_utils/elements_x86.h +++ b/libc/src/string/memory_utils/elements_x86.h @@ -9,8 +9,9 @@ #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H -#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ - defined(_M_X64) +#include "src/__support/architectures.h" + +#if defined(LLVM_LIBC_ARCH_X86) #include // size_t #include // uint8_t, uint16_t, uint32_t, uint64_t @@ -172,7 +173,6 @@ struct Accelerator { } // namespace x86 } // namespace __llvm_libc -#endif // defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || - // defined(_M_X64) +#endif // defined(LLVM_LIBC_ARCH_X86) #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H diff --git a/libc/src/string/memory_utils/memcmp_implementations.h b/libc/src/string/memory_utils/memcmp_implementations.h new file mode 100644 index 0000000000000..a2934cefe4981 --- /dev/null +++ b/libc/src/string/memory_utils/memcmp_implementations.h @@ -0,0 +1,105 @@ +//===-- Implementation of memcmp ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP_IMPLEMENTATIONS_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP_IMPLEMENTATIONS_H + +#include "src/__support/architectures.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/elements.h" + +#include // size_t + +namespace __llvm_libc { + +static inline int inline_memcmp(const char *lhs, const char *rhs, + size_t count) { +#if defined(LLVM_LIBC_ARCH_X86) + ///////////////////////////////////////////////////////////////////////////// + // LLVM_LIBC_ARCH_X86 + ///////////////////////////////////////////////////////////////////////////// + using namespace __llvm_libc::x86; + if (count == 0) + return 0; + if (count == 1) + return ThreeWayCompare<_1>(lhs, rhs); + if (count == 2) + return ThreeWayCompare<_2>(lhs, rhs); + if (count == 3) + return ThreeWayCompare<_3>(lhs, rhs); + if (count <= 8) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 16) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 32) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 64) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 128) + return ThreeWayCompare>(lhs, rhs, count); + return ThreeWayCompare::Then>>(lhs, rhs, count); +#elif defined(LLVM_LIBC_ARCH_AARCH64) + ///////////////////////////////////////////////////////////////////////////// + // LLVM_LIBC_ARCH_AARCH64 + ///////////////////////////////////////////////////////////////////////////// + using namespace ::__llvm_libc::aarch64; + if (count == 0) // [0, 0] + return 0; + if (count == 1) // [1, 1] + return ThreeWayCompare<_1>(lhs, rhs); + if (count == 2) // [2, 2] + return ThreeWayCompare<_2>(lhs, rhs); + if (count == 3) // [3, 3] + return ThreeWayCompare<_3>(lhs, rhs); + if (count < 8) // [4, 7] + return ThreeWayCompare>(lhs, rhs, count); + if (count < 16) // [8, 15] + return ThreeWayCompare>(lhs, rhs, count); + if (unlikely(count >= 128)) // [128, ∞] + return ThreeWayCompare::Then>>(lhs, rhs, count); + if (!Equals<_16>(lhs, rhs)) // [16, 16] + return ThreeWayCompare<_16>(lhs, rhs); + if (count < 32) // [17, 31] + return ThreeWayCompare>(lhs, rhs, count); + if (!Equals::Then<_16>>(lhs, rhs)) // [32, 32] + return ThreeWayCompare::Then<_16>>(lhs, rhs); + if (count < 64) // [33, 63] + return ThreeWayCompare>(lhs, rhs, count); + // [64, 127] + return ThreeWayCompare::Then>>(lhs, rhs, count); +#else + ///////////////////////////////////////////////////////////////////////////// + // Default + ///////////////////////////////////////////////////////////////////////////// + using namespace ::__llvm_libc::scalar; + + if (count == 0) + return 0; + if (count == 1) + return ThreeWayCompare<_1>(lhs, rhs); + if (count == 2) + return ThreeWayCompare<_2>(lhs, rhs); + if (count == 3) + return ThreeWayCompare<_3>(lhs, rhs); + if (count <= 8) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 16) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 32) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 64) + return ThreeWayCompare>(lhs, rhs, count); + if (count <= 128) + return ThreeWayCompare>(lhs, rhs, count); + return ThreeWayCompare::Then>>(lhs, rhs, count); +#endif +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/memcpy_implementations.h b/libc/src/string/memory_utils/memcpy_implementations.h new file 
mode 100644 index 0000000000000..2a738f7ecf1d3 --- /dev/null +++ b/libc/src/string/memory_utils/memcpy_implementations.h @@ -0,0 +1,157 @@ +//===-- Memcpy implementation -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H + +#include "src/__support/architectures.h" +#include "src/__support/common.h" +#include "src/string/memory_utils/elements.h" +#include "src/string/memory_utils/utils.h" + +#include // size_t + +// Design rationale +// ================ +// +// Using a profiler to observe size distributions for calls into libc +// functions, it was found most operations act on a small number of bytes. +// This makes it important to favor small sizes. +// +// The tests for `count` are in ascending order so the cost of branching is +// proportional to the cost of copying. +// +// The function is written in C++ for several reasons: +// - The compiler can __see__ the code, this is useful when performing Profile +// Guided Optimization as the optimized code can take advantage of branching +// probabilities. +// - It also allows for easier customization and favors testing multiple +// implementation parameters. +// - As compilers and processors get better, the generated code is improved +// with little change on the code side. + +namespace __llvm_libc { + +static inline void inline_memcpy(char *__restrict dst, + const char *__restrict src, size_t count) { +#if defined(LLVM_LIBC_ARCH_X86) + ///////////////////////////////////////////////////////////////////////////// + // LLVM_LIBC_ARCH_X86 + ///////////////////////////////////////////////////////////////////////////// + using namespace __llvm_libc::x86; + + // Whether to use only rep;movsb. + constexpr bool kUseOnlyRepMovsb = + LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB); + + // kRepMovsBSize == -1 : Only CopyAligned is used. + // kRepMovsBSize == 0 : Only RepMovsb is used. + // else CopyAligned is used up to kRepMovsBSize and then RepMovsb. + constexpr size_t kRepMovsBSize = +#if defined(LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE) + LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE; +#else + -1; +#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE + + // Whether target supports AVX instructions. 
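  // (Descriptive note: LLVM_LIBC_IS_DEFINED collapses the preprocessor check
  // into a constexpr bool, and when __AVX__ is defined the loop block size
  // below widens from 32 to 64 bytes per iteration.)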
+ constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__); + +#if defined(__AVX__) + using LoopBlockSize = _64; +#else + using LoopBlockSize = _32; +#endif + + if (kUseOnlyRepMovsb) + return Copy(dst, src, count); + + if (count == 0) + return; + if (count == 1) + return Copy<_1>(dst, src); + if (count == 2) + return Copy<_2>(dst, src); + if (count == 3) + return Copy<_3>(dst, src); + if (count == 4) + return Copy<_4>(dst, src); + if (count < 8) + return Copy>(dst, src, count); + if (count < 16) + return Copy>(dst, src, count); + if (count < 32) + return Copy>(dst, src, count); + if (count < 64) + return Copy>(dst, src, count); + if (count < 128) + return Copy>(dst, src, count); + if (kHasAvx && count < 256) + return Copy>(dst, src, count); + if (count <= kRepMovsBSize) + return Copy::Then>>(dst, src, + count); + return Copy(dst, src, count); +#elif defined(LLVM_LIBC_ARCH_AARCH64) + ///////////////////////////////////////////////////////////////////////////// + // LLVM_LIBC_ARCH_AARCH64 + ///////////////////////////////////////////////////////////////////////////// + using namespace __llvm_libc::scalar; + if (count == 0) + return; + if (count == 1) + return Copy<_1>(dst, src); + if (count == 2) + return Copy<_2>(dst, src); + if (count == 3) + return Copy<_3>(dst, src); + if (count == 4) + return Copy<_4>(dst, src); + if (count < 8) + return Copy>(dst, src, count); + if (count < 16) + return Copy>(dst, src, count); + if (count < 32) + return Copy>(dst, src, count); + if (count < 64) + return Copy>(dst, src, count); + if (count < 128) + return Copy>(dst, src, count); + return Copy::Then>>(dst, src, count); +#else + ///////////////////////////////////////////////////////////////////////////// + // Default + ///////////////////////////////////////////////////////////////////////////// + using namespace __llvm_libc::scalar; + if (count == 0) + return; + if (count == 1) + return Copy<_1>(dst, src); + if (count == 2) + return Copy<_2>(dst, src); + if (count == 3) + return Copy<_3>(dst, src); + if (count == 4) + return Copy<_4>(dst, src); + if (count < 8) + return Copy>(dst, src, count); + if (count < 16) + return Copy>(dst, src, count); + if (count < 32) + return Copy>(dst, src, count); + if (count < 64) + return Copy>(dst, src, count); + if (count < 128) + return Copy>(dst, src, count); + return Copy::Then>>(dst, src, count); +#endif +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/memset_utils.h b/libc/src/string/memory_utils/memset_implementations.h similarity index 53% rename from libc/src/string/memory_utils/memset_utils.h rename to libc/src/string/memory_utils/memset_implementations.h index 5b955a3e30b1e..e34b13a872fca 100644 --- a/libc/src/string/memory_utils/memset_utils.h +++ b/libc/src/string/memory_utils/memset_implementations.h @@ -1,4 +1,4 @@ -//===-- Memset utils --------------------------------------------*- C++ -*-===// +//===-- Implementation of memset and bzero --------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H -#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H +#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_IMPLEMENTATIONS_H +#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_IMPLEMENTATIONS_H +#include "src/__support/architectures.h" #include "src/string/memory_utils/elements.h" #include "src/string/memory_utils/utils.h" @@ -47,13 +48,65 @@ namespace __llvm_libc { // advance. SetAlignedBlocks<64> may waste up to 63 Bytes, SetAlignedBlocks<32> // may waste up to 31 Bytes. Benchmarks showed that SetAlignedBlocks<64> was not // superior for sizes that mattered. -inline static void GeneralPurposeMemset(char *dst, unsigned char value, - size_t count) { -#if defined(__i386__) || defined(__x86_64__) - using namespace ::__llvm_libc::x86; +inline static void inline_memset(char *dst, unsigned char value, size_t count) { +#if defined(LLVM_LIBC_ARCH_X86) + ///////////////////////////////////////////////////////////////////////////// + // LLVM_LIBC_ARCH_X86 + ///////////////////////////////////////////////////////////////////////////// + using namespace __llvm_libc::x86; + if (count == 0) + return; + if (count == 1) + return SplatSet<_1>(dst, value); + if (count == 2) + return SplatSet<_2>(dst, value); + if (count == 3) + return SplatSet<_3>(dst, value); + if (count <= 8) + return SplatSet>(dst, value, count); + if (count <= 16) + return SplatSet>(dst, value, count); + if (count <= 32) + return SplatSet>(dst, value, count); + if (count <= 64) + return SplatSet>(dst, value, count); + if (count <= 128) + return SplatSet>(dst, value, count); + return SplatSet::Then>>(dst, value, count); +#elif defined(LLVM_LIBC_ARCH_AARCH64) + ///////////////////////////////////////////////////////////////////////////// + // LLVM_LIBC_ARCH_AARCH64 + ///////////////////////////////////////////////////////////////////////////// + using namespace __llvm_libc::aarch64_memset; + if (count == 0) + return; + if (count <= 3) { + SplatSet<_1>(dst, value); + if (count > 1) + SplatSet>(dst, value, count); + return; + } + if (count <= 8) + return SplatSet>(dst, value, count); + if (count <= 16) + return SplatSet>(dst, value, count); + if (count <= 32) + return SplatSet>(dst, value, count); + if (count <= 96) { + SplatSet<_32>(dst, value); + if (count <= 64) + return SplatSet>(dst, value, count); + SplatSet::Then<_32>>(dst, value); + SplatSet>(dst, value, count); + return; + } + if (count < 448 || value != 0 || !AArch64ZVA(dst, count)) + return SplatSet::Then>>(dst, value, count); #else + ///////////////////////////////////////////////////////////////////////////// + // Default + ///////////////////////////////////////////////////////////////////////////// using namespace ::__llvm_libc::scalar; -#endif if (count == 0) return; @@ -74,8 +127,9 @@ inline static void GeneralPurposeMemset(char *dst, unsigned char value, if (count <= 128) return SplatSet>(dst, value, count); return SplatSet::Then>>(dst, value, count); +#endif } } // namespace __llvm_libc -#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H +#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_IMPLEMENTATIONS_H diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h index d6047e1c482fe..f23a3240fde72 100644 --- a/libc/src/string/memory_utils/utils.h +++ b/libc/src/string/memory_utils/utils.h @@ -9,19 +9,13 @@ #ifndef LLVM_LIBC_SRC_MEMORY_UTILS_UTILS_H #define 
LLVM_LIBC_SRC_MEMORY_UTILS_UTILS_H +#include "src/__support/architectures.h" + // Cache line sizes for ARM: These values are not strictly correct since // cache line sizes depend on implementations, not architectures. There // are even implementations with cache line sizes configurable at boot // time. -#if defined(__aarch64__) -#define LLVM_LIBC_CACHELINE_SIZE 64 -#elif defined(__ARM_ARCH_5T__) -#define LLVM_LIBC_CACHELINE_SIZE 32 -#elif defined(__ARM_ARCH_7A__) -#define LLVM_LIBC_CACHELINE_SIZE 64 -#elif defined(__PPC64__) -#define LLVM_LIBC_CACHELINE_SIZE 128 -#elif defined(__i386__) || defined(__x86_64__) +#if defined(LLVM_LIBC_ARCH_AARCH64) || defined(LLVM_LIBC_ARCH_X86) #define LLVM_LIBC_CACHELINE_SIZE 64 #else #error "Unsupported platform for memory functions." diff --git a/libc/src/string/memset.cpp b/libc/src/string/memset.cpp index 945aeda234e65..549c0742dec75 100644 --- a/libc/src/string/memset.cpp +++ b/libc/src/string/memset.cpp @@ -8,13 +8,13 @@ #include "src/string/memset.h" #include "src/__support/common.h" -#include "src/string/memory_utils/memset_utils.h" +#include "src/string/memory_utils/memset_implementations.h" namespace __llvm_libc { LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) { - GeneralPurposeMemset(reinterpret_cast(dst), - static_cast(value), count); + inline_memset(reinterpret_cast(dst), + static_cast(value), count); return dst; } diff --git a/libc/src/string/stpcpy.cpp b/libc/src/string/stpcpy.cpp new file mode 100644 index 0000000000000..dd48f3cfb5e88 --- /dev/null +++ b/libc/src/string/stpcpy.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of stpcpy ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/stpcpy.h" +#include "src/string/mempcpy.h" +#include "src/string/string_utils.h" + +#include "src/__support/common.h" +#include "src/__support/sanitizer.h" + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(char *, stpcpy, + (char *__restrict dest, const char *__restrict src)) { + size_t size = internal::string_length(src) + 1; + char *result = + reinterpret_cast(__llvm_libc::mempcpy(dest, src, size)); + + if (result != nullptr) + return result - 1; + return nullptr; +} + +} // namespace __llvm_libc diff --git a/libc/src/string/stpcpy.h b/libc/src/string/stpcpy.h new file mode 100644 index 0000000000000..84d5738f0e2ba --- /dev/null +++ b/libc/src/string/stpcpy.h @@ -0,0 +1,18 @@ +//===-- Implementation header for stpcpy ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STPCPY_H +#define LLVM_LIBC_SRC_STRING_STPCPY_H + +namespace __llvm_libc { + +char *stpcpy(char *__restrict dest, const char *__restrict src); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STPCPY_H diff --git a/libc/src/string/stpncpy.cpp b/libc/src/string/stpncpy.cpp new file mode 100644 index 0000000000000..374330ede2ca8 --- /dev/null +++ b/libc/src/string/stpncpy.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of stpncpy -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/stpncpy.h" +#include "src/string/bzero.h" + +#include "src/__support/common.h" + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(char *, stpncpy, + (char *__restrict dest, const char *__restrict src, + size_t n)) { + size_t i; + // Copy up until \0 is found. + for (i = 0; i < n && src[i] != '\0'; ++i) + dest[i] = src[i]; + // When n>strlen(src), n-strlen(src) \0 are appended. + if (n > i) + __llvm_libc::bzero(dest + i, n - i); + return dest + i; +} + +} // namespace __llvm_libc diff --git a/libc/src/string/stpncpy.h b/libc/src/string/stpncpy.h new file mode 100644 index 0000000000000..6320a290b1dd2 --- /dev/null +++ b/libc/src/string/stpncpy.h @@ -0,0 +1,20 @@ +//===-- Implementation header for stpncpy -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STPNCPY_H +#define LLVM_LIBC_SRC_STRING_STPNCPY_H + +#include + +namespace __llvm_libc { + +char *stpncpy(char *__restrict dest, const char *__restrict src, size_t n); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STPNCPY_H diff --git a/libc/src/string/strndup.cpp b/libc/src/string/strndup.cpp new file mode 100644 index 0000000000000..6c904f4a646ca --- /dev/null +++ b/libc/src/string/strndup.cpp @@ -0,0 +1,35 @@ +//===-- Implementation of strndup -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strndup.h" +#include "src/string/memcpy.h" +#include "src/string/string_utils.h" + +#include "src/__support/common.h" + +#include +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(char *, strndup, (const char *src, size_t size)) { + if (src == nullptr) + return nullptr; + size_t len = internal::string_length(src); + if (len > size) + len = size; + char *dest = reinterpret_cast(::malloc(len + 1)); // NOLINT + if (dest == nullptr) + return nullptr; + char *result = + reinterpret_cast(__llvm_libc::memcpy(dest, src, len + 1)); + result[len] = '\0'; + return result; +} + +} // namespace __llvm_libc diff --git a/libc/src/string/strndup.h b/libc/src/string/strndup.h new file mode 100644 index 0000000000000..0a593a709aa97 --- /dev/null +++ b/libc/src/string/strndup.h @@ -0,0 +1,20 @@ +//===-- Implementation header for strndup -----------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRNDUP_H +#define LLVM_LIBC_SRC_STRING_STRNDUP_H + +#include + +namespace __llvm_libc { + +char *strndup(const char *src, size_t size); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRNDUP_H diff --git a/libc/src/string/x86_64/memcpy.cpp b/libc/src/string/x86_64/memcpy.cpp deleted file mode 100644 index 7f6e5b64b3a74..0000000000000 --- a/libc/src/string/x86_64/memcpy.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===-- Implementation of memcpy ------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/string/memcpy.h" -#include "src/__support/common.h" -#include "src/string/memory_utils/elements.h" - -namespace __llvm_libc { - -// Whether to use only rep;movsb. -constexpr bool kUseOnlyRepMovsb = - LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB); - -// kRepMovsBSize == -1 : Only CopyAligned is used. -// kRepMovsBSize == 0 : Only RepMovsb is used. -// else CopyAligned is used up to kRepMovsBSize and then RepMovsb. -constexpr size_t kRepMovsBSize = -#ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE - LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE; -#else - -1; -#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE - -// Whether target supports AVX instructions. -constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__); - -#ifdef __AVX__ -using LoopBlockSize = __llvm_libc::x86::_64; -#else -using LoopBlockSize = __llvm_libc::x86::_32; -#endif - -static void CopyRepMovsb(char *__restrict dst, const char *__restrict src, - size_t count) { - // FIXME: Add MSVC support with - // #include - // __movsb(reinterpret_cast(dst), - // reinterpret_cast(src), count); - asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory"); -} - -// Design rationale -// ================ -// -// Using a profiler to observe size distributions for calls into libc -// functions, it was found most operations act on a small number of bytes. 
-// This makes it important to favor small sizes. -// -// The tests for `count` are in ascending order so the cost of branching is -// proportional to the cost of copying. -// -// The function is written in C++ for several reasons: -// - The compiler can __see__ the code, this is useful when performing Profile -// Guided Optimization as the optimized code can take advantage of branching -// probabilities. -// - It also allows for easier customization and favors testing multiple -// implementation parameters. -// - As compilers and processors get better, the generated code is improved -// with little change on the code side. -static void memcpy_x86(char *__restrict dst, const char *__restrict src, - size_t count) { - // Use x86 strategies (_1, _2, _3 ...) - using namespace __llvm_libc::x86; - - if (kUseOnlyRepMovsb) - return CopyRepMovsb(dst, src, count); - - if (count == 0) - return; - if (count == 1) - return Copy<_1>(dst, src); - if (count == 2) - return Copy<_2>(dst, src); - if (count == 3) - return Copy<_3>(dst, src); - if (count == 4) - return Copy<_4>(dst, src); - if (count < 8) - return Copy>(dst, src, count); - if (count < 16) - return Copy>(dst, src, count); - if (count < 32) - return Copy>(dst, src, count); - if (count < 64) - return Copy>(dst, src, count); - if (count < 128) - return Copy>(dst, src, count); - if (kHasAvx && count < 256) - return Copy>(dst, src, count); - if (count <= kRepMovsBSize) - return Copy::Then>>(dst, src, - count); - return CopyRepMovsb(dst, src, count); -} - -LLVM_LIBC_FUNCTION(void *, memcpy, - (void *__restrict dst, const void *__restrict src, - size_t size)) { - memcpy_x86(reinterpret_cast(dst), reinterpret_cast(src), - size); - return dst; -} - -} // namespace __llvm_libc diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index 85b819c23ab60..d2367ef52f692 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -27,7 +27,7 @@ add_libc_unittest( SRCS str_to_float_test.cpp DEPENDS - libc.src.__support.str_conv_utils + libc.src.__support.str_to_float ) add_executable( diff --git a/libc/test/src/fenv/enabled_exceptions_test.cpp b/libc/test/src/fenv/enabled_exceptions_test.cpp index 7b91e98b4e94d..bf04b6418eb45 100644 --- a/libc/test/src/fenv/enabled_exceptions_test.cpp +++ b/libc/test/src/fenv/enabled_exceptions_test.cpp @@ -11,6 +11,7 @@ #include "src/fenv/fetestexcept.h" #include "src/__support/FPUtil/FEnvUtils.h" +#include "src/__support/architectures.h" #include "utils/UnitTest/FPExceptMatcher.h" #include "utils/UnitTest/Test.h" @@ -20,7 +21,7 @@ // This test enables an exception and verifies that raising that exception // triggers SIGFPE. TEST(LlvmLibcExceptionStatusTest, RaiseAndCrash) { -#ifdef __aarch64__ +#if defined(LLVM_LIBC_ARCH_AARCH64) // Few aarch64 HW implementations do not trap exceptions. We skip this test // completely on such HW. // @@ -32,7 +33,7 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndCrash) { __llvm_libc::fputil::enableExcept(FE_DIVBYZERO); if (__llvm_libc::fputil::getExcept() == 0) return; -#endif +#endif // defined(LLVM_LIBC_ARCH_AARCH64) // TODO: Install a floating point exception handler and verify that the // the expected exception was raised. 
diff --git a/libc/test/src/fenv/feenableexcept_test.cpp b/libc/test/src/fenv/feenableexcept_test.cpp
index 2158f954bcd2b..f1cb8a32ea677 100644
--- a/libc/test/src/fenv/feenableexcept_test.cpp
+++ b/libc/test/src/fenv/feenableexcept_test.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//

+#include "src/__support/architectures.h"
 #include "src/fenv/fedisableexcept.h"
 #include "src/fenv/feenableexcept.h"
 #include "src/fenv/fegetexcept.h"
@@ -15,7 +16,7 @@
 #include <fenv.h>

 TEST(LlvmLibcFEnvTest, EnableTest) {
-#ifdef __aarch64__
+#if defined(LLVM_LIBC_ARCH_AARCH64)
   // A few aarch64 HW implementations do not trap exceptions. We skip this
   // test completely on such HW.
   //
@@ -27,7 +28,7 @@
   __llvm_libc::feenableexcept(FE_DIVBYZERO);
   if (__llvm_libc::fegetexcept() == 0)
     return;
-#endif
+#endif // defined(LLVM_LIBC_ARCH_AARCH64)

   int excepts[] = {FE_DIVBYZERO, FE_INVALID, FE_INEXACT, FE_OVERFLOW,
                    FE_UNDERFLOW};
diff --git a/libc/test/src/fenv/feholdexcept_test.cpp b/libc/test/src/fenv/feholdexcept_test.cpp
index be836a4c23ce6..6bdea80f40373 100644
--- a/libc/test/src/fenv/feholdexcept_test.cpp
+++ b/libc/test/src/fenv/feholdexcept_test.cpp
@@ -9,13 +9,14 @@
 #include "src/fenv/feholdexcept.h"

 #include "src/__support/FPUtil/FEnvUtils.h"
+#include "src/__support/architectures.h"
 #include "utils/UnitTest/FPExceptMatcher.h"
 #include "utils/UnitTest/Test.h"

 #include <fenv.h>

 TEST(LlvmLibcFEnvTest, RaiseAndCrash) {
-#ifdef __aarch64__
+#if defined(LLVM_LIBC_ARCH_AARCH64)
   // A few aarch64 HW implementations do not trap exceptions. We skip this
   // test completely on such HW.
   //
@@ -27,7 +28,7 @@
   __llvm_libc::fputil::enableExcept(FE_DIVBYZERO);
   if (__llvm_libc::fputil::getExcept() == 0)
     return;
-#endif
+#endif // defined(LLVM_LIBC_ARCH_AARCH64)

   int excepts[] = {FE_DIVBYZERO, FE_INVALID, FE_INEXACT, FE_OVERFLOW,
                    FE_UNDERFLOW};
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index 8154aaa225a2e..6713bd73b7242 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -42,6 +42,26 @@ add_libc_unittest(
     libc.src.string.memrchr
 )

+add_libc_unittest(
+  stpcpy_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    stpcpy_test.cpp
+  DEPENDS
+    libc.src.string.stpcpy
+)
+
+add_libc_unittest(
+  stpncpy_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    stpncpy_test.cpp
+  DEPENDS
+    libc.src.string.stpncpy
+)
+
 add_libc_unittest(
   strcat_test
   SUITE
@@ -143,6 +163,17 @@ add_libc_unittest(
   libc.src.string.strncpy
 )

+add_libc_unittest(
+  strndup_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strndup_test.cpp
+  DEPENDS
+    libc.include.stdlib
+    libc.src.string.strndup
+)
+
 add_libc_unittest(
   strnlen_test
   SUITE
diff --git a/libc/test/src/string/stpcpy_test.cpp b/libc/test/src/string/stpcpy_test.cpp
new file mode 100644
index 0000000000000..90ec5311b9b91
--- /dev/null
+++ b/libc/test/src/string/stpcpy_test.cpp
@@ -0,0 +1,45 @@
+//===-- Unittests for stpcpy ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
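The stpcpy tests that follow pin down the one way stpcpy differs from strcpy: it returns a pointer to the NUL terminator it wrote, not to the start of the destination. A reference implementation consistent with those assertions (an illustrative sketch, not the libc entrypoint):

    // Copies src, including its NUL, into dest; returns the address of the
    // copied terminator so callers can append without recomputing strlen.
    char *stpcpy_sketch(char *dest, const char *src) {
      while ((*dest = *src) != '\0') {
        ++dest;
        ++src;
      }
      return dest; // points at the '\0' just written
    }

That return convention is what makes chained calls such as stpcpy_sketch(stpcpy_sketch(buf, "xyz"), "abc") build "xyzabc" in a single pass.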
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/stpcpy.h"
+#include "utils/UnitTest/Test.h"
+
+#include "src/string/string_utils.h"
+
+TEST(LlvmLibcStpCpyTest, EmptySrc) {
+  const char *empty = "";
+  size_t srcSize = __llvm_libc::internal::string_length(empty);
+  char dest[4] = {'a', 'b', 'c', '\0'};
+
+  char *result = __llvm_libc::stpcpy(dest, empty);
+  ASSERT_EQ(dest + srcSize, result);
+  ASSERT_EQ(result[0], '\0');
+  ASSERT_STREQ(dest, empty);
+}
+
+TEST(LlvmLibcStpCpyTest, EmptyDest) {
+  const char *abc = "abc";
+  size_t srcSize = __llvm_libc::internal::string_length(abc);
+  char dest[4];
+
+  char *result = __llvm_libc::stpcpy(dest, abc);
+  ASSERT_EQ(dest + srcSize, result);
+  ASSERT_EQ(result[0], '\0');
+  ASSERT_STREQ(dest, abc);
+}
+
+TEST(LlvmLibcStpCpyTest, OffsetDest) {
+  const char *abc = "abc";
+  size_t srcSize = __llvm_libc::internal::string_length(abc);
+  char dest[7] = {'x', 'y', 'z'};
+
+  char *result = __llvm_libc::stpcpy(dest + 3, abc);
+  ASSERT_EQ(dest + 3 + srcSize, result);
+  ASSERT_EQ(result[0], '\0');
+  ASSERT_STREQ(dest, "xyzabc");
+}
diff --git a/libc/test/src/string/stpncpy_test.cpp b/libc/test/src/string/stpncpy_test.cpp
new file mode 100644
index 0000000000000..855be58941cec
--- /dev/null
+++ b/libc/test/src/string/stpncpy_test.cpp
@@ -0,0 +1,73 @@
+//===-- Unittests for stpncpy ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/ArrayRef.h"
+#include "src/string/stpncpy.h"
+#include "utils/UnitTest/Test.h"
+#include <stddef.h> // For size_t.
+
+class LlvmLibcStpncpyTest : public __llvm_libc::testing::Test {
+public:
+  void check_stpncpy(__llvm_libc::cpp::MutableArrayRef<char> dst,
+                     const __llvm_libc::cpp::ArrayRef<char> src, size_t n,
+                     const __llvm_libc::cpp::ArrayRef<char> expected,
+                     size_t expectedCopied) {
+    // Making sure we don't overflow buffer.
+    ASSERT_GE(dst.size(), n);
+    // Making sure stpncpy returns a pointer to the end of dst.
+    ASSERT_EQ(__llvm_libc::stpncpy(dst.data(), src.data(), n),
+              dst.data() + expectedCopied);
+    // Expected must be of the same size as dst.
+    ASSERT_EQ(dst.size(), expected.size());
+    // Expected and dst are the same.
+ for (size_t i = 0; i < expected.size(); ++i) + ASSERT_EQ(expected[i], dst[i]); + } +}; + +TEST_F(LlvmLibcStpncpyTest, Untouched) { + char dst[] = {'a', 'b'}; + const char src[] = {'x', '\0'}; + const char expected[] = {'a', 'b'}; + check_stpncpy(dst, src, 0, expected, 0); +} + +TEST_F(LlvmLibcStpncpyTest, CopyOne) { + char dst[] = {'a', 'b'}; + const char src[] = {'x', 'y'}; + const char expected[] = {'x', 'b'}; // no \0 is appended + check_stpncpy(dst, src, 1, expected, 1); +} + +TEST_F(LlvmLibcStpncpyTest, CopyNull) { + char dst[] = {'a', 'b'}; + const char src[] = {'\0', 'y'}; + const char expected[] = {'\0', 'b'}; + check_stpncpy(dst, src, 1, expected, 0); +} + +TEST_F(LlvmLibcStpncpyTest, CopyPastSrc) { + char dst[] = {'a', 'b'}; + const char src[] = {'\0', 'y'}; + const char expected[] = {'\0', '\0'}; + check_stpncpy(dst, src, 2, expected, 0); +} + +TEST_F(LlvmLibcStpncpyTest, CopyTwoNoNull) { + char dst[] = {'a', 'b'}; + const char src[] = {'x', 'y'}; + const char expected[] = {'x', 'y'}; + check_stpncpy(dst, src, 2, expected, 2); +} + +TEST_F(LlvmLibcStpncpyTest, CopyTwoWithNull) { + char dst[] = {'a', 'b'}; + const char src[] = {'x', '\0'}; + const char expected[] = {'x', '\0'}; + check_stpncpy(dst, src, 2, expected, 1); +} diff --git a/libc/test/src/string/strcpy_test.cpp b/libc/test/src/string/strcpy_test.cpp index 14fa86c994d90..1b6c47741b17f 100644 --- a/libc/test/src/string/strcpy_test.cpp +++ b/libc/test/src/string/strcpy_test.cpp @@ -9,6 +9,16 @@ #include "src/string/strcpy.h" #include "utils/UnitTest/Test.h" +TEST(LlvmLibcStrCpyTest, EmptySrc) { + const char *empty = ""; + char dest[4] = {'a', 'b', 'c', '\0'}; + + char *result = __llvm_libc::strcpy(dest, empty); + ASSERT_EQ(dest, result); + ASSERT_STREQ(dest, result); + ASSERT_STREQ(dest, empty); +} + TEST(LlvmLibcStrCpyTest, EmptyDest) { const char *abc = "abc"; char dest[4]; diff --git a/libc/test/src/string/strndup_test.cpp b/libc/test/src/string/strndup_test.cpp new file mode 100644 index 0000000000000..0a17a7285b6d8 --- /dev/null +++ b/libc/test/src/string/strndup_test.cpp @@ -0,0 +1,52 @@ +//===-- Unittests for strndup ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
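Taken together, the cases above encode the POSIX stpncpy contract: copy at most n bytes, stop at the source NUL, zero-fill whatever remains of the window (CopyPastSrc), append no terminator when the source fills it (CopyTwoNoNull), and return a pointer one past the last non-NUL byte written, which is what expectedCopied counts. A reference sketch matching those expectations (illustrative only):

    #include <stddef.h>

    char *stpncpy_sketch(char *dest, const char *src, size_t n) {
      size_t i = 0;
      for (; i < n && src[i] != '\0'; ++i) // copy at most n source bytes
        dest[i] = src[i];
      char *end = dest + i; // one past the last non-NUL byte copied
      for (; i < n; ++i)    // zero-fill the rest of the window
        dest[i] = '\0';
      return end;
    }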
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strndup.h"
+#include "utils/UnitTest/Test.h"
+#include <stdlib.h>
+
+TEST(LlvmLibcstrndupTest, EmptyString) {
+  const char *empty = "";
+
+  char *result = __llvm_libc::strndup(empty, 1);
+  ASSERT_NE(result, static_cast<char *>(nullptr));
+  ASSERT_NE(empty, const_cast<const char *>(result));
+  ASSERT_STREQ(empty, result);
+  ::free(result);
+}
+
+TEST(LlvmLibcstrndupTest, AnyString) {
+  const char *abc = "abc";
+
+  char *result = __llvm_libc::strndup(abc, 3);
+
+  ASSERT_NE(result, static_cast<char *>(nullptr));
+  ASSERT_NE(abc, const_cast<const char *>(result));
+  ASSERT_STREQ(abc, result);
+  ::free(result);
+
+  result = __llvm_libc::strndup(abc, 1);
+
+  ASSERT_NE(result, static_cast<char *>(nullptr));
+  ASSERT_NE(abc, const_cast<const char *>(result));
+  ASSERT_STREQ("a", result);
+  ::free(result);
+
+  result = __llvm_libc::strndup(abc, 10);
+
+  ASSERT_NE(result, static_cast<char *>(nullptr));
+  ASSERT_NE(abc, const_cast<const char *>(result));
+  ASSERT_STREQ(abc, result);
+  ::free(result);
+}
+
+TEST(LlvmLibcstrndupTest, NullPtr) {
+  char *result = __llvm_libc::strndup(nullptr, 0);
+
+  ASSERT_EQ(result, static_cast<char *>(nullptr));
+}
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 9a2a125d301b7..9ae0ce6ff8097 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -10,6 +10,7 @@

 #include "src/__support/CPP/StringView.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/architectures.h"
 #include "utils/UnitTest/FPMatcher.h"

 #include <cmath>
@@ -44,7 +45,7 @@ template <> struct Precision<double> {
   static constexpr unsigned int value = 53;
 };

-#if !(defined(__x86_64__) || defined(__i386__))
+#if !(defined(LLVM_LIBC_ARCH_X86))
 template <> struct Precision<long double> {
   static constexpr unsigned int value = 64;
 };
@@ -100,9 +101,7 @@ class MPFRNumber {
     mpfr_set(value, other.value, MPFR_RNDN);
   }

-  ~MPFRNumber() {
-    mpfr_clear(value);
-  }
+  ~MPFRNumber() { mpfr_clear(value); }

   MPFRNumber &operator=(const MPFRNumber &rhs) {
     mpfrPrecision = rhs.mpfrPrecision;
diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index d2713a9bfe4bf..5f81b3ea8496c 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -78,6 +78,10 @@
   exceeds the maximum supported size, as required by the C++ standard.
   Previously the type ``std::length_error`` was used.

+- Removed the nonstandard methods ``std::chrono::file_clock::to_time_t`` and
+  ``std::chrono::file_clock::from_time_t``; neither libstdc++ nor MSVC STL
+  had such methods.
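Context for the Precision values above: the trait hands MPFR the significand width of each host type, where 24 and 53 are the IEEE-754 binary32 and binary64 widths and 64 is the significand of the x87 80-bit extended format. std::numeric_limits reports the same quantities, so on an IEEE-754 host the table can be cross-checked directly:

    #include <limits>

    static_assert(std::numeric_limits<float>::digits == 24, "IEEE binary32");
    static_assert(std::numeric_limits<double>::digits == 53, "IEEE binary64");
    // long double varies by target: 64 for the x87 80-bit format on x86,
    // commonly 53 (same as double) or 113 (binary128) elsewhere.
    constexpr int long_double_digits = std::numeric_limits<long double>::digits;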
+
 ABI Changes
 -----------
diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv
index 06bf8cd5caf93..4f80bae0ef1b5 100644
--- a/libcxx/docs/Status/Cxx20Issues.csv
+++ b/libcxx/docs/Status/Cxx20Issues.csv
@@ -276,7 +276,7 @@
 "`3367 <https://wg21.link/LWG3367>`__","Integer-class conversions should not throw","Prague","",""
 "`3369 <https://wg21.link/LWG3369>`__","``span``\ 's deduction-guide for built-in arrays doesn't work","Prague","|Complete|","14.0"
 "`3371 <https://wg21.link/LWG3371>`__","``visit_format_arg``\ and ``make_format_args``\ are not hidden friends","Prague","|Complete|","14.0","|format|"
-"`3372 <https://wg21.link/LWG3372>`__","``vformat_to``\ should not try to deduce ``Out``\ twice","Prague","Prague","|Complete|","14.0","|format|"
+"`3372 <https://wg21.link/LWG3372>`__","``vformat_to``\ should not try to deduce ``Out``\ twice","Prague","|Complete|","14.0","|format|"
 "`3373 <https://wg21.link/LWG3373>`__","``{to,from}_chars_result``\ and ``format_to_n_result``\ need the ""we really mean what we say"" wording","Prague","","","|format|"
 "`3374 <https://wg21.link/LWG3374>`__","P0653 + P1006 should have made the other ``std::to_address``\ overload ``constexpr``\ ","Prague","|Complete|","12.0"
 "`3375 <https://wg21.link/LWG3375>`__","``decay``\ in ``viewable_range``\ should be ``remove_cvref``\ ","Prague","","","|ranges|"
diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst
index 911ee2e3998f4..c1716566bff8d 100644
--- a/libcxx/docs/index.rst
+++ b/libcxx/docs/index.rst
@@ -105,6 +105,7 @@ Compiler Versions Restrictions Support policy
 ============ =============== ========================== =====================
 Clang        12, 13                                     latest two stable releases per `LLVM's release page <https://releases.llvm.org>`_
 AppleClang   12                                         latest stable release per `Xcode's release page <https://developer.apple.com/documentation/xcode-release-notes>`_
+Open XL      17.1 (AIX)                                 latest stable release per `Open XL's documentation page <https://www.ibm.com/docs/en/openxl-c-and-cpp-aix>`_
 GCC          11              In C++11 or later only     latest stable release per `GCC's release page <https://gcc.gnu.org/releases.html>`_
 ============ =============== ========================== =====================
@@ -117,6 +118,7 @@
 macOS 10.9+     i386, x86_64, arm64       Building the shared library itself req
 FreeBSD 10+     i386, x86_64, arm
 Linux           i386, x86_64, arm, arm64
 Windows         x86_64
+AIX             powerpc, powerpc64
 =============== ========================= ============================

 Generally speaking, libc++ should work on any platform that provides a fairly complete
diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support
index 4d867167c2b16..2242a69085298 100644
--- a/libcxx/include/__threading_support
+++ b/libcxx/include/__threading_support
@@ -29,16 +29,9 @@
 # include <__external_threading>
 #elif !defined(_LIBCPP_HAS_NO_THREADS)

-#if defined(__APPLE__) || defined(__MVS__)
-# define _LIBCPP_NO_NATIVE_SEMAPHORES
-#endif
-
 #if defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# include <pthread.h>
# include <sched.h>
-# ifndef _LIBCPP_NO_NATIVE_SEMAPHORES
-#  include <semaphore.h>
-# endif
 #elif defined(_LIBCPP_HAS_THREAD_API_C11)
# include <threads.h>
 #endif

@@ -78,12 +71,6 @@
 typedef pthread_mutex_t __libcpp_recursive_mutex_t;
 typedef pthread_cond_t __libcpp_condvar_t;
 #define _LIBCPP_CONDVAR_INITIALIZER PTHREAD_COND_INITIALIZER

-#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES
-// Semaphore
-typedef sem_t __libcpp_semaphore_t;
-# define _LIBCPP_SEMAPHORE_MAX SEM_VALUE_MAX
-#endif
-
 // Execute once
 typedef pthread_once_t __libcpp_exec_once_flag;
 #define _LIBCPP_EXEC_ONCE_INITIALIZER PTHREAD_ONCE_INIT
@@ -149,12 +136,6 @@
 typedef void* __libcpp_recursive_mutex_t[5];
 typedef void* __libcpp_condvar_t;
 #define _LIBCPP_CONDVAR_INITIALIZER 0

-// Semaphore
-typedef void* __libcpp_semaphore_t;
-#if defined(_LIBCPP_HAS_THREAD_API_WIN32)
-# define _LIBCPP_SEMAPHORE_MAX (::std::numeric_limits<long>::max())
-#endif
-
 // Execute Once
 typedef void*
__libcpp_exec_once_flag; #define _LIBCPP_EXEC_ONCE_INITIALIZER 0 @@ -219,26 +200,6 @@ int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m, _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv); -#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES - -// Semaphore -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem); - -_LIBCPP_THREAD_ABI_VISIBILITY -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns); - -#endif // _LIBCPP_NO_NATIVE_SEMAPHORES - // Execute once _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_execute_once(__libcpp_exec_once_flag *flag, @@ -452,38 +413,6 @@ int __libcpp_condvar_destroy(__libcpp_condvar_t *__cv) return pthread_cond_destroy(__cv); } -#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES - -// Semaphore -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init) -{ - return sem_init(__sem, 0, __init) == 0; -} - -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem) -{ - return sem_destroy(__sem) == 0; -} - -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem) -{ - return sem_post(__sem) == 0; -} - -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem) -{ - return sem_wait(__sem) == 0; -} - -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, chrono::nanoseconds const& __ns) -{ - auto const __abs_time = chrono::system_clock::now().time_since_epoch() + __ns; - __libcpp_timespec_t __ts = __thread_detail::__convert_to_timespec(__abs_time); - return sem_timedwait(__sem, &__ts) == 0; -} - -#endif //_LIBCPP_NO_NATIVE_SEMAPHORES - // Execute once int __libcpp_execute_once(__libcpp_exec_once_flag *flag, void (*init_routine)()) { diff --git a/libcxx/include/chrono b/libcxx/include/chrono index d05b3dbe37557..ec510a99b5831 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -2798,19 +2798,6 @@ struct _FilesystemClock { static _LIBCPP_CONSTEXPR_AFTER_CXX11 const bool is_steady = false; _LIBCPP_AVAILABILITY_FILESYSTEM _LIBCPP_FUNC_VIS static time_point now() noexcept; - - _LIBCPP_INLINE_VISIBILITY - static time_t to_time_t(const time_point& __t) noexcept { - typedef chrono::duration __secs; - return time_t( - chrono::duration_cast<__secs>(__t.time_since_epoch()).count()); - } - - _LIBCPP_INLINE_VISIBILITY - static time_point from_time_t(time_t __t) noexcept { - typedef chrono::duration __secs; - return time_point(__secs(__t)); - } }; _LIBCPP_END_NAMESPACE_FILESYSTEM #endif // !_LIBCPP_CXX03_LANG diff --git a/libcxx/include/locale b/libcxx/include/locale index c20e15f3e750b..2d37521de9a86 100644 --- a/libcxx/include/locale +++ b/libcxx/include/locale @@ -1360,6 +1360,18 @@ protected: long double __v) const; virtual iter_type do_put(iter_type __s, ios_base& __iob, char_type __fl, const void* __v) const; + + template + _LIBCPP_HIDE_FROM_ABI inline + _OutputIterator __do_put_integral(iter_type __s, ios_base& __iob, + char_type __fl, _Integral __v, + char const* __len) const; + + template + _LIBCPP_HIDE_FROM_ABI inline + _OutputIterator __do_put_floating_point(iter_type __s, ios_base& __iob, + char_type __fl, _Float __v, + char const* __len) const; }; template @@ -1456,19 +1468,22 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, 
ios_base& __iob, } template +template +_LIBCPP_HIDE_FROM_ABI inline _OutputIterator -num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, - char_type __fl, long __v) const +num_put<_CharT, _OutputIterator>::__do_put_integral(iter_type __s, ios_base& __iob, + char_type __fl, _Integral __v, + char const* __len) const { // Stage 1 - Get number in narrow char char __fmt[6] = {'%', 0}; - const char* __len = "l"; - this->__format_int(__fmt+1, __len, true, __iob.flags()); + this->__format_int(__fmt+1, __len, is_signed<_Integral>::value, __iob.flags()); // Worst case is octal, with showbase enabled. Note that octal is always // printed as an unsigned value. + using _Unsigned = typename make_unsigned<_Integral>::type; _LIBCPP_CONSTEXPR const unsigned __nbuf - = (numeric_limits::digits / 3) // 1 char per 3 bits - + ((numeric_limits::digits % 3) != 0) // round up + = (numeric_limits<_Unsigned>::digits / 3) // 1 char per 3 bits + + ((numeric_limits<_Unsigned>::digits % 3) != 0) // round up + 2; // base prefix + terminating null character char __nar[__nbuf]; int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v); @@ -1484,33 +1499,20 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, return __pad_and_output(__s, __o, __op, __oe, __iob, __fl); } +template +_OutputIterator +num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, + char_type __fl, long __v) const +{ + return this->__do_put_integral(__s, __iob, __fl, __v, "l"); +} + template _OutputIterator num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long long __v) const { - // Stage 1 - Get number in narrow char - char __fmt[8] = {'%', 0}; - const char* __len = "ll"; - this->__format_int(__fmt+1, __len, true, __iob.flags()); - // Worst case is octal, with showbase enabled. Note that octal is always - // printed as an unsigned value. - _LIBCPP_CONSTEXPR const unsigned __nbuf - = (numeric_limits::digits / 3) // 1 char per 3 bits - + ((numeric_limits::digits % 3) != 0) // round up - + 2; // base prefix + terminating null character - char __nar[__nbuf]; - int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v); - char* __ne = __nar + __nc; - char* __np = this->__identify_padding(__nar, __ne, __iob); - // Stage 2 - Widen __nar while adding thousands separators - char_type __o[2*(__nbuf-1) - 1]; - char_type* __op; // pad here - char_type* __oe; // end of output - this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc()); - // [__o, __oe) contains thousands_sep'd wide number - // Stage 3 & 4 - return __pad_and_output(__s, __o, __op, __oe, __iob, __fl); + return this->__do_put_integral(__s, __iob, __fl, __v, "ll"); } template @@ -1518,27 +1520,7 @@ _OutputIterator num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long __v) const { - // Stage 1 - Get number in narrow char - char __fmt[6] = {'%', 0}; - const char* __len = "l"; - this->__format_int(__fmt+1, __len, false, __iob.flags()); - // Worst case is octal, with showbase enabled. 
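The __nbuf arithmetic used throughout these do_put overloads is worth unpacking: octal needs one character per 3 bits, rounded up, plus two more for the showbase prefix "0" and the terminating NUL. For a 64-bit integral that is 21 + 1 + 2 = 24 bytes. The same computation as a compile-time check (the names are illustrative, not libc++'s):

    #include <limits>

    template <class Unsigned>
    constexpr unsigned octal_buf_size() {
      return (std::numeric_limits<Unsigned>::digits / 3)        // 1 char per 3 bits
           + ((std::numeric_limits<Unsigned>::digits % 3) != 0) // round up
           + 2;                                                 // "0" prefix + '\0'
    }

    static_assert(octal_buf_size<unsigned long long>() == 24,
                  "worst case for a 64-bit value");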
- _LIBCPP_CONSTEXPR const unsigned __nbuf - = (numeric_limits::digits / 3) // 1 char per 3 bits - + ((numeric_limits::digits % 3) != 0) // round up - + 2; // base prefix + terminating null character - char __nar[__nbuf]; - int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v); - char* __ne = __nar + __nc; - char* __np = this->__identify_padding(__nar, __ne, __iob); - // Stage 2 - Widen __nar while adding thousands separators - char_type __o[2*(__nbuf-1) - 1]; - char_type* __op; // pad here - char_type* __oe; // end of output - this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc()); - // [__o, __oe) contains thousands_sep'd wide number - // Stage 3 & 4 - return __pad_and_output(__s, __o, __op, __oe, __iob, __fl); + return this->__do_put_integral(__s, __iob, __fl, __v, "l"); } template @@ -1546,37 +1528,19 @@ _OutputIterator num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, unsigned long long __v) const { - // Stage 1 - Get number in narrow char - char __fmt[8] = {'%', 0}; - const char* __len = "ll"; - this->__format_int(__fmt+1, __len, false, __iob.flags()); - // Worst case is octal, with showbase enabled. - _LIBCPP_CONSTEXPR const unsigned __nbuf - = (numeric_limits::digits / 3) // 1 char per 3 bits - + ((numeric_limits::digits % 3) != 0) // round up - + 2; // base prefix + terminating null character - char __nar[__nbuf]; - int __nc = __libcpp_snprintf_l(__nar, sizeof(__nar), _LIBCPP_GET_C_LOCALE, __fmt, __v); - char* __ne = __nar + __nc; - char* __np = this->__identify_padding(__nar, __ne, __iob); - // Stage 2 - Widen __nar while adding thousands separators - char_type __o[2*(__nbuf-1) - 1]; - char_type* __op; // pad here - char_type* __oe; // end of output - this->__widen_and_group_int(__nar, __np, __ne, __o, __op, __oe, __iob.getloc()); - // [__o, __oe) contains thousands_sep'd wide number - // Stage 3 & 4 - return __pad_and_output(__s, __o, __op, __oe, __iob, __fl); + return this->__do_put_integral(__s, __iob, __fl, __v, "ll"); } template +template +_LIBCPP_HIDE_FROM_ABI inline _OutputIterator -num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, - char_type __fl, double __v) const +num_put<_CharT, _OutputIterator>::__do_put_floating_point(iter_type __s, ios_base& __iob, + char_type __fl, _Float __v, + char const* __len) const { // Stage 1 - Get number in narrow char char __fmt[8] = {'%', 0}; - const char* __len = ""; bool __specify_precision = this->__format_float(__fmt+1, __len, __iob.flags()); const unsigned __nbuf = 30; char __nar[__nbuf]; @@ -1620,55 +1584,20 @@ num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, return __s; } +template +_OutputIterator +num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, + char_type __fl, double __v) const +{ + return this->__do_put_floating_point(__s, __iob, __fl, __v, ""); +} + template _OutputIterator num_put<_CharT, _OutputIterator>::do_put(iter_type __s, ios_base& __iob, char_type __fl, long double __v) const { - // Stage 1 - Get number in narrow char - char __fmt[8] = {'%', 0}; - const char* __len = "L"; - bool __specify_precision = this->__format_float(__fmt+1, __len, __iob.flags()); - const unsigned __nbuf = 30; - char __nar[__nbuf]; - char* __nb = __nar; - int __nc; - if (__specify_precision) - __nc = __libcpp_snprintf_l(__nb, __nbuf, _LIBCPP_GET_C_LOCALE, __fmt, - (int)__iob.precision(), __v); - else - __nc = __libcpp_snprintf_l(__nb, __nbuf, _LIBCPP_GET_C_LOCALE, __fmt, __v); - 
unique_ptr __nbh(nullptr, free); - if (__nc > static_cast(__nbuf-1)) - { - if (__specify_precision) - __nc = __libcpp_asprintf_l(&__nb, _LIBCPP_GET_C_LOCALE, __fmt, (int)__iob.precision(), __v); - else - __nc = __libcpp_asprintf_l(&__nb, _LIBCPP_GET_C_LOCALE, __fmt, __v); - if (__nc == -1) - __throw_bad_alloc(); - __nbh.reset(__nb); - } - char* __ne = __nb + __nc; - char* __np = this->__identify_padding(__nb, __ne, __iob); - // Stage 2 - Widen __nar while adding thousands separators - char_type __o[2*(__nbuf-1) - 1]; - char_type* __ob = __o; - unique_ptr __obh(0, free); - if (__nb != __nar) - { - __ob = (char_type*)malloc(2*static_cast(__nc)*sizeof(char_type)); - if (__ob == 0) - __throw_bad_alloc(); - __obh.reset(__ob); - } - char_type* __op; // pad here - char_type* __oe; // end of output - this->__widen_and_group_float(__nb, __np, __ne, __ob, __op, __oe, __iob.getloc()); - // [__o, __oe) contains thousands_sep'd wide number - // Stage 3 & 4 - __s = __pad_and_output(__s, __ob, __op, __oe, __iob, __fl); - return __s; + return this->__do_put_floating_point(__s, __iob, __fl, __v, "L"); } template @@ -2963,51 +2892,31 @@ money_get<_CharT, _InputIterator>::__do_get(iter_type& __b, iter_type __e, } break; case money_base::sign: - if (__psn.size() + __nsn.size() > 0) + if (__psn.size() > 0 && *__b == __psn[0]) { - if (__psn.size() == 0 || __nsn.size() == 0) - { // sign is optional - if (__psn.size() > 0) - { // __nsn.size() == 0 - if (*__b == __psn[0]) - { - ++__b; - if (__psn.size() > 1) - __trailing_sign = &__psn; - } - else - __neg = true; - } - else if (*__b == __nsn[0]) // __nsn.size() > 0 && __psn.size() == 0 - { - ++__b; - __neg = true; - if (__nsn.size() > 1) - __trailing_sign = &__nsn; - } - } - else // sign is required - { - if (*__b == __psn[0]) - { - ++__b; - if (__psn.size() > 1) - __trailing_sign = &__psn; - } - else if (*__b == __nsn[0]) - { - ++__b; - __neg = true; - if (__nsn.size() > 1) - __trailing_sign = &__nsn; - } - else - { - __err |= ios_base::failbit; - return false; - } - } + ++__b; + __neg = false; + if (__psn.size() > 1) + __trailing_sign = &__psn; + break; + } + if (__nsn.size() > 0 && *__b == __nsn[0]) + { + ++__b; + __neg = true; + if (__nsn.size() > 1) + __trailing_sign = &__nsn; + break; } + if (__psn.size() > 0 && __nsn.size() > 0) + { // sign is required + __err |= ios_base::failbit; + return false; + } + if (__psn.size() == 0 && __nsn.size() == 0) + // locale has no way of specifying a sign. Use the initial value of __neg as a default + break; + __neg = (__nsn.size() == 0); break; case money_base::symbol: { diff --git a/libcxx/include/semaphore b/libcxx/include/semaphore index 4f9ecd0461b26..db03fb967ed17 100644 --- a/libcxx/include/semaphore +++ b/libcxx/include/semaphore @@ -67,10 +67,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD /* -__atomic_semaphore_base is the general-case implementation, to be used for -user-requested least-max values that exceed the OS implementation support -(incl. when the OS has no support of its own) and for binary semaphores. - +__atomic_semaphore_base is the general-case implementation. It is a typical Dijkstra semaphore algorithm over atomics, wait and notify functions. It avoids contention against users' own use of those facilities. 
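With the platform-semaphore branch gone, every counting_semaphore sits on this one algorithm: release is a fetch_add plus a notify, and acquire is a compare-exchange loop that never drives the count below zero. A standalone sketch of the core in C++20 terms (simplified: no timed wait, no backoff policy, and plain std::atomic rather than libc++'s internals):

    #include <atomic>
    #include <cstddef>

    class atomic_semaphore_sketch {
      std::atomic<std::ptrdiff_t> count_;

    public:
      explicit atomic_semaphore_sketch(std::ptrdiff_t initial) : count_(initial) {}

      void release(std::ptrdiff_t update = 1) {
        count_.fetch_add(update, std::memory_order_release);
        count_.notify_all(); // wake blocked acquirers (C++20 atomic notify)
      }

      bool try_acquire() {
        auto old = count_.load(std::memory_order_acquire);
        while (true) {
          if (old == 0)
            return false; // nothing to take
          // On failure the CAS reloads `old`, so the loop retries until it
          // either observes zero or successfully decrements the count.
          if (count_.compare_exchange_weak(old, old - 1,
                                           std::memory_order_acquire,
                                           std::memory_order_relaxed))
            return true;
        }
      }

      void acquire() {
        while (!try_acquire())
          count_.wait(0, std::memory_order_acquire); // block while count == 0
      }
    };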
@@ -108,81 +105,30 @@ public: _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY bool try_acquire_for(chrono::duration const& __rel_time) { - auto const __test_fn = [this]() -> bool { - auto __old = __a.load(memory_order_acquire); - while(1) { - if (__old == 0) - return false; - if(__a.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed)) - return true; - } - }; + if (__rel_time == chrono::duration::zero()) + return try_acquire(); + auto const __test_fn = [this]() { return try_acquire(); }; return __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy(), __rel_time); } -}; - -#ifndef _LIBCPP_NO_NATIVE_SEMAPHORES - -/* - -__platform_semaphore_base a simple wrapper for the OS semaphore type. That -is, every call is routed to the OS in the most direct manner possible. - -*/ - -class __platform_semaphore_base -{ - __libcpp_semaphore_t __semaphore; - -public: - _LIBCPP_INLINE_VISIBILITY - explicit __platform_semaphore_base(ptrdiff_t __count) : - __semaphore() - { - __libcpp_semaphore_init(&__semaphore, __count); - } - _LIBCPP_INLINE_VISIBILITY - ~__platform_semaphore_base() { - __libcpp_semaphore_destroy(&__semaphore); - } - _LIBCPP_INLINE_VISIBILITY - void release(ptrdiff_t __update) - { - for(; __update; --__update) - __libcpp_semaphore_post(&__semaphore); - } - _LIBCPP_INLINE_VISIBILITY - void acquire() - { - __libcpp_semaphore_wait(&__semaphore); - } - _LIBCPP_INLINE_VISIBILITY - bool try_acquire_for(chrono::nanoseconds __rel_time) + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY + bool try_acquire() { - return __libcpp_semaphore_wait_timed(&__semaphore, __rel_time); + auto __old = __a.load(memory_order_acquire); + while (true) { + if (__old == 0) + return false; + if (__a.compare_exchange_strong(__old, __old - 1, memory_order_acquire, memory_order_relaxed)) + return true; + } } }; -template -using __semaphore_base = - typename conditional<(__least_max_value > 1 && __least_max_value <= _LIBCPP_SEMAPHORE_MAX), - __platform_semaphore_base, - __atomic_semaphore_base>::type; - -#else - -template -using __semaphore_base = - __atomic_semaphore_base; - #define _LIBCPP_SEMAPHORE_MAX (numeric_limits::max()) -#endif //_LIBCPP_NO_NATIVE_SEMAPHORES - template class counting_semaphore { - __semaphore_base<__least_max_value> __semaphore; + __atomic_semaphore_base __semaphore; public: static constexpr ptrdiff_t max() noexcept { @@ -215,14 +161,14 @@ public: _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY bool try_acquire() { - return try_acquire_for(chrono::nanoseconds::zero()); + return __semaphore.try_acquire(); } template _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY bool try_acquire_until(chrono::time_point const& __abs_time) { auto const current = Clock::now(); - if(current >= __abs_time) + if (current >= __abs_time) return try_acquire(); else return try_acquire_for(__abs_time - current); diff --git a/libcxx/include/vector b/libcxx/include/vector index 80819080a9fbe..f19aaa1cbd7b6 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -315,167 +315,37 @@ template class __vector_base : protected __vector_base_common // This base class is historical, but it needs to remain for ABI compatibility { -public: - typedef _Allocator allocator_type; - typedef allocator_traits __alloc_traits; - typedef typename __alloc_traits::size_type size_type; -protected: - typedef _Tp value_type; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef typename __alloc_traits::difference_type difference_type; - 
typedef typename __alloc_traits::pointer pointer; - typedef typename __alloc_traits::const_pointer const_pointer; - typedef pointer iterator; - typedef const_pointer const_iterator; + typedef _Allocator allocator_type; + typedef typename allocator_traits::pointer pointer; - pointer __begin_; - pointer __end_; - __compressed_pair __end_cap_; - - _LIBCPP_INLINE_VISIBILITY - allocator_type& __alloc() _NOEXCEPT - {return __end_cap_.second();} - _LIBCPP_INLINE_VISIBILITY - const allocator_type& __alloc() const _NOEXCEPT - {return __end_cap_.second();} - _LIBCPP_INLINE_VISIBILITY - pointer& __end_cap() _NOEXCEPT - {return __end_cap_.first();} - _LIBCPP_INLINE_VISIBILITY - const pointer& __end_cap() const _NOEXCEPT - {return __end_cap_.first();} +protected: + pointer __begin_; + pointer __end_; + __compressed_pair __end_cap_; _LIBCPP_INLINE_VISIBILITY __vector_base() - _NOEXCEPT_(is_nothrow_default_constructible::value); - _LIBCPP_INLINE_VISIBILITY __vector_base(const allocator_type& __a); -#ifndef _LIBCPP_CXX03_LANG - _LIBCPP_INLINE_VISIBILITY __vector_base(allocator_type&& __a) _NOEXCEPT; -#endif - ~__vector_base(); - - _LIBCPP_INLINE_VISIBILITY - void clear() _NOEXCEPT {__destruct_at_end(__begin_);} - _LIBCPP_INLINE_VISIBILITY - size_type capacity() const _NOEXCEPT - {return static_cast(__end_cap() - __begin_);} - - _LIBCPP_INLINE_VISIBILITY - void __destruct_at_end(pointer __new_last) _NOEXCEPT; - - _LIBCPP_INLINE_VISIBILITY - void __copy_assign_alloc(const __vector_base& __c) - {__copy_assign_alloc(__c, integral_constant());} - - _LIBCPP_INLINE_VISIBILITY - void __move_assign_alloc(__vector_base& __c) - _NOEXCEPT_( - !__alloc_traits::propagate_on_container_move_assignment::value || - is_nothrow_move_assignable::value) - {__move_assign_alloc(__c, integral_constant());} - - _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI - void __throw_length_error() const { -#ifndef _LIBCPP_NO_EXCEPTIONS - __vector_base_common::__throw_length_error(); -#else - _VSTD::abort(); -#endif - } - - _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI - void __throw_out_of_range() const { -#ifndef _LIBCPP_NO_EXCEPTIONS - __vector_base_common::__throw_out_of_range(); -#else - _VSTD::abort(); -#endif - } - -private: - _LIBCPP_INLINE_VISIBILITY - void __copy_assign_alloc(const __vector_base& __c, true_type) - { - if (__alloc() != __c.__alloc()) - { - clear(); - __alloc_traits::deallocate(__alloc(), __begin_, capacity()); - __begin_ = __end_ = __end_cap() = nullptr; - } - __alloc() = __c.__alloc(); - } - - _LIBCPP_INLINE_VISIBILITY - void __copy_assign_alloc(const __vector_base&, false_type) - {} - - _LIBCPP_INLINE_VISIBILITY - void __move_assign_alloc(__vector_base& __c, true_type) - _NOEXCEPT_(is_nothrow_move_assignable::value) - { - __alloc() = _VSTD::move(__c.__alloc()); - } - - _LIBCPP_INLINE_VISIBILITY - void __move_assign_alloc(__vector_base&, false_type) - _NOEXCEPT - {} -}; - -template -inline _LIBCPP_INLINE_VISIBILITY -void -__vector_base<_Tp, _Allocator>::__destruct_at_end(pointer __new_last) _NOEXCEPT -{ - pointer __soon_to_be_end = __end_; - while (__new_last != __soon_to_be_end) - __alloc_traits::destroy(__alloc(), _VSTD::__to_address(--__soon_to_be_end)); - __end_ = __new_last; -} - -template -inline _LIBCPP_INLINE_VISIBILITY -__vector_base<_Tp, _Allocator>::__vector_base() _NOEXCEPT_(is_nothrow_default_constructible::value) - : __begin_(nullptr), - __end_(nullptr), - __end_cap_(nullptr, __default_init_tag()) -{ -} + : __begin_(nullptr), + __end_(nullptr), + __end_cap_(nullptr, __default_init_tag()) {} -template -inline 
_LIBCPP_INLINE_VISIBILITY -__vector_base<_Tp, _Allocator>::__vector_base(const allocator_type& __a) - : __begin_(nullptr), - __end_(nullptr), - __end_cap_(nullptr, __a) -{ -} + _LIBCPP_INLINE_VISIBILITY __vector_base(const allocator_type& __a) + : __begin_(nullptr), + __end_(nullptr), + __end_cap_(nullptr, __a) {} #ifndef _LIBCPP_CXX03_LANG -template -inline _LIBCPP_INLINE_VISIBILITY -__vector_base<_Tp, _Allocator>::__vector_base(allocator_type&& __a) _NOEXCEPT - : __begin_(nullptr), - __end_(nullptr), - __end_cap_(nullptr, _VSTD::move(__a)) {} + _LIBCPP_INLINE_VISIBILITY __vector_base(allocator_type&& __a) _NOEXCEPT + : __begin_(nullptr), + __end_(nullptr), + __end_cap_(nullptr, _VSTD::move(__a)) {} #endif - -template -__vector_base<_Tp, _Allocator>::~__vector_base() -{ - if (__begin_ != nullptr) - { - clear(); - __alloc_traits::deallocate(__alloc(), __begin_, capacity()); - } -} +}; template */> class _LIBCPP_TEMPLATE_VIS vector + // This base class is historical, but it needs to remain for ABI compatibility. : private __vector_base<_Tp, _Allocator> { private: @@ -485,17 +355,17 @@ public: typedef vector __self; typedef _Tp value_type; typedef _Allocator allocator_type; - typedef typename __base::__alloc_traits __alloc_traits; - typedef typename __base::reference reference; - typedef typename __base::const_reference const_reference; - typedef typename __base::size_type size_type; - typedef typename __base::difference_type difference_type; - typedef typename __base::pointer pointer; - typedef typename __base::const_pointer const_pointer; + typedef allocator_traits __alloc_traits; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef typename __alloc_traits::size_type size_type; + typedef typename __alloc_traits::difference_type difference_type; + typedef typename __alloc_traits::pointer pointer; + typedef typename __alloc_traits::const_pointer const_pointer; typedef __wrap_iter iterator; typedef __wrap_iter const_iterator; - typedef _VSTD::reverse_iterator reverse_iterator; - typedef _VSTD::reverse_iterator const_reverse_iterator; + typedef _VSTD::reverse_iterator reverse_iterator; + typedef _VSTD::reverse_iterator const_reverse_iterator; static_assert((is_same::value), "Allocator::value_type must be same type as value_type"); @@ -557,10 +427,16 @@ public: _LIBCPP_INLINE_VISIBILITY ~vector() { - __annotate_delete(); + __annotate_delete(); #if _LIBCPP_DEBUG_LEVEL == 2 - __get_db()->__erase_c(this); + __get_db()->__erase_c(this); #endif + + if (this->__begin_ != nullptr) + { + __clear(); + __alloc_traits::deallocate(__alloc(), this->__begin_, capacity()); + } } vector(const vector& __x); @@ -665,7 +541,7 @@ public: {return static_cast(this->__end_ - this->__begin_);} _LIBCPP_INLINE_VISIBILITY size_type capacity() const _NOEXCEPT - {return __base::capacity();} + {return static_cast(__end_cap() - this->__begin_);} _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY bool empty() const _NOEXCEPT {return this->__begin_ == this->__end_;} @@ -778,7 +654,7 @@ public: void clear() _NOEXCEPT { size_type __old_size = size(); - __base::clear(); + __clear(); __annotate_shrink(__old_size); __invalidate_all_iterators(); } @@ -839,7 +715,7 @@ private: { __invalidate_iterators_past(__new_last); size_type __old_size = size(); - __base::__destruct_at_end(__new_last); + __base_destruct_at_end(__new_last); __annotate_shrink(__old_size); } @@ -934,6 +810,89 @@ private: _VSTD::forward<_Args>(__args)...); ++__tx.__pos_; } + + _LIBCPP_INLINE_VISIBILITY + allocator_type& 
__alloc() _NOEXCEPT + {return this->__end_cap_.second();} + _LIBCPP_INLINE_VISIBILITY + const allocator_type& __alloc() const _NOEXCEPT + {return this->__end_cap_.second();} + _LIBCPP_INLINE_VISIBILITY + pointer& __end_cap() _NOEXCEPT + {return this->__end_cap_.first();} + _LIBCPP_INLINE_VISIBILITY + const pointer& __end_cap() const _NOEXCEPT + {return this->__end_cap_.first();} + + _LIBCPP_INLINE_VISIBILITY + void __clear() _NOEXCEPT {__base_destruct_at_end(this->__begin_);} + + _LIBCPP_INLINE_VISIBILITY + void __base_destruct_at_end(pointer __new_last) _NOEXCEPT { + pointer __soon_to_be_end = this->__end_; + while (__new_last != __soon_to_be_end) + __alloc_traits::destroy(__alloc(), _VSTD::__to_address(--__soon_to_be_end)); + this->__end_ = __new_last; + } + + _LIBCPP_INLINE_VISIBILITY + void __copy_assign_alloc(const vector& __c) + {__copy_assign_alloc(__c, integral_constant());} + + _LIBCPP_INLINE_VISIBILITY + void __move_assign_alloc(vector& __c) + _NOEXCEPT_( + !__alloc_traits::propagate_on_container_move_assignment::value || + is_nothrow_move_assignable::value) + {__move_assign_alloc(__c, integral_constant());} + + _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI + void __throw_length_error() const { +#ifndef _LIBCPP_NO_EXCEPTIONS + __vector_base_common::__throw_length_error(); +#else + _VSTD::abort(); +#endif + } + + _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI + void __throw_out_of_range() const { +#ifndef _LIBCPP_NO_EXCEPTIONS + __vector_base_common::__throw_out_of_range(); +#else + _VSTD::abort(); +#endif + } + + _LIBCPP_INLINE_VISIBILITY + void __copy_assign_alloc(const vector& __c, true_type) + { + if (__alloc() != __c.__alloc()) + { + __clear(); + __alloc_traits::deallocate(__alloc(), this->__begin_, capacity()); + this->__begin_ = this->__end_ = __end_cap() = nullptr; + } + __alloc() = __c.__alloc(); + } + + _LIBCPP_INLINE_VISIBILITY + void __copy_assign_alloc(const vector&, false_type) + {} + + _LIBCPP_INLINE_VISIBILITY + void __move_assign_alloc(vector& __c, true_type) + _NOEXCEPT_(is_nothrow_move_assignable::value) + { + __alloc() = _VSTD::move(__c.__alloc()); + } + + _LIBCPP_INLINE_VISIBILITY + void __move_assign_alloc(vector&, false_type) + _NOEXCEPT + {} }; #if _LIBCPP_STD_VER >= 17 @@ -1374,7 +1333,7 @@ void vector<_Tp, _Allocator>::__move_assign(vector& __c, false_type) _NOEXCEPT_(__alloc_traits::is_always_equal::value) { - if (__base::__alloc() != __c.__alloc()) + if (__alloc() != __c.__alloc()) { typedef move_iterator _Ip; assign(_Ip(__c.begin()), _Ip(__c.end())); @@ -1389,7 +1348,7 @@ vector<_Tp, _Allocator>::__move_assign(vector& __c, true_type) _NOEXCEPT_(is_nothrow_move_assignable::value) { __vdeallocate(); - __base::__move_assign_alloc(__c); // this can throw + __move_assign_alloc(__c); // this can throw this->__begin_ = __c.__begin_; this->__end_ = __c.__end_; this->__end_cap() = __c.__end_cap(); @@ -1408,7 +1367,7 @@ vector<_Tp, _Allocator>::operator=(const vector& __x) { if (this != _VSTD::addressof(__x)) { - __base::__copy_assign_alloc(__x); + __copy_assign_alloc(__x); assign(__x.__begin_, __x.__end_); } return *this; diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp index 44c24d0493a97..009ee1e4e2f4c 100644 --- a/libcxx/src/locale.cpp +++ b/libcxx/src/locale.cpp @@ -4543,6 +4543,18 @@ static bool checked_string_to_wchar_convert(wchar_t& dest, } #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#ifdef _LIBCPP_HAS_NO_WIDE_CHARACTERS +static bool is_narrow_non_breaking_space(const char* ptr) { + // https://www.fileformat.info/info/unicode/char/202f/index.htm + return 
ptr[0] == '\xe2' && ptr[1] == '\x80' && ptr[2] == '\xaf'; +} + +static bool is_non_breaking_space(const char* ptr) { + // https://www.fileformat.info/info/unicode/char/0a/index.htm + return ptr[0] == '\xc2' && ptr[1] == '\xa0'; +} +#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS + static bool checked_string_to_char_convert(char& dest, const char* ptr, locale_t __loc) { @@ -4575,6 +4587,13 @@ static bool checked_string_to_char_convert(char& dest, return false; } #else // _LIBCPP_HAS_NO_WIDE_CHARACTERS + // FIXME: Work around specific multibyte sequences that we can reasonably + // translate into a different single byte. + if (is_narrow_non_breaking_space(ptr) || is_non_breaking_space(ptr)) { + dest = ' '; + return true; + } + return false; #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS _LIBCPP_UNREACHABLE(); diff --git a/libcxx/src/support/win32/thread_win32.cpp b/libcxx/src/support/win32/thread_win32.cpp index 9506822da822d..f2072b1435687 100644 --- a/libcxx/src/support/win32/thread_win32.cpp +++ b/libcxx/src/support/win32/thread_win32.cpp @@ -38,9 +38,6 @@ static_assert(alignof(__libcpp_thread_t) == alignof(HANDLE), ""); static_assert(sizeof(__libcpp_tls_key) == sizeof(DWORD), ""); static_assert(alignof(__libcpp_tls_key) == alignof(DWORD), ""); -static_assert(sizeof(__libcpp_semaphore_t) == sizeof(HANDLE), ""); -static_assert(alignof(__libcpp_semaphore_t) == alignof(HANDLE), ""); - // Mutex int __libcpp_recursive_mutex_init(__libcpp_recursive_mutex_t *__m) { @@ -274,37 +271,4 @@ int __libcpp_tls_set(__libcpp_tls_key __key, void *__p) return 0; } -// Semaphores -bool __libcpp_semaphore_init(__libcpp_semaphore_t* __sem, int __init) -{ - *(PHANDLE)__sem = CreateSemaphoreEx(nullptr, __init, _LIBCPP_SEMAPHORE_MAX, - nullptr, 0, SEMAPHORE_ALL_ACCESS); - return *__sem != nullptr; -} - -bool __libcpp_semaphore_destroy(__libcpp_semaphore_t* __sem) -{ - CloseHandle(*(PHANDLE)__sem); - return true; -} - -bool __libcpp_semaphore_post(__libcpp_semaphore_t* __sem) -{ - return ReleaseSemaphore(*(PHANDLE)__sem, 1, nullptr); -} - -bool __libcpp_semaphore_wait(__libcpp_semaphore_t* __sem) -{ - return WaitForSingleObjectEx(*(PHANDLE)__sem, INFINITE, false) == - WAIT_OBJECT_0; -} - -bool __libcpp_semaphore_wait_timed(__libcpp_semaphore_t* __sem, - chrono::nanoseconds const& __ns) -{ - chrono::milliseconds __ms = chrono::ceil(__ns); - return WaitForSingleObjectEx(*(PHANDLE)__sem, __ms.count(), false) == - WAIT_OBJECT_0; -} - _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/configs/ibm-libc++-shared.cfg.in b/libcxx/test/configs/ibm-libc++-shared.cfg.in index b253c842fcd76..4efdf423207ef 100644 --- a/libcxx/test/configs/ibm-libc++-shared.cfg.in +++ b/libcxx/test/configs/ibm-libc++-shared.cfg.in @@ -17,6 +17,13 @@ config.substitutions.append(('%{exec}', '{} %{{libcxx}}/utils/run.py --execdir %T --env LIBPATH=%{{install}}/lib -- '.format(sys.executable) )) +# LIBCXX-AIX-FIXME is the feature name used to XFAIL the +# initial AIX failures until they can be properly diagnosed +# and fixed. This allows easier detection of new test failures +# and regressions. Note: New failures should not be suppressed +# using this feature. 
(Also see llvm.org/PR52188) +config.available_features.add('LIBCXX-AIX-FIXME') + import os, site site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils')) import libcxx.test.params, libcxx.test.newconfig diff --git a/libcxx/test/libcxx/debug/extern-templates.sh.cpp b/libcxx/test/libcxx/debug/extern-templates.sh.cpp index 2d195cdffd330..40142931c6e5f 100644 --- a/libcxx/test/libcxx/debug/extern-templates.sh.cpp +++ b/libcxx/test/libcxx/debug/extern-templates.sh.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // This test checks that we retain extern template instantiation declarations // for members of even when the debug mode is enabled, which is // necessary for correctness. See https://llvm.org/D94718 for details. diff --git a/libcxx/test/libcxx/fuzzing/random.pass.cpp b/libcxx/test/libcxx/fuzzing/random.pass.cpp index e7545ffdf4da2..dd5da49813e88 100644 --- a/libcxx/test/libcxx/fuzzing/random.pass.cpp +++ b/libcxx/test/libcxx/fuzzing/random.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11 +// XFAIL: LIBCXX-AIX-FIXME #include #include diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index f1cbefbaba6e2..2c53bd1d11078 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -43,7 +43,7 @@ // // Or // -// Call ComparePrettyPrintToChars with that variable, and a "const char*" +// Call ComparePrettyPrintToRegex with that variable, and a "const char*" // *python* regular expression to match against the printer's output. // The set of special characters in a Python regular expression overlaps // with a lot of things the pretty printers print--brackets, for @@ -244,22 +244,22 @@ void unique_ptr_test() { void bitset_test() { std::bitset<258> i_am_empty(0); - ComparePrettyPrintToChars(i_am_empty, "std::bitset<258>"); + ComparePrettyPrintToRegex(i_am_empty, "std::bitset<258(ul)?>"); std::bitset<0> very_empty; - ComparePrettyPrintToChars(very_empty, "std::bitset<0>"); + ComparePrettyPrintToRegex(very_empty, "std::bitset<0(ul)?>"); std::bitset<15> b_000001111111100(1020); - ComparePrettyPrintToChars(b_000001111111100, - "std::bitset<15> = {[2] = 1, [3] = 1, [4] = 1, [5] = 1, [6] = 1, " - "[7] = 1, [8] = 1, [9] = 1}"); + ComparePrettyPrintToRegex(b_000001111111100, + R"(std::bitset<15(ul)?> = {\[2\] = 1, \[3\] = 1, \[4\] = 1, \[5\] = 1, \[6\] = 1, )" + R"(\[7\] = 1, \[8\] = 1, \[9\] = 1})"); std::bitset<258> b_0_129_132(0); b_0_129_132[0] = true; b_0_129_132[129] = true; b_0_129_132[132] = true; - ComparePrettyPrintToChars(b_0_129_132, - "std::bitset<258> = {[0] = 1, [129] = 1, [132] = 1}"); + ComparePrettyPrintToRegex(b_0_129_132, + R"(std::bitset<258(ul)?> = {\[0\] = 1, \[129\] = 1, \[132\] = 1})"); } void list_test() { diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp index b33280b9ec352..5d23eed09f1a3 100644 --- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // test libc++'s implementation of align_val_t, and the relevant 
new/delete // overloads in all dialects when -faligned-allocation is present. diff --git a/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp b/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp index cebfb24e7b079..01db9b806a433 100644 --- a/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp +++ b/libcxx/test/libcxx/memory/trivial_abi/unique_ptr_ret.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // Test unique_ptr with trivial_abi as return-type. diff --git a/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp b/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp index 9cd29703f20ff..7bd2935294e55 100644 --- a/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp +++ b/libcxx/test/libcxx/memory/trivial_abi/weak_ptr_ret.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // Test weak_ptr with trivial_abi as return-type. diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_bool.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_bool.pass.cpp index c32d36340869f..90b24a2fc20e1 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_bool.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_bool.pass.cpp @@ -8,6 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: libcpp-has-no-incomplete-format +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_char.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_char.pass.cpp index 876c7d247d67e..018f7dd30a1d1 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_char.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_char.pass.cpp @@ -8,6 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: libcpp-has-no-incomplete-format +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_integer.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_integer.pass.cpp index 652c2caba022d..7f9fb7bc721d2 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_integer.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_integer.pass.cpp @@ -8,6 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: libcpp-has-no-incomplete-format +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string.pass.cpp index 930a6b43ef722..b3bcecffb3f36 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string.pass.cpp @@ -8,6 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-no-concepts // 
UNSUPPORTED: libcpp-has-no-incomplete-format +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp index f6337a00c833b..bf8cf4868be80 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp @@ -9,6 +9,9 @@ // UNSUPPORTED: libcpp-no-concepts // UNSUPPORTED: libcpp-has-no-incomplete-format +// Fails for 32-bit builds on AIX. +// UNSUPPORTED: LIBCXX-AIX-FIXME + // UTF-32 doesn't work properly // XFAIL: windows diff --git a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp index d9ec69eebbe29..d9276463f3dab 100644 --- a/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp +++ b/libcxx/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp @@ -8,6 +8,9 @@ // // UNSUPPORTED: libcpp-has-no-threads, c++03, c++11, c++14 +// Fails for 32-bit builds. +// UNSUPPORTED: LIBCXX-AIX-FIXME + // // static constexpr bool is_always_lock_free; diff --git a/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp index 824b362dca17e..a22d41e9760b9 100644 --- a/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/associative/map/map.cons/assign_initializer_list.pass.cpp @@ -74,17 +74,18 @@ void test_basic() { void duplicate_keys_test() { + test_allocator_statistics alloc_stats; typedef std::map, test_allocator > > Map; { - LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); - Map s = {{1, 0}, {2, 0}, {3, 0}}; - LIBCPP_ASSERT(test_alloc_base::alloc_count == 3); + LIBCPP_ASSERT(alloc_stats.alloc_count == 0); + Map s({{1, 0}, {2, 0}, {3, 0}}, std::less(), test_allocator >(&alloc_stats)); + LIBCPP_ASSERT(alloc_stats.alloc_count == 3); s = {{4, 0}, {4, 0}, {4, 0}, {4, 0}}; - LIBCPP_ASSERT(test_alloc_base::alloc_count == 1); + LIBCPP_ASSERT(alloc_stats.alloc_count == 1); assert(s.size() == 1); assert(s.begin()->first == 4); } - LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); + LIBCPP_ASSERT(alloc_stats.alloc_count == 0); } int main(int, char**) diff --git a/libcxx/test/std/containers/associative/map/map.nonmember/op_compare.pass.cpp b/libcxx/test/std/containers/associative/map/map.nonmember/op_compare.pass.cpp new file mode 100644 index 0000000000000..cfeb02e043872 --- /dev/null +++ b/libcxx/test/std/containers/associative/map/map.nonmember/op_compare.pass.cpp @@ -0,0 +1,81 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
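The pattern in the duplicate_keys_test change above, and in its set counterpart earlier: allocation counters move off global statics such as test_alloc_base::alloc_count onto a per-test test_allocator_statistics object that the allocator carries a pointer to, so tests stop sharing mutable state. The shape of the mechanism, with simplified stand-ins for the real support-header types:

    #include <cstdlib>

    struct alloc_stats_sketch { // per-test counters instead of globals
      int alloc_count = 0;
    };

    template <class T>
    struct counting_allocator_sketch {
      using value_type = T;
      alloc_stats_sketch *stats;

      explicit counting_allocator_sketch(alloc_stats_sketch *s) : stats(s) {}
      template <class U>
      counting_allocator_sketch(const counting_allocator_sketch<U> &o)
          : stats(o.stats) {}

      T *allocate(std::size_t n) {
        ++stats->alloc_count; // counts live allocations
        return static_cast<T *>(std::malloc(n * sizeof(T)));
      }
      void deallocate(T *p, std::size_t) {
        --stats->alloc_count;
        std::free(p);
      }
    };

Each test constructs its own statistics object and hands the container an allocator pointing at it, which is why the assertions can expect alloc_count to start and end at zero.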
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template +// bool operator==(const std::map& lhs, +// const std::map& rhs); +// +// template +// bool operator!=(const std::map& lhs, +// const std::map& rhs); +// +// template +// bool operator<(const std::map& lhs, +// const std::map& rhs); +// +// template +// bool operator>(const std::map& lhs, +// const std::map& rhs); +// +// template +// bool operator<=(const std::map& lhs, +// const std::map& rhs); +// +// template +// bool operator>=(const std::map& lhs, +// const std::map& rhs); + +#include +#include +#include + +#include "test_comparisons.h" + +int main(int, char**) { + typedef std::map map_type; + typedef map_type::value_type value_type; + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(2, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(1, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, true, false)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "ab")); + m2.insert(value_type(1, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(1, "bcd")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(1, "abc")); + m2.insert(value_type(2, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + return 0; +} diff --git a/libcxx/test/std/containers/associative/multimap/multimap.nonmember/op_compare.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.nonmember/op_compare.pass.cpp new file mode 100644 index 0000000000000..83aeca5917f8c --- /dev/null +++ b/libcxx/test/std/containers/associative/multimap/multimap.nonmember/op_compare.pass.cpp @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
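testComparisons6, from the test_comparisons.h support header, takes the two ground truths (whether the operands are equal and whether the left one is less) and checks all six relational operators against them. What it verifies amounts to the following sketch (the assumed shape, not the header's exact code):

    template <class T>
    bool test_comparisons6_sketch(const T &lhs, const T &rhs,
                                  bool is_equal, bool is_less) {
      return (lhs == rhs) == is_equal &&
             (lhs != rhs) == !is_equal &&
             (lhs < rhs) == is_less &&
             (lhs > rhs) == (!is_equal && !is_less) &&
             (lhs <= rhs) == (is_equal || is_less) &&
             (lhs >= rhs) == !is_less;
    }

So testComparisons6(cm1, cm2, false, true) asserts that cm1 compares strictly less than cm2 under every operator.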
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.nonmember/op_compare.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.nonmember/op_compare.pass.cpp new file mode 100644 index 0000000000000..83aeca5917f8c --- /dev/null +++ b/libcxx/test/std/containers/associative/multimap/multimap.nonmember/op_compare.pass.cpp @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// <map> + +// template <class Key, class T, class Compare, class Allocator> +// bool operator==(const std::multimap<Key, T, Compare, Allocator>& lhs, +// const std::multimap<Key, T, Compare, Allocator>& rhs); +// +// template <class Key, class T, class Compare, class Allocator> +// bool operator!=(const std::multimap<Key, T, Compare, Allocator>& lhs, +// const std::multimap<Key, T, Compare, Allocator>& rhs); +// +// template <class Key, class T, class Compare, class Allocator> +// bool operator<(const std::multimap<Key, T, Compare, Allocator>& lhs, +// const std::multimap<Key, T, Compare, Allocator>& rhs); +// +// template <class Key, class T, class Compare, class Allocator> +// bool operator>(const std::multimap<Key, T, Compare, Allocator>& lhs, +// const std::multimap<Key, T, Compare, Allocator>& rhs); +// +// template <class Key, class T, class Compare, class Allocator> +// bool operator<=(const std::multimap<Key, T, Compare, Allocator>& lhs, +// const std::multimap<Key, T, Compare, Allocator>& rhs); +// +// template <class Key, class T, class Compare, class Allocator> +// bool operator>=(const std::multimap<Key, T, Compare, Allocator>& lhs, +// const std::multimap<Key, T, Compare, Allocator>& rhs); + +#include <map> +#include <cassert> +#include <string> + +#include "test_comparisons.h" + +int main(int, char**) { + typedef std::multimap<int, std::string> map_type; + typedef map_type::value_type value_type; + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(2, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(1, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, true, false)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "ab")); + m2.insert(value_type(1, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(1, "bcd")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(1, "abc")); + m2.insert(value_type(2, "abc")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + { + map_type m1, m2; + m1.insert(value_type(1, "abc")); + m2.insert(value_type(1, "abc")); + m2.insert(value_type(1, "abc")); + m2.insert(value_type(1, "bcd")); + const map_type& cm1 = m1, cm2 = m2; + assert(testComparisons6(cm1, cm2, false, true)); + } + return 0; +}
diff --git a/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp index b04d7f576bac0..b096995513a94 100644 --- a/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp +++ b/libcxx/test/std/containers/associative/set/set.cons/assign_initializer_list.pass.cpp @@ -55,17 +55,18 @@ void basic_test() { } void duplicate_keys_test() { + test_allocator_statistics alloc_stats; typedef std::set<int, std::less<int>, test_allocator<int> > Set; { - LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); - Set s = {1, 2, 3}; - LIBCPP_ASSERT(test_alloc_base::alloc_count == 3); + LIBCPP_ASSERT(alloc_stats.alloc_count == 0); + Set s({1, 2, 3}, std::less<int>(), test_allocator<int>(&alloc_stats)); + LIBCPP_ASSERT(alloc_stats.alloc_count == 3); s = {4, 4, 4, 4, 4}; - LIBCPP_ASSERT(test_alloc_base::alloc_count == 1); + LIBCPP_ASSERT(alloc_stats.alloc_count == 1); assert(s.size() == 1); assert(*s.begin() == 4); } - LIBCPP_ASSERT(test_alloc_base::alloc_count == 0); + LIBCPP_ASSERT(alloc_stats.alloc_count == 0); } int main(int, char**) {
diff --git a/libcxx/test/std/containers/container.requirements/container.requirements.general/allocator_move.pass.cpp b/libcxx/test/std/containers/container.requirements/container.requirements.general/allocator_move.pass.cpp index 0cc573e8592d9..cc8949e751dd0 100644 ---
a/libcxx/test/std/containers/container.requirements/container.requirements.general/allocator_move.pass.cpp +++ b/libcxx/test/std/containers/container.requirements/container.requirements.general/allocator_move.pass.cpp @@ -27,27 +27,28 @@ template <class C> void test(int expected_num_allocs = 1) { + test_allocator_statistics alloc_stats; { - test_alloc_base::clear(); + alloc_stats.clear(); using AllocT = typename C::allocator_type; - C v(AllocT(42, 101)); + C v(AllocT(42, 101, &alloc_stats)); - assert(test_alloc_base::count == expected_num_allocs); + assert(alloc_stats.count == expected_num_allocs); - const int num_stored_allocs = test_alloc_base::count; + const int num_stored_allocs = alloc_stats.count; { const AllocT& a = v.get_allocator(); - assert(test_alloc_base::count == 1 + num_stored_allocs); + assert(alloc_stats.count == 1 + num_stored_allocs); assert(a.get_data() == 42); assert(a.get_id() == 101); } - assert(test_alloc_base::count == num_stored_allocs); - test_alloc_base::clear_ctor_counters(); + assert(alloc_stats.count == num_stored_allocs); + alloc_stats.clear_ctor_counters(); C v2 = std::move(v); - assert(test_alloc_base::count == num_stored_allocs * 2); - assert(test_alloc_base::copied == 0); - assert(test_alloc_base::moved == num_stored_allocs); + assert(alloc_stats.count == num_stored_allocs * 2); + assert(alloc_stats.copied == 0); + assert(alloc_stats.moved == num_stored_allocs); { const AllocT& a = v.get_allocator(); assert(a.get_id() == test_alloc_base::moved_value);
diff --git a/libcxx/test/std/containers/sequences/array/compare.pass.cpp b/libcxx/test/std/containers/sequences/array/compare.pass.cpp index a04d5584477b6..d777b7e63bbe4 100644 --- a/libcxx/test/std/containers/sequences/array/compare.pass.cpp +++ b/libcxx/test/std/containers/sequences/array/compare.pass.cpp @@ -26,18 +26,34 @@ TEST_CONSTEXPR_CXX20 bool tests() { { typedef std::array<int, 3> C; - C c1 = {1, 2, 3}; - C c2 = {1, 2, 3}; - C c3 = {3, 2, 1}; - C c4 = {1, 2, 1}; + const C c1 = {1, 2, 3}; + const C c2 = {1, 2, 3}; + const C c3 = {3, 2, 1}; + const C c4 = {1, 2, 1}; assert(testComparisons6(c1, c2, true, false)); assert(testComparisons6(c1, c3, false, true)); assert(testComparisons6(c1, c4, false, false)); } { typedef std::array<int, 0> C; - C c1 = {}; - C c2 = {}; + const C c1 = {}; + const C c2 = {}; + assert(testComparisons6(c1, c2, true, false)); + } + { + typedef std::array<LessAndEqComp, 3> C; + const C c1 = {LessAndEqComp(1), LessAndEqComp(2), LessAndEqComp(3)}; + const C c2 = {LessAndEqComp(1), LessAndEqComp(2), LessAndEqComp(3)}; + const C c3 = {LessAndEqComp(3), LessAndEqComp(2), LessAndEqComp(1)}; + const C c4 = {LessAndEqComp(1), LessAndEqComp(2), LessAndEqComp(1)}; + assert(testComparisons6(c1, c2, true, false)); + assert(testComparisons6(c1, c3, false, true)); + assert(testComparisons6(c1, c4, false, false)); + } + { + typedef std::array<LessAndEqComp, 0> C; + const C c1 = {}; + const C c2 = {}; assert(testComparisons6(c1, c2, true, false)); }
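Editorial note: the LessAndEqComp blocks above (and in the deque and list tests that follow) use an element type from test_comparisons.h that defines only operator< and operator==, verifying that the container relational operators demand nothing more from the element type. A rough equivalent of that utility under the same assumption (hypothetical name):

// An element type providing only == and <, in the spirit of LessAndEqComp.
struct LessAndEqCompSketch {
  int value;
  explicit LessAndEqCompSketch(int v) : value(v) {}
  friend bool operator==(const LessAndEqCompSketch& a, const LessAndEqCompSketch& b) {
    return a.value == b.value;
  }
  friend bool operator<(const LessAndEqCompSketch& a, const LessAndEqCompSketch& b) {
    return a.value < b.value;
  }
};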
diff --git a/libcxx/test/std/containers/sequences/deque/compare.pass.cpp b/libcxx/test/std/containers/sequences/deque/compare.pass.cpp new file mode 100644 index 0000000000000..0ae9e6602fa71 --- /dev/null +++ b/libcxx/test/std/containers/sequences/deque/compare.pass.cpp @@ -0,0 +1,119 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// <deque> + +// template <class T, class Allocator> +// bool operator==(const std::deque<T, Allocator>& lhs, +// const std::deque<T, Allocator>& rhs); + +// template <class T, class Allocator> +// bool operator!=(const std::deque<T, Allocator>& lhs, +// const std::deque<T, Allocator>& rhs); + +// template <class T, class Allocator> +// bool operator<(const std::deque<T, Allocator>& lhs, +// const std::deque<T, Allocator>& rhs); + +// template <class T, class Allocator> +// bool operator<=(const std::deque<T, Allocator>& lhs, +// const std::deque<T, Allocator>& rhs); + +// template <class T, class Allocator> +// bool operator>(const std::deque<T, Allocator>& lhs, +// const std::deque<T, Allocator>& rhs); + +// template <class T, class Allocator> +// bool operator>=(const std::deque<T, Allocator>& lhs, +// const std::deque<T, Allocator>& rhs); + +#include <deque> +#include <cassert> + +#include "test_comparisons.h" + +int main(int, char**) +{ + { + const std::deque<int> d1, d2; + assert(testComparisons6(d1, d2, true, false)); + } + { + const std::deque<int> d1(1, 1), d2(1, 1); + assert(testComparisons6(d1, d2, true, false)); + } + { + int items[3] = {1, 2, 3}; + const std::deque<int> d1(items, items + 3); + const std::deque<int> d2(items, items + 3); + assert(testComparisons6(d1, d2, true, false)); + } + { + const std::deque<int> d1(1, 1), d2; + assert(testComparisons6(d1, d2, false, false)); + } + { + const std::deque<int> d1(1, 1), d2(1, 2); + assert(testComparisons6(d1, d2, false, true)); + } + { + int items1[2] = {1, 2}; + int items2[2] = {1, 3}; + const std::deque<int> d1(items1, items1 + 2); + const std::deque<int> d2(items2, items2 + 2); + assert(testComparisons6(d1, d2, false, true)); + } + { + int items1[2] = {2, 2}; + int items2[2] = {1, 3}; + const std::deque<int> d1(items1, items1 + 2); + const std::deque<int> d2(items2, items2 + 2); + assert(testComparisons6(d1, d2, false, false)); + } + { + const std::deque<LessAndEqComp> d1, d2; + assert(testComparisons6(d1, d2, true, false)); + } + { + const std::deque<LessAndEqComp> d1(1, LessAndEqComp(1)); + const std::deque<LessAndEqComp> d2(1, LessAndEqComp(1)); + assert(testComparisons6(d1, d2, true, false)); + } + { + LessAndEqComp items[3] = {LessAndEqComp(1), LessAndEqComp(2), LessAndEqComp(3)}; + const std::deque<LessAndEqComp> d1(items, items + 3); + const std::deque<LessAndEqComp> d2(items, items + 3); + assert(testComparisons6(d1, d2, true, false)); + } + { + const std::deque<LessAndEqComp> d1(1, LessAndEqComp(1)); + const std::deque<LessAndEqComp> d2; + assert(testComparisons6(d1, d2, false, false)); + } + { + const std::deque<LessAndEqComp> d1(1, LessAndEqComp(1)); + const std::deque<LessAndEqComp> d2(1, LessAndEqComp(2)); + assert(testComparisons6(d1, d2, false, true)); + } + { + LessAndEqComp items1[2] = {LessAndEqComp(1), LessAndEqComp(2)}; + LessAndEqComp items2[2] = {LessAndEqComp(1), LessAndEqComp(3)}; + const std::deque<LessAndEqComp> d1(items1, items1 + 2); + const std::deque<LessAndEqComp> d2(items2, items2 + 2); + assert(testComparisons6(d1, d2, false, true)); + } + { + LessAndEqComp items1[2] = {LessAndEqComp(2), LessAndEqComp(2)}; + LessAndEqComp items2[2] = {LessAndEqComp(1), LessAndEqComp(3)}; + const std::deque<LessAndEqComp> d1(items1, items1 + 2); + const std::deque<LessAndEqComp> d2(items2, items2 + 2); + assert(testComparisons6(d1, d2, false, false)); + } + + return 0; +}
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp index b24fd0753770b..137129368b4ad 100644 --- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_back_exception_safety.pass.cpp @@ -86,11 +86,12 @@ int main(int, char**) } { + test_allocator_statistics alloc_stats; typedef std::deque<CMyClass, test_allocator<CMyClass> > C; - C vec; - C vec2(vec); + C vec((test_allocator<CMyClass>(&alloc_stats))); + C vec2(vec, test_allocator<CMyClass>(&alloc_stats)); - C::allocator_type::throw_after = 1; + alloc_stats.throw_after = 1; try { vec.push_back(instance); assert(false);
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp index 990f41f487266..79f9a14f23091 100644 --- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp +++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/push_front_exception_safety.pass.cpp @@ -86,11 +86,12 @@ int main(int, char**) } { + test_allocator_statistics alloc_stats; typedef std::deque<CMyClass, test_allocator<CMyClass> > C; - C vec; - C vec2(vec); + C vec((test_allocator<CMyClass>(&alloc_stats))); + C vec2(vec, test_allocator<CMyClass>(&alloc_stats)); - C::allocator_type::throw_after = 1; + alloc_stats.throw_after = 1; try { vec.push_front(instance); assert(false);
diff --git a/libcxx/test/std/containers/sequences/list/compare.pass.cpp b/libcxx/test/std/containers/sequences/list/compare.pass.cpp new file mode 100644 index 0000000000000..59314922e3e80 --- /dev/null +++ b/libcxx/test/std/containers/sequences/list/compare.pass.cpp @@ -0,0 +1,117 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// <list> + +// template< class T, class Alloc > +// bool operator==( const std::list<T, Alloc>& lhs, +// const std::list<T, Alloc>& rhs ); + +// template< class T, class Alloc > +// bool operator!=( const std::list<T, Alloc>& lhs, +// const std::list<T, Alloc>& rhs ); + +// template< class T, class Alloc > +// bool operator<( const std::list<T, Alloc>& lhs, +// const std::list<T, Alloc>& rhs ); + +// template< class T, class Alloc > +// bool operator<=( const std::list<T, Alloc>& lhs, +// const std::list<T, Alloc>& rhs ); + +// template< class T, class Alloc > +// bool operator>( const std::list<T, Alloc>& lhs, +// const std::list<T, Alloc>& rhs ); + +// template< class T, class Alloc > +// bool operator>=( const std::list<T, Alloc>& lhs, +// const std::list<T, Alloc>& rhs ); + +#include <list> +#include <cassert> + +#include "test_comparisons.h" + +int main(int, char**) { + { + const std::list<int> l1, l2; + assert(testComparisons6(l1, l2, true, false)); + } + { + const std::list<int> l1(1, 1), l2(1, 1); + assert(testComparisons6(l1, l2, true, false)); + } + { + int items[3] = {1, 2, 3}; + const std::list<int> l1(items, items + 3); + const std::list<int> l2(items, items + 3); + assert(testComparisons6(l1, l2, true, false)); + } + { + const std::list<int> l1(1, 1), l2; + assert(testComparisons6(l1, l2, false, false)); + } + { + const std::list<int> l1(1, 1), l2(1, 2); + assert(testComparisons6(l1, l2, false, true)); + } + { + int items1[2] = {1, 2}; + int items2[2] = {1, 3}; + const std::list<int> l1(items1, items1 + 2); + const std::list<int> l2(items2, items2 + 2); + assert(testComparisons6(l1, l2, false, true)); + } + { + int items1[2] = {2, 2}; + int items2[2] = {1, 3}; + const std::list<int> l1(items1, items1 + 2); + const std::list<int> l2(items2, items2 + 2); + assert(testComparisons6(l1, l2, false, false)); + } + { + const std::list<LessAndEqComp> l1, l2; + assert(testComparisons6(l1, l2, true, false)); + } + { + const std::list<LessAndEqComp> l1(1, LessAndEqComp(1)); + const std::list<LessAndEqComp> l2(1, LessAndEqComp(1)); + assert(testComparisons6(l1, l2, true, false)); + } + { +
LessAndEqComp items[3] = {LessAndEqComp(1), LessAndEqComp(2), LessAndEqComp(3)}; + const std::list<LessAndEqComp> l1(items, items + 3); + const std::list<LessAndEqComp> l2(items, items + 3); + assert(testComparisons6(l1, l2, true, false)); + } + { + const std::list<LessAndEqComp> l1(1, LessAndEqComp(1)); + const std::list<LessAndEqComp> l2; + assert(testComparisons6(l1, l2, false, false)); + } + { + const std::list<LessAndEqComp> l1(1, LessAndEqComp(1)); + const std::list<LessAndEqComp> l2(1, LessAndEqComp(2)); + assert(testComparisons6(l1, l2, false, true)); + } + { + LessAndEqComp items1[2] = {LessAndEqComp(1), LessAndEqComp(2)}; + LessAndEqComp items2[2] = {LessAndEqComp(1), LessAndEqComp(3)}; + const std::list<LessAndEqComp> l1(items1, items1 + 2); + const std::list<LessAndEqComp> l2(items2, items2 + 2); + assert(testComparisons6(l1, l2, false, true)); + } + { + LessAndEqComp items1[2] = {LessAndEqComp(2), LessAndEqComp(2)}; + LessAndEqComp items2[2] = {LessAndEqComp(1), LessAndEqComp(3)}; + const std::list<LessAndEqComp> l1(items1, items1 + 2); + const std::list<LessAndEqComp> l2(items2, items2 + 2); + assert(testComparisons6(l1, l2, false, false)); + } + return 0; +}
diff --git a/libcxx/test/std/containers/sequences/vector.bool/compare.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/compare.pass.cpp new file mode 100644 index 0000000000000..60fede4f3061b --- /dev/null +++ b/libcxx/test/std/containers/sequences/vector.bool/compare.pass.cpp @@ -0,0 +1,80 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// <vector> + +// bool operator==( const vector<bool, Alloc>& lhs, const vector<bool, Alloc>& rhs ); +// bool operator!=( const vector<bool, Alloc>& lhs, const vector<bool, Alloc>& rhs ); +// bool operator< ( const vector<bool, Alloc>& lhs, const vector<bool, Alloc>& rhs ); +// bool operator<=( const vector<bool, Alloc>& lhs, const vector<bool, Alloc>& rhs ); +// bool operator> ( const vector<bool, Alloc>& lhs, const vector<bool, Alloc>& rhs ); +// bool operator>=( const vector<bool, Alloc>& lhs, const vector<bool, Alloc>& rhs ); + +#include <vector> +#include <cassert> + +#include "test_comparisons.h" + +int main(int, char**) { + typedef std::vector<bool> VB; + { + const VB v1, v2; + assert(testComparisons6(v1, v2, true, false)); + } + { + const VB v1(1, true); + const VB v2(1, true); + assert(testComparisons6(v1, v2, true, false)); + } + { + const VB v1(1, false); + const VB v2(1, true); + assert(testComparisons6(v1, v2, false, true)); + } + { + const VB v1, v2(1, true); + assert(testComparisons6(v1, v2, false, true)); + } + { + bool items1[3] = {false, true, false}; + bool items2[3] = {false, true, true}; + const VB v1(items1, items1 + 3); + const VB v2(items2, items2 + 3); + assert(testComparisons6(v1, v2, false, true)); + } + { + bool items1[3] = {false, false, false}; + bool items2[3] = {false, true, false}; + const VB v1(items1, items1 + 3); + const VB v2(items2, items2 + 3); + assert(testComparisons6(v1, v2, false, true)); + } + { + bool items1[2] = {false, true}; + bool items2[3] = {false, true, false}; + const VB v1(items1, items1 + 2); + const VB v2(items2, items2 + 3); + assert(testComparisons6(v1, v2, false, true)); + } + { + bool items[3] = {false, true, false}; + const VB v1(items, items + 3); + const VB v2(1, true); + assert(testComparisons6(v1, v2, false, true)); + } + { + assert( (std::vector<bool>() == std::vector<bool>())); + assert(!(std::vector<bool>() != std::vector<bool>())); + assert(!(std::vector<bool>() < std::vector<bool>())); + assert( (std::vector<bool>() <= std::vector<bool>())); +
assert(!(std::vector<bool>() > std::vector<bool>())); + assert( (std::vector<bool>() >= std::vector<bool>())); + } + + return 0; +}
diff --git a/libcxx/test/std/containers/sequences/vector.bool/move.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/move.pass.cpp index 8da56e6aa2efa..a554ec8ab0162 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/move.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/move.pass.cpp @@ -20,9 +20,10 @@ int main(int, char**) { + test_allocator_statistics alloc_stats; { - std::vector<bool, test_allocator<bool> > l(test_allocator<bool>(5)); - std::vector<bool, test_allocator<bool> > lo(test_allocator<bool>(5)); + std::vector<bool, test_allocator<bool> > l(test_allocator<bool>(5, &alloc_stats)); + std::vector<bool, test_allocator<bool> > lo(test_allocator<bool>(5, &alloc_stats)); for (int i = 1; i <= 3; ++i) { l.push_back(true); @@ -60,24 +61,24 @@ assert(l2.get_allocator() == lo.get_allocator()); } { - test_alloc_base::clear(); + alloc_stats.clear(); using Vect = std::vector<bool, test_allocator<bool> >; using AllocT = Vect::allocator_type; - Vect v(test_allocator<bool>(42, 101)); - assert(test_alloc_base::count == 1); + Vect v(test_allocator<bool>(42, 101, &alloc_stats)); + assert(alloc_stats.count == 1); { const AllocT& a = v.get_allocator(); - assert(test_alloc_base::count == 2); + assert(alloc_stats.count == 2); assert(a.get_data() == 42); assert(a.get_id() == 101); } - assert(test_alloc_base::count == 1); - test_alloc_base::clear_ctor_counters(); + assert(alloc_stats.count == 1); + alloc_stats.clear_ctor_counters(); Vect v2 = std::move(v); - assert(test_alloc_base::count == 2); - assert(test_alloc_base::copied == 0); - assert(test_alloc_base::moved == 1); + assert(alloc_stats.count == 2); + assert(alloc_stats.copied == 0); + assert(alloc_stats.moved == 1); { const AllocT& a = v.get_allocator(); assert(a.get_id() == test_alloc_base::moved_value);
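Editorial note: the eq.pass.cpp hunks for the unordered containers below switch to testComparisons2, the two-operator sibling of testComparisons6, since unordered containers define only == and !=. A sketch under the same assumptions as above (hypothetical name; the real helper lives in test_comparisons.h):

template <class T, class U = T>
bool testComparisons2Sketch(const T& t1, const U& t2, bool isEqual) {
  // == and != must agree with each other and with the stated expectation.
  return isEqual ? ((t1 == t2) && !(t1 != t2))
                 : (!(t1 == t2) && (t1 != t2));
}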
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/move.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/move.pass.cpp index 1c2126477ba05..8c797d1a7cbe0 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/move.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/move.pass.cpp @@ -23,9 +23,10 @@ int main(int, char**) { + test_allocator_statistics alloc_stats; { - std::vector<MoveOnly, test_allocator<MoveOnly> > l(test_allocator<MoveOnly>(5)); - std::vector<MoveOnly, test_allocator<MoveOnly> > lo(test_allocator<MoveOnly>(5)); + std::vector<MoveOnly, test_allocator<MoveOnly> > l(test_allocator<MoveOnly>(5, &alloc_stats)); + std::vector<MoveOnly, test_allocator<MoveOnly> > lo(test_allocator<MoveOnly>(5, &alloc_stats)); assert(is_contiguous_container_asan_correct(l)); assert(is_contiguous_container_asan_correct(lo)); for (int i = 1; i <= 3; ++i) @@ -100,24 +101,24 @@ assert(is_contiguous_container_asan_correct(c2)); } { - test_alloc_base::clear(); + alloc_stats.clear(); using Vect = std::vector<int, test_allocator<int> >; - Vect v(test_allocator<int>(42, 101)); - assert(test_alloc_base::count == 1); - assert(test_alloc_base::copied == 1); - assert(test_alloc_base::moved == 0); + Vect v(test_allocator<int>(42, 101, &alloc_stats)); + assert(alloc_stats.count == 1); + assert(alloc_stats.copied == 1); + assert(alloc_stats.moved == 0); { const test_allocator<int>& a = v.get_allocator(); assert(a.get_data() == 42); assert(a.get_id() == 101); } - assert(test_alloc_base::count == 1); - test_alloc_base::clear_ctor_counters(); + assert(alloc_stats.count == 1); + alloc_stats.clear_ctor_counters(); Vect v2 = std::move(v); - assert(test_alloc_base::count == 2); - assert(test_alloc_base::copied == 0); - assert(test_alloc_base::moved == 1); + assert(alloc_stats.count == 2); + assert(alloc_stats.copied == 0); + assert(alloc_stats.moved == 1); { const test_allocator<int>& a = v.get_allocator(); assert(a.get_id() == test_alloc_base::moved_value);
diff --git a/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp b/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp index 5c924f07e5376..bcb6cb78adac4 100644 --- a/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/eq.pass.cpp @@ -25,6 +25,8 @@ #include "test_macros.h" #include "min_allocator.h" +#include "test_comparisons.h" + int main(int, char**) { { @@ -43,8 +45,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2; - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); } { typedef std::unordered_map<int, std::string> C; @@ -62,8 +63,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2 = c1; - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); } { typedef std::unordered_map<int, std::string> C; @@ -82,14 +82,32 @@ C c1(std::begin(a), std::end(a)); C c2 = c1; c2.rehash(30); - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); c2.insert(P(90, "ninety")); - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); + c1.insert(P(90, "ninety")); + assert(testComparisons2(c1, c2, true)); + } + { + typedef std::unordered_map<int, std::string> C; + typedef std::pair<int, std::string> P; + P a[] = + { + P(10, "ten"), + P(20, "twenty"), + P(30, "thirty"), + P(40, "forty"), + P(50, "fifty"), + P(60, "sixty"), + P(70, "seventy"), + P(80, "eighty"), + }; + C c1(std::begin(a), std::end(a)); + C c2 = c1; + assert(testComparisons2(c1, c2, true)); c1.insert(P(90, "ninety")); - assert( (c1 == c2)); - assert(!(c1 != c2)); + c2.insert(P(100, "onehundred")); + assert(testComparisons2(c1, c2, false)); } #if TEST_STD_VER >= 11 { @@ -109,8 +127,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2; - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); } { typedef std::unordered_map<int, std::string, std::hash<int>, std::equal_to<int>, @@ -129,8 +146,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2 = c1; - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); } { typedef std::unordered_map<int, std::string, std::hash<int>, std::equal_to<int>, @@ -150,14 +166,33 @@ C c1(std::begin(a), std::end(a)); C c2 = c1; c2.rehash(30); - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); c2.insert(P(90, "ninety")); - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); + c1.insert(P(90, "ninety")); + assert(testComparisons2(c1, c2, true)); + } + { + typedef std::unordered_map<int, std::string, std::hash<int>, std::equal_to<int>, + min_allocator<std::pair<const int, std::string>>> C; + typedef std::pair<int, std::string> P; + P a[] = + { + P(10, "ten"), + P(20, "twenty"), + P(30, "thirty"), + P(40, "forty"), + P(50, "fifty"), + P(60, "sixty"), + P(70, "seventy"), + P(80, "eighty"), + }; + C c1(std::begin(a), std::end(a)); + C c2 = c1; + assert(testComparisons2(c1, c2, true)); c1.insert(P(90, "ninety")); - assert( (c1 == c2)); - assert(!(c1 != c2)); + c2.insert(P(100, "onehundred")); + assert(testComparisons2(c1, c2, false)); } #endif
diff --git a/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp index b47fa32529e0e..1ebf1206d9a87 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/eq.pass.cpp @@ -25,6 +25,8 @@ #include "test_macros.h" #include "min_allocator.h" +#include "test_comparisons.h" + int main(int, char**) { { @@ -46,8 +48,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2; - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); } { typedef std::unordered_multimap<int, std::string> C; @@ -68,8 +69,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2 = c1; - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); } { typedef std::unordered_multimap<int, std::string> C; @@ -91,14 +91,35 @@ C c1(std::begin(a), std::end(a)); C c2 = c1; c2.rehash(30); - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); c2.insert(P(90, "ninety")); - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); c1.insert(P(90, "ninety")); - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); + } + { + typedef std::unordered_multimap<int, std::string> C; + typedef std::pair<int, std::string> P; + P a[] = + { + P(10, "ten"), + P(20, "twenty"), + P(20, "twenty 2"), + P(30, "thirty"), + P(40, "forty"), + P(50, "fifty"), + P(50, "fifty 2"), + P(50, "fifty 3"), + P(60, "sixty"), + P(70, "seventy"), + P(80, "eighty"), + }; + C c1(std::begin(a), std::end(a)); + C c2 = c1; + assert(testComparisons2(c1, c2, true)); + c1.insert(P(70, "seventy 2")); + c2.insert(P(80, "eighty 2")); + assert(testComparisons2(c1, c2, false)); } #if TEST_STD_VER >= 11 { @@ -121,8 +142,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2; - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); } { typedef std::unordered_multimap<int, std::string, std::hash<int>, std::equal_to<int>, @@ -144,8 +164,7 @@ }; const C c1(std::begin(a), std::end(a)); const C c2 = c1; - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); } { typedef std::unordered_multimap<int, std::string, std::hash<int>, std::equal_to<int>, @@ -168,14 +187,36 @@ C c1(std::begin(a), std::end(a)); C c2 = c1; c2.rehash(30); - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); c2.insert(P(90, "ninety")); - assert(!(c1 == c2)); - assert( (c1 != c2)); + assert(testComparisons2(c1, c2, false)); c1.insert(P(90, "ninety")); - assert( (c1 == c2)); - assert(!(c1 != c2)); + assert(testComparisons2(c1, c2, true)); + } + { + typedef std::unordered_multimap<int, std::string, std::hash<int>, std::equal_to<int>, + min_allocator<std::pair<const int, std::string>>> C; + typedef std::pair<int, std::string> P; + P a[] = + { + P(10, "ten"), + P(20, "twenty"), + P(20, "twenty 2"), + P(30, "thirty"), + P(40, "forty"), + P(50, "fifty"), + P(50, "fifty 2"), + P(50, "fifty 3"), + P(60, "sixty"), + P(70, "seventy"), + P(80, "eighty"), + }; + C c1(std::begin(a), std::end(a)); + C c2 = c1; + assert(testComparisons2(c1, c2, true)); + c1.insert(P(70, "seventy 2")); + c2.insert(P(80, "eighty 2")); + assert(testComparisons2(c1, c2, false)); } #endif
diff --git a/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp index e759a7d3c7564..b9f9a585ed8d3 100644 --- a/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/inttypes_h.pass.cpp @@ -927,8 +927,10 @@ int main(int, char**) static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#endif return 0; }
diff --git a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp
b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp index 94a2fc4d2ff9d..7863fa6eeb499 100644 --- a/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/math_h.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // #include diff --git a/libcxx/test/std/depr/depr.c.headers/stdlib_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdlib_h.pass.cpp index 83d17107fbbcc..978596b6f589e 100644 --- a/libcxx/test/std/depr/depr.c.headers/stdlib_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/stdlib_h.pass.cpp @@ -155,6 +155,7 @@ int main(int, char**) static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS wchar_t* pw = 0; const wchar_t* pwc = 0; char* pc = 0; @@ -163,6 +164,7 @@ int main(int, char**) static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#endif test_abs(); diff --git a/libcxx/test/std/depr/depr.c.headers/uchar_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/uchar_h.pass.cpp index 7d9ff07a65f84..c333370153c15 100644 --- a/libcxx/test/std/depr/depr.c.headers/uchar_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/uchar_h.pass.cpp @@ -9,6 +9,7 @@ // XFAIL: suse-linux-enterprise-server-11 // XFAIL: darwin // XFAIL: netbsd +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp index d90834681bb84..e01df7f1f1967 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp @@ -8,6 +8,7 @@ // XFAIL: suse-linux-enterprise-server-11 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12}} +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp index 0e82934e870b0..c482ddc14ef0b 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // class error_category diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp index 19148e7c779de..29e9e92fb7de2 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_entry diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp index da436fc6b959c..85b53224a0ee8 100644 --- 
a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp index 564e0e21ad6cd..0cf99f2658b5b 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_entry diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp index 271a6e826f2b7..e584fdd79ff17 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp index 541a6d9c9ffb0..f1603abca4e10 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_entry diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp index 44eac78fe8f46..3daf5fcb6a1f8 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. 
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp index 928248b3c2b87..746ccb71f3b9d 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp index ce4f286e2b2c3..546170fc5141a 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_entry diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp index 364b832bafa75..e27f7e47fed40 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_entry diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp index 47041cb455a9f..bfe107ee3feb2 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp index e6f6d1657c32f..fe1cc43378e38 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp index 4ab9a2dea3360..d5eeb21e96b32 100644 --- 
a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp index 09b513974aaa9..721a865899647 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp index 6ae6eed1ac022..29ed2090aaf62 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp index d29f7b330862a..6a9aaae28f0b7 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp index f6f0bbe7687a8..407af95705c46 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp index cb25c66bf5120..69dff085e58f8 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class path diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp 
index cdf39e33d3fd1..40a8c54f802d6 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp index aacf160a22293..1b731a4b39fed 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp index dc689ad3d9f50..ff3320e73f576 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp index 5a18c46546897..f4fde828b3279 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp index 4983665978d10..bdf23b17beaf9 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp index e9dc9648ec687..0a6b73af42fa0 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp index 4b8390e2a125d..4d363b91aa3bf 100644 --- 
a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp index 7dcc47d610020..81f0d8f3c7a05 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp index 03c85f27f608c..cb113ca2413d2 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp index 40b09f1e79a99..6df6746e323a4 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp index 82c82c71b181c..5fcf0f1078c7e 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp index 104e419fa9e91..fc3c846db9006 100644 --- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // class recursive_directory_iterator diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp index 707b646be1798..98a8a91a44f53 100644 --- 
a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // path canonical(const path& p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp index 3f9574de0bb01..603d2e78fb9cc 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // void copy(const path& from, const path& to); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp index d6b18e2e043be..8c57cbc771108 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // This test requires the dylib support introduced in D92769. // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp index 4d5cdf31e5b59..424e7e55df1f9 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // This test requires the dylib support introduced in D92769. 
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp index 5e90c4452a9db..64f8effeb9fa1 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // path current_path(); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp index 5fe888609a92e..e2fdeff1159aa 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool equivalent(path const& lhs, path const& rhs); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp index 13e8b95d1e540..b15482c0dd393 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp index 413ba881b59f1..cce1499176012 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. 
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp index 38b26710f1a95..6e1ea695b40d9 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // uintmax_t hard_link_count(const path& p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp index d28898472a94b..85e297d2f58a3 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_block_file(file_status s) noexcept diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp index 738e06cc1ad55..85723dbd3220b 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp index 8b1e41ef7f1b8..51b063b40a97a 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_directory(file_status s) noexcept diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp index 8478037a03c68..930541bd5e357 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_empty(path const& p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp index 0169fed28f54f..39b059b5552ac 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_fifo(file_status s) noexcept diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp 
b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp index f84eb7dd32d81..9fc1dfd083509 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_other(file_status s) noexcept diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp index 441f15a9c5d11..a1fb918a36aad 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_regular_file(file_status s) noexcept diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp index 21aa537094344..cf6da35300de3 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_socket(file_status s) noexcept diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp index d8ec533058c7a..43552f935c138 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // bool is_symlink(file_status s) noexcept diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp index c7dc60681ceee..500aa9137e1f1 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.last_write_time/last_write_time.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // The string reported on errors changed, which makes those tests fail when run // against already-released libc++'s. 
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15 @@ -433,7 +435,6 @@ TEST_CASE(read_last_write_time_static_env_test) TEST_CASE(get_last_write_time_dynamic_env_test) { - using Clock = file_time_type::clock; using Sec = std::chrono::seconds; scoped_test_env env; @@ -446,11 +447,9 @@ TEST_CASE(get_last_write_time_dynamic_env_test) const TimeSpec dir_write_time = dir_times.write; file_time_type ftime = last_write_time(file); - TEST_CHECK(Clock::to_time_t(ftime) == file_write_time.tv_sec); TEST_CHECK(CompareTime(ftime, file_write_time)); file_time_type dtime = last_write_time(dir); - TEST_CHECK(Clock::to_time_t(dtime) == dir_write_time.tv_sec); TEST_CHECK(CompareTime(dtime, dir_write_time)); SleepFor(Sec(2)); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp index 14a288f3e3338..2d3afc2dd4d5d 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.permissions/permissions.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // void permissions(const path& p, perms prms, diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp index ae94253903089..c3f8effb4bb32 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // path read_symlink(const path& p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp index 0c056057927d3..53700e94e00ce 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // path proximate(const path& p, error_code &ec) diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp index c651bf1785823..e5d46f3c992b7 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // void rename(const path& old_p, const path& new_p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp index 504561749759c..8405b74801518 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // void resize_file(const path& p, uintmax_t new_size); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp 
b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp index c0317966d4fe8..44f60240a497e 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // space_info space(const path& p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp index 3fa9f58b77b95..f819a0f4aee52 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // file_status status(const path& p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp index a1d8ba6e09fc7..3fa0f99538cb5 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // file_status symlink_status(const path& p); diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp index b0909da011710..4ad6ba7e6e277 100644 --- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp @@ -8,6 +8,8 @@ // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME + // // path weakly_canonical(const path& p); diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp index 5e85227444bd5..3fe08bcc7eb6d 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_align_val_t_nothrow_replace.pass.cpp @@ -9,6 +9,8 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: sanitizer-new-delete +// XFAIL: LIBCXX-AIX-FIXME + // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. 
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_nothrow_replace.pass.cpp index dcfc603f5da94..267e1d554d069 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_nothrow_replace.pass.cpp @@ -10,6 +10,7 @@ // UNSUPPORTED: sanitizer-new-delete // XFAIL: libcpp-no-vcruntime +// XFAIL: LIBCXX-AIX-FIXME #include #include diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_replace.pass.cpp index 384c97beca9b9..46874433ffe94 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.array/new_array_replace.pass.cpp @@ -10,7 +10,7 @@ // UNSUPPORTED: sanitizer-new-delete // XFAIL: libcpp-no-vcruntime - +// XFAIL: LIBCXX-AIX-FIXME #include #include diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp index fddc4494ce50c..d60c8847fe23b 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_align_val_t_nothrow_replace.pass.cpp @@ -9,6 +9,8 @@ // UNSUPPORTED: c++03, c++11, c++14 // UNSUPPORTED: sanitizer-new-delete +// XFAIL: LIBCXX-AIX-FIXME + // Aligned allocation was not provided before macosx10.14 and as a result we // get availability errors when the deployment target is older than macosx10.14. 
// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13}} diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_nothrow_replace.pass.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_nothrow_replace.pass.cpp index 4182788d43b88..d21f8f86ac900 100644 --- a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_nothrow_replace.pass.cpp +++ b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_nothrow_replace.pass.cpp @@ -10,6 +10,7 @@ // UNSUPPORTED: sanitizer-new-delete // XFAIL: libcpp-no-vcruntime +// XFAIL: LIBCXX-AIX-FIXME #include #include diff --git a/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp index 19d07503b9633..2900ee2b3262e 100644 --- a/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp @@ -146,6 +146,7 @@ int main(int, char**) static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS wchar_t* pw = 0; const wchar_t* pwc = 0; char* pc = 0; @@ -154,6 +155,7 @@ int main(int, char**) static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#endif test_abs(); diff --git a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp index 4fd502433d484..d0d06c477c980 100644 --- a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp @@ -10,7 +10,7 @@ // std::timespec and std::timespec_get // UNSUPPORTED: c++03, c++11, c++14 - +// XFAIL: LIBCXX-AIX-FIXME // ::timespec_get is provided by the C library, but it's marked as // unavailable until macOS 10.15 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp index 65baa2a01fc87..0a4088f2e9318 100644 --- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME // REQUIRES: locale.en_US.UTF-8 // diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp index 83757636e051b..383b978355d43 100644 --- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/hash.pass.cpp @@ -9,7 +9,7 @@ // REQUIRES: locale.en_US.UTF-8 // https://llvm.org/PR41018 -// XFAIL: windows-dll +// XFAIL: windows-dll && msvc // diff --git 
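// [Editor's note, not part of this patch] The cstdlib.pass.cpp hunk above
// guards its wide-character checks instead of deleting them. A minimal
// sketch of the same pattern; in the real suite the macro comes from
// "test_macros.h", while here it is simply left undefined so the guarded
// code compiles:

#include <cstdlib>

#ifndef TEST_HAS_NO_WIDE_CHARACTERS
static std::size_t wide_len(const wchar_t* ws) {
  // std::wcstombs with a null destination only computes the converted
  // length, so this compiles and runs anywhere wchar_t support exists.
  return std::wcstombs(nullptr, ws, 0);
}
#endif

int main() {
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  (void)wide_len(L"42");
#endif
  return 0;
}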
a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp index a3023cf9ba363..8028b44465917 100644 --- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/compare.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // https://llvm.org/PR41018 -// XFAIL: windows-dll +// XFAIL: windows-dll && msvc // diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp index 7a298c5dab907..7bcfe917f9497 100644 --- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate/locale.collate.members/hash.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // https://llvm.org/PR41018 -// XFAIL: windows-dll +// XFAIL: windows-dll && msvc // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp index 7be7e8d574e6c..cca0d76d20e3d 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.dtor/dtor.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template <> class ctype diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp index f737feb35f736..4149764c93f30 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_1.pass.cpp @@ -8,6 +8,7 @@ // REQUIRES: locale.en_US.UTF-8 // XFAIL: LIBCXX-WINDOWS-FIXME +// XFAIL: LIBCXX-AIX-FIXME // XFAIL: libcpp-has-no-wide-characters // diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp index d50639b998f96..56051f18ef2f8 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/widen_many.pass.cpp @@ -8,6 +8,7 @@ // REQUIRES: locale.en_US.UTF-8 // XFAIL: LIBCXX-WINDOWS-FIXME +// XFAIL: LIBCXX-AIX-FIXME // XFAIL: libcpp-has-no-wide-characters // diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp 
b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index b1efbb5fa5aa7..450d8b89bb750 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -13,9 +13,6 @@ // XFAIL: LIBCXX-WINDOWS-FIXME -// TODO(mordante): Investigate these localization/format failures since updating the Docker image in CI -// UNSUPPORTED: stdlib=libc++ - // REQUIRES: locale.fr_FR.UTF-8 // diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp index 3e59cfd013aa7..b391028bc7a1e 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp @@ -10,6 +10,7 @@ // XFAIL: netbsd // XFAIL: LIBCXX-WINDOWS-FIXME +// XFAIL: LIBCXX-AIX-FIXME // REQUIRES: locale.zh_CN.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index d155e099b9f85..01b61d1a17dfd 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -13,9 +13,6 @@ // XFAIL: LIBCXX-WINDOWS-FIXME -// TODO(mordante): Investigate these localization/format failures since updating the Docker image in CI -// UNSUPPORTED: stdlib=libc++ - // REQUIRES: locale.fr_FR.UTF-8 // diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp index a19b17c7822a4..321487542e4f0 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp @@ -10,6 +10,7 @@ // XFAIL: netbsd // XFAIL: LIBCXX-WINDOWS-FIXME +// XFAIL: LIBCXX-AIX-FIXME // REQUIRES: locale.zh_CN.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp index 9a991451aed12..4953625916485 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/thousands_sep.pass.cpp @@ 
-11,9 +11,6 @@ // XFAIL: LIBCXX-WINDOWS-FIXME -// TODO(mordante): Investigate these localization/format failures since updating the Docker image in CI -// UNSUPPORTED: stdlib=libc++ - // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ru_RU.UTF-8 diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp index c653d54f1c0fb..93d54c9b5376f 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_double.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // class num_put diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp index ae0837b9e5df6..24170b8cbe237 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // class num_put diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp index 20b3f4337d68a..12db93ea54340 100644 --- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put/locale.time.put.members/put2.pass.cpp @@ -6,8 +6,6 @@ // //===----------------------------------------------------------------------===// -// XFAIL: LIBCXX-WINDOWS-FIXME - // // class time_put @@ -321,11 +319,15 @@ int main(int, char**) std::string ex(str, iter.base()); // assert(ex == "-0400"); depends on time zone } +#ifndef _WIN32 + // The Windows strftime() doesn't support the "%+" format. Depending on CRT + // configuration of the invalid parameter handler, this can abort the process. 
{ iter = f.put(output_iterator(str), ios, '*', &t, '+'); std::string ex(str, iter.base()); // assert(ex == "Sat May 2 13:03:06 EDT 2009"); depends on time zone } +#endif { iter = f.put(output_iterator(str), ios, '*', &t, '%'); std::string ex(str, iter.base()); diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp index 004ede52c44b0..555d37db278be 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp @@ -11,9 +11,6 @@ // XFAIL: LIBCXX-WINDOWS-FIXME -// TODO(mordante): Investigate these localization/format failures since updating the Docker image in CI -// UNSUPPORTED: stdlib=libc++ - // REQUIRES: locale.en_US.UTF-8 // REQUIRES: locale.fr_FR.UTF-8 diff --git a/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp index a93aa18224616..c435f86f7e9e7 100644 --- a/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp +++ b/libcxx/test/std/localization/locales/locale/locale.operators/compare.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // https://llvm.org/PR41018 -// XFAIL: windows-dll +// XFAIL: windows-dll && msvc // diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp index fc953954e5c1b..160959b1075c0 100644 --- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp +++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // #include diff --git a/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp b/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp index 0152761da67cc..d608e42612346 100644 --- a/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/cmplx.over/arg.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp b/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp index 54a6cba9c0011..2835dc2b7ba91 100644 --- a/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/cmplx.over/pow.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp index 3158a3bc33d1c..fa4d055b18479 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acos.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp 
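// [Editor's note, not part of this patch] The put2.pass.cpp hunk above
// skips the "%+" conversion on Windows because the CRT treats unknown
// strftime() conversions as invalid parameters. A sketch of that failure
// mode, assuming the MSVC-specific _set_invalid_parameter_handler() hook:

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <ctime>

#ifdef _WIN32
// Without a handler, the debug CRT's default reaction to an invalid
// parameter can terminate the process outright; installing one turns the
// abort into something a test harness can observe.
static void on_invalid_parameter(const wchar_t*, const wchar_t*,
                                 const wchar_t*, unsigned, uintptr_t) {
  std::fputs("invalid CRT parameter (e.g. an unknown strftime format)\n",
             stderr);
}
#endif

int main() {
#ifdef _WIN32
  _set_invalid_parameter_handler(on_invalid_parameter);
#endif
  char buf[128];
  std::time_t now = std::time(nullptr);
  // "%+" is a glibc/BSD extension; the Windows CRT does not implement it,
  // which is exactly why the test wraps this case in #ifndef _WIN32.
  std::size_t n = std::strftime(buf, sizeof(buf), "%+", std::localtime(&now));
  std::printf("strftime returned %zu\n", n);
  return 0;
}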
b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp index 424a3b1b82e1a..5ce55b550a2ee 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/acosh.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp index 51da1c002a294..751e8217714d9 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asin.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp index b53509242c378..6c88b535d9548 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/asinh.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp index f0c801649509d..7326d1eee49d0 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atan.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp index a126032bf8c24..79c7a07330b4c 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/atanh.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp index 562d125e05323..1e413293807d9 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp index 78818f0de15b2..4aa9381ecc5e8 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/log10.pass.cpp @@ -6,6 +6,8 @@ // 
//===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp index 91754fac4d0a8..ddf1393117cf1 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_complex.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp index 4b1aef23281db..7abdf7bea9cae 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_complex_scalar.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp index 6022fddfaa755..f367fe0824068 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/pow_scalar_complex.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp index 12fd9a2c0440a..11e7b16251ba1 100644 --- a/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.transcendentals/sqrt.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp b/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp index 49c54372a8e00..c8878c3e03102 100644 --- a/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/complex.value.ops/arg.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: LIBCXX-AIX-FIXME + // // template diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp index 4247e2b9e2313..009bb881b93bd 100644 --- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: c++03, c++11, c++14, c++17 +// XFAIL: LIBCXX-AIX-FIXME // // template diff --git 
a/libcxx/test/std/re/re.results/re.results.const/move.pass.cpp b/libcxx/test/std/re/re.results/re.results.const/move.pass.cpp index 82f2b3e655310..0806edef1429b 100644 --- a/libcxx/test/std/re/re.results/re.results.const/move.pass.cpp +++ b/libcxx/test/std/re/re.results/re.results.const/move.pass.cpp @@ -37,16 +37,17 @@ test(const Allocator& a) int main(int, char**) { + test_allocator_statistics alloc_stats; test (std::allocator >()); #ifndef TEST_HAS_NO_WIDE_CHARACTERS test(std::allocator >()); #endif - test (test_allocator >(3)); - assert(test_alloc_base::moved == 1); + test (test_allocator >(3, &alloc_stats)); + assert(alloc_stats.moved == 1); #ifndef TEST_HAS_NO_WIDE_CHARACTERS - test(test_allocator >(3)); - assert(test_alloc_base::moved == 2); + test(test_allocator >(3, &alloc_stats)); + assert(alloc_stats.moved == 2); #endif return 0; diff --git a/libcxx/test/std/re/re.traits/isctype.pass.cpp b/libcxx/test/std/re/re.traits/isctype.pass.cpp index c6bba18a1227d..a044fe2f61d40 100644 --- a/libcxx/test/std/re/re.traits/isctype.pass.cpp +++ b/libcxx/test/std/re/re.traits/isctype.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // XFAIL: LIBCXX-WINDOWS-FIXME +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/re/re.traits/lookup_classname.pass.cpp b/libcxx/test/std/re/re.traits/lookup_classname.pass.cpp index ddf4663697554..100cae752a873 100644 --- a/libcxx/test/std/re/re.traits/lookup_classname.pass.cpp +++ b/libcxx/test/std/re/re.traits/lookup_classname.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // XFAIL: LIBCXX-WINDOWS-FIXME +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/strings/basic.string/string.capacity/capacity.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/capacity.pass.cpp index 02187c5193af9..c401384a23dc6 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/capacity.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/capacity.pass.cpp @@ -18,11 +18,13 @@ #include "test_macros.h" +test_allocator_statistics alloc_stats; + template void test(S s) { - S::allocator_type::throw_after = 0; + alloc_stats.throw_after = 0; #ifndef TEST_HAS_NO_EXCEPTIONS try #endif @@ -37,14 +39,14 @@ test(S s) assert(false); } #endif - S::allocator_type::throw_after = INT_MAX; + alloc_stats.throw_after = INT_MAX; } int main(int, char**) { { typedef std::basic_string, test_allocator > S; - S s; + S s((test_allocator(&alloc_stats))); test(s); s.assign(10, 'a'); s.erase(5); diff --git a/libcxx/test/std/strings/basic.string/string.cons/move_alloc.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/move_alloc.pass.cpp index 00af31d0c18c0..d38e7c4116841 100644 --- a/libcxx/test/std/strings/basic.string/string.cons/move_alloc.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.cons/move_alloc.pass.cpp @@ -19,7 +19,6 @@ #include "test_allocator.h" #include "min_allocator.h" - template void test(S s0, const typename S::allocator_type& a) @@ -33,9 +32,9 @@ test(S s0, const typename S::allocator_type& a) assert(s2.get_allocator() == a); } - int main(int, char**) { + test_allocator_statistics alloc_stats; { typedef test_allocator A; typedef std::basic_string, A> S; @@ -44,12 +43,12 @@ int main(int, char**) #elif TEST_STD_VER >= 11 static_assert((noexcept(S()) == std::is_nothrow_move_constructible::value), "" ); #endif - test(S(), A(3)); - test(S("1"), A(5)); - 
test(S("1234567890123456789012345678901234567890123456789012345678901234567890"), A(7)); + test(S(), A(3, &alloc_stats)); + test(S("1"), A(5, &alloc_stats)); + test(S("1234567890123456789012345678901234567890123456789012345678901234567890"), A(7, &alloc_stats)); } - int alloc_count = test_alloc_base::alloc_count; + int alloc_count = alloc_stats.alloc_count; { typedef test_allocator A; typedef std::basic_string, A> S; @@ -58,10 +57,10 @@ int main(int, char**) #elif TEST_STD_VER >= 11 static_assert((noexcept(S()) == std::is_nothrow_move_constructible::value), "" ); #endif - S s1 ( "Twas brillig, and the slivy toves did gyre and gymbal in the wabe" ); - S s2 (std::move(s1), A(1)); + S s1 ( "Twas brillig, and the slivy toves did gyre and gymbal in the wabe", A(&alloc_stats)); + S s2 (std::move(s1), A(1, &alloc_stats)); } - assert ( test_alloc_base::alloc_count == alloc_count ); + assert ( alloc_stats.alloc_count == alloc_count ); { typedef min_allocator A; typedef std::basic_string, A> S; diff --git a/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp index 150a277fe0aea..cbb2c2e7e25fe 100644 --- a/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/alloc_ctor.pass.cpp @@ -25,31 +25,32 @@ int main(int, char**) { - assert(test_alloc_base::alloc_count == 0); + test_allocator_statistics alloc_stats; + assert(alloc_stats.alloc_count == 0); { - std::promise p(std::allocator_arg, test_allocator(42)); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(42, &alloc_stats)); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p(std::allocator_arg, test_allocator(42)); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(42, &alloc_stats)); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p(std::allocator_arg, test_allocator(42)); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(42, &alloc_stats)); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); // Test with a minimal allocator { std::promise p(std::allocator_arg, bare_allocator()); diff --git a/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp index f3978e79f47e1..ac504efbd6027 100644 --- a/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/move_assign.pass.cpp @@ -23,15 +23,16 @@ int main(int, char**) { - assert(test_alloc_base::alloc_count == 0); + test_allocator_statistics alloc_stats; + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); - std::promise p(std::allocator_arg, test_allocator()); - 
assert(test_alloc_base::alloc_count == 2); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 2); p = std::move(p0); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); #ifndef TEST_HAS_NO_EXCEPTIONS try @@ -44,17 +45,17 @@ int main(int, char**) assert(e.code() == make_error_code(std::future_errc::no_state)); } #endif - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 2); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 2); p = std::move(p0); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); #ifndef TEST_HAS_NO_EXCEPTIONS try @@ -67,17 +68,17 @@ int main(int, char**) assert(e.code() == make_error_code(std::future_errc::no_state)); } #endif - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 2); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 2); p = std::move(p0); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); #ifndef TEST_HAS_NO_EXCEPTIONS try @@ -90,9 +91,9 @@ int main(int, char**) assert(e.code() == make_error_code(std::future_errc::no_state)); } #endif - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); return 0; } diff --git a/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp index 1fdd61cc3bbee..9775e6655835c 100644 --- a/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/move_ctor.pass.cpp @@ -23,13 +23,14 @@ int main(int, char**) { - assert(test_alloc_base::alloc_count == 0); + test_allocator_statistics alloc_stats; + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); std::promise p(std::move(p0)); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); #ifndef TEST_HAS_NO_EXCEPTIONS try @@ -41,16 +42,16 @@ int main(int, char**) { 
assert(e.code() == make_error_code(std::future_errc::no_state)); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); #endif } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); std::promise p(std::move(p0)); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); #ifndef TEST_HAS_NO_EXCEPTIONS try @@ -62,16 +63,16 @@ int main(int, char**) { assert(e.code() == make_error_code(std::future_errc::no_state)); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); #endif } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); std::promise p(std::move(p0)); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); #ifndef TEST_HAS_NO_EXCEPTIONS try @@ -83,10 +84,10 @@ int main(int, char**) { assert(e.code() == make_error_code(std::future_errc::no_state)); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); #endif } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); return 0; } diff --git a/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp b/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp index bdb4595d62ab4..fbf77b121d474 100644 --- a/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.promise/swap.pass.cpp @@ -25,63 +25,64 @@ int main(int, char**) { - assert(test_alloc_base::alloc_count == 0); + test_allocator_statistics alloc_stats; + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 2); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 2); p.swap(p0); - assert(test_alloc_base::alloc_count == 2); + assert(alloc_stats.alloc_count == 2); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 2); + assert(alloc_stats.alloc_count == 2); assert(f.valid()); f = p0.get_future(); assert(f.valid()); - assert(test_alloc_base::alloc_count == 2); + assert(alloc_stats.alloc_count == 2); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 2); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 2); swap(p, p0); - assert(test_alloc_base::alloc_count == 2); + assert(alloc_stats.alloc_count == 2); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 2); + assert(alloc_stats.alloc_count == 2); assert(f.valid()); f = p0.get_future(); assert(f.valid()); - 
assert(test_alloc_base::alloc_count == 2); + assert(alloc_stats.alloc_count == 2); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); std::promise p; - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); p.swap(p0); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); f = p0.get_future(); assert(f.valid()); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { - std::promise p0(std::allocator_arg, test_allocator()); + std::promise p0(std::allocator_arg, test_allocator(&alloc_stats)); std::promise p; - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); swap(p, p0); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); std::future f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); f = p0.get_future(); assert(f.valid()); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); return 0; } diff --git a/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp index 3d72b983d6b05..027073fde8cc3 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/dtor.pass.cpp @@ -24,49 +24,50 @@ int main(int, char**) { - assert(test_alloc_base::alloc_count == 0); + test_allocator_statistics alloc_stats; + assert(alloc_stats.alloc_count == 0); { typedef int T; std::shared_future f; { - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 1); f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { typedef int& T; std::shared_future f; { - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 1); f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { typedef void T; std::shared_future f; { - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 1); f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - 
assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); return 0; } diff --git a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp index 2255a4549534a..788afbdffdfe3 100644 --- a/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.task/futures.task.members/ctor_func_alloc.pass.cpp @@ -47,10 +47,11 @@ int func(int i) { return i; } int main(int, char**) { + test_allocator_statistics alloc_stats; { std::packaged_task p(std::allocator_arg, - test_allocator(), A(5)); - assert(test_alloc_base::alloc_count > 0); + test_allocator(&alloc_stats), A(5)); + assert(alloc_stats.alloc_count > 0); assert(p.valid()); std::future f = p.get_future(); p(3, 'a'); @@ -58,14 +59,14 @@ int main(int, char**) assert(A::n_copies == 0); assert(A::n_moves > 0); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); A::n_copies = 0; A::n_moves = 0; { A a(5); std::packaged_task p(std::allocator_arg, - test_allocator(), a); - assert(test_alloc_base::alloc_count > 0); + test_allocator(&alloc_stats), a); + assert(alloc_stats.alloc_count > 0); assert(p.valid()); std::future f = p.get_future(); p(3, 'a'); @@ -73,31 +74,31 @@ int main(int, char**) assert(A::n_copies > 0); assert(A::n_moves >= 0); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); A::n_copies = 0; A::n_moves = 0; { A a(5); - std::packaged_task p(std::allocator_arg, test_allocator(), &func); - assert(test_alloc_base::alloc_count > 0); + std::packaged_task p(std::allocator_arg, test_allocator(&alloc_stats), &func); + assert(alloc_stats.alloc_count > 0); assert(p.valid()); std::future f = p.get_future(); p(4); assert(f.get() == 4); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); A::n_copies = 0; A::n_moves = 0; { A a(5); - std::packaged_task p(std::allocator_arg, test_allocator(), func); - assert(test_alloc_base::alloc_count > 0); + std::packaged_task p(std::allocator_arg, test_allocator(&alloc_stats), func); + assert(alloc_stats.alloc_count > 0); assert(p.valid()); std::future f = p.get_future(); p(4); assert(f.get() == 4); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); A::n_copies = 0; A::n_moves = 0; { diff --git a/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp index 05bfe2bea7f4f..643fb6a4259a6 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/dtor.pass.cpp @@ -24,49 +24,50 @@ int main(int, char**) { - assert(test_alloc_base::alloc_count == 0); + test_allocator_statistics alloc_stats; + assert(alloc_stats.alloc_count == 0); { typedef int T; std::future f; { - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 1); f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - 
assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { typedef int& T; std::future f; { - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 1); f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { typedef void T; std::future f; { - std::promise p(std::allocator_arg, test_allocator()); - assert(test_alloc_base::alloc_count == 1); + std::promise p(std::allocator_arg, test_allocator(&alloc_stats)); + assert(alloc_stats.alloc_count == 1); f = p.get_future(); - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 1); + assert(alloc_stats.alloc_count == 1); assert(f.valid()); } - assert(test_alloc_base::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); return 0; } diff --git a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp index cd08e2ba81017..5e418381fd3ad 100644 --- a/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/acquire.pass.cpp @@ -13,6 +13,9 @@ // macOS 11.0. // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} +// TODO(ldionne): This test fails on Ubuntu Focal on our CI nodes (and only there), in 32 bit mode. +// UNSUPPORTED: linux && 32bits-on-64bits + // #include diff --git a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp index 785a57e29d492..a3569abc229c6 100644 --- a/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/ctor.compile.pass.cpp @@ -24,7 +24,8 @@ static_assert(!std::is_default_constructible>::value, static_assert(!std::is_convertible::value, ""); static_assert(!std::is_convertible>::value, ""); -#if 0 // TODO FIXME: the ctor should be constexpr when TEST_STD_VER > 17 +#if TEST_STD_VER > 17 +// Test constexpr-constructibility. (But not destructibility.) constinit std::binary_semaphore bs(1); constinit std::counting_semaphore cs(1); #endif diff --git a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp index e491e13e50f95..39f46d865dbea 100644 --- a/libcxx/test/std/thread/thread.semaphore/release.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/release.pass.cpp @@ -13,6 +13,9 @@ // macOS 11.0. // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} +// TODO(ldionne): This test fails on Ubuntu Focal on our CI nodes (and only there), in 32 bit mode. 
+// UNSUPPORTED: linux && 32bits-on-64bits + // #include diff --git a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp index 974e3c366e906..c15b0515a345b 100644 --- a/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/try_acquire.pass.cpp @@ -30,14 +30,17 @@ int main(int, char**) std::counting_semaphore<> s(1); assert(s.try_acquire()); + assert(!s.try_acquire()); s.release(); assert(s.try_acquire()); + assert(!s.try_acquire()); s.release(2); std::thread t = support::make_test_thread([&](){ assert(s.try_acquire()); }); t.join(); assert(s.try_acquire()); + assert(!s.try_acquire()); return 0; } diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp index 8cd4022724316..c0468fd8ca228 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: libcpp-has-no-threads +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp index 65e99e015af95..1b22ff9e1da5f 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.static/hardware_concurrency.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // // UNSUPPORTED: libcpp-has-no-threads +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp index 3403d742915f1..d5af63aad35fd 100644 --- a/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/bind/func.bind/func.bind.bind/copy.pass.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03 +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_F.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_F.pass.cpp index b5821d861e866..e364e9eebc046 100644 --- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_F.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/alloc_F.pass.cpp @@ -25,7 +25,6 @@ #include "count_new.h" #include "../function_types.h" - #if TEST_STD_VER >= 11 struct RValueCallable { template @@ -37,6 +36,8 @@ struct LValueCallable { }; #endif +test_allocator_statistics alloc_stats; + class DummyClass {}; template @@ -69,7 +70,7 @@ void test_FreeFunction(AllocType& alloc) std::function f2(std::allocator_arg, alloc, target); // The allocator may not fit in the small object buffer, if we allocated // check it was done via the allocator. 
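The change repeated through all of these libc++ tests is mechanical: the static counters that used to live on test_alloc_base become a test_allocator_statistics object declared locally in each test and handed to every allocator by pointer, so the bookkeeping is per-test state rather than mutable globals (which, later in this patch, is also what lets the allocators participate in constant evaluation). A self-contained sketch of the pattern, using the hypothetical names stats_block and counting_allocator rather than the real test_allocator:

    #include <cassert>
    #include <cstddef>
    #include <memory>

    // Per-test statistics, passed by pointer instead of living in globals.
    struct stats_block {
      int count = 0;       // live allocator instances
      int alloc_count = 0; // outstanding allocations
    };

    template <class T>
    struct counting_allocator {
      using value_type = T;
      stats_block* stats_ = nullptr;

      counting_allocator() = default;
      explicit counting_allocator(stats_block* s) : stats_(s) {
        if (stats_ != nullptr)
          ++stats_->count;
      }
      counting_allocator(const counting_allocator& other) : stats_(other.stats_) {
        if (stats_ != nullptr)
          ++stats_->count;
      }
      ~counting_allocator() {
        if (stats_ != nullptr)
          --stats_->count;
      }

      T* allocate(std::size_t n) {
        if (stats_ != nullptr)
          ++stats_->alloc_count;
        return std::allocator<T>().allocate(n);
      }
      void deallocate(T* p, std::size_t n) {
        if (stats_ != nullptr)
          --stats_->alloc_count;
        std::allocator<T>().deallocate(p, n);
      }
    };

    int main() {
      stats_block stats; // scoped to this test; no cross-test leakage
      {
        counting_allocator<int> a(&stats);
        int* p = a.allocate(1);
        assert(stats.alloc_count == 1);
        a.deallocate(p, 1);
      }
      assert(stats.count == 0);
      assert(stats.alloc_count == 0);
      return 0;
    }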
- assert(globalMemCounter.checkOutstandingNewEq(test_alloc_base::alloc_count)); + assert(globalMemCounter.checkOutstandingNewEq(alloc_stats.alloc_count)); assert(f2.template target()); assert(*f2.template target() == target); assert(f2.template target() == 0); @@ -86,7 +87,7 @@ void test_MemFunClass(AllocType& alloc) TargetType target = &MemFunClass::foo; assert(globalMemCounter.checkOutstandingNewEq(0)); std::function f2(std::allocator_arg, alloc, target); - assert(globalMemCounter.checkOutstandingNewEq(test_alloc_base::alloc_count)); + assert(globalMemCounter.checkOutstandingNewEq(alloc_stats.alloc_count)); assert(f2.template target()); assert(*f2.template target() == target); assert(f2.template target() == 0); @@ -111,15 +112,14 @@ void test_for_alloc(Alloc& alloc) { test_MemFunClass(alloc); } -int main(int, char**) -{ +int main(int, char**) { globalMemCounter.reset(); { bare_allocator bare_alloc; test_for_alloc(bare_alloc); } { - non_default_test_allocator non_default_alloc(42); + non_default_test_allocator non_default_alloc(42, &alloc_stats); test_for_alloc(non_default_alloc); } #if TEST_STD_VER >= 11 diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp index fb5e4f4d8f2e6..4d59d7f5667f8 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp @@ -30,8 +30,9 @@ int A::count = 0; int main(int, char**) { + test_allocator_statistics alloc_stats; { - std::shared_ptr p(nullptr, test_deleter(3), test_allocator(5)); + std::shared_ptr p(nullptr, test_deleter(3), test_allocator(5, &alloc_stats)); assert(A::count == 0); assert(p.use_count() == 1); assert(p.get() == 0); @@ -42,14 +43,14 @@ int main(int, char**) assert(d); assert(d->state() == 3); #endif - assert(test_allocator::count == 1); - assert(test_allocator::alloc_count == 1); + assert(alloc_stats.count == 1); + assert(alloc_stats.alloc_count == 1); } assert(A::count == 0); assert(test_deleter::count == 0); assert(test_deleter::dealloc_count == 1); - assert(test_allocator::count == 0); - assert(test_allocator::alloc_count == 0); + assert(alloc_stats.count == 0); + assert(alloc_stats.alloc_count == 0); test_deleter::dealloc_count = 0; // Test an allocator with a minimal interface { diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp index ce9c3e5875eb7..5864433105581 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator_throw.pass.cpp @@ -30,10 +30,11 @@ int A::count = 0; int main(int, char**) { + test_allocator_statistics alloc_stats; try { - test_allocator::throw_after = 0; - std::shared_ptr p(nullptr, test_deleter(3), test_allocator(5)); + alloc_stats.throw_after = 0; + std::shared_ptr p(nullptr, test_deleter(3), test_allocator(5, 
&alloc_stats)); assert(false); } catch (std::bad_alloc&) { assert(A::count == 0); assert(test_deleter::count == 0); assert(test_deleter::dealloc_count == 1); - assert(test_allocator::count == 0); - assert(test_allocator::alloc_count == 0); + assert(alloc_stats.count == 0); + assert(alloc_stats.alloc_count == 0); } return 0; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp index d42ce342b790b..3b86b07d79234 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp @@ -62,9 +62,10 @@ class MoveDeleter int main(int, char**) { + test_allocator_statistics alloc_stats; { A* ptr = new A; - std::shared_ptr p(ptr, test_deleter(3), test_allocator(5)); + std::shared_ptr p(ptr, test_deleter(3), test_allocator(5, &alloc_stats)); assert(A::count == 1); assert(p.use_count() == 1); assert(p.get() == ptr); @@ -75,14 +76,14 @@ int main(int, char**) assert(d); assert(d->state() == 3); #endif - assert(test_allocator::count == 1); - assert(test_allocator::alloc_count == 1); + assert(alloc_stats.count == 1); + assert(alloc_stats.alloc_count == 1); } assert(A::count == 0); assert(test_deleter::count == 0); assert(test_deleter::dealloc_count == 1); - assert(test_allocator::count == 0); - assert(test_allocator::alloc_count == 0); + assert(alloc_stats.count == 0); + assert(alloc_stats.alloc_count == 0); test_deleter::dealloc_count = 0; // Test an allocator with a minimal interface { diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp index 240fd358e063f..6baf2debaa999 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator_throw.pass.cpp @@ -30,20 +30,21 @@ int A::count = 0; int main(int, char**) { + test_allocator_statistics alloc_stats; A* ptr = new A; try { - test_allocator::throw_after = 0; - std::shared_ptr p(ptr, test_deleter(3), test_allocator(5)); + alloc_stats.throw_after = 0; + std::shared_ptr p(ptr, test_deleter(3), test_allocator(5, &alloc_stats)); assert(false); } catch (std::bad_alloc&) { assert(A::count == 0); assert(test_deleter::count == 0); assert(test_deleter::dealloc_count == 1); - assert(test_allocator::count == 0); - assert(test_allocator::alloc_count == 0); + assert(alloc_stats.count == 0); + assert(alloc_stats.alloc_count == 0); } return 0; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp index b892fb4fd9b15..472cb3d0a7d7c 100644 ---
a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared.pass.cpp @@ -141,17 +141,18 @@ int main(int, char**) test >(); test >(); + test_allocator_statistics alloc_stats; { int i = 67; char c = 'e'; - std::shared_ptr p = std::allocate_shared(test_allocator(54), i, c); - assert(test_allocator::alloc_count == 1); + std::shared_ptr p = std::allocate_shared(test_allocator(54, &alloc_stats), i, c); + assert(alloc_stats.alloc_count == 1); assert(A::count == 1); assert(p->get_int() == 67); assert(p->get_char() == 'e'); } assert(A::count == 0); - assert(test_allocator::alloc_count == 0); + assert(alloc_stats.alloc_count == 0); { int i = 67; char c = 'e'; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp index 17afcca06e013..a6ea07ea37cee 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.mod/reset_pointer_deleter_allocator.pass.cpp @@ -43,10 +43,11 @@ int A::count = 0; int main(int, char**) { + test_allocator_statistics alloc_stats; { std::shared_ptr p(new B); A* ptr = new A; - p.reset(ptr, test_deleter(3), test_allocator(4)); + p.reset(ptr, test_deleter(3), test_allocator(4, &alloc_stats)); assert(A::count == 1); assert(B::count == 1); assert(p.use_count() == 1); @@ -58,18 +59,18 @@ int main(int, char**) assert(d); assert(d->state() == 3); #endif - assert(test_allocator::count == 1); - assert(test_allocator::alloc_count == 1); + assert(alloc_stats.count == 1); + assert(alloc_stats.alloc_count == 1); } assert(A::count == 0); assert(test_deleter::count == 0); assert(test_deleter::dealloc_count == 1); - assert(test_allocator::count == 0); - assert(test_allocator::alloc_count == 0); + assert(alloc_stats.count == 0); + assert(alloc_stats.alloc_count == 0); { std::shared_ptr p; A* ptr = new A; - p.reset(ptr, test_deleter(3), test_allocator(4)); + p.reset(ptr, test_deleter(3), test_allocator(4, &alloc_stats)); assert(A::count == 1); assert(B::count == 1); assert(p.use_count() == 1); @@ -81,14 +82,14 @@ int main(int, char**) assert(d); assert(d->state() == 3); #endif - assert(test_allocator::count == 1); - assert(test_allocator::alloc_count == 1); + assert(alloc_stats.count == 1); + assert(alloc_stats.alloc_count == 1); } assert(A::count == 0); assert(test_deleter::count == 0); assert(test_deleter::dealloc_count == 2); - assert(test_allocator::count == 0); - assert(test_allocator::alloc_count == 0); + assert(alloc_stats.count == 0); + assert(alloc_stats.alloc_count == 0); #if TEST_STD_VER > 14 { diff --git a/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp index 43cc5bfcefc35..63db26e543100 100644 --- a/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp @@ -6,6 +6,9 @@ // //===----------------------------------------------------------------------===// +// Fails for 32-bit builds on AIX. 
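// (Two different lit markings are used for the AIX bring-up in this patch: an
// UNSUPPORTED line skips a test outright whenever its boolean expression
// matches the configuration, while XFAIL still runs the test and expects it
// to fail, so a test that unexpectedly starts passing flags the stale
// annotation and forces a cleanup. The made-up LIBCXX-AIX-FIXME feature name
// doubles as a grep-able breadcrumb for finding every such waiver once AIX is
// fixed.)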
+// UNSUPPORTED: LIBCXX-AIX-FIXME + // type_traits // alignment_of diff --git a/libcxx/test/support/test_allocator.h b/libcxx/test/support/test_allocator.h index e0e402a87971c..b5320067ca04b 100644 --- a/libcxx/test/support/test_allocator.h +++ b/libcxx/test/support/test_allocator.h @@ -26,23 +26,16 @@ inline typename std::allocator_traits::size_type alloc_max_size(Alloc con return AT::max_size(a); } -class test_alloc_base { -protected: - static int time_to_throw; - -public: - static int throw_after; - static int count; - static int alloc_count; - static int copied; - static int moved; - static int converted; - - const static int destructed_value = -1; - const static int default_value = 0; - const static int moved_value = INT_MAX; - - static void clear() { +struct test_allocator_statistics { + int time_to_throw = 0; + int throw_after = INT_MAX; + int count = 0; + int alloc_count = 0; + int copied = 0; + int moved = 0; + int converted = 0; + + TEST_CONSTEXPR_CXX14 void clear() { assert(count == 0 && "clearing leaking allocator data?"); count = 0; time_to_throw = 0; @@ -51,25 +44,24 @@ class test_alloc_base { clear_ctor_counters(); } - static void clear_ctor_counters() { + TEST_CONSTEXPR_CXX14 void clear_ctor_counters() { copied = 0; moved = 0; converted = 0; } }; -int test_alloc_base::count = 0; -int test_alloc_base::time_to_throw = 0; -int test_alloc_base::alloc_count = 0; -int test_alloc_base::throw_after = INT_MAX; -int test_alloc_base::copied = 0; -int test_alloc_base::moved = 0; -int test_alloc_base::converted = 0; +struct test_alloc_base { + TEST_CONSTEXPR static const int destructed_value = -1; + TEST_CONSTEXPR static const int moved_value = INT_MAX; +}; template -class test_allocator : public test_alloc_base { - int data_; // participates in equality - int id_; // unique identifier, doesn't participate in equality +class test_allocator { + int data_ = 0; // participates in equality + int id_ = 0; // unique identifier, doesn't participate in equality + test_allocator_statistics* stats_ = nullptr; + template friend class test_allocator; @@ -87,74 +79,113 @@ class test_allocator : public test_alloc_base { typedef test_allocator other; }; - test_allocator() TEST_NOEXCEPT : data_(0), id_(0) { ++count; } - explicit test_allocator(int i, int id = 0) TEST_NOEXCEPT : data_(i), id_(id) { ++count; } - test_allocator(const test_allocator& a) TEST_NOEXCEPT : data_(a.data_), id_(a.id_) { - ++count; - ++copied; - assert(a.data_ != destructed_value && a.id_ != destructed_value && "copying from destroyed allocator"); + TEST_CONSTEXPR test_allocator() TEST_NOEXCEPT = default; + + TEST_CONSTEXPR_CXX14 explicit test_allocator(test_allocator_statistics* stats) TEST_NOEXCEPT : stats_(stats) { + if (stats_ != nullptr) + ++stats_->count; + } + + TEST_CONSTEXPR explicit test_allocator(int data) TEST_NOEXCEPT : data_(data) {} + + TEST_CONSTEXPR_CXX14 explicit test_allocator(int data, test_allocator_statistics* stats) TEST_NOEXCEPT + : data_(data), stats_(stats) { + if (stats != nullptr) + ++stats_->count; + } + + TEST_CONSTEXPR explicit test_allocator(int data, int id) TEST_NOEXCEPT : data_(data), id_(id) {} + + TEST_CONSTEXPR_CXX14 explicit test_allocator(int data, int id, test_allocator_statistics* stats) TEST_NOEXCEPT + : data_(data), id_(id), stats_(stats) { + if (stats_ != nullptr) + ++stats_->count; } + + TEST_CONSTEXPR_CXX14 test_allocator(const test_allocator& a) TEST_NOEXCEPT + : data_(a.data_), id_(a.id_), stats_(a.stats_) { + assert(a.data_ != test_alloc_base::destructed_value && a.id_ != 
test_alloc_base::destructed_value && + "copying from destroyed allocator"); + if (stats_ != nullptr) { + ++stats_->count; + ++stats_->copied; + } + } + #if TEST_STD_VER >= 11 - test_allocator(test_allocator&& a) TEST_NOEXCEPT : data_(a.data_), id_(a.id_) { - ++count; - ++moved; - assert(a.data_ != destructed_value && a.id_ != destructed_value && "moving from destroyed allocator"); - a.data_ = moved_value; - a.id_ = moved_value; + TEST_CONSTEXPR_CXX14 test_allocator(test_allocator&& a) TEST_NOEXCEPT : data_(a.data_), id_(a.id_), stats_(a.stats_) { + if (stats_ != nullptr) { + ++stats_->count; + ++stats_->moved; + } + assert(a.data_ != test_alloc_base::destructed_value && a.id_ != test_alloc_base::destructed_value && + "moving from destroyed allocator"); + a.data_ = test_alloc_base::moved_value; + a.id_ = test_alloc_base::moved_value; } #endif + template - test_allocator(const test_allocator& a) TEST_NOEXCEPT : data_(a.data_), id_(a.id_) { - ++count; - ++converted; + TEST_CONSTEXPR_CXX14 test_allocator(const test_allocator& a) TEST_NOEXCEPT + : data_(a.data_), id_(a.id_), stats_(a.stats_) { + if (stats_ != nullptr) { + ++stats_->count; + ++stats_->converted; + } } - ~test_allocator() TEST_NOEXCEPT { - assert(data_ >= 0); - assert(id_ >= 0); - --count; - data_ = destructed_value; - id_ = destructed_value; + + TEST_CONSTEXPR_CXX20 ~test_allocator() TEST_NOEXCEPT { + assert(data_ != test_alloc_base::destructed_value); + assert(id_ != test_alloc_base::destructed_value); + if (stats_ != nullptr) + --stats_->count; + data_ = test_alloc_base::destructed_value; + id_ = test_alloc_base::destructed_value; } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - pointer allocate(size_type n, const void* = 0) { - assert(data_ >= 0); - if (time_to_throw >= throw_after) { -#ifndef TEST_HAS_NO_EXCEPTIONS - throw std::bad_alloc(); -#else - std::terminate(); -#endif + + TEST_CONSTEXPR pointer address(reference x) const { return &x; } + TEST_CONSTEXPR const_pointer address(const_reference x) const { return &x; } + + TEST_CONSTEXPR_CXX14 pointer allocate(size_type n, const void* = 0) { + assert(data_ != test_alloc_base::destructed_value); + if (stats_ != nullptr) { + if (stats_->time_to_throw >= stats_->throw_after) + TEST_THROW(std::bad_alloc()); + ++stats_->time_to_throw; + ++stats_->alloc_count; } - ++time_to_throw; - ++alloc_count; - return (pointer)::operator new(n * sizeof(T)); + return std::allocator().allocate(n); } - void deallocate(pointer p, size_type) { - assert(data_ >= 0); - --alloc_count; - ::operator delete((void*)p); + + TEST_CONSTEXPR_CXX14 void deallocate(pointer p, size_type s) { + assert(data_ != test_alloc_base::destructed_value); + if (stats_ != nullptr) + --stats_->alloc_count; + std::allocator().deallocate(p, s); } - size_type max_size() const TEST_NOEXCEPT { return UINT_MAX / sizeof(T); } + + TEST_CONSTEXPR size_type max_size() const TEST_NOEXCEPT { return UINT_MAX / sizeof(T); } + #if TEST_STD_VER < 11 void construct(pointer p, const T& val) { ::new (static_cast(p)) T(val); } #else template - void construct(pointer p, U&& val) { + TEST_CONSTEXPR_CXX14 void construct(pointer p, U&& val) { ::new (static_cast(p)) T(std::forward(val)); } #endif - void destroy(pointer p) { p->~T(); } - friend bool operator==(const test_allocator& x, const test_allocator& y) { return x.data_ == y.data_; } - friend bool operator!=(const test_allocator& x, const test_allocator& y) { return !(x == y); } + TEST_CONSTEXPR_CXX14 void destroy(pointer 
p) { p->~T(); } + TEST_CONSTEXPR friend bool operator==(const test_allocator& x, const test_allocator& y) { return x.data_ == y.data_; } + TEST_CONSTEXPR friend bool operator!=(const test_allocator& x, const test_allocator& y) { return !(x == y); } - int get_data() const { return data_; } - int get_id() const { return id_; } + TEST_CONSTEXPR int get_data() const { return data_; } + TEST_CONSTEXPR int get_id() const { return id_; } }; template -class non_default_test_allocator : public test_alloc_base { - int data_; +class non_default_test_allocator { + int data_ = 0; + test_allocator_statistics* stats_ = nullptr; template friend class non_default_test_allocator; @@ -173,59 +204,71 @@ class non_default_test_allocator : public test_alloc_base { typedef non_default_test_allocator other; }; - // non_default_test_allocator() TEST_NOEXCEPT : data_(0) {++count;} - explicit non_default_test_allocator(int i) TEST_NOEXCEPT : data_(i) { ++count; } - non_default_test_allocator(const non_default_test_allocator& a) TEST_NOEXCEPT : data_(a.data_) { ++count; } + TEST_CONSTEXPR_CXX14 + explicit non_default_test_allocator(int i, test_allocator_statistics* stats = nullptr) TEST_NOEXCEPT + : data_(i), stats_(stats) { + if (stats_ != nullptr) { + ++stats_->count; + } + } + + TEST_CONSTEXPR_CXX14 + non_default_test_allocator(const non_default_test_allocator& a) TEST_NOEXCEPT : data_(a.data_), stats_(a.stats_) { + if (stats_ != nullptr) + ++stats_->count; + } + template - non_default_test_allocator(const non_default_test_allocator& a) TEST_NOEXCEPT : data_(a.data_) { - ++count; + TEST_CONSTEXPR_CXX14 non_default_test_allocator(const non_default_test_allocator& a) TEST_NOEXCEPT + : data_(a.data_), stats_(a.stats_) { + if (stats_ != nullptr) + ++stats_->count; } - ~non_default_test_allocator() TEST_NOEXCEPT { - assert(data_ >= 0); - --count; - data_ = -1; + + TEST_CONSTEXPR_CXX20 ~non_default_test_allocator() TEST_NOEXCEPT { + assert(data_ != test_alloc_base::destructed_value); + if (stats_ != nullptr) + --stats_->count; + data_ = test_alloc_base::destructed_value; } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - pointer allocate(size_type n, const void* = 0) { - assert(data_ >= 0); - if (time_to_throw >= throw_after) { -#ifndef TEST_HAS_NO_EXCEPTIONS - throw std::bad_alloc(); -#else - std::terminate(); -#endif + + TEST_CONSTEXPR pointer address(reference x) const { return &x; } + TEST_CONSTEXPR const_pointer address(const_reference x) const { return &x; } + + TEST_CONSTEXPR_CXX20 pointer allocate(size_type n, const void* = nullptr) { + assert(data_ != test_alloc_base::destructed_value); + if (stats_ != nullptr) { + if (stats_->time_to_throw >= stats_->throw_after) + TEST_THROW(std::bad_alloc()); + ++stats_->time_to_throw; + ++stats_->alloc_count; } - ++time_to_throw; - ++alloc_count; - return (pointer)::operator new(n * sizeof(T)); - } - void deallocate(pointer p, size_type) { - assert(data_ >= 0); - --alloc_count; - ::operator delete((void*)p); + return std::allocator().allocate(n); } - size_type max_size() const TEST_NOEXCEPT { return UINT_MAX / sizeof(T); } -#if TEST_STD_VER < 11 - void construct(pointer p, const T& val) { ::new (static_cast(p)) T(val); } -#else - template - void construct(pointer p, U&& val) { - ::new (static_cast(p)) T(std::forward(val)); + + TEST_CONSTEXPR_CXX20 void deallocate(pointer p, size_type n) { + assert(data_ != test_alloc_base::destructed_value); + if (stats_ != nullptr) + --stats_->alloc_count; + 
std::allocator().deallocate(p, n); } -#endif - void destroy(pointer p) { p->~T(); } - friend bool operator==(const non_default_test_allocator& x, const non_default_test_allocator& y) { + TEST_CONSTEXPR size_type max_size() const TEST_NOEXCEPT { return UINT_MAX / sizeof(T); } + + TEST_CONSTEXPR friend bool operator==(const non_default_test_allocator& x, const non_default_test_allocator& y) { return x.data_ == y.data_; } - friend bool operator!=(const non_default_test_allocator& x, const non_default_test_allocator& y) { return !(x == y); } + + TEST_CONSTEXPR friend bool operator!=(const non_default_test_allocator& x, const non_default_test_allocator& y) { + return !(x == y); + } }; template <> -class test_allocator : public test_alloc_base { - int data_; - int id_; +class test_allocator { + int data_ = 0; + int id_ = 0; + test_allocator_statistics* stats_ = nullptr; template friend class test_allocator; @@ -242,26 +285,46 @@ class test_allocator : public test_alloc_base { typedef test_allocator other; }; - test_allocator() TEST_NOEXCEPT : data_(0), id_(0) {} - explicit test_allocator(int i, int id = 0) TEST_NOEXCEPT : data_(i), id_(id) {} - test_allocator(const test_allocator& a) TEST_NOEXCEPT : data_(a.data_), id_(a.id_) {} + TEST_CONSTEXPR test_allocator() TEST_NOEXCEPT = default; + + TEST_CONSTEXPR_CXX14 explicit test_allocator(test_allocator_statistics* stats) TEST_NOEXCEPT : stats_(stats) {} + + TEST_CONSTEXPR explicit test_allocator(int data) TEST_NOEXCEPT : data_(data) {} + + TEST_CONSTEXPR explicit test_allocator(int data, test_allocator_statistics* stats) TEST_NOEXCEPT + : data_(data), stats_(stats) + {} + + TEST_CONSTEXPR explicit test_allocator(int data, int id) : data_(data), id_(id) {} + + TEST_CONSTEXPR_CXX14 explicit test_allocator(int data, int id, test_allocator_statistics* stats) TEST_NOEXCEPT + : data_(data), id_(id), stats_(stats) + {} + + TEST_CONSTEXPR_CXX14 test_allocator(const test_allocator& a) TEST_NOEXCEPT + : data_(a.data_), id_(a.id_), stats_(a.stats_) + {} + template - test_allocator(const test_allocator& a) TEST_NOEXCEPT : data_(a.data_), id_(a.id_) {} - ~test_allocator() TEST_NOEXCEPT { - data_ = -1; - id_ = -1; + TEST_CONSTEXPR_CXX14 test_allocator(const test_allocator& a) TEST_NOEXCEPT + : data_(a.data_), id_(a.id_), stats_(a.stats_) + {} + + TEST_CONSTEXPR_CXX20 ~test_allocator() TEST_NOEXCEPT { + data_ = test_alloc_base::destructed_value; + id_ = test_alloc_base::destructed_value; } - int get_id() const { return id_; } - int get_data() const { return data_; } + TEST_CONSTEXPR int get_id() const { return id_; } + TEST_CONSTEXPR int get_data() const { return data_; } - friend bool operator==(const test_allocator& x, const test_allocator& y) { return x.data_ == y.data_; } - friend bool operator!=(const test_allocator& x, const test_allocator& y) { return !(x == y); } + TEST_CONSTEXPR friend bool operator==(const test_allocator& x, const test_allocator& y) { return x.data_ == y.data_; } + TEST_CONSTEXPR friend bool operator!=(const test_allocator& x, const test_allocator& y) { return !(x == y); } }; template class other_allocator { - int data_; + int data_ = -1; template friend class other_allocator; @@ -269,17 +332,22 @@ class other_allocator { public: typedef T value_type; - other_allocator() : data_(-1) {} - explicit other_allocator(int i) : data_(i) {} + TEST_CONSTEXPR_CXX14 other_allocator() {} + TEST_CONSTEXPR_CXX14 explicit other_allocator(int i) : data_(i) {} + template - other_allocator(const other_allocator& a) : data_(a.data_) {} - T*
allocate(std::size_t n) { return (T*)::operator new(n * sizeof(T)); } - void deallocate(T* p, std::size_t) { ::operator delete((void*)p); } + TEST_CONSTEXPR_CXX14 other_allocator(const other_allocator& a) : data_(a.data_) {} - other_allocator select_on_container_copy_construction() const { return other_allocator(-2); } + TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { return std::allocator().allocate(n); } + TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t s) { std::allocator().deallocate(p, s); } - friend bool operator==(const other_allocator& x, const other_allocator& y) { return x.data_ == y.data_; } - friend bool operator!=(const other_allocator& x, const other_allocator& y) { return !(x == y); } + TEST_CONSTEXPR_CXX14 other_allocator select_on_container_copy_construction() const { return other_allocator(-2); } + + TEST_CONSTEXPR_CXX14 friend bool operator==(const other_allocator& x, const other_allocator& y) { + return x.data_ == y.data_; + } + + TEST_CONSTEXPR_CXX14 friend bool operator!=(const other_allocator& x, const other_allocator& y) { return !(x == y); } typedef std::true_type propagate_on_container_copy_assignment; typedef std::true_type propagate_on_container_move_assignment; @@ -301,15 +369,15 @@ struct Tag_X { // All constructors must be passed the Tag type. // DefaultInsertable into vector>, - Tag_X(Ctor_Tag) {} + constexpr Tag_X(Ctor_Tag) {} // CopyInsertable into vector>, - Tag_X(Ctor_Tag, const Tag_X&) {} + constexpr Tag_X(Ctor_Tag, const Tag_X&) {} // MoveInsertable into vector>, and - Tag_X(Ctor_Tag, Tag_X&&) {} + constexpr Tag_X(Ctor_Tag, Tag_X&&) {} // EmplaceConstructible into vector> from args. template - Tag_X(Ctor_Tag, Args&&...) {} + constexpr Tag_X(Ctor_Tag, Args&&...) {} // not DefaultConstructible, CopyConstructible or MoveConstructible. Tag_X() = delete; @@ -317,15 +385,13 @@ struct Tag_X { Tag_X(Tag_X&&) = delete; // CopyAssignable. - Tag_X& operator=(const Tag_X&) { return *this; } + TEST_CONSTEXPR_CXX14 Tag_X& operator=(const Tag_X&) { return *this; }; // MoveAssignable. - Tag_X& operator=(Tag_X&&) { return *this; } + TEST_CONSTEXPR_CXX14 Tag_X& operator=(Tag_X&&) { return *this; }; private: - // Not Destructible. - ~Tag_X() {} - + ~Tag_X() = default; // Erasable from vector>. friend class TaggingAllocator; }; @@ -337,71 +403,109 @@ class TaggingAllocator { TaggingAllocator() = default; template - TaggingAllocator(const TaggingAllocator&) {} - - T* allocate(std::size_t n) { return std::allocator{}.allocate(n); } - - void deallocate(T* p, std::size_t n) { std::allocator{}.deallocate(p, n); } + constexpr TaggingAllocator(const TaggingAllocator&){}; template void construct(Tag_X* p, Args&&... args) { ::new ((void*)p) Tag_X(Ctor_Tag{}, std::forward(args)...); } - template - void construct(U* p, Args&&... 
args) { - ::new ((void*)p) U(std::forward(args)...); - } - - template + template void destroy(U* p) { p->~U(); } -}; - -template -bool operator==(const TaggingAllocator&, const TaggingAllocator&) { - return true; -} -template -bool operator!=(const TaggingAllocator&, const TaggingAllocator&) { - return false; -} + TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { return std::allocator{}.allocate(n); } + TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) { std::allocator{}.deallocate(p, n); } +}; #endif template struct limited_alloc_handle { - std::size_t outstanding_; - void* last_alloc_; - - limited_alloc_handle() : outstanding_(0), last_alloc_(nullptr) {} + std::size_t outstanding_ = 0; + void* last_alloc_ = nullptr; template - T* allocate(std::size_t N) { + TEST_CONSTEXPR_CXX20 T* allocate(std::size_t N) { if (N + outstanding_ > MaxAllocs) TEST_THROW(std::bad_alloc()); - last_alloc_ = ::operator new(N * sizeof(T)); + last_alloc_ = std::allocator().allocate(N); outstanding_ += N; return static_cast(last_alloc_); } - void deallocate(void* ptr, std::size_t N) { + template + TEST_CONSTEXPR_CXX20 void deallocate(T* ptr, std::size_t N) { if (ptr == last_alloc_) { last_alloc_ = nullptr; assert(outstanding_ >= N); outstanding_ -= N; } - ::operator delete(ptr); + std::allocator().deallocate(ptr, N); } }; +namespace detail { +template +class thread_unsafe_shared_ptr { +public: + thread_unsafe_shared_ptr() = default; + + TEST_CONSTEXPR_CXX14 thread_unsafe_shared_ptr(const thread_unsafe_shared_ptr& other) : block(other.block) { + ++block->ref_count; + } + + TEST_CONSTEXPR_CXX20 ~thread_unsafe_shared_ptr() { + --block->ref_count; + if (block->ref_count != 0) + return; + typedef std::allocator_traits > allocator_traits; + std::allocator alloc; + allocator_traits::destroy(alloc, block); + allocator_traits::deallocate(alloc, block, 1); + } + + TEST_CONSTEXPR const T& operator*() const { return block->content; } + TEST_CONSTEXPR const T* operator->() const { return &block->content; } + TEST_CONSTEXPR_CXX14 T& operator*() { return block->content; } + TEST_CONSTEXPR_CXX14 T* operator->() { return &block->content; } + TEST_CONSTEXPR_CXX14 T* get() { return &block->content; } + TEST_CONSTEXPR const T* get() const { return &block->content; } + +private: + struct control_block { + template + TEST_CONSTEXPR control_block(Args... args) : content(std::forward(args)...) {} + size_t ref_count = 1; + T content; + }; + + control_block* block = nullptr; + + template + friend TEST_CONSTEXPR_CXX20 thread_unsafe_shared_ptr make_thread_unsafe_shared(Args...); +}; + +template +TEST_CONSTEXPR_CXX20 thread_unsafe_shared_ptr make_thread_unsafe_shared(Args... 
args) { + typedef typename thread_unsafe_shared_ptr::control_block control_block_type; + typedef std::allocator_traits > allocator_traits; + + thread_unsafe_shared_ptr ptr; + std::allocator alloc; + ptr.block = allocator_traits::allocate(alloc, 1); + allocator_traits::construct(alloc, ptr.block, std::forward(args)...); + + return ptr; +} +} // namespace detail + template class limited_allocator { template friend class limited_allocator; typedef limited_alloc_handle BuffT; - std::shared_ptr handle_; + detail::thread_unsafe_shared_ptr handle_; public: typedef T value_type; @@ -417,29 +521,28 @@ class limited_allocator { typedef limited_allocator other; }; - limited_allocator() : handle_(new BuffT) {} + TEST_CONSTEXPR_CXX20 limited_allocator() : handle_(detail::make_thread_unsafe_shared()) {} - limited_allocator(limited_allocator const& other) : handle_(other.handle_) {} + limited_allocator(limited_allocator const&) = default; template - explicit limited_allocator(limited_allocator const& other) : handle_(other.handle_) {} + TEST_CONSTEXPR explicit limited_allocator(limited_allocator const& other) : handle_(other.handle_) {} limited_allocator& operator=(const limited_allocator&) = delete; - pointer allocate(size_type n) { return handle_->template allocate(n); } - void deallocate(pointer p, size_type n) { handle_->deallocate(p, n); } - size_type max_size() const { return N; } - - BuffT* getHandle() const { return handle_.get(); } + TEST_CONSTEXPR_CXX20 pointer allocate(size_type n) { return handle_->template allocate(n); } + TEST_CONSTEXPR_CXX20 void deallocate(pointer p, size_type n) { handle_->template deallocate(p, n); } + TEST_CONSTEXPR size_type max_size() const { return N; } + TEST_CONSTEXPR BuffT* getHandle() const { return handle_.get(); } }; template -inline bool operator==(limited_allocator const& LHS, limited_allocator const& RHS) { +TEST_CONSTEXPR inline bool operator==(limited_allocator const& LHS, limited_allocator const& RHS) { return LHS.getHandle() == RHS.getHandle(); } template -inline bool operator!=(limited_allocator const& LHS, limited_allocator const& RHS) { +TEST_CONSTEXPR inline bool operator!=(limited_allocator const& LHS, limited_allocator const& RHS) { return !(LHS == RHS); } diff --git a/libcxx/test/support/test_comparisons.h b/libcxx/test/support/test_comparisons.h index 9d666545abdf8..90440f4b18d61 100644 --- a/libcxx/test/support/test_comparisons.h +++ b/libcxx/test/support/test_comparisons.h @@ -176,13 +176,13 @@ void AssertComparisons2ConvertibleToBool() struct LessAndEqComp { int value; - LessAndEqComp(int v) : value(v) {} + TEST_CONSTEXPR_CXX14 LessAndEqComp(int v) : value(v) {} - friend bool operator<(const LessAndEqComp& lhs, const LessAndEqComp& rhs) { + friend TEST_CONSTEXPR_CXX14 bool operator<(const LessAndEqComp& lhs, const LessAndEqComp& rhs) { return lhs.value < rhs.value; } - friend bool operator==(const LessAndEqComp& lhs, const LessAndEqComp& rhs) { + friend TEST_CONSTEXPR_CXX14 bool operator==(const LessAndEqComp& lhs, const LessAndEqComp& rhs) { return lhs.value == rhs.value; } }; diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile index 6e1d2bf4cd8d1..653fef0620db0 100644 --- a/libcxx/utils/ci/Dockerfile +++ b/libcxx/utils/ci/Dockerfile @@ -37,7 +37,7 @@ ENV DEBIAN_FRONTEND=noninteractive # CI builders to rebuild their copy of the Docker image. This is not a great # solution, however without that, the CI builders will keep the same cached # Docker image forever. 
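# (Aside: bumping the echoed value below rewrites the text of that RUN
# instruction, which invalidates Docker's layer cache from this point onward
# and forces the CI builders to rebuild every subsequent layer of the image.)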
-RUN echo 1 +RUN echo 2 RUN apt-get update && apt-get install -y bash curl @@ -91,7 +91,7 @@ WORKDIR /home/libcxx-builder # Install the Buildkite agent and dependencies. This must be done as non-root # for the Buildkite agent to be installed in a path where we can find it. -RUN bash -c "$(curl -sL https://raw.githubusercontent.com/buildkite/agent/master/install.sh)" +RUN bash -c "$(curl -sL https://raw.githubusercontent.com/buildkite/agent/main/install.sh)" ENV PATH="${PATH}:/home/libcxx-builder/.buildkite-agent/bin" RUN echo "tags=\"queue=libcxx-builders,arch=$(uname -m),os=linux\"" >> "/home/libcxx-builder/.buildkite-agent/buildkite-agent.cfg" diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml index 924bf2de4e06f..df9f23df82a6b 100644 --- a/libcxx/utils/ci/buildkite-pipeline.yml +++ b/libcxx/utils/ci/buildkite-pipeline.yml @@ -680,3 +680,33 @@ steps: - exit_status: -1 # Agent was lost limit: 2 timeout_in_minutes: 120 + + - label: "AIX (32-bit)" + command: "libcxx/utils/ci/run-buildbot aix" + artifact_paths: + - "**/test-results.xml" + env: + OBJECT_MODE: "32" + agents: + queue: libcxx-builders + os: aix + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 2 + timeout_in_minutes: 120 + + - label: "AIX (64-bit)" + command: "libcxx/utils/ci/run-buildbot aix" + artifact_paths: + - "**/test-results.xml" + env: + OBJECT_MODE: "64" + agents: + queue: libcxx-builders + os: aix + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 2 + timeout_in_minutes: 120 diff --git a/libcxx/utils/ci/run-buildbot-container b/libcxx/utils/ci/run-buildbot-container index 30e8b181bf92c..5d975ebfc4f4e 100755 --- a/libcxx/utils/ci/run-buildbot-container +++ b/libcxx/utils/ci/run-buildbot-container @@ -12,6 +12,12 @@ # state in `/llvm` is shared between the container and the host machine, which # is useful for editing files on the host machine and re-running the build bot # in the container. +# +# On Linux you will likely not be able to write to the mount, because the user +# inside the container lacks permission to do so. If you need write access, +# grant that user permission after starting the container, or run the +# container under your own user and group IDs by adding this flag: +# --user $(id -u):$(id -g) set -e @@ -21,4 +27,4 @@ if [[ !
-d "${MONOREPO_ROOT}/libcxx/utils/ci" ]]; then exit 1 fi docker pull ldionne/libcxx-builder -docker run -it --volume "${MONOREPO_ROOT}:/llvm" --workdir "/llvm" ldionne/libcxx-builder bash +docker run -it --volume "${MONOREPO_ROOT}:/llvm" --workdir "/llvm" --cap-add=SYS_PTRACE ldionne/libcxx-builder bash diff --git a/libcxxabi/test/test_exception_storage.pass.cpp b/libcxxabi/test/test_exception_storage.pass.cpp index 460d27b78ea79..5a68be7ec4174 100644 --- a/libcxxabi/test/test_exception_storage.pass.cpp +++ b/libcxxabi/test/test_exception_storage.pass.cpp @@ -44,9 +44,7 @@ size_t thread_globals [ NUMTHREADS ] = { 0 }; std::__libcpp_thread_t threads [ NUMTHREADS ]; #endif -int main () { - int retVal = 0; - +int main() { #ifndef _LIBCXXABI_HAS_NO_THREADS // Make the threads, let them run, and wait for them to finish for ( int i = 0; i < NUMTHREADS; ++i ) @@ -54,6 +52,7 @@ int main () { for ( int i = 0; i < NUMTHREADS; ++i ) std::__libcpp_thread_join ( &threads [ i ] ); + int retVal = 0; for ( int i = 0; i < NUMTHREADS; ++i ) { if ( 0 == thread_globals [ i ] ) { std::printf("Thread #%d had a zero global\n", i); @@ -68,12 +67,11 @@ int main () { retVal = 2; } } + return retVal; #else // _LIBCXXABI_HAS_NO_THREADS size_t thread_globals; + thread_code(&thread_globals); // Check that __cxa_get_globals() is not NULL. - if (thread_code(&thread_globals) == 0) { - retVal = 1; - } + return (thread_globals == 0) ? 1 : 0; #endif // !_LIBCXXABI_HAS_NO_THREADS - return retVal; } diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index f16d49a8fb8a4..bec0d1f1f7c9e 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -23,7 +23,11 @@ set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH "Specify path to libc++ source.") if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD) - project(libunwind LANGUAGES C CXX ASM) + # We may have an incomplete toolchain - do language support tests without + # linking. + include(EnableLanguageNolink) + project(libunwind LANGUAGES NONE) + llvm_enable_language_nolink(C CXX ASM) set(PACKAGE_NAME libunwind) set(PACKAGE_VERSION 14.0.0git) @@ -179,6 +183,14 @@ include(HandleLibunwindFlags) # Get required flags. add_target_flags_if(LIBUNWIND_BUILD_32_BITS "-m32") +# Compiler tests may be failing if the compiler implicitly links in libunwind, +# which doesn't exist yet. This gets waived by --unwindlib=none +# later in config-ix below, but the tests for --target etc before that may +# be failing due to it. Only test compilation, not linking, for these +# tests here now. +set(CMAKE_TRY_COMPILE_TARGET_TYPE_ORIG ${CMAKE_TRY_COMPILE_TARGET_TYPE}) +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + if(LIBUNWIND_TARGET_TRIPLE) add_target_flags_if_supported("--target=${LIBUNWIND_TARGET_TRIPLE}") endif() @@ -188,6 +200,7 @@ endif() if(LIBUNWIND_SYSROOT) add_target_flags_if_supported("--sysroot=${LIBUNWIND_SYSROOT}") endif() +set(CMAKE_TRY_COMPILE_TARGET_TYPE ${CMAKE_TRY_COMPILE_TARGET_TYPE_ORIG}) # Configure compiler. 
include(config-ix) diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake index 78f116c15e0a3..ec1073395859f 100644 --- a/libunwind/cmake/config-ix.cmake +++ b/libunwind/cmake/config-ix.cmake @@ -2,9 +2,18 @@ include(CMakePushCheckState) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) include(CheckLibraryExists) +include(CheckLinkerFlag) include(CheckSymbolExists) include(CheckCSourceCompiles) +# The compiler driver may be implicitly trying to link against libunwind, which +# might not work if libunwind doesn't exist yet. Try to check if +# --unwindlib=none is supported, and use that if possible. +llvm_check_linker_flag("--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG) +if (LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none") +endif() + check_library_exists(c fopen "" LIBUNWIND_HAS_C_LIB) if (NOT LIBUNWIND_USE_COMPILER_RT) @@ -25,11 +34,11 @@ endif() # required for the link to go through. We remove sanitizers from the # configuration checks to avoid spurious link errors. -check_c_compiler_flag(-nostdlib++ LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG) +llvm_check_linker_flag(-nostdlib++ LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG) if (LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++") else() - check_c_compiler_flag(-nodefaultlibs LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG) + llvm_check_linker_flag(-nodefaultlibs LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG) if (LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs") endif() diff --git a/libunwind/src/CMakeLists.txt b/libunwind/src/CMakeLists.txt index ce3217fa805d9..3b20e97c856ac 100644 --- a/libunwind/src/CMakeLists.txt +++ b/libunwind/src/CMakeLists.txt @@ -83,6 +83,7 @@ if (LIBUNWIND_ENABLE_THREADS) endif() # Setup flags. +add_link_flags_if(LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG --unwindlib=none) if (LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG) add_link_flags_if_supported(-nostdlib++) else() diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index 505360663f4f9..f117b62192c84 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -164,7 +164,7 @@ std::vector BitcodeCompiler::compile(COFFLinkerContext &ctx) { // The /lldltocache option specifies the path to a directory in which to cache // native object files for ThinLTO incremental builds. If a path was // specified, configure LTO to use it as the cache directory. 
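The same two-line change lands in the COFF, ELF, and Mach-O drivers below: the NativeObjectCache and CachedFileOutput types were renamed to the more general FileCache and CachedFileStream when LLVM's caching support was generalized beyond LTO. With the template arguments that this rendering of the diff swallowed restored, each call site now reads approximately as follows (a reconstruction, not verbatim; addBuffer stands in for the driver-specific completion callback):

    FileCache cache;
    if (!config->ltoCache.empty())
      cache = check(localCache("ThinLTO", "Thin", config->ltoCache, addBuffer));

    checkError(ltoObj->run(
        [&](size_t task) {
          return std::make_unique<CachedFileStream>(
              std::make_unique<llvm::raw_svector_ostream>(buf[task]));
        },
        cache));

Only the spelling changes; the behavior (handing the LTO backend an output stream per task and reusing cached artifacts from the configured directory) is untouched.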
- NativeObjectCache cache; + FileCache cache; if (!config->ltoCache.empty()) cache = check(localCache("ThinLTO", "Thin", config->ltoCache, @@ -174,7 +174,7 @@ std::vector BitcodeCompiler::compile(COFFLinkerContext &ctx) { checkError(ltoObj->run( [&](size_t task) { - return std::make_unique( + return std::make_unique( std::make_unique(buf[task])); }, cache)); diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp index c8c5ffed71268..399b6cac75470 100644 --- a/lld/Common/ErrorHandler.cpp +++ b/lld/Common/ErrorHandler.cpp @@ -192,12 +192,12 @@ void ErrorHandler::log(const Twine &msg) { reportDiagnostic(logName, Colors::RESET, "", msg); } -void ErrorHandler::message(const Twine &msg) { +void ErrorHandler::message(const Twine &msg, llvm::raw_ostream &s) { if (disableOutput) return; std::lock_guard lock(mu); - lld::outs() << msg << "\n"; - lld::outs().flush(); + s << msg << "\n"; + s.flush(); } void ErrorHandler::warn(const Twine &msg) { diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 5f206fc97b3ca..a42d216e4e778 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -304,7 +304,7 @@ std::vector BitcodeCompiler::compile() { // The --thinlto-cache-dir option specifies the path to a directory in which // to cache native object files for ThinLTO incremental builds. If a path was // specified, configure LTO to use it as the cache directory. - NativeObjectCache cache; + FileCache cache; if (!config->thinLTOCacheDir.empty()) cache = check(localCache("ThinLTO", "Thin", config->thinLTOCacheDir, @@ -315,7 +315,7 @@ std::vector BitcodeCompiler::compile() { if (!bitcodeFiles.empty()) checkError(ltoObj->run( [&](size_t task) { - return std::make_unique( + return std::make_unique( std::make_unique(buf[task])); }, cache)); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index a8f103b3c86a9..e14b21bc123bc 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -224,7 +224,7 @@ static bool isRelExpr(RelExpr expr) { static bool isStaticLinkTimeConstant(RelExpr e, RelType type, const Symbol &sym, InputSectionBase &s, uint64_t relOff) { // These expressions always compute a constant - if (oneofadjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE), - type, offset, addend, &sym}); + if (expr == R_DTPREL) { + if (toExecRelax) + expr = target->adjustTlsExpr(type, R_RELAX_TLS_LD_TO_LE); + c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp index 5ab49f77b150a..17da4d045585d 100644 --- a/lld/MachO/ConcatOutputSection.cpp +++ b/lld/MachO/ConcatOutputSection.cpp @@ -353,7 +353,7 @@ void ConcatOutputSection::writeTo(uint8_t *buf) const { size_t i = 0, ie = inputs.size(); size_t t = 0, te = thunks.size(); while (i < ie || t < te) { - while (i < ie && (t == te || inputs[i]->getSize() == 0 || + while (i < ie && (t == te || inputs[i]->empty() || inputs[i]->outSecOff < thunks[t]->outSecOff)) { inputs[i]->writeTo(buf + inputs[i]->outSecOff); ++i; diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 698839895ba61..090eb7a28cf9a 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -148,6 +148,7 @@ struct Configuration { bool deadStripDylibs = false; bool demangle = false; bool deadStrip = false; + bool errorForArchMismatch = false; PlatformInfo platformInfo; NamespaceKind namespaceKind = NamespaceKind::twolevel; UndefinedSymbolTreatment undefinedSymbolTreatment = diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index 528e78055728d..861ba97066754 100644 --- 
a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -80,19 +80,39 @@ static HeaderFileType getOutputType(const InputArgList &args) { } } +static DenseMap resolvedLibraries; static Optional findLibrary(StringRef name) { - if (config->searchDylibsFirst) { - if (Optional path = findPathCombination( - "lib" + name, config->librarySearchPaths, {".tbd", ".dylib"})) - return path; + CachedHashStringRef key(name); + auto entry = resolvedLibraries.find(key); + if (entry != resolvedLibraries.end()) + return entry->second; + + auto doFind = [&] { + if (config->searchDylibsFirst) { + if (Optional path = findPathCombination( + "lib" + name, config->librarySearchPaths, {".tbd", ".dylib"})) + return path; + return findPathCombination("lib" + name, config->librarySearchPaths, + {".a"}); + } return findPathCombination("lib" + name, config->librarySearchPaths, - {".a"}); - } - return findPathCombination("lib" + name, config->librarySearchPaths, - {".tbd", ".dylib", ".a"}); + {".tbd", ".dylib", ".a"}); + }; + + Optional path = doFind(); + if (path) + resolvedLibraries[key] = *path; + + return path; } +static DenseMap resolvedFrameworks; static Optional findFramework(StringRef name) { + CachedHashStringRef key(name); + auto entry = resolvedFrameworks.find(key); + if (entry != resolvedFrameworks.end()) + return entry->second; + SmallString<260> symlink; StringRef suffix; std::tie(name, suffix) = name.split(","); @@ -108,13 +128,13 @@ static Optional findFramework(StringRef name) { // only append suffix if realpath() succeeds Twine suffixed = location + suffix; if (fs::exists(suffixed)) - return saver.save(suffixed.str()); + return resolvedFrameworks[key] = saver.save(suffixed.str()); } // Suffix lookup failed, fall through to the no-suffix case. } if (Optional path = resolveDylibPath(symlink.str())) - return path; + return resolvedFrameworks[key] = *path; } return {}; } @@ -174,7 +194,7 @@ static std::vector getSystemLibraryRoots(InputArgList &args) { for (const Arg *arg : args.filtered(OPT_syslibroot)) roots.push_back(arg->getValue()); // NOTE: the final `-syslibroot` being `/` will ignore all roots - if (roots.size() && roots.back() == "/") + if (!roots.empty() && roots.back() == "/") roots.clear(); // NOTE: roots can never be empty - add an empty root to simplify the library // and framework search path computation. @@ -206,7 +226,9 @@ static llvm::CachePruningPolicy getLTOCachePolicy(InputArgList &args) { args.filtered(OPT_thinlto_cache_policy, OPT_prune_interval_lto, OPT_prune_after_lto, OPT_max_relative_cache_size_lto)) { switch (arg->getOption().getID()) { - case OPT_thinlto_cache_policy: add(arg->getValue()); break; + case OPT_thinlto_cache_policy: + add(arg->getValue()); + break; case OPT_prune_interval_lto: if (!strcmp("-1", arg->getValue())) add("prune_interval=87600h"); // 10 years @@ -374,9 +396,10 @@ static void addFramework(StringRef name, bool isNeeded, bool isWeak, } // Parses LC_LINKER_OPTION contents, which can add additional command line -// flags. +// flags. This directly parses the flags instead of using the standard argument +// parser to improve performance. 
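The findLibrary()/findFramework() changes earlier in this file wrap an existing filesystem probe in a memoization table. A self-contained sketch of the same shape, with hypothetical names (resolve_cached, expensive_search) rather than lld's actual code; note that, as in the patch, only successful lookups are cached, so a miss is simply retried on the next call:

    #include <map>
    #include <optional>
    #include <string>

    // Stand-in for probing the library search paths on disk.
    static std::optional<std::string> expensive_search(const std::string& name) {
      if (name == "System")
        return "/usr/lib/libSystem.tbd";
      return std::nullopt;
    }

    static std::optional<std::string> resolve_cached(const std::string& name) {
      static std::map<std::string, std::string> cache; // successful hits only
      auto it = cache.find(name);
      if (it != cache.end())
        return it->second;
      std::optional<std::string> path = expensive_search(name);
      if (path)
        cache[name] = *path;
      return path;
    }

The real linker keys its maps with CachedHashStringRef and clears them in the cleanup callback registered in Driver.cpp, so repeated in-process links start from a clean slate.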
void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) { - SmallVector<const char *, 4> argv; + SmallVector<StringRef> argv; size_t offset = 0; for (unsigned i = 0; i < argc && offset < data.size(); ++i) { argv.push_back(data.data() + offset); @@ -385,32 +408,20 @@ void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) { if (argv.size() != argc || offset > data.size()) fatal(toString(f) + ": invalid LC_LINKER_OPTION"); - MachOOptTable table; - unsigned missingIndex, missingCount; - InputArgList args = table.ParseArgs(argv, missingIndex, missingCount); - if (missingCount) - fatal(Twine(args.getArgString(missingIndex)) + ": missing argument"); - for (const Arg *arg : args.filtered(OPT_UNKNOWN)) - error("unknown argument: " + arg->getAsString(args)); - - for (const Arg *arg : args) { - switch (arg->getOption().getID()) { - case OPT_l: { - StringRef name = arg->getValue(); - ForceLoad forceLoadArchive = - config->forceLoadSwift && name.startswith("swift") ? ForceLoad::Yes - : ForceLoad::No; - addLibrary(name, /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isExplicit=*/false, forceLoadArchive); - break; - } - case OPT_framework: - addFramework(arg->getValue(), /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isExplicit=*/false, ForceLoad::No); - break; - default: - error(arg->getSpelling() + " is not allowed in LC_LINKER_OPTION"); - } + unsigned i = 0; + StringRef arg = argv[i]; + if (arg.consume_front("-l")) { + ForceLoad forceLoadArchive = + config->forceLoadSwift && arg.startswith("swift") ? ForceLoad::Yes + : ForceLoad::No; + addLibrary(arg, /*isNeeded=*/false, /*isWeak=*/false, + /*isReexport=*/false, /*isExplicit=*/false, forceLoadArchive); + } else if (arg == "-framework") { + StringRef name = argv[++i]; + addFramework(name, /*isNeeded=*/false, /*isWeak=*/false, + /*isReexport=*/false, /*isExplicit=*/false, ForceLoad::No); + } else { + error(arg + " is not allowed in LC_LINKER_OPTION"); } } @@ -758,6 +769,8 @@ static void warnIfUnimplementedOption(const Option &opt) { case OPT_grp_ignored: warn("Option `" + opt.getPrefixedName() + "' is ignored."); break; + case OPT_grp_ignored_silently: + break; default: warn("Option `" + opt.getPrefixedName() + "' is not yet implemented. Stay tuned..."); @@ -1074,6 +1087,9 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly, errorHandler().cleanupCallback = []() { freeArena(); + resolvedFrameworks.clear(); + resolvedLibraries.clear(); + cachedReads.clear(); concatOutputSections.clear(); inputFiles.clear(); inputSections.clear(); @@ -1202,6 +1218,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly, config->printWhyLoad = args.hasArg(OPT_why_load); config->omitDebugInfo = args.hasArg(OPT_S); config->outputType = getOutputType(args); + config->errorForArchMismatch = args.hasArg(OPT_arch_errors_fatal); if (const Arg *arg = args.getLastArg(OPT_bundle_loader)) { if (config->outputType != MH_BUNDLE) error("-bundle_loader can only be used with MachO bundle output"); @@ -1362,15 +1379,17 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly, config->platform() == PlatformKind::macOS); if (args.hasArg(OPT_v)) { - message(getLLDVersion()); + message(getLLDVersion(), lld::errs()); message(StringRef("Library search paths:") + - (config->librarySearchPaths.empty() - ? "" - : "\n\t" + join(config->librarySearchPaths, "\n\t"))); + (config->librarySearchPaths.empty() + ? "" + : "\n\t" + join(config->librarySearchPaths, "\n\t")), + lld::errs()); message(StringRef("Framework search paths:") + - (config->frameworkSearchPaths.empty() - ? "" - : "\n\t" + join(config->frameworkSearchPaths, "\n\t"))); + (config->frameworkSearchPaths.empty() + ? "" + : "\n\t" + join(config->frameworkSearchPaths, "\n\t")), + lld::errs()); } config->progName = argsArr[0]; diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index 1f4313dd296f5..361f24a8531b6 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -173,8 +173,19 @@ static bool checkCompatibility(const InputFile *input) { return true; } +// This cache mostly exists to store system libraries (and .tbds) as they're +// loaded, rather than the input archives, which are already cached at a higher +// level, and other files like the filelist that are only read once. +// Theoretically this caching could be more efficient by hoisting it, but that +// would require altering many callers to track the state. +DenseMap<CachedHashStringRef, MemoryBufferRef> macho::cachedReads; // Open a given file path and return it as a memory-mapped file. Optional<MemoryBufferRef> macho::readFile(StringRef path) { + CachedHashStringRef key(path); + auto entry = cachedReads.find(key); + if (entry != cachedReads.end()) + return entry->second; + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path); if (std::error_code ec = mbOrErr.getError()) { error("cannot open " + path + ": " + ec.message()); @@ -192,7 +203,7 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) { read32be(&hdr->magic) != FAT_MAGIC) { if (tar) tar->append(relativeToRoot(path), mbref.getBuffer()); - return mbref; + return cachedReads[key] = mbref; } // Object files and archive files may be fat files, which contain multiple @@ -217,7 +228,8 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) { error(path + ": slice extends beyond end of file"); if (tar) tar->append(relativeToRoot(path), mbref.getBuffer()); - return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc)); + return cachedReads[key] = MemoryBufferRef(StringRef(buf + offset, size), + path.copy(bAlloc)); } error("unable to find matching architecture in " + path); @@ -269,7 +281,7 @@ void ObjFile::parseSections(ArrayRef<SectionHeader> sections) { auto splitRecords = [&](int recordSize) -> void { subsections.push_back({}); - if (data.size() == 0) + if (data.empty()) return; SubsectionMap &subsecMap = subsections.back(); @@ -619,8 +631,7 @@ macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym, } } -template <class NList> -static bool isUndef(const NList &sym) { +template <class NList> static bool isUndef(const NList &sym) { return (sym.n_type & N_TYPE) == N_UNDF && sym.n_value == 0; } @@ -790,9 +801,12 @@ template <class LP> void ObjFile::parse() { Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype); if (arch != config->arch()) { - error(toString(this) + " has architecture " + getArchitectureName(arch) + - " which is incompatible with target architecture " + - getArchitectureName(config->arch())); + auto msg = config->errorForArchMismatch + ? static_cast<void (*)(const Twine &)>(error) + : warn; + msg(toString(this) + " has architecture " + getArchitectureName(arch) + + " which is incompatible with target architecture " + + getArchitectureName(config->arch())); return; } @@ -1211,7 +1225,7 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella, void DylibFile::parseReexports(const InterfaceFile &interface) { const InterfaceFile *topLevel = interface.getParent() == nullptr ? &interface : interface.getParent(); - for (InterfaceFileRef intfRef : interface.reexportedLibraries()) { + for (const InterfaceFileRef &intfRef : interface.reexportedLibraries()) { InterfaceFile::const_target_range targets = intfRef.targets(); if (is_contained(skipPlatformChecks, intfRef.getInstallName()) || is_contained(targets, config->platformInfo.target)) diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index e51e5d557d3b3..e5f12ad9c6d98 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -14,6 +14,7 @@ #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" +#include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/BinaryFormat/MachO.h" @@ -211,6 +212,7 @@ class BitcodeFile final : public InputFile { }; extern llvm::SetVector<InputFile *> inputFiles; +extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; llvm::Optional<MemoryBufferRef> readFile(StringRef path);
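A minimal sketch of the memoization pattern the new cachedReads map implements, assuming only the LLVM headers included below; readFileOnce is an illustrative name rather than lld's, and the fat-file slicing that the real readFile performs is elided:

#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"

using namespace llvm;

static DenseMap<CachedHashStringRef, MemoryBufferRef> cachedReads;

static Optional<MemoryBufferRef> readFileOnce(StringRef path) {
  CachedHashStringRef key(path);
  auto it = cachedReads.find(key);
  if (it != cachedReads.end())
    return it->second; // Hit: reuse the buffer mapped by an earlier call.
  ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path);
  if (!mbOrErr)
    return None; // Unreadable files are reported by the caller, not cached.
  // Deliberately leak the owning buffer so the returned MemoryBufferRef stays
  // valid for the rest of the link, mirroring how lld parks buffers in arenas.
  MemoryBuffer *mb = mbOrErr->release();
  return cachedReads[key] = mb->getMemBufferRef();
}

Keying on CachedHashStringRef hashes each path once up front, so repeated lookups for the same system dylib or .tbd stay cheap; note also the cleanupCallback addition above, which clears this map between runs.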
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp index 879f7da1c45c5..ebb4d204d9acc 100644 --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -110,7 +110,7 @@ void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { copy->symbols.clear(); // Remove duplicate compact unwind info for symbols at the same address. - if (symbols.size() == 0) + if (symbols.empty()) return; it = symbols.begin(); uint64_t v = (*it)->value; diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h index 85036135570aa..621cea5073b9b 100644 --- a/lld/MachO/InputSection.h +++ b/lld/MachO/InputSection.h @@ -38,6 +38,7 @@ class InputSection { Kind kind() const { return shared->sectionKind; } virtual ~InputSection() = default; virtual uint64_t getSize() const { return data.size(); } + virtual bool empty() const { return data.empty(); } InputFile *getFile() const { return shared->file; } StringRef getName() const { return shared->name; } StringRef getSegName() const { return shared->segname; } @@ -115,7 +116,7 @@ class ConcatInputSection final : public InputSection { // ConcatInputSections are entirely live or dead, so the offset is irrelevant. bool isLive(uint64_t off) const override { return live; } void markLive(uint64_t off) override { live = true; } - bool isCoalescedWeak() const { return wasCoalesced && symbols.size() == 0; } + bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); } bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); } bool isHashableForICF() const; void hashForICF(); diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index d1eef6a6f8f82..c71ea33d28965 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -105,7 +105,7 @@ std::vector<ObjFile *> BitcodeCompiler::compile() { // The -cache_path_lto option specifies the path to a directory in which // to cache native object files for ThinLTO incremental builds. If a path was // specified, configure LTO to use it as the cache directory. - NativeObjectCache cache; + FileCache cache; if (!config->thinLTOCacheDir.empty()) cache = check(localCache("ThinLTO", "Thin", config->thinLTOCacheDir, @@ -115,7 +115,7 @@ std::vector<ObjFile *> BitcodeCompiler::compile() { checkError(ltoObj->run( [&](size_t task) { - return std::make_unique<lto::NativeObjectStream>( + return std::make_unique<CachedFileStream>( std::make_unique<raw_svector_ostream>(buf[task])); }, cache)); diff --git a/lld/MachO/MarkLive.cpp b/lld/MachO/MarkLive.cpp index 4b6e2315b942c..e4aba8fb90d42 100644 --- a/lld/MachO/MarkLive.cpp +++ b/lld/MachO/MarkLive.cpp @@ -96,8 +96,7 @@ void markLive() { } // -u symbols for (Symbol *sym : config->explicitUndefineds) - if (auto *defined = dyn_cast<Defined>(sym)) - addSym(defined); + addSym(sym); // local symbols explicitly marked .no_dead_strip for (const InputFile *file : inputFiles) if (auto *objFile = dyn_cast<ObjFile>(file)) diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index 39212597e31fb..f4eb302a95b77 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -862,7 +862,6 @@ def no_arch_warnings : Flag<["-"], "no_arch_warnings">, Group<grp_rare>; def arch_errors_fatal : Flag<["-"], "arch_errors_fatal">, HelpText<"Escalate to errors any warnings about inputs whose architecture does not match the -arch option">, - Flags<[HelpHidden]>, Group<grp_rare>; def e : Separate<["-"], "e">, MetaVarName<"">, @@ -946,6 +945,9 @@ def mllvm : Separate<["-"], "mllvm">, def mcpu : Separate<["-"], "mcpu">, HelpText<"Processor family target for LTO code generation">, Group<grp_rare>; +def no_dtrace_dof : Flag<["-"], "no_dtrace_dof">, + HelpText<"Disable dtrace-dof processing (default).">, + Group<grp_ignored>; def grp_deprecated : OptionGroup<"deprecated">, HelpText<"DEPRECATED">; @@ -1271,18 +1273,10 @@ def no_compact_unwind : Flag<["-"], "no_compact_unwind">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group<grp_undocumented>; -def no_dtrace_dof : Flag<["-"], "no_dtrace_dof">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; def no_new_main : Flag<["-"], "no_new_main">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group<grp_undocumented>; -def objc_abi_version : Separate<["-"], "objc_abi_version">, - HelpText<"This option is undocumented in ld64">, - Flags<[HelpHidden]>, - Group<grp_undocumented>; def pause : Flag<["-"], "pause">, HelpText<"This option is undocumented in ld64">, Flags<[HelpHidden]>, Group<grp_undocumented>; @@ -1326,3 +1320,10 @@ def new_linker : Flag<["-"], "new_linker">, HelpText<"This option is ignored in ld64">, Flags<[HelpHidden]>, Group<grp_ignored>; + +def grp_ignored_silently : OptionGroup<"ignored_silently">, HelpText<"IGNORED SILENTLY">; + +def objc_abi_version : Separate<["-"], "objc_abi_version">, + HelpText<"This option only applies to i386 in ld64">, + Flags<[HelpHidden]>, + Group<grp_ignored_silently>; diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp index dca537493de49..f3f7fdbcd0f41 100644 --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -602,7 +602,7 @@ void UnwindInfoSectionImpl<Ptr>::writeTo(uint8_t *buf) const { *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) | (cuep->functionAddress - functionAddressBase); } - if (page.localEncodings.size() != 0) + if (!page.localEncodings.empty()) memcpy(ep, page.localEncodings.data(), page.localEncodings.size() * sizeof(uint32_t)); } else { diff --git a/lld/docs/WebAssembly.rst b/lld/docs/WebAssembly.rst index c01df99cddb96..89690d800588b 100644 --- a/lld/docs/WebAssembly.rst +++ b/lld/docs/WebAssembly.rst @@ -205,6 +205,6 @@ Missing features supported. - No support for creating shared libraries. The spec for shared libraries in WebAssembly is still in flux: - https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md + https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md -.. _linking: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md +.. _linking: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h index 04602fad6d516..d95a2537c1f2c 100644 --- a/lld/include/lld/Common/ErrorHandler.h +++ b/lld/include/lld/Common/ErrorHandler.h @@ -109,7 +109,7 @@ class ErrorHandler { void error(const Twine &msg, ErrorTag tag, ArrayRef<StringRef> args); [[noreturn]] void fatal(const Twine &msg); void log(const Twine &msg); - void message(const Twine &msg); + void message(const Twine &msg, llvm::raw_ostream &s); void warn(const Twine &msg); void reset() { @@ -137,7 +137,9 @@ inline void error(const Twine &msg, ErrorTag tag, ArrayRef<StringRef> args) { } [[noreturn]] inline void fatal(const Twine &msg) { errorHandler().fatal(msg); } inline void log(const Twine &msg) { errorHandler().log(msg); } -inline void message(const Twine &msg) { errorHandler().message(msg); } +inline void message(const Twine &msg, llvm::raw_ostream &s = outs()) { + errorHandler().message(msg, s); +} inline void warn(const Twine &msg) { errorHandler().warn(msg); } inline uint64_t errorCount() { return errorHandler().errorCount; }
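The ErrorHandler change just above threads an explicit output stream through message() so that -v output can go to stderr without touching every existing caller. The idiom, reduced to a standalone sketch (a stand-in, not lld's actual ErrorHandler):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"

// Defaulted stream parameter: existing call sites keep writing
// message("...") and still print to stdout, while call sites that must not
// pollute stdout pass errs() explicitly.
static void message(const llvm::Twine &msg,
                    llvm::raw_ostream &s = llvm::outs()) {
  s << msg << '\n';
  s.flush();
}

static void printVersion(llvm::StringRef version) {
  message(version, llvm::errs()); // diagnostic chatter goes to stderr
  message("link completed");      // regular output stays on stdout
}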
diff --git a/lld/test/ELF/pack-dyn-relocs-arm2.s b/lld/test/ELF/pack-dyn-relocs-arm2.s index 0648edba217fb..cf2cd8bb597ec 100644 --- a/lld/test/ELF/pack-dyn-relocs-arm2.s +++ b/lld/test/ELF/pack-dyn-relocs-arm2.s @@ -45,7 +45,7 @@ // CHECK-NEXT: } // RUN: llvm-readobj -S --dynamic-table %t.exe | FileCheck --check-prefix=HEADER %s -// HEADER: 0x00000023 RELRSZ 0xC +// HEADER: 0x00000023 RELRSZ 12 (bytes) .data .align 2 diff --git a/lld/test/ELF/pack-dyn-relocs.s b/lld/test/ELF/pack-dyn-relocs.s index 220addac24518..eed26d824906e 100644 --- a/lld/test/ELF/pack-dyn-relocs.s +++ b/lld/test/ELF/pack-dyn-relocs.s @@ -137,8 +137,8 @@ // RELR32-HEADERS-NEXT: EntrySize: 4 // RELR32-HEADERS: 0x00000024 RELR [[ADDR]] -// RELR32-HEADERS: 0x00000023 RELRSZ 0x8 -// RELR32-HEADERS: 0x00000025 RELRENT 0x4 +// RELR32-HEADERS: 0x00000023 RELRSZ 8 (bytes) +// RELR32-HEADERS: 0x00000025 RELRENT 4 (bytes) /// SHT_RELR section contains address/bitmap entries /// encoding the offsets for relative relocation.
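The RELRSZ/RELRENT churn in these checks is just llvm-readobj's new byte-size rendering, but the comment above is worth unpacking. A hedged sketch of SHT_RELR decoding for the 64-bit case (illustrative, not LLD's code): an even entry names an address to relocate and re-bases the scan, while an odd entry is a bitmap whose bit i covers the word at base + i * wordSize, after which the base advances by 63 words.

#include <cstdint>
#include <vector>

static std::vector<uint64_t> decodeRelr(const std::vector<uint64_t> &relr) {
  std::vector<uint64_t> offsets;
  const uint64_t wordSize = sizeof(uint64_t);
  uint64_t base = 0;
  for (uint64_t entry : relr) {
    if ((entry & 1) == 0) {
      offsets.push_back(entry); // Address entry: relocate this word...
      base = entry + wordSize;  // ...and start the bitmap window after it.
    } else {
      // Bitmap entry: after dropping the tag bit, bit i marks a relocation
      // at base + i * wordSize.
      for (uint64_t i = 0; (entry >>= 1) != 0; ++i)
        if (entry & 1)
          offsets.push_back(base + i * wordSize);
      base += 63 * wordSize; // 31 slots per entry on 32-bit targets
    }
  }
  return offsets;
}

This is why the packed format is so compact: one 64-bit bitmap entry can stand in for up to 63 consecutive relative relocations.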
@@ -319,8 +319,8 @@ // RELR64-HEADERS-NEXT: EntrySize: 8 // RELR64-HEADERS: 0x0000000000000024 RELR [[ADDR]] -// RELR64-HEADERS: 0x0000000000000023 RELRSZ 0x10 -// RELR64-HEADERS: 0x0000000000000025 RELRENT 0x8 +// RELR64-HEADERS: 0x0000000000000023 RELRSZ 16 (bytes) +// RELR64-HEADERS: 0x0000000000000025 RELRENT 8 (bytes) /// SHT_RELR section contains address/bitmap entries /// encoding the offsets for relative relocation. diff --git a/lld/test/MachO/compact-unwind.s b/lld/test/MachO/compact-unwind.s index d1d4f23fb51e2..876268d485037 100644 --- a/lld/test/MachO/compact-unwind.s +++ b/lld/test/MachO/compact-unwind.s @@ -2,23 +2,23 @@ # RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/my-personality.s -o %t/x86_64-my-personality.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/main.s -o %t/x86_64-main.o -# RUN: %lld -arch x86_64 -pie -lSystem -lc++ %t/x86_64-my-personality.o %t/x86_64-main.o -o %t/x86_64-personality-first +# RUN: %lld -arch x86_64 -lSystem -lc++ %t/x86_64-my-personality.o %t/x86_64-main.o -o %t/x86_64-personality-first # RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x100000000 -# RUN: %lld -dead_strip -arch x86_64 -pie -lSystem -lc++ %t/x86_64-main.o %t/x86_64-my-personality.o -o %t/x86_64-personality-second +# RUN: %lld -dead_strip -arch x86_64 -lSystem -lc++ %t/x86_64-main.o %t/x86_64-my-personality.o -o %t/x86_64-personality-second # RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x100000000 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 %t/my-personality.s -o %t/arm64-my-personality.o # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 %t/main.s -o %t/arm64-main.o -# RUN: %lld -arch arm64 -pie -lSystem -lc++ %t/arm64-my-personality.o %t/arm64-main.o -o %t/arm64-personality-first +# RUN: %lld -arch arm64 -lSystem -lc++ %t/arm64-my-personality.o %t/arm64-main.o -o %t/arm64-personality-first # RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x100000000 -# RUN: %lld -dead_strip -arch arm64 -pie -lSystem -lc++ %t/arm64-main.o %t/arm64-my-personality.o -o %t/arm64-personality-second +# RUN: %lld -dead_strip -arch arm64 -lSystem -lc++ %t/arm64-main.o %t/arm64-my-personality.o -o %t/arm64-personality-second # RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x100000000 # RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/my-personality.s -o %t/arm64-32-my-personality.o # RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/main.s -o %t/arm64-32-main.o -# RUN: %lld-watchos -pie -lSystem -lc++ %t/arm64-32-my-personality.o %t/arm64-32-main.o -o %t/arm64-32-personality-first +# RUN: %lld-watchos -lSystem -lc++ %t/arm64-32-my-personality.o %t/arm64-32-main.o -o %t/arm64-32-personality-first # RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-32-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x4000 -# RUN: %lld-watchos -dead_strip -pie -lSystem -lc++ %t/arm64-32-main.o %t/arm64-32-my-personality.o -o %t/arm64-32-personality-second +# RUN: %lld-watchos -dead_strip -lSystem -lc++ 
%t/arm64-32-main.o %t/arm64-32-my-personality.o -o %t/arm64-32-personality-second # RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-32-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x4000 # FIRST: Indirect symbols for (__DATA_CONST,__got) diff --git a/lld/test/MachO/error-limit.test b/lld/test/MachO/error-limit.test index 79eaa3d522311..75f55084bbd59 100644 --- a/lld/test/MachO/error-limit.test +++ b/lld/test/MachO/error-limit.test @@ -1,3 +1,7 @@ +## We're intentionally testing fatal errors, and fatal errors aren't supported +## for testing when main is run twice. +XFAIL: main-run-twice + ## Check that we only see 20 (the default error-limit) "cannot open" errors RUN: not %lld A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 2>&1 | \ RUN: FileCheck -check-prefix=DEFAULT %s diff --git a/lld/test/MachO/invalid/bad-archive.s b/lld/test/MachO/invalid/bad-archive.s index 24f7a5d6f8354..0a405ed896653 100644 --- a/lld/test/MachO/invalid/bad-archive.s +++ b/lld/test/MachO/invalid/bad-archive.s @@ -1,3 +1,7 @@ +## We're intentionally testing fatal errors (for malformed input files), and +## fatal errors aren't supported for testing when main is run twice. +# XFAIL: main-run-twice + # REQUIRES: x86 # RUN: echo "!" > %t.a # RUN: echo "foo" >> %t.a diff --git a/lld/test/MachO/invalid/compact-unwind-bad-reloc.s b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s index 3e189dada0cae..b6b6c36ccfac2 100644 --- a/lld/test/MachO/invalid/compact-unwind-bad-reloc.s +++ b/lld/test/MachO/invalid/compact-unwind-bad-reloc.s @@ -2,8 +2,8 @@ # RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-function.s -o %t/bad-function.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %t/bad-personality.s -o %t/bad-personality.o -# RUN: not %lld -pie -lSystem -lc++ %t/bad-function.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-function.o -# RUN: not %lld -pie -lSystem -lc++ %t/bad-personality.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-personality.o +# RUN: not %lld -lSystem -lc++ %t/bad-function.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-function.o +# RUN: not %lld -lSystem -lc++ %t/bad-personality.o -o %t 2>&1 | FileCheck %s -DFILE=%t/bad-personality.o # CHECK: error: compact unwind references address in [[FILE]]:(__data) which is not in segment __TEXT #--- bad-function.s diff --git a/lld/test/MachO/invalid/compact-unwind-personalities.s b/lld/test/MachO/invalid/compact-unwind-personalities.s index 744a4edfcb982..94ed68b60159c 100644 --- a/lld/test/MachO/invalid/compact-unwind-personalities.s +++ b/lld/test/MachO/invalid/compact-unwind-personalities.s @@ -1,7 +1,7 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 %s -o %t.o -# RUN: not %lld -pie -lSystem -lc++ %t.o -o %t 2>&1 | FileCheck %s --check-prefix=TOO-MANY -# RUN: not %lld -pie -lSystem %t.o -o %t 2>&1 | FileCheck %s --check-prefix=UNDEF +# RUN: not %lld -lSystem -lc++ %t.o -o %t 2>&1 | FileCheck %s --check-prefix=TOO-MANY +# RUN: not %lld -lSystem %t.o -o %t 2>&1 | FileCheck %s --check-prefix=UNDEF # TOO-MANY: error: too many personalities (4) for compact unwind to encode # UNDEF: error: undefined symbol: ___gxx_personality_v0 diff --git a/lld/test/MachO/invalid/cstring-dedup.s b/lld/test/MachO/invalid/cstring-dedup.s index c2af78d344641..287f7d2156a31 100644 --- a/lld/test/MachO/invalid/cstring-dedup.s +++ b/lld/test/MachO/invalid/cstring-dedup.s @@ -1,3 +1,7 @@ +## We're intentionally testing fatal errors (for malformed input files), and +## fatal errors aren't supported for testing when main is run twice. +# XFAIL: main-run-twice + # REQUIRES: x86 # RUN: rm -rf %t; split-file %s %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/not-terminated.s -o %t/not-terminated.o diff --git a/lld/test/MachO/invalid/incompatible-arch.s b/lld/test/MachO/invalid/incompatible-arch.s index 4984c178ec7bc..27fc1c83d04be 100644 --- a/lld/test/MachO/invalid/incompatible-arch.s +++ b/lld/test/MachO/invalid/incompatible-arch.s @@ -3,8 +3,12 @@ # RUN: rm -rf %t && mkdir -p %t # RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/test.o -# RUN: not %lld -arch x86_64 -lSystem %t/test.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/test.o -# CHECK: error: {{.*}}[[FILE]] has architecture arm64 which is incompatible with target architecture x86_64 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/native.o +# RUN: not %no_fatal_warnings_lld -arch x86_64 -lSystem %t/test.o -o /dev/null -arch_errors_fatal 2>&1 | FileCheck %s -DFILE=%t/test.o --check-prefix=CHECK-ERROR +# RUN: %no_fatal_warnings_lld -arch x86_64 -lSystem %t/test.o %t/native.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/test.o --check-prefix=CHECK-WARNING +# RUN: %lld -arch arm64 -lSystem %t/test.o -arch_errors_fatal -o /dev/null +# CHECK-ERROR: error: {{.*}}[[FILE]] has architecture arm64 which is incompatible with target architecture x86_64 +# CHECK-WARNING: warning: {{.*}}[[FILE]] has architecture arm64 which is incompatible with target architecture x86_64 # RUN: %lld -dylib -arch arm64 -platform_version macOS 10.14 10.15 -o %t/out.dylib %t/test.o @@ -30,7 +34,7 @@ # RUN: -o /dev/null 2>&1 | FileCheck %s --check-prefix=OBJ-VERSION # OBJ-VERSION: warning: {{.*}}test_x86.o has version 10.15.0, which is newer than target minimum of 10.14.0 -## Test that simulator platforms are compatible with their simulatees. +## Test that simulator platforms are compatible with their simulatees. # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-ios14.0 %s -o %t/test_x86_ios.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-ios14.0-simulator %s -o %t/test_x86_ios_sim.o @@ -43,7 +47,6 @@ # RUN: not %lld -dylib -platform_version watchos-simulator 14.0.0 14.0.0 %t/test_x86_ios_sim.o \ # RUN: -o /dev/null 2>&1 | FileCheck %s --check-prefix=CROSS-SIM2 # CROSS-SIM2: {{.*}}test_x86_ios_sim.o has platform iOS Simulator, which is different from target platform watchOS Simulator - .globl _main _main:
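The incompatible-arch.s update above is the test for -arch_errors_fatal, which the InputFiles.cpp hunk earlier implements by choosing a diagnostic function at runtime. A reduced sketch of that selection (warn/error here are standalone stand-ins for lld's entry points; in lld, error() is overloaded, which is why a static_cast is needed to name one overload and give the conditional a single type):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"

static void warn(const llvm::Twine &msg) {
  llvm::errs() << "warning: " << msg << "\n";
}
static void error(const llvm::Twine &msg) {
  llvm::errs() << "error: " << msg << "\n";
}

static void diagnoseArchMismatch(bool errorsAreFatal, const llvm::Twine &msg) {
  // With -arch_errors_fatal the mismatch is an error; otherwise it is just a
  // warning, so a link can proceed past foreign-architecture inputs.
  auto diag = errorsAreFatal
                  ? static_cast<void (*)(const llvm::Twine &)>(error)
                  : warn;
  diag(msg);
}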
+# XFAIL: main-run-twice + # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o # RUN: not %lld -o %t %t.o 2>&1 | FileCheck %s -DFILE=%t.o diff --git a/lld/test/MachO/linkedit-contiguity.s b/lld/test/MachO/linkedit-contiguity.s index a67e86d247f47..cbdc1ae330099 100644 --- a/lld/test/MachO/linkedit-contiguity.s +++ b/lld/test/MachO/linkedit-contiguity.s @@ -10,7 +10,7 @@ # RUN: %lld %t/foo.o -dylib -o %t/libfoo.dylib # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o -# RUN: %lld -lSystem -pie -adhoc_codesign -o %t/test %t/libfoo.dylib %t/test.o +# RUN: %lld -lSystem -adhoc_codesign -o %t/test %t/libfoo.dylib %t/test.o # RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s diff --git a/lld/test/MachO/lto-internalize.ll b/lld/test/MachO/lto-internalize.ll index 477669375e3f8..c9bac63b730b9 100644 --- a/lld/test/MachO/lto-internalize.ll +++ b/lld/test/MachO/lto-internalize.ll @@ -8,7 +8,7 @@ ; RUN: llvm-as %t/test.s -o %t/test.o ; RUN: llvm-as %t/baz.s -o %t/baz.o ; RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/regular.s -o %t/regular.o -; RUN: %lld -pie -lSystem %t/test.o %t/baz.o %t/regular.o -o %t/test -save-temps +; RUN: %lld -lSystem %t/test.o %t/baz.o %t/regular.o -o %t/test -save-temps ; RUN: llvm-dis < %t/test.0.2.internalize.bc | FileCheck %s ; RUN: llvm-objdump --macho --syms %t/test | FileCheck %s --check-prefix=SYMTAB diff --git a/lld/test/MachO/mh-execute-header.s b/lld/test/MachO/mh-execute-header.s index 4a62b27bb8fa8..0ed7501132bd7 100644 --- a/lld/test/MachO/mh-execute-header.s +++ b/lld/test/MachO/mh-execute-header.s @@ -1,7 +1,7 @@ # REQUIRES: x86 # RUN: rm -rf %t; mkdir %t # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o -# RUN: %lld -o %t/test.pie %t/test.o -pie +# RUN: %lld -o %t/test.pie %t/test.o # RUN: llvm-objdump --macho --syms %t/test.pie | FileCheck %s --check-prefix=PIE # RUN: %lld -o %t/test.no_pie %t/test.o -no_pie diff --git a/lld/test/MachO/mh-header-link.s b/lld/test/MachO/mh-header-link.s index 80b8f47928b5c..7683d7a434361 100644 --- a/lld/test/MachO/mh-header-link.s +++ b/lld/test/MachO/mh-header-link.s @@ -9,10 +9,10 @@ ## (but not in other types of files) # RUN: llvm-mc %t/dylib.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/dylib.o -# RUN: %lld -pie -dylib -dead_strip %t/dylib.o -o %t/dylib.out +# RUN: %lld -dylib -dead_strip %t/dylib.o -o %t/dylib.out # RUN: llvm-objdump -m --syms %t/dylib.out | FileCheck %s --check-prefix DYLIB -# RUN: not %lld -pie -o /dev/null %t/dylib.o 2>&1 | FileCheck %s --check-prefix ERR-DYLIB +# RUN: not %lld -o /dev/null %t/dylib.o 2>&1 | FileCheck %s --check-prefix ERR-DYLIB # DYLIB: SYMBOL TABLE: # DYLIB-NEXT: {{[0-9a-f]+}} g F __TEXT,__text _main @@ -21,10 +21,10 @@ ## Test that in an executable, we can link against __mh_execute_header # RUN: llvm-mc %t/main.s -triple=x86_64-apple-macos10.15 -filetype=obj -o %t/exec.o -# RUN: %lld -pie -dead_strip -lSystem %t/exec.o -o %t/exec.out +# RUN: %lld -dead_strip -lSystem %t/exec.o -o %t/exec.out ## But it would be an error trying to reference __mh_execute_header in a dylib -# RUN: not %lld -pie -o /dev/null -dylib %t/exec.o 2>&1 | FileCheck %s --check-prefix ERR-EXEC +# RUN: not %lld -o /dev/null -dylib %t/exec.o 2>&1 | FileCheck %s --check-prefix ERR-EXEC # ERR-EXEC: error: undefined symbol: __mh_execute_header diff --git a/lld/test/MachO/search-paths.test b/lld/test/MachO/search-paths.test index 15d1b564cf929..88f4fabc37b48 100644 --- 
a/lld/test/MachO/search-paths.test +++ b/lld/test/MachO/search-paths.test @@ -1,6 +1,4 @@ -UNSUPPORTED: darwin - -RUN: mkdir -p %t1 %t2 +RUN: rm -rf %t1 %t2; mkdir -p %t1 %t2 RUN: %lld -v -dylib -o /dev/null -L%t1 -F%t2 2>&1 \ RUN: | FileCheck -DLDIR=%t1 -DFDIR=%t2 %s diff --git a/lld/test/MachO/silent-ignore.s b/lld/test/MachO/silent-ignore.s new file mode 100644 index 0000000000000..5700a77c9313e --- /dev/null +++ b/lld/test/MachO/silent-ignore.s @@ -0,0 +1,28 @@ +# REQUIRES: x86 + +## Check that we correctly parse these flags, even though they are +## unimplemented. We may still emit warnings or errors for some of the +## unimplemented ones (but those errors are squelched because of the +## `--version` flag.) +# RUN: %lld --version \ +# RUN: -dynamic \ +# RUN: -no_deduplicate \ +# RUN: -lto_library /lib/foo \ +# RUN: -macosx_version_min 0 \ +# RUN: -no_dtrace_dof \ +# RUN: -dependency_info /path/to/dependency_info.dat \ +# RUN: -lto_library ../lib/libLTO.dylib \ +# RUN: -mllvm -time-passes \ +# RUN: -objc_abi_version 2 \ +# RUN: -ios_simulator_version_min 9.0.0 \ +# RUN: -sdk_version 13.2 +# RUN: not %lld -v --not-an-ignored-argument 2>&1 | FileCheck %s +# CHECK: error: unknown argument '--not-an-ignored-argument' + +## Check that we don't emit any warnings or errors for these unimplemented flags. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: %lld %t.o -o /dev/null -objc_abi_version 2 + +.globl _main +_main: + ret diff --git a/lld/test/MachO/silent-ignore.test b/lld/test/MachO/silent-ignore.test deleted file mode 100644 index 3e96d7183ff42..0000000000000 --- a/lld/test/MachO/silent-ignore.test +++ /dev/null @@ -1,13 +0,0 @@ -RUN: %lld --version \ -RUN: -dynamic \ -RUN: -no_deduplicate \ -RUN: -lto_library /lib/foo \ -RUN: -macosx_version_min 0 \ -RUN: -dependency_info /path/to/dependency_info.dat \ -RUN: -lto_library ../lib/libLTO.dylib \ -RUN: -mllvm -time-passes \ -RUN: -objc_abi_version 2 \ -RUN: -ios_simulator_version_min 9.0.0 \ -RUN: -sdk_version 13.2 -RUN: not %lld -v --not-an-ignored-argument 2>&1 | FileCheck %s -CHECK: error: unknown argument '--not-an-ignored-argument' diff --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s index 3538c2011793b..07dd2e77e9e01 100644 --- a/lld/test/MachO/stabs.s +++ b/lld/test/MachO/stabs.s @@ -130,7 +130,7 @@ ## Check that we don't attempt to emit rebase opcodes for the debug sections ## when building a PIE (since we have filtered the sections out). -# RUN: %lld -lSystem -pie %t/test.o %t/foo.a %t/no-debug.o -o %t/test +# RUN: %lld -lSystem %t/test.o %t/foo.a %t/no-debug.o -o %t/test # RUN: llvm-objdump --macho --rebase %t/test | FileCheck %s --check-prefix=PIE # PIE: Rebase table: # PIE-NEXT: segment section address type diff --git a/lld/test/MachO/start-end.s b/lld/test/MachO/start-end.s index 86e1d900bf3d5..3ca0048939b07 100644 --- a/lld/test/MachO/start-end.s +++ b/lld/test/MachO/start-end.s @@ -67,6 +67,13 @@ ## Test that the link succeeds with dead-stripping enabled too. # RUN: %lld -dead_strip -lSystem %t/main.o -o %t/stripped.out +# RUN: llvm-objdump --macho --syms --section-headers %t/stripped.out > %t-stripped-dump.txt +# RUN: llvm-objdump --macho -d --no-symbolic-operands --no-show-raw-insn %t/stripped.out >> %t-stripped-dump.txt +# RUN: FileCheck --check-prefix=STRIP %s < %t-stripped-dump.txt + +## -u 'section$start$*' does not cause an undefined symbol error. This matches ld64.
+# RUN: %lld -dead_strip -lSystem %t/main.o -u 'section$start$__FOO$__notexist' -o %t/stripped1.out +# RUN: llvm-objdump --section-headers %t/stripped1.out | FileCheck --check-prefix=STRIP2 %s ## (Fun fact: `-e 'section$start$__TEXT$__text -dead_strip` strips ## everything in the text section because markLive runs well before @@ -76,6 +83,42 @@ ## and the output program crashes when running. This matches ld64's ## behavior.) +# STRIP-LABEL: Sections: +# STRIP-NEXT: Idx Name Size VMA Type +# STRIP-NEXT: 0 __text {{[0-9a-f]*}} [[#%x, TEXTSTART:]] TEXT +# STRIP-NEXT: 1 __cstring 00000000 [[#%x, CSTRINGSTART:]] DATA +# STRIP-NEXT: 2 __data 00000000 +# STRIP-NEXT: 3 __llvm_orderfile 00000000 +# STRIP-NEXT: 4 __mybss 00000000 +# STRIP-NEXT: 5 __bar 00000000 +# STRIP-NEXT: 6 __ever 00000000 +# STRIP-NEXT: 7 __lookup 00000000 +# STRIP-NEXT: 8 symbol 00000000 +# STRIP-NEXT: 9 __quux 00000000 + +# STRIP-LABEL: SYMBOL TABLE: +# STRIP-NOT: section$start$__FOO$__bar + +# STRIP-LABEL: _main: +# STRIP: [[#%x, PC1:]]: +# STRIP-SAME: leaq [[#%d, TEXTSTART - PC1 - 7]](%rip), %rax +# STRIP-NEXT: [[#%x, PC2:]]: +# STRIP-SAME: leaq [[#%d, CSTRINGSTART - PC2 - 7]](%rip), %rbx + +# STRIP2-LABEL: Sections: +# STRIP2-NEXT: Idx Name Size VMA Type +# STRIP2-NEXT: 0 __text {{[0-9a-f]*}} [[#%x, TEXTSTART:]] TEXT +# STRIP2-NEXT: 1 __cstring 00000000 [[#%x, CSTRINGSTART:]] DATA +# STRIP2-NEXT: 2 __data 00000000 +# STRIP2-NEXT: 3 __llvm_orderfile 00000000 +# STRIP2-NEXT: 4 __mybss 00000000 +# STRIP2-NEXT: 5 __bar 00000000 +# STRIP2-NEXT: 6 __notexist 00000000 +# STRIP2-NEXT: 7 __ever 00000000 +# STRIP2-NEXT: 8 __lookup 00000000 +# STRIP2-NEXT: 9 symbol 00000000 +# STRIP2-NEXT: 10 __quux 00000000 + # CHECK-LABEL: Sections: # CHECK-NEXT: Idx Name Size VMA Type # CHECK: 0 __text {{[0-9a-f]*}} [[#%x, TEXTSTART:]] TEXT diff --git a/lld/test/MachO/syslibroot.test b/lld/test/MachO/syslibroot.test index 1a71ea538a0f5..55c08104358d0 100644 --- a/lld/test/MachO/syslibroot.test +++ b/lld/test/MachO/syslibroot.test @@ -1,68 +1,77 @@ # Ensure that a nonexistent path is ignored with a syslibroot +# Don't use %lld to not pick up the default syslibroot flag. 
-RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot /var/empty | FileCheck %s -check-prefix CHECK-NONEXISTENT-SYSLIBROOT +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot /var/empty 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-NONEXISTENT-SYSLIBROOT CHECK-NONEXISTENT-SYSLIBROOT: Library search paths: CHECK-NONEXISTENT-SYSLIBROOT-NEXT: Framework search paths: RUN: mkdir -p %t/usr/lib -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t 2>&1 | FileCheck %s -check-prefix CHECK-SYSLIBROOT -DROOT=%t +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot %t 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-SYSLIBROOT -DROOT=%t CHECK-SYSLIBROOT-NOT: directory not found{{.*}}usr/local/lib CHECK-SYSLIBROOT: Library search paths: CHECK-SYSLIBROOT-NEXT: [[ROOT]]/usr/lib RUN: mkdir -p %t/Library/libxml2-development -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t -L /Library/libxml2-development | FileCheck %s -check-prefix CHECK-ABSOLUTE-PATH-REROOTED -DROOT=%t +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot %t -L /Library/libxml2-development 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-ABSOLUTE-PATH-REROOTED -DROOT=%t CHECK-ABSOLUTE-PATH-REROOTED: Library search paths: CHECK-ABSOLUTE-PATH-REROOTED: [[ROOT]]/Library/libxml2-development CHECK-ABSOLUTE-PATH-REROOTED: [[ROOT]]/usr/lib -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t -L %t/Library/libxml2-development | FileCheck %s -check-prefix CHECK-PATH-WITHOUT-REROOT -DPATH=%t/Library/libxml2-development +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot %t -L %t/Library/libxml2-development 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-PATH-WITHOUT-REROOT -DPATH=%t/Library/libxml2-development CHECK-PATH-WITHOUT-REROOT: Library search paths: CHECK-PATH-WITHOUT-REROOT-NEXT: [[PATH]] RUN: mkdir -p %t.2/usr/lib -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t -syslibroot %t.2 | FileCheck %s -check-prefix CHECK-SYSLIBROOT-MATRIX -DROOT=%t +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot %t -syslibroot %t.2 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-SYSLIBROOT-MATRIX -DROOT=%t CHECK-SYSLIBROOT-MATRIX: Library search paths: CHECK-SYSLIBROOT-MATRIX: [[ROOT]]/usr/lib CHECK-SYSLIBROOT-MATRIX: [[ROOT]].2/usr/lib -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t -syslibroot %t.2 -syslibroot / | \ -RUN: FileCheck %s -check-prefix CHECK-SYSLIBROOT-IGNORED -DROOT=%t +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot %t -syslibroot %t.2 -syslibroot / 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-SYSLIBROOT-IGNORED -DROOT=%t CHECK-SYSLIBROOT-IGNORED: Library search paths: CHECK-SYSLIBROOT-IGNORED-NOT: [[ROOT]]/usr/lib CHECK-SYSLIBROOT-IGNORED-NOT: [[ROOT]].2/usr/lib RUN: mkdir -p %t/System/Library/Frameworks -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t | FileCheck %s -check-prefix CHECK-SYSLIBROOT-FRAMEWORK -DROOT=%t +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ 
+RUN: -o /dev/null -syslibroot %t 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-SYSLIBROOT-FRAMEWORK -DROOT=%t CHECK-SYSLIBROOT-FRAMEWORK: Framework search paths: CHECK-SYSLIBROOT-FRAMEWORK: [[ROOT]]/System/Library/Frameworks RUN: mkdir -p %t/Library/Frameworks RUN: mkdir -p %t.2/Library/Frameworks -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t -syslibroot %t.2 -F /Library/Frameworks | \ -RUN: FileCheck %s -check-prefix CHECK-SYSLIBROOT-FRAMEWORK-MATRIX -DROOT=%t +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot %t -syslibroot %t.2 \ +RUN: -F /Library/Frameworks 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-SYSLIBROOT-FRAMEWORK-MATRIX -DROOT=%t CHECK-SYSLIBROOT-FRAMEWORK-MATRIX: Framework search paths: CHECK-SYSLIBROOT-FRAMEWORK-MATRIX: [[ROOT]]/Library/Frameworks CHECK-SYSLIBROOT-FRAMEWORK-MATRIX: [[ROOT]].2/Library/Frameworks -RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib -o /dev/null \ -RUN: -syslibroot %t -syslibroot %t.2 -syslibroot / -F /Library/Frameworks | \ -RUN: FileCheck %s -check-prefix CHECK-SYSLIBROOT-FRAMEWORK-IGNORED -DROOT=%t +RUN: ld64.lld -arch x86_64 -platform_version macos 10 11 -v -dylib \ +RUN: -o /dev/null -syslibroot %t -syslibroot %t.2 -syslibroot / \ +RUN: -F /Library/Frameworks 2>&1 \ +RUN: | FileCheck %s -check-prefix CHECK-SYSLIBROOT-FRAMEWORK-IGNORED -DROOT=%t CHECK-SYSLIBROOT-FRAMEWORK-IGNORED: Framework search paths: CHECK-SYSLIBROOT-FRAMEWORK-IGNORED-NOT: [[ROOT]]/Library/Frameworks
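The RUN-line churn in this test is mechanical (adding 2>&1 now that -v prints to stderr, plus re-wrapping), but the behavior under test deserves a gloss: an absolute -L or -F path is re-rooted under each -syslibroot and kept only where the re-rooted directory exists, while relative paths bypass re-rooting. A rough standalone sketch, with rerootPath a hypothetical helper rather than lld's exact code:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include <string>
#include <vector>

static std::vector<std::string>
rerootPath(llvm::StringRef path, llvm::ArrayRef<llvm::StringRef> syslibroots) {
  std::vector<std::string> result;
  if (!llvm::sys::path::is_absolute(path)) {
    result.push_back(path.str()); // Relative paths are used as-is.
    return result;
  }
  for (llvm::StringRef root : syslibroots) {
    llvm::SmallString<128> buf(root);
    llvm::sys::path::append(buf, path); // e.g. /sysroot + /usr/lib
    if (llvm::sys::fs::is_directory(buf))
      result.push_back(buf.str().str());
  }
  return result;
}

This is also what the MATRIX and IGNORED checks above pin down: several -syslibroot flags can fan one -L path out into several directories, and per the IGNORED case a final -syslibroot / drops the earlier roots out of the search list.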
diff --git a/lld/test/MachO/time-trace.s b/lld/test/MachO/time-trace.s index c4e5cc3d92cce..e75bcd2722993 100644 --- a/lld/test/MachO/time-trace.s +++ b/lld/test/MachO/time-trace.s @@ -1,3 +1,7 @@ +## When running main twice, we'll also output the time trace JSON twice, which +## breaks JSON parsing. +# XFAIL: main-run-twice + # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o diff --git a/lld/test/MachO/tlv.s b/lld/test/MachO/tlv.s index 9c74ea082ba7a..f188bf7279a89 100644 --- a/lld/test/MachO/tlv.s +++ b/lld/test/MachO/tlv.s @@ -3,13 +3,13 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/regular.s -o %t/regular.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/tbss.s -o %t/tbss.o -# RUN: %lld -lSystem -no_pie -o %t/regular %t/regular.o -# RUN: llvm-otool -hv %t/regular | FileCheck %s --check-prefix=HEADER -# RUN: llvm-objdump -d --bind --rebase %t/regular | FileCheck %s --check-prefixes=REG,LINKEDIT -# RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/regular | \ +# RUN: %lld -lSystem -no_pie -o %t/regular-no-pie %t/regular.o +# RUN: llvm-otool -hv %t/regular-no-pie | FileCheck %s --check-prefix=HEADER +# RUN: llvm-objdump -d --bind --rebase %t/regular-no-pie | FileCheck %s --check-prefixes=REG,LINKEDIT +# RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/regular-no-pie | \ # RUN: FileCheck %s --check-prefix=REG-TLVP -# RUN: %lld -lSystem -pie %t/regular.o -o %t/regular-pie +# RUN: %lld -lSystem %t/regular.o -o %t/regular-pie # RUN: llvm-otool -hv %t/regular-pie | FileCheck %s --check-prefix=HEADER # RUN: llvm-objdump -d --bind --rebase %t/regular-pie | FileCheck %s --check-prefixes=REG,LINKEDIT # RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/regular-pie | \
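The lit.cfg.py change that follows wires up LLD_IN_TEST=2, which makes each test invoke the linker's entry point twice in one process. A hedged sketch of what that mode effectively exercises, assuming the lld::macho::link() signature from this era's lld/Common/Driver.h:

#include "lld/Common/Driver.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/raw_ostream.h"

static bool linkTwice(llvm::ArrayRef<const char *> args) {
  // Both runs must behave identically, which only holds if the cleanup
  // callback really resets global state (freeArena(), cachedReads, the
  // resolvedFrameworks/resolvedLibraries caches, ...) between calls.
  bool first = lld::macho::link(args, /*canExitEarly=*/false, llvm::outs(),
                                llvm::errs());
  bool second = lld::macho::link(args, /*canExitEarly=*/false, llvm::outs(),
                                 llvm::errs());
  return first && second;
}

Time-trace output shows why some tests must opt out: even a perfectly clean second run appends a second JSON document to the trace file, hence the XFAIL above rather than a fix.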
diff --git a/lld/test/lit.cfg.py b/lld/test/lit.cfg.py index 225104243bf26..d03a383cc8758 100644 --- a/lld/test/lit.cfg.py +++ b/lld/test/lit.cfg.py @@ -81,7 +81,22 @@ # Set a fake constant version so that we get consistent output. config.environment['LLD_VERSION'] = 'LLD 1.0' -config.environment['LLD_IN_TEST'] = '1' + +# LLD_IN_TEST determines how many times `main` is run inside each process, which +# lets us test that it's cleaning up after itself and resetting global state +# correctly (which is important for usage as a library). +run_lld_main_twice = lit_config.params.get('RUN_LLD_MAIN_TWICE', False) +if not run_lld_main_twice: + config.environment['LLD_IN_TEST'] = '1' +else: + config.environment['LLD_IN_TEST'] = '2' + # Many ELF tests fail in this mode. + config.excludes.append('ELF') + # Some old Mach-O backend tests fail, and it's due for removal anyway. + config.excludes.append('mach-o') + # Some new Mach-O backend tests fail; give them a way to mark themselves + # unsupported in this mode. + config.available_features.add('main-run-twice') # Indirectly check if the mt.exe Microsoft utility exists by searching for # cvtres, which always accompanies it. Alternatively, check if we can use diff --git a/lld/test/wasm/debuginfo.test b/lld/test/wasm/debuginfo.test index 79be57e87b0c9..9cb1cc31e515a 100644 --- a/lld/test/wasm/debuginfo.test +++ b/lld/test/wasm/debuginfo.test @@ -46,7 +46,7 @@ CHECK-NEXT: DW_AT_name ("hi_foo.c") CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("y") -CHECK-NEXT: DW_AT_type (0x000000ac "int [2]") +CHECK-NEXT: DW_AT_type (0x000000ac "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (1) @@ -68,7 +68,7 @@ CHECK-NEXT: DW_AT_encoding (DW_ATE_unsigned) CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("z") -CHECK-NEXT: DW_AT_type (0x000000ac "int [2]") +CHECK-NEXT: DW_AT_type (0x000000ac "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (8) diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 3a2d2fde34e25..59abfaadf3989 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -632,7 +632,7 @@ static void createSyntheticSymbols() { // __table_base) from the environment and use these as the offset at // which to load our static data and function table. // See: - // https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md + // https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md auto *globalType = is64 ? &globalTypeI64 : &globalTypeI32; WasmSym::memoryBase = createUndefinedGlobal("__memory_base", globalType); WasmSym::tableBase = createUndefinedGlobal("__table_base", globalType); diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp index 4659278be7535..68d29eee6d5b3 100644 --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -127,7 +127,7 @@ std::vector<StringRef> BitcodeCompiler::compile() { // The --thinlto-cache-dir option specifies the path to a directory in which // to cache native object files for ThinLTO incremental builds. If a path was // specified, configure LTO to use it as the cache directory. - NativeObjectCache cache; + FileCache cache; if (!config->thinLTOCacheDir.empty()) cache = check(localCache("ThinLTO", "Thin", config->thinLTOCacheDir, @@ -137,7 +137,7 @@ std::vector<StringRef> BitcodeCompiler::compile() { checkError(ltoObj->run( [&](size_t task) { - return std::make_unique<lto::NativeObjectStream>( + return std::make_unique<CachedFileStream>( std::make_unique<raw_svector_ostream>(buf[task])); }, cache)); diff --git a/lld/wasm/SyntheticSections.h b/lld/wasm/SyntheticSections.h index 956c896148db0..9a425ea4f6ec3 100644 --- a/lld/wasm/SyntheticSections.h +++ b/lld/wasm/SyntheticSections.h @@ -71,7 +71,7 @@ class SyntheticSection : public OutputSection { // Create the custom "dylink" section containing information for the dynamic // linker. // See -// https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md +// https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md class DylinkSection : public SyntheticSection { public: DylinkSection() : SyntheticSection(llvm::wasm::WASM_SEC_CUSTOM, "dylink.0") {} diff --git a/lldb/docs/doxygen.cfg.in b/lldb/docs/doxygen.cfg.in index 7750d89fd267e..5712779e6b2c5 100644 --- a/lldb/docs/doxygen.cfg.in +++ b/lldb/docs/doxygen.cfg.in @@ -916,7 +916,7 @@ HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). +# it should be included in the main .chm file (NO).
GENERATE_CHI = NO diff --git a/lldb/docs/man/lldb.rst b/lldb/docs/man/lldb.rst index b75288db380de..35db1dc68c129 100644 --- a/lldb/docs/man/lldb.rst +++ b/lldb/docs/man/lldb.rst @@ -111,7 +111,7 @@ COMMANDS .. option:: --source-quietly - Tells the debugger to execute this one-line lldb command before any file has been loaded. + Tells the debugger not to echo commands while sourcing files or one-line commands provided on the command line. .. option:: --source diff --git a/lldb/include/lldb/Core/PluginManager.h b/lldb/include/lldb/Core/PluginManager.h index cd720d5a9d0fc..7dc99bf3e7558 100644 --- a/lldb/include/lldb/Core/PluginManager.h +++ b/lldb/include/lldb/Core/PluginManager.h @@ -426,7 +426,7 @@ class PluginManager { GetInstrumentationRuntimeCreateCallbackAtIndex(uint32_t idx); // TypeSystem - static bool RegisterPlugin(ConstString name, const char *description, + static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description, TypeSystemCreateInstance create_callback, LanguageSet supported_languages_for_types, LanguageSet supported_languages_for_expressions); @@ -441,7 +441,7 @@ class PluginManager { static LanguageSet GetAllTypeSystemSupportedLanguagesForExpressions(); // REPL - static bool RegisterPlugin(ConstString name, const char *description, + static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description, REPLCreateInstance create_callback, LanguageSet supported_languages); diff --git a/lldb/include/lldb/Target/PathMappingList.h b/lldb/include/lldb/Target/PathMappingList.h index d788d120c47e9..f1cc779ea50fe 100644 --- a/lldb/include/lldb/Target/PathMappingList.h +++ b/lldb/include/lldb/Target/PathMappingList.h @@ -32,8 +32,7 @@ class PathMappingList { const PathMappingList &operator=(const PathMappingList &rhs); - void Append(ConstString path, ConstString replacement, - bool notify); + void Append(llvm::StringRef path, llvm::StringRef replacement, bool notify); void Append(const PathMappingList &rhs, bool notify); @@ -49,17 +48,16 @@ class PathMappingList { bool GetPathsAtIndex(uint32_t idx, ConstString &path, ConstString &new_path) const; - void Insert(ConstString path, ConstString replacement, + void Insert(llvm::StringRef path, llvm::StringRef replacement, uint32_t insert_idx, bool notify); bool Remove(size_t index, bool notify); bool Remove(ConstString path, bool notify); - bool Replace(ConstString path, ConstString replacement, - bool notify); + bool Replace(llvm::StringRef path, llvm::StringRef replacement, bool notify); - bool Replace(ConstString path, ConstString replacement, + bool Replace(llvm::StringRef path, llvm::StringRef replacement, uint32_t index, bool notify); bool RemapPath(ConstString path, ConstString &new_path) const; @@ -104,7 +102,7 @@ class PathMappingList { /// The newly remapped filespec that is guaranteed to exist. 
llvm::Optional<FileSpec> FindFile(const FileSpec &orig_spec) const; - uint32_t FindIndexForPath(ConstString path) const; + uint32_t FindIndexForPath(llvm::StringRef path) const; uint32_t GetModificationID() const { return m_mod_id; } diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index e9743ed17dd46..d36c60e24887d 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1792,7 +1792,7 @@ class Process : public std::enable_shared_from_this<Process>, /// /// If load_addr is within the address space the process has mapped /// range_info will be filled in with the start and end of that range as - /// well as the permissions for that range and range_info.GetMapped will + /// well as the permissions for that range and range_info. GetMapped will /// return true. /// /// If load_addr is outside any mapped region then range_info will have its @@ -1801,23 +1801,21 @@ class Process : public std::enable_shared_from_this<Process>, /// there are no valid mapped ranges between load_addr and the end of the /// process address space. /// - /// GetMemoryRegionInfo will only return an error if it is unimplemented for - /// the current process. + /// GetMemoryRegionInfo calls DoGetMemoryRegionInfo. Override that function in + /// process subclasses. /// /// \param[in] load_addr - /// The load address to query the range_info for. + /// The load address to query the range_info for. May include non + /// address bits; these will be removed by the ABI plugin if there is + /// one. /// /// \param[out] range_info /// A range_info value containing the details of the range. /// /// \return /// An error value. - virtual Status GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) { - Status error; - error.SetErrorString("Process::GetMemoryRegionInfo() not supported"); - return error; - } + Status GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info); /// Obtain all the mapped memory regions within this process. /// @@ -2637,6 +2635,26 @@ void PruneThreadPlans(); virtual size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size, Status &error) = 0; + /// DoGetMemoryRegionInfo is called by GetMemoryRegionInfo after it has + /// removed non address bits from load_addr. Override this method in + /// subclasses of Process. + /// + /// See GetMemoryRegionInfo for details of the logic. + /// + /// \param[in] load_addr + /// The load address to query the range_info for. (non address bits + /// removed) + /// + /// \param[out] range_info + /// A range_info value containing the details of the range. + /// + /// \return + /// An error value. + virtual Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info) { + return Status("Process::DoGetMemoryRegionInfo() not supported"); + } + lldb::StateType GetPrivateState(); /// The "private" side of resuming a process. This doesn't alter the state
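The Process.h change above is a textbook non-virtual-interface split: the public GetMemoryRegionInfo() now strips non-address bits exactly once, then forwards to the protected virtual DoGetMemoryRegionInfo() that subclasses override. The shape, reduced to a self-contained sketch (ProcessLike and the int status code are stand-ins, not lldb types):

#include <cstdint>

class ProcessLike {
public:
  virtual ~ProcessLike() = default;

  // Public, non-virtual entry point: normalize the address in one place.
  int GetMemoryRegionInfo(uint64_t load_addr /*, MemoryRegionInfo &info */) {
    // Stand-in for the ABI plugin's address fixing, e.g. dropping AArch64
    // pointer-authentication or memory-tagging bits.
    load_addr &= 0x00ffffffffffffffULL;
    return DoGetMemoryRegionInfo(load_addr);
  }

protected:
  // Subclasses override only this hook and may assume a clean address.
  virtual int DoGetMemoryRegionInfo(uint64_t load_addr) {
    (void)load_addr;
    return -1; // "not supported", mirroring the Status error in the patch.
  }
};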
/// diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py index 0a55fc0ead1e4..255a4805a9737 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/lldbvscode_testcase.py @@ -251,7 +251,7 @@ def attach(self, program=None, pid=None, waitFor=None, trace=None, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, attachCommands=None, coreFile=None, disconnectAutomatically=True, terminateCommands=None, - postRunCommands=None): + postRunCommands=None, sourceMap=None): '''Build the default Makefile target, create the VSCode debug adaptor, and attach to the process. ''' @@ -271,7 +271,8 @@ def cleanup(): initCommands=initCommands, preRunCommands=preRunCommands, stopCommands=stopCommands, exitCommands=exitCommands, attachCommands=attachCommands, terminateCommands=terminateCommands, - coreFile=coreFile, postRunCommands=postRunCommands) + coreFile=coreFile, postRunCommands=postRunCommands, + sourceMap=sourceMap) if not (response and response['success']): self.assertTrue(response['success'], 'attach failed (%s)' % (response['message'])) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index df057d5e63aa6..603b1545cd714 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -506,7 +506,8 @@ def request_attach(self, program=None, pid=None, waitFor=None, trace=None, initCommands=None, preRunCommands=None, stopCommands=None, exitCommands=None, attachCommands=None, terminateCommands=None, - coreFile=None, postRunCommands=None): + coreFile=None, postRunCommands=None, + sourceMap=None): args_dict = {} if pid is not None: args_dict['pid'] = pid @@ -533,6 +534,8 @@ def request_attach(self, program=None, pid=None, waitFor=None, trace=None, args_dict['coreFile'] = coreFile if postRunCommands: args_dict['postRunCommands'] = postRunCommands + if sourceMap: + args_dict['sourceMap'] = sourceMap command_dict = { 'command': 'attach', 'type': 'request', diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index 9db5b6d03c3fc..98158f457a04f 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -214,8 +214,8 @@ SBStructuredData SBTarget::GetStatistics() { if (!target_sp) return LLDB_RECORD_RESULT(data); std::string json_str = - llvm::formatv("{0:2}", - DebuggerStats::ReportStatistics(target_sp->GetDebugger(), + llvm::formatv("{0:2}", + DebuggerStats::ReportStatistics(target_sp->GetDebugger(), target_sp.get())).str(); data.m_impl_up->SetObjectSP(StructuredData::ParseJSON(json_str)); return LLDB_RECORD_RESULT(data); @@ -1586,13 +1586,13 @@ void SBTarget::AppendImageSearchPath(const char *from, const char *to, if (!target_sp) return error.SetErrorString("invalid target"); - const ConstString csFrom(from), csTo(to); - if (!csFrom) + llvm::StringRef srFrom = from, srTo = to; + if (srFrom.empty()) return error.SetErrorString(" path can't be empty"); - if (!csTo) + if (srTo.empty()) return error.SetErrorString(" path can't be empty"); - target_sp->GetImageSearchPathList().Append(csFrom, csTo, true); + target_sp->GetImageSearchPathList().Append(srFrom, srTo, true); } lldb::SBModule SBTarget::AddModule(const char *path, const char *triple, diff --git 
a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index e0a88a710fb97..2a42eb22938d7 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -1047,8 +1047,7 @@ class CommandObjectTargetModulesSearchPathsAdd : public CommandObjectParsed { } bool last_pair = ((argc - i) == 2); target->GetImageSearchPathList().Append( - ConstString(from), ConstString(to), - last_pair); // Notify if this is the last pair + from, to, last_pair); // Notify if this is the last pair result.SetStatus(eReturnStatusSuccessFinishNoResult); } else { if (from[0]) @@ -1175,8 +1174,8 @@ class CommandObjectTargetModulesSearchPathsInsert : public CommandObjectParsed { if (from[0] && to[0]) { bool last_pair = ((argc - i) == 2); - target->GetImageSearchPathList().Insert( - ConstString(from), ConstString(to), insert_idx, last_pair); + target->GetImageSearchPathList().Insert(from, to, insert_idx, + last_pair); result.SetStatus(eReturnStatusSuccessFinishNoResult); } else { if (from[0]) diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index c7538db7dd240..283e18707dbba 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -1614,15 +1614,14 @@ llvm::Optional Module::RemapSourceFile(llvm::StringRef path) const void Module::RegisterXcodeSDK(llvm::StringRef sdk_name, llvm::StringRef sysroot) { XcodeSDK sdk(sdk_name.str()); - ConstString sdk_path(HostInfo::GetXcodeSDKPath(sdk)); - if (!sdk_path) + llvm::StringRef sdk_path(HostInfo::GetXcodeSDKPath(sdk)); + if (sdk_path.empty()) return; // If the SDK changed for a previously registered source path, update it. // This could happend with -fdebug-prefix-map, otherwise it's unlikely. - ConstString sysroot_cs(sysroot); - if (!m_source_mappings.Replace(sysroot_cs, sdk_path, true)) + if (!m_source_mappings.Replace(sysroot, sdk_path, true)) // In the general case, however, append it to the list. 
- m_source_mappings.Append(sysroot_cs, sdk_path, false); + m_source_mappings.Append(sysroot, sdk_path, false); } bool Module::MergeArchitecture(const ArchSpec &arch_spec) { diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index 629c075cbc00a..9176c9dbb357b 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -122,8 +122,7 @@ void ModuleListProperties::UpdateSymlinkMappings() { FileSpec resolved; Status status = FileSystem::Instance().Readlink(symlink, resolved); if (status.Success()) - m_symlink_paths.Append(ConstString(symlink.GetPath()), - ConstString(resolved.GetPath()), notify); + m_symlink_paths.Append(symlink.GetPath(), resolved.GetPath(), notify); } } diff --git a/lldb/source/Core/PluginManager.cpp b/lldb/source/Core/PluginManager.cpp index 5ba427a24ed45..801591129244f 100644 --- a/lldb/source/Core/PluginManager.cpp +++ b/lldb/source/Core/PluginManager.cpp @@ -185,15 +185,14 @@ template struct PluginInstance { typedef Callback CallbackType; PluginInstance() = default; - PluginInstance(ConstString name, std::string description, - Callback create_callback = nullptr, + PluginInstance(llvm::StringRef name, llvm::StringRef description, + Callback create_callback, DebuggerInitializeCallback debugger_init_callback = nullptr) - : name(name), description(std::move(description)), - create_callback(create_callback), + : name(name), description(description), create_callback(create_callback), debugger_init_callback(debugger_init_callback) {} - ConstString name; - std::string description; + llvm::StringRef name; + llvm::StringRef description; Callback create_callback; DebuggerInitializeCallback debugger_init_callback; }; @@ -201,12 +200,12 @@ template struct PluginInstance { template class PluginInstances { public: template - bool RegisterPlugin(ConstString name, const char *description, + bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description, typename Instance::CallbackType callback, - Args &&... 
args) { + Args &&...args) { if (!callback) return false; - assert((bool)name); + assert(!name.empty()); Instance instance = Instance(name, description, callback, std::forward(args)...); m_instances.push_back(instance); @@ -233,20 +232,20 @@ template class PluginInstances { return nullptr; } - const char *GetDescriptionAtIndex(uint32_t idx) { + llvm::StringRef GetDescriptionAtIndex(uint32_t idx) { if (Instance *instance = GetInstanceAtIndex(idx)) - return instance->description.c_str(); - return nullptr; + return instance->description; + return ""; } - const char *GetNameAtIndex(uint32_t idx) { + llvm::StringRef GetNameAtIndex(uint32_t idx) { if (Instance *instance = GetInstanceAtIndex(idx)) - return instance->name.GetCString(); - return nullptr; + return instance->name; + return ""; } - typename Instance::CallbackType GetCallbackForName(ConstString name) { - if (!name) + typename Instance::CallbackType GetCallbackForName(llvm::StringRef name) { + if (name.empty()) return nullptr; for (auto &instance : m_instances) { if (name == instance.name) @@ -288,8 +287,7 @@ static ABIInstances &GetABIInstances() { bool PluginManager::RegisterPlugin(llvm::StringRef name, llvm::StringRef description, ABICreateInstance create_callback) { - return GetABIInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetABIInstances().RegisterPlugin(name, description, create_callback); } bool PluginManager::UnregisterPlugin(ABICreateInstance create_callback) { @@ -313,8 +311,7 @@ static ArchitectureInstances &GetArchitectureInstances() { void PluginManager::RegisterPlugin(llvm::StringRef name, llvm::StringRef description, ArchitectureCreateInstance create_callback) { - GetArchitectureInstances().push_back( - {ConstString(name), std::string(description), create_callback}); + GetArchitectureInstances().push_back({name, description, create_callback}); } void PluginManager::UnregisterPlugin( @@ -352,8 +349,8 @@ static DisassemblerInstances &GetDisassemblerInstances() { bool PluginManager::RegisterPlugin(llvm::StringRef name, llvm::StringRef description, DisassemblerCreateInstance create_callback) { - return GetDisassemblerInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetDisassemblerInstances().RegisterPlugin(name, description, + create_callback); } bool PluginManager::UnregisterPlugin( @@ -369,7 +366,7 @@ PluginManager::GetDisassemblerCreateCallbackAtIndex(uint32_t idx) { DisassemblerCreateInstance PluginManager::GetDisassemblerCreateCallbackForPluginName( llvm::StringRef name) { - return GetDisassemblerInstances().GetCallbackForName(ConstString(name)); + return GetDisassemblerInstances().GetCallbackForName(name); } #pragma mark DynamicLoader @@ -387,8 +384,7 @@ bool PluginManager::RegisterPlugin( DynamicLoaderCreateInstance create_callback, DebuggerInitializeCallback debugger_init_callback) { return GetDynamicLoaderInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - debugger_init_callback); + name, description, create_callback, debugger_init_callback); } bool PluginManager::UnregisterPlugin( @@ -404,7 +400,7 @@ PluginManager::GetDynamicLoaderCreateCallbackAtIndex(uint32_t idx) { DynamicLoaderCreateInstance PluginManager::GetDynamicLoaderCreateCallbackForPluginName( llvm::StringRef name) { - return GetDynamicLoaderInstances().GetCallbackForName(ConstString(name)); + return GetDynamicLoaderInstances().GetCallbackForName(name); } #pragma mark JITLoader @@ -422,8 +418,7 @@ bool 
PluginManager::RegisterPlugin( JITLoaderCreateInstance create_callback, DebuggerInitializeCallback debugger_init_callback) { return GetJITLoaderInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - debugger_init_callback); + name, description, create_callback, debugger_init_callback); } bool PluginManager::UnregisterPlugin(JITLoaderCreateInstance create_callback) { @@ -449,8 +444,8 @@ static EmulateInstructionInstances &GetEmulateInstructionInstances() { bool PluginManager::RegisterPlugin( llvm::StringRef name, llvm::StringRef description, EmulateInstructionCreateInstance create_callback) { - return GetEmulateInstructionInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetEmulateInstructionInstances().RegisterPlugin(name, description, + create_callback); } bool PluginManager::UnregisterPlugin( @@ -466,7 +461,7 @@ PluginManager::GetEmulateInstructionCreateCallbackAtIndex(uint32_t idx) { EmulateInstructionCreateInstance PluginManager::GetEmulateInstructionCreateCallbackForPluginName( llvm::StringRef name) { - return GetEmulateInstructionInstances().GetCallbackForName(ConstString(name)); + return GetEmulateInstructionInstances().GetCallbackForName(name); } #pragma mark OperatingSystem @@ -484,8 +479,7 @@ bool PluginManager::RegisterPlugin( OperatingSystemCreateInstance create_callback, DebuggerInitializeCallback debugger_init_callback) { return GetOperatingSystemInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - debugger_init_callback); + name, description, create_callback, debugger_init_callback); } bool PluginManager::UnregisterPlugin( @@ -501,7 +495,7 @@ PluginManager::GetOperatingSystemCreateCallbackAtIndex(uint32_t idx) { OperatingSystemCreateInstance PluginManager::GetOperatingSystemCreateCallbackForPluginName( llvm::StringRef name) { - return GetOperatingSystemInstances().GetCallbackForName(ConstString(name)); + return GetOperatingSystemInstances().GetCallbackForName(name); } #pragma mark Language @@ -517,8 +511,8 @@ static LanguageInstances &GetLanguageInstances() { bool PluginManager::RegisterPlugin(llvm::StringRef name, llvm::StringRef description, LanguageCreateInstance create_callback) { - return GetLanguageInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetLanguageInstances().RegisterPlugin(name, description, + create_callback); } bool PluginManager::UnregisterPlugin(LanguageCreateInstance create_callback) { @@ -535,14 +529,13 @@ PluginManager::GetLanguageCreateCallbackAtIndex(uint32_t idx) { struct LanguageRuntimeInstance : public PluginInstance<LanguageRuntimeCreateInstance> { LanguageRuntimeInstance( - ConstString name, std::string description, + llvm::StringRef name, llvm::StringRef description, CallbackType create_callback, DebuggerInitializeCallback debugger_init_callback, LanguageRuntimeGetCommandObject command_callback, LanguageRuntimeGetExceptionPrecondition precondition_callback) : PluginInstance<LanguageRuntimeCreateInstance>( - name, std::move(description), create_callback, - debugger_init_callback), + name, description, create_callback, debugger_init_callback), command_callback(command_callback), precondition_callback(precondition_callback) {} @@ -563,8 +556,8 @@ bool PluginManager::RegisterPlugin( LanguageRuntimeGetCommandObject command_callback, LanguageRuntimeGetExceptionPrecondition precondition_callback) { return GetLanguageRuntimeInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, nullptr, -
command_callback, precondition_callback); + name, description, create_callback, nullptr, command_callback, + precondition_callback); } bool PluginManager::UnregisterPlugin( @@ -606,8 +599,8 @@ static SystemRuntimeInstances &GetSystemRuntimeInstances() { bool PluginManager::RegisterPlugin( llvm::StringRef name, llvm::StringRef description, SystemRuntimeCreateInstance create_callback) { - return GetSystemRuntimeInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetSystemRuntimeInstances().RegisterPlugin(name, description, + create_callback); } bool PluginManager::UnregisterPlugin( @@ -624,11 +617,12 @@ PluginManager::GetSystemRuntimeCreateCallbackAtIndex(uint32_t idx) { struct ObjectFileInstance : public PluginInstance<ObjectFileCreateInstance> { ObjectFileInstance( - ConstString name, std::string description, CallbackType create_callback, + llvm::StringRef name, llvm::StringRef description, + CallbackType create_callback, ObjectFileCreateMemoryInstance create_memory_callback, ObjectFileGetModuleSpecifications get_module_specifications, ObjectFileSaveCore save_core) - : PluginInstance<ObjectFileCreateInstance>(name, std::move(description), + : PluginInstance<ObjectFileCreateInstance>(name, description, create_callback), create_memory_callback(create_memory_callback), get_module_specifications(get_module_specifications), @@ -652,8 +646,8 @@ bool PluginManager::RegisterPlugin( ObjectFileGetModuleSpecifications get_module_specifications, ObjectFileSaveCore save_core) { return GetObjectFileInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - create_memory_callback, get_module_specifications, save_core); + name, description, create_callback, create_memory_callback, + get_module_specifications, save_core); } bool PluginManager::UnregisterPlugin(ObjectFileCreateInstance create_callback) { @@ -687,7 +681,7 @@ PluginManager::GetObjectFileCreateMemoryCallbackForPluginName( llvm::StringRef name) { const auto &instances = GetObjectFileInstances().GetInstances(); for (auto &instance : instances) { - if (instance.name.GetStringRef() == name) + if (instance.name == name) return instance.create_memory_callback; } return nullptr; @@ -710,7 +704,7 @@ Status PluginManager::SaveCore(const lldb::ProcessSP &process_sp, Status error; auto &instances = GetObjectFileInstances().GetInstances(); for (auto &instance : instances) { - if (plugin_name.empty() || instance.name.GetStringRef() == plugin_name) { + if (plugin_name.empty() || instance.name == plugin_name) { if (instance.save_core && instance.save_core(process_sp, outfile, core_style, error)) return error; @@ -726,10 +720,11 @@ Status PluginManager::SaveCore(const lldb::ProcessSP &process_sp, struct ObjectContainerInstance : public PluginInstance<ObjectContainerCreateInstance> { ObjectContainerInstance( - ConstString name, std::string description, CallbackType create_callback, + llvm::StringRef name, llvm::StringRef description, + CallbackType create_callback, ObjectFileGetModuleSpecifications get_module_specifications) - : PluginInstance<ObjectContainerCreateInstance>( - name, std::move(description), create_callback), + : PluginInstance<ObjectContainerCreateInstance>(name, description, + create_callback), get_module_specifications(get_module_specifications) {} ObjectFileGetModuleSpecifications get_module_specifications; @@ -746,8 +741,7 @@ bool PluginManager::RegisterPlugin( ObjectContainerCreateInstance create_callback, ObjectFileGetModuleSpecifications get_module_specifications) { return GetObjectContainerInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - get_module_specifications); + name,
description, create_callback, get_module_specifications); } bool PluginManager::UnregisterPlugin( @@ -784,8 +778,7 @@ bool PluginManager::RegisterPlugin( PlatformCreateInstance create_callback, DebuggerInitializeCallback debugger_init_callback) { return GetPlatformInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - debugger_init_callback); + name, description, create_callback, debugger_init_callback); } bool PluginManager::UnregisterPlugin(PlatformCreateInstance create_callback) { @@ -808,14 +801,14 @@ PluginManager::GetPlatformCreateCallbackAtIndex(uint32_t idx) { PlatformCreateInstance PluginManager::GetPlatformCreateCallbackForPluginName(llvm::StringRef name) { - return GetPlatformInstances().GetCallbackForName(ConstString(name)); + return GetPlatformInstances().GetCallbackForName(name); } void PluginManager::AutoCompletePlatformName(llvm::StringRef name, CompletionRequest &request) { for (const auto &instance : GetPlatformInstances().GetInstances()) { - if (instance.name.GetStringRef().startswith(name)) - request.AddCompletion(instance.name.GetCString()); + if (instance.name.startswith(name)) + request.AddCompletion(instance.name); } } @@ -834,8 +827,7 @@ bool PluginManager::RegisterPlugin( ProcessCreateInstance create_callback, DebuggerInitializeCallback debugger_init_callback) { return GetProcessInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - debugger_init_callback); + name, description, create_callback, debugger_init_callback); } bool PluginManager::UnregisterPlugin(ProcessCreateInstance create_callback) { @@ -857,14 +849,14 @@ PluginManager::GetProcessCreateCallbackAtIndex(uint32_t idx) { ProcessCreateInstance PluginManager::GetProcessCreateCallbackForPluginName(llvm::StringRef name) { - return GetProcessInstances().GetCallbackForName(ConstString(name)); + return GetProcessInstances().GetCallbackForName(name); } void PluginManager::AutoCompleteProcessName(llvm::StringRef name, CompletionRequest &request) { for (const auto &instance : GetProcessInstances().GetInstances()) { - if (instance.name.GetStringRef().startswith(name)) - request.AddCompletion(instance.name.GetCString(), instance.description); + if (instance.name.startswith(name)) + request.AddCompletion(instance.name, instance.description); } } @@ -872,11 +864,11 @@ void PluginManager::AutoCompleteProcessName(llvm::StringRef name, struct ScriptInterpreterInstance : public PluginInstance<ScriptInterpreterCreateInstance> { - ScriptInterpreterInstance(ConstString name, std::string description, + ScriptInterpreterInstance(llvm::StringRef name, llvm::StringRef description, CallbackType create_callback, lldb::ScriptLanguage language) - : PluginInstance<ScriptInterpreterCreateInstance>( - name, std::move(description), create_callback), + : PluginInstance<ScriptInterpreterCreateInstance>(name, description, + create_callback), language(language) {} lldb::ScriptLanguage language = lldb::eScriptLanguageNone; @@ -894,8 +886,7 @@ bool PluginManager::RegisterPlugin( lldb::ScriptLanguage script_language, ScriptInterpreterCreateInstance create_callback) { return GetScriptInterpreterInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - script_language); + name, description, create_callback, script_language); } bool PluginManager::UnregisterPlugin( @@ -931,12 +922,12 @@ PluginManager::GetScriptInterpreterForLanguage(lldb::ScriptLanguage script_lang, struct StructuredDataPluginInstance : public PluginInstance<StructuredDataPluginCreateInstance> { StructuredDataPluginInstance( - ConstString name, std::string description, CallbackType
create_callback, + llvm::StringRef name, llvm::StringRef description, + CallbackType create_callback, DebuggerInitializeCallback debugger_init_callback, StructuredDataFilterLaunchInfo filter_callback) : PluginInstance<StructuredDataPluginCreateInstance>( - name, std::move(description), create_callback, - debugger_init_callback), + name, description, create_callback, debugger_init_callback), filter_callback(filter_callback) {} StructuredDataFilterLaunchInfo filter_callback = nullptr; @@ -956,8 +947,8 @@ bool PluginManager::RegisterPlugin( DebuggerInitializeCallback debugger_init_callback, StructuredDataFilterLaunchInfo filter_callback) { return GetStructuredDataPluginInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - debugger_init_callback, filter_callback); + name, description, create_callback, debugger_init_callback, + filter_callback); } bool PluginManager::UnregisterPlugin( @@ -998,8 +989,7 @@ bool PluginManager::RegisterPlugin( SymbolFileCreateInstance create_callback, DebuggerInitializeCallback debugger_init_callback) { return GetSymbolFileInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - debugger_init_callback); + name, description, create_callback, debugger_init_callback); } bool PluginManager::UnregisterPlugin(SymbolFileCreateInstance create_callback) { @@ -1024,8 +1014,8 @@ static SymbolVendorInstances &GetSymbolVendorInstances() { bool PluginManager::RegisterPlugin(llvm::StringRef name, llvm::StringRef description, SymbolVendorCreateInstance create_callback) { - return GetSymbolVendorInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetSymbolVendorInstances().RegisterPlugin(name, description, + create_callback); } bool PluginManager::UnregisterPlugin( @@ -1043,12 +1033,12 @@ PluginManager::GetSymbolVendorCreateCallbackAtIndex(uint32_t idx) { struct TraceInstance : public PluginInstance<TraceCreateInstanceForSessionFile> { TraceInstance( - ConstString name, std::string description, + llvm::StringRef name, llvm::StringRef description, CallbackType create_callback_for_session_file, TraceCreateInstanceForLiveProcess create_callback_for_live_process, llvm::StringRef schema) : PluginInstance<TraceCreateInstanceForSessionFile>( - name, std::move(description), create_callback_for_session_file), + name, description, create_callback_for_session_file), schema(schema), create_callback_for_live_process(create_callback_for_live_process) {} @@ -1069,9 +1059,8 @@ bool PluginManager::RegisterPlugin( TraceCreateInstanceForLiveProcess create_callback_for_live_process, llvm::StringRef schema) { return GetTracePluginInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), - create_callback_for_session_file, create_callback_for_live_process, - schema); + name, description, create_callback_for_session_file, + create_callback_for_live_process, schema); } bool PluginManager::UnregisterPlugin( @@ -1082,20 +1071,20 @@ bool PluginManager::UnregisterPlugin( TraceCreateInstanceForSessionFile PluginManager::GetTraceCreateCallback(llvm::StringRef plugin_name) { - return GetTracePluginInstances().GetCallbackForName(ConstString(plugin_name)); + return GetTracePluginInstances().GetCallbackForName(plugin_name); } TraceCreateInstanceForLiveProcess PluginManager::GetTraceCreateCallbackForLiveProcess(llvm::StringRef plugin_name) { for (const TraceInstance &instance : GetTracePluginInstances().GetInstances()) - if (instance.name.GetStringRef() == plugin_name) + if (instance.name == plugin_name) return instance.create_callback_for_live_process; return nullptr; }
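Note on the pattern above: every plugin kind gets the same mechanical conversion, namely take llvm::StringRef at the registration boundary, keep an owned copy inside the instance record, and compare StringRefs directly instead of round-tripping through ConstString. What follows is a minimal self-contained sketch of that shape, not LLDB API; SimpleRegistry, SimpleInstance, and CreateFn are illustrative names, and only llvm/ADT/StringRef.h is assumed.

#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
#include <vector>

using CreateFn = void *(*)();

struct SimpleInstance {
  // Own the strings: RegisterPlugin callers may pass temporaries, and a
  // StringRef member would dangle once they are destroyed.
  std::string name;
  std::string description;
  CreateFn create_callback;
};

class SimpleRegistry {
public:
  bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description,
                      CreateFn callback) {
    if (!callback)
      return false;
    assert(!name.empty()); // StringRef analogue of the old assert((bool)name).
    m_instances.push_back({name.str(), description.str(), callback});
    return true;
  }

  CreateFn GetCallbackForName(llvm::StringRef name) {
    if (name.empty())
      return nullptr;
    for (const auto &instance : m_instances)
      if (name == instance.name) // std::string converts to StringRef here.
        return instance.create_callback;
    return nullptr;
  }

private:
  std::vector<SimpleInstance> m_instances;
};

Returning llvm::StringRef (with "" for the not-found case) from the index-based getters works for the same reason: the returned ref points into the registry-owned std::string, which outlives the caller's use of it.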
llvm::StringRef PluginManager::GetTraceSchema(llvm::StringRef plugin_name) { for (const TraceInstance &instance : GetTracePluginInstances().GetInstances()) - if (instance.name.GetStringRef() == plugin_name) + if (instance.name == plugin_name) return instance.schema; return llvm::StringRef(); } @@ -1112,11 +1101,11 @@ llvm::StringRef PluginManager::GetTraceSchema(size_t index) { struct TraceExporterInstance : public PluginInstance<TraceExporterCreateInstance> { TraceExporterInstance( - ConstString name, std::string description, + llvm::StringRef name, llvm::StringRef description, TraceExporterCreateInstance create_instance, ThreadTraceExportCommandCreator create_thread_trace_export_command) - : PluginInstance<TraceExporterCreateInstance>( - name, std::move(description), create_instance), + : PluginInstance<TraceExporterCreateInstance>(name, description, + create_instance), create_thread_trace_export_command(create_thread_trace_export_command) { } @@ -1135,14 +1124,12 @@ bool PluginManager::RegisterPlugin( TraceExporterCreateInstance create_callback, ThreadTraceExportCommandCreator create_thread_trace_export_command) { return GetTraceExporterInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - create_thread_trace_export_command); + name, description, create_callback, create_thread_trace_export_command); } TraceExporterCreateInstance PluginManager::GetTraceExporterCreateCallback(llvm::StringRef plugin_name) { - return GetTraceExporterInstances().GetCallbackForName( - ConstString(plugin_name)); + return GetTraceExporterInstances().GetCallbackForName(plugin_name); } bool PluginManager::UnregisterPlugin( @@ -1176,8 +1163,8 @@ static UnwindAssemblyInstances &GetUnwindAssemblyInstances() { bool PluginManager::RegisterPlugin( llvm::StringRef name, llvm::StringRef description, UnwindAssemblyCreateInstance create_callback) { - return GetUnwindAssemblyInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetUnwindAssemblyInstances().RegisterPlugin(name, description, + create_callback); } bool PluginManager::UnregisterPlugin( @@ -1203,8 +1190,8 @@ static MemoryHistoryInstances &GetMemoryHistoryInstances() { bool PluginManager::RegisterPlugin( llvm::StringRef name, llvm::StringRef description, MemoryHistoryCreateInstance create_callback) { - return GetMemoryHistoryInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback); + return GetMemoryHistoryInstances().RegisterPlugin(name, description, + create_callback); } bool PluginManager::UnregisterPlugin( @@ -1222,10 +1209,11 @@ PluginManager::GetMemoryHistoryCreateCallbackAtIndex(uint32_t idx) { struct InstrumentationRuntimeInstance : public PluginInstance<InstrumentationRuntimeCreateInstance> { InstrumentationRuntimeInstance( - ConstString name, std::string description, CallbackType create_callback, + llvm::StringRef name, llvm::StringRef description, + CallbackType create_callback, InstrumentationRuntimeGetType get_type_callback) - : PluginInstance<InstrumentationRuntimeCreateInstance>( - name, std::move(description), create_callback), + : PluginInstance<InstrumentationRuntimeCreateInstance>(name, description, + create_callback), get_type_callback(get_type_callback) {} InstrumentationRuntimeGetType get_type_callback = nullptr; @@ -1244,8 +1232,7 @@ bool PluginManager::RegisterPlugin( InstrumentationRuntimeCreateInstance create_callback, InstrumentationRuntimeGetType get_type_callback) { return GetInstrumentationRuntimeInstances().RegisterPlugin( - ConstString(name), description.str().c_str(), create_callback, - get_type_callback); + name, description, create_callback, get_type_callback); } bool PluginManager::UnregisterPlugin(
@@ -1269,11 +1256,11 @@ PluginManager::GetInstrumentationRuntimeCreateCallbackAtIndex(uint32_t idx) { #pragma mark TypeSystem struct TypeSystemInstance : public PluginInstance<TypeSystemCreateInstance> { - TypeSystemInstance(ConstString name, std::string description, + TypeSystemInstance(llvm::StringRef name, llvm::StringRef description, CallbackType create_callback, LanguageSet supported_languages_for_types, LanguageSet supported_languages_for_expressions) - : PluginInstance<TypeSystemCreateInstance>(name, std::move(description), + : PluginInstance<TypeSystemCreateInstance>(name, description, create_callback), supported_languages_for_types(supported_languages_for_types), supported_languages_for_expressions( @@ -1291,7 +1278,7 @@ static TypeSystemInstances &GetTypeSystemInstances() { } bool PluginManager::RegisterPlugin( - ConstString name, const char *description, + llvm::StringRef name, llvm::StringRef description, TypeSystemCreateInstance create_callback, LanguageSet supported_languages_for_types, LanguageSet supported_languages_for_expressions) { @@ -1328,10 +1315,9 @@ LanguageSet PluginManager::GetAllTypeSystemSupportedLanguagesForExpressions() { #pragma mark REPL struct REPLInstance : public PluginInstance<REPLCreateInstance> { - REPLInstance(ConstString name, std::string description, + REPLInstance(llvm::StringRef name, llvm::StringRef description, CallbackType create_callback, LanguageSet supported_languages) - : PluginInstance<REPLCreateInstance>(name, std::move(description), - create_callback), + : PluginInstance<REPLCreateInstance>(name, description, create_callback), supported_languages(supported_languages) {} LanguageSet supported_languages; @@ -1344,7 +1330,7 @@ static REPLInstances &GetREPLInstances() { return g_instances; } -bool PluginManager::RegisterPlugin(ConstString name, const char *description, +bool PluginManager::RegisterPlugin(llvm::StringRef name, llvm::StringRef description, REPLCreateInstance create_callback, LanguageSet supported_languages) { return GetREPLInstances().RegisterPlugin(name, description, create_callback, diff --git a/lldb/source/Interpreter/OptionValuePathMappings.cpp b/lldb/source/Interpreter/OptionValuePathMappings.cpp index e6a366f39061d..543b0e1b8ea8a 100644 --- a/lldb/source/Interpreter/OptionValuePathMappings.cpp +++ b/lldb/source/Interpreter/OptionValuePathMappings.cpp @@ -62,10 +62,10 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, const char *orginal_path = args.GetArgumentAtIndex(i); const char *replace_path = args.GetArgumentAtIndex(i + 1); if (VerifyPathExists(replace_path)) { - ConstString a(orginal_path); - ConstString b(replace_path); - if (!m_path_mappings.Replace(a, b, idx, m_notify_changes)) - m_path_mappings.Append(a, b, m_notify_changes); + if (!m_path_mappings.Replace(orginal_path, replace_path, idx, + m_notify_changes)) + m_path_mappings.Append(orginal_path, replace_path, + m_notify_changes); changed = true; } else { std::string previousError = @@ -102,9 +102,7 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, const char *orginal_path = args.GetArgumentAtIndex(i); const char *replace_path = args.GetArgumentAtIndex(i + 1); if (VerifyPathExists(replace_path)) { - ConstString a(orginal_path); - ConstString b(replace_path); - m_path_mappings.Append(a, b, m_notify_changes); + m_path_mappings.Append(orginal_path, replace_path, m_notify_changes); m_value_was_set = true; changed = true; } else { @@ -139,9 +137,8 @@ Status OptionValuePathMappings::SetValueFromString(llvm::StringRef value, const char *orginal_path = args.GetArgumentAtIndex(i); const char *replace_path = args.GetArgumentAtIndex(i + 1); if
(VerifyPathExists(replace_path)) { - ConstString a(orginal_path); - ConstString b(replace_path); - m_path_mappings.Insert(a, b, idx, m_notify_changes); + m_path_mappings.Insert(orginal_path, replace_path, idx, + m_notify_changes); changed = true; idx++; } else { diff --git a/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp b/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp index 2b5a038fbc91c..ccfbeec3d5891 100644 --- a/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp +++ b/lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp @@ -402,7 +402,7 @@ bool ABIMacOSX_arm64::CreateDefaultUnwindPlan(UnwindPlan &unwind_plan) { // volatile (and specifically only the lower 8 bytes of these regs), the rest // of the fp/SIMD registers are volatile. // -// v. https://github.com/ARM-software/abi-aa/blob/master/aapcs64/ +// v. https://github.com/ARM-software/abi-aa/blob/main/aapcs64/ // We treat x29 as callee preserved also, else the unwinder won't try to // retrieve fp saves. diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp index 240b458c21ea3..bd6b6335ca8c9 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp @@ -951,50 +951,70 @@ class CommandObjectMultiwordObjC_TaggedPointer_Info Process *process = m_exe_ctx.GetProcessPtr(); ExecutionContext exe_ctx(process); + ObjCLanguageRuntime *objc_runtime = ObjCLanguageRuntime::Get(*process); - if (objc_runtime) { - ObjCLanguageRuntime::TaggedPointerVendor *tagged_ptr_vendor = - objc_runtime->GetTaggedPointerVendor(); - if (tagged_ptr_vendor) { - for (size_t i = 0; i < command.GetArgumentCount(); i++) { - const char *arg_str = command.GetArgumentAtIndex(i); - if (!arg_str) - continue; - Status error; - lldb::addr_t arg_addr = OptionArgParser::ToAddress( - &exe_ctx, arg_str, LLDB_INVALID_ADDRESS, &error); - if (arg_addr == 0 || arg_addr == LLDB_INVALID_ADDRESS || error.Fail()) - continue; - auto descriptor_sp = tagged_ptr_vendor->GetClassDescriptor(arg_addr); - if (!descriptor_sp) - continue; - uint64_t info_bits = 0; - uint64_t value_bits = 0; - uint64_t payload = 0; - if (descriptor_sp->GetTaggedPointerInfo(&info_bits, &value_bits, - &payload)) { - result.GetOutputStream().Printf( - "0x%" PRIx64 " is tagged.\n\tpayload = 0x%" PRIx64 - "\n\tvalue = 0x%" PRIx64 "\n\tinfo bits = 0x%" PRIx64 - "\n\tclass = %s\n", - (uint64_t)arg_addr, payload, value_bits, info_bits, - descriptor_sp->GetClassName().AsCString("")); - } else { - result.GetOutputStream().Printf("0x%" PRIx64 " is not tagged.\n", - (uint64_t)arg_addr); - } - } - } else { - result.AppendError("current process has no tagged pointer support"); + if (!objc_runtime) { + result.AppendError("current process has no Objective-C runtime loaded"); + result.SetStatus(lldb::eReturnStatusFailed); + return false; + } + + ObjCLanguageRuntime::TaggedPointerVendor *tagged_ptr_vendor = + objc_runtime->GetTaggedPointerVendor(); + if (!tagged_ptr_vendor) { + result.AppendError("current process has no tagged pointer support"); + result.SetStatus(lldb::eReturnStatusFailed); + return false; + } + + for (size_t i = 0; i < command.GetArgumentCount(); i++) { + const char *arg_str = command.GetArgumentAtIndex(i); + if (!arg_str) + continue; + + Status error; + lldb::addr_t arg_addr = OptionArgParser::ToAddress( + &exe_ctx, arg_str, 
LLDB_INVALID_ADDRESS, &error); + if (arg_addr == 0 || arg_addr == LLDB_INVALID_ADDRESS || error.Fail()) { + result.AppendErrorWithFormatv( + "could not convert '{0}' to a valid address\n", arg_str); result.SetStatus(lldb::eReturnStatusFailed); return false; } - result.SetStatus(lldb::eReturnStatusSuccessFinishResult); - return true; + + if (!tagged_ptr_vendor->IsPossibleTaggedPointer(arg_addr)) { + result.GetOutputStream().Format("{0:x16} is not tagged\n", arg_addr); + continue; + } + + auto descriptor_sp = tagged_ptr_vendor->GetClassDescriptor(arg_addr); + if (!descriptor_sp) { + result.AppendErrorWithFormatv( + "could not get class descriptor for {0:x16}\n", arg_addr); + result.SetStatus(lldb::eReturnStatusFailed); + return false; + } + + uint64_t info_bits = 0; + uint64_t value_bits = 0; + uint64_t payload = 0; + if (descriptor_sp->GetTaggedPointerInfo(&info_bits, &value_bits, + &payload)) { + result.GetOutputStream().Format( + "{0:x} is tagged\n" + "\tpayload = {1:x16}\n" + "\tvalue = {2:x16}\n" + "\tinfo bits = {3:x16}\n" + "\tclass = {4}\n", + arg_addr, payload, value_bits, info_bits, + descriptor_sp->GetClassName().AsCString("")); + } else { + result.GetOutputStream().Format("{0:x16} is not tagged\n", arg_addr); + } } - result.AppendError("current process has no Objective-C runtime loaded"); - result.SetStatus(lldb::eReturnStatusFailed); - return false; + + result.SetStatus(lldb::eReturnStatusSuccessFinishResult); + return true; } }; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index 88ee1618454f6..995b0703bc459 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -555,589 +555,133 @@ bool PlatformDarwin::x86GetSupportedArchitectureAtIndex(uint32_t idx, return false; } +static llvm::ArrayRef<const char *> GetCompatibleArchs(ArchSpec::Core core) { + switch (core) { + default: + LLVM_FALLTHROUGH; + case ArchSpec::eCore_arm_arm64e: { + static const char *g_arm64e_compatible_archs[] = { + "arm64e", "arm64", "armv7", "armv7f", "armv7k", "armv7s", + "armv7m", "armv7em", "armv6m", "armv6", "armv5", "armv4", + "arm", "thumbv7", "thumbv7f", "thumbv7k", "thumbv7s", "thumbv7m", + "thumbv7em", "thumbv6m", "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_arm64e_compatible_archs}; + } + case ArchSpec::eCore_arm_arm64: { + static const char *g_arm64_compatible_archs[] = { + "arm64", "armv7", "armv7f", "armv7k", "armv7s", "armv7m", + "armv7em", "armv6m", "armv6", "armv5", "armv4", "arm", + "thumbv7", "thumbv7f", "thumbv7k", "thumbv7s", "thumbv7m", "thumbv7em", + "thumbv6m", "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_arm64_compatible_archs}; + } + case ArchSpec::eCore_arm_armv7: { + static const char *g_armv7_compatible_archs[] = { + "armv7", "armv6m", "armv6", "armv5", "armv4", "arm", + "thumbv7", "thumbv6m", "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv7_compatible_archs}; + } + case ArchSpec::eCore_arm_armv7f: { + static const char *g_armv7f_compatible_archs[] = { + "armv7f", "armv7", "armv6m", "armv6", "armv5", + "armv4", "arm", "thumbv7f", "thumbv7", "thumbv6m", + "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv7f_compatible_archs}; + } + case ArchSpec::eCore_arm_armv7k: { + static const char *g_armv7k_compatible_archs[] = { + "armv7k", "armv7", "armv6m", "armv6", "armv5", + "armv4", "arm", "thumbv7k", "thumbv7", "thumbv6m", + "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return
{g_armv7k_compatible_archs}; + } + case ArchSpec::eCore_arm_armv7s: { + static const char *g_armv7s_compatible_archs[] = { + "armv7s", "armv7", "armv6m", "armv6", "armv5", + "armv4", "arm", "thumbv7s", "thumbv7", "thumbv6m", + "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv7s_compatible_archs}; + } + case ArchSpec::eCore_arm_armv7m: { + static const char *g_armv7m_compatible_archs[] = { + "armv7m", "armv7", "armv6m", "armv6", "armv5", + "armv4", "arm", "thumbv7m", "thumbv7", "thumbv6m", + "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv7m_compatible_archs}; + } + case ArchSpec::eCore_arm_armv7em: { + static const char *g_armv7em_compatible_archs[] = { + "armv7em", "armv7", "armv6m", "armv6", "armv5", + "armv4", "arm", "thumbv7em", "thumbv7", "thumbv6m", + "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv7em_compatible_archs}; + } + case ArchSpec::eCore_arm_armv6m: { + static const char *g_armv6m_compatible_archs[] = { + "armv6m", "armv6", "armv5", "armv4", "arm", + "thumbv6m", "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv6m_compatible_archs}; + } + case ArchSpec::eCore_arm_armv6: { + static const char *g_armv6_compatible_archs[] = { + "armv6", "armv5", "armv4", "arm", + "thumbv6", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv6_compatible_archs}; + } + case ArchSpec::eCore_arm_armv5: { + static const char *g_armv5_compatible_archs[] = { + "armv5", "armv4", "arm", "thumbv5", "thumbv4t", "thumb", + }; + return {g_armv5_compatible_archs}; + } + case ArchSpec::eCore_arm_armv4: { + static const char *g_armv4_compatible_archs[] = { + "armv4", + "arm", + "thumbv4t", + "thumb", + }; + return {g_armv4_compatible_archs}; + } + } + return {}; +} + +const char *PlatformDarwin::GetCompatibleArch(ArchSpec::Core core, size_t idx) { + llvm::ArrayRef<const char *> compatible_archs = GetCompatibleArchs(core); + if (!compatible_archs.data()) + return nullptr; + if (idx < compatible_archs.size()) + return compatible_archs[idx]; + return nullptr; +} + /// The architecture selection rules for arm processors. These cpu subtypes have /// distinct names (e.g. armv7f) but armv7 binaries run fine on an armv7f /// processor.
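For reference, the table-driven helper above reduces each per-core case to a static data table plus a bounds-checked index, with nullptr as the natural end-of-list sentinel; the several hundred deleted switch lines in the rewritten ARMGetSupportedArchitectureAtIndex below fall out of that. Here is a small runnable sketch of the lookup idiom, assuming only llvm/ADT/ArrayRef.h; LookupCompatibleArch and main() are illustrative, and the armv6 table is copied from the hunk above.

#include "llvm/ADT/ArrayRef.h"
#include <cstdio>

// Same shape as PlatformDarwin::GetCompatibleArch: out-of-range indices
// map to nullptr so callers can enumerate without knowing the table size.
static const char *LookupCompatibleArch(llvm::ArrayRef<const char *> archs,
                                        size_t idx) {
  return idx < archs.size() ? archs[idx] : nullptr;
}

int main() {
  static const char *g_armv6_compatible_archs[] = {
      "armv6",   "armv5",   "armv4",    "arm",
      "thumbv6", "thumbv5", "thumbv4t", "thumb",
  };
  for (size_t idx = 0;; ++idx) {
    const char *arch = LookupCompatibleArch(g_armv6_compatible_archs, idx);
    if (!arch)
      break;
    std::printf("%zu: %s\n", idx, arch);
  }
  return 0;
}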
bool PlatformDarwin::ARMGetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) { - ArchSpec system_arch(GetSystemArchitecture()); - -#if defined(TARGET_OS_TV) && TARGET_OS_TV == 1 -#define OSNAME "tvos" -#elif defined(TARGET_OS_WATCH) && TARGET_OS_WATCH == 1 -#define OSNAME "watchos" -#elif defined(TARGET_OS_BRIDGE) && TARGET_OS_BRIDGE == 1 -#define OSNAME "bridgeos" -#elif defined(TARGET_OS_OSX) && TARGET_OS_OSX == 1 -#define OSNAME "macosx" -#else -#define OSNAME "ios" -#endif - -#if TARGET_OS_OSX - if (IsHost()) { - if (idx == 0) { - arch.SetTriple("arm64e-apple-macosx"); - return true; - } else if (idx == 1) { - arch.SetTriple("arm64-apple-macosx"); - return true; - } - return false; - } -#endif - + const ArchSpec system_arch = GetSystemArchitecture(); const ArchSpec::Core system_core = system_arch.GetCore(); - switch (system_core) { - default: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv7f-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv7k-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv7s-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("armv7m-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("armv7em-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 12: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 13: - arch.SetTriple("thumbv7f-apple-" OSNAME); - return true; - case 14: - arch.SetTriple("thumbv7k-apple-" OSNAME); - return true; - case 15: - arch.SetTriple("thumbv7s-apple-" OSNAME); - return true; - case 16: - arch.SetTriple("thumbv7m-apple-" OSNAME); - return true; - case 17: - arch.SetTriple("thumbv7em-apple-" OSNAME); - return true; - case 18: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 19: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 20: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 21: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 22: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_arm64: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv7s-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv7f-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv7m-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv7em-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 12: - arch.SetTriple("thumbv7f-apple-" OSNAME); - return true; - case 13: - arch.SetTriple("thumbv7k-apple-" OSNAME); - return true; - case 14: - arch.SetTriple("thumbv7s-apple-" OSNAME); - return true; - case 
15: - arch.SetTriple("thumbv7m-apple-" OSNAME); - return true; - case 16: - arch.SetTriple("thumbv7em-apple-" OSNAME); - return true; - case 17: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 18: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 19: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 20: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 21: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv7f: - switch (idx) { - case 0: - arch.SetTriple("armv7f-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("thumbv7f-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 12: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 13: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv7k: - switch (idx) { - case 0: - arch.SetTriple("armv7k-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("thumbv7k-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 12: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 13: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv7s: - switch (idx) { - case 0: - arch.SetTriple("armv7s-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("thumbv7s-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 12: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 13: - arch.SetTriple("thumb-apple-" OSNAME); 
- return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv7m: - switch (idx) { - case 0: - arch.SetTriple("armv7m-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("thumbv7m-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 12: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 13: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv7em: - switch (idx) { - case 0: - arch.SetTriple("armv7em-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("thumbv7em-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 12: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 13: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv7: - switch (idx) { - case 0: - arch.SetTriple("armv7-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("thumbv7-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 10: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 11: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv6m: - switch (idx) { - case 0: - arch.SetTriple("armv6m-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("thumbv6m-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 7: - 
arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 8: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 9: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv6: - switch (idx) { - case 0: - arch.SetTriple("armv6-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("thumbv6-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 6: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 7: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - case ArchSpec::eCore_arm_armv5: - switch (idx) { - case 0: - arch.SetTriple("armv5-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("thumbv5-apple-" OSNAME); - return true; - case 4: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 5: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_armv4: - switch (idx) { - case 0: - arch.SetTriple("armv4-apple-" OSNAME); - return true; - case 1: - arch.SetTriple("arm-apple-" OSNAME); - return true; - case 2: - arch.SetTriple("thumbv4t-apple-" OSNAME); - return true; - case 3: - arch.SetTriple("thumb-apple-" OSNAME); - return true; - default: - break; - } - break; + if (const char *compatible_arch = GetCompatibleArch(system_core, idx)) { + llvm::Triple triple; + triple.setArchName(compatible_arch); + triple.setVendor(llvm::Triple::VendorType::Apple); + arch.SetTriple(triple); + return true; } + arch.Clear(); return false; } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h index c3862f14a040c..28f257300571e 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h @@ -103,6 +103,9 @@ class PlatformDarwin : public PlatformPOSIX { static lldb_private::FileSpec GetCurrentCommandLineToolsDirectory(); protected: + static const char *GetCompatibleArch(lldb_private::ArchSpec::Core core, + size_t idx); + struct CrashInfoAnnotations { uint64_t version; // unsigned long uint64_t message; // char * diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp index 1294928e09a5d..fc65945723218 100644 --- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp @@ -220,7 +220,7 @@ Status NativeRegisterContextWindows_arm64::GPRRead(const uint32_t reg, reg_value.SetUInt64(tls_context.Pc); break; case gpr_cpsr_arm64: - reg_value.SetUInt64(tls_context.Cpsr); + reg_value.SetUInt32(tls_context.Cpsr); break; case gpr_w0_arm64: @@ -317,7 +317,7 @@ NativeRegisterContextWindows_arm64::GPRWrite(const uint32_t reg, tls_context.Pc = reg_value.GetAsUInt64(); break; case gpr_cpsr_arm64: - tls_context.Cpsr = reg_value.GetAsUInt64(); + tls_context.Cpsr = reg_value.GetAsUInt32(); break; case gpr_w0_arm64: diff --git 
a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp index 97ba1bdc9ee9e..f6e89bb0662a3 100644 --- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp @@ -601,8 +601,8 @@ Status ProcessWindows::DoDeallocateMemory(lldb::addr_t ptr) { return ProcessDebugger::DeallocateMemory(ptr); } -Status ProcessWindows::GetMemoryRegionInfo(lldb::addr_t vm_addr, - MemoryRegionInfo &info) { +Status ProcessWindows::DoGetMemoryRegionInfo(lldb::addr_t vm_addr, + MemoryRegionInfo &info) { return ProcessDebugger::GetMemoryRegionInfo(vm_addr, info); } diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.h b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.h index fc83649818edd..6f6f93f588e33 100644 --- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.h +++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.h @@ -78,8 +78,6 @@ class ProcessWindows : public Process, public ProcessDebugger { lldb::addr_t DoAllocateMemory(size_t size, uint32_t permissions, Status &error) override; Status DoDeallocateMemory(lldb::addr_t ptr) override; - Status GetMemoryRegionInfo(lldb::addr_t vm_addr, - MemoryRegionInfo &info) override; lldb::addr_t GetImageInfoAddress() override; @@ -103,6 +101,10 @@ class ProcessWindows : public Process, public ProcessDebugger { Status EnableWatchpoint(Watchpoint *wp, bool notify = true) override; Status DisableWatchpoint(Watchpoint *wp, bool notify = true) override; +protected: + Status DoGetMemoryRegionInfo(lldb::addr_t vm_addr, + MemoryRegionInfo &info) override; + private: struct WatchpointInfo { uint32_t slot_id; diff --git a/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp b/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp index 10bab7d37561d..3ce288597c86a 100644 --- a/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp @@ -171,7 +171,7 @@ bool RegisterContextWindows_arm64::ReadRegister(const RegisterInfo *reg_info, reg_value.SetUInt64(m_context.Pc); break; case gpr_cpsr: - reg_value.SetUInt64(m_context.Cpsr); + reg_value.SetUInt32(m_context.Cpsr); break; case gpr_w0: @@ -385,7 +385,7 @@ bool RegisterContextWindows_arm64::WriteRegister( m_context.Pc = reg_value.GetAsUInt64(); break; case gpr_cpsr: - m_context.Cpsr = reg_value.GetAsUInt64(); + m_context.Cpsr = reg_value.GetAsUInt32(); break; case fpu_v0: diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index b852a01643753..23b346d5c17f1 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -281,8 +281,8 @@ size_t ProcessElfCore::ReadMemory(lldb::addr_t addr, void *buf, size_t size, return DoReadMemory(addr, buf, size, error); } -Status ProcessElfCore::GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region_info) { +Status ProcessElfCore::DoGetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region_info) { region_info.Clear(); const VMRangeToPermissions::Entry *permission_entry = m_core_range_infos.FindEntryThatContainsOrFollows(load_addr); diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h index
67df3c5fac76a..fd36e50278168 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h @@ -86,10 +86,6 @@ class ProcessElfCore : public lldb_private::PostMortemProcess { size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, lldb_private::Status &error) override; - lldb_private::Status - GetMemoryRegionInfo(lldb::addr_t load_addr, - lldb_private::MemoryRegionInfo &region_info) override; - lldb::addr_t GetImageInfoAddress() override; lldb_private::ArchSpec GetArchitecture(); @@ -105,6 +101,10 @@ class ProcessElfCore : public lldb_private::PostMortemProcess { bool DoUpdateThreadList(lldb_private::ThreadList &old_thread_list, lldb_private::ThreadList &new_thread_list) override; + lldb_private::Status + DoGetMemoryRegionInfo(lldb::addr_t load_addr, + lldb_private::MemoryRegionInfo &region_info) override; + private: struct NT_FILE_Entry { lldb::addr_t start; diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 6fd51126762eb..364a37a0cc283 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -1109,9 +1109,8 @@ void GDBRemoteCommunicationClient::MaybeEnableCompression( if (avail_type != CompressionType::None) { StringExtractorGDBRemote response; - llvm::Twine packet = "QEnableCompression:type:" + avail_name + ";"; - if (SendPacketAndWaitForResponse(packet.str(), response) != - PacketResult::Success) + std::string packet = "QEnableCompression:type:" + avail_name.str() + ";"; + if (SendPacketAndWaitForResponse(packet, response) != PacketResult::Success) return; if (response.IsOKResponse()) { diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 4f78ae428147c..ce97240e8d905 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -2903,8 +2903,8 @@ lldb::addr_t ProcessGDBRemote::DoAllocateMemory(size_t size, return allocated_addr; } -Status ProcessGDBRemote::GetMemoryRegionInfo(addr_t load_addr, - MemoryRegionInfo &region_info) { +Status ProcessGDBRemote::DoGetMemoryRegionInfo(addr_t load_addr, + MemoryRegionInfo &region_info) { Status error(m_gdb_comm.GetMemoryRegionInfo(load_addr, region_info)); return error; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 9e0583408e42a..8134bc6b530d6 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -144,9 +144,6 @@ class ProcessGDBRemote : public Process, lldb::addr_t DoAllocateMemory(size_t size, uint32_t permissions, Status &error) override; - Status GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region_info) override; - Status DoDeallocateMemory(lldb::addr_t ptr) override; // Process STDIO @@ -424,6 +421,9 @@ class ProcessGDBRemote : public Process, Status DoWriteMemoryTags(lldb::addr_t addr, size_t len, int32_t type, const std::vector<uint8_t> &tags) override; + Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region_info) override; + private: // For ProcessGDBRemote only std::string m_partial_profile_data; diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp
b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp index 6aed04565eb01..59c04590672d3 100644 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp @@ -633,8 +633,8 @@ size_t ProcessMachCore::DoReadMemory(addr_t addr, void *buf, size_t size, return bytes_read; } -Status ProcessMachCore::GetMemoryRegionInfo(addr_t load_addr, - MemoryRegionInfo &region_info) { +Status ProcessMachCore::DoGetMemoryRegionInfo(addr_t load_addr, + MemoryRegionInfo &region_info) { region_info.Clear(); const VMRangeToPermissions::Entry *permission_entry = m_core_range_infos.FindEntryThatContainsOrFollows(load_addr); diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h index e55bfcfcc723b..b5ca515a7d3fb 100644 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.h @@ -68,10 +68,6 @@ class ProcessMachCore : public lldb_private::PostMortemProcess { size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, lldb_private::Status &error) override; - lldb_private::Status - GetMemoryRegionInfo(lldb::addr_t load_addr, - lldb_private::MemoryRegionInfo &region_info) override; - lldb::addr_t GetImageInfoAddress() override; protected: @@ -84,6 +80,10 @@ class ProcessMachCore : public lldb_private::PostMortemProcess { lldb_private::ObjectFile *GetCoreObjectFile(); + lldb_private::Status + DoGetMemoryRegionInfo(lldb::addr_t load_addr, + lldb_private::MemoryRegionInfo &region_info) override; + private: bool GetDynamicLoaderAddress(lldb::addr_t addr); diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp index 08cf58b0a84a1..736cfa0700886 100644 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp @@ -439,8 +439,8 @@ void ProcessMinidump::BuildMemoryRegions() { llvm::sort(*m_memory_regions); } -Status ProcessMinidump::GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region) { +Status ProcessMinidump::DoGetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region) { BuildMemoryRegions(); region = MinidumpParser::GetMemoryRegionInfo(*m_memory_regions, load_addr); return Status(); diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h index 3501d38a0f27e..5360269199cdd 100644 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h @@ -75,9 +75,6 @@ class ProcessMinidump : public PostMortemProcess { ArchSpec GetArchitecture(); - Status GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) override; - Status GetMemoryRegions( lldb_private::MemoryRegionInfos &region_list) override; @@ -98,6 +95,9 @@ class ProcessMinidump : public PostMortemProcess { bool DoUpdateThreadList(ThreadList &old_thread_list, ThreadList &new_thread_list) override; + Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info) override; + void ReadModuleList(); lldb::ModuleSP GetOrCreateModule(lldb_private::UUID minidump_uuid, diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index c0eefbf60337c..63c68c2a20236 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++
b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -249,8 +249,8 @@ ArchSpec ScriptedProcess::GetArchitecture() { return GetTarget().GetArchitecture(); } -Status ScriptedProcess::GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &region) { +Status ScriptedProcess::DoGetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &region) { CheckInterpreterAndScriptObject(); Status error; diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h index 68cc6aa372104..fd4a94b9b6adb 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h @@ -86,9 +86,6 @@ class ScriptedProcess : public Process { ArchSpec GetArchitecture(); - Status GetMemoryRegionInfo(lldb::addr_t load_addr, - MemoryRegionInfo &range_info) override; - Status GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) override; @@ -102,6 +99,9 @@ class ScriptedProcess : public Process { bool DoUpdateThreadList(ThreadList &old_thread_list, ThreadList &new_thread_list) override; + Status DoGetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info) override; + private: friend class ScriptedThread; diff --git a/lldb/source/Plugins/REPL/Clang/ClangREPL.h b/lldb/source/Plugins/REPL/Clang/ClangREPL.h index 3666a53a2ce39..07b7f73b1fafb 100644 --- a/lldb/source/Plugins/REPL/Clang/ClangREPL.h +++ b/lldb/source/Plugins/REPL/Clang/ClangREPL.h @@ -28,9 +28,7 @@ class ClangREPL : public REPL { Debugger *debugger, Target *target, const char *repl_options); - static lldb_private::ConstString GetPluginNameStatic() { - return ConstString("ClangREPL"); - } + static llvm::StringRef GetPluginNameStatic() { return "ClangREPL"; } protected: Status DoInitialization() override; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index b6b99d2e9bcf6..4ac6e165dda37 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -843,7 +843,8 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, } clang_type = m_ast.CreateEnumerationType( - attrs.name.GetCString(), GetClangDeclContextContainingDIE(die, nullptr), + attrs.name.GetStringRef(), + GetClangDeclContextContainingDIE(die, nullptr), GetOwningClangModule(die), attrs.decl, enumerator_clang_type, attrs.is_scoped_enum); } else { diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 8856c7e72e08a..c29fc2230a674 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -1105,7 +1105,7 @@ clang::QualType PdbAstBuilder::CreateEnumType(PdbTypeSymId id, Declaration declaration; CompilerType enum_ct = m_clang.CreateEnumerationType( - uname.c_str(), decl_context, OptionalClangModuleID(), declaration, + uname, decl_context, OptionalClangModuleID(), declaration, ToCompilerType(underlying_type), er.isScoped()); TypeSystemClang::StartTagDeclarationDefinition(enum_ct); diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp index 78a0d09a681ae..f45287fd0fff0 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp @@ -497,7 +497,7 @@ lldb::TypeSP
PDBASTParser::CreateLLDBTypeFromPDBType(const PDBSymbol &type) { // Class). Set it false for now. bool isScoped = false; - ast_enum = m_ast.CreateEnumerationType(name.c_str(), decl_context, + ast_enum = m_ast.CreateEnumerationType(name, decl_context, OptionalClangModuleID(), decl, builtin_type, isScoped); diff --git a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp index 6e97f3d440f3f..d9f4174b19a3c 100644 --- a/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp +++ b/lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp @@ -239,7 +239,7 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, DBGSourcePath = resolved_source_path.GetPath(); } module_sp->GetSourceMappingList().Append( - key, ConstString(DBGSourcePath), true); + key.GetStringRef(), DBGSourcePath, true); // With version 2 of DBGSourcePathRemapping, we // can chop off the last two filename parts // from the source remapping and get a more @@ -254,8 +254,7 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, source_path.RemoveLastPathComponent(); source_path.RemoveLastPathComponent(); module_sp->GetSourceMappingList().Append( - ConstString(build_path.GetPath().c_str()), - ConstString(source_path.GetPath().c_str()), + build_path.GetPath(), source_path.GetPath(), true); } } @@ -276,8 +275,7 @@ SymbolVendorMacOSX::CreateInstance(const lldb::ModuleSP &module_sp, DBGSourcePath = resolved_source_path.GetPath(); } module_sp->GetSourceMappingList().Append( - ConstString(DBGBuildSourcePath), - ConstString(DBGSourcePath), true); + DBGBuildSourcePath, DBGSourcePath, true); } } } diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 9beccf30f94be..077104b8fe857 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -570,10 +570,6 @@ TypeSystemClang::TypeSystemClang(llvm::StringRef name, // Destructor TypeSystemClang::~TypeSystemClang() { Finalize(); } -ConstString TypeSystemClang::GetPluginNameStatic() { - return ConstString("clang"); -} - lldb::TypeSystemSP TypeSystemClang::CreateInstance(lldb::LanguageType language, lldb_private::Module *module, Target *target) { @@ -1331,19 +1327,16 @@ CompilerType TypeSystemClang::CreateRecordType( decl->setAnonymousStructOrUnion(true); } - if (decl) { - if (metadata) - SetMetadata(decl, *metadata); + if (metadata) + SetMetadata(decl, *metadata); - if (access_type != eAccessNone) - decl->setAccess(ConvertAccessTypeToAccessSpecifier(access_type)); + if (access_type != eAccessNone) + decl->setAccess(ConvertAccessTypeToAccessSpecifier(access_type)); - if (decl_ctx) - decl_ctx->addDecl(decl); + if (decl_ctx) + decl_ctx->addDecl(decl); - return GetType(ast.getTagDeclType(decl)); - } - return CompilerType(); + return GetType(ast.getTagDeclType(decl)); } namespace { @@ -1550,7 +1543,7 @@ static bool ClassTemplateAllowsToInstantiationArgs( ClassTemplateDecl *TypeSystemClang::CreateClassTemplateDecl( DeclContext *decl_ctx, OptionalClangModuleID owning_module, - lldb::AccessType access_type, const char *class_name, int kind, + lldb::AccessType access_type, llvm::StringRef class_name, int kind, const TemplateParameterInfos &template_param_infos) { ASTContext &ast = getASTContext(); @@ -1609,15 +1602,13 @@ ClassTemplateDecl *TypeSystemClang::CreateClassTemplateDecl( 
template_cxx_decl->setDescribedClassTemplate(class_template_decl); SetOwningModule(class_template_decl, owning_module); - if (class_template_decl) { - if (access_type != eAccessNone) - class_template_decl->setAccess( - ConvertAccessTypeToAccessSpecifier(access_type)); + if (access_type != eAccessNone) + class_template_decl->setAccess( + ConvertAccessTypeToAccessSpecifier(access_type)); - decl_ctx->addDecl(class_template_decl); + decl_ctx->addDecl(class_template_decl); - VerifyDecl(class_template_decl); - } + VerifyDecl(class_template_decl); return class_template_decl; } @@ -1807,7 +1798,7 @@ CompilerType TypeSystemClang::CreateObjCClass( decl->setImplicit(isInternal); SetOwningModule(decl, owning_module); - if (decl && metadata) + if (metadata) SetMetadata(decl, *metadata); return GetType(ast.getObjCInterfaceType(decl)); @@ -2145,8 +2136,7 @@ FunctionDecl *TypeSystemClang::CreateFunctionDeclaration( ? ConstexprSpecKind::Constexpr : ConstexprSpecKind::Unspecified); SetOwningModule(func_decl, owning_module); - if (func_decl) - decl_ctx->addDecl(func_decl); + decl_ctx->addDecl(func_decl); VerifyDecl(func_decl); @@ -2292,7 +2282,7 @@ CompilerType TypeSystemClang::GetOrCreateStructForIdentifier( #pragma mark Enumeration Types CompilerType TypeSystemClang::CreateEnumerationType( - const char *name, clang::DeclContext *decl_ctx, + llvm::StringRef name, clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module, const Declaration &decl, const CompilerType &integer_clang_type, bool is_scoped) { // TODO: Do something intelligent with the Declaration object passed in @@ -2303,24 +2293,21 @@ CompilerType TypeSystemClang::CreateEnumerationType( // const bool IsFixed = false; EnumDecl *enum_decl = EnumDecl::CreateDeserialized(ast, 0); enum_decl->setDeclContext(decl_ctx); - if (name && name[0]) + if (!name.empty()) enum_decl->setDeclName(&ast.Idents.get(name)); enum_decl->setScoped(is_scoped); enum_decl->setScopedUsingClassTag(is_scoped); enum_decl->setFixed(false); SetOwningModule(enum_decl, owning_module); - if (enum_decl) { - if (decl_ctx) - decl_ctx->addDecl(enum_decl); + if (decl_ctx) + decl_ctx->addDecl(enum_decl); - // TODO: check if we should be setting the promotion type too? - enum_decl->setIntegerType(ClangUtil::GetQualType(integer_clang_type)); + // TODO: check if we should be setting the promotion type too? 
+ enum_decl->setIntegerType(ClangUtil::GetQualType(integer_clang_type)); - enum_decl->setAccess(AS_public); // TODO respect what's in the debug info + enum_decl->setAccess(AS_public); // TODO respect what's in the debug info - return GetType(ast.getTagDeclType(enum_decl)); - } - return CompilerType(); + return GetType(ast.getTagDeclType(enum_decl)); } CompilerType TypeSystemClang::GetIntTypeFromBitSize(size_t bit_size, diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index 93300439806d4..f3a07397ec440 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -138,11 +138,9 @@ class TypeSystemClang : public TypeSystem { void Finalize() override; // PluginInterface functions - llvm::StringRef GetPluginName() override { - return GetPluginNameStatic().GetStringRef(); - } + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } - static ConstString GetPluginNameStatic(); + static llvm::StringRef GetPluginNameStatic() { return "clang"; } static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language, Module *module, Target *target); @@ -345,11 +343,10 @@ class TypeSystemClang : public TypeSystem { clang::FunctionDecl *func_decl, clang::FunctionTemplateDecl *Template, const TemplateParameterInfos &infos); - clang::ClassTemplateDecl * - CreateClassTemplateDecl(clang::DeclContext *decl_ctx, - OptionalClangModuleID owning_module, - lldb::AccessType access_type, const char *class_name, - int kind, const TemplateParameterInfos &infos); + clang::ClassTemplateDecl *CreateClassTemplateDecl( + clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module, + lldb::AccessType access_type, llvm::StringRef class_name, int kind, + const TemplateParameterInfos &infos); clang::TemplateTemplateParmDecl * CreateTemplateTemplateParmDecl(const char *template_name); @@ -419,7 +416,7 @@ class TypeSystemClang : public TypeSystem { size_t element_count, bool is_vector); // Enumeration Types - CompilerType CreateEnumerationType(const char *name, + CompilerType CreateEnumerationType(llvm::StringRef name, clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module, const Declaration &decl, diff --git a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp index 2655e4de9063a..a13b4a7a54f2c 100644 --- a/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp +++ b/lldb/source/Symbol/LocateSymbolFileMacOSX.cpp @@ -414,9 +414,8 @@ static bool GetModuleSpecInfoFromUUIDDictionary(CFDictionaryRef uuid_dict, // last two filename parts from the source remapping and get a more // general source remapping that still works. Add this as another // option in addition to the full source path remap. 
- module_spec.GetSourceMappingList().Append( - ConstString(DBGBuildSourcePath.c_str()), - ConstString(DBGSourcePath.c_str()), true); + module_spec.GetSourceMappingList().Append(DBGBuildSourcePath, + DBGSourcePath, true); if (do_truncate_remapping_names) { FileSpec build_path(DBGBuildSourcePath.c_str()); FileSpec source_path(DBGSourcePath.c_str()); @@ -425,8 +424,7 @@ static bool GetModuleSpecInfoFromUUIDDictionary(CFDictionaryRef uuid_dict, source_path.RemoveLastPathComponent(); source_path.RemoveLastPathComponent(); module_spec.GetSourceMappingList().Append( - ConstString(build_path.GetPath().c_str()), - ConstString(source_path.GetPath().c_str()), true); + build_path.GetPath(), source_path.GetPath(), true); } } } @@ -458,9 +456,8 @@ static bool GetModuleSpecInfoFromUUIDDictionary(CFDictionaryRef uuid_dict, FileSystem::Instance().Resolve(resolved_source_path); DBGSourcePath = resolved_source_path.GetPath(); } - module_spec.GetSourceMappingList().Append( - ConstString(DBGBuildSourcePath.c_str()), - ConstString(DBGSourcePath.c_str()), true); + module_spec.GetSourceMappingList().Append(DBGBuildSourcePath, + DBGSourcePath, true); } } return success; }
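
The PathMappingList diff that follows continues the ConstString cleanups above, and the motivation is worth spelling out: lldb_private::ConstString interns its contents in a process-lifetime string pool, so wrapping every transient remapped path in ConstString(...) pins a copy in memory forever. Taking llvm::StringRef lets the list normalize into ordinary owned strings and intern only what it actually stores. A toy model of the new contract (std::string_view stands in for llvm::StringRef, and normalize() for the FileSpec round-trip done by NormalizePath below):

#include <string>
#include <string_view>
#include <utility>
#include <vector>

// Stand-in for NormalizePath(): the real code round-trips through FileSpec.
static std::string normalize(std::string_view path) { return std::string(path); }

class ToyPathMappingList {
  std::vector<std::pair<std::string, std::string>> m_pairs;

public:
  // Callers pass cheap views; the list owns normalized copies. With the old
  // ConstString parameters, every call site paid for a permanent pool entry.
  void Append(std::string_view path, std::string_view replacement) {
    m_pairs.emplace_back(normalize(path), normalize(replacement));
  }
};
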
diff --git a/lldb/source/Target/PathMappingList.cpp b/lldb/source/Target/PathMappingList.cpp index b660c310ef31a..e49f6213cf27d 100644 --- a/lldb/source/Target/PathMappingList.cpp +++ b/lldb/source/Target/PathMappingList.cpp @@ -30,11 +30,11 @@ namespace { // with the raw path pair, which doesn't work anymore because the paths have // been normalized when the debug info was loaded. So we need to store // normalized path pairs to ensure things match up. - ConstString NormalizePath(ConstString path) { - // If we use "path" to construct a FileSpec, it will normalize the path for - // us. We then grab the string and turn it back into a ConstString. - return ConstString(FileSpec(path.GetStringRef()).GetPath()); - } +std::string NormalizePath(llvm::StringRef path) { + // If we use "path" to construct a FileSpec, it will normalize the path for + // us. We then grab the string. + return FileSpec(path).GetPath(); +} } // PathMappingList constructor PathMappingList::PathMappingList() : m_pairs() {} @@ -59,8 +59,8 @@ const PathMappingList &PathMappingList::operator=(const PathMappingList &rhs) { PathMappingList::~PathMappingList() = default; -void PathMappingList::Append(ConstString path, - ConstString replacement, bool notify) { +void PathMappingList::Append(llvm::StringRef path, llvm::StringRef replacement, + bool notify) { ++m_mod_id; m_pairs.emplace_back(pair(NormalizePath(path), NormalizePath(replacement))); if (notify && m_callback) @@ -78,9 +78,8 @@ void PathMappingList::Append(const PathMappingList &rhs, bool notify) { } } -void PathMappingList::Insert(ConstString path, - ConstString replacement, uint32_t index, - bool notify) { +void PathMappingList::Insert(llvm::StringRef path, llvm::StringRef replacement, + uint32_t index, bool notify) { ++m_mod_id; iterator insert_iter; if (index >= m_pairs.size()) @@ -93,9 +92,8 @@ void PathMappingList::Insert(ConstString path, m_callback(*this, m_callback_baton); } -bool PathMappingList::Replace(ConstString path, - ConstString replacement, uint32_t index, - bool notify) { +bool PathMappingList::Replace(llvm::StringRef path, llvm::StringRef replacement, + uint32_t index, bool notify) { if (index >= m_pairs.size()) return false; ++m_mod_id; @@ -218,18 +216,22 @@ bool PathMappingList::ReverseRemapPath(const FileSpec &file, FileSpec &fixed) co } llvm::Optional<FileSpec> PathMappingList::FindFile(const FileSpec &orig_spec) const { - if (auto remapped = RemapPath(orig_spec.GetPath(), /*only_if_exists=*/true)) + // We must normalize the orig_spec again using the host's path style, + // otherwise there will be mismatch between the host and remote platform + // if they use different path styles. + if (auto remapped = RemapPath(NormalizePath(orig_spec.GetPath()), + /*only_if_exists=*/true)) return remapped; return {}; } -bool PathMappingList::Replace(ConstString path, - ConstString new_path, bool notify) { +bool PathMappingList::Replace(llvm::StringRef path, llvm::StringRef new_path, + bool notify) { uint32_t idx = FindIndexForPath(path); if (idx < m_pairs.size()) { ++m_mod_id; - m_pairs[idx].second = new_path; + m_pairs[idx].second = ConstString(new_path); if (notify && m_callback) m_callback(*this, m_callback_baton); return true; } @@ -285,8 +287,8 @@ bool PathMappingList::GetPathsAtIndex(uint32_t idx, ConstString &path, return false; } -uint32_t PathMappingList::FindIndexForPath(ConstString orig_path) const { - const ConstString path = NormalizePath(orig_path); +uint32_t PathMappingList::FindIndexForPath(llvm::StringRef orig_path) const { - wait const ConstString path = ConstString(NormalizePath(orig_path)); const_iterator pos; const_iterator begin = m_pairs.begin(); const_iterator end = m_pairs.end(); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index d901d523a1036..1ae82289bdbfe 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -5896,6 +5896,13 @@ Process::AdvanceAddressToNextBranchInstruction(Address default_stop_addr, return retval; } +Status Process::GetMemoryRegionInfo(lldb::addr_t load_addr, + MemoryRegionInfo &range_info) { + if (auto abi = GetABI()) + load_addr = abi->FixDataAddress(load_addr); + return DoGetMemoryRegionInfo(load_addr, range_info); +} + Status Process::GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) { diff --git a/lldb/source/Utility/FileSpec.cpp b/lldb/source/Utility/FileSpec.cpp index bea3c6d6268b3..601edb86c1b0c 100644 --- 
a/lldb/source/Utility/FileSpec.cpp +++ b/lldb/source/Utility/FileSpec.cpp @@ -43,9 +43,7 @@ static constexpr FileSpec::Style GetNativeStyle() { } bool PathStyleIsPosix(FileSpec::Style style) { - return (style == FileSpec::Style::posix || - (style == FileSpec::Style::native && - GetNativeStyle() == FileSpec::Style::posix)); + return llvm::sys::path::is_style_posix(style); } const char *GetPathSeparators(FileSpec::Style style) { diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py index bc681825d4a3b..e6b1d43599989 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/TestDataFormatterCpp.py @@ -120,32 +120,32 @@ def cleanup(): ' = ptr = ', ' "1234567890123456789012345678901234567890123456789012345678901234ABC"']) - self.runCmd("type summary add -c Point") + self.runCmd("type summary add -c TestPoint") self.expect("frame variable iAmSomewhere", substrs=['x = 4', 'y = 6']) self.expect("type summary list", - substrs=['Point', + substrs=['TestPoint', 'one-line']) - self.runCmd("type summary add --summary-string \"y=${var.y%x}\" Point") + self.runCmd("type summary add --summary-string \"y=${var.y%x}\" TestPoint") self.expect("frame variable iAmSomewhere", substrs=['y=0x']) self.runCmd( - "type summary add --summary-string \"y=${var.y},x=${var.x}\" Point") + "type summary add --summary-string \"y=${var.y},x=${var.x}\" TestPoint") self.expect("frame variable iAmSomewhere", substrs=['y=6', 'x=4']) - self.runCmd("type summary add --summary-string \"hello\" Point -e") + self.runCmd("type summary add --summary-string \"hello\" TestPoint -e") self.expect("type summary list", - substrs=['Point', + substrs=['TestPoint', 'show children']) self.expect("frame variable iAmSomewhere", diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/main.cpp index b22a3d8c732ad..c81a68fd2094a 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-cpp/main.cpp @@ -22,10 +22,10 @@ typedef Type3 Type4; // should show as char typedef int ChildType; // should show as int typedef int AnotherChildType; // should show as int -struct Point { +struct TestPoint { int x; int y; - Point(int X = 3, int Y = 2) : x(X), y(Y) {} + TestPoint(int X = 3, int Y = 2) : x(X), y(Y) {} }; typedef float ShowMyGuts; @@ -85,7 +85,7 @@ int main (int argc, const char * argv[]) Speed* SPPtrILookHex = new Speed(16); - Point iAmSomewhere(4,6); + TestPoint iAmSomewhere(4,6); i_am_cool *cool_pointer = (i_am_cool*)malloc(sizeof(i_am_cool)*3); cool_pointer[0] = i_am_cool(3,-3.141592,'E'); diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-globals/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-globals/main.cpp index 4e4baeda5ff8f..570c21779674d 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-globals/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-globals/main.cpp @@ -1,7 +1,3 @@ -#include -#include -#include - struct Point { int x; int y; diff --git a/lldb/test/API/lang/cpp/class-loading-via-member-typedef/Makefile b/lldb/test/API/lang/cpp/class-loading-via-member-typedef/Makefile new file mode 100644 index 
0000000000000..99998b20bcb05 --- /dev/null +++ b/lldb/test/API/lang/cpp/class-loading-via-member-typedef/Makefile @@ -0,0 +1,3 @@ +CXX_SOURCES := main.cpp + +include Makefile.rules diff --git a/lldb/test/API/lang/cpp/class-loading-via-member-typedef/TestClassLoadingViaMemberTypedef.py b/lldb/test/API/lang/cpp/class-loading-via-member-typedef/TestClassLoadingViaMemberTypedef.py new file mode 100644 index 0000000000000..6d1a85fff7214 --- /dev/null +++ b/lldb/test/API/lang/cpp/class-loading-via-member-typedef/TestClassLoadingViaMemberTypedef.py @@ -0,0 +1,41 @@ +""" +Tests loading of classes when the loading is triggered via a typedef inside the +class (and not via the normal LLDB lookup that first resolves the surrounding +class). +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test(self): + self.build() + self.createTestTarget() + + # Print the top-level typedef which triggers the loading of the class + # that the typedef is defined inside. + self.expect_expr( + "pull_in_classes", + result_type="StructWithMember::MemberTypedef", + result_value="0", + ) + + # Print the classes and check their types. + self.expect_expr( + "struct_to_print", + result_type="StructWithMember", + result_children=[ + ValueCheck( + name="m", + type="StructWithNested::Nested<int>::OtherTypedef", + children=[ValueCheck(name="i", value="0", type="int")], + ) + ], + ) diff --git a/lldb/test/API/lang/cpp/class-loading-via-member-typedef/main.cpp b/lldb/test/API/lang/cpp/class-loading-via-member-typedef/main.cpp new file mode 100644 index 0000000000000..ba08d3bcbfd59 --- /dev/null +++ b/lldb/test/API/lang/cpp/class-loading-via-member-typedef/main.cpp @@ -0,0 +1,31 @@ +struct TopLevelStruct { + int i; +}; + +// Contains a templated nested class with a typedef. +struct StructWithNested { + template <typename T> + struct Nested { + // Typedef in a class. Intended to be referenced directly so that it can + // trigger the loading of the surrounding classes. + typedef TopLevelStruct OtherTypedef; + }; +}; + +// Contains a typedef. +struct StructWithMember { + // This member pulls in the typedef (and classes) above. + StructWithNested::Nested<int>::OtherTypedef m; + // Typedef in a class. Intended to be referenced directly so that it can + // trigger the loading of the surrounding class. + typedef int MemberTypedef; +}; + +// This is printed and will pull in the typedef in StructWithMember. 
+StructWithMember::MemberTypedef pull_in_classes; + + +StructWithMember struct_to_print; + + +int main() {} diff --git a/lldb/test/API/lang/objc/tagged-pointer/Makefile b/lldb/test/API/lang/objc/tagged-pointer/Makefile new file mode 100644 index 0000000000000..afecbf969483e --- /dev/null +++ b/lldb/test/API/lang/objc/tagged-pointer/Makefile @@ -0,0 +1,4 @@ +OBJC_SOURCES := main.m +LD_EXTRAS := -lobjc -framework Foundation + +include Makefile.rules diff --git a/lldb/test/API/lang/objc/tagged-pointer/TestTaggedPointerCmd.py b/lldb/test/API/lang/objc/tagged-pointer/TestTaggedPointerCmd.py new file mode 100644 index 0000000000000..e5ad111217de9 --- /dev/null +++ b/lldb/test/API/lang/objc/tagged-pointer/TestTaggedPointerCmd.py @@ -0,0 +1,20 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestTaggedPointerCommand(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + @no_debug_info_test + def test(self): + self.build() + lldbutil.run_to_source_breakpoint(self,"// break here", lldb.SBFileSpec("main.m")) + + self.expect("lang objc tagged-pointer info bogus", error=True, + patterns=["could not convert 'bogus' to a valid address"]) + + self.expect("lang objc tagged-pointer info 0x0", error=True, + patterns=["could not convert '0x0' to a valid address"]) diff --git a/lldb/test/API/lang/objc/tagged-pointer/main.m b/lldb/test/API/lang/objc/tagged-pointer/main.m new file mode 100644 index 0000000000000..11a9781482f11 --- /dev/null +++ b/lldb/test/API/lang/objc/tagged-pointer/main.m @@ -0,0 +1,6 @@ +#import <Foundation/Foundation.h> +int main() { + id n1 = [NSNumber numberWithInt:1]; + printf("%x\n", n1); // break here + return 0; +} diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile b/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py new file mode 100644 index 0000000000000..b175f6234b10b --- /dev/null +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/TestAArch64LinuxTaggedMemoryRegion.py @@ -0,0 +1,42 @@ +""" +Test that "memory region" lookup uses the ABI plugin to remove +non address bits from addresses before lookup. 
+""" + + + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class AArch64LinuxTaggedMemoryRegionTestCase(TestBase): + + mydir = TestBase.compute_mydir(__file__) + + NO_DEBUG_INFO_TESTCASE = True + + # AArch64 Linux always enables the top byte ignore feature + @skipUnlessArch("aarch64") + @skipUnlessPlatform(["linux"]) + def test_mte_regions(self): + self.build() + self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET) + + lldbutil.run_break_set_by_file_and_line(self, "main.c", + line_number('main.c', '// Set break point at this line.'), + num_expected_locations=1) + + self.runCmd("run", RUN_SUCCEEDED) + + if self.process().GetState() == lldb.eStateExited: + self.fail("Test program failed to run.") + + self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT, + substrs=['stopped', + 'stop reason = breakpoint']) + + # Despite the non address bits we should find a region + self.expect("memory region the_page", patterns=[ + "\[0x[0-9A-Fa-f]+-0x[0-9A-Fa-f]+\) r-x"]) diff --git a/lldb/test/API/linux/aarch64/tagged_memory_region/main.c b/lldb/test/API/linux/aarch64/tagged_memory_region/main.c new file mode 100644 index 0000000000000..29f99d73e12d1 --- /dev/null +++ b/lldb/test/API/linux/aarch64/tagged_memory_region/main.c @@ -0,0 +1,17 @@ +#include +#include +#include +#include + +int main(int argc, char const *argv[]) { + void *the_page = mmap(0, sysconf(_SC_PAGESIZE), PROT_READ | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (the_page == MAP_FAILED) + return 1; + + // Put something in the top byte (AArch64 Linux always enables top byte + // ignore) + the_page = (void *)((size_t)the_page | ((size_t)0x34 << 56)); + + return 0; // Set break point at this line. +} diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py b/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py index 55efd91d827a6..56a93ccd6c8ab 100644 --- a/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py +++ b/lldb/test/API/tools/lldb-vscode/coreFile/TestVSCode_coreFile.py @@ -41,3 +41,18 @@ def test_core_file(self): self.vscode.request_next(threadId=32259) self.assertEquals(self.get_stackFrames(), expected_frames) + + @skipIfWindows + @skipIfRemote + def test_core_file_source_mapping(self): + ''' Test that sourceMap property is correctly applied when loading a core ''' + current_dir = os.path.dirname(os.path.realpath(__file__)) + exe_file = os.path.join(current_dir, "linux-x86_64.out") + core_file = os.path.join(current_dir, "linux-x86_64.core") + + self.create_debug_adaptor() + + source_map = [["/home/labath/test", current_dir]] + self.attach(exe_file, coreFile=core_file, sourceMap=source_map) + + self.assertTrue(current_dir in self.get_stackFrames()[0]['source']['path']) diff --git a/lldb/test/API/tools/lldb-vscode/coreFile/main.c b/lldb/test/API/tools/lldb-vscode/coreFile/main.c new file mode 100644 index 0000000000000..389bf7b51f4d6 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/coreFile/main.c @@ -0,0 +1 @@ +/* Fake source file for core dump source mapping test */ diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index dcb59cfb6c1c2..d03f17335a245 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -117,23 +117,12 @@ if(TARGET clang) endif() else() # We require libcxx for the test suite, so if we aren't building it, - # try to provide a helpful error about how to resolve the situation. 
+ # provide a helpful error about how to resolve the situation. if(NOT TARGET cxx AND NOT libcxx IN_LIST LLVM_ENABLE_RUNTIMES) - if(LLVM_ENABLE_PROJECTS STREQUAL "") - # If `LLVM_ENABLE_PROJECTS` is not being used (implying that we are - # using the old layout), suggest checking it out. - message(FATAL_ERROR - "LLDB test suite requires libc++, but it is currently disabled. " - "Please checkout `libcxx` in `llvm/projects` or disable tests " - "via `LLDB_INCLUDE_TESTS=OFF`.") - else() - # If `LLVM_ENABLE_PROJECTS` is being used, suggest adding it. - message(FATAL_ERROR - "LLDB test suite requires libc++, but it is currently disabled. " - "Please add `libcxx` to `LLVM_ENABLE_PROJECTS` or " - "`LLVM_ENABLE_RUNTIMES`, or disable tests via " - "`LLDB_INCLUDE_TESTS=OFF`.") - endif() + message(FATAL_ERROR + "LLDB test suite requires libc++, but it is currently disabled. " + "Please add `libcxx` to `LLVM_ENABLE_RUNTIMES` or disable tests via " + "`LLDB_INCLUDE_TESTS=OFF`.") endif() endif() endif() diff --git a/lldb/test/Shell/Driver/TestQuiet.test b/lldb/test/Shell/Driver/TestQuiet.test new file mode 100644 index 0000000000000..8598792aeba07 --- /dev/null +++ b/lldb/test/Shell/Driver/TestQuiet.test @@ -0,0 +1,7 @@ +RUN: %lldb -b -Q -o "expr 40 + 2" | FileCheck %s +RUN: %lldb -b -Q -O "expr 40 + 2" | FileCheck %s + +CHECK-NOT: expr +CHECK-NOT: lldb +CHECK-NOT: source +CHECK: 42 \ No newline at end of file diff --git a/lldb/test/Shell/Settings/Inputs/EchoCommandsQuiet.out b/lldb/test/Shell/Settings/Inputs/EchoCommandsQuiet.out index 12ad094292dea..11eee55f1f768 100644 --- a/lldb/test/Shell/Settings/Inputs/EchoCommandsQuiet.out +++ b/lldb/test/Shell/Settings/Inputs/EchoCommandsQuiet.out @@ -1,2 +1,4 @@ -# CHECK: (lldb) command source -s 1 {{.*\n}} -# CHECK-NEXT: (lldb) command source -s 1 {{.*\n}} +CHECK: start +CHECK-NOT: source +CHECK-NOT: lldb +CHECK-NEXT: done diff --git a/lldb/test/Shell/Settings/TestEchoCommands.test b/lldb/test/Shell/Settings/TestEchoCommands.test index 67547eaabf89c..234b9742bfa2a 100644 --- a/lldb/test/Shell/Settings/TestEchoCommands.test +++ b/lldb/test/Shell/Settings/TestEchoCommands.test @@ -1,4 +1,8 @@ # RUN: %lldb -x -b -o 'settings set interpreter.echo-comment-commands true' -s %S/Inputs/EchoCommandsTest.in | FileCheck %S/Inputs/EchoCommandsAll.out # RUN: %lldb -x -b -o 'settings set interpreter.echo-comment-commands false' -s %S/Inputs/EchoCommandsTest.in | FileCheck %S/Inputs/EchoCommandsNoComments.out # RUN: %lldb -x -b -o 'settings set interpreter.echo-commands false' -s %S/Inputs/EchoCommandsTest.in | FileCheck %S/Inputs/EchoCommandsNone.out -# RUN: %lldb -x -b --source-quietly -s %S/Inputs/EchoCommandsTest.in | FileCheck %S/Inputs/EchoCommandsQuiet.out + +RUN: echo start >%t.file +RUN: %lldb -x -b --source-quietly -s %S/Inputs/EchoCommandsTest.in >>%t.file +RUN: echo done >>%t.file +RUN: FileCheck %S/Inputs/EchoCommandsQuiet.out <%t.file diff --git a/lldb/test/Shell/helper/build.py b/lldb/test/Shell/helper/build.py index 5689373d37a51..005f12bc09cf8 100755 --- a/lldb/test/Shell/helper/build.py +++ b/lldb/test/Shell/helper/build.py @@ -277,7 +277,10 @@ class MsvcBuilder(Builder): def __init__(self, toolchain_type, args): Builder.__init__(self, toolchain_type, args, '.obj') - self.msvc_arch_str = 'x86' if self.arch == '32' else 'x64' + if os.getenv('PLATFORM') == 'arm64': + self.msvc_arch_str = 'arm' if self.arch == '32' else 'arm64' + else: + self.msvc_arch_str = 'x86' if self.arch == '32' else 'x64' if toolchain_type == 'msvc': # Make sure we're using the 
appropriate toolchain for the desired diff --git a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp index 305eb14de5335..e065718c7df94 100644 --- a/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp +++ b/lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.cpp @@ -159,7 +159,7 @@ kern_return_t DNBArchMachARM64::GetGPRState(bool force) { uint64_t log_fp = m_state.context.gpr.__fp; uint64_t log_lr = m_state.context.gpr.__lr; uint64_t log_sp = m_state.context.gpr.__sp; - uint64_t log_pc = m_state.context.gpr.__pc, + uint64_t log_pc = m_state.context.gpr.__pc; #endif DNBLogThreaded( "thread_get_state(0x%4.4x, %u, &gpr, %u) => 0x%8.8x (count = %u) regs" diff --git a/lldb/tools/debugserver/source/TTYState.cpp b/lldb/tools/debugserver/source/TTYState.cpp index 96699a360499e..9fe83a3b72e53 100644 --- a/lldb/tools/debugserver/source/TTYState.cpp +++ b/lldb/tools/debugserver/source/TTYState.cpp @@ -39,20 +39,19 @@ bool TTYState::GetTTYState(int fd, bool saveProcessGroup) { } bool TTYState::SetTTYState() const { - int result = 0; if (IsValid()) { if (TFlagsValid()) - result = fcntl(m_fd, F_SETFL, m_tflags); + fcntl(m_fd, F_SETFL, m_tflags); if (TTYStateValid()) - result = tcsetattr(m_fd, TCSANOW, &m_ttystate); + tcsetattr(m_fd, TCSANOW, &m_ttystate); if (ProcessGroupValid()) { // Save the original signal handler. void (*saved_sigttou_callback)(int) = NULL; saved_sigttou_callback = (void (*)(int))signal(SIGTTOU, SIG_IGN); // Set the process group - result = tcsetpgrp(m_fd, m_processGroup); + tcsetpgrp(m_fd, m_processGroup); // Restore the original signal handler. signal(SIGTTOU, saved_sigttou_callback); } diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp index a6a4a2a1b80b8..df070ebe4db8d 100644 --- a/lldb/tools/driver/Driver.cpp +++ b/lldb/tools/driver/Driver.cpp @@ -609,6 +609,7 @@ int Driver::MainLoop() { options.SetSpawnThread(false); options.SetStopOnError(true); options.SetStopOnCrash(m_option_data.m_batch); + options.SetEchoCommands(!m_option_data.m_source_quietly); SBCommandInterpreterRunResult results = m_debugger.RunCommandInterpreter(options); diff --git a/lldb/tools/driver/Options.td b/lldb/tools/driver/Options.td index 8bcb0e7bc52e9..be2b4bfa30044 100644 --- a/lldb/tools/driver/Options.td +++ b/lldb/tools/driver/Options.td @@ -110,7 +110,7 @@ def: Flag<["-"], "b">, Group; def source_quietly: F<"source-quietly">, - HelpText<"Tells the debugger to execute this one-line lldb command before any file has been loaded.">, + HelpText<"Tells the debugger not to echo commands while sourcing files or one-line commands provided on the command line.">, Group; def: Flag<["-"], "Q">, Alias, diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 71bdcaef0dff4..445a06c9da702 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -616,6 +616,8 @@ void request_attach(const llvm::json::Object &request) { // Run any initialize LLDB commands the user specified in the launch.json g_vsc.RunInitCommands(); + SetSourceMapFromArguments(*arguments); + lldb::SBError status; g_vsc.SetTarget(g_vsc.CreateTargetFromArguments(*arguments, status)); if (status.Fail()) { @@ -657,8 +659,6 @@ void request_attach(const llvm::json::Object &request) { g_vsc.target = g_vsc.debugger.GetSelectedTarget(); } - SetSourceMapFromArguments(*arguments); - if (error.Success() && core_file.empty()) { auto attached_pid = 
g_vsc.target.GetProcess().GetProcessID(); if (attached_pid == LLDB_INVALID_PROCESS_ID) { diff --git a/lldb/unittests/Core/DumpDataExtractorTest.cpp b/lldb/unittests/Core/DumpDataExtractorTest.cpp index f76014aa938c0..f229dacb6e6f7 100644 --- a/lldb/unittests/Core/DumpDataExtractorTest.cpp +++ b/lldb/unittests/Core/DumpDataExtractorTest.cpp @@ -187,7 +187,7 @@ TEST(DumpDataExtractorTest, Formats) { // Some normal numbers. TestDump(std::vector{0b0100001001001000}, lldb::Format::eFormatVectorOfFloat16, -#ifdef _WIN32 +#if defined(_MSC_VER) && _MSC_VER < 1920 // FIXME: This should print the same on all platforms. "{3.14063}"); #else diff --git a/lldb/unittests/Platform/PlatformDarwinTest.cpp b/lldb/unittests/Platform/PlatformDarwinTest.cpp index 285dc2ee3db78..73a0b37fbc773 100644 --- a/lldb/unittests/Platform/PlatformDarwinTest.cpp +++ b/lldb/unittests/Platform/PlatformDarwinTest.cpp @@ -20,6 +20,7 @@ using namespace lldb_private; struct PlatformDarwinTester : public PlatformDarwin { public: using PlatformDarwin::FindComponentInPath; + using PlatformDarwin::GetCompatibleArch; }; TEST(PlatformDarwinTest, TestParseVersionBuildDir) { @@ -66,3 +67,95 @@ TEST(PlatformDarwinTest, FindComponentInPath) { EXPECT_EQ("", PlatformDarwinTester::FindComponentInPath("/path/to/foo", "bar")); } + +TEST(PlatformDarwinTest, GetCompatibleArchARM64) { + const ArchSpec::Core core = ArchSpec::eCore_arm_arm64; + EXPECT_STREQ("arm64", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv7", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("armv4", PlatformDarwinTester::GetCompatibleArch(core, 10)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 11)); + EXPECT_STREQ("thumbv7", PlatformDarwinTester::GetCompatibleArch(core, 12)); + EXPECT_STREQ("thumbv4t", PlatformDarwinTester::GetCompatibleArch(core, 21)); + EXPECT_STREQ("thumb", PlatformDarwinTester::GetCompatibleArch(core, 22)); + EXPECT_EQ(nullptr, PlatformDarwinTester::GetCompatibleArch(core, 23)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv7f) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv7f; + EXPECT_STREQ("armv7f", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv7", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 6)); + EXPECT_STREQ("thumbv7f", PlatformDarwinTester::GetCompatibleArch(core, 7)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv7k) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv7k; + EXPECT_STREQ("armv7k", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv7", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 6)); + EXPECT_STREQ("thumbv7k", PlatformDarwinTester::GetCompatibleArch(core, 7)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv7s) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv7s; + EXPECT_STREQ("armv7s", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv7", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 6)); + EXPECT_STREQ("thumbv7s", PlatformDarwinTester::GetCompatibleArch(core, 7)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv7m) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv7m; + EXPECT_STREQ("armv7m", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv7", PlatformDarwinTester::GetCompatibleArch(core, 1)); + 
EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 6)); + EXPECT_STREQ("thumbv7m", PlatformDarwinTester::GetCompatibleArch(core, 7)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv7em) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv7em; + EXPECT_STREQ("armv7em", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv7", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 6)); + EXPECT_STREQ("thumbv7em", PlatformDarwinTester::GetCompatibleArch(core, 7)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv7) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv7; + EXPECT_STREQ("armv7", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv6m", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 5)); + EXPECT_STREQ("thumbv7", PlatformDarwinTester::GetCompatibleArch(core, 6)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv6m) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv6m; + EXPECT_STREQ("armv6m", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv6", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 4)); + EXPECT_STREQ("thumbv6m", PlatformDarwinTester::GetCompatibleArch(core, 5)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv6) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv6; + EXPECT_STREQ("armv6", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv5", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 3)); + EXPECT_STREQ("thumbv6", PlatformDarwinTester::GetCompatibleArch(core, 4)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv5) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv5; + EXPECT_STREQ("armv5", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("armv4", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 2)); + EXPECT_STREQ("thumbv5", PlatformDarwinTester::GetCompatibleArch(core, 3)); +} + +TEST(PlatformDarwinTest, GetCompatibleArchARMv4) { + const ArchSpec::Core core = ArchSpec::eCore_arm_armv4; + EXPECT_STREQ("armv4", PlatformDarwinTester::GetCompatibleArch(core, 0)); + EXPECT_STREQ("arm", PlatformDarwinTester::GetCompatibleArch(core, 1)); + EXPECT_STREQ("thumbv4t", PlatformDarwinTester::GetCompatibleArch(core, 2)); + EXPECT_STREQ("thumb", PlatformDarwinTester::GetCompatibleArch(core, 3)); +} diff --git a/lldb/unittests/Process/minidump/Inputs/linux-x86_64.cpp b/lldb/unittests/Process/minidump/Inputs/linux-x86_64.cpp index 827fe67b503b8..830b090ed808e 100644 --- a/lldb/unittests/Process/minidump/Inputs/linux-x86_64.cpp +++ b/lldb/unittests/Process/minidump/Inputs/linux-x86_64.cpp @@ -1,5 +1,5 @@ // Example source from breakpad's linux tutorial -// https://chromium.googlesource.com/breakpad/breakpad/+/master/docs/linux_starter_guide.md +// https://chromium.googlesource.com/breakpad/breakpad/+/main/docs/linux_starter_guide.md #include #include diff --git a/lldb/unittests/Target/CMakeLists.txt b/lldb/unittests/Target/CMakeLists.txt index c126597c79daf..3b23550feaf9c 100644 --- a/lldb/unittests/Target/CMakeLists.txt +++ b/lldb/unittests/Target/CMakeLists.txt @@ -7,6 +7,7 @@ add_lldb_unittest(TargetTests PathMappingListTest.cpp RemoteAwarePlatformTest.cpp StackFrameRecognizerTest.cpp 
+ FindFileTest.cpp LINK_LIBS lldbCore diff --git a/lldb/unittests/Target/FindFileTest.cpp b/lldb/unittests/Target/FindFileTest.cpp new file mode 100644 index 0000000000000..77d374d1f498d --- /dev/null +++ b/lldb/unittests/Target/FindFileTest.cpp @@ -0,0 +1,97 @@ +//===-- FindFileTest.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TestingSupport/TestUtilities.h" +#include "lldb/Host/FileSystem.h" +#include "lldb/Host/HostInfo.h" +#include "lldb/Target/PathMappingList.h" +#include "lldb/Utility/FileSpec.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "gtest/gtest.h" +#include + +using namespace llvm; +using namespace llvm::sys::fs; +using namespace lldb_private; + +namespace { +struct Matches { + FileSpec original; + llvm::StringRef remapped; + Matches(const char *o, const char *r) : original(o), remapped(r) {} + Matches(const char *o, llvm::sys::path::Style style, const char *r) + : original(o, style), remapped(r) {} +}; + +class FindFileTest : public testing::Test { +public: + void SetUp() override { + FileSystem::Initialize(); + HostInfo::Initialize(); + } + void TearDown() override { + HostInfo::Terminate(); + FileSystem::Terminate(); + } +}; +} // namespace + +static void TestFileFindings(const PathMappingList &map, + llvm::ArrayRef<Matches> matches, + llvm::ArrayRef<FileSpec> fails) { + for (const auto &fail : fails) { + SCOPED_TRACE(fail.GetCString()); + EXPECT_FALSE(map.FindFile(fail)); + } + + for (const auto &match : matches) { + SCOPED_TRACE(match.original.GetPath() + " -> " + match.remapped); + llvm::Optional<FileSpec> remapped; + + EXPECT_TRUE(bool(remapped = map.FindFile(match.original))); + EXPECT_TRUE(FileSpec(remapped.getValue()).GetPath() == + ConstString(match.remapped).GetStringRef()); + } +} + +TEST_F(FindFileTest, FindFileTests) { + const auto *Info = testing::UnitTest::GetInstance()->current_test_info(); + llvm::SmallString<128> DirName, FileName; + int fd; + + ASSERT_NO_ERROR(createUniqueDirectory(Info->name(), DirName)); + + sys::path::append(FileName, Twine(DirName), Twine("test")); + ASSERT_NO_ERROR(openFile(FileName, fd, CD_CreateAlways, FA_Read, OF_None)); + + llvm::FileRemover dir_remover(DirName); + llvm::FileRemover file_remover(FileName); + PathMappingList map; + + map.Append("/old", DirName.str(), false); + map.Append(R"(C:\foo)", DirName.str(), false); + + Matches matches[] = { + {"/old", llvm::sys::path::Style::posix, DirName.c_str()}, + {"/old/test", llvm::sys::path::Style::posix, FileName.c_str()}, + {R"(C:\foo)", llvm::sys::path::Style::windows, DirName.c_str()}, + {R"(C:\foo\test)", llvm::sys::path::Style::windows, FileName.c_str()}}; + + std::vector<FileSpec> fails{ + // path not mapped + FileSpec("/foo", llvm::sys::path::Style::posix), + FileSpec("/new", llvm::sys::path::Style::posix), + FileSpec(R"(C:\new)", llvm::sys::path::Style::windows), + // path mapped, but file not exist + FileSpec("/old/test1", llvm::sys::path::Style::posix), + FileSpec(R"(C:\foo\test2)", llvm::sys::path::Style::windows)}; + + TestFileFindings(map, matches, fails); +} diff --git a/lldb/unittests/Target/PathMappingListTest.cpp b/lldb/unittests/Target/PathMappingListTest.cpp index 90b6f1134a2b6..31077d83c2c7f 100644 --- 
a/lldb/unittests/Target/PathMappingListTest.cpp +++ b/lldb/unittests/Target/PathMappingListTest.cpp @@ -66,16 +66,16 @@ TEST(PathMappingListTest, RelativeTests) { #endif }; PathMappingList map; - map.Append(ConstString("."), ConstString("/tmp"), false); + map.Append(".", "/tmp", false); TestPathMappings(map, matches, fails); PathMappingList map2; - map2.Append(ConstString(""), ConstString("/tmp"), false); + map2.Append("", "/tmp", false); TestPathMappings(map, matches, fails); } TEST(PathMappingListTest, AbsoluteTests) { PathMappingList map; - map.Append(ConstString("/old"), ConstString("/new"), false); + map.Append("/old", "/new", false); Matches matches[] = { {"/old", "/new"}, {"/old/", "/new"}, @@ -97,7 +97,7 @@ TEST(PathMappingListTest, AbsoluteTests) { TEST(PathMappingListTest, RemapRoot) { PathMappingList map; - map.Append(ConstString("/"), ConstString("/new"), false); + map.Append("/", "/new", false); Matches matches[] = { {"/old", "/new/old"}, {"/old/", "/new/old"}, @@ -118,7 +118,7 @@ TEST(PathMappingListTest, RemapRoot) { #ifndef _WIN32 TEST(PathMappingListTest, CrossPlatformTests) { PathMappingList map; - map.Append(ConstString(R"(C:\old)"), ConstString("/new"), false); + map.Append(R"(C:\old)", "/new", false); Matches matches[] = { {R"(C:\old)", llvm::sys::path::Style::windows, "/new"}, {R"(C:\old\)", llvm::sys::path::Style::windows, "/new"}, diff --git a/llvm-spirv/CMakeLists.txt b/llvm-spirv/CMakeLists.txt index 164d583684f5c..690f4b5b25bcf 100644 --- a/llvm-spirv/CMakeLists.txt +++ b/llvm-spirv/CMakeLists.txt @@ -1,6 +1,8 @@ cmake_minimum_required(VERSION 3.13.4) -set (BASE_LLVM_VERSION 14.0.0) +if(NOT DEFINED BASE_LLVM_VERSION) + set (BASE_LLVM_VERSION 14.0.0) +endif(NOT DEFINED BASE_LLVM_VERSION) set(LLVM_SPIRV_VERSION ${BASE_LLVM_VERSION}.0) include(FetchContent) diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index c71660e513e90..89fae7dc82038 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -1014,20 +1014,9 @@ class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo { FunctionType *InvokeTy = getBlockInvokeTy(F, BlockArgIdx); if (InvokeTy->getNumParams() > 1) setLocalArgBlock(BlockArgIdx); - } else if (NameRef.equals("enqueue_kernel")) { - assert(F && "lack of necessary information"); - setEnumArg(1, SPIR::PRIMITIVE_KERNEL_ENQUEUE_FLAGS_T); - addUnsignedArg(3); - setArgAttr(4, SPIR::ATTR_CONST); - // If there are arguments other then block context then these are pointers - // to local memory so this built-in must be mangled accordingly. - const size_t BlockArgIdx = 6; - FunctionType *InvokeTy = getBlockInvokeTy(F, BlockArgIdx); - if (InvokeTy->getNumParams() > 1) { - setLocalArgBlock(BlockArgIdx); - addUnsignedArg(BlockArgIdx + 1); - setVarArg(BlockArgIdx + 2); - } + } else if (NameRef.startswith("__enqueue_kernel")) { + // clang doesn't mangle enqueue_kernel builtins + setAsDontMangle(); } else if (NameRef.startswith("get_") || NameRef.equals("nan") || NameRef.equals("mem_fence") || NameRef.startswith("shuffle")) { addUnsignedArg(-1); diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h index 608187d2513d3..3319ea15b7839 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVInternal.h +++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h @@ -465,7 +465,7 @@ class BuiltinFuncMangleInfo { /// Translate builtin function name and set /// argument attributes and unsigned args. 
BuiltinFuncMangleInfo(const std::string &UniqName = "") - : LocalArgBlockIdx(-1), VarArgIdx(-1) { + : LocalArgBlockIdx(-1), VarArgIdx(-1), DontMangle(false) { if (!UniqName.empty()) init(UniqName); } @@ -492,6 +492,7 @@ class BuiltinFuncMangleInfo { assert(0 <= Ndx && "it is not allowed to set less than zero index"); VarArgIdx = Ndx; } + void setAsDontMangle() { DontMangle = true; } bool isArgUnsigned(int Ndx) { return UnsignedArgs.count(-1) || UnsignedArgs.count(Ndx); } @@ -511,6 +512,7 @@ class BuiltinFuncMangleInfo { *Enum = Loc->second; return true; } + bool avoidMangling() { return DontMangle; } unsigned getArgAttr(int Ndx) { auto Loc = Attrs.find(Ndx); if (Loc == Attrs.end()) @@ -549,6 +551,9 @@ class BuiltinFuncMangleInfo { int LocalArgBlockIdx; // index of a block with local arguments, idx < 0 if // none int VarArgIdx; // index of ellipsis argument, idx < 0 if none +private: + bool DontMangle; // clang doesn't apply mangling for some builtin functions + // (i.e. enqueue_kernel) }; /// \returns a vector of types for a collection of values. @@ -695,7 +700,7 @@ bool getSPIRVBuiltin(const std::string &Name, spv::BuiltIn &Builtin); /// false for other functions bool oclIsBuiltin(StringRef Name, StringRef &DemangledName, bool IsCpp = false); -/// Check if a function type is void(void). +/// Check if a function returns void bool isVoidFuncTy(FunctionType *FT); /// \returns true if \p T is a function pointer type.
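
The new DontMangle flag gives BuiltinFuncMangleInfo an escape hatch for builtins that clang emits with unmangled names, such as the __enqueue_kernel_* family registered through setAsDontMangle() in OCLUtil.cpp above. A sketch of how a mangling routine would be expected to consult the flag; ToyMangleInfo, mangleBuiltin, and runItaniumMangler are hypothetical names for illustration, not the translator's actual entry points:

#include <string>

struct ToyMangleInfo {
  bool DontMangle = false;
  bool avoidMangling() const { return DontMangle; }
};

// Placeholder for the real Itanium/SPIR mangler (crude sketch only).
static std::string runItaniumMangler(const std::string &Name) {
  return "_Z" + std::to_string(Name.size()) + Name;
}

std::string mangleBuiltin(const ToyMangleInfo &Info,
                          const std::string &UniqName) {
  if (Info.avoidMangling())
    return UniqName; // e.g. __enqueue_kernel_basic stays as clang emitted it
  return runItaniumMangler(UniqName);
}
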
diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp index ec2de2058db6b..070fc61353a3d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp @@ -105,8 +105,8 @@ bool lowerBitCastToNonStdVec(Instruction *OldInst, Value *NewInst, } // Handle extractelement instruction which is following the load else if (auto *EEI = dyn_cast<ExtractElementInst>(U)) { - uint64_t NumElemsInOldVec = OldVecTy->getElementCount().getValue(); - uint64_t NumElemsInNewVec = NewVecTy->getElementCount().getValue(); + uint64_t NumElemsInOldVec = OldVecTy->getElementCount().getFixedValue(); + uint64_t NumElemsInNewVec = NewVecTy->getElementCount().getFixedValue(); uint64_t OldElemIdx = cast<ConstantInt>(EEI->getIndexOperand())->getZExtValue(); uint64_t NewElemIdx = @@ -168,7 +168,8 @@ class SPIRVLowerBitCastToNonStandardTypePass continue; VectorType *SrcVecTy = getVectorType(BC->getSrcTy()); if (SrcVecTy) { - uint64_t NumElemsInSrcVec = SrcVecTy->getElementCount().getValue(); + uint64_t NumElemsInSrcVec = + SrcVecTy->getElementCount().getFixedValue(); if (!isValidVectorSize(NumElemsInSrcVec)) report_fatal_error( llvm::Twine("Unsupported vector type with the size of: " + @@ -177,7 +178,8 @@ class SPIRVLowerBitCastToNonStandardTypePass } VectorType *DestVecTy = getVectorType(BC->getDestTy()); if (DestVecTy) { - uint64_t NumElemsInDestVec = DestVecTy->getElementCount().getValue(); + uint64_t NumElemsInDestVec = + DestVecTy->getElementCount().getFixedValue(); if (!isValidVectorSize(NumElemsInDestVec)) BCastsToNonStdVec.push_back(&I); } diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index dee1a0d8366fb..ce373baa8c21d 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -976,19 +976,6 @@ Value *SPIRVToLLVM::transValue(SPIRVValue *BV, Function *F, BasicBlock *BB, return V; } -Value *SPIRVToLLVM::transDeviceEvent(SPIRVValue *BV, Function *F, - BasicBlock *BB) { - auto Val = transValue(BV, F, BB, false); - auto Ty = dyn_cast<PointerType>(Val->getType()); - assert(Ty && "Invalid Device Event"); - if (Ty->getAddressSpace() == SPIRAS_Generic) - return Val; - - IRBuilder<> Builder(BB); - auto EventTy = PointerType::get(Ty->getElementType(), SPIRAS_Generic); - return Builder.CreateAddrSpaceCast(Val, EventTy); -} - Value *SPIRVToLLVM::transConvertInst(SPIRVValue *BV, Function *F, BasicBlock *BB) { SPIRVUnary *BC = static_cast<SPIRVUnary *>(BV); @@ -2356,9 +2343,6 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, case OpSignBitSet: return mapValue( BV, transOCLRelational(static_cast<SPIRVInstruction *>(BV), BB)); - case OpEnqueueKernel: - return mapValue( - BV, transEnqueueKernelBI(static_cast<SPIRVInstruction *>(BV), BB)); case OpGetKernelWorkGroupSize: case OpGetKernelPreferredWorkGroupSizeMultiple: return mapValue( @@ -2864,100 +2848,6 @@ Value *SPIRVToLLVM::transBlockInvoke(SPIRVValue *Invoke, BasicBlock *BB) { Int8PtrTyGen, "", BB); } -Instruction *SPIRVToLLVM::transEnqueueKernelBI(SPIRVInstruction *BI, - BasicBlock *BB) { - Type *Int32Ty = Type::getInt32Ty(*Context); - Type *Int64Ty = Type::getInt64Ty(*Context); - Type *IntTy = - M->getDataLayout().getPointerSizeInBits(0) == 32 ? Int32Ty : Int64Ty; - - // Find or create enqueue kernel BI declaration - auto Ops = BI->getOperands(); - bool HasVaargs = Ops.size() > 10; - bool HasEvents = true; - SPIRVValue *EventRet = Ops[5]; - if (EventRet->getOpCode() == OpConstantNull) { - SPIRVValue *NumEvents = Ops[3]; - if (NumEvents->getOpCode() == OpConstant) { - SPIRVConstant *NE = static_cast<SPIRVConstant *>(NumEvents); - HasEvents = NE->getZExtIntValue() != 0; - } else if (NumEvents->getOpCode() == OpConstantNull) - HasEvents = false; - } - - std::string FName = ""; - if (!HasVaargs && !HasEvents) - FName = "__enqueue_kernel_basic"; - else if (!HasVaargs && HasEvents) - FName = "__enqueue_kernel_basic_events"; - else if (HasVaargs && !HasEvents) - FName = "__enqueue_kernel_varargs"; - else - FName = "__enqueue_kernel_events_varargs"; - - Function *F = M->getFunction(FName); - if (!F) { - SmallVector<Type *, 8> Tys = { - transType(Ops[0]->getType()), // queue - Int32Ty, // flags - transType(Ops[2]->getType()), // ndrange - }; - if (HasEvents) { - Type *EventTy = PointerType::get( - getSPIRVOpaquePtrType(M, OpTypeDeviceEvent), SPIRAS_Generic); - - Tys.push_back(Int32Ty); - Tys.push_back(EventTy); - Tys.push_back(EventTy); - } - - Tys.push_back(Type::getInt8PtrTy(*Context, SPIRAS_Generic)); - Tys.push_back(Type::getInt8PtrTy(*Context, SPIRAS_Generic)); - - if (HasVaargs) { - // Number of block invoke arguments (local arguments) - Tys.push_back(Int32Ty); - // Array of sizes of block invoke arguments - Tys.push_back(PointerType::get(IntTy, SPIRAS_Private)); - } - - FunctionType *FT = FunctionType::get(Int32Ty, Tys, false); - F = Function::Create(FT, GlobalValue::ExternalLinkage, FName, M); - if (isFuncNoUnwind()) - F->addFnAttr(Attribute::NoUnwind); - } - - // Create call to enqueue kernel BI - SmallVector<Value *, 8> Args = { - transValue(Ops[0], F, BB, false), // queue - transValue(Ops[1], F, BB, false), // flags - transValue(Ops[2], F, BB, false), // ndrange - }; - - if (HasEvents) { - Args.push_back(transValue(Ops[3], F, BB, false)); // events number - Args.push_back(transDeviceEvent(Ops[4], F, BB)); // event_wait_list - Args.push_back(transDeviceEvent(Ops[5], F, BB)); // event_ret - } - - Args.push_back(transBlockInvoke(Ops[6], BB)); // block_invoke - Args.push_back(transValue(Ops[7], F, BB, false)); // block_literal - - if (HasVaargs) { - // Number of local arguments - Args.push_back(ConstantInt::get(Int32Ty, Ops.size() - 10));
- // GEP to array of sizes of local arguments - if (Ops[10]->getOpCode() == OpPtrAccessChain) - Args.push_back(transValue(Ops[10], F, BB, false)); - else - llvm_unreachable("Not implemented"); - } - auto Call = CallInst::Create(F, Args, "", BB); - setName(Call, BI); - setAttrByCalledFunc(Call); - return Call; -} - Instruction *SPIRVToLLVM::transWGSizeQueryBI(SPIRVInstruction *BI, BasicBlock *BB) { std::string FName = @@ -3024,23 +2914,18 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName, transOCLBuiltinFromInstPreproc(BI, RetTy, Ops); std::vector<Type *> ArgTys = transTypeVector(SPIRVInstruction::getOperandTypes(Ops)); - bool HasFuncPtrArg = false; for (auto &I : ArgTys) { if (isa<FunctionType>(I)) { I = PointerType::get(I, SPIRAS_Private); - HasFuncPtrArg = true; } } - if (!HasFuncPtrArg) { - if (BM->getDesiredBIsRepresentation() != BIsRepresentation::SPIRVFriendlyIR) - mangleOpenClBuiltin(FuncName, ArgTys, MangledName); - else - MangledName = - getSPIRVFriendlyIRFunctionName(FuncName, BI->getOpCode(), ArgTys); - } else { - MangledName = decorateSPIRVFunction(FuncName); - } + if (BM->getDesiredBIsRepresentation() != BIsRepresentation::SPIRVFriendlyIR) + mangleOpenClBuiltin(FuncName, ArgTys, MangledName); + else + MangledName = + getSPIRVFriendlyIRFunctionName(FuncName, BI->getOpCode(), ArgTys); + Function *Func = M->getFunction(MangledName); FunctionType *FT = FunctionType::get(RetTy, ArgTys, false); // ToDo: Some intermediate functions have duplicate names with @@ -3049,7 +2934,7 @@ Instruction *SPIRVToLLVM::transBuiltinFromInst(const std::string &FuncName, // names. However it is better to have a way to differentiate // between intermediate functions and final functions and make // sure final functions have unique names. - SPIRVDBG(if (!HasFuncPtrArg && Func && Func->getFunctionType() != FT) { + SPIRVDBG(if (Func && Func->getFunctionType() != FT) { dbgs() << "Warning: Function name conflict:\n" << *Func << '\n' << " => " << *FT << '\n'; diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.h b/llvm-spirv/lib/SPIRV/SPIRVReader.h index e7c47fc066e5d..09919023b5524 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.h +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.h @@ -89,7 +89,6 @@ class SPIRVToLLVM { bool CreatePlaceHolder = true); Value *transValueWithoutDecoration(SPIRVValue *, Function *F, BasicBlock *, bool CreatePlaceHolder = true); - Value *transDeviceEvent(SPIRVValue *BV, Function *F, BasicBlock *BB); bool transDecoration(SPIRVValue *, Value *); bool transAlign(SPIRVValue *, Value *); Instruction *transOCLBuiltinFromExtInst(SPIRVExtInst *BC, BasicBlock *BB); @@ -97,7 +96,6 @@ class SPIRVToLLVM { Function *F, BasicBlock *); Function *transFunction(SPIRVFunction *F); Value *transBlockInvoke(SPIRVValue *Invoke, BasicBlock *BB); - Instruction *transEnqueueKernelBI(SPIRVInstruction *BI, BasicBlock *BB); Instruction *transWGSizeQueryBI(SPIRVInstruction *BI, BasicBlock *BB); Instruction *transSGSizeQueryBI(SPIRVInstruction *BI, BasicBlock *BB); bool transFPContractMetadata();
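
Both the transEnqueueKernelBI implementation removed above and the SPIRVToOCL20 lowering introduced below derive the runtime entry point from the same two predicates. Restated as a standalone helper (a hypothetical function added here only for clarity; the four names are the OpenCL 2.0 runtime functions clang itself emits for enqueue_kernel calls):

// HasVaargs: the call carries trailing local-memory size arguments.
// HasEvents: the wait-list/return-event operands are actually used.
const char *chooseEnqueueKernelVariant(bool HasVaargs, bool HasEvents) {
  if (HasVaargs)
    return HasEvents ? "__enqueue_kernel_events_varargs"
                     : "__enqueue_kernel_varargs";
  return HasEvents ? "__enqueue_kernel_basic_events"
                   : "__enqueue_kernel_basic";
}
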
SPIRVToOCLBase::visitCallSPIRVVStore(CallInst *CI, OCLExtOpKind Kind) { Kind == OpenCLLIB::Vstorea_halfn || Kind == OpenCLLIB::Vstorea_halfn_r || Kind == OpenCLLIB::Vstoren) { if (auto DataType = dyn_cast(Args[0]->getType())) { - uint64_t NumElements = DataType->getElementCount().getValue(); + uint64_t NumElements = DataType->getElementCount().getFixedValue(); assert((NumElements == 2 || NumElements == 3 || NumElements == 4 || NumElements == 8 || NumElements == 16) && "Unsupported vector size for vstore instruction!"); diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h index 794e5fde385e9..e15816d0cf3ce 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h @@ -210,6 +210,9 @@ class SPIRVToOCLBase : public InstVisitor { /// - OCL1.2: barrier virtual void visitCallSPIRVControlBarrier(CallInst *CI) = 0; + /// Transform __spirv_EnqueueKernel to __enqueue_kernel + virtual void visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) = 0; + /// Conduct generic mutations for all atomic builtins virtual CallInst *mutateCommonAtomicArguments(CallInst *CI, Op OC) = 0; @@ -298,6 +301,9 @@ class SPIRVToOCL12Base : public SPIRVToOCLBase { /// the same semantics as OpAtomicCompareExchange. Instruction *visitCallSPIRVAtomicCmpExchg(CallInst *CI) override; + /// Trigger assert, since OpenCL 1.2 doesn't support enqueue_kernel + void visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) override; + /// Conduct generic mutations for all atomic builtins CallInst *mutateCommonAtomicArguments(CallInst *CI, Op OC) override; @@ -361,6 +367,9 @@ class SPIRVToOCL20Base : public SPIRVToOCLBase { /// atomic_fetch_add_explicit / atomic_fetch_sub_explicit Instruction *visitCallSPIRVAtomicIncDec(CallInst *CI, Op OC) override; + /// Transform __spirv_EnqueueKernel to __enqueue_kernel + void visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) override; + /// Conduct generic mutations for all atomic builtins CallInst *mutateCommonAtomicArguments(CallInst *CI, Op OC) override; diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp index 01103ffef1ee3..0fbbc9f88747a 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL12.cpp @@ -255,6 +255,10 @@ Instruction *SPIRVToOCL12Base::visitCallSPIRVAtomicBuiltin(CallInst *CI, return NewCI; } +void SPIRVToOCL12Base::visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) { + assert(0 && "OpenCL 1.2 doesn't support enqueue_kernel!"); +} + std::string SPIRVToOCL12Base::mapFPAtomicName(Op OC) { assert(isFPAtomicOpCode(OC) && "Not intended to handle other opcodes than " "AtomicF{Add/Min/Max}EXT!"); diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp index e358a1481dcb5..c873cd157749b 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp @@ -279,6 +279,79 @@ Instruction *SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) { &Attrs); } +void SPIRVToOCL20Base::visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) { + assert(CI->getCalledFunction() && "Unexpected indirect call"); + AttributeList Attrs = CI->getCalledFunction()->getAttributes(); + Instruction *PInsertBefore = CI; + + mutateCallInstOCL( + M, CI, + [=](CallInst *, std::vector &Args) { + bool HasVaargs = Args.size() > 10; + bool HasEvents = true; + Value *EventRet = Args[5]; + if (isa(EventRet)) { + Value *NumEvents = Args[3]; + if (isa(NumEvents)) { + ConstantInt *NE = cast(NumEvents); + HasEvents = NE->getZExtValue() != 0; + } + } + + 
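The operand checks just above reduce the shape of an OpEnqueueKernel call to two booleans: HasVaargs (more than 10 operands, i.e. the block takes local-memory arguments) and HasEvents (a non-null, non-zero event count). Further down in this hunk, those two flags select among the four OpenCL 2.0 device-side enqueue builtins. A minimal standalone sketch of that mapping (the helper name is the editor's illustration, not part of the patch):

    #include <string>

    // Sketch: which __enqueue_kernel_* builtin the OpenCL 2.0 lowering emits,
    // given the two flags derived from the OpEnqueueKernel call's operands.
    static std::string selectEnqueueKernelBuiltin(bool HasVaargs, bool HasEvents) {
      if (!HasVaargs)
        return HasEvents ? "__enqueue_kernel_basic_events"
                         : "__enqueue_kernel_basic";
      return HasEvents ? "__enqueue_kernel_events_varargs"
                       : "__enqueue_kernel_varargs";
    }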
Value *Invoke = Args[6]; + auto *Int8PtrTyGen = Type::getInt8PtrTy(*Ctx, SPIRAS_Generic); + Args[6] = CastInst::CreatePointerBitCastOrAddrSpaceCast( + Invoke, Int8PtrTyGen, "", PInsertBefore); + + // Don't remove arguments immediately, just mark them as removed with + // nullptr, and remove them at the end of processing. It allows for + // easier understanding of which argument is going to be removed. + auto MarkAsRemoved = [&Args](size_t Start, size_t End) { + assert(Start <= End); + for (size_t I = Start; I < End; I++) + Args[I] = nullptr; + }; + + if (!HasEvents) { + // Mark arguments at indices 3 (Num Events), 4 (Wait Events), 5 (Ret + // Event) as removed. + MarkAsRemoved(3, 6); + } + + if (!HasVaargs) { + // Mark arguments at indices 8 (Param Size), 9 (Param Align) as + // removed. + MarkAsRemoved(8, 10); + } else { + // GEP to array of sizes of local arguments + Value *GEP = Args[10]; + size_t NumLocalArgs = Args.size() - 10; + + // Mark all SPIRV-specific arguments as removed + MarkAsRemoved(8, Args.size()); + + Type *Int32Ty = Type::getInt32Ty(*Ctx); + Args[8] = ConstantInt::get(Int32Ty, NumLocalArgs); + Args[9] = GEP; + } + + Args.erase(std::remove(Args.begin(), Args.end(), nullptr), Args.end()); + + std::string FName = ""; + if (!HasVaargs && !HasEvents) + FName = "__enqueue_kernel_basic"; + else if (!HasVaargs && HasEvents) + FName = "__enqueue_kernel_basic_events"; + else if (HasVaargs && !HasEvents) + FName = "__enqueue_kernel_varargs"; + else + FName = "__enqueue_kernel_events_varargs"; + + return FName; + }, + &Attrs); +} + } // namespace SPIRV INITIALIZE_PASS(SPIRVToOCL20Legacy, "spvtoocl20", diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp index 77ab88c88ecff..b42ee08c65183 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp @@ -244,9 +244,7 @@ void getFunctionTypeParameterTypes(llvm::FunctionType *FT, } } -bool isVoidFuncTy(FunctionType *FT) { - return FT->getReturnType()->isVoidTy() && FT->getNumParams() == 0; -} +bool isVoidFuncTy(FunctionType *FT) { return FT->getReturnType()->isVoidTy(); } bool isPointerToOpaqueStructType(llvm::Type *Ty) { if (auto PT = dyn_cast(Ty)) @@ -1426,6 +1424,8 @@ std::string mangleBuiltin(StringRef UniqName, ArrayRef ArgTypes, if (!BtnInfo) return std::string(UniqName); BtnInfo->init(UniqName); + if (BtnInfo->avoidMangling()) + return std::string(UniqName); std::string MangledName; LLVM_DEBUG(dbgs() << "[mangle] " << UniqName << " => "); SPIR::FunctionDescriptor FD; diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRV.debug.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRV.debug.h index 2fabc6365caa7..026a308376372 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRV.debug.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRV.debug.h @@ -264,7 +264,8 @@ enum ExpressionOpCode { RegvalType = 161, DerefType = 162, XderefType = 163, - Reinterpret = 164 + Reinterpret = 164, + LLVMArg = 165, }; enum ImportedEntityTag { @@ -756,6 +757,7 @@ static std::map OpCountMap { // { DerefType, 3 }, // { XderefType, 3 }, // { Reinterpret, 2 }, + { LLVMArg, 2 }, }; } @@ -1002,6 +1004,7 @@ inline void DbgExpressionOpCodeMap::init() { add(dwarf::DW_OP_xderef_size, SPIRVDebug::XderefSize ); add(dwarf::DW_OP_nop, SPIRVDebug::Nop); add(dwarf::DW_OP_push_object_address, SPIRVDebug::PushObjectAddress ); + add(dwarf::DW_OP_LLVM_arg, SPIRVDebug::LLVMArg); } typedef SPIRVMap diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 043999f46223a..2c47e492584a5 
100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -2814,7 +2814,7 @@ class SPIRVExpectKHRInstBase : public SPIRVInstTemplateBase { typedef SPIRVInstTemplate \ SPIRV##x; _SPIRV_OP(ExpectKHR, true, 5) -#undef _SPIRV_OP_INTERNAL +#undef _SPIRV_OP class SPIRVDotKHRBase : public SPIRVInstTemplateBase { protected: diff --git a/llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll b/llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll new file mode 100644 index 0000000000000..f42063fc50f04 --- /dev/null +++ b/llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll @@ -0,0 +1,45 @@ +; This test checks that DW_OP_LLVM_arg operation goes through round trip translation correctly. + +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-allow-extra-diexpressions +; RUN: llvm-spirv %t.spv -to-text -o %t.spt +; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV +; RUN: llvm-spirv -r %t.spv -o %t.rev.bc +; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM + +; CHECK-SPIRV: [[#DEBUG_LOC_VAR:]] [[#]] DebugLocalVariable +; CHECK-SPIRV: [[#EXPR_ARG_0:]] [[#]] DebugOperation 165 0 +; CHECK-SPIRV: [[#EXPR_ARG_1:]] [[#]] DebugOperation 165 1 +; CHECK-SPIRV: [[#EXPR_ARG_2:]] [[#]] DebugOperation 1 +; CHECK-SPIRV: [[#EXPRESSION:]] [[#]] DebugExpression [[#EXPR_ARG_0]] [[#EXPR_ARG_1]] [[#EXPR_ARG_2]] +; CHECK-SPIRV: DebugValue [[#DEBUG_LOC_VAR]] [[#]] [[#EXPRESSION]] + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "spir64-unknown-unknown" + +declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone speculatable + +define void @DbgIntrinsics() sanitize_memtag { +entry: + %x = alloca i32, align 4 +; CHECK-LLVM: call void @llvm.dbg.value(metadata i32* %x, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)) + call void @llvm.dbg.value(metadata !DIArgList(i32* %x, i32* %x), metadata !6, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)), !dbg !10 + store i32 42, i32* %x, align 4 + ret void +} + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 9.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "stack-tagging.cc", directory: "/tmp") +!2 = !{} +!3 = distinct !DISubprogram(name: "DbgIntrinsics", linkageName: "DbgIntrinsics", scope: !1, file: !1, line: 3, type: !4, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!4 = !DISubroutineType(types: !5) +!5 = !{null} +!6 = !DILocalVariable(name: "x", scope: !3, file: !1, line: 4, type: !7) +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{i32 2, !"Dwarf Version", i32 4} +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !DILocation(line: 1, column: 2, scope: !3) diff --git a/llvm-spirv/test/transcoding/enqueue_kernel.cl b/llvm-spirv/test/transcoding/enqueue_kernel.cl index 685d35259b4b2..05a462fa9ef4a 100644 --- a/llvm-spirv/test/transcoding/enqueue_kernel.cl +++ b/llvm-spirv/test/transcoding/enqueue_kernel.cl @@ -3,9 +3,12 @@ // RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV // RUN: llvm-spirv %t.bc -o %t.spv // RUN: spirv-val %t.spv -// RUN: llvm-spirv -r %t.spv -o %t.rev.bc +// RUN: llvm-spirv -r %t.spv --spirv-target-env CL2.0 -o %t.rev.bc // RUN: llvm-dis %t.rev.bc // RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM 
+// RUN: llvm-spirv -r %t.spv --spirv-target-env SPV-IR -o %t.rev.bc +// RUN: llvm-dis %t.rev.bc +// RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-SPV-IR // CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" // CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" @@ -64,7 +67,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-LLVM: [[InterCast2:%[0-9]+]] = bitcast %struct.__opencl_block_literal_generic* [[Block2]] to i8 // CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast i8* [[InterCast2]] to i8 addrspace(4)* // CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* - // CHECK-LLVM: call i32 @__enqueue_kernel_basic(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) + // CHECK-LLVM: call spir_func i32 @__enqueue_kernel_basic(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) + // CHECK-SPV-IR: call spir_func i32 @_Z21__spirv_EnqueueKernelP13__spirv_Queuei9ndrange_tiPU3AS4P19__spirv_DeviceEventS7_U13block_pointerFvvEPU3AS4cii(%spirv.Queue* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %spirv.DeviceEvent* addrspace(4)* null, %spirv.DeviceEvent* addrspace(4)* null, void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel, i8 addrspace(4)* {{.*}}, i32 {{.*}}, i32 {{.*}}) enqueue_kernel(default_queue, flags, ndrange, ^(void) { a[i] = c0; @@ -84,7 +88,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-LLVM: [[InterCast3:%[0-9]+]] = bitcast %struct.__opencl_block_literal_generic* [[Block3]] to i8 // CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast i8* [[InterCast3]] to i8 addrspace(4) // CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* - // CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) + // CHECK-LLVM: call spir_func i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) + // CHECK-SPV-IR: call spir_func i32 @_Z21__spirv_EnqueueKernelP13__spirv_Queuei9ndrange_tiPU3AS4P19__spirv_DeviceEventS7_U13block_pointerFvvEPU3AS4cii(%spirv.Queue* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %spirv.DeviceEvent* addrspace(4)* {{.*}}, %spirv.DeviceEvent* addrspace(4)* {{.*}}, void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel, i8 addrspace(4)* %{{.*}}, i32 {{.*}}, i32 {{.*}}) enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event, ^(void) { a[i] = b[i]; @@ -104,7 +109,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-LLVM: [[Block0Tmp:%[0-9]+]] = bitcast [[BlockTy1]] addrspace(1)* @__block_literal_global to i8 addrspace(1)* // CHECK-LLVM: [[Block0:%[0-9]+]] = addrspacecast i8 addrspace(1)* [[Block0Tmp]] to i8 addrspace(4)* // CHECK-LLVM: [[BlockInv0:%[0-9]+]] = 
addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)* - // CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) + // CHECK-LLVM: call spir_func i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) + // CHECK-SPV-IR: call spir_func i32 @_Z21__spirv_EnqueueKernelP13__spirv_Queuei9ndrange_tiPU3AS4P19__spirv_DeviceEventS7_U13block_pointerFvvEPU3AS4ciiPi(%spirv.Queue* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %spirv.DeviceEvent* addrspace(4)* {{.*}}, %spirv.DeviceEvent* addrspace(4)* {{.*}}, void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel, i8 addrspace(4)* {{.*}}, i32 {{.*}}, i32 {{.*}}, i32* {{.*}}) enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event, ^(local void *p) { return; @@ -126,7 +132,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-LLVM: [[Block1Tmp:%[0-9]+]] = bitcast [[BlockTy1]] addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)* // CHECK-LLVM: [[Block1:%[0-9]+]] = addrspacecast i8 addrspace(1)* [[Block1Tmp]] to i8 addrspace(4)* // CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* - // CHECK-LLVM: call i32 @__enqueue_kernel_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) + // CHECK-LLVM: call spir_func i32 @__enqueue_kernel_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) + // CHECK-SPV-IR: call spir_func i32 @_Z21__spirv_EnqueueKernelP13__spirv_Queuei9ndrange_tiPU3AS4P19__spirv_DeviceEventS7_U13block_pointerFvvEPU3AS4ciiPiSB_SB_(%spirv.Queue* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %spirv.DeviceEvent* addrspace(4)* null, %spirv.DeviceEvent* addrspace(4)* null, void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel, i8 addrspace(4)* {{.*}}, i32 {{.*}}, i32 {{.*}}, i32* {{.*}}, i32* {{.*}}, i32* {{.*}}) enqueue_kernel(default_queue, flags, ndrange, ^(local void *p1, local void *p2, local void *p3) { return; @@ -146,7 +153,8 @@ kernel void device_side_enqueue(global int *a, global int *b, int i, char c0) { // CHECK-LLVM: [[InterCast5:%[0-9]+]] = bitcast %struct.__opencl_block_literal_generic* [[Block5]] to i8 // CHECK-LLVM: [[Block5Ptr:%[0-9]+]] = addrspacecast i8* [[InterCast5]] to i8 addrspace(4) // CHECK-LLVM: [[BlockInv5:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel to i8 addrspace(4)* - // CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv5]], i8 addrspace(4)* [[Block5Ptr]]) 
+ // CHECK-LLVM: call spir_func i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv5]], i8 addrspace(4)* [[Block5Ptr]]) + // CHECK-SPV-IR: call spir_func i32 @_Z21__spirv_EnqueueKernelP13__spirv_Queuei9ndrange_tiPU3AS4P19__spirv_DeviceEventS7_U13block_pointerFvvEPU3AS4cii(%spirv.Queue* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %spirv.DeviceEvent* addrspace(4)* null, %spirv.DeviceEvent* addrspace(4)* {{.*}}, void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_5_kernel, i8 addrspace(4)* {{.*}}, i32 {{.*}}, i32 {{.*}}) enqueue_kernel(default_queue, flags, ndrange, 0, NULL, &clk_event, ^(void) { a[i] = b[i]; diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index de76984116c8b..6c0e30b8f7f82 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -361,6 +361,14 @@ endif() option(LLVM_ENABLE_CRASH_DUMPS "Turn on memory dumps on crashes. Currently only implemented on Windows." OFF) +set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT OFF) +if (MINGW) + # Cygwin doesn't identify itself as Windows, and thus gets path::Style::posix + # as native path style, regardless of what this is set to. + set(WINDOWS_PREFER_FORWARD_SLASH_DEFAULT ON) +endif() +option(LLVM_WINDOWS_PREFER_FORWARD_SLASH "Prefer path names with forward slashes on Windows." ${WINDOWS_PREFER_FORWARD_SLASH_DEFAULT}) + option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF) set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so") set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h") @@ -806,7 +814,7 @@ if(LLVM_INCLUDE_TESTS) endif() # For up-to-date instructions for installing the Tensorflow dependency, refer to -# the bot setup script: https://github.com/google/ml-compiler-opt/blob/master/buildbot/buildbot_init.sh +# the bot setup script: https://github.com/google/ml-compiler-opt/blob/main/buildbot/buildbot_init.sh # In this case, the latest C API library is available for download from # https://www.tensorflow.org/install/lang_c. # We will expose the conditional compilation variable, @@ -840,7 +848,7 @@ if (TENSORFLOW_C_LIB_PATH) endif() # For up-to-date instructions for installing the Tensorflow dependency, refer to -# the bot setup script: https://github.com/google/ml-compiler-opt/blob/master/buildbot/buildbot_init.sh +# the bot setup script: https://github.com/google/ml-compiler-opt/blob/main/buildbot/buildbot_init.sh # Specifically, assuming python3 is installed: # python3 -m pip install --upgrade pip && python3 -m pip install --user tf_nightly==2.3.0.dev20200528 # Then set TENSORFLOW_AOT_PATH to the package install - usually it's ~/.local/lib/python3.7/site-packages/tensorflow diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 695f8bef07dd6..6e3bfd1310646 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -166,33 +166,43 @@ function(add_llvm_symbol_exports target_name export_file) set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE) endfunction(add_llvm_symbol_exports) -if (NOT DEFINED LLVM_LINKER_DETECTED) +if (NOT DEFINED LLVM_LINKER_DETECTED AND NOT WIN32) + # Detect what linker we have here. + if(APPLE) + # Linkers with ld64-compatible flags. 
+ set(version_flag "-Wl,-v") + else() + # Linkers with BFD ld-compatible flags. + set(version_flag "-Wl,--version") + endif() + + if(LLVM_USE_LINKER) + set(command ${CMAKE_C_COMPILER} -fuse-ld=${LLVM_USE_LINKER} ${version_flag}) + else() + separate_arguments(flags UNIX_COMMAND "${CMAKE_EXE_LINKER_FLAGS}") + set(command ${CMAKE_C_COMPILER} ${flags} ${version_flag}) + endif() + execute_process( + COMMAND ${command} + OUTPUT_VARIABLE stdout + ERROR_VARIABLE stderr + ) + if(APPLE) - execute_process( - COMMAND "${CMAKE_LINKER}" -v - ERROR_VARIABLE stderr - ) if("${stderr}" MATCHES "PROJECT:ld64") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") set(LLVM_LINKER_IS_LD64 YES CACHE INTERNAL "") message(STATUS "Linker detection: ld64") + elseif("${stderr}" MATCHES "^LLD" OR + "${stdout}" MATCHES "^LLD") + set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") + set(LLVM_LINKER_IS_LLD YES CACHE INTERNAL "") + message(STATUS "Linker detection: lld") else() set(LLVM_LINKER_DETECTED NO CACHE INTERNAL "") message(STATUS "Linker detection: unknown") endif() - elseif(NOT WIN32) - # Detect what linker we have here - if( LLVM_USE_LINKER ) - set(command ${CMAKE_C_COMPILER} -fuse-ld=${LLVM_USE_LINKER} -Wl,--version) - else() - separate_arguments(flags UNIX_COMMAND "${CMAKE_EXE_LINKER_FLAGS}") - set(command ${CMAKE_C_COMPILER} ${flags} -Wl,--version) - endif() - execute_process( - COMMAND ${command} - OUTPUT_VARIABLE stdout - ERROR_VARIABLE stderr - ) + else() if("${stdout}" MATCHES "^mold") set(LLVM_LINKER_DETECTED YES CACHE INTERNAL "") message(STATUS "Linker detection: mold") diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake index 5e9e2674405ee..442b000e8a2a6 100644 --- a/llvm/cmake/modules/TableGen.cmake +++ b/llvm/cmake/modules/TableGen.cmake @@ -80,6 +80,10 @@ function(tablegen project ofn) set(tblgen_change_flag "--write-if-changed") endif() + if (NOT LLVM_ENABLE_WARNINGS) + list(APPEND LLVM_TABLEGEN_FLAGS "-no-warn-on-unused-template-args") + endif() + # We need both _TABLEGEN_TARGET and _TABLEGEN_EXE in the DEPENDS list # (both the target and the file) to have .inc files rebuilt on # a tablegen change, as cmake does not propagate file-level dependencies diff --git a/llvm/docs/CompileCudaWithLLVM.rst b/llvm/docs/CompileCudaWithLLVM.rst index a2d7fd0b7453a..a63f5a1114132 100644 --- a/llvm/docs/CompileCudaWithLLVM.rst +++ b/llvm/docs/CompileCudaWithLLVM.rst @@ -144,9 +144,9 @@ device side. ---------------------------- In clang, ``math.h`` and ``cmath`` are available and `pass -`_ +`_ `tests -`_ +`_ adapted from libc++'s test suite. In nvcc ``math.h`` and ``cmath`` are mostly available. Versions of ``::foof`` diff --git a/llvm/docs/HowToAddABuilder.rst b/llvm/docs/HowToAddABuilder.rst index 4e8e84aed6271..95dcf0307eb45 100644 --- a/llvm/docs/HowToAddABuilder.rst +++ b/llvm/docs/HowToAddABuilder.rst @@ -121,6 +121,14 @@ Here are the steps you can follow to do so: Please make sure your builder name and its builddir are unique through the file. + All new builders should default to using the "'collapseRequests': False" + configuration. This causes the builder to build each commit individually + and not merge build requests. To maximize quality of feedback to developers, + we *strongly prefer* builders to be configured not to collapse requests. + This flag should be removed only after all reasonable efforts have been + exhausted to improve build times such that the builder can keep up with + commit flow. 
+ It is possible to allow email addresses to unconditionally receive notifications on build failure; for this you'll need to add an ``InformativeMailNotifier`` to ``buildbot/osuosl/master/config/status.py``. diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 601911d1e035d..3a5bc9c199d55 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -19558,7 +19558,7 @@ This is an overloaded intrinsic. :: declare <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %vec1, <2 x double> %vec2, i32 %imm, <2 x i1> %mask, i32 %evl1, i32 %evl2) - declare <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, i32 %imm, <2 x i1> %mask i32 %evl1, i32 %evl2) + declare <vscale x 4 x i32> @llvm.experimental.vp.splice.nxv4i32(<vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, i32 %imm, <vscale x 4 x i1> %mask, i32 %evl1, i32 %evl2) Overview: """"""""" @@ -19604,6 +19604,225 @@ Examples: llvm.experimental.vp.splice(<A,B,C,D>, <E,F,G,H>, -2, 3, 2) ==> <B, C> ; trailing elements +.. _int_vp_load: + +'``llvm.vp.load``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <4 x float> @llvm.vp.load.v4f32.p0v4f32(<4 x float>* %ptr, <4 x i1> %mask, i32 %evl) + declare <vscale x 2 x i16> @llvm.vp.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* %ptr, <vscale x 2 x i1> %mask, i32 %evl) + declare <8 x float> @llvm.vp.load.v8f32.p1v8f32(<8 x float> addrspace(1)* %ptr, <8 x i1> %mask, i32 %evl) + declare <vscale x 1 x i64> @llvm.vp.load.nxv1i64.p6nxv1i64(<vscale x 1 x i64> addrspace(6)* %ptr, <vscale x 1 x i1> %mask, i32 %evl) + +Overview: +""""""""" + +The '``llvm.vp.load.*``' intrinsic is the vector length predicated version of +the :ref:`llvm.masked.load <int_mload>` intrinsic. + +Arguments: +"""""""""" + +The first operand is the base pointer for the load. The second operand is a +vector of boolean values with the same number of elements as the return type. +The third is the explicit vector length of the operation. The return type and +underlying type of the base pointer are the same vector types. + +Semantics: +"""""""""" + +The '``llvm.vp.load``' intrinsic reads a vector from memory in the same way as +the '``llvm.masked.load``' intrinsic, where the mask is taken from the +combination of the '``mask``' and '``evl``' operands in the usual VP way. Of +the '``llvm.masked.load``' operands not set by '``llvm.vp.load``': the +'``passthru``' operand is implicitly ``undef``; the '``alignment``' operand is +taken as the ABI alignment of the return type as specified by the +:ref:`datalayout string<langref_datalayout>`. + +Examples: +""""""""" + +.. code-block:: text + + %r = call <8 x i8> @llvm.vp.load.v8i8.p0v8i8(<8 x i8>* %ptr, <8 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r + ;; Note that since the alignment is ultimately up to the data layout + ;; string, 8 (the default) is used as an example. + + %also.r = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %ptr, i32 8, <8 x i1> %mask, <8 x i8> undef) + + +.. _int_vp_store: + +'``llvm.vp.store``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare void @llvm.vp.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %mask, i32 %evl) + declare void @llvm.vp.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16>* %ptr, <vscale x 2 x i1> %mask, i32 %evl) + declare void @llvm.vp.store.v8f32.p1v8f32(<8 x float> %val, <8 x float> addrspace(1)* %ptr, <8 x i1> %mask, i32 %evl) + declare void @llvm.vp.store.nxv1i64.p6nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64> addrspace(6)* %ptr, <vscale x 1 x i1> %mask, i32 %evl) + +Overview: +""""""""" + +The '``llvm.vp.store.*``' intrinsic is the vector length predicated version of +the :ref:`llvm.masked.store <int_mstore>` intrinsic.
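Throughout these new sections, "the usual VP way" of combining the ``mask`` and ``evl`` operands means that a lane participates only if its mask bit is set and its index is below the explicit vector length. A scalar sketch of that per-lane predicate (editor's illustration in C++, not LLVM code):

    #include <cstddef>
    #include <vector>

    // Effective per-lane predicate of a VP memory operation: the %mask bit
    // ANDed with the lane index being below %evl.
    std::vector<bool> effectiveLanes(const std::vector<bool> &Mask, size_t EVL) {
      std::vector<bool> Active(Mask.size());
      for (size_t I = 0; I < Mask.size(); ++I)
        Active[I] = Mask[I] && I < EVL;
      return Active;
    }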
+ +Arguments: +"""""""""" + +The first operand is the vector value to be written to memory. The second +operand is the base pointer for the store. It has the same underlying type as +the value operand. The third operand is a vector of boolean values with the +same number of elements as the value operand. The fourth is the explicit vector +length of the operation. + +Semantics: +"""""""""" + +The '``llvm.vp.store``' intrinsic writes a vector to memory in the same way as +the '``llvm.masked.store``' intrinsic, where the mask is taken from the +combination of the '``mask``' and '``evl``' operands in the usual VP way. The +'``alignment``' operand of the '``llvm.masked.store``' intrinsic is not set by +'``llvm.vp.store``': it is taken as the ABI alignment of the type of the +'``value``' operand as specified by the :ref:`datalayout +string<langref_datalayout>`. + +Examples: +""""""""" + +.. code-block:: text + + call void @llvm.vp.store.v8i8.p0v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, the call above is lane-wise equivalent to the call below. + ;; Note that since the alignment is ultimately up to the data layout + ;; string, 8 (the default) is used as an example. + + call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %val, <8 x i8>* %ptr, i32 8, <8 x i1> %mask) + + +.. _int_vp_gather: + +'``llvm.vp.gather``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic. + +:: + + declare <4 x double> @llvm.vp.gather.v4f64.v4p0f64(<4 x double*> %ptrs, <4 x i1> %mask, i32 %evl) + declare <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0i8(<vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask, i32 %evl) + declare <2 x float> @llvm.vp.gather.v2f32.v2p2f32(<2 x float addrspace(2)*> %ptrs, <2 x i1> %mask, i32 %evl) + declare <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p4i32(<vscale x 4 x i32 addrspace(4)*> %ptrs, <vscale x 4 x i1> %mask, i32 %evl) + +Overview: +""""""""" + +The '``llvm.vp.gather.*``' intrinsic is the vector length predicated version of +the :ref:`llvm.masked.gather <int_mgather>` intrinsic. + +Arguments: +"""""""""" + +The first operand is a vector of pointers which holds all memory addresses to +read. The second operand is a vector of boolean values with the same number of +elements as the return type. The third is the explicit vector length of the +operation. The return type and underlying type of the vector of pointers are +the same vector types. + +Semantics: +"""""""""" + +The '``llvm.vp.gather``' intrinsic reads multiple scalar values from memory in +the same way as the '``llvm.masked.gather``' intrinsic, where the mask is taken +from the combination of the '``mask``' and '``evl``' operands in the usual VP +way. Of the '``llvm.masked.gather``' operands not set by '``llvm.vp.gather``': +the '``passthru``' operand is implicitly ``undef``; the '``alignment``' operand +is taken as the ABI alignment of the source addresses as specified by the +:ref:`datalayout string<langref_datalayout>`. + +Examples: +""""""""" + +.. code-block:: text + + %r = call <8 x i8> @llvm.vp.gather.v8i8.v8p0i8(<8 x i8*> %ptrs, <8 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, %r is lane-wise equivalent to %also.r + ;; Note that since the alignment is ultimately up to the data layout + ;; string, 8 is used as an example. + + %also.r = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> %ptrs, i32 8, <8 x i1> %mask, <8 x i8> undef) + + +.. _int_vp_scatter: + +'``llvm.vp.scatter``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" +This is an overloaded intrinsic.
+ +:: + + declare void @llvm.vp.scatter.v4f64.v4p0f64(<4 x double> %val, <4 x double*> %ptrs, <4 x i1> %mask, i32 %evl) + declare void @llvm.vp.scatter.nxv2i8.nxv2p0i8(<vscale x 2 x i8> %val, <vscale x 2 x i8*> %ptrs, <vscale x 2 x i1> %mask, i32 %evl) + declare void @llvm.vp.scatter.v2f32.v2p2f32(<2 x float> %val, <2 x float addrspace(2)*> %ptrs, <2 x i1> %mask, i32 %evl) + declare void @llvm.vp.scatter.nxv4i32.nxv4p4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32 addrspace(4)*> %ptrs, <vscale x 4 x i1> %mask, i32 %evl) + +Overview: +""""""""" + +The '``llvm.vp.scatter.*``' intrinsic is the vector length predicated version of +the :ref:`llvm.masked.scatter <int_mscatter>` intrinsic. + +Arguments: +"""""""""" + +The first operand is a vector value to be written to memory. The second operand +is a vector of pointers, pointing to where the value elements should be stored. +The third operand is a vector of boolean values with the same number of +elements as the value operand. The fourth is the explicit vector length of the +operation. + +Semantics: +"""""""""" + +The '``llvm.vp.scatter``' intrinsic writes multiple scalar values to memory in +the same way as the '``llvm.masked.scatter``' intrinsic, where the mask is +taken from the combination of the '``mask``' and '``evl``' operands in the +usual VP way. The '``alignment``' operand of the '``llvm.masked.scatter``' +intrinsic is not set by '``llvm.vp.scatter``': it is taken as the ABI alignment +of the destination addresses as specified by the :ref:`datalayout +string<langref_datalayout>`. + +Examples: +""""""""" + +.. code-block:: text + + call void @llvm.vp.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, <8 x i1> %mask, i32 %evl) + ;; For all lanes below %evl, the call above is lane-wise equivalent to the call below. + ;; Note that since the alignment is ultimately up to the data layout + ;; string, 8 is used as an example. + + call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> %val, <8 x i8*> %ptrs, i32 8, <8 x i1> %mask) + + .. _int_mload_mstore: Masked Vector Load and Store Intrinsics diff --git a/llvm/docs/Proposals/TestSuite.rst b/llvm/docs/Proposals/TestSuite.rst index 29507495c1168..33bb890244ffb 100644 --- a/llvm/docs/Proposals/TestSuite.rst +++ b/llvm/docs/Proposals/TestSuite.rst @@ -219,7 +219,7 @@ http://www.nwchem-sw.org/index.php/Benchmarks TVM ---- -https://github.com/dmlc/tvm/tree/master/apps/benchmark +https://github.com/dmlc/tvm/tree/main/apps/benchmark HydroBench ---------- @@ -227,7 +227,7 @@ https://github.com/HydroBench/Hydro ParRes ------ -https://github.com/ParRes/Kernels/tree/master/Cxx11 +https://github.com/ParRes/Kernels/tree/default/Cxx11 Applications/Libraries ====================== diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst index 88f08cd88c32a..6449661e5d38a 100644 --- a/llvm/docs/TestingGuide.rst +++ b/llvm/docs/TestingGuide.rst @@ -275,6 +275,45 @@ Put related tests into a single file rather than having a separate file per test. Check if there are files already covering your feature and consider adding your code there instead of creating a new file. +Generating assertions in regression tests +----------------------------------------- + +Some regression test cases are very large and complex to write/update by hand. +In that case, to reduce the manual work, we can use the scripts available in +llvm/utils/ to generate the assertions. + +For example, to generate assertions in an :program:`llc`-based test, run: + + ..
code-block:: bash + + % llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc test.ll + +If you want to update assertions in an existing test case, pass the ``-u`` +option, which first checks that the ``NOTE:`` line exists and matches the +script name. + +These are the most common scripts and their purposes/applications in generating +assertions: + +.. code-block:: + + update_analyze_test_checks.py + opt --analyze --costmodel + + update_cc_test_checks.py + C/C++, or clang/clang++ (IR checks) + + update_llc_test_checks.py + llc (assembly checks) + + update_mca_test_checks.py + llvm-mca + + update_mir_test_checks.py + llc (MIR checks) + + update_test_checks.py + opt + Extra files ----------- @@ -294,9 +333,10 @@ using ``split-file`` to extract them. For example, ;--- b.ll ... -The parts are separated by the regex ``^(.|//)--- ``. By default the -extracted content has leading empty lines to preserve line numbers. Specify -``--no-leading-lines`` to drop leading lines. +The parts are separated by the regex ``^(.|//)--- ``. + +If you want to test relative line numbers like ``[[#@LINE+1]]``, specify +``--leading-lines`` to add leading empty lines to preserve line numbers. If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``. You can then refer to the extra files as ``%S/Inputs/foo.bar``. diff --git a/llvm/docs/doxygen.cfg.in b/llvm/docs/doxygen.cfg.in index 7a6d531ad2559..b19ca6d215807 100644 --- a/llvm/docs/doxygen.cfg.in +++ b/llvm/docs/doxygen.cfg.in @@ -1220,7 +1220,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# YES) or that it should be included in the main .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. diff --git a/llvm/include/llvm-c/Comdat.h b/llvm/include/llvm-c/Comdat.h index 81cde1107fa45..8002bc0581af7 100644 --- a/llvm/include/llvm-c/Comdat.h +++ b/llvm/include/llvm-c/Comdat.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreComdat Comdats + * @ingroup LLVMCCore + * + * @{ + */ + typedef enum { LLVMAnyComdatSelectionKind, ///< The linker may choose any COMDAT. LLVMExactMatchComdatSelectionKind, ///< The data referenced by the COMDAT must @@ -66,6 +73,10 @@ LLVMComdatSelectionKind LLVMGetComdatSelectionKind(LLVMComdatRef C); */ void LLVMSetComdatSelectionKind(LLVMComdatRef C, LLVMComdatSelectionKind Kind); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index a7324c6787807..d170eff17951c 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -4091,6 +4091,7 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf); /** * @defgroup LLVMCCorePassRegistry Pass Registry + * @ingroup LLVMCCore * * @{ */ @@ -4105,6 +4106,7 @@ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void); /** * @defgroup LLVMCCorePassManagers Pass Managers + * @ingroup LLVMCCore * * @{ */ diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 0c7b5db087e3d..d7fb898b60d21 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -21,6 +21,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreDebugInfo Debug Information + * @ingroup LLVMCCore + * + * @{ + */ + /** * Debug info flags.
*/ @@ -1367,6 +1374,10 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc); */ LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h index ae5c682275941..53baaef110335 100644 --- a/llvm/include/llvm-c/DisassemblerTypes.h +++ b/llvm/include/llvm-c/DisassemblerTypes.h @@ -17,6 +17,12 @@ #include #endif +/** + * @addtogroup LLVMCDisassembler + * + * @{ + */ + /** * An opaque reference to a disassembler context. */ @@ -157,4 +163,8 @@ typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo, /* The output reference is to a C++ symbol name. */ #define LLVMDisassembler_ReferenceType_DeMangled_Name 9 +/** + * @} + */ + #endif diff --git a/llvm/include/llvm-c/Error.h b/llvm/include/llvm-c/Error.h index bc702ac7a1bf7..c3baaf65186aa 100644 --- a/llvm/include/llvm-c/Error.h +++ b/llvm/include/llvm-c/Error.h @@ -18,6 +18,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCError Error Handling + * @ingroup LLVMC + * + * @{ + */ + #define LLVMErrorSuccess 0 /** @@ -67,6 +74,10 @@ LLVMErrorTypeId LLVMGetStringErrorTypeId(void); */ LLVMErrorRef LLVMCreateStringError(const char *ErrMsg); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/ErrorHandling.h b/llvm/include/llvm-c/ErrorHandling.h index 5ba099c209c00..d9b9f22752b8f 100644 --- a/llvm/include/llvm-c/ErrorHandling.h +++ b/llvm/include/llvm-c/ErrorHandling.h @@ -18,6 +18,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCError + * + * @{ + */ + typedef void (*LLVMFatalErrorHandler)(const char *Reason); /** @@ -42,6 +48,10 @@ void LLVMResetFatalErrorHandler(void); */ void LLVMEnablePrettyStackTrace(void); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/IRReader.h b/llvm/include/llvm-c/IRReader.h index 5a3f633c3d919..905b84fa5a869 100644 --- a/llvm/include/llvm-c/IRReader.h +++ b/llvm/include/llvm-c/IRReader.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreIRReader IR Reader + * @ingroup LLVMCCore + * + * @{ + */ + /** * Read LLVM IR from a memory buffer and convert it into an in-memory Module * object. Returns 0 on success. @@ -32,6 +39,10 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, char **OutMessage); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/LLJIT.h b/llvm/include/llvm-c/LLJIT.h index f689ca0f1cf0b..a06133aac4fb0 100644 --- a/llvm/include/llvm-c/LLJIT.h +++ b/llvm/include/llvm-c/LLJIT.h @@ -31,6 +31,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCExecutionEngineLLJIT LLJIT + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + /** * A function for constructing an ObjectLinkingLayer instance to be used * by an LLJIT instance. @@ -235,6 +242,10 @@ LLVMOrcIRTransformLayerRef LLVMOrcLLJITGetIRTransformLayer(LLVMOrcLLJITRef J); */ const char *LLVMOrcLLJITGetDataLayoutStr(LLVMOrcLLJITRef J); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_LLJIT_H */ diff --git a/llvm/include/llvm-c/Linker.h b/llvm/include/llvm-c/Linker.h index 1ad9cc9587532..acff5d5e22253 100644 --- a/llvm/include/llvm-c/Linker.h +++ b/llvm/include/llvm-c/Linker.h @@ -19,6 +19,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCCoreLinker Linker + * @ingroup LLVMCCore + * + * @{ + */ + /* This enum is provided for backwards-compatibility only. It has no effect. 
*/ typedef enum { LLVMLinkerDestroySource = 0, /* This is the default behavior. */ @@ -35,4 +42,8 @@ LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src); LLVM_C_EXTERN_C_END +/** + * @} + */ + #endif diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index 480ce83f77349..e2f30b7cdf45b 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -33,6 +33,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCExecutionEngineORC On-Request-Compilation + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + /** * Represents an address in the executor process. */ @@ -1176,6 +1183,10 @@ void LLVMOrcDisposeDumpObjects(LLVMOrcDumpObjectsRef DumpObjects); LLVMErrorRef LLVMOrcDumpObjects_CallOperator(LLVMOrcDumpObjectsRef DumpObjects, LLVMMemoryBufferRef *ObjBuffer); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_ORC_H */ diff --git a/llvm/include/llvm-c/OrcEE.h b/llvm/include/llvm-c/OrcEE.h index 2435e7421a42b..e7ae0f5e6be20 100644 --- a/llvm/include/llvm-c/OrcEE.h +++ b/llvm/include/llvm-c/OrcEE.h @@ -32,6 +32,13 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @defgroup LLVMCExecutionEngineORCEE ExecutionEngine-based ORC Utils + * @ingroup LLVMCExecutionEngine + * + * @{ + */ + /** * Create a RTDyldObjectLinkingLayer instance using the standard * SectionMemoryManager for memory management. @@ -50,6 +57,10 @@ void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener( LLVMOrcObjectLayerRef RTDyldObjLinkingLayer, LLVMJITEventListenerRef Listener); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif /* LLVM_C_ORCEE_H */ diff --git a/llvm/include/llvm-c/Support.h b/llvm/include/llvm-c/Support.h index 866df32efa980..17657861b32b9 100644 --- a/llvm/include/llvm-c/Support.h +++ b/llvm/include/llvm-c/Support.h @@ -20,6 +20,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCCore + * + * @{ + */ + /** * This function permanently loads the dynamic library at the given path. * It is safe to call this function multiple times for the same library. @@ -57,6 +63,10 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName); */ void LLVMAddSymbol(const char *symbolName, void *symbolValue); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/TargetMachine.h b/llvm/include/llvm-c/TargetMachine.h index f82edd948b595..23c8c63ff0b46 100644 --- a/llvm/include/llvm-c/TargetMachine.h +++ b/llvm/include/llvm-c/TargetMachine.h @@ -25,6 +25,12 @@ LLVM_C_EXTERN_C_BEGIN +/** + * @addtogroup LLVMCTarget + * + * @{ + */ + typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef; typedef struct LLVMTarget *LLVMTargetRef; @@ -156,6 +162,10 @@ char* LLVMGetHostCPUFeatures(void); /** Adds the target-specific analysis passes to the pass manager. 
*/ void LLVMAddAnalysisPasses(LLVMTargetMachineRef T, LLVMPassManagerRef PM); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif diff --git a/llvm/include/llvm-c/Transforms/PassBuilder.h b/llvm/include/llvm-c/Transforms/PassBuilder.h index 0de9be610bbee..6d9f1b45c7074 100644 --- a/llvm/include/llvm-c/Transforms/PassBuilder.h +++ b/llvm/include/llvm-c/Transforms/PassBuilder.h @@ -18,6 +18,13 @@ #include "llvm-c/TargetMachine.h" #include "llvm-c/Types.h" +/** + * @defgroup LLVMCCoreNewPM New Pass Manager + * @ingroup LLVMCCore + * + * @{ + */ + LLVM_C_EXTERN_C_BEGIN /** @@ -97,6 +104,10 @@ void LLVMPassBuilderOptionsSetMergeFunctions(LLVMPassBuilderOptionsRef Options, */ void LLVMDisposePassBuilderOptions(LLVMPassBuilderOptionsRef Options); +/** + * @} + */ + LLVM_C_EXTERN_C_END #endif // LLVM_C_TRANSFORMS_PASSBUILDER_H diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h new file mode 100644 index 0000000000000..ab6afd555726d --- /dev/null +++ b/llvm/include/llvm/ADT/CombinationGenerator.h @@ -0,0 +1,148 @@ +//===-- llvm/ADT/CombinationGenerator.h ------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Combination generator. +/// +/// Example: given input {{0, 1}, {2}, {3, 4}} it will produce the following +/// combinations: {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4}. +/// +/// It is useful to think of the input as a vector-of-vectors, where the +/// outer vector is the variable space, and the inner vector is the choice space. +/// The number of choices for each variable can be different. +/// +/// As for implementation, it is useful to think of this as a weird number, +/// where each digit (==variable) may have a different base (==number of choices). +/// Thus modelling of 'produce next combination' is exactly analogous to the +/// incrementing of a number - increment the lowest digit (pick the next choice for +/// the variable), and if it wrapped to the beginning then increment the next digit. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_COMBINATIONGENERATOR_H +#define LLVM_ADT_COMBINATIONGENERATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include <cassert> +#include <cstddef> + +namespace llvm { + +template <typename choice_ty, typename choices_storage_ty, + int variable_smallsize> +class CombinationGenerator { + template <typename T> struct WrappingIterator { + using value_type = T; + + const ArrayRef<T> Range; + typename decltype(Range)::const_iterator Position; + + // Rewind the tape, placing the position to again point at the beginning. + void rewind() { Position = Range.begin(); } + + // Advance position forward, possibly wrapping to the beginning. + // Returns whether the wrap happened. + bool advance() { + ++Position; + bool Wrapped = Position == Range.end(); + if (Wrapped) + rewind(); + return Wrapped; + } + + // Get the value at which we are currently pointing. + const value_type &operator*() const { return *Position; } + + WrappingIterator(ArrayRef<T> Range_) : Range(Range_) { + assert(!Range.empty() && "The range must not be empty."); + rewind(); + } + }; + + const ArrayRef<choices_storage_ty> VariablesChoices; + + void performGeneration( + const function_ref<bool(ArrayRef<choice_ty>)> Callback) const { + SmallVector<WrappingIterator<choice_ty>, variable_smallsize> + VariablesState; + + // 'increment' of the whole VariablesState is defined identically to the + // increment of a number: starting from the least significant element, + // increment it, and if it wrapped, then propagate that carry by also + // incrementing the next (more significant) element. + auto IncrementState = + [](MutableArrayRef<WrappingIterator<choice_ty>> VariablesState) + -> bool { + for (WrappingIterator<choice_ty> &Variable : + llvm::reverse(VariablesState)) { + bool Wrapped = Variable.advance(); + if (!Wrapped) + return false; // There you go, the next combination is ready. + // We have a carry - increment the more significant variable next. + } + return true; // MSB variable wrapped, no more unique combinations. + }; + + // Initialize the per-variable state to refer to the possible choices for + // that variable. + VariablesState.reserve(VariablesChoices.size()); + for (ArrayRef<choice_ty> VC : VariablesChoices) + VariablesState.emplace_back(VC); + + // Temporary buffer to store each combination before performing Callback. + SmallVector<choice_ty, variable_smallsize> CurrentCombination; + CurrentCombination.resize(VariablesState.size()); + + while (true) { + // Gather the currently-selected variable choices into a vector. + for (auto I : llvm::zip(VariablesState, CurrentCombination)) + std::get<1>(I) = *std::get<0>(I); + // And pass the new combination into the callback, as intended. + if (/*Abort=*/Callback(CurrentCombination)) + return; + // And tick the state to the next combination, which will be unique. + if (IncrementState(VariablesState)) + return; // All combinations produced. + } + }; + +public: + CombinationGenerator(ArrayRef<choices_storage_ty> VariablesChoices_) + : VariablesChoices(VariablesChoices_) { +#ifndef NDEBUG + assert(!VariablesChoices.empty() && "There should be some variables."); + llvm::for_each(VariablesChoices, [](ArrayRef<choice_ty> VariableChoices) { + assert(!VariableChoices.empty() && + "There must always be some choice, at least a placeholder one."); + }); +#endif + } + + // How many combinations can we produce, max? + // This is at most how many times the callback will be called. + size_t numCombinations() const { + size_t NumVariants = 1; + for (ArrayRef<choice_ty> VariableChoices : VariablesChoices) + NumVariants *= VariableChoices.size(); + assert(NumVariants >= 1 && + "We should always end up producing at least one combination"); + return NumVariants; + } + + // Actually perform exhaustive combination generation. + // Each result will be passed into the callback. + void generate(const function_ref<bool(ArrayRef<choice_ty>)> Callback) { + performGeneration(Callback); + } +}; + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h index 273b00f99d5d8..de6bb3bca7e33 100644 --- a/llvm/include/llvm/ADT/EquivalenceClasses.h +++ b/llvm/include/llvm/ADT/EquivalenceClasses.h @@ -30,7 +30,8 @@ namespace llvm { /// /// This implementation is an efficient implementation that only stores one copy /// of the element being indexed per entry in the set, and allows any arbitrary -/// type to be indexed (as long as it can be ordered with operator<). +/// type to be indexed (as long as it can be ordered with operator< or a +/// comparator is provided).
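Stepping back to the CombinationGenerator header introduced above: the class enumerates the cartesian product of the per-variable choice lists, invoking the callback once per combination until it returns true. A hedged usage sketch (the template arguments follow the editor's reading of the declaration above; the expected output follows the header's own documented example):

    #include "llvm/ADT/CombinationGenerator.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cassert>

    using namespace llvm;

    void enumerate() {
      // The documented example input {{0, 1}, {2}, {3, 4}}.
      SmallVector<SmallVector<int, 2>, 4> Choices = {{0, 1}, {2}, {3, 4}};
      CombinationGenerator<int, SmallVector<int, 2>, 4> G(Choices);
      assert(G.numCombinations() == 4);
      G.generate([](ArrayRef<int> Combination) -> bool {
        for (int V : Combination)
          outs() << V << ' ';
        outs() << '\n';
        return false; // Keep going; returning true would abort generation.
      });
      // Prints "0 2 3", "0 2 4", "1 2 3", "1 2 4", one combination per line.
    }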
/// /// Here is a simple example using integers: /// @@ -54,7 +55,7 @@ namespace llvm { /// 4 /// 5 1 2 /// -template <class ElemTy> +template <class ElemTy, class Compare = std::less<ElemTy>> class EquivalenceClasses { /// ECValue - The EquivalenceClasses data structure is just a set of these. /// Each of these represents a relation for a value. First it stores the @@ -101,22 +102,40 @@ class EquivalenceClasses { assert(RHS.isLeader() && RHS.getNext() == nullptr && "Not a singleton!"); } - bool operator<(const ECValue &UFN) const { return Data < UFN.Data; } - bool isLeader() const { return (intptr_t)Next & 1; } const ElemTy &getData() const { return Data; } const ECValue *getNext() const { return (ECValue*)((intptr_t)Next & ~(intptr_t)1); } + }; + + /// A wrapper of the comparator, to be passed to the set. + struct ECValueComparator { + using is_transparent = void; + + ECValueComparator() : compare(Compare()) {} + + bool operator()(const ECValue &lhs, const ECValue &rhs) const { + return compare(lhs.Data, rhs.Data); + } + + template <typename T> + bool operator()(const T &lhs, const ECValue &rhs) const { + return compare(lhs, rhs.Data); + } + + template <typename T> + bool operator()(const ECValue &lhs, const T &rhs) const { + return compare(lhs.Data, rhs); + } - template <typename T> - bool operator<(const T &Val) const { return Data < Val; } + const Compare compare; }; /// TheMapping - This implicitly provides a mapping from ElemTy values to the /// ECValues, it just keeps the key as part of the value. - std::set<ECValue> TheMapping; + std::set<ECValue, ECValueComparator> TheMapping; public: EquivalenceClasses() = default; diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 68bc656042073..daa6d257dd000 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1251,20 +1251,39 @@ class indexed_accessor_range } }; +namespace detail { +/// Return a reference to the first or second member of a reference. Otherwise, +/// return a copy of the member of a temporary. +/// +/// When passing a range whose iterators return values instead of references, +/// the reference must be dropped from `decltype((elt.first))`, which will +/// always be a reference, to avoid returning a reference to a temporary. +template <typename EltTy, typename FirstTy> class first_or_second_type { +public: + using type = + typename std::conditional_t<std::is_reference<EltTy>::value, FirstTy, + std::remove_reference_t<FirstTy>>; +}; +} // end namespace detail + /// Given a container of pairs, return a range over the first elements. template <typename ContainerTy> auto make_first_range(ContainerTy &&c) { - return llvm::map_range( - std::forward<ContainerTy>(c), - [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) { - return elt.first; - }); + using EltTy = decltype((*std::begin(c))); + return llvm::map_range(std::forward<ContainerTy>(c), + [](EltTy elt) -> typename detail::first_or_second_type< + EltTy, decltype((elt.first))>::type { + return elt.first; + }); } /// Given a container of pairs, return a range over the second elements. template <typename ContainerTy> auto make_second_range(ContainerTy &&c) { + using EltTy = decltype((*std::begin(c))); return llvm::map_range( std::forward<ContainerTy>(c), - [](decltype((*std::begin(c))) elt) -> decltype((elt.second)) { + [](EltTy elt) -> + typename detail::first_or_second_type<EltTy, decltype((elt.second))>::type { return elt.second; }); } diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h index 9fcb3034ee73f..fdbf397984d0b 100644 --- a/llvm/include/llvm/ADT/Sequence.h +++ b/llvm/include/llvm/ADT/Sequence.h @@ -31,6 +31,50 @@ /// /// Prints: `0 1 2 3 `.
/// +/// Similar to `seq` and `seq_inclusive`, the `enum_seq` and +/// `enum_seq_inclusive` functions produce sequences of enum values that can be +/// iterated over. +/// To enable iteration with enum types, you need to either mark enums as safe +/// to iterate on by specializing `enum_iteration_traits`, or opt into +/// potentially unsafe iteration at every callsite by passing +/// `force_iteration_on_noniterable_enum`. +/// +/// Examples with enum types: +/// ``` +/// namespace X { +/// enum class MyEnum : unsigned {A = 0, B, C}; +/// } // namespace X +/// +/// template <> struct enum_iteration_traits<X::MyEnum> { +/// static constexpr bool is_iterable = true; +/// }; +/// +/// class MyClass { +/// public: +/// enum Safe { D = 3, E, F }; +/// enum MaybeUnsafe { G = 1, H = 2, I = 4 }; +/// }; +/// +/// template <> struct enum_iteration_traits<MyClass::Safe> { +/// static constexpr bool is_iterable = true; +/// }; +/// ``` +/// +/// ``` +/// for (auto v : enum_seq(MyClass::Safe::D, MyClass::Safe::F)) +/// outs() << int(v) << " "; +/// ``` +/// +/// Prints: `3 4 `. +/// +/// ``` +/// for (auto v : enum_seq(MyClass::MaybeUnsafe::H, MyClass::MaybeUnsafe::I, +/// force_iteration_on_noniterable_enum)) +/// outs() << int(v) << " "; +/// ``` +/// +/// Prints: `2 3 `. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_SEQUENCE_H @@ -39,12 +83,31 @@ #include <cassert> // assert #include <iterator> // std::random_access_iterator_tag #include <limits> // std::numeric_limits -#include <type_traits> // std::underlying_type, std::is_enum +#include <type_traits> // std::is_integral, std::is_enum, std::underlying_type, + // std::enable_if #include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow namespace llvm { +// Enum traits that mark enums as safe or unsafe to iterate over. +// By default, enum types are *not* considered safe for iteration. +// To allow iteration for your enum type, provide a specialization with +// `is_iterable` set to `true` in the `llvm` namespace. +// Alternatively, you can pass the `force_iteration_on_noniterable_enum` tag +// to `enum_seq` or `enum_seq_inclusive`. +template <typename EnumT> struct enum_iteration_traits { + static constexpr bool is_iterable = false; +}; + +struct force_iteration_on_noniterable_enum_t { + explicit force_iteration_on_noniterable_enum_t() = default; +}; + +// TODO: Make this `inline` once we update to C++17 to avoid ODR violations. +constexpr force_iteration_on_noniterable_enum_t + force_iteration_on_noniterable_enum; + namespace detail { // Returns whether a value of type U can be represented with type T. @@ -234,27 +297,81 @@ template <typename T> struct iota_range { iterator PastEndValue; }; -/// Iterate over an integral/enum type from Begin up to - but not including - -/// End. -/// Note on enum iteration: `seq` will generate each consecutive value, even if -/// no enumerator with that value exists. +/// Iterate over an integral type from Begin up to - but not including - End. /// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for /// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse /// iteration). -template <typename T> auto seq(T Begin, T End) { +template <typename T, typename = std::enable_if_t<std::is_integral<T>::value && + !std::is_enum<T>::value>> +auto seq(T Begin, T End) { return iota_range<T>(Begin, End, false); } -/// Iterate over an integral/enum type from Begin to End inclusive. -/// Note on enum iteration: `seq_inclusive` will generate each consecutive -/// value, even if no enumerator with that value exists. +/// Iterate over an integral type from Begin to End inclusive.
/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] /// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse /// iteration). -template auto seq_inclusive(T Begin, T End) { +template ::value && + !std::is_enum::value>> +auto seq_inclusive(T Begin, T End) { return iota_range(Begin, End, true); } +/// Iterate over an enum type from Begin up to - but not including - End. +/// Note: `enum_seq` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for +/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse +/// iteration). +template ::value>> +auto enum_seq(EnumT Begin, EnumT End) { + static_assert(enum_iteration_traits::is_iterable, + "Enum type is not marked as iterable."); + return iota_range(Begin, End, false); +} + +/// Iterate over an enum type from Begin up to - but not including - End, even +/// when `EnumT` is not marked as safely iterable by `enum_iteration_traits`. +/// Note: `enum_seq` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for +/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse +/// iteration). +template ::value>> +auto enum_seq(EnumT Begin, EnumT End, force_iteration_on_noniterable_enum_t) { + return iota_range(Begin, End, false); +} + +/// Iterate over an enum type from Begin to End inclusive. +/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] +/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse +/// iteration). +template ::value>> +auto enum_seq_inclusive(EnumT Begin, EnumT End) { + static_assert(enum_iteration_traits::is_iterable, + "Enum type is not marked as iterable."); + return iota_range(Begin, End, true); +} + +/// Iterate over an enum type from Begin to End inclusive, even when `EnumT` +/// is not marked as safely iterable by `enum_iteration_traits`. +/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no +/// enumerator with that value exists. +/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1] +/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse +/// iteration). +template ::value>> +auto enum_seq_inclusive(EnumT Begin, EnumT End, + force_iteration_on_noniterable_enum_t) { + return iota_range(Begin, End, true); +} + } // end namespace llvm #endif // LLVM_ADT_SEQUENCE_H diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index b8a11030fc331..0d13524f25ce0 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -1239,13 +1239,22 @@ inline size_t capacity_in_bytes(const SmallVector &X) { return X.capacity_in_bytes(); } +template +using ValueTypeFromRangeType = + typename std::remove_const()))>::type>::type; + /// Given a range of type R, iterate the entire range and return a /// SmallVector with elements of the vector. This is useful, for example, /// when you want to iterate a range and then sort the results. 
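(Editorial sketch for the SmallVector.h hunk that follows: `to_vector` now comes in two overloads, one taking an explicit inline element count and one deriving a default count from the value type. Assumes only the LLVM ADT headers; `demo` is an illustrative name:)

```
#include "llvm/ADT/SmallVector.h"
#include <vector>

// With an explicit size the result is SmallVector<int, 8>; without one, the
// inline element count is computed by
// CalculateSmallVectorDefaultInlinedElements for the deduced value type.
void demo(const std::vector<int> &In) {
  llvm::SmallVector<int, 8> Explicit = llvm::to_vector<8>(In);
  auto Defaulted = llvm::to_vector(In);
  (void)Explicit;
  (void)Defaulted;
}
```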
template -SmallVector()))>::type>::type, - Size> +SmallVector, Size> to_vector(R &&Range) { + return {std::begin(Range), std::end(Range)}; +} +template +SmallVector, + CalculateSmallVectorDefaultInlinedElements< + ValueTypeFromRangeType>::value> to_vector(R &&Range) { return {std::begin(Range), std::end(Range)}; } diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h index 0c2868040a44a..2ca672e7855b0 100644 --- a/llvm/include/llvm/ADT/StringExtras.h +++ b/llvm/include/llvm/ADT/StringExtras.h @@ -67,22 +67,27 @@ inline ArrayRef arrayRefFromStringRef(StringRef Input) { /// /// If \p C is not a valid hex digit, -1U is returned. inline unsigned hexDigitValue(char C) { - struct HexTable { - unsigned LUT[255] = {}; - constexpr HexTable() { - // Default initialize everything to invalid. - for (int i = 0; i < 255; ++i) - LUT[i] = ~0U; - // Initialize `0`-`9`. - for (int i = 0; i < 10; ++i) - LUT['0' + i] = i; - // Initialize `A`-`F` and `a`-`f`. - for (int i = 0; i < 6; ++i) - LUT['A' + i] = LUT['a' + i] = 10 + i; - } + /* clang-format off */ + static const int16_t LUT[256] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // '0'..'9' + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'A'..'F' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'a'..'f' + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; - constexpr HexTable Table; - return Table.LUT[static_cast(C)]; + /* clang-format on */ + return LUT[static_cast(C)]; } /// Checks if character \p C is one of the 10 decimal digits. @@ -210,24 +215,31 @@ inline bool tryGetFromHex(StringRef Input, std::string &Output) { if (Input.empty()) return true; - Output.reserve((Input.size() + 1) / 2); + // If the input string is not properly aligned on 2 nibbles we pad out the + // front with a 0 prefix; e.g. `ABC` -> `0ABC`. + Output.resize((Input.size() + 1) / 2); + char *OutputPtr = const_cast(Output.data()); if (Input.size() % 2 == 1) { uint8_t Hex = 0; if (!tryGetHexFromNibbles('0', Input.front(), Hex)) return false; - - Output.push_back(Hex); + *OutputPtr++ = Hex; Input = Input.drop_front(); } - assert(Input.size() % 2 == 0); - while (!Input.empty()) { + // Convert the nibble pairs (e.g. `9C`) into bytes (0x9C). + // With the padding above we know the input is aligned and the output expects + // exactly half as many bytes as nibbles in the input. 
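+  // Worked example (editorial, illustrative): for input "ABC", the odd-length
+  // branch above already emitted 0x0A for the implicit pair "0A"; the loop
+  // below then consumes the remaining pair "BC" and stores 0xBC.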
+  size_t InputSize = Input.size();
+  assert(InputSize % 2 == 0);
+  const char *InputPtr = Input.data();
+  for (size_t OutputIndex = 0; OutputIndex < InputSize / 2; ++OutputIndex) {
     uint8_t Hex = 0;
-    if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
+    if (!tryGetHexFromNibbles(InputPtr[OutputIndex * 2 + 0], // MSB
+                              InputPtr[OutputIndex * 2 + 1], // LSB
+                              Hex))
       return false;
-
-    Output.push_back(Hex);
-    Input = Input.drop_front(2);
+    OutputPtr[OutputIndex] = Hex;
   }
   return true;
 }
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index 4bcb68a397d6a..67b1d486cf7a5 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -93,6 +93,8 @@ class Triple {
     hsail64, // AMD HSAIL with 64-bit pointers
     spir,    // SPIR: standard portable IR for OpenCL 32-bit version
     spir64,  // SPIR: standard portable IR for OpenCL 64-bit version
+    spirv32, // SPIR-V with 32-bit pointers
+    spirv64, // SPIR-V with 64-bit pointers
     kalimba, // Kalimba: generic kalimba
     shave,   // SHAVE: Movidius vector VLIW processors
     lanai,   // Lanai: Lanai 32-bit
@@ -705,6 +707,11 @@ class Triple {
     return getArch() == Triple::spir || getArch() == Triple::spir64;
   }

+  /// Tests whether the target is SPIR-V (32/64-bit).
+  bool isSPIRV() const {
+    return getArch() == Triple::spirv32 || getArch() == Triple::spirv64;
+  }
+
   /// Tests whether the target is NVPTX (32- or 64-bit).
   bool isNVPTX() const {
     return getArch() == Triple::nvptx || getArch() == Triple::nvptx64;
diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 9acb7c0328ba1..856073260c747 100644
--- a/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -477,11 +477,12 @@ class CGSCCToFunctionPassAdaptor
 public:
   using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>;

-  explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass)
-      : Pass(std::move(Pass)) {}
+  explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass,
+                                      bool EagerlyInvalidate)
+      : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate) {}

   CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg)
-      : Pass(std::move(Arg.Pass)) {}
+      : Pass(std::move(Arg.Pass)), EagerlyInvalidate(Arg.EagerlyInvalidate) {}

   friend void swap(CGSCCToFunctionPassAdaptor &LHS,
                    CGSCCToFunctionPassAdaptor &RHS) {
@@ -499,7 +500,10 @@ class CGSCCToFunctionPassAdaptor
   void printPipeline(raw_ostream &OS,
                      function_ref<StringRef(StringRef)> MapClassName2PassName) {
-    OS << "function(";
+    OS << "function";
+    if (EagerlyInvalidate)
+      OS << "<eager-inv>";
+    OS << "(";
     Pass->printPipeline(OS, MapClassName2PassName);
     OS << ")";
   }
@@ -508,13 +512,15 @@ class CGSCCToFunctionPassAdaptor
 private:
   std::unique_ptr<PassConceptT> Pass;
+  bool EagerlyInvalidate;
 };

 /// A function to deduce a function pass type and wrap it in the
 /// templated adaptor.
 template <typename FunctionPassT>
 CGSCCToFunctionPassAdaptor
-createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) {
+createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass,
+                                 bool EagerlyInvalidate = false) {
   using PassModelT =
       detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
                         FunctionAnalysisManager>;
@@ -522,7 +528,8 @@ createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) {
   // causing terrible compile times.
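  // (Editorial note, not part of the patch: a caller can now opt into eager
  // invalidation per adaptor, e.g.
  //   createCGSCCToFunctionPassAdaptor(SimplifyCFGPass(),
  //                                    /*EagerlyInvalidate=*/true);
  // which printPipeline above renders as "function<eager-inv>(...)".)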
   return CGSCCToFunctionPassAdaptor(
       std::unique_ptr<CGSCCToFunctionPassAdaptor::PassConceptT>(
-          new PassModelT(std::forward<FunctionPassT>(Pass))));
+          new PassModelT(std::forward<FunctionPassT>(Pass))),
+      EagerlyInvalidate);
 }

 /// A helper that repeats an SCC pass each time an indirect call is refined to
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 6f83cb8b270ea..6e86f9c9b1b11 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -523,9 +523,6 @@ class ScalarEvolution {
   /// scAddRecExpr. The result will be cached in HasRecMap.
   bool containsAddRecurrence(const SCEV *S);

-  /// Erase Value from ValueExprMap and ExprValueMap.
-  void eraseValueFromMap(Value *V);
-
   /// Is operation \p BinOp between \p LHS and \p RHS provably does not have
   /// a signed/unsigned overflow (\p Signed)?
   bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
@@ -793,6 +790,13 @@
   /// Returns 0 if the trip count is unknown or not constant.
   unsigned getSmallConstantMaxTripCount(const Loop *L);

+  /// Returns the upper bound of the loop trip count inferred from array size.
+  /// Accessing bytes starting outside the statically allocated size is
+  /// immediate UB.
+  /// Returns SCEVCouldNotCompute if the trip count cannot be inferred from
+  /// array accesses.
+  const SCEV *getConstantMaxTripCountFromArray(const Loop *L);
+
   /// Returns the largest constant divisor of the trip count as a normal
   /// unsigned value, if possible. This means that the actual trip count is
   /// always a multiple of the returned value. Returns 1 if the trip count is
@@ -1897,6 +1901,9 @@
   /// Return an existing SCEV for V if there is one, otherwise return nullptr.
   const SCEV *getExistingSCEV(Value *V);

+  /// Erase Value from ValueExprMap and ExprValueMap.
+  void eraseValueFromMap(Value *V);
+
   /// Return false iff given SCEV contains a SCEVUnknown with NULL value-
   /// pointer.
   bool checkValidity(const SCEV *S) const;
@@ -2025,10 +2032,6 @@
   /// an add rec on said loop.
   void getUsedLoops(const SCEV *S, SmallPtrSetImpl &LoopsUsed);

-  /// Find all of the loops transitively used in \p S, and update \c LoopUsers
-  /// accordingly.
-  void addToLoopUseLists(const SCEV *S);
-
   /// Try to match the pattern generated by getURemExpr(A, B). If successful,
   /// Assign A and B to LHS and RHS, respectively.
   bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
@@ -2041,9 +2044,8 @@
   FoldingSet UniquePreds;
   BumpPtrAllocator SCEVAllocator;

-  /// This maps loops to a list of SCEV expressions that (transitively) use said
-  /// loop.
-  DenseMap> LoopUsers;
+  /// This maps loops to a list of addrecs that directly use said loop.
+  DenseMap> LoopUsers;

   /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression
   /// they can be rewritten into under certain predicates.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 370ab30726848..e3cf87612e9c3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -916,6 +916,9 @@ class TargetTransformInfo {
   /// architectural maximum vector length, and None otherwise.
   Optional getMaxVScale() const;

+  /// \return the value of vscale to tune the cost model for.
+ Optional getVScaleForTuning() const; + /// \return True if the vectorization factor should be chosen to /// make the vector of the smallest element type match the size of a /// vector register. For wider element types, this could result in @@ -1113,6 +1116,20 @@ class TargetTransformInfo { InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + /// \return The cost of replication shuffle of \p VF elements typed \p EltTy + /// \p ReplicationFactor times. + /// + /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: + /// <0,0,0,1,1,1,2,2,2,3,3,3> + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, + const APInt &DemandedSrcElts, + const APInt &DemandedReplicatedElts, + TTI::TargetCostKind CostKind); + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, ArrayRef Mask, + TTI::TargetCostKind CostKind); + /// \return The cost of Load and Store instructions. InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, @@ -1576,6 +1593,7 @@ class TargetTransformInfo::Concept { virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; virtual unsigned getMinVectorRegisterBitWidth() const = 0; virtual Optional getMaxVScale() const = 0; + virtual Optional getVScaleForTuning() const = 0; virtual bool shouldMaximizeVectorBandwidth() const = 0; virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const = 0; @@ -1636,6 +1654,15 @@ class TargetTransformInfo::Concept { const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; + + virtual InstructionCost getReplicationShuffleCost( + Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedSrcElts, + const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + ArrayRef Mask, + TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, @@ -2037,6 +2064,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { Optional getMaxVScale() const override { return Impl.getMaxVScale(); } + Optional getVScaleForTuning() const override { + return Impl.getVScaleForTuning(); + } bool shouldMaximizeVectorBandwidth() const override { return Impl.shouldMaximizeVectorBandwidth(); } @@ -2137,6 +2167,22 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } + InstructionCost + getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedSrcElts, + const APInt &DemandedReplicatedElts, + TTI::TargetCostKind CostKind) override { + return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, + DemandedSrcElts, + DemandedReplicatedElts, CostKind); + } + InstructionCost + getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + ArrayRef Mask, + TTI::TargetCostKind CostKind) override { + return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, Mask, + CostKind); + } InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 07344fc05036c..6f02b88e17db2 100644 --- 
a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -399,6 +399,7 @@ class TargetTransformInfoImplBase { unsigned getMinVectorRegisterBitWidth() const { return 128; } Optional getMaxVScale() const { return None; } + Optional getVScaleForTuning() const { return None; } bool shouldMaximizeVectorBandwidth() const { return false; } @@ -544,6 +545,18 @@ class TargetTransformInfoImplBase { return 1; } + unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + const APInt &DemandedSrcElts, + const APInt &DemandedReplicatedElts, + TTI::TargetCostKind CostKind) { + return 1; + } + unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, + ArrayRef Mask, + TTI::TargetCostKind CostKind) { + return 1; + } + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -1099,6 +1112,12 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + int ReplicationFactor, VF; + if (Shuffle->isReplicationMask(ReplicationFactor, VF)) + return TargetTTI->getReplicationShuffleCost( + VecSrcTy->getElementType(), ReplicationFactor, VF, + Shuffle->getShuffleMask(), CostKind); + return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; } diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 3ba84a99e3406..b4f38a3e976fd 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -203,6 +203,15 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6; const DominatorTree *DT = nullptr, bool UseInstrInfo = true); + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + unsigned ComputeMinSignedBits(const Value *Op, const DataLayout &DL, + unsigned Depth = 0, + AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); + /// This function computes the integer multiple of Base that equals V. If /// successful, it returns true and returns the multiple in Multiple. If /// unsuccessful, it returns false. Also, if V can be simplified to an diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index df9fd49606378..a270fd399aeb3 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1602,6 +1602,16 @@ enum { NT_FREEBSD_PROCSTAT_AUXV = 16, }; +// OpenBSD core note types. +enum { + NT_OPENBSD_PROCINFO = 10, + NT_OPENBSD_AUXV = 11, + NT_OPENBSD_REGS = 20, + NT_OPENBSD_FPREGS = 21, + NT_OPENBSD_XFPREGS = 22, + NT_OPENBSD_WCOOKIE = 23, +}; + // AMDGPU-specific section indices. 
enum { SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def index 9f2f0540bcbd3..4544509504449 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def @@ -46,10 +46,6 @@ ELF_RELOC(R_RISCV_ALIGN, 43) ELF_RELOC(R_RISCV_RVC_BRANCH, 44) ELF_RELOC(R_RISCV_RVC_JUMP, 45) ELF_RELOC(R_RISCV_RVC_LUI, 46) -ELF_RELOC(R_RISCV_GPREL_I, 47) -ELF_RELOC(R_RISCV_GPREL_S, 48) -ELF_RELOC(R_RISCV_TPREL_I, 49) -ELF_RELOC(R_RISCV_TPREL_S, 50) ELF_RELOC(R_RISCV_RELAX, 51) ELF_RELOC(R_RISCV_SUB6, 52) ELF_RELOC(R_RISCV_SET6, 53) diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h index 0940d189136d3..0bc8c4e167d85 100644 --- a/llvm/include/llvm/BinaryFormat/Wasm.h +++ b/llvm/include/llvm/BinaryFormat/Wasm.h @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // // This file defines manifest constants for the wasm object file format. -// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md // //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 9b116a8c65544..8f43caf1eb282 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -665,6 +665,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } Optional getMaxVScale() const { return None; } + Optional getVScaleForTuning() const { return None; } /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or @@ -1113,6 +1114,71 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return LT.first; } + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, + const APInt &DemandedSrcElts, + const APInt &DemandedReplicatedElts, + TTI::TargetCostKind CostKind) { + InstructionCost Cost; + + auto *SrcVT = FixedVectorType::get(EltTy, VF); + auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor); + + // The Mask shuffling cost is extract all the elements of the Mask + // and insert each of them Factor times into the wide vector: + // + // E.g. an interleaved group with factor 3: + // %mask = icmp ult <8 x i32> %vec1, %vec2 + // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, + // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> + // The cost is estimated as extract all mask elements from the <8xi1> mask + // vector and insert them factor times into the <24xi1> shuffled mask + // vector. 
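+    // Editorial illustration (mirrors the documentation above): for
+    // EltTy = i1, VF = 4 and ReplicationFactor = 3, SrcVT is <4 x i1>,
+    // ReplicatedVT is <12 x i1>, and the replication mask is
+    // <0,0,0,1,1,1,2,2,2,3,3,3>.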
+ Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts, + /*Insert*/ false, + /*Extract*/ true); + Cost += + thisT()->getScalarizationOverhead(ReplicatedVT, DemandedReplicatedElts, + /*Insert*/ true, /*Extract*/ false); + + return Cost; + } + + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, + int VF, ArrayRef Mask, + TTI::TargetCostKind CostKind) { + assert(Mask.size() == (unsigned)VF * ReplicationFactor && "Bad mask size."); + + APInt DemandedSrcElts = APInt::getNullValue(VF); + + ArrayRef RemainingMask = Mask; + for (int i = 0; i < VF; i++) { + ArrayRef CurrSubMask = RemainingMask.take_front(ReplicationFactor); + RemainingMask = RemainingMask.drop_front(CurrSubMask.size()); + + assert(all_of(CurrSubMask, + [i](int MaskElt) { + return MaskElt == UndefMaskElem || MaskElt == i; + }) && + "Not a replication mask."); + + if (any_of(CurrSubMask, + [](int MaskElt) { return MaskElt != UndefMaskElem; })) + DemandedSrcElts.setBit(i); + } + assert(RemainingMask.empty() && "Did not consume the entire mask?"); + + APInt DemandedReplicatedElts = APInt::getNullValue(Mask.size()); + for (auto I : enumerate(Mask)) { + if (I.value() != UndefMaskElem) + DemandedReplicatedElts.setBit(I.index()); + } + + return thisT()->getReplicationShuffleCost(EltTy, ReplicationFactor, VF, + DemandedSrcElts, + DemandedReplicatedElts, CostKind); + } + InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, @@ -1239,6 +1305,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { assert(Indices.size() <= Factor && "Interleaved memory op has too many members"); + const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts); + const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts); + APInt DemandedLoadStoreElts = APInt::getZero(NumElts); for (unsigned Index : Indices) { assert(Index < Factor && "Invalid index for interleaved memory op"); @@ -1256,7 +1325,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // The cost is estimated as extract elements at 0, 2, 4, 6 from the // <8 x i32> vector and insert them into a <4 x i32> vector. InstructionCost InsSubCost = - getScalarizationOverhead(SubVT, /*Insert*/ true, /*Extract*/ false); + thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, + /*Insert*/ true, /*Extract*/ false); Cost += Indices.size() * InsSubCost; Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, @@ -1276,7 +1346,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // excluding gaps) from both <4 x i32> vectors and insert into the <12 x // i32> vector. InstructionCost ExtSubCost = - getScalarizationOverhead(SubVT, /*Insert*/ false, /*Extract*/ true); + thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts, + /*Insert*/ false, /*Extract*/ true); Cost += ExtSubCost * Indices.size(); Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts, /*Insert*/ true, @@ -1287,31 +1358,22 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; Type *I8Type = Type::getInt8Ty(VT->getContext()); - auto *MaskVT = FixedVectorType::get(I8Type, NumElts); - SubVT = FixedVectorType::get(I8Type, NumSubElts); - // The Mask shuffling cost is extract all the elements of the Mask - // and insert each of them Factor times into the wide vector: - // - // E.g. 
an interleaved group with factor 3:
-    // %mask = icmp ult <8 x i32> %vec1, %vec2
-    // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
-    //     <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
-    // The cost is estimated as extract all mask elements from the <8xi1> mask
-    // vector and insert them factor times into the <24xi1> shuffled mask
-    // vector.
-    Cost += getScalarizationOverhead(SubVT, /*Insert*/ false, /*Extract*/ true);
-    Cost +=
-        getScalarizationOverhead(MaskVT, /*Insert*/ true, /*Extract*/ false);
+    Cost += thisT()->getReplicationShuffleCost(
+        I8Type, Factor, NumSubElts, DemandedAllSubElts,
+        UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
+        CostKind);

     // The Gaps mask is invariant and created outside the loop, therefore the
     // cost of creating it is not accounted for here. However if we have both
     // a MaskForGaps and some other mask that guards the execution of the
     // memory access, we need to account for the cost of And-ing the two masks
     // inside the loop.
-    if (UseMaskForGaps)
+    if (UseMaskForGaps) {
+      auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
       Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                               CostKind);
+    }

     return Cost;
   }
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index d15878aa23c44..fd106f55a43d1 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1260,6 +1260,11 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400;
 /// be used with SelectionDAG::getMemIntrinsicNode.
 static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;

+/// Whether this is a bitwise logic opcode.
+inline bool isBitwiseLogicOp(unsigned Opcode) {
+  return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
+}
+
 /// Get underlying scalar opcode for VECREDUCE opcode.
 /// For example ISD::AND for ISD::VECREDUCE_AND.
 NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
diff --git a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 81b0025fdddc5..c22f9d49f374b 100644
--- a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -24,6 +24,9 @@ namespace {
   // delete it all as dead code, even with whole program optimization,
   // yet is effectively a NO-OP. As the compiler isn't smart enough
   // to know that getenv() never returns -1, this will do the job.
+  // This is so that globals in the translation units where these functions
+  // are defined are forced to be initialized, populating various
+  // registries.
   if (std::getenv("bar") != (char*) -1)
     return;
diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 1b13ff53ac857..d615a5db45045 100644
--- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -27,6 +27,9 @@ namespace {
   // delete it all as dead code, even with whole program optimization,
   // yet is effectively a NO-OP. As the compiler isn't smart enough
   // to know that getenv() never returns -1, this will do the job.
+  // This is so that globals in the translation units where these functions
+  // are defined are forced to be initialized, populating various
+  // registries.
if (std::getenv("bar") != (char*) -1) return; diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h index 4ebe0f2dcfd88..3b6a4a379d720 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h @@ -114,12 +114,19 @@ class LiveIntervalUnion { const LiveRange *LR = nullptr; LiveRange::const_iterator LRI; ///< current position in LR ConstSegmentIter LiveUnionI; ///< current position in LiveUnion - Optional> InterferingVRegs; + SmallVector InterferingVRegs; bool CheckedFirstInterference = false; bool SeenAllInterferences = false; unsigned Tag = 0; unsigned UserTag = 0; + // Count the virtual registers in this union that interfere with this + // query's live virtual register, up to maxInterferingRegs. + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs); + + // Was this virtual register visited during collectInterferingVRegs? + bool isSeenInterference(LiveInterval *VirtReg) const; + public: Query() = default; Query(const LiveRange &LR, const LiveIntervalUnion &LIU) @@ -131,7 +138,7 @@ class LiveIntervalUnion { const LiveIntervalUnion &NewLiveUnion) { LiveUnion = &NewLiveUnion; LR = &NewLR; - InterferingVRegs = None; + InterferingVRegs.clear(); CheckedFirstInterference = false; SeenAllInterferences = false; Tag = NewLiveUnion.getTag(); @@ -151,20 +158,12 @@ class LiveIntervalUnion { // Does this live virtual register interfere with the union? bool checkInterference() { return collectInterferingVRegs(1); } - // Count the virtual registers in this union that interfere with this - // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs( - unsigned MaxInterferingRegs = std::numeric_limits::max()); - - // Was this virtual register visited during collectInterferingVRegs? - bool isSeenInterference(LiveInterval *VirtReg) const; - - // Did collectInterferingVRegs collect all interferences? - bool seenAllInterferences() const { return SeenAllInterferences; } - // Vector generated by collectInterferingVRegs. - const SmallVectorImpl &interferingVRegs() const { - return *InterferingVRegs; + const SmallVectorImpl &interferingVRegs( + unsigned MaxInterferingRegs = std::numeric_limits::max()) { + if (!SeenAllInterferences || MaxInterferingRegs < InterferingVRegs.size()) + collectInterferingVRegs(MaxInterferingRegs); + return InterferingVRegs; } }; diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h index 9b0667bbbeb01..dee316677b258 100644 --- a/llvm/include/llvm/CodeGen/LiveVariables.h +++ b/llvm/include/llvm/CodeGen/LiveVariables.h @@ -188,6 +188,12 @@ class LiveVariables : public MachineFunctionPass { //===--------------------------------------------------------------------===// // API to update live variable information + /// Recompute liveness from scratch for a virtual register \p Reg that is + /// known to have a single def that dominates all uses. This can be useful + /// after removing some uses of \p Reg. It is not necessary for the whole + /// machine function to be in SSA form. + void recomputeForSingleDefVirtReg(Register Reg); + /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. 
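(Editorial sketch for the LiveIntervalUnion change above, before the declaration below continues: `interferingVRegs()` now collects lazily on demand, so the separate public `collectInterferingVRegs()` call disappears from the API. `countInterferences` is an illustrative name and assumes an already-initialized query:)

```
#include "llvm/CodeGen/LiveIntervalUnion.h"

// Count up to 8 virtual registers interfering with the queried live range;
// after this patch, collection happens inside interferingVRegs() itself.
unsigned countInterferences(llvm::LiveIntervalUnion::Query &Q) {
  if (!Q.checkInterference())
    return 0;
  return Q.interferingVRegs(/*MaxInterferingRegs=*/8).size();
}
```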
void replaceKillInstruction(Register Reg, MachineInstr &OldMI, diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index edc70dda05d89..395900c168d70 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1733,8 +1733,7 @@ class SelectionDAG { ArrayRef Ops); SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef Ops, - const SDNodeFlags Flags = SDNodeFlags()); + ArrayRef Ops); /// Fold floating-point operations with 2 operands when both operands are /// constants and/or undefined. @@ -1837,6 +1836,19 @@ class SelectionDAG { unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned Depth = 0) const; + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + /// Helper wrapper to ComputeNumSignBits. + unsigned ComputeMinSignedBits(SDValue Op, unsigned Depth = 0) const; + + /// Get the minimum bit size for this Value \p Op as a signed integer. + /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)). + /// Similar to the APInt::getMinSignedBits function. + /// Helper wrapper to ComputeNumSignBits. + unsigned ComputeMinSignedBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth = 0) const; + /// Return true if this function can prove that \p Op is never poison /// and, if \p PoisonOnly is false, does not have undef bits. bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly = false, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index cc00af90ec678..c2c5dbc264785 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -2049,6 +2049,14 @@ class BuildVectorSDNode : public SDNode { int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const; + /// Extract the raw bit data from a build vector of Undef, Constant or + /// ConstantFP node elements. Each raw bit element will be \p + /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely + /// undefined elements are flagged in \p UndefElements. + bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl &RawBitElements, + BitVector &UndefElements) const; + bool isConstant() const; static bool classof(const SDNode *N) { diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake index 37a0d234844d1..1d982b544a63a 100644 --- a/llvm/include/llvm/Config/config.h.cmake +++ b/llvm/include/llvm/Config/config.h.cmake @@ -19,6 +19,10 @@ /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */ #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS +/* Define to 1 to prefer forward slashes on Windows, and to 0 prefer + backslashes. */ +#cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH + /* Define to 1 if you have the `backtrace' function. */ #cmakedefine HAVE_BACKTRACE ${HAVE_BACKTRACE} diff --git a/llvm/include/llvm/ExecutionEngine/MCJIT.h b/llvm/include/llvm/ExecutionEngine/MCJIT.h index 8253bf98963b8..adce98f380c54 100644 --- a/llvm/include/llvm/ExecutionEngine/MCJIT.h +++ b/llvm/include/llvm/ExecutionEngine/MCJIT.h @@ -26,6 +26,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. 
As the compiler isn't smart enough
  // to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
  if (std::getenv("bar") != (char*) -1)
    return;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 78e3ceef50e21..4d6d46595fc3c 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -45,6 +45,13 @@
 class PointerType;
 class Triple;
 class Twine;
 class Value;
+class MCDisassembler;
+class MCInstrAnalysis;
+
+namespace jitlink {
+class LinkGraph;
+class Symbol;
+} // namespace jitlink

 namespace orc {

@@ -557,6 +564,33 @@
 GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
 void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
                               ValueToValueMapTy &VMap);

+/// Introduce relocations to \p Sym in its own definition if there are any
+/// pointers formed via PC-relative address that do not already have a
+/// relocation.
+///
+/// This is useful when introducing indirection via a stub function at link
+/// time without compiler support. If a function pointer is formed without a
+/// relocation, e.g. in the definition of \c foo
+///
+/// \code
+/// _foo:
+///   leaq -7(%rip), %rax # form pointer to _foo without relocation
+/// _bar:
+///   leaq (%rip), %rax   # uses X86_64_RELOC_SIGNED to '_foo'
+/// \endcode
+///
+/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we
+/// introduce a stub for _foo. If the pointer is used as a key, this may be
+/// observable to the program. This pass will attempt to introduce the missing
+/// "self-relocation" on the leaq instruction.
+///
+/// This is based on disassembly and should be considered "best effort". It may
+/// silently fail to add relocations.
+Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA); + } // end namespace orc } // end namespace llvm diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 7ef0614c9f99c..211573132bac4 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -365,6 +365,10 @@ def OMPC_Filter : Clause<"filter"> { } def OMPC_When: Clause<"when"> {} +def OMPC_Bind : Clause<"bind"> { + let clangClass = "OMPBindClause"; +} + //===----------------------------------------------------------------------===// // Definition of OpenMP directives //===----------------------------------------------------------------------===// @@ -1739,6 +1743,7 @@ def OMP_loop : Directive<"loop"> { VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, ]; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 1de170836151a..f5b017608f060 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -450,6 +450,7 @@ __OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) __OMP_RTL(__kmpc_parallel_level, false, Int8, ) __OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) +__OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,) __OMP_RTL(__kmpc_syncwarp, false, Void, Int64) @@ -593,6 +594,8 @@ __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_barrier_simple_generic, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) __OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs()) diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h index b0db7ee713337..fea4d0da1d0d3 100644 --- a/llvm/include/llvm/IR/ConstantRange.h +++ b/llvm/include/llvm/IR/ConstantRange.h @@ -189,6 +189,11 @@ class LLVM_NODISCARD ConstantRange { /// successful. bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const; + /// Set up \p Pred, \p RHS and \p Offset such that (V + Offset) Pred RHS + /// is true iff V is in the range. Prefers using Offset == 0 if possible. + void + getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS, APInt &Offset) const; + /// Return the lower value for this range. const APInt &getLower() const { return Lower; } @@ -327,6 +332,14 @@ class LLVM_NODISCARD ConstantRange { ConstantRange unionWith(const ConstantRange &CR, PreferredRangeType Type = Smallest) const; + /// Intersect the two ranges and return the result if it can be represented + /// exactly, otherwise return None. + Optional exactIntersectWith(const ConstantRange &CR) const; + + /// Union the two ranges and return the result if it can be represented + /// exactly, otherwise return None. + Optional exactUnionWith(const ConstantRange &CR) const; + /// Return a new range representing the possible values resulting /// from an application of the specified cast operator to this range. \p /// BitWidth is the target bitwidth of the cast. 
For casts which don't diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h index 90165095bb0c0..61c6dd885980e 100644 --- a/llvm/include/llvm/IR/DIBuilder.h +++ b/llvm/include/llvm/IR/DIBuilder.h @@ -219,11 +219,12 @@ namespace llvm { /// \param AlignInBits Alignment. (optional) /// \param DWARFAddressSpace DWARF address space. (optional) /// \param Name Pointer type name. (optional) - DIDerivedType *createPointerType(DIType *PointeeTy, uint64_t SizeInBits, - uint32_t AlignInBits = 0, - Optional DWARFAddressSpace = - None, - StringRef Name = ""); + /// \param Annotations Member annotations. + DIDerivedType * + createPointerType(DIType *PointeeTy, uint64_t SizeInBits, + uint32_t AlignInBits = 0, + Optional DWARFAddressSpace = None, + StringRef Name = "", DINodeArray Annotations = nullptr); /// Create debugging information entry for a pointer to member. /// \param PointeeTy Type pointed to by this pointer. diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index e65b128883e99..46acd403bef1c 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -377,8 +377,8 @@ class DataLayout { /// the backends/clients are updated. unsigned getPointerSize(unsigned AS = 0) const; - /// Returns the maximum pointer size over all address spaces. - unsigned getMaxPointerSize() const; + /// Returns the maximum index size over all address spaces. + unsigned getMaxIndexSize() const; // Index size used for address calculation. unsigned getIndexSize(unsigned AS) const; @@ -410,9 +410,9 @@ class DataLayout { return getPointerSize(AS) * 8; } - /// Returns the maximum pointer size over all address spaces. - unsigned getMaxPointerSizeInBits() const { - return getMaxPointerSize() * 8; + /// Returns the maximum index size over all address spaces. + unsigned getMaxIndexSizeInBits() const { + return getMaxIndexSize() * 8; } /// Size in bits of index used for address calculation in getelementptr. diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h index 4dc184c2336fe..10088ee2fff42 100644 --- a/llvm/include/llvm/IR/GlobalIFunc.h +++ b/llvm/include/llvm/IR/GlobalIFunc.h @@ -80,6 +80,10 @@ class GlobalIFunc final : public GlobalObject, public ilist_node { static_cast(this)->getResolverFunction()); } + static FunctionType *getResolverFunctionType(Type *IFuncValTy) { + return FunctionType::get(IFuncValTy->getPointerTo(), false); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { return V->getValueID() == Value::GlobalIFuncVal; diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index a2180a8d4b2a3..143a87f4997d3 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -755,6 +756,20 @@ class CmpInst : public Instruction { using PredicateField = Bitfield::Element; + /// Returns the sequence of all FCmp predicates. + static auto FCmpPredicates() { + return enum_seq_inclusive(Predicate::FIRST_FCMP_PREDICATE, + Predicate::LAST_FCMP_PREDICATE, + force_iteration_on_noniterable_enum); + } + + /// Returns the sequence of all ICmp predicates. 
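(Editorial sketch of the new CmpInst predicate sequences declared in this hunk, which are built on `enum_seq_inclusive` with the force-iteration tag; `printICmpPredicates` is an illustrative name and `getPredicateName` is the existing helper:)

```
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/raw_ostream.h"

// Visit every ICmp predicate from FIRST_ICMP_PREDICATE through
// LAST_ICMP_PREDICATE, inclusive.
void printICmpPredicates() {
  for (auto Pred : llvm::CmpInst::ICmpPredicates())
    llvm::outs() << llvm::CmpInst::getPredicateName(Pred) << "\n";
}
```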
+ static auto ICmpPredicates() { + return enum_seq_inclusive(Predicate::FIRST_ICMP_PREDICATE, + Predicate::LAST_ICMP_PREDICATE, + force_iteration_on_noniterable_enum); + } + protected: CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred, Value *LHS, Value *RHS, const Twine &Name = "", diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 3c7911d251364..0ef78881c6d7a 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1339,6 +1339,10 @@ class ICmpInst: public CmpInst { return P == ICMP_SLE || P == ICMP_ULE; } + /// Returns the sequence of all ICmp predicates. + /// + static auto predicates() { return ICmpPredicates(); } + /// Exchange the two operands to this instruction in such a way that it does /// not modify the semantics of the instruction. The predicate value may be /// changed to retain the same result if the predicate is order dependent @@ -1461,6 +1465,10 @@ class FCmpInst: public CmpInst { Op<0>().swap(Op<1>()); } + /// Returns the sequence of all FCmp predicates. + /// + static auto predicates() { return FCmpPredicates(); } + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::FCmp; @@ -2346,6 +2354,27 @@ class ShuffleVectorInst : public Instruction { return isInsertSubvectorMask(ShuffleMask, NumSrcElts, NumSubElts, Index); } + /// Return true if this shuffle mask replicates each of the \p VF elements + /// in a vector \p ReplicationFactor times. + /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: + /// <0,0,0,1,1,1,2,2,2,3,3,3> + static bool isReplicationMask(ArrayRef Mask, int &ReplicationFactor, + int &VF); + static bool isReplicationMask(const Constant *Mask, int &ReplicationFactor, + int &VF) { + assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa(Mask->getType())) + return false; + SmallVector MaskAsInts; + getShuffleMask(Mask, MaskAsInts); + return isReplicationMask(MaskAsInts, ReplicationFactor, VF); + } + + /// Return true if this shuffle mask is a replication mask. + bool isReplicationMask(int &ReplicationFactor, int &VF) const; + /// Change values in a shuffle permute mask assuming the two vector operands /// of length InVecNumElts have swapped position. static void commuteShuffleMask(MutableArrayRef Mask, diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index 80a2f5a8cd3e4..2ff48380ac282 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -140,7 +140,8 @@ namespace Intrinsic { Subdivide2Argument, Subdivide4Argument, VecOfBitcastsToInt, - AMX + AMX, + PPCQuad, } Kind; union { diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td index 4b4dd94b1599f..a6bd6f841aab2 100644 --- a/llvm/include/llvm/IR/IntrinsicsBPF.td +++ b/llvm/include/llvm/IR/IntrinsicsBPF.td @@ -34,4 +34,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf." 
[IntrNoMem]>; def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">, Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>; + def int_bpf_compare : GCCBuiltin<"__builtin_bpf_compare">, + Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_anyint_ty, llvm_anyint_ty], + [IntrNoMem]>; } diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 3a75d992e7f14..131696f9ed796 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -557,7 +557,7 @@ class SHFL_INFO { let TargetPrefix = "nvvm" in { def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; // @@ -565,150 +565,150 @@ let TargetPrefix = "nvvm" in { // def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty] - , [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty] + , [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; // // Multiplication // def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">, - Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rn_ftz_f : 
GCCBuiltin<"__nvvm_mul_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, 
llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; // // Div // def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem]>; def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; // // Sad // def 
int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem, Commutative]>; // @@ -716,493 +716,493 @@ let TargetPrefix = "nvvm" in { // def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Abs // def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Round // def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Trunc // def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + 
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Saturate // def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // // Exp2 Log2 // def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Sin Cos // def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; // // Fma // def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + 
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">, - Intrinsic<[llvm_double_ty], + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + [IntrNoMem, IntrSpeculatable]>; // // Rcp // def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">, - 
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Sqrt // def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; 
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Rsqrt // def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; // // Add // def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">, - Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_float_ty], 
[llvm_float_ty, llvm_float_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">, - Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; // // Convert // def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + 
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">, - 
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; 
def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">, - Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem, Commutative]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, Commutative]>; def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">, - Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rm : 
GCCBuiltin<"__nvvm_f2ull_rm">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">, - Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], 
[llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">, - Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">, - Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; // // Bitcast // def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">, - Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">, - Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">, - Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>; def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">, - Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>; + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>; // FNS def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">, - Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // Atomics not available as llvm intrinsics. 
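Note on the attribute change above: IntrSpeculatable maps to the "speculatable" function attribute, and DefaultAttrsIntrinsic additionally attaches the default function attributes (nofree, nosync, willreturn) that a plain Intrinsic does not. A minimal C++ sketch of how this surfaces on a declared intrinsic, assuming a build that contains this patch (the intrinsic chosen is just one of those touched above):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("m", Ctx);
  // Declare llvm.nvvm.add.rn.f and inspect the attributes TableGen emitted.
  Function *F = Intrinsic::getDeclaration(&M, Intrinsic::nvvm_add_rn_f);
  // speculatable: passes may now hoist the call above conditional branches,
  // since the operation has no side effects and cannot trap.
  bool CanSpeculate = F->hasFnAttribute(Attribute::Speculatable);
  // IntrNoMem continues to mark the intrinsic readnone.
  bool NoMem = F->doesNotAccessMemory();
  return (CanSpeculate && NoMem) ? 0 : 1;
}

Speculation is the practical payoff here: a readnone call is already CSE-able, but only speculatable allows hoisting it out of a guarded block.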
@@ -1473,37 +1473,37 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], // - This complements the llvm bitcast, which can be used to cast one type // of pointer to another type of pointer, while the address space remains // the same. -def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.local.to.gen">; -def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.shared.to.gen">; -def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.global.to.gen">; -def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.constant.to.gen">; -def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.global">; -def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.shared">; -def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.local">; -def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty], [IntrNoMem], +def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.constant">; // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. 
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty], - [IntrNoMem], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.ptr.gen.to.param">; // Move intrinsics, used in nvvm internally @@ -1541,149 +1541,149 @@ def int_nvvm_reflect : // isspacep.{const, global, local, shared} def int_nvvm_isspacep_const - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.const">, GCCBuiltin<"__nvvm_isspacep_const">; def int_nvvm_isspacep_global - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.global">, GCCBuiltin<"__nvvm_isspacep_global">; def int_nvvm_isspacep_local - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.local">, GCCBuiltin<"__nvvm_isspacep_local">; def int_nvvm_isspacep_shared - : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.isspacep.shared">, GCCBuiltin<"__nvvm_isspacep_shared">; // Environment register read def int_nvvm_read_ptx_sreg_envreg0 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg0">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg0">; def int_nvvm_read_ptx_sreg_envreg1 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg1">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg1">; def int_nvvm_read_ptx_sreg_envreg2 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg2">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg2">; def int_nvvm_read_ptx_sreg_envreg3 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg3">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg3">; def int_nvvm_read_ptx_sreg_envreg4 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg4">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg4">; def int_nvvm_read_ptx_sreg_envreg5 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg5">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg5">; def int_nvvm_read_ptx_sreg_envreg6 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg6">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg6">; def int_nvvm_read_ptx_sreg_envreg7 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg7">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg7">; def int_nvvm_read_ptx_sreg_envreg8 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg8">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg8">; def int_nvvm_read_ptx_sreg_envreg9 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, 
IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg9">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg9">; def int_nvvm_read_ptx_sreg_envreg10 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg10">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg10">; def int_nvvm_read_ptx_sreg_envreg11 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg11">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg11">; def int_nvvm_read_ptx_sreg_envreg12 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg12">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg12">; def int_nvvm_read_ptx_sreg_envreg13 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg13">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg13">; def int_nvvm_read_ptx_sreg_envreg14 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg14">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg14">; def int_nvvm_read_ptx_sreg_envreg15 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg15">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg15">; def int_nvvm_read_ptx_sreg_envreg16 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg16">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg16">; def int_nvvm_read_ptx_sreg_envreg17 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg17">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg17">; def int_nvvm_read_ptx_sreg_envreg18 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg18">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg18">; def int_nvvm_read_ptx_sreg_envreg19 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg19">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg19">; def int_nvvm_read_ptx_sreg_envreg20 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg20">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg20">; def int_nvvm_read_ptx_sreg_envreg21 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg21">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg21">; def int_nvvm_read_ptx_sreg_envreg22 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg22">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg22">; def int_nvvm_read_ptx_sreg_envreg23 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg23">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg23">; def int_nvvm_read_ptx_sreg_envreg24 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : 
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg24">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg24">; def int_nvvm_read_ptx_sreg_envreg25 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg25">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg25">; def int_nvvm_read_ptx_sreg_envreg26 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg26">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg26">; def int_nvvm_read_ptx_sreg_envreg27 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg27">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg27">; def int_nvvm_read_ptx_sreg_envreg28 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg28">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg28">; def int_nvvm_read_ptx_sreg_envreg29 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg29">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg29">; def int_nvvm_read_ptx_sreg_envreg30 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg30">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg30">; def int_nvvm_read_ptx_sreg_envreg31 - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem], + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable], "llvm.nvvm.read.ptx.sreg.envreg31">, GCCBuiltin<"__nvvm_read_ptx_sreg_envreg31">; @@ -4288,49 +4288,49 @@ def int_nvvm_sust_p_3d_v4i32_trap def int_nvvm_rotate_b32 - : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrNoMem], "llvm.nvvm.rotate.b32">, + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">, GCCBuiltin<"__nvvm_rotate_b32">; def int_nvvm_rotate_b64 - :Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], - [IntrNoMem], "llvm.nvvm.rotate.b64">, + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">, GCCBuiltin<"__nvvm_rotate_b64">; def int_nvvm_rotate_right_b64 - : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], - [IntrNoMem], "llvm.nvvm.rotate.right.b64">, + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">, GCCBuiltin<"__nvvm_rotate_right_b64">; def int_nvvm_swap_lo_hi_b64 - : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], - [IntrNoMem], "llvm.nvvm.swap.lo.hi.b64">, + : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], + [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">, GCCBuiltin<"__nvvm_swap_lo_hi_b64">; // Accessing special registers. multiclass PTXReadSRegIntrinsic_v4i32<string regname> { // FIXME: Do we need the 128-bit integer type version? -// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>; +// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>; // FIXME: Enable this once v4i32 support is enabled in back-end.
-// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>; +// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>; - def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">; - def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">; - def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">; - def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">; } class PTXReadSRegIntrinsic_r32<string name> - : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; class PTXReadSRegIntrinsic_r64<string name> - : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>, + : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>, GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>; // Intrinsics to read registers with non-constant values. E.g. the values that diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index e60bad8fd9f0c..8290342c0d51a 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -31,10 +31,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". // Get content from current FPSCR register def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">, - Intrinsic<[llvm_double_ty], [], [IntrNoMem]>; + Intrinsic<[llvm_double_ty], [], + [IntrNoMerge, IntrHasSideEffects]>; // Set FPSCR register, and return previous content def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; + Intrinsic<[llvm_double_ty], [llvm_double_ty], + [IntrHasSideEffects]>; // Intrinsics for [double]word extended forms of divide instructions def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">, @@ -50,6 +52,15 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + def int_ppc_unpack_longdouble : GCCBuiltin<"__builtin_unpack_longdouble">, + Intrinsic<[llvm_double_ty], + [llvm_ppcf128_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_pack_longdouble : GCCBuiltin<"__builtin_pack_longdouble">, + Intrinsic<[llvm_ppcf128_ty], + [llvm_double_ty, llvm_double_ty], + [IntrNoMem]>; + // Generate a random number def int_ppc_darn : GCCBuiltin<"__builtin_darn">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; @@ -1744,6 +1755,11 @@ let TargetPrefix = "ppc" in { def int_ppc_test_data_class_f : Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty], [IntrNoMem, ImmArg<ArgIndex<1>>]>; + + def int_ppc_convert_f128_to_ppcf128 + : Intrinsic<[llvm_ppcf128_ty], [llvm_f128_ty], [IntrNoMem]>; + def int_ppc_convert_ppcf128_to_f128 + : Intrinsic<[llvm_f128_ty], [llvm_ppcf128_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 3dad1581e1fb1..3ceb347e97bff 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1194,16 +1194,16 @@ let TargetPrefix = "riscv" in { def int_riscv_vmand: RISCVBinaryAAANoMask; def int_riscv_vmnand: RISCVBinaryAAANoMask; - def int_riscv_vmandnot: RISCVBinaryAAANoMask; + def int_riscv_vmandn: RISCVBinaryAAANoMask; def int_riscv_vmxor: RISCVBinaryAAANoMask; def int_riscv_vmor: RISCVBinaryAAANoMask; def int_riscv_vmnor: RISCVBinaryAAANoMask; - def int_riscv_vmornot: RISCVBinaryAAANoMask; + def int_riscv_vmorn: RISCVBinaryAAANoMask; def int_riscv_vmxnor: RISCVBinaryAAANoMask; def int_riscv_vmclr : RISCVNullaryIntrinsic; def int_riscv_vmset : RISCVNullaryIntrinsic; - defm vpopc : RISCVMaskUnarySOut; + defm vcpop : RISCVMaskUnarySOut; defm vfirst : RISCVMaskUnarySOut; defm vmsbf : RISCVMaskUnaryMOut; defm vmsof : RISCVMaskUnaryMOut; diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index d4f0bb331ea8f..e88d2233daba8 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -1204,8 +1204,9 @@ class ModuleToFunctionPassAdaptor public: using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>; - explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass) - : Pass(std::move(Pass)) {} + explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass, + bool EagerlyInvalidate) + : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate) {} /// Runs the function pass across every function in the module. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); @@ -1216,13 +1217,15 @@ class ModuleToFunctionPassAdaptor private: std::unique_ptr<PassConceptT> Pass; + bool EagerlyInvalidate; }; /// A function to deduce a function pass type and wrap it in the /// templated adaptor. template <typename FunctionPassT> ModuleToFunctionPassAdaptor -createModuleToFunctionPassAdaptor(FunctionPassT &&Pass) { +createModuleToFunctionPassAdaptor(FunctionPassT &&Pass, + bool EagerlyInvalidate = false) { using PassModelT = detail::PassModel<Function, FunctionPassT, PreservedAnalyses, FunctionAnalysisManager>; @@ -1230,7 +1233,8 @@ createModuleToFunctionPassAdaptor(FunctionPassT &&Pass) { // causing terrible compile times. return ModuleToFunctionPassAdaptor( std::unique_ptr<ModuleToFunctionPassAdaptor::PassConceptT>( - new PassModelT(std::forward<FunctionPassT>(Pass)))); + new PassModelT(std::forward<FunctionPassT>(Pass))), + EagerlyInvalidate); }
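A usage sketch for the new EagerlyInvalidate parameter, assuming only what the header above shows (the wrapped pass is an arbitrary example, not part of this patch):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"

using namespace llvm;

void addFunctionPasses(ModulePassManager &MPM) {
  // With EagerlyInvalidate = true the adaptor is expected to drop cached
  // function analyses right after the pass runs, rather than keeping the
  // ones the pass reports as preserved; this trades recomputation for a
  // smaller analysis cache on large modules. (A reading of the flag's
  // intent; the run() implementation lives in the .cpp.)
  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(),
                                                /*EagerlyInvalidate=*/true));
}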
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 5f295660557ad..62d67308114f8 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -376,6 +376,8 @@ HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf") HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf") HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf") HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf") +HANDLE_LIBCALL(CONVERT_F128_PPCF128, "__extendkftf2") +HANDLE_LIBCALL(CONVERT_PPCF128_F128, "__trunctfkf2") // Comparison HANDLE_LIBCALL(OEQ_F32, "__eqsf2") diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index d87cb76c422aa..d2b0fef1ca47b 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -194,7 +194,7 @@ class InputFile { using ThinBackend = std::function( const Config &C, ModuleSummaryIndex &CombinedIndex, StringMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, NativeObjectCache Cache)>; + AddStreamFn AddStream, FileCache Cache)>; /// This ThinBackend runs the individual backend jobs in-process. /// The default value means to use one job per hardware core (not hyper-thread). @@ -267,7 +267,7 @@ class LTO { /// /// The client will receive at most one callback (via either AddStream or /// Cache) for each task identifier. - Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr); + Error run(AddStreamFn AddStream, FileCache Cache = nullptr); /// Static method that returns a list of libcall symbols that can be generated /// by LTO but might not be visible from bitcode symbol table. @@ -399,7 +399,7 @@ class LTO { const SymbolResolution *&ResI, const SymbolResolution *ResE); Error runRegularLTO(AddStreamFn AddStream); - Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, + Error runThinLTO(AddStreamFn AddStream, FileCache Cache, const DenseSet &GUIDPreservedSymbols); Error checkPartiallySplit(); diff --git a/llvm/include/llvm/LinkAllIR.h b/llvm/include/llvm/LinkAllIR.h index 4b0aabeee7017..ceed784d557de 100644 --- a/llvm/include/llvm/LinkAllIR.h +++ b/llvm/include/llvm/LinkAllIR.h @@ -38,6 +38,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. 
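The idiom that comment documents, reduced to a hedged standalone sketch; neededPassA/neededPassB are hypothetical stand-ins for the real create*Pass() references:

#include <cstdlib>

void neededPassA(); // hypothetical stand-ins for the referenced symbols
void neededPassB();

struct ForceLinking {
  ForceLinking() {
    // getenv() never returns (char *)-1, so this always returns early and the
    // calls below never run; the optimizer cannot prove that, though, so the
    // references survive and their defining objects get linked in.
    if (std::getenv("bar") != (char *)-1)
      return;
    neededPassA();
    neededPassB();
  }
};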
if (std::getenv("bar") != (char*) -1) return; llvm::LLVMContext Context; diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 5707a4d8abe1d..c8b9aaeed76a5 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -37,9 +37,6 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" -#include "llvm/SYCLLowerIR/LowerESIMD.h" -#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" -#include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/Support/Valgrind.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" @@ -49,7 +46,6 @@ #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" -#include "llvm/Transforms/Instrumentation/SPIRITTAnnotations.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -68,6 +64,9 @@ namespace { // delete it all as dead code, even with whole program optimization, // yet is effectively a NO-OP. As the compiler isn't smart enough // to know that getenv() never returns -1, this will do the job. + // This is so that globals in the translation units where these functions + // are defined are forced to be initialized, populating various + // registries. if (std::getenv("bar") != (char*) -1) return; @@ -96,7 +95,6 @@ namespace { (void) llvm::createControlHeightReductionLegacyPass(); (void) llvm::createCostModelAnalysisPass(); (void) llvm::createDeadArgEliminationPass(); - (void) llvm::createDeadArgEliminationSYCLPass(); (void) llvm::createDeadCodeEliminationPass(); (void) llvm::createDeadStoreEliminationPass(); (void) llvm::createDependenceAnalysisWrapperPass(); @@ -204,12 +202,6 @@ namespace { (void) llvm::createMergeICmpsLegacyPass(); (void) llvm::createExpandMemCmpPass(); (void) llvm::createExpandVectorPredicationPass(); - (void)llvm::createSYCLLowerWGScopePass(); - (void)llvm::createSYCLLowerESIMDPass(); - (void)llvm::createESIMDLowerLoadStorePass(); - (void)llvm::createESIMDLowerVecArgPass(); - (void)llvm::createSPIRITTAnnotationsLegacyPass(); - (void)llvm::createSYCLLowerWGLocalMemoryLegacyPass(); std::string buf; llvm::raw_string_ostream os(buf); (void) llvm::createPrintModulePass(os); diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h index 68506d1fced45..632a7d8f820e4 100644 --- a/llvm/include/llvm/MC/MCInstrAnalysis.h +++ b/llvm/include/llvm/MC/MCInstrAnalysis.h @@ -158,6 +158,11 @@ class MCInstrAnalysis { evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const; + /// Given an instruction with a memory operand that could require relocation, + /// returns the offset within the instruction of that relocation. + virtual Optional + getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const; + /// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries. 
virtual std::vector> findPltEntries(uint64_t PltSectionVA, ArrayRef PltContents, diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h index b7af7795b8c9f..37f23c435ae14 100644 --- a/llvm/include/llvm/Object/ELF.h +++ b/llvm/include/llvm/Object/ELF.h @@ -388,8 +388,7 @@ class ELFFile { Expected> getSectionContentsAsArray(const Elf_Shdr &Sec) const; Expected> getSectionContents(const Elf_Shdr &Sec) const; Expected> getSegmentContents(const Elf_Phdr &Phdr) const; - Expected> - decodeBBAddrMap(const Elf_Shdr &Sec) const; + Expected> decodeBBAddrMap(const Elf_Shdr &Sec) const; }; using ELF32LEFile = ELFFile; diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index 8b45342b4530a..716b94d92d032 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -733,7 +733,8 @@ Expected ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { } else if (EF.getHeader().e_machine == ELF::EM_ARM) { if (Expected NameOrErr = getSymbolName(Sym)) { StringRef Name = *NameOrErr; - if (Name.startswith("$d") || Name.startswith("$t") || + // TODO Investigate why empty name symbols need to be marked. + if (Name.empty() || Name.startswith("$d") || Name.startswith("$t") || Name.startswith("$a")) Result |= SymbolRef::SF_FormatSpecific; } else { diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 54ebd751d8d22..e59a63d939890 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -44,7 +44,6 @@ template struct Elf_Nhdr_Impl; template class Elf_Note_Impl; template class Elf_Note_Iterator_Impl; template struct Elf_CGProfile_Impl; -template struct Elf_BBAddrMap_Impl; template struct ELFType { private: @@ -76,7 +75,6 @@ template struct ELFType { using Note = Elf_Note_Impl>; using NoteIterator = Elf_Note_Iterator_Impl>; using CGProfile = Elf_CGProfile_Impl>; - using BBAddrMap = Elf_BBAddrMap_Impl>; using DynRange = ArrayRef; using ShdrRange = ArrayRef; using SymRange = ArrayRef; @@ -131,7 +129,6 @@ using ELF64BE = ELFType; using Elf_Note = typename ELFT::Note; \ using Elf_Note_Iterator = typename ELFT::NoteIterator; \ using Elf_CGProfile = typename ELFT::CGProfile; \ - using Elf_BBAddrMap = typename ELFT::BBAddrMap; \ using Elf_Dyn_Range = typename ELFT::DynRange; \ using Elf_Shdr_Range = typename ELFT::ShdrRange; \ using Elf_Sym_Range = typename ELFT::SymRange; \ @@ -797,9 +794,8 @@ template struct Elf_Mips_ABIFlags { }; // Struct representing the BBAddrMap for one function. -template struct Elf_BBAddrMap_Impl { - LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) - uintX_t Addr; // Function address +struct BBAddrMap { + uint64_t Addr; // Function address // Struct representing the BBAddrMap information for one basic block. struct BBEntry { uint32_t Offset; // Offset of basic block relative to function start. diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index d2ad12e98deb8..ca5d63e4074f0 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -311,6 +311,9 @@ class MachOObjectFile : public ObjectFile { bool isSectionBitcode(DataRefImpl Sec) const override; bool isDebugSection(DataRefImpl Sec) const override; + /// Return the raw contents of an entire segment. + ArrayRef getSegmentContents(StringRef SegmentName) const; + /// When dsymutil generates the companion file, it strips all unnecessary /// sections (e.g. 
everything in the _TEXT segment) by omitting their body /// and setting the offset in their corresponding load command to zero. diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h index 69ae0dc126d1a..e4802c087b8b4 100644 --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -9,7 +9,7 @@ // This file declares the WasmObjectFile class, which implements the ObjectFile // interface for Wasm files. // -// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md // //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h index 5d1d3ee235948..ee89f4eac61f5 100644 --- a/llvm/include/llvm/ObjectYAML/MachOYAML.h +++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h @@ -131,6 +131,7 @@ struct Object { std::vector LoadCommands; std::vector
Sections;
   LinkEditData LinkEdit;
+  Optional<llvm::yaml::BinaryRef> RawLinkEditSegment;
   DWARFYAML::Data DWARF;
 };

diff --git a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
index 20d072c1991fc..4f07e24586224 100644
--- a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
@@ -24,7 +24,7 @@ struct FileHeader {
   uint16_t NumberOfSections;
   int32_t TimeStamp;
   llvm::yaml::Hex64 SymbolTableOffset;
-  uint32_t NumberOfSymTableEntries;
+  int32_t NumberOfSymTableEntries;
   uint16_t AuxHeaderSize;
   llvm::yaml::Hex16 Flags;
 };

diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 943ad316b082e..4cbda10d63bba 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -73,6 +73,15 @@ class PipelineTuningOptions {
   /// Tuning option to enable/disable function merging. Its default value is
   /// false.
   bool MergeFunctions;
+
+  // Experimental option to eagerly invalidate more analyses. This has the
+  // potential to decrease max memory usage in exchange for more compile time.
+  // This may affect codegen due to either passes using analyses only when
+  // cached, or invalidating and recalculating an analysis that was
+  // stale/imprecise but still valid. Currently this invalidates all function
+  // analyses after various module->function or cgscc->function adaptors in the
+  // default pipelines.
+  bool EagerlyInvalidateAnalyses;
 };

 /// This class provides access to building LLVM's passes.

diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index b89a151f88c0b..6cab4ce7d138b 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -333,7 +333,16 @@ class EmptyData {

 // The data saved for comparing functions.
 template <typename T>
-class FuncDataT : public OrderedChangedData<BlockDataT<T>> {};
+class FuncDataT : public OrderedChangedData<BlockDataT<T>> {
+public:
+  FuncDataT(std::string S) : EntryBlockName(S) {}
+
+  // Return the name of the entry block
+  std::string getEntryBlockName() const { return EntryBlockName; }
+
+protected:
+  std::string EntryBlockName;
+};

 // The data saved for comparing IRs.
 template <typename T>
@@ -350,7 +359,6 @@ template <typename T> class IRComparer {
   // Compare the 2 IRs. \p handleFunctionCompare is called to handle the
   // compare of a function. When \p InModule is set,
   // this function is being handled as part of comparing a module.
-
   void compare(
       bool CompareModule,
       std::function<void(bool InModule, unsigned Minor,
                          const FuncDataT<T> &Before,
                          const FuncDataT<T> &After)> CompareFunc);

+// Information that needs to be saved for a basic block in order to compare
+// before and after a pass to determine whether it was changed by the pass.
+class DCData {
+public:
+  // Fill the map with the transitions from basic block \p B.
+  DCData(const BasicBlock &B);
+
+  // Return an iterator to the names of the successor blocks.
+  StringMap<std::string>::const_iterator begin() const {
+    return Successors.begin();
+  }
+  StringMap<std::string>::const_iterator end() const {
+    return Successors.end();
+  }
+
+  // Return the label of the basic block reached on a transition on \p S.
+  const StringRef getSuccessorLabel(StringRef S) const {
+    assert(Successors.count(S) == 1 && "Expected to find successor.");
+    return Successors.find(S)->getValue();
+  }
+
+protected:
+  // Add a transition to \p Succ on \p Label
+  void addSuccessorLabel(StringRef Succ, StringRef Label) {
+    std::pair<std::string, std::string> SS{Succ.str(), Label.str()};
+    Successors.insert(SS);
+  }
+
+  StringMap<std::string> Successors;
+};
+
+// A change reporter that builds a website with links to pdf files showing
+// dot control flow graphs with changed instructions shown in colour.
+class DotCfgChangeReporter : public ChangeReporter<IRDataT<DCData>> {
+public:
+  DotCfgChangeReporter(bool Verbose);
+  ~DotCfgChangeReporter() override;
+  void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+protected:
+  // Initialize the HTML file and output the header.
+  bool initializeHTML();
+
+  // Called on the first IR processed.
+  void handleInitialIR(Any IR) override;
+  // Called before and after a pass to get the representation of the IR.
+  void generateIRRepresentation(Any IR, StringRef PassID,
+                                IRDataT<DCData> &Output) override;
+  // Called when the pass is not interesting.
+  void omitAfter(StringRef PassID, std::string &Name) override;
+  // Called when an interesting IR has changed.
+  void handleAfter(StringRef PassID, std::string &Name,
+                   const IRDataT<DCData> &Before, const IRDataT<DCData> &After,
+                   Any) override;
+  // Called when an interesting pass is invalidated.
+  void handleInvalidated(StringRef PassID) override;
+  // Called when the IR or pass is not interesting.
+  void handleFiltered(StringRef PassID, std::string &Name) override;
+  // Called when an ignored pass is encountered.
+  void handleIgnored(StringRef PassID, std::string &Name) override;
+
+  // Generate the pdf file into \p Dir / \p PDFFileName using \p DotFile as
+  // input and return the html <a> tag with \p Text as the content.
+  static std::string genHTML(StringRef Text, StringRef DotFile,
+                             StringRef PDFFileName);
+
+  void handleFunctionCompare(StringRef Name, StringRef Prefix,
+                             StringRef PassID, StringRef Divider,
+                             bool InModule, unsigned Minor,
+                             const FuncDataT<DCData> &Before,
+                             const FuncDataT<DCData> &After);
+
+  unsigned N = 0;
+  std::unique_ptr<raw_fd_ostream> HTML;
+};
+
 /// This class provides an interface to register all the standard pass
 /// instrumentations and manages their state (if any).
 class StandardInstrumentations {
@@ -420,6 +503,7 @@ class StandardInstrumentations {
   IRChangedPrinter PrintChangedIR;
   PseudoProbeVerifier PseudoProbeVerification;
   InLineChangePrinter PrintChangedDiff;
+  DotCfgChangeReporter WebsiteChangeReporter;
   VerifyInstrumentation Verify;

   bool VerifyEach;

diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index a281109687403..e8b336bc3ece3 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -182,6 +182,10 @@ AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
 AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
                  (AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
                   AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
+                 (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
+                  AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SVE2BITPERM |
+                  AArch64::AEK_SB | AArch64::AEK_FP16FML))
 AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
                  (AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
                   AArch64::AEK_RCPC | AArch64::AEK_SSBS))

diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
index 819165212b0c7..1e5fea17f7088 100644
--- a/llvm/include/llvm/Support/Caching.h
+++ b/llvm/include/llvm/Support/Caching.h
@@ -1,4 +1,4 @@
-//===- Caching.h - LLVM File Cache Handling Configuration -------*- C++ -*-===//
+//===- Caching.h - LLVM Local File Cache ------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file defines the localCache function, which allows clients to add a
-// filesystem cache. This is used by ThinLTO.
+// This file defines the CachedFileStream and the localCache function, which
+// simplifies caching files on the local filesystem in a directory whose
+// contents are managed by a CachePruningPolicy.
 //
 //===----------------------------------------------------------------------===//

@@ -15,48 +16,43 @@
 #define LLVM_SUPPORT_CACHING_H

 #include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/thread.h"

 namespace llvm {

-/// This class wraps an output stream for a native object. Most clients should
-/// just be able to return an instance of this base class from the stream
-/// callback, but if a client needs to perform some action after the stream is
-/// written to, that can be done by deriving from this class and overriding the
-/// destructor.
-class NativeObjectStream {
+class MemoryBuffer;
+
+/// This class wraps an output stream for a file. Most clients should just be
+/// able to return an instance of this base class from the stream callback, but
+/// if a client needs to perform some action after the stream is written to,
+/// that can be done by deriving from this class and overriding the destructor.
+class CachedFileStream {
 public:
-  NativeObjectStream(std::unique_ptr<raw_pwrite_stream> OS)
-      : OS(std::move(OS)) {}
+  CachedFileStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {}
   std::unique_ptr<raw_pwrite_stream> OS;
-  virtual ~NativeObjectStream() = default;
+  virtual ~CachedFileStream() = default;
 };

-/// This type defines the callback to add a native object that is generated on
-/// the fly.
+/// This type defines the callback to add a file that is generated on the fly.
 ///
 /// Stream callbacks must be thread safe.
 using AddStreamFn =
-    std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>;
+    std::function<Expected<std::unique_ptr<CachedFileStream>>(unsigned Task)>;

-/// This is the type of a native object cache. To request an item from the
-/// cache, pass a unique string as the Key. For hits, the cached file will be
-/// added to the link and this function will return AddStreamFn(). For misses,
-/// the cache will return a stream callback which must be called at most once to
-/// produce content for the stream. The native object stream produced by the
-/// stream callback will add the file to the link after the stream is written
-/// to.
+/// This is the type of a file cache. To request an item from the cache, pass a
+/// unique string as the Key. For hits, the cached file will be added to the
+/// link and this function will return AddStreamFn(). For misses, the cache will
+/// return a stream callback which must be called at most once to produce
+/// content for the stream. The file stream produced by the stream callback will
+/// add the file to the link after the stream is written to.
 ///
 /// Clients generally look like this:
 ///
 /// if (AddStreamFn AddStream = Cache(Task, Key))
 ///   ProduceContent(AddStream);
-using NativeObjectCache =
-    std::function<AddStreamFn(unsigned Task, StringRef Key)>;
+using FileCache =
+    std::function<Expected<AddStreamFn>(unsigned Task, StringRef Key)>;

-/// This type defines the callback to add a pre-existing native object file
-/// (e.g. in a cache).
+/// This type defines the callback to add a pre-existing file (e.g. in a cache).
 ///
 /// Buffer callbacks must be thread safe.
 using AddBufferFn =
@@ -67,10 +63,9 @@ using AddBufferFn =
 /// the cache directory if it does not already exist. The cache name appears in
 /// error messages for errors during caching. The temporary file prefix is used
 /// in the temporary file naming scheme used when writing files atomically.
-Expected<NativeObjectCache> localCache(Twine CacheNameRef,
-                                       Twine TempFilePrefixRef,
-                                       Twine CacheDirectoryPathRef,
-                                       AddBufferFn AddBuffer);
+Expected<FileCache> localCache(Twine CacheNameRef, Twine TempFilePrefixRef,
+                               Twine CacheDirectoryPathRef,
+                               AddBufferFn AddBuffer);
 } // namespace llvm

 #endif

diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h
index a73538fa14624..ffa9abe328c83 100644
--- a/llvm/include/llvm/Support/DOTGraphTraits.h
+++ b/llvm/include/llvm/Support/DOTGraphTraits.h
@@ -65,6 +65,11 @@ struct DefaultDOTGraphTraits {
     return false;
   }

+  // renderNodesUsingHTML - If the function returns true, nodes will be
+  // rendered using HTML-like labels which allows colors, etc in the nodes
+  // and the edge source labels.
+  static bool renderNodesUsingHTML() { return false; }
+
   /// getNodeLabel - Given a node and a pointer to the top level graph, return
   /// the label to print in the node.
   template <typename GraphType>

diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h
index b886bf45f474b..11a31bf401606 100644
--- a/llvm/include/llvm/Support/GraphWriter.h
+++ b/llvm/include/llvm/Support/GraphWriter.h
@@ -66,6 +66,7 @@ template <typename GraphType>
 class GraphWriter {
   raw_ostream &O;
   const GraphType &G;
+  bool RenderUsingHTML = false;

   using DOTTraits = DOTGraphTraits<GraphType>;
   using GTraits = GraphTraits<GraphType>;
@@ -86,6 +87,9 @@ class GraphWriter {
     child_iterator EE = GTraits::child_end(Node);
     bool hasEdgeSourceLabels = false;

+    if (RenderUsingHTML)
+      O << "</tr><tr>";
+
     for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) {
       std::string label = DTraits.getEdgeSourceLabel(Node, EI);
@@ -94,14 +98,22 @@ class GraphWriter {

       hasEdgeSourceLabels = true;

-      if (i)
-        O << "|";
+      if (RenderUsingHTML)
+        O << "<td colspan=\"1\" port=\"s" << i << "\">" << label << "</td>";
+      else {
+        if (i)
+          O << "|";

-      O << "<s" << i << ">" << DOT::EscapeString(label);
+        O << "<s" << i << ">" << DOT::EscapeString(label);
+      }
     }

-    if (EI != EE && hasEdgeSourceLabels)
-      O << "|<s64>truncated...";
+    if (EI != EE && hasEdgeSourceLabels) {
+      if (RenderUsingHTML)
+        O << "<td colspan=\"1\" port=\"s64\">truncated...</td>";
+      else
+        O << "|<s64>truncated...";
+    }

     return hasEdgeSourceLabels;
   }
@@ -109,6 +121,7 @@ class GraphWriter {
 public:
   GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) {
     DTraits = DOTTraits(SN);
+    RenderUsingHTML = DTraits.renderNodesUsingHTML();
   }

   void writeGraph(const std::string &Title = "") {
@@ -163,12 +176,39 @@ class GraphWriter {
   void writeNode(NodeRef Node) {
     std::string NodeAttributes = DTraits.getNodeAttributes(Node, G);

-    O << "\tNode" << static_cast<const void *>(Node) << " [shape=record,";
+    O << "\tNode" << static_cast<const void *>(Node) << " [shape=";
+    if (RenderUsingHTML)
+      O << "none,";
+    else
+      O << "record,";
+
     if (!NodeAttributes.empty()) O << NodeAttributes << ",";
-    O << "label=\"{";
+    O << "label=";
+
+    if (RenderUsingHTML) {
+      // Count the number of edges out of the node to determine how
+      // many columns to span (max 64)
+      unsigned ColSpan = 0;
+      child_iterator EI = GTraits::child_begin(Node);
+      child_iterator EE = GTraits::child_end(Node);
+      for (; EI != EE && ColSpan != 64; ++EI, ++ColSpan)
+        ;
+      if (ColSpan == 0)
+        ColSpan = 1;
+      // Include truncated messages when counting.
+      if (EI != EE)
+        ++ColSpan;
+      O << "<<table border=\"0\" cellborder=\"1\" cellspacing=\"0\""
+        << " cellpadding=\"0\"><tr><td align=\"text\" colspan=\"" << ColSpan
+        << "\">";
+    } else
+      O << "\"{";

     if (!DTraits.renderGraphFromBottomUp()) {
-      O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+      if (RenderUsingHTML)
+        O << DTraits.getNodeLabel(Node, G) << "</td>";
+      else
+        O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));

       // If we should include the address of the node in the label, do so now.
       std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
@@ -185,15 +225,25 @@ class GraphWriter {
     bool hasEdgeSourceLabels = getEdgeSourceLabels(EdgeSourceLabels, Node);

     if (hasEdgeSourceLabels) {
-      if (!DTraits.renderGraphFromBottomUp()) O << "|";
-
-      O << "{" << EdgeSourceLabels.str() << "}";
-
-      if (DTraits.renderGraphFromBottomUp()) O << "|";
+      if (!DTraits.renderGraphFromBottomUp())
+        if (!RenderUsingHTML)
+          O << "|";
+
+      if (RenderUsingHTML)
+        O << EdgeSourceLabels.str();
+      else
+        O << "{" << EdgeSourceLabels.str() << "}";
+
+      if (DTraits.renderGraphFromBottomUp())
+        if (!RenderUsingHTML)
+          O << "|";
     }

     if (DTraits.renderGraphFromBottomUp()) {
-      O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+      if (RenderUsingHTML)
+        O << DTraits.getNodeLabel(Node, G);
+      else
+        O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));

       // If we should include the address of the node in the label, do so now.
       std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
@@ -215,12 +265,17 @@ class GraphWriter {
           << DOT::EscapeString(DTraits.getEdgeDestLabel(Node, i));
       }

-      if (i != e)
-        O << "|truncated...";
-      O << "}";
+      if (RenderUsingHTML)
+        O << "</td>";
+      else if (i != e)
+        O << "|truncated...}";
     }

-    O << "}\"];\n"; // Finish printing the "node" line
+    if (RenderUsingHTML)
+      O << "</tr></table>>";
+    else
+      O << "}\"";
+    O << "];\n"; // Finish printing the "node" line

     // Output all of the edges now
     child_iterator EI = GTraits::child_begin(Node);

diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 5c73cece85c3f..ce10a4c58dfe8 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -1405,51 +1405,61 @@ namespace llvm {
     /// SimpleValueType Iteration
     /// @{
     static auto all_valuetypes() {
-      return seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto integer_valuetypes() {
-      return seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
-                           MVT::LAST_INTEGER_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
+                                MVT::LAST_INTEGER_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto fp_valuetypes() {
-      return seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto vector_valuetypes() {
-      return seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
-                           MVT::LAST_VECTOR_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
+                                MVT::LAST_VECTOR_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto fixedlen_vector_valuetypes() {
-      return seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
-                           MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
+                                MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto scalable_vector_valuetypes() {
-      return seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
-                           MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
+                                MVT::LAST_SCALABLE_VECTOR_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto integer_fixedlen_vector_valuetypes() {
-      return seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
-                           MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+                                MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto fp_fixedlen_vector_valuetypes() {
-      return seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
-                           MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+                                MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto integer_scalable_vector_valuetypes() {
-      return seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
-                           MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+                                MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }

     static auto fp_scalable_vector_valuetypes() {
-      return seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
-                           MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE);
+      return enum_seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
+                                MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE,
+                                force_iteration_on_noniterable_enum);
     }
     /// @}
   };

diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h
index eb063d0719961..da5095714f487 100644
--- a/llvm/include/llvm/Support/Path.h
+++ b/llvm/include/llvm/Support/Path.h
@@ -25,7 +25,13 @@ namespace llvm {
 namespace sys {
 namespace path {

-enum class Style { windows, posix, native };
+enum class Style {
+  native,
+  posix,
+  windows_slash,
+  windows_backslash,
+  windows = windows_backslash, // deprecated
+};

 /// Check if \p S uses POSIX path rules.
 constexpr bool is_style_posix(Style S) {
@@ -257,6 +263,17 @@ void native(const Twine &path, SmallVectorImpl<char> &result,
 /// @param path A path that is transformed to native format.
 void native(SmallVectorImpl<char> &path, Style style = Style::native);

+/// For Windows path styles, convert path to use the preferred path separators.
+/// For other styles, do nothing.
+///
+/// @param path A path that is transformed to preferred format.
+inline void make_preferred(SmallVectorImpl<char> &path,
+                           Style style = Style::native) {
+  if (!is_style_windows(style))
+    return;
+  native(path, style);
+}
+
 /// Replaces backslashes with slashes if Windows.
 ///
 /// @param path processed path

diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index 30bbbd7db8c97..b9a8327bcc04d 100644
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -229,7 +229,6 @@ class UnivariateLinearPolyBase {
   bool isZero() const { return !Value; }
   bool isNonZero() const { return !isZero(); }
   explicit operator bool() const { return isNonZero(); }
-  ScalarTy getValue() const { return Value; }
   ScalarTy getValue(unsigned Dim) const {
     return Dim == UnivariateDim ? Value : 0;
   }
@@ -294,7 +293,7 @@ class LinearPolySize : public UnivariateLinearPolyBase<LeafTy> {
   static LeafTy getNull() { return get(0, false); }

   /// Returns the minimum value this size can represent.
-  ScalarTy getKnownMinValue() const { return this->getValue(); }
+  ScalarTy getKnownMinValue() const { return this->Value; }
   /// Returns whether the size is scaled by a runtime quantity (vscale).
   bool isScalable() const { return this->UnivariateDim == ScalableDim; }
   /// A return value of true indicates we know at compile time that the number

diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 713d9375448cc..2cf6ef13092eb 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -1414,6 +1414,7 @@ class RecordVal {
   SMLoc Loc; // Source location of definition of name.
   PointerIntPair<RecTy *, 2, FieldKind> TyAndKind;
   Init *Value;
+  bool IsUsed = false;

 public:
   RecordVal(Init *N, RecTy *T, FieldKind K);
@@ -1458,6 +1459,11 @@ class RecordVal {
   /// Set the value and source location of the field.
   bool setValue(Init *V, SMLoc NewLoc);

+  /// Whether this value is used. Useful for reporting warnings, for example
+  /// when a template argument is unused.
+  void setUsed(bool Used) { IsUsed = Used; }
+  bool isUsed() const { return IsUsed; }
+
   void dump() const;

   /// Print the value to an output stream, possibly with a semicolon.
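A quick usage sketch for the make_preferred helper added above (illustrative paths only, behavior as described in its doc comment):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
using namespace llvm;

void demo() {
  SmallString<128> P("C:/Users/test/file.txt");
  // Windows styles rewrite to the preferred separator, here backslashes.
  sys::path::make_preferred(P, sys::path::Style::windows_backslash);
  // For the POSIX style this is a no-op; separators are left untouched.
  sys::path::make_preferred(P, sys::path::Style::posix);
}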
@@ -1632,6 +1638,7 @@ class Record { } void checkRecordAssertions(); + void checkUnusedTemplateArgs(); bool isSubClassOf(const Record *R) const { for (const auto &SCPair : SuperClasses) diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h index 42735b7308862..c6aee439b5a05 100644 --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -480,6 +480,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner { return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); } + unsigned ComputeMinSignedBits(const Value *Op, unsigned Depth = 0, + const Instruction *CxtI = nullptr) const { + return llvm::ComputeMinSignedBits(Op, DL, Depth, &AC, CxtI, &DT); + } + OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h index d4eda186a05b3..c13407a440913 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h +++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h @@ -90,17 +90,11 @@ class ASanGlobalsMetadataAnalysis }; struct AddressSanitizerOptions { - AddressSanitizerOptions() - : AddressSanitizerOptions(false, false, false, - AsanDetectStackUseAfterReturnMode::Runtime){}; - AddressSanitizerOptions(bool CompileKernel, bool Recover, bool UseAfterScope, - AsanDetectStackUseAfterReturnMode UseAfterReturn) - : CompileKernel(CompileKernel), Recover(Recover), - UseAfterScope(UseAfterScope), UseAfterReturn(UseAfterReturn){}; - bool CompileKernel; - bool Recover; - bool UseAfterScope; - AsanDetectStackUseAfterReturnMode UseAfterReturn; + bool CompileKernel = false; + bool Recover = false; + bool UseAfterScope = false; + AsanDetectStackUseAfterReturnMode UseAfterReturn = + AsanDetectStackUseAfterReturnMode::Runtime; }; /// Public interface to the address sanitizer pass for instrumenting code to @@ -112,7 +106,7 @@ struct AddressSanitizerOptions { /// surrounding requested memory to be checked for invalid accesses. 
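With the options struct now default-initialized field-by-field, constructing the sanitizer pass can look like this sketch (the surrounding pipeline setup is assumed, not part of the patch):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
using namespace llvm;

void addAsan(FunctionPassManager &FPM) {
  AddressSanitizerOptions Opts; // CompileKernel/Recover/UseAfterScope default
                                // to false, UseAfterReturn to Runtime
  Opts.UseAfterScope = true;    // override only the field we care about
  FPM.addPass(AddressSanitizerPass(Opts));
}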
class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> {
 public:
-  explicit AddressSanitizerPass(AddressSanitizerOptions Options)
+  AddressSanitizerPass(const AddressSanitizerOptions &Options)
       : Options(Options){};
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
   void printPipeline(raw_ostream &OS,
@@ -131,8 +125,8 @@ class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> {
 class ModuleAddressSanitizerPass
     : public PassInfoMixin<ModuleAddressSanitizerPass> {
 public:
-  explicit ModuleAddressSanitizerPass(
-      bool CompileKernel = false, bool Recover = false, bool UseGlobalGC = true,
+  ModuleAddressSanitizerPass(
+      const AddressSanitizerOptions &Options, bool UseGlobalGC = true,
       bool UseOdrIndicator = false,
       AsanDtorKind DestructorKind = AsanDtorKind::Global);
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
@@ -141,8 +135,7 @@ class ModuleAddressSanitizerPass
   static bool isRequired() { return true; }

 private:
-  bool CompileKernel;
-  bool Recover;
+  AddressSanitizerOptions Options;
   bool UseGlobalGC;
   bool UseOdrIndicator;
   AsanDtorKind DestructorKind;

diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 4bea4c237771e..8970afb3aeaa1 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -129,6 +129,13 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
 /// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc.
 void ReplaceInstWithInst(Instruction *From, Instruction *To);

+/// Check if we can prove that all paths starting from this block converge
+/// to a block that either has a @llvm.experimental.deoptimize call
+/// prior to its terminating return instruction or is terminated by unreachable.
+/// All blocks in the traversed sequence must have a unique successor, except
+/// maybe for the last one.
+bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB);
+
 /// Option class for critical edge splitting.
 ///
 /// This provides a builder interface for overriding the default options used

diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 3c529abce85a2..72cb606eb51a2 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -55,6 +55,7 @@ class MDNode;
 class MemorySSAUpdater;
 class PHINode;
 class StoreInst;
+class SwitchInst;
 class TargetLibraryInfo;
 class TargetTransformInfo;

@@ -237,6 +238,10 @@ CallInst *createCallMatchingInvoke(InvokeInst *II);
 /// This function converts the specified invoke into a normal call.
 void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr);

+/// This function removes the default destination from the specified switch.
+void createUnreachableSwitchDefault(SwitchInst *Switch,
+                                    DomTreeUpdater *DTU = nullptr);
+
 ///===---------------------------------------------------------------------===//
 ///  Dbg Intrinsic utilities
 ///

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 7895b57dfae69..88b0f37b1d48a 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -360,8 +360,10 @@ struct LinearExpression {
   }

   LinearExpression mul(const APInt &Other, bool MulIsNSW) const {
-    return LinearExpression(Val, Scale * Other, Offset * Other,
-                            IsNSW && (Other.isOne() || MulIsNSW));
+    // The check for zero offset is necessary, because generally
+    // (X +nsw Y) *nsw Z does not imply (X *nsw Z) +nsw (Y *nsw Z).
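A concrete instance of the caveat above, written out with small hypothetical i8 values:

// Take X = 100, Y = -100 (the Offset), Z = 2, in 8-bit signed arithmetic:
//   (X +nsw Y) *nsw Z = (100 - 100) * 2 = 0    // no overflow anywhere
//   X *nsw Z          = 100 * 2        = 200   // overflows i8 (max 127)
// Distributing the multiply over the add introduces overflow the original
// expression never had, which is why the code below keeps the NSW flag only
// when the offset is zero (or the multiplier is one).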
+ bool NSW = IsNSW && (Other.isOne() || (MulIsNSW && Offset.isZero())); + return LinearExpression(Val, Scale * Other, Offset * Other, NSW); } }; } @@ -463,14 +465,14 @@ static LinearExpression GetLinearExpression( return Val; } -/// To ensure a pointer offset fits in an integer of size PointerSize -/// (in bits) when that size is smaller than the maximum pointer size. This is +/// To ensure a pointer offset fits in an integer of size IndexSize +/// (in bits) when that size is smaller than the maximum index size. This is /// an issue, for example, in particular for 32b pointers with negative indices /// that rely on two's complement wrap-arounds for precise alias information -/// where the maximum pointer size is 64b. -static APInt adjustToPointerSize(const APInt &Offset, unsigned PointerSize) { - assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!"); - unsigned ShiftBits = Offset.getBitWidth() - PointerSize; +/// where the maximum index size is 64b. +static APInt adjustToIndexSize(const APInt &Offset, unsigned IndexSize) { + assert(IndexSize <= Offset.getBitWidth() && "Invalid IndexSize!"); + unsigned ShiftBits = Offset.getBitWidth() - IndexSize; return (Offset << ShiftBits).ashr(ShiftBits); } @@ -547,9 +549,9 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, SearchTimes++; const Instruction *CxtI = dyn_cast(V); - unsigned MaxPointerSize = DL.getMaxPointerSizeInBits(); + unsigned MaxIndexSize = DL.getMaxIndexSizeInBits(); DecomposedGEP Decomposed; - Decomposed.Offset = APInt(MaxPointerSize, 0); + Decomposed.Offset = APInt(MaxIndexSize, 0); do { // See if this is a bitcast or GEP. const Operator *Op = dyn_cast(V); @@ -618,7 +620,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, unsigned AS = GEPOp->getPointerAddressSpace(); // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. gep_type_iterator GTI = gep_type_begin(GEPOp); - unsigned PointerSize = DL.getPointerSizeInBits(AS); + unsigned IndexSize = DL.getIndexSizeInBits(AS); // Assume all GEP operands are constants until proven otherwise. bool GepHasConstantOffset = true; for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end(); @@ -641,26 +643,26 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, continue; Decomposed.Offset += DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() * - CIdx->getValue().sextOrTrunc(MaxPointerSize); + CIdx->getValue().sextOrTrunc(MaxIndexSize); continue; } GepHasConstantOffset = false; - // If the integer type is smaller than the pointer size, it is implicitly - // sign extended to pointer size. + // If the integer type is smaller than the index size, it is implicitly + // sign extended or truncated to index size. unsigned Width = Index->getType()->getIntegerBitWidth(); - unsigned SExtBits = PointerSize > Width ? PointerSize - Width : 0; - unsigned TruncBits = PointerSize < Width ? Width - PointerSize : 0; + unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0; + unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0; LinearExpression LE = GetLinearExpression( CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT); // Scale by the type size. 
unsigned TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize(); - LE = LE.mul(APInt(PointerSize, TypeSize), GEPOp->isInBounds()); - Decomposed.Offset += LE.Offset.sextOrSelf(MaxPointerSize); - APInt Scale = LE.Scale.sextOrSelf(MaxPointerSize); + LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds()); + Decomposed.Offset += LE.Offset.sextOrSelf(MaxIndexSize); + APInt Scale = LE.Scale.sextOrSelf(MaxIndexSize); // If we already had an occurrence of this index variable, merge this // scale into it. For example, we want to handle: @@ -676,8 +678,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, } // Make sure that we have a scale that makes sense for this target's - // pointer size. - Scale = adjustToPointerSize(Scale, PointerSize); + // index size. + Scale = adjustToIndexSize(Scale, IndexSize); if (!!Scale) { VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW}; @@ -687,7 +689,7 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL, // Take care of wrap-arounds if (GepHasConstantOffset) - Decomposed.Offset = adjustToPointerSize(Decomposed.Offset, PointerSize); + Decomposed.Offset = adjustToIndexSize(Decomposed.Offset, IndexSize); // Analyze the base pointer next. V = GEPOp->getOperand(0); @@ -1184,7 +1186,7 @@ AliasResult BasicAAResult::aliasGEP( // is less than the size of the associated memory object, then we know // that the objects are partially overlapping. If the difference is // greater, we know they do not overlap. - if (DecompGEP1.Offset != 0 && DecompGEP1.VarIndices.empty()) { + if (DecompGEP1.VarIndices.empty()) { APInt &Off = DecompGEP1.Offset; // Initialize for Off >= 0 (V2 <= GEP1) case. @@ -1206,124 +1208,124 @@ AliasResult BasicAAResult::aliasGEP( Off = -Off; } - if (VLeftSize.hasValue()) { - const uint64_t LSize = VLeftSize.getValue(); - if (Off.ult(LSize)) { - // Conservatively drop processing if a phi was visited and/or offset is - // too big. - AliasResult AR = AliasResult::PartialAlias; - if (VRightSize.hasValue() && Off.ule(INT32_MAX) && - (Off + VRightSize.getValue()).ule(LSize)) { - // Memory referenced by right pointer is nested. Save the offset in - // cache. Note that originally offset estimated as GEP1-V2, but - // AliasResult contains the shift that represents GEP1+Offset=V2. - AR.setOffset(-Off.getSExtValue()); - AR.swap(Swapped); - } - return AR; + if (!VLeftSize.hasValue()) + return AliasResult::MayAlias; + + const uint64_t LSize = VLeftSize.getValue(); + if (Off.ult(LSize)) { + // Conservatively drop processing if a phi was visited and/or offset is + // too big. + AliasResult AR = AliasResult::PartialAlias; + if (VRightSize.hasValue() && Off.ule(INT32_MAX) && + (Off + VRightSize.getValue()).ule(LSize)) { + // Memory referenced by right pointer is nested. Save the offset in + // cache. Note that originally offset estimated as GEP1-V2, but + // AliasResult contains the shift that represents GEP1+Offset=V2. 
+        AR.setOffset(-Off.getSExtValue());
+        AR.swap(Swapped);
       }
-        return AliasResult::NoAlias;
+      return AR;
     }
+    return AliasResult::NoAlias;
   }

-  if (!DecompGEP1.VarIndices.empty()) {
-    APInt GCD;
-    ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
-    for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
-      const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
-      const APInt &Scale = Index.Scale;
-      APInt ScaleForGCD = Scale;
-      if (!Index.IsNSW)
-        ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(),
-                                          Scale.countTrailingZeros());
-
-      if (i == 0)
-        GCD = ScaleForGCD.abs();
-      else
-        GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
-
-      ConstantRange CR =
-          computeConstantRange(Index.Val.V, true, &AC, Index.CxtI);
-      KnownBits Known =
-          computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
-      CR = CR.intersectWith(
-          ConstantRange::fromKnownBits(Known, /* Signed */ true),
-          ConstantRange::Signed);
-
-      assert(OffsetRange.getBitWidth() == Scale.getBitWidth() &&
-             "Bit widths are normalized to MaxPointerSize");
-      OffsetRange = OffsetRange.add(
-          Index.Val.evaluateWith(CR).sextOrTrunc(OffsetRange.getBitWidth())
-              .smul_fast(ConstantRange(Scale)));
-    }
+  // We need to know both access sizes for all the following heuristics.
+  if (!V1Size.hasValue() || !V2Size.hasValue())
+    return AliasResult::MayAlias;

-    // We now have accesses at two offsets from the same base:
-    //  1. (...)*GCD + DecompGEP1.Offset with size V1Size
-    //  2. 0 with size V2Size
-    // Using arithmetic modulo GCD, the accesses are at
-    // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
-    // into the range [V2Size..GCD), then we know they cannot overlap.
-    APInt ModOffset = DecompGEP1.Offset.srem(GCD);
-    if (ModOffset.isNegative())
-      ModOffset += GCD; // We want mod, not rem.
-    if (V1Size.hasValue() && V2Size.hasValue() &&
-        ModOffset.uge(V2Size.getValue()) &&
-        (GCD - ModOffset).uge(V1Size.getValue()))
-      return AliasResult::NoAlias;
+  APInt GCD;
+  ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
+  for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
+    const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
+    const APInt &Scale = Index.Scale;
+    APInt ScaleForGCD = Scale;
+    if (!Index.IsNSW)
+      ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(),
+                                        Scale.countTrailingZeros());
+
+    if (i == 0)
+      GCD = ScaleForGCD.abs();
+    else
+      GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
+
+    ConstantRange CR =
+        computeConstantRange(Index.Val.V, true, &AC, Index.CxtI);
+    KnownBits Known =
+        computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
+    CR = CR.intersectWith(
+        ConstantRange::fromKnownBits(Known, /* Signed */ true),
+        ConstantRange::Signed);
+    CR = Index.Val.evaluateWith(CR).sextOrTrunc(OffsetRange.getBitWidth());
+
+    assert(OffsetRange.getBitWidth() == Scale.getBitWidth() &&
+           "Bit widths are normalized to MaxIndexSize");
+    if (Index.IsNSW)
+      OffsetRange = OffsetRange.add(CR.smul_sat(ConstantRange(Scale)));
+    else
+      OffsetRange = OffsetRange.add(CR.smul_fast(ConstantRange(Scale)));
+  }

-    if (V1Size.hasValue() && V2Size.hasValue()) {
-      // Compute ranges of potentially accessed bytes for both accesses. If the
-      // interseciton is empty, there can be no overlap.
-      unsigned BW = OffsetRange.getBitWidth();
-      ConstantRange Range1 = OffsetRange.add(
-          ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue())));
-      ConstantRange Range2 =
-          ConstantRange(APInt(BW, 0), APInt(BW, V2Size.getValue()));
-      if (Range1.intersectWith(Range2).isEmptySet())
-        return AliasResult::NoAlias;
-    }
+  // We now have accesses at two offsets from the same base:
+  //  1. (...)*GCD + DecompGEP1.Offset with size V1Size
+  //  2. 0 with size V2Size
+  // Using arithmetic modulo GCD, the accesses are at
+  // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
+  // into the range [V2Size..GCD), then we know they cannot overlap.
+  APInt ModOffset = DecompGEP1.Offset.srem(GCD);
+  if (ModOffset.isNegative())
+    ModOffset += GCD; // We want mod, not rem.
+  if (ModOffset.uge(V2Size.getValue()) &&
+      (GCD - ModOffset).uge(V1Size.getValue()))
+    return AliasResult::NoAlias;

-    if (V1Size.hasValue() && V2Size.hasValue()) {
-      // Try to determine the range of values for VarIndex such that
-      // VarIndex <= -MinAbsVarIndex || MinAbsVarIndex <= VarIndex.
-      Optional<APInt> MinAbsVarIndex;
-      if (DecompGEP1.VarIndices.size() == 1) {
-        // VarIndex = Scale*V.
-        const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
-        if (Var.Val.TruncBits == 0 &&
-            isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
-          // If V != 0 then abs(VarIndex) >= abs(Scale).
-          MinAbsVarIndex = Var.Scale.abs();
-        }
-      } else if (DecompGEP1.VarIndices.size() == 2) {
-        // VarIndex = Scale*V0 + (-Scale)*V1.
-        // If V0 != V1 then abs(VarIndex) >= abs(Scale).
-        // Check that VisitedPhiBBs is empty, to avoid reasoning about
-        // inequality of values across loop iterations.
-        const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
-        const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
-        if (Var0.Scale == -Var1.Scale && Var0.Val.TruncBits == 0 &&
-            Var0.Val.hasSameCastsAs(Var1.Val) && VisitedPhiBBs.empty() &&
-            isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr,
-                            DT))
-          MinAbsVarIndex = Var0.Scale.abs();
-      }
+  // Compute ranges of potentially accessed bytes for both accesses. If the
+  // intersection is empty, there can be no overlap.
+  unsigned BW = OffsetRange.getBitWidth();
+  ConstantRange Range1 = OffsetRange.add(
+      ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue())));
+  ConstantRange Range2 =
+      ConstantRange(APInt(BW, 0), APInt(BW, V2Size.getValue()));
+  if (Range1.intersectWith(Range2).isEmptySet())
+    return AliasResult::NoAlias;

-      if (MinAbsVarIndex) {
-        // The constant offset will have added at least +/-MinAbsVarIndex to it.
-        APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex;
-        APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex;
-        // We know that Offset <= OffsetLo || Offset >= OffsetHi
-        if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) &&
-            OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue()))
-          return AliasResult::NoAlias;
-      }
+  // Try to determine the range of values for VarIndex such that
+  // VarIndex <= -MinAbsVarIndex || MinAbsVarIndex <= VarIndex.
+  Optional<APInt> MinAbsVarIndex;
+  if (DecompGEP1.VarIndices.size() == 1) {
+    // VarIndex = Scale*V.
+    const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
+    if (Var.Val.TruncBits == 0 &&
+        isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
+      // If V != 0 then abs(VarIndex) >= abs(Scale).
+      MinAbsVarIndex = Var.Scale.abs();
+    }
+  } else if (DecompGEP1.VarIndices.size() == 2) {
+    // VarIndex = Scale*V0 + (-Scale)*V1.
+    // If V0 != V1 then abs(VarIndex) >= abs(Scale).
+ // Check that VisitedPhiBBs is empty, to avoid reasoning about + // inequality of values across loop iterations. + const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0]; + const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1]; + if (Var0.Scale == -Var1.Scale && Var0.Val.TruncBits == 0 && + Var0.Val.hasSameCastsAs(Var1.Val) && VisitedPhiBBs.empty() && + isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr, + DT)) + MinAbsVarIndex = Var0.Scale.abs(); + } - if (constantOffsetHeuristic(DecompGEP1, V1Size, V2Size, &AC, DT)) + if (MinAbsVarIndex) { + // The constant offset will have added at least +/-MinAbsVarIndex to it. + APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex; + APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex; + // We know that Offset <= OffsetLo || Offset >= OffsetHi + if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) && + OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue())) return AliasResult::NoAlias; } + if (constantOffsetHeuristic(DecompGEP1, V1Size, V2Size, &AC, DT)) + return AliasResult::NoAlias; + // Statically, we can see that the base objects are the same, but the // pointers have dynamic offsets which we can't resolve. And none of our // little tricks above worked. diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp index c2ba3d662bf37..ae60b9c2e1e44 100644 --- a/llvm/lib/Analysis/CGSCCPassManager.cpp +++ b/llvm/lib/Analysis/CGSCCPassManager.cpp @@ -38,8 +38,6 @@ using namespace llvm; // Explicit template instantiations and specialization definitions for core // template typedefs. namespace llvm { -extern cl::opt EagerlyInvalidateAnalyses; - static cl::opt AbortOnMaxDevirtIterationsReached( "abort-on-max-devirt-iterations-reached", cl::desc("Abort when the max iterations for devirtualization CGSCC repeat " @@ -557,8 +555,7 @@ PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C, // We know that the function pass couldn't have invalidated any other // function's analyses (that's the contract of a function pass), so // directly handle the function analysis manager's invalidation here. - FAM.invalidate(F, EagerlyInvalidateAnalyses ? PreservedAnalyses::none() - : PassPA); + FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA); // Then intersect the preserved set so that invalidation of module // analyses will eventually occur when the module pass completes. diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 6a27dd7a74dde..d40658398f1b0 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3665,30 +3665,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, CRHS->getPointerOperand(), Q)) return C; - if (GetElementPtrInst *GLHS = dyn_cast(LHS)) { - if (GEPOperator *GRHS = dyn_cast(RHS)) { - if (GLHS->getPointerOperand() == GRHS->getPointerOperand() && - GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() && - (ICmpInst::isEquality(Pred) || - (GLHS->isInBounds() && GRHS->isInBounds() && - Pred == ICmpInst::getSignedPredicate(Pred)))) { - // The bases are equal and the indices are constant. Build a constant - // expression GEP with the same indices and a null base pointer to see - // what constant folding can make out of it. 
- Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); - SmallVector IndicesLHS(GLHS->indices()); - Constant *NewLHS = ConstantExpr::getGetElementPtr( - GLHS->getSourceElementType(), Null, IndicesLHS); - - SmallVector IndicesRHS(GRHS->indices()); - Constant *NewRHS = ConstantExpr::getGetElementPtr( - GLHS->getSourceElementType(), Null, IndicesRHS); - Constant *NewICmp = ConstantExpr::getICmp(Pred, NewLHS, NewRHS); - return ConstantFoldConstant(NewICmp, Q.DL); - } - } - } - // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa(LHS) || isa(RHS)) diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp index 672a2c6eba582..0007c54b16d07 100644 --- a/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/llvm/lib/Analysis/LazyCallGraph.cpp @@ -1973,28 +1973,10 @@ void LazyCallGraph::visitReferences(SmallVectorImpl &Worklist, continue; } - // The blockaddress constant expression is a weird special case, we can't - // generically walk its operands the way we do for all other constants. - if (BlockAddress *BA = dyn_cast(C)) { - // If we've already visited the function referred to by the block - // address, we don't need to revisit it. - if (Visited.count(BA->getFunction())) - continue; - - // If all of the blockaddress' users are instructions within the - // referred to function, we don't need to insert a cycle. - if (llvm::all_of(BA->users(), [&](User *U) { - if (Instruction *I = dyn_cast(U)) - return I->getFunction() == BA->getFunction(); - return false; - })) - continue; - - // Otherwise we should go visit the referred to function. - Visited.insert(BA->getFunction()); - Worklist.push_back(BA->getFunction()); + // blockaddresses are weird and don't participate in the call graph anyway, + // skip them. 
+ if (isa(C)) continue; - } for (Value *Op : C->operand_values()) if (Visited.insert(cast(Op)).second) diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp index cf055b182786b..f9a7a5bdf434c 100644 --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -715,6 +715,7 @@ PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) { return PreservedAnalyses::all(); } +namespace { class LintLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid @@ -733,6 +734,7 @@ class LintLegacyPass : public FunctionPass { } void print(raw_ostream &O, const Module *M) const override {} }; +} // namespace char LintLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR", diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index d7ebe0e884e03..8de0adc0fd289 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -1135,11 +1135,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef Updates, if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) { for (auto &DefToReplaceUses : *DefsList) { BasicBlock *DominatingBlock = DefToReplaceUses.getBlock(); - Value::use_iterator UI = DefToReplaceUses.use_begin(), - E = DefToReplaceUses.use_end(); - for (; UI != E;) { - Use &U = *UI; - ++UI; + for (Use &U : llvm::make_early_inc_range(DefToReplaceUses.uses())) { MemoryAccess *Usr = cast(U.getUser()); if (MemoryPhi *UsrPhi = dyn_cast(Usr)) { BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 36386764e88e9..5d6b0e4799e89 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -1098,7 +1098,6 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op, SCEV *S = new (SCEVAllocator) SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Op); return S; } @@ -1219,7 +1218,6 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Op); return S; } @@ -1274,7 +1272,6 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Op); return S; } @@ -1604,7 +1601,6 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Op); return S; } @@ -1874,7 +1870,6 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Op); return S; } @@ -1914,7 +1909,6 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Op); return S; } @@ -2112,7 +2106,6 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned 
Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, { Op }); return S; } @@ -2898,7 +2891,6 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops, S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Ops); } S->setNoWrapFlags(Flags); @@ -2922,7 +2914,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); + LoopUsers[L].push_back(S); registerUser(S, Ops); } setNoWrapFlags(S, Flags); @@ -2945,7 +2937,6 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops, S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Ops); } S->setNoWrapFlags(Flags); @@ -3455,7 +3446,6 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, {LHS, RHS}); return S; } @@ -3850,7 +3840,6 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind, SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); registerUser(S, Ops); return S; } @@ -6589,6 +6578,26 @@ ScalarEvolution::getNonTrivialDefiningScopeBound(const SCEV *S) { return nullptr; } +/// Fills \p Ops with unique operands of \p S, if it has operands. If not, +/// \p Ops remains unmodified. +static void collectUniqueOps(const SCEV *S, + SmallVectorImpl<const SCEV *> &Ops) { + SmallPtrSet<const SCEV *, 4> Unique; + auto InsertUnique = [&](const SCEV *S) { + if (Unique.insert(S).second) + Ops.push_back(S); + }; + if (auto *S2 = dyn_cast<SCEVCastExpr>(S)) + for (auto *Op : S2->operands()) + InsertUnique(Op); + else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S)) + for (auto *Op : S2->operands()) + InsertUnique(Op); + else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S)) + for (auto *Op : S2->operands()) + InsertUnique(Op); +} + const Instruction * ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) { // Do a bounded search of the def relation of the requested SCEVs. @@ -6612,15 +6621,12 @@ ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) { if (auto *DefI = getNonTrivialDefiningScopeBound(S)) { if (!Bound || DT.dominates(Bound, DefI)) Bound = DefI; - } else if (auto *S2 = dyn_cast<SCEVCastExpr>(S)) - for (auto *Op : S2->operands()) - pushOp(Op); - else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S)) - for (auto *Op : S2->operands()) - pushOp(Op); - else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S)) - for (auto *Op : S2->operands()) + } else { + SmallVector<const SCEV *, 4> Ops; + collectUniqueOps(S, Ops); + for (auto *Op : Ops) pushOp(Op); + } } return Bound ? Bound : &*F.getEntryBlock().begin(); } @@ -7263,6 +7269,131 @@ unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { return getConstantTripCount(MaxExitCount); } +const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) { + // We cannot infer a trip count from array accesses in an irregular loop. + // FIXME: It's hard to infer a loop bound from arrays operated on in nested + // loops. + if (!L->isLoopSimplifyForm() || !L->isInnermost()) + return getCouldNotCompute(); + + // FIXME: To keep the scenario typical, we only analyze loops that have
one exiting block and that block must be the latch. This makes it easier to + // capture loops whose memory accesses are executed in each iteration. + const BasicBlock *LoopLatch = L->getLoopLatch(); + assert(LoopLatch && "See definition of loop simplify form."); + if (L->getExitingBlock() != LoopLatch) + return getCouldNotCompute(); + + const DataLayout &DL = getDataLayout(); + SmallVector<const SCEV *> InferCountColl; + for (auto *BB : L->getBlocks()) { + // At this point we know the loop is a single-exiting, simplified-form + // loop, so any memory operation we infer from in these BBs must be + // executed by the loop. First, make sure that the max execution count + // of MemAccessBB in the loop bounds the latch's max execution count. + // If MemAccessBB does not dominate the latch, skip. + // Entry + // │ + // ┌─────▼─────┐ + // │Loop Header◄─────┐ + // └──┬──────┬─┘ │ + // │ │ │ + // ┌────────▼──┐ ┌─▼─────┐ │ + // │MemAccessBB│ │OtherBB│ │ + // └────────┬──┘ └─┬─────┘ │ + // │ │ │ + // ┌─▼──────▼─┐ │ + // │Loop Latch├─────┘ + // └────┬─────┘ + // ▼ + // Exit + if (!DT.dominates(BB, LoopLatch)) + continue; + + for (Instruction &Inst : *BB) { + // Find a memory operation instruction. + auto *GEP = getLoadStorePointerOperand(&Inst); + if (!GEP) + continue; + + auto *ElemSize = dyn_cast<SCEVConstant>(getElementSize(&Inst)); + // Do not infer from a scalar type, e.g. "ElemSize = sizeof()". + if (!ElemSize) + continue; + + // Use an existing polynomial recurrence on the trip count. + auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(GEP)); + if (!AddRec) + continue; + auto *ArrBase = dyn_cast<SCEVUnknown>(getPointerBase(AddRec)); + auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*this)); + if (!ArrBase || !Step) + continue; + assert(isLoopInvariant(ArrBase, L) && "See addrec definition"); + + // Only handle { %array + step }. + // FIXME: {(SCEVAddRecExpr) + step } cannot be analyzed here. + if (AddRec->getStart() != ArrBase) + continue; + + // Skip memory operation patterns which have gaps, repeated memory + // operations, and GEP indices that wrap around. + if (Step->getAPInt().getActiveBits() > 32 || + Step->getAPInt().getZExtValue() != + ElemSize->getAPInt().getZExtValue() || + Step->isZero() || Step->getAPInt().isNegative()) + continue; + + // Only infer from a stack array which has a certain size. + // Make sure the alloca instruction is not executed in the loop. + AllocaInst *AllocateInst = dyn_cast<AllocaInst>(ArrBase->getValue()); + if (!AllocateInst || L->contains(AllocateInst->getParent())) + continue; + + // Make sure we only handle a normal array. + auto *Ty = dyn_cast<ArrayType>(AllocateInst->getAllocatedType()); + auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize()); + if (!Ty || !ArrSize || !ArrSize->isOne()) + continue; + // Also make sure the step is increased by exactly the size of the + // allocated element type. + const PointerType *GEPT = dyn_cast<PointerType>(GEP->getType()); + if (Ty->getElementType() != GEPT->getElementType()) + continue; + + // FIXME: Since GEP indices are silently zext'd to the indexing type, + // we will have a narrow GEP index which wraps around rather than + // increasing strictly; we should ensure that the step increases + // strictly with each loop iteration. + // Now we can infer a max execution count from MemLength/StepLength. + const SCEV *MemSize = + getConstant(Step->getType(), DL.getTypeAllocSize(Ty)); + auto *MaxExeCount = + dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step)); + if (!MaxExeCount || MaxExeCount->getAPInt().getActiveBits() > 32) + continue;
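As a worked illustration of the inference above (a hypothetical input, not a test from this patch), consider:

    void Fill() {
      int A[10];                // alloca of [10 x i32], outside the loop
      for (int I = 0;; ++I) {   // latch is the only exiting block
        A[I] = I;               // addrec {%A,+,4}, Step == ElemSize == 4
        if (A[I] > 5)
          break;
      }
    }

Here MemSize is 40 bytes and Step is 4, so MaxExeCount = ceil(40 / 4) = 10: the store can execute at most 10 times before it would run off the end of the array, which is what the code that follows turns into a trip-count bound.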
+ + // If the loop reaches the maximum number of executions, we cannot + // access bytes starting outside the statically allocated size without + // immediate UB. But it is allowed to enter the loop header one more + // time. + auto *InferCount = dyn_cast<SCEVConstant>( + getAddExpr(MaxExeCount, getOne(MaxExeCount->getType()))); + // Discard inferred counts that do not fit in 32 bits. + if (!InferCount || InferCount->getAPInt().getActiveBits() > 32) + continue; + + InferCountColl.push_back(InferCount); + } + } + + if (InferCountColl.size() == 0) + return getCouldNotCompute(); + + return getUMinFromMismatchedTypes(InferCountColl); +} + unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -11661,9 +11792,34 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, SmallVector<const SCEV *> Operands{AR->operands()}; Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); - - setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags); } + + auto canProveNUW = [&]() { + if (!isLoopInvariant(RHS, L)) + return false; + + if (!isKnownNonZero(AR->getStepRecurrence(*this))) + // We need the sequence defined by AR to strictly increase in the + // unsigned integer domain for the logic below to hold. + return false; + + const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType()); + const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType()); + // If RHS <=u Limit, then there must exist a value V in the sequence + // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and + // V <=u UINT_MAX. Thus, we must exit the loop before unsigned + // overflow occurs. This limit also implies that a signed comparison + // (in the wide bitwidth) is equivalent to an unsigned comparison as + // the high bits on both sides must be zero. + APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this)); + APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1); + Limit = Limit.zext(OuterBitWidth); + return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit); + }; + if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW()) + Flags = setFlags(Flags, SCEV::FlagNUW); + + setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags); if (AR->hasNoUnsignedWrap()) { // Emulate what getZeroExtendExpr would have done during construction // if we'd been able to infer the fact just above at that time.
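To make the Limit computation in canProveNUW concrete, a standalone sketch with assumed example values (an i8 addrec with a maximum unsigned stride of 4, compared against an i16 RHS):

    #include "llvm/ADT/APInt.h"

    llvm::APInt computeNUWLimit() {
      unsigned InnerBitWidth = 8, OuterBitWidth = 16;
      llvm::APInt StrideMax(InnerBitWidth, 4);
      // 255 - (4 - 1) = 252: the largest RHS for which the i8 sequence
      // must exit the loop before an unsigned wrap can occur.
      llvm::APInt Limit =
          llvm::APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1);
      return Limit.zext(OuterBitWidth);
    }

If the unsigned range max of RHS (after applyLoopGuards) is <=u 252 in this scenario, FlagNUW can be set on the addrec.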
@@ -12540,7 +12696,7 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { Values.emplace_back(L, LoopVariant); LoopDisposition D = computeLoopDisposition(S, L); auto &Values2 = LoopDispositions[S]; - for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + for (auto &V : llvm::reverse(Values2)) { if (V.getPointer() == L) { V.setInt(D); break; } @@ -12648,7 +12804,7 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { Values.emplace_back(BB, DoesNotDominateBlock); BlockDisposition D = computeBlockDisposition(S, BB); auto &Values2 = BlockDispositions[S]; - for (auto &V : make_range(Values2.rbegin(), Values2.rend())) { + for (auto &V : llvm::reverse(Values2)) { if (V.getPointer() == BB) { V.setInt(D); break; } @@ -12808,13 +12964,6 @@ ScalarEvolution::getUsedLoops(const SCEV *S, SCEVTraversal<FindUsedLoops>(F).visitAll(S); } -void ScalarEvolution::addToLoopUseLists(const SCEV *S) { - SmallPtrSet<const Loop *, 8> LoopsUsed; - getUsedLoops(S, LoopsUsed); - for (auto *L : LoopsUsed) - LoopUsers[L].push_back(S); -} - void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); @@ -12908,15 +13057,8 @@ void ScalarEvolution::verify() const { // Verify integrity of SCEV users. for (const auto &S : UniqueSCEVs) { - SmallPtrSet<const SCEV *, 4> Ops; - if (const auto *NS = dyn_cast<SCEVNAryExpr>(&S)) - Ops.insert(NS->op_begin(), NS->op_end()); - else if (const auto *CS = dyn_cast<SCEVCastExpr>(&S)) - Ops.insert(CS->getOperand()); - else if (const auto *DS = dyn_cast<SCEVUDivExpr>(&S)) { - Ops.insert(DS->getLHS()); - Ops.insert(DS->getRHS()); - } + SmallVector<const SCEV *, 4> Ops; + collectUniqueOps(&S, Ops); for (const auto *Op : Ops) { // We do not store dependencies of constants. if (isa<SCEVConstant>(Op)) diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 800896fa6a053..3200c72546b75 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -604,6 +604,10 @@ Optional<unsigned> TargetTransformInfo::getMaxVScale() const { return TTIImpl->getMaxVScale(); } +Optional<unsigned> TargetTransformInfo::getVScaleForTuning() const { + return TTIImpl->getVScaleForTuning(); +} + bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const { return TTIImpl->shouldMaximizeVectorBandwidth(); } @@ -824,6 +828,24 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode, return Cost; } +InstructionCost TargetTransformInfo::getReplicationShuffleCost( + Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedSrcElts, + const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) { + InstructionCost Cost = TTIImpl->getReplicationShuffleCost( + EltTy, ReplicationFactor, VF, DemandedSrcElts, DemandedReplicatedElts, + CostKind); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} +InstructionCost TargetTransformInfo::getReplicationShuffleCost( + Type *EltTy, int ReplicationFactor, int VF, ArrayRef<int> Mask, + TTI::TargetCostKind CostKind) { + InstructionCost Cost = TTIImpl->getReplicationShuffleCost( + EltTy, ReplicationFactor, VF, Mask, CostKind); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + InstructionCost TargetTransformInfo::getMemoryOpCost( unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const { diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 0585a63a00af2..0e0701714e6e6 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++
b/llvm/lib/Analysis/ValueTracking.cpp @@ -396,6 +396,14 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL, V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo)); } +unsigned llvm::ComputeMinSignedBits(const Value *V, const DataLayout &DL, + unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT); + return V->getType()->getScalarSizeInBits() - SignBits + 1; +} + static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1, bool NSW, const APInt &DemandedElts, KnownBits &KnownOut, KnownBits &Known2, diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 9ba76022e6549..c568461e62b05 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2279,7 +2279,11 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() { return error("Alias and aliasee types don't match"); GA->setAliasee(C); } else if (auto *GI = dyn_cast<GlobalIFunc>(GV)) { - GI->setResolver(C); + Type *ResolverFTy = + GlobalIFunc::getResolverFunctionType(GI->getValueType()); + // Transparently fix up the type for compatibility with older bitcode. + GI->setResolver( + ConstantExpr::getBitCast(C, ResolverFTy->getPointerTo())); } else { return error("Expected an alias or an ifunc"); } @@ -5534,21 +5538,16 @@ Error BitcodeReader::materialize(GlobalValue *GV) { // Upgrade any old intrinsic calls in the function. for (auto &I : UpgradedIntrinsics) { - for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end(); - UI != UE;) { - User *U = *UI; - ++UI; + for (User *U : llvm::make_early_inc_range(I.first->materialized_users())) if (CallInst *CI = dyn_cast<CallInst>(U)) UpgradeIntrinsicCall(CI, I.second); - } } // Update calls to the remangled intrinsics for (auto &I : RemangledIntrinsics) - for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end(); - UI != UE;) + for (User *U : llvm::make_early_inc_range(I.first->materialized_users())) // Don't expect any other users than call sites - cast<CallInst>(*UI++)->setCalledFunction(I.second); + cast<CallInst>(U)->setCalledFunction(I.second); // Finish fn->subprogram upgrade for materialized functions.
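The ComputeMinSignedBits helper added to ValueTracking above is the complement of ComputeNumSignBits; its arithmetic restated standalone (values in the comment are assumed examples):

    // A value whose top NumSignBits bits are known copies of the sign bit
    // needs Width - NumSignBits + 1 bits in a signed representation.
    unsigned minSignedBits(unsigned Width, unsigned NumSignBits) {
      return Width - NumSignBits + 1;
    }
    // e.g. an i32 known to be sign-extended from i4: 32 - 29 + 1 == 4.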
if (DISubprogram *SP = MDLoader->lookupSubprogramForFunction(F)) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 54aaa335d24be..cc848d28a9a7a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1415,7 +1415,7 @@ void AsmPrinter::emitFunctionBody() { }); R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n"; for (auto &KV : MnemonicVec) { - auto Name = (Twine("INST_") + KV.first.trim()).str(); + auto Name = (Twine("INST_") + getToken(KV.first.trim()).first).str(); R << KV.first << ": " << ore::NV(Name, KV.second) << "\n"; } ORE->emit(R); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 8d828f9b4ce3c..f997dc89b398d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -561,6 +561,44 @@ void CodeViewDebug::emitCodeViewMagicVersion() { OS.emitInt32(COFF::DEBUG_SECTION_MAGIC); } +static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { + switch (DWLang) { + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_ObjC: + return SourceLanguage::C; + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_C_plus_plus_14: + return SourceLanguage::Cpp; + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran95: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + return SourceLanguage::Fortran; + case dwarf::DW_LANG_Pascal83: + return SourceLanguage::Pascal; + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + return SourceLanguage::Cobol; + case dwarf::DW_LANG_Java: + return SourceLanguage::Java; + case dwarf::DW_LANG_D: + return SourceLanguage::D; + case dwarf::DW_LANG_Swift: + return SourceLanguage::Swift; + default: + // There's no CodeView representation for this language, and CV doesn't + // have an "unknown" option for the language field, so we'll use MASM, + // as it's very low level. + return SourceLanguage::Masm; + } +} + void CodeViewDebug::beginModule(Module *M) { // If module doesn't have named metadata anchors or COFF debug section // is not available, skip any debug info related stuff. @@ -574,6 +612,13 @@ void CodeViewDebug::beginModule(Module *M) { TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch()); + // Get the current source language. + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); + const auto *CU = cast(Node); + + CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage()); + collectGlobalVariableInfo(); // Check if we should emit type record hashes. 
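The effect of the beginModule change above is that the source language is read once from the first DICompileUnit and cached; a stripped-down stand-in for the pattern (not the real class, names simplified):

    enum class Lang { C, Cpp, Fortran, Masm };

    struct DebugHandlerSketch {
      // Set once in beginModule(); defaults to Masm as in the patch.
      Lang CurrentSourceLanguage = Lang::Masm;
      bool moduleIsInFortran() const {
        return CurrentSourceLanguage == Lang::Fortran;
      }
    };

Later hunks (emitCompilerInformation, lowerTypeArray, emitDebugInfoForGlobal) branch on this cached value instead of re-reading the metadata each time.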
@@ -731,44 +776,6 @@ void CodeViewDebug::emitTypeGlobalHashes() { } } -static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { - switch (DWLang) { - case dwarf::DW_LANG_C: - case dwarf::DW_LANG_C89: - case dwarf::DW_LANG_C99: - case dwarf::DW_LANG_C11: - case dwarf::DW_LANG_ObjC: - return SourceLanguage::C; - case dwarf::DW_LANG_C_plus_plus: - case dwarf::DW_LANG_C_plus_plus_03: - case dwarf::DW_LANG_C_plus_plus_11: - case dwarf::DW_LANG_C_plus_plus_14: - return SourceLanguage::Cpp; - case dwarf::DW_LANG_Fortran77: - case dwarf::DW_LANG_Fortran90: - case dwarf::DW_LANG_Fortran95: - case dwarf::DW_LANG_Fortran03: - case dwarf::DW_LANG_Fortran08: - return SourceLanguage::Fortran; - case dwarf::DW_LANG_Pascal83: - return SourceLanguage::Pascal; - case dwarf::DW_LANG_Cobol74: - case dwarf::DW_LANG_Cobol85: - return SourceLanguage::Cobol; - case dwarf::DW_LANG_Java: - return SourceLanguage::Java; - case dwarf::DW_LANG_D: - return SourceLanguage::D; - case dwarf::DW_LANG_Swift: - return SourceLanguage::Swift; - default: - // There's no CodeView representation for this language, and CV doesn't - // have an "unknown" option for the language field, so we'll use MASM, - // as it's very low level. - return SourceLanguage::Masm; - } -} - namespace { struct Version { int Part[4]; @@ -798,12 +805,8 @@ void CodeViewDebug::emitCompilerInformation() { MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3); uint32_t Flags = 0; - NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); - const MDNode *Node = *CUs->operands().begin(); - const auto *CU = cast(Node); - // The low byte of the flags indicates the source language. - Flags = MapDWLangToCVLang(CU->getSourceLanguage()); + Flags = CurrentSourceLanguage; // TODO: Figure out which other flags need to be set. if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) { Flags |= static_cast(CompileSym3Flags::PGO); @@ -815,6 +818,10 @@ void CodeViewDebug::emitCompilerInformation() { OS.AddComment("CPUType"); OS.emitInt16(static_cast(TheCPU)); + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); + const auto *CU = cast(Node); + StringRef CompilerVersion = CU->getProducer(); Version FrontVer = parseVersion(CompilerVersion); OS.AddComment("Frontend version"); @@ -1574,6 +1581,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { return lowerTypeClass(cast(Ty)); case dwarf::DW_TAG_union_type: return lowerTypeUnion(cast(Ty)); + case dwarf::DW_TAG_string_type: + return lowerTypeString(cast(Ty)); case dwarf::DW_TAG_unspecified_type: if (Ty->getName() == "decltype(nullptr)") return TypeIndex::NullptrT(); @@ -1618,14 +1627,19 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { const DISubrange *Subrange = cast(Element); int64_t Count = -1; - // Calculate the count if either LowerBound is absent or is zero and - // either of Count or UpperBound are constant. - auto *LI = Subrange->getLowerBound().dyn_cast(); - if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) { - if (auto *CI = Subrange->getCount().dyn_cast()) - Count = CI->getSExtValue(); - else if (auto *UI = Subrange->getUpperBound().dyn_cast()) - Count = UI->getSExtValue() + 1; // LowerBound is zero + + // If Subrange has a Count field, use it. 
+ // Otherwise, if it has an upperbound, use (upperbound - lowerbound + 1), + // where lowerbound is from the LowerBound field of the Subrange, + // or the language default lowerbound if that field is unspecified. + if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>()) + Count = CI->getSExtValue(); + else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) { + // Fortran uses 1 as the default lowerbound; other languages use 0. + int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0; + auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>(); + Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound; + Count = UI->getSExtValue() - Lowerbound + 1; } // Forward declarations of arrays without a size and VLAs use a count of -1. @@ -1651,6 +1665,26 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { return ElementTypeIndex; } +// This function lowers a Fortran character type (DIStringType). +// Note that it handles only the character*n variant (using the SizeInBits +// field in DIStringType to describe the type size) at the moment. +// Other variants (leveraging the StringLength and StringLengthExp +// fields in DIStringType) remain TBD. +TypeIndex CodeViewDebug::lowerTypeString(const DIStringType *Ty) { + TypeIndex CharType = TypeIndex(SimpleTypeKind::NarrowCharacter); + uint64_t ArraySize = Ty->getSizeInBits() >> 3; + StringRef Name = Ty->getName(); + // IndexType is size_t, which depends on the bitness of the target. + TypeIndex IndexType = getPointerSizeInBytes() == 8 + ? TypeIndex(SimpleTypeKind::UInt64Quad) + : TypeIndex(SimpleTypeKind::UInt32Long); + + // Create a type of character array of ArraySize. + ArrayRecord AR(CharType, IndexType, ArraySize, Name); + + return TypeTable.writeLeafType(AR); +} + TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { TypeIndex Index; dwarf::TypeKind Kind; @@ -2183,6 +2217,7 @@ void CodeViewDebug::clear() { TypeIndices.clear(); CompleteTypeIndices.clear(); ScopeGlobals.clear(); + CVGlobalVariableOffsets.clear(); } void CodeViewDebug::collectMemberInfo(ClassInfo &Info, @@ -3068,6 +3103,15 @@ void CodeViewDebug::collectGlobalVariableInfo() { const DIGlobalVariable *DIGV = GVE->getVariable(); const DIExpression *DIE = GVE->getExpression(); + if ((DIE->getNumElements() == 2) && + (DIE->getElement(0) == dwarf::DW_OP_plus_uconst)) + // Record the constant offset for the variable. + // + // A Fortran common block uses this idiom to encode the offset + // of a variable from the common block's starting address. + CVGlobalVariableOffsets.insert( + std::make_pair(DIGV, DIE->getElement(1))); + // Emit constant global variables in a global symbol section. if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) { CVGlobalVariable CVGV = {DIGV, DIE}; @@ -3232,7 +3276,11 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>( DIGV->getRawStaticDataMemberDeclaration())) Scope = MemberDecl->getScope(); - std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName()); + // For Fortran, the scoping portion is elided in its name so that we can + // reference the variable in the command line of the VS debugger. + std::string QualifiedName = + (moduleIsInFortran()) ?
std::string(DIGV->getName()) + : getFullyQualifiedName(Scope, DIGV->getName()); if (const GlobalVariable *GV = CVGV.GVInfo.dyn_cast()) { @@ -3248,7 +3296,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { OS.AddComment("Type"); OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex()); OS.AddComment("DataOffset"); - OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); + + uint64_t Offset = 0; + if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end()) + // Use the offset seen while collecting info on globals. + Offset = CVGlobalVariableOffsets[DIGV]; + OS.EmitCOFFSecRel32(GVSym, Offset); + OS.AddComment("Segment"); OS.EmitCOFFSectionIndex(GVSym); OS.AddComment("Name"); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index d133474ee5aab..6f88e15ee8fee 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -186,6 +186,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { }; FunctionInfo *CurFn = nullptr; + codeview::SourceLanguage CurrentSourceLanguage = + codeview::SourceLanguage::Masm; + + // This map records the constant offset in DIExpression of the + // DIGlobalVariableExpression referencing the DIGlobalVariable. + DenseMap CVGlobalVariableOffsets; + // Map used to seperate variables according to the lexical scope they belong // in. This is populated by recordLocalVariable() before // collectLexicalBlocks() separates the variables between the FunctionInfo @@ -400,6 +407,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy); codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty); codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeString(const DIStringType *Ty); codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty); codeview::TypeIndex lowerTypePointer( const DIDerivedType *Ty, @@ -464,6 +472,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { /// Gather post-function debug information. void endFunctionImpl(const MachineFunction *) override; + /// Check if the current module is in Fortran. + bool moduleIsInFortran() { + return CurrentSourceLanguage == codeview::SourceLanguage::Fortran; + } + public: CodeViewDebug(AsmPrinter *AP); diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 802f0e880514e..5f4ee747fcca3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -93,19 +93,15 @@ void DIEHash::addParentContext(const DIE &Parent) { // Reverse iterate over our list to go from the outermost construct to the // innermost. - for (SmallVectorImpl::reverse_iterator I = Parents.rbegin(), - E = Parents.rend(); - I != E; ++I) { - const DIE &Die = **I; - + for (const DIE *Die : llvm::reverse(Parents)) { // ... Append the letter "C" to the sequence... addULEB128('C'); // ... Followed by the DWARF tag of the construct... - addULEB128(Die.getTag()); + addULEB128(Die->getTag()); // ... Then the name, taken from the DW_AT_name attribute. - StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name); LLVM_DEBUG(dbgs() << "... 
adding context: " << Name << "\n"); if (!Name.empty()) addString(Name); diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index bb24f1414ef1f..dd795079ac1a5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -252,8 +252,8 @@ void DbgValueHistoryMap::trimLocationRanges( // Now actually remove the entries. Iterate backwards so that our remaining // ToRemove indices are valid after each erase. - for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr) - HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr); + for (EntryIndex Idx : llvm::reverse(ToRemove)) + HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx); } } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index f1af9e2373f81..976e359051443 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -672,7 +672,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const { // Reverse iterate over our list to go from the outermost construct to the // innermost. - for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) { + for (const DIScope *Ctx : llvm::reverse(Parents)) { StringRef Name = Ctx->getName(); if (Name.empty() && isa(Ctx)) Name = "(anonymous namespace)"; diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp index e589c2e64abde..150f193248346 100644 --- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -812,8 +812,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { Entry = TypeInfos.size(); } - for (const GlobalValue *GV : make_range(TypeInfos.rbegin(), - TypeInfos.rend())) { + for (const GlobalValue *GV : llvm::reverse(TypeInfos)) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->emitTTypeReference(GV, TTypeEncoding); diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 28d336823df74..50825ccf9baca 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -485,7 +485,7 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); if (auto *FT = PrevBB->getFallThrough()) { assert(FT == DestBB); - TII->insertUnconditionalBranch(*PrevBB, DestBB, DebugLoc()); + TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc()); // Recalculate the block size. BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); } diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index b11db3e657703..558700bd9b3bb 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -244,7 +244,7 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) { MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; - for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { + for (MachineInstr &I : llvm::reverse(*MBB)) { // Update liveness, including the current instruction's defs. 
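Several hunks above replace explicit rbegin()/rend() iterator pairs with llvm::reverse; a minimal sketch of the idiom:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    int sumLastToFirst(const llvm::SmallVectorImpl<int> &V) {
      int Sum = 0;
      // Equivalent to: for (auto I = V.rbegin(), E = V.rend(); I != E; ++I)
      for (int X : llvm::reverse(V))
        Sum += X;
      return Sum;
    }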
LiveRegSet.stepBackward(I); diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 6e7db95b5c2a0..c6c0b79cd7e78 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -138,26 +138,22 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { // Now scan the instructions and delete dead ones, tracking physreg // liveness as we go. - for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), - MIE = MBB->rend(); - MII != MIE;) { - MachineInstr *MI = &*MII++; - + for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) { // If the instruction is dead, delete it! - if (isDead(MI)) { - LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + if (isDead(&MI)) { + LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI); // It is possible that some DBG_VALUE instructions refer to this // instruction. They get marked as undef and will be deleted // in the live debug variable analysis. - MI->eraseFromParentAndMarkDBGValuesForRemoval(); + MI.eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; ++NumDeletes; continue; } // Record the physreg defs. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isDef()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { @@ -175,8 +171,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) { } // Record the physreg uses, after the defs, in case a physreg is // both defined and used in the same instruction. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && MO.isUse()) { Register Reg = MO.getReg(); if (Register::isPhysicalRegister(Reg)) { diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp index 58269e172c573..a46d197553573 100644 --- a/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/llvm/lib/CodeGen/GCRootLowering.cpp @@ -193,8 +193,8 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) { bool MadeChange = false; for (BasicBlock &BB : F) - for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) { - IntrinsicInst *CI = dyn_cast(II++); + for (Instruction &I : llvm::make_early_inc_range(BB)) { + IntrinsicInst *CI = dyn_cast(&I); if (!CI) continue; diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 6f103bca6892f..381c6df5c97ad 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -130,16 +130,15 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, WrapperObserver.addObserver(CSEInfo); RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); for (MachineBasicBlock *MBB : post_order(&MF)) { - for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) { - MachineInstr *CurMI = &*MII; - ++MII; + for (MachineInstr &CurMI : + llvm::make_early_inc_range(llvm::reverse(*MBB))) { // Erase dead insts before even adding to the list. 
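The conversions to llvm::make_early_inc_range in this commit all serve the same purpose: the iterator is advanced before the loop body runs, so the body may erase the current element without invalidating the traversal. A simplified stand-in using the standard library (not LLVM's actual implementation):

    #include <list>

    void eraseEvens(std::list<int> &L) {
      for (auto It = L.begin(); It != L.end();) {
        auto Cur = It++;     // early increment: erasing *Cur cannot
        if (*Cur % 2 == 0)   // invalidate It, which already moved on
          L.erase(Cur);
      }
    }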
- if (isTriviallyDead(*CurMI, *MRI)) { - LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n"); - CurMI->eraseFromParentAndMarkDBGValuesForRemoval(); + if (isTriviallyDead(CurMI, *MRI)) { + LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n"); + CurMI.eraseFromParentAndMarkDBGValuesForRemoval(); continue; } - WorkList.deferred_insert(CurMI); + WorkList.deferred_insert(&CurMI); } } WorkList.finalize(); diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 992b2e6287425..64e1f43514562 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -341,9 +341,8 @@ void InlineSpiller::collectRegsToSpill() { if (Original == Reg) return; - for (MachineRegisterInfo::reg_instr_iterator - RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { - MachineInstr &MI = *RI++; + for (MachineInstr &MI : + llvm::make_early_inc_range(MRI.reg_instructions(Reg))) { Register SnipReg = isFullCopyOf(MI, Reg); if (!isSibling(SnipReg)) continue; @@ -465,10 +464,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); // Find all spills and copies of VNI. - for (MachineRegisterInfo::use_instr_nodbg_iterator - UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); - UI != E; ) { - MachineInstr &MI = *UI++; + for (MachineInstr &MI : + llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) { if (!MI.isCopy() && !MI.mayStore()) continue; SlotIndex Idx = LIS.getInstructionIndex(MI); @@ -676,11 +673,7 @@ void InlineSpiller::reMaterializeAll() { bool anyRemat = false; for (Register Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); - for (MachineRegisterInfo::reg_bundle_iterator - RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); - RegI != E; ) { - MachineInstr &MI = *RegI++; - + for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) { // Debug values are not allowed to affect codegen. if (MI.isDebugValue()) continue; @@ -1071,57 +1064,53 @@ void InlineSpiller::spillAroundUses(Register Reg) { LiveInterval &OldLI = LIS.getInterval(Reg); // Iterate over instructions using Reg. - for (MachineRegisterInfo::reg_bundle_iterator - RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); - RegI != E; ) { - MachineInstr *MI = &*(RegI++); - + for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) { // Debug values are not allowed to affect codegen. - if (MI->isDebugValue()) { + if (MI.isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - MachineBasicBlock *MBB = MI->getParent(); - LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI); - buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg); + MachineBasicBlock *MBB = MI.getParent(); + LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI); + buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg); MBB->erase(MI); continue; } - assert(!MI->isDebugInstr() && "Did not expect to find a use in debug " + assert(!MI.isDebugInstr() && "Did not expect to find a use in debug " "instruction that isn't a DBG_VALUE"); // Ignore copies to/from snippets. We'll delete them. - if (SnippetCopies.count(MI)) + if (SnippetCopies.count(&MI)) continue; // Stack slot accesses may coalesce away. - if (coalesceStackAccess(MI, Reg)) + if (coalesceStackAccess(&MI, Reg)) continue; // Analyze instruction. 
SmallVector, 8> Ops; - VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. - SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; // Check for a sibling copy. - Register SibReg = isFullCopyOf(*MI, Reg); + Register SibReg = isFullCopyOf(MI, Reg); if (SibReg && isSibling(SibReg)) { // This may actually be a copy between snippets. if (isRegToSpill(SibReg)) { - LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI); - SnippetCopies.insert(MI); + LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI); + SnippetCopies.insert(&MI); continue; } if (RI.Writes) { - if (hoistSpillInsideBB(OldLI, *MI)) { + if (hoistSpillInsideBB(OldLI, MI)) { // This COPY is now dead, the value is already in the stack slot. - MI->getOperand(0).setIsDead(); - DeadDefs.push_back(MI); + MI.getOperand(0).setIsDead(); + DeadDefs.push_back(&MI); continue; } } else { @@ -1141,7 +1130,7 @@ void InlineSpiller::spillAroundUses(Register Reg) { Register NewVReg = Edit->createFrom(Reg); if (RI.Reads) - insertReload(NewVReg, Idx, MI); + insertReload(NewVReg, Idx, &MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -1156,12 +1145,12 @@ void InlineSpiller::spillAroundUses(Register Reg) { hasLiveDef = true; } } - LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); + LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. if (RI.Writes) if (hasLiveDef) - insertSpill(NewVReg, true, MI); + insertSpill(NewVReg, true, &MI); } } @@ -1196,10 +1185,8 @@ void InlineSpiller::spillAll() { // Finally delete the SnippetCopies. for (Register Reg : RegsToSpill) { - for (MachineRegisterInfo::reg_instr_iterator - RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); - RI != E; ) { - MachineInstr &MI = *(RI++); + for (MachineInstr &MI : + llvm::make_early_inc_range(MRI.reg_instructions(Reg))) { assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. LIS.RemoveMachineInstrFromMaps(MI); diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp index fe9eb6dd6e9ee..691977dc34e6e 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -52,6 +52,7 @@ static cl::opt InputDbgValueLimit( "Maximum input DBG_VALUE insts supported by debug range extension"), cl::init(50000), cl::Hidden); +namespace { /// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or /// InstrRefBasedLDV to perform location propagation, via the LDVImpl /// base class. 
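Wrapping the pass in an anonymous namespace (done here for LiveDebugValues and earlier for LintLegacyPass) gives the class internal linkage; the pattern in isolation:

    namespace {
    // Internal linkage: this type cannot collide with an identically named
    // class in another translation unit, and all uses are file-local.
    class ExamplePass { /* ... */ };
    } // namespace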
@@ -81,6 +82,7 @@ class LiveDebugValues : public MachineFunctionPass { TargetPassConfig *TPC; MachineDominatorTree MDT; }; +} // namespace char LiveDebugValues::ID = 0; diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index c929c1dfc0daf..dcd546f9c6dbe 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -1294,13 +1294,9 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) { static void removeDebugInstrs(MachineFunction &mf) { for (MachineBasicBlock &MBB : mf) { - for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) { - if (!MBBI->isDebugInstr()) { - ++MBBI; - continue; - } - MBBI = MBB.erase(MBBI); - } + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) + if (MI.isDebugInstr()) + MBB.erase(&MI); } } diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp index dfa523d4bf41c..50b31e1eb247b 100644 --- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp +++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const { // Scan the vector of interfering virtual registers in this union. Assume it's // quite small. bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { - return is_contained(*InterferingVRegs, VirtReg); + return is_contained(InterferingVRegs, VirtReg); } // Collect virtual registers in this union that interfere with this @@ -124,14 +124,11 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused. // 3. Iterators left at the last seen intersection. // -unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { - if (!InterferingVRegs) - InterferingVRegs.emplace(); - +unsigned +LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) { // Fast path return if we already have the desired information. - if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs) - return InterferingVRegs->size(); + if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); // Set up iterators on the first call. if (!CheckedFirstInterference) { @@ -160,14 +157,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { LiveInterval *VReg = LiveUnionI.value(); if (VReg != RecentReg && !isSeenInterference(VReg)) { RecentReg = VReg; - InterferingVRegs->push_back(VReg); - if (InterferingVRegs->size() >= MaxInterferingRegs) - return InterferingVRegs->size(); + InterferingVRegs.push_back(VReg); + if (InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); } // This LiveUnion segment is no longer interesting. 
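With InterferingVRegs no longer wrapped in an Optional, callers lean on the query collecting lazily and caching; roughly the call-site shape that the register-allocator hunks below adopt (a sketch assuming a live Query object, not a complete caller):

    #include "llvm/CodeGen/LiveIntervalUnion.h"

    bool tooManyInterferences(llvm::LiveIntervalUnion::Query &Q) {
      // Collects at most 10 interfering vregs on first use; subsequent
      // calls return the cached vector via the fast path.
      const auto &Interferences = Q.interferingVRegs(10);
      return Interferences.size() >= 10;
    }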
if (!(++LiveUnionI).valid()) { SeenAllInterferences = true; - return InterferingVRegs->size(); + return InterferingVRegs.size(); } } @@ -188,7 +185,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { LiveUnionI.advanceTo(LRI->start); } SeenAllInterferences = true; - return InterferingVRegs->size(); + return InterferingVRegs.size(); } void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index c0c7848139e4d..d4848f16dcf2d 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -81,22 +81,24 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, SmallVectorImpl> &Clobbers) { // Remove killed registers from the set. for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg() && !O->isDebug()) { + if (O->isReg()) { + if (O->isDebug()) + continue; Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) + if (!Reg.isPhysical()) continue; if (O->isDef()) { // Note, dead defs are still recorded. The caller should decide how to // handle them. Clobbers.push_back(std::make_pair(Reg, &*O)); } else { - if (!O->isKill()) - continue; assert(O->isUse()); - removeReg(Reg); + if (O->isKill()) + removeReg(Reg); } - } else if (O->isRegMask()) + } else if (O->isRegMask()) { removeRegsInMask(*O, &Clobbers); + } } // Add defs to the set. @@ -250,7 +252,7 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); - for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) + for (const MachineInstr &MI : llvm::reverse(MBB)) LiveRegs.stepBackward(MI); } @@ -287,7 +289,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); - for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + for (MachineInstr &MI : llvm::reverse(MBB)) { // Recompute dead flags. for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { if (!MO->isReg() || !MO->isDef() || MO->isDebug()) @@ -296,7 +298,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(Register::isPhysicalRegister(Reg)); + assert(Reg.isPhysical()); bool IsNotLive = LiveRegs.available(MRI, Reg); @@ -325,7 +327,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) { Register Reg = MO->getReg(); if (Reg == 0) continue; - assert(Register::isPhysicalRegister(Reg)); + assert(Reg.isPhysical()); bool IsNotLive = LiveRegs.available(MRI, Reg); MO->setIsKill(IsNotLive); diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp index 300a9059de88c..51ba4b7e53eba 100644 --- a/llvm/lib/CodeGen/LiveVariables.cpp +++ b/llvm/lib/CodeGen/LiveVariables.cpp @@ -669,6 +669,86 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { return false; } +void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) { + assert(Reg.isVirtual()); + + VarInfo &VI = getVarInfo(Reg); + VI.AliveBlocks.clear(); + VI.Kills.clear(); + + MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg); + MachineBasicBlock &DefBB = *DefMI.getParent(); + + // Handle the case where all uses have been removed. + if (MRI->use_nodbg_empty(Reg)) { + VI.Kills.push_back(&DefMI); + DefMI.addRegisterDead(Reg, nullptr); + return; + } + DefMI.clearRegisterDeads(Reg); + + // Initialize a worklist of BBs that Reg is live-to-end of. 
(Here + // "live-to-end" means Reg is live at the end of a block even if it is only + // live because of phi uses in a successor. This is different from isLiveOut() + // which does not consider phi uses.) + SmallVector LiveToEndBlocks; + SparseBitVector<> UseBlocks; + for (auto &UseMO : MRI->use_nodbg_operands(Reg)) { + UseMO.setIsKill(false); + MachineInstr &UseMI = *UseMO.getParent(); + MachineBasicBlock &UseBB = *UseMI.getParent(); + UseBlocks.set(UseBB.getNumber()); + if (UseMI.isPHI()) { + // If Reg is used in a phi then it is live-to-end of the corresponding + // predecessor. + unsigned Idx = UseMI.getOperandNo(&UseMO); + LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB()); + } else if (&UseBB == &DefBB) { + // A non-phi use in the same BB as the single def must come after the def. + } else { + // Otherwise Reg must be live-to-end of all predecessors. + LiveToEndBlocks.append(UseBB.pred_begin(), UseBB.pred_end()); + } + } + + // Iterate over the worklist adding blocks to AliveBlocks. + bool LiveToEndOfDefBB = false; + while (!LiveToEndBlocks.empty()) { + MachineBasicBlock &BB = *LiveToEndBlocks.pop_back_val(); + if (&BB == &DefBB) { + LiveToEndOfDefBB = true; + continue; + } + if (VI.AliveBlocks.test(BB.getNumber())) + continue; + VI.AliveBlocks.set(BB.getNumber()); + LiveToEndBlocks.append(BB.pred_begin(), BB.pred_end()); + } + + // Recompute kill flags. For each block in which Reg is used but is not + // live-through, find the last instruction that uses Reg. Ignore phi nodes + // because they should not be included in Kills. + for (unsigned UseBBNum : UseBlocks) { + if (VI.AliveBlocks.test(UseBBNum)) + continue; + MachineBasicBlock &UseBB = *MF->getBlockNumbered(UseBBNum); + if (&UseBB == &DefBB && LiveToEndOfDefBB) + continue; + for (auto &MI : reverse(UseBB)) { + if (MI.isDebugOrPseudoInstr()) + continue; + if (MI.isPHI()) + break; + if (MI.readsRegister(Reg)) { + assert(!MI.killsRegister(Reg)); + MI.addRegisterKilled(Reg, nullptr); + VI.Kills.push_back(&MI); + break; + } + } + } +} + /// replaceKillInstruction - Update register kill info by replacing a kill /// instruction with a new one. void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI, diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 3b5a974a0ebfc..748218ec01b51 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -476,14 +476,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; - for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) { + for (MachineInstr *I : llvm::reverse(Candidates)) { if (i++ == SinkIntoLoopLimit) { LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " "be analysed."); break; } - MachineInstr *I = *It; if (!SinkIntoLoop(L, *I)) break; EverMadeChange = true; diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 120ecd7ae0595..43cca1b26c713 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -700,11 +700,9 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs) { // For each epilog block, check that the value defined by each instruction // is used. If not, delete it. 
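The recomputeForSingleDefVirtReg routine above is a standard worklist fixpoint: seed with the blocks the register is live-to-end of, then pull in predecessors until saturation, stopping at the defining block. The propagation step in isolation (a generic sketch over block numbers, not the real interface):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/SparseBitVector.h"

    void propagateLiveToEnd(llvm::SmallVector<int, 8> Worklist, int DefBlock,
                            llvm::function_ref<llvm::ArrayRef<int>(int)> Preds,
                            llvm::SparseBitVector<> &Alive) {
      while (!Worklist.empty()) {
        int BB = Worklist.pop_back_val();
        if (BB == DefBlock || Alive.test(BB)) // stop at the def; visit once
          continue;
        Alive.set(BB);
        for (int P : Preds(BB))
          Worklist.push_back(P);
      }
    }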
- for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(), - MBE = EpilogBBs.rend(); - MBB != MBE; ++MBB) - for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(), - ME = (*MBB)->instr_rend(); + for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs)) + for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(), + ME = MBB->instr_rend(); MI != ME;) { // From DeadMachineInstructionElem. Don't delete inline assembly. if (MI->isInlineAsm()) { @@ -1617,32 +1615,32 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks( MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) { auto InsertPt = DestBB->getFirstNonPHI(); DenseMap Remaps; - for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) { - MachineInstr *MI = &*I++; - if (MI->isPHI()) { + for (MachineInstr &MI : llvm::make_early_inc_range( + llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) { + if (MI.isPHI()) { // This is an illegal PHI. If we move any instructions using an illegal // PHI, we need to create a legal Phi. - if (getStage(MI) != Stage) { + if (getStage(&MI) != Stage) { // The legal Phi is not necessary if the illegal phi's stage // is being moved. - Register PhiR = MI->getOperand(0).getReg(); + Register PhiR = MI.getOperand(0).getReg(); auto RC = MRI.getRegClass(PhiR); Register NR = MRI.createVirtualRegister(RC); MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), NR) .addReg(PhiR) .addMBB(SourceBB); - BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; - CanonicalMIs[NI] = CanonicalMIs[MI]; + BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI; + CanonicalMIs[NI] = CanonicalMIs[&MI]; Remaps[PhiR] = NR; } } - if (getStage(MI) != Stage) + if (getStage(&MI) != Stage) continue; - MI->removeFromParent(); - DestBB->insert(InsertPt, MI); - auto *KernelMI = CanonicalMIs[MI]; - BlockMIs[{DestBB, KernelMI}] = MI; + MI.removeFromParent(); + DestBB->insert(InsertPt, &MI); + auto *KernelMI = CanonicalMIs[&MI]; + BlockMIs[{DestBB, KernelMI}] = &MI; BlockMIs.erase({SourceBB, KernelMI}); } SmallVector PhiToDelete; diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 80c38f3ec341d..9547fe6f93de5 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -36,9 +36,8 @@ static bool lowerLoadRelative(Function &F) { Type *Int32PtrTy = Int32Ty->getPointerTo(); Type *Int8Ty = Type::getInt8Ty(F.getContext()); - for (auto I = F.use_begin(), E = F.use_end(); I != E;) { - auto CI = dyn_cast(I->getUser()); - ++I; + for (Use &U : llvm::make_early_inc_range(F.uses())) { + auto CI = dyn_cast(U.getUser()); if (!CI || CI->getCalledOperand() != &F) continue; diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp index b65d580779581..a9816b13e7980 100644 --- a/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -217,9 +217,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg, // Collect interferences assigned to any alias of the physical register. 
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - Q.collectInterferingVRegs(); - for (unsigned i = Q.interferingVRegs().size(); i; --i) { - LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + for (auto *Intf : reverse(Q.interferingVRegs())) { if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight()) return false; Intfs.push_back(Intf); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 3f58afca47470..0e4e23f61587e 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -955,11 +955,12 @@ bool RAGreedy::canEvictInterference( for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is 10 or more interferences, chances are one is heavier. - if (Q.collectInterferingVRegs(10) >= 10) + const auto &Interferences = Q.interferingVRegs(10); + if (Interferences.size() >= 10) return false; // Check if any interfering live range is heavier than MaxWeight. - for (LiveInterval *Intf : reverse(Q.interferingVRegs())) { + for (LiveInterval *Intf : reverse(Interferences)) { assert(Register::isVirtualRegister(Intf->reg()) && "Only expecting virtual register interference from query"); @@ -1037,7 +1038,6 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg, for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); - Q.collectInterferingVRegs(); // Check if any interfering live range is heavier than MaxWeight. for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) { @@ -1127,7 +1127,6 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg, // should be fast, we may need to recalculate if when different physregs // overlap the same register unit so we had different SubRanges queried // against it. - Q.collectInterferingVRegs(); ArrayRef IVR = Q.interferingVRegs(); Intfs.append(IVR.begin(), IVR.end()); } @@ -2547,8 +2546,9 @@ bool RAGreedy::mayRecolorAllInterferences( LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); // If there is LastChanceRecoloringMaxInterference or more interferences, // chances are one would not be recolorable. - if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= - LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { + if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >= + LastChanceRecoloringMaxInterference && + !ExhaustiveSearch) { LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n"); CutOffInfo |= CO_Interf; return false; diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 582ad35f8da95..f20d47a580f7c 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -1573,9 +1573,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs // to describe DstReg instead. 
if (MRI->use_nodbg_empty(SrcReg)) { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg); - UI != MRI->use_end();) { - MachineOperand &UseMO = *UI++; + for (MachineOperand &UseMO : + llvm::make_early_inc_range(MRI->use_operands(SrcReg))) { MachineInstr *UseMI = UseMO.getParent(); if (UseMI->isDebugInstr()) { if (Register::isPhysicalRegister(DstReg)) diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index 4197424e163ff..50d9d64bfcfda 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -702,9 +702,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (!DynamicAllocas.empty()) { // Now go through the instructions again, replacing stacksave/stackrestore. - for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) { - Instruction *I = &*(It++); - auto II = dyn_cast(I); + for (Instruction &I : llvm::make_early_inc_range(instructions(&F))) { + auto *II = dyn_cast(&I); if (!II) continue; diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp index 60f8eec1b9bc7..ef3afab2b7307 100644 --- a/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -577,8 +577,7 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, SU = WorkList.back(); WorkList.pop_back(); Visited.set(SU->NodeNum); - for (const SDep &SuccDep - : make_range(SU->Succs.rbegin(), SU->Succs.rend())) { + for (const SDep &SuccDep : llvm::reverse(SU->Succs)) { unsigned s = SuccDep.getSUnit()->NodeNum; // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). if (s >= Node2Index.size()) diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 504341e4c96d0..3f013eb6024ed 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1112,7 +1112,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { LiveRegs.addLiveOuts(MBB); // Examine block from end to start... - for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { + for (MachineInstr &MI : llvm::reverse(MBB)) { if (MI.isDebugOrPseudoInstr()) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4753d27380282..bf4b418a2273b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6980,10 +6980,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // a rot[lr]. This also matches funnel shift patterns, similar to rotation but // with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { - // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) - return SDValue(); // The target must have at least one rotate/funnel flavor. bool HasROTL = hasOperation(ISD::ROTL, VT); @@ -8555,25 +8552,42 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, // Both operands must be equivalent extend nodes. 
SDValue LeftOp = ShiftOperand.getOperand(0); SDValue RightOp = ShiftOperand.getOperand(1); + bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; - if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) + if (!IsSignExt && !IsZeroExt) return SDValue(); + EVT NarrowVT = LeftOp.getOperand(0).getValueType(); + unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + + SDValue MulhRightOp; + if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { + unsigned ActiveBits = IsSignExt + ? Constant->getAPIntValue().getMinSignedBits() + : Constant->getAPIntValue().getActiveBits(); + if (ActiveBits > NarrowVTSize) + return SDValue(); + MulhRightOp = DAG.getConstant( + Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL, + NarrowVT); + } else { + if (LeftOp.getOpcode() != RightOp.getOpcode()) + return SDValue(); + // Check that the two extend nodes are the same type. + if (NarrowVT != RightOp.getOperand(0).getValueType()) + return SDValue(); + MulhRightOp = RightOp.getOperand(0); + } + EVT WideVT = LeftOp.getValueType(); // Proceed with the transformation if the wide types match. assert((WideVT == RightOp.getValueType()) && "Cannot have a multiply node with two different operand types."); - EVT NarrowVT = LeftOp.getOperand(0).getValueType(); - // Check that the two extend nodes are the same type. - if (NarrowVT != RightOp.getOperand(0).getValueType()) - return SDValue(); - // Proceed with the transformation if the wide type is twice as large // as the narrow type. - unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize) return SDValue(); @@ -8592,8 +8606,8 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT)) return SDValue(); - SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), - RightOp.getOperand(0)); + SDValue Result = + DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT) : DAG.getZExtOrTrunc(Result, DL, WideVT)); } @@ -9530,6 +9544,40 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) + return SDValue(); + + SDValue Cond0 = N0.getOperand(0); + SDValue Cond1 = N0.getOperand(1); + ISD::CondCode CC = cast(N0.getOperand(2))->get(); + if (VT != Cond0.getValueType()) + return SDValue(); + + // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 + if (CC == ISD::SETLT && isNullOrNullSplat(Cond1) && isNullOrNullSplat(N2)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::AND, DL, VT, Sra, N1); + } + + // (Cond0 s< 0) ? 
-1 : N2 --> (Cond0 s>> BW-1) | N2 + if (CC == ISD::SETLT && isNullOrNullSplat(Cond1) && + isAllOnesOrAllOnesSplat(N1)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::OR, DL, VT, Sra, N2); + } + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -10220,6 +10268,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SDValue V = foldVSelectOfConstants(N)) return V; + if (hasOperation(ISD::SRA, VT)) + if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) + return V; + return SDValue(); } @@ -12067,7 +12119,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. - if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) + if (ExtVTBits >= DAG.ComputeMinSignedBits(N0)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12083,8 +12135,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && + if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -12103,8 +12154,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); if ((N00Bits == ExtVTBits || (!IsZext && (N00Bits < ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) < - ExtVTBits))) && + DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); @@ -12987,68 +13037,30 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } - SDLoc DL(BV); - // Okay, we know the src/dst types are both integers of differing types. - // Handling growing first. assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); - if (SrcBitSize < DstBitSize) { - unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; - SmallVector Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; - i += NumInputsPerOutput) { - bool isLE = DAG.getDataLayout().isLittleEndian(); - APInt NewBits = APInt(DstBitSize, 0); - bool EltIsUndef = true; - for (unsigned j = 0; j != NumInputsPerOutput; ++j) { - // Shift the previously computed bits over. - NewBits <<= SrcBitSize; - SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); - if (Op.isUndef()) continue; - EltIsUndef = false; - - NewBits |= cast(Op)->getAPIntValue(). - zextOrTrunc(SrcBitSize).zext(DstBitSize); - } - - if (EltIsUndef) - Ops.push_back(DAG.getUNDEF(DstEltVT)); - else - Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); - } + // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a + // BuildVectorSDNode? + auto *BVN = cast(BV); - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getBuildVector(VT, DL, Ops); - } + // Extract the constant raw bit data. 
+ BitVector UndefElements; + SmallVector RawBits; + bool IsLE = DAG.getDataLayout().isLittleEndian(); + if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) + return SDValue(); - // Finally, this must be the case where we are shrinking elements: each input - // turns into multiple outputs. - unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, - NumOutputsPerInput*BV->getNumOperands()); + SDLoc DL(BV); SmallVector Ops; - - for (const SDValue &Op : BV->op_values()) { - if (Op.isUndef()) { - Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); - continue; - } - - APInt OpVal = cast(Op)-> - getAPIntValue().zextOrTrunc(SrcBitSize); - - for (unsigned j = 0; j != NumOutputsPerInput; ++j) { - APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); - OpVal.lshrInPlace(DstBitSize); - } - - // For big endian targets, swap the order of the pieces of each element. - if (DAG.getDataLayout().isBigEndian()) - std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); + for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { + if (UndefElements[I]) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); } + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); return DAG.getBuildVector(VT, DL, Ops); } @@ -22256,8 +22268,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDNodeFlags Flags = N->getFlags(); // See if we can constant fold the vector operation. - if (SDValue Fold = DAG.FoldConstantVectorArithmetic( - Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), + LHS.getValueType(), Ops)) return Fold; // Move unary shuffles with identical masks after a vector binop: diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index ee81f59b52b6b..4d1449bc2751e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -196,10 +196,8 @@ void FastISel::flushLocalValueMap() { EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) : FuncInfo.MBB->rend(); MachineBasicBlock::reverse_iterator RI(LastLocalValue); - for (; RI != RE;) { - MachineInstr &LocalMI = *RI; - // Increment before erasing what it points to. - ++RI; + for (MachineInstr &LocalMI : + llvm::make_early_inc_range(llvm::make_range(RI, RE))) { Register DefReg = findLocalRegDef(LocalMI); if (!DefReg) continue; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 29812ef8f1a07..6ce7822734911 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1719,10 +1719,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // If the width of OpL/OpR excluding the duplicated sign bits is no greater // than the width of NewLHS/NewRH, we can avoid inserting real truncate // instruction, which is redundant eventually. 
- unsigned OpLEffectiveBits = - OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; - unsigned OpREffectiveBits = - OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { NewLHS = OpL; @@ -4377,18 +4375,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N, SDValue &Lo, SDValue &Hi) { - // Lower the rotate to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); + // Delegate to funnel-shift expansion. + SDLoc DL(N); + unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR; + SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0), + N->getOperand(0), N->getOperand(1)); SplitInteger(Res, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, - SDValue &Lo, SDValue &Hi) { - // Lower the funnel shift to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandFunnelShift(N, Res, DAG); - SplitInteger(Res, Lo, Hi); +void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Values numbered from least significant to most significant. + SDValue In1, In2, In3, In4; + GetExpandedInteger(N->getOperand(0), In3, In4); + GetExpandedInteger(N->getOperand(1), In1, In2); + EVT HalfVT = In1.getValueType(); + + SDLoc DL(N); + unsigned Opc = N->getOpcode(); + SDValue ShAmt = N->getOperand(2); + EVT ShAmtVT = ShAmt.getValueType(); + EVT ShAmtCCVT = getSetCCResultType(ShAmtVT); + + // If the shift amount is at least half the bitwidth, swap the inputs. + unsigned HalfVTBits = HalfVT.getScalarSizeInBits(); + SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt, + DAG.getConstant(HalfVTBits, DL, ShAmtVT)); + SDValue Cond = + DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT), + Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ); + + // Expand to a pair of funnel shifts. 
+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); + SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT); + + SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2); + SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3); + SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4); + Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt); + Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt); } void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1c25f5f952917..72c39886c23c6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4286,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(FirstAnswer, Mask.countLeadingOnes()); } +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const { + unsigned SignBits = ComputeNumSignBits(Op, Depth); + return Op.getScalarValueSizeInBits() - SignBits + 1; +} + +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, + const APInt &DemandedElts, + unsigned Depth) const { + unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); + return Op.getScalarValueSizeInBits() - SignBits + 1; +} + bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, unsigned Depth) const { // Early out for FREEZE. @@ -5256,7 +5268,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS) return SDValue(); - // For now, the array Ops should only contain two values. + // TODO: For now, the array Ops should only contain two values. // This enforcement will be removed once this function is merged with // FoldConstantVectorArithmetic if (Ops.size() != 2) @@ -5293,6 +5305,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (GlobalAddressSDNode *GA = dyn_cast(N2)) return FoldSymbolOffset(Opcode, VT, GA, N1); + // If this is a bitwise logic opcode see if we can fold bitcasted ops. + // TODO: Can we generalize this and fold any bitcasted constant data? + if (ISD::isBitwiseLogicOp(Opcode) && N1->getOpcode() == ISD::BITCAST && + N2->getOpcode() == ISD::BITCAST) { + SDValue InnerN1 = peekThroughBitcasts(N1->getOperand(0)); + SDValue InnerN2 = peekThroughBitcasts(N2->getOperand(0)); + EVT InnerVT = InnerN1.getValueType(); + if (InnerVT == InnerN2.getValueType() && InnerVT.isInteger()) + if (SDValue C = + FoldConstantArithmetic(Opcode, DL, InnerVT, {InnerN1, InnerN2})) + return getBitcast(VT, C); + } + // For fixed width vectors, extract each constant element and fold them // individually. Either input may be an undef value. bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR || @@ -5316,18 +5341,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } SmallVector Outputs; - unsigned NumOps = 0; + unsigned NumElts = 0; if (IsBVOrSV1) - NumOps = std::max(NumOps, N1->getNumOperands()); + NumElts = std::max(NumElts, N1->getNumOperands()); if (IsBVOrSV2) - NumOps = std::max(NumOps, N2->getNumOperands()); - assert(NumOps != 0 && "Expected non-zero operands"); + NumElts = std::max(NumElts, N2->getNumOperands()); + assert(NumElts != 0 && "Expected non-zero operands"); // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. 
We only need // one iteration for that. - assert((!VT.isScalableVector() || NumOps == 1) && + assert((!VT.isScalableVector() || NumElts == 1) && "Scalable vector should only have one scalar"); - for (unsigned I = 0; I != NumOps; ++I) { + for (unsigned I = 0; I != NumElts; ++I) { // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need // to use operand 0 of the SPLAT_VECTOR for each fixed element. SDValue V1; @@ -5388,8 +5413,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // TODO: Merge with FoldConstantArithmetic SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, - ArrayRef Ops, - const SDNodeFlags Flags) { + ArrayRef Ops) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. @@ -5473,7 +5497,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, } // Constant fold the scalar operands. - SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps); // Legalize the (integer) scalar constant if necessary. if (LegalSVT != SVT) @@ -9807,21 +9831,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // before SortedPos will contain the topological sort index, and the // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. - for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { - SDNode *N = &*I++; - checkForCycles(N, this); - unsigned Degree = N->getNumOperands(); + for (SDNode &N : llvm::make_early_inc_range(allnodes())) { + checkForCycles(&N, this); + unsigned Degree = N.getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. - N->setNodeId(DAGSize++); - allnodes_iterator Q(N); + N.setNodeId(DAGSize++); + allnodes_iterator Q(&N); if (Q != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); assert(SortedPos != AllNodes.end() && "Overran node list"); ++SortedPos; } else { // Temporarily use the Node Id as scratch space for the degree count. - N->setNodeId(Degree); + N.setNodeId(Degree); } } @@ -10905,6 +10928,73 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, return -1; } +bool BuildVectorSDNode::getConstantRawBits( + bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl &RawBitElements, BitVector &UndefElements) const { + // Early-out if this contains anything but Undef/Constant/ConstantFP. + if (!isConstant()) + return false; + + unsigned NumSrcOps = getNumOperands(); + unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits(); + assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && + "Invalid bitcast scale"); + + unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits; + UndefElements.clear(); + UndefElements.resize(NumDstOps, false); + RawBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits)); + + // Concatenate src elements constant bits together into dst element. + if (SrcEltSizeInBits <= DstEltSizeInBits) { + unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits; + for (unsigned I = 0; I != NumDstOps; ++I) { + UndefElements.set(I); + APInt &RawBits = RawBitElements[I]; + for (unsigned J = 0; J != Scale; ++J) { + unsigned Idx = (I * Scale) + (IsLittleEndian ? 
J : (Scale - J - 1)); + SDValue Op = getOperand(Idx); + if (Op.isUndef()) + continue; + UndefElements.reset(I); + auto *CInt = dyn_cast<ConstantSDNode>(Op); + auto *CFP = dyn_cast<ConstantFPSDNode>(Op); + assert((CInt || CFP) && "Unknown constant"); + APInt EltBits = + CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits) + : CFP->getValueAPF().bitcastToAPInt(); + assert(EltBits.getBitWidth() == SrcEltSizeInBits && + "Illegal constant bitwidths"); + RawBits.insertBits(EltBits, J * SrcEltSizeInBits); + } + } + return true; + } + + // Split src element constant bits into dst elements. + unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits; + for (unsigned I = 0; I != NumSrcOps; ++I) { + SDValue Op = getOperand(I); + if (Op.isUndef()) { + UndefElements.set(I * Scale, (I + 1) * Scale); + continue; + } + auto *CInt = dyn_cast<ConstantSDNode>(Op); + auto *CFP = dyn_cast<ConstantFPSDNode>(Op); + assert((CInt || CFP) && "Unknown constant"); + APInt EltBits = + CInt ? CInt->getAPIntValue() : CFP->getValueAPF().bitcastToAPInt(); + + for (unsigned J = 0; J != Scale; ++J) { + unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); + APInt &RawBits = RawBitElements[Idx]; + RawBits = EltBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits); + } + } + + return true; +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4ccb429d91d6a..44cbb2b450558 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1803,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { - unsigned NumSignBits = - TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); - bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; + unsigned MinSignedBits = + TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1); + bool AlreadySignExtended = ExVTBits >= MinSignedBits; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. if (!AlreadySignExtended) { diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 1fb7128a6decb..806cb17e3036f 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); +static cl::opt<unsigned> TailDupJmpTableLoopSize( + "tail-dup-jmptable-loop-size", + cl::desc("Maximum number of loop latches (successors of the loop header) " + "to consider for tail duplication."), + cl::init(128), cl::Hidden); + static cl::opt<bool> TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -563,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (TailBB.isSuccessor(&TailBB)) return false; + // When doing tail-duplication with jumptable loops like: + // 1 -> 2 <-> 3 | + // \ <-> 4 | + // \ <-> 5 | + // \ <-> ... | + // \---> rest | + // a quadratic number of edges and many more loops are added to the CFG. This + // may cause a compile-time regression when the jumptable is quite large. + // So set a limit on the number of jumptable cases.
+ auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) { + const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(), + TailBB.pred_end()); + // Check whether the basic block has a large number of successors, all of + // which have exactly one successor: the basic block itself. + return llvm::count_if( + TailBB.successors(), [&](const MachineBasicBlock *SuccBB) { + return Preds.count(SuccBB) && SuccBB->succ_size() == 1; + }) > TailDupJmpTableLoopSize; + }; + + if (isLargeJumpTableLoop(TailBB)) + return false; + // Set the limit on the cost to duplicate. When optimizing for size, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 811d208313038..0dd45be54464d 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1987,8 +1987,11 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const { auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false, GlobalVariable::ExternalLinkage, nullptr, "__stack_chk_guard"); + + // FreeBSD has "__stack_chk_guard" defined externally in libc.so if (TM.getRelocationModel() == Reloc::Static && - !TM.getTargetTriple().isWindowsGNUEnvironment()) + !TM.getTargetTriple().isWindowsGNUEnvironment() && + !TM.getTargetTriple().isOSFreeBSD()) GV->setDSOLocal(true); } } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index fdd2bc6c9f8ba..ca7d7e9dbf5f2 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1539,15 +1539,23 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (LIS) { LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot(); + SlotIndex endIdx = + LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber); if (RegA.isVirtual()) { LiveInterval &LI = LIS->getInterval(RegA); VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); - SlotIndex endIdx = - LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber); - LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); + LI.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); for (auto &S : LI.subranges()) { VNI = S.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); - S.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); + S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); + } + } else { + for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) { + if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) { + VNInfo *VNI = + LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI)); + } } } } diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 0f164e2637a2d..61ddba9403cd0 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -541,13 +541,10 @@ void VirtRegRewriter::rewrite() { for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { LLVM_DEBUG(MBBI->print(dbgs(), Indexes)); - for (MachineBasicBlock::instr_iterator - MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { - MachineInstr *MI = &*MII; - ++MII; - - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { + for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) { + for (MachineInstr::mop_iterator MOI =
MI.operands_begin(), + MOE = MI.operands_end(); + MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; // Make sure MRI knows about registers clobbered by regmasks. @@ -574,7 +571,7 @@ void VirtRegRewriter::rewrite() { // have to add implicit killed operands for the super-register. A // partial redef always kills and redefines the super-register. if ((MO.readsReg() && (MO.isDef() || MO.isKill())) || - (MO.isDef() && subRegLiveThrough(*MI, PhysReg))) + (MO.isDef() && subRegLiveThrough(MI, PhysReg))) SuperKills.push_back(PhysReg); if (MO.isDef()) { @@ -619,20 +616,20 @@ void VirtRegRewriter::rewrite() { // Add any missing super-register kills after rewriting the whole // instruction. while (!SuperKills.empty()) - MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + MI.addRegisterKilled(SuperKills.pop_back_val(), TRI, true); while (!SuperDeads.empty()) - MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true); + MI.addRegisterDead(SuperDeads.pop_back_val(), TRI, true); while (!SuperDefs.empty()) - MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); + MI.addRegisterDefined(SuperDefs.pop_back_val(), TRI); - LLVM_DEBUG(dbgs() << "> " << *MI); + LLVM_DEBUG(dbgs() << "> " << MI); - expandCopyBundle(*MI); + expandCopyBundle(MI); // We can remove identity copies right now. - handleIdentityCopy(*MI); + handleIdentityCopy(MI); } } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index f1a905ebcafbf..b24894c43c3a2 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -226,9 +226,6 @@ struct DWARFTypePrinter { } case DW_TAG_array_type: { appendQualifiedNameBefore(Inner); - if (Word) - OS << ' '; - Word = false; break; } case DW_TAG_reference_type: @@ -305,18 +302,19 @@ struct DWARFTypePrinter { } else EndedWithTemplate = Name.endswith(">"); OS << Name; - // FIXME: This needs to be a bit more narrow, it would fail to - // reconstitute a non-operator overload that is a template, like - // "operator_thing" - if (!Name.endswith(">") && !Name.startswith("operator")) { - if (appendTemplateParameters(D)) { - if (EndedWithTemplate) - OS << ' '; - OS << '>'; - EndedWithTemplate = true; - Word = true; - } - } + // This check would be insufficient for operator overloads like + // "operator>>" - but for now Clang doesn't try to simplify them, so this + // is OK. Add more nuanced operator overload handling here if/when needed. 
+ if (Name.endswith(">")) + break; + if (!appendTemplateParameters(D)) + break; + + if (EndedWithTemplate) + OS << ' '; + OS << '>'; + EndedWithTemplate = true; + Word = true; break; } } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index c4e479191a692..cea0f63bbf817 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -767,10 +767,10 @@ Optional DWARFFormValue::getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const { if (U == nullptr || !isFormClass(FC_Constant)) return None; - DWARFUnit *DU = const_cast(U); - if (auto *LT = U->getContext().getLineTableForUnit(DU->getLinkedUnit())) { + DWARFUnit *DLU = const_cast(U)->getLinkedUnit(); + if (auto *LT = DLU->getContext().getLineTableForUnit(DLU)) { std::string FileName; - if (LT->getFileNameByIndex(Value.uval, DU->getCompilationDir(), Kind, + if (LT->getFileNameByIndex(Value.uval, DLU->getCompilationDir(), Kind, FileName)) return FileName; } diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index a508f163a2d84..f33125474e3a4 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -103,7 +103,7 @@ void PDBFileBuilder::addInjectedSource(StringRef Name, // table and the hash value is dependent on the exact contents of the string. // link.exe lowercases a path and converts / to \, so we must do the same. SmallString<64> VName; - sys::path::native(Name.lower(), VName); + sys::path::native(Name.lower(), VName, sys::path::Style::windows_backslash); uint32_t NI = getStringTableBuilder().insert(Name); uint32_t VNI = getStringTableBuilder().insert(VName); diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt index 7e9180cf7f68a..60e7751dc9680 100644 --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -51,6 +51,7 @@ add_llvm_component_library(LLVMOrcJIT OrcShared OrcTargetProcess MC + MCDisassembler Passes RuntimeDyld Support diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index e8dd1bb90c9ad..ee1630a2ffa88 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -9,12 +9,17 @@ #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/JITLink/x86_64.h" #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/Support/Format.h" #include "llvm/Transforms/Utils/Cloning.h" #include +#define DEBUG_TYPE "orc" + using namespace llvm; using namespace llvm::orc; @@ -372,5 +377,77 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src, Dst.addModuleFlag(MapMetadata(MF, VMap)); } +Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym, + jitlink::LinkGraph &G, + MCDisassembler &Disassembler, + MCInstrAnalysis &MIA) { + // AArch64 appears to already come with the necessary relocations. Among other + // architectures, only x86_64 is currently implemented here. 
+ if (G.getTargetTriple().getArch() != Triple::x86_64) + return Error::success(); + + raw_null_ostream CommentStream; + auto &STI = Disassembler.getSubtargetInfo(); + + // Determine the function bounds + auto &B = Sym.getBlock(); + assert(!B.isZeroFill() && "expected content block"); + auto SymAddress = Sym.getAddress(); + auto SymStartInBlock = + (const uint8_t *)B.getContent().data() + Sym.getOffset(); + auto SymSize = Sym.getSize() ? Sym.getSize() : B.getSize() - Sym.getOffset(); + auto Content = makeArrayRef(SymStartInBlock, SymSize); + + LLVM_DEBUG(dbgs() << "Adding self-relocations to " << Sym.getName() << "\n"); + + SmallDenseSet ExistingRelocations; + for (auto &E : B.edges()) { + if (E.isRelocation()) + ExistingRelocations.insert(E.getOffset()); + } + + size_t I = 0; + while (I < Content.size()) { + MCInst Instr; + uint64_t InstrSize = 0; + uint64_t InstrStart = SymAddress + I; + auto DecodeStatus = Disassembler.getInstruction( + Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream); + if (DecodeStatus != MCDisassembler::Success) { + LLVM_DEBUG(dbgs() << "Aborting due to disassembly failure at address " + << InstrStart); + return make_error( + formatv("failed to disassemble at address {0:x16}", InstrStart), + inconvertibleErrorCode()); + } + // Advance to the next instruction. + I += InstrSize; + + // Check for a PC-relative address equal to the symbol itself. + auto PCRelAddr = + MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize); + if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress) + continue; + + auto RelocOffInInstr = + MIA.getMemoryOperandRelocationOffset(Instr, InstrSize); + if (!RelocOffInInstr.hasValue() || + InstrSize - RelocOffInInstr.getValue() != 4) { + LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at " + << InstrStart); + continue; + } + + auto RelocOffInBlock = + InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset(); + if (ExistingRelocations.contains(RelocOffInBlock)) + continue; + + LLVM_DEBUG(dbgs() << "Adding delta32 self-relocation at " << InstrStart); + B.addEdge(jitlink::x86_64::Delta32, RelocOffInBlock, Sym, /*Addend=*/-4); + } + return Error::success(); +} + } // End namespace orc. } // End namespace llvm. 
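For orientation, here is a minimal sketch of how this new helper might be driven from a JITLink pass. The pass function and its wiring are illustrative assumptions, not part of the patch, and they presume the caller already owns a suitable MCDisassembler and MCInstrAnalysis for the target:

// Hypothetical post-prune pass (sketch): run the helper over every defined,
// callable symbol so each one gets Delta32 edges for the PC-relative
// self-references it can disassemble.
static Error addSelfRelocations(jitlink::LinkGraph &G, MCDisassembler &Dis,
                                MCInstrAnalysis &MIA) {
  for (jitlink::Symbol *Sym : G.defined_symbols())
    if (Sym->isCallable())
      if (auto Err = orc::addFunctionPointerRelocationsToCurrentSymbol(
              *Sym, G, Dis, MIA))
        return Err;
  return Error::success();
}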
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp index 734e709b93844..47364a92a4517 100644 --- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp @@ -374,13 +374,18 @@ void SimpleRemoteEPC::handleCallWrapper( uint64_t RemoteSeqNo, ExecutorAddr TagAddr, SimpleRemoteEPCArgBytesVector ArgBytes) { assert(ES && "No ExecutionSession attached"); - ES->runJITDispatchHandler( - [this, RemoteSeqNo](shared::WrapperFunctionResult WFR) { - if (auto Err = sendMessage(SimpleRemoteEPCOpcode::Result, RemoteSeqNo, - ExecutorAddr(), {WFR.data(), WFR.size()})) - getExecutionSession().reportError(std::move(Err)); + D->dispatch(makeGenericNamedTask( + [this, RemoteSeqNo, TagAddr, ArgBytes = std::move(ArgBytes)]() { + ES->runJITDispatchHandler( + [this, RemoteSeqNo](shared::WrapperFunctionResult WFR) { + if (auto Err = + sendMessage(SimpleRemoteEPCOpcode::Result, RemoteSeqNo, + ExecutorAddr(), {WFR.data(), WFR.size()})) + getExecutionSession().reportError(std::move(Err)); + }, + TagAddr.getValue(), ArgBytes); }, - TagAddr.getValue(), ArgBytes); + "callWrapper task")); } Error SimpleRemoteEPC::handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes) { diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp index d37c96917459f..a0f2179bddb47 100644 --- a/llvm/lib/IR/ConstantRange.cpp +++ b/llvm/lib/IR/ConstantRange.cpp @@ -183,38 +183,41 @@ CmpInst::Predicate ConstantRange::getEquivalentPredWithFlippedSignedness( return CmpInst::Predicate::BAD_ICMP_PREDICATE; } -bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred, - APInt &RHS) const { - bool Success = false; - +void ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred, + APInt &RHS, APInt &Offset) const { + Offset = APInt(getBitWidth(), 0); if (isFullSet() || isEmptySet()) { Pred = isEmptySet() ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE; RHS = APInt(getBitWidth(), 0); - Success = true; } else if (auto *OnlyElt = getSingleElement()) { Pred = CmpInst::ICMP_EQ; RHS = *OnlyElt; - Success = true; } else if (auto *OnlyMissingElt = getSingleMissingElement()) { Pred = CmpInst::ICMP_NE; RHS = *OnlyMissingElt; - Success = true; } else if (getLower().isMinSignedValue() || getLower().isMinValue()) { Pred = getLower().isMinSignedValue() ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; RHS = getUpper(); - Success = true; } else if (getUpper().isMinSignedValue() || getUpper().isMinValue()) { Pred = getUpper().isMinSignedValue() ? CmpInst::ICMP_SGE : CmpInst::ICMP_UGE; RHS = getLower(); - Success = true; + } else { + Pred = CmpInst::ICMP_ULT; + RHS = getUpper() - getLower(); + Offset = -getLower(); } - assert((!Success || ConstantRange::makeExactICmpRegion(Pred, RHS) == *this) && + assert(ConstantRange::makeExactICmpRegion(Pred, RHS) == add(Offset) && "Bad result!"); +} - return Success; +bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred, + APInt &RHS) const { + APInt Offset; + getEquivalentICmp(Pred, RHS, Offset); + return Offset.isZero(); } bool ConstantRange::icmp(CmpInst::Predicate Pred, @@ -378,11 +381,10 @@ ConstantRange::isSizeStrictlySmallerThan(const ConstantRange &Other) const { bool ConstantRange::isSizeLargerThan(uint64_t MaxSize) const { - assert(MaxSize && "MaxSize can't be 0."); // If this a full set, we need special handling to avoid needing an extra bit // to represent the size. 
if (isFullSet()) - return APInt::getMaxValue(getBitWidth()).ugt(MaxSize - 1); + return MaxSize == 0 || APInt::getMaxValue(getBitWidth()).ugt(MaxSize - 1); return (Upper - Lower).ugt(MaxSize); } @@ -679,6 +681,24 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR, return ConstantRange(std::move(L), std::move(U)); } +Optional +ConstantRange::exactIntersectWith(const ConstantRange &CR) const { + // TODO: This can be implemented more efficiently. + ConstantRange Result = intersectWith(CR); + if (Result == inverse().unionWith(CR.inverse()).inverse()) + return Result; + return None; +} + +Optional +ConstantRange::exactUnionWith(const ConstantRange &CR) const { + // TODO: This can be implemented more efficiently. + ConstantRange Result = unionWith(CR); + if (Result == inverse().intersectWith(CR.inverse()).inverse()) + return Result; + return None; +} + ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp, uint32_t ResultBitWidth) const { switch (CastOp) { diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp index de4ab5981883d..12320cf4af9f7 100644 --- a/llvm/lib/IR/DIBuilder.cpp +++ b/llvm/lib/IR/DIBuilder.cpp @@ -287,17 +287,16 @@ DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) { 0, 0, None, DINode::FlagZero); } -DIDerivedType *DIBuilder::createPointerType( - DIType *PointeeTy, - uint64_t SizeInBits, - uint32_t AlignInBits, - Optional DWARFAddressSpace, - StringRef Name) { +DIDerivedType * +DIBuilder::createPointerType(DIType *PointeeTy, uint64_t SizeInBits, + uint32_t AlignInBits, + Optional DWARFAddressSpace, + StringRef Name, DINodeArray Annotations) { // FIXME: Why is there a name here? return DIDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name, nullptr, 0, nullptr, PointeeTy, SizeInBits, - AlignInBits, 0, DWARFAddressSpace, - DINode::FlagZero); + AlignInBits, 0, DWARFAddressSpace, DINode::FlagZero, + nullptr, Annotations); } DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy, diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 5edff7a741362..2ace180482628 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -707,12 +707,12 @@ unsigned DataLayout::getPointerSize(unsigned AS) const { return getPointerAlignElem(AS).TypeByteWidth; } -unsigned DataLayout::getMaxPointerSize() const { - unsigned MaxPointerSize = 0; +unsigned DataLayout::getMaxIndexSize() const { + unsigned MaxIndexSize = 0; for (auto &P : Pointers) - MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth); + MaxIndexSize = std::max(MaxIndexSize, P.IndexWidth); - return MaxPointerSize; + return MaxIndexSize; } unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const { diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 6e36400d8d672..7c69fbf7085d0 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -447,8 +447,7 @@ bool llvm::stripDebugInfo(Function &F) { DenseMap LoopIDsMap; for (BasicBlock &BB : F) { - for (auto II = BB.begin(), End = BB.end(); II != End;) { - Instruction &I = *II++; // We may delete the instruction, increment now. 
+ for (Instruction &I : llvm::make_early_inc_range(BB)) { if (isa(&I)) { I.eraseFromParent(); Changed = true; diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 2049dc16a3c15..7eddffab13b9d 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -979,7 +979,8 @@ enum IIT_Info { IIT_BF16 = 48, IIT_STRUCT9 = 49, IIT_V256 = 50, - IIT_AMX = 51 + IIT_AMX = 51, + IIT_PPCF128 = 52 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -1026,6 +1027,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_F128: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Quad, 0)); return; + case IIT_PPCF128: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::PPCQuad, 0)); + return; case IIT_I1: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1)); return; @@ -1250,6 +1254,7 @@ static Type *DecodeFixedType(ArrayRef &Infos, case IITDescriptor::Float: return Type::getFloatTy(Context); case IITDescriptor::Double: return Type::getDoubleTy(Context); case IITDescriptor::Quad: return Type::getFP128Ty(Context); + case IITDescriptor::PPCQuad: return Type::getPPC_FP128Ty(Context); case IITDescriptor::Integer: return IntegerType::get(Context, D.Integer_Width); @@ -1432,6 +1437,7 @@ static bool matchIntrinsicType( case IITDescriptor::Float: return !Ty->isFloatTy(); case IITDescriptor::Double: return !Ty->isDoubleTy(); case IITDescriptor::Quad: return !Ty->isFP128Ty(); + case IITDescriptor::PPCQuad: return !Ty->isPPC_FP128Ty(); case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width); case IITDescriptor::Vector: { VectorType *VT = dyn_cast(Ty); diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 0aff4906bcf15..9f38288095e3a 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -541,5 +541,5 @@ void GlobalIFunc::eraseFromParent() { const Function *GlobalIFunc::getResolverFunction() const { DenseSet Aliases; - return cast(findBaseObject(getResolver(), Aliases)); + return dyn_cast(findBaseObject(getResolver(), Aliases)); } diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 1bc4ebc7ac162..c42df49d97ea2 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2436,6 +2436,87 @@ bool ShuffleVectorInst::isConcat() const { return isIdentityMaskImpl(getShuffleMask(), NumMaskElts); } +static bool isReplicationMaskWithParams(ArrayRef Mask, + int ReplicationFactor, int VF) { + assert(Mask.size() == (unsigned)ReplicationFactor * VF && + "Unexpected mask size."); + + for (int CurrElt : seq(0, VF)) { + ArrayRef CurrSubMask = Mask.take_front(ReplicationFactor); + assert(CurrSubMask.size() == (unsigned)ReplicationFactor && + "Run out of mask?"); + Mask = Mask.drop_front(ReplicationFactor); + if (!all_of(CurrSubMask, [CurrElt](int MaskElt) { + return MaskElt == UndefMaskElem || MaskElt == CurrElt; + })) + return false; + } + assert(Mask.empty() && "Did not consume the whole mask?"); + + return true; +} + +bool ShuffleVectorInst::isReplicationMask(ArrayRef Mask, + int &ReplicationFactor, int &VF) { + // undef-less case is trivial. 
+ if (none_of(Mask, [](int MaskElt) { return MaskElt == UndefMaskElem; })) { + ReplicationFactor = + Mask.take_while([](int MaskElt) { return MaskElt == 0; }).size(); + if (ReplicationFactor == 0 || Mask.size() % ReplicationFactor != 0) + return false; + VF = Mask.size() / ReplicationFactor; + return isReplicationMaskWithParams(Mask, ReplicationFactor, VF); + } + + // However, if the mask contains undefs, we have to enumerate possible tuples + // and pick one. There are bounds on replication factor: [1, mask size] + // (where RF=1 is an identity shuffle, RF=mask size is a broadcast shuffle) + // Additionally, mask size is a replication factor multiplied by vector size, + // which further significantly reduces the search space. + + // Before doing that, let's perform a basic sanity check first. + int Largest = -1; + for (int MaskElt : Mask) { + if (MaskElt == UndefMaskElem) + continue; + // Elements must be in non-decreasing order. + if (MaskElt < Largest) + return false; + Largest = std::max(Largest, MaskElt); + } + + // Prefer a larger replication factor if all else is equal. + for (int PossibleReplicationFactor : + reverse(seq_inclusive(1, Mask.size()))) { + if (Mask.size() % PossibleReplicationFactor != 0) + continue; + int PossibleVF = Mask.size() / PossibleReplicationFactor; + if (!isReplicationMaskWithParams(Mask, PossibleReplicationFactor, + PossibleVF)) + continue; + ReplicationFactor = PossibleReplicationFactor; + VF = PossibleVF; + return true; + } + + return false; +} + +bool ShuffleVectorInst::isReplicationMask(int &ReplicationFactor, + int &VF) const { + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa<ScalableVectorType>(getType())) + return false; + + VF = cast<FixedVectorType>(Op<0>()->getType())->getNumElements(); + if (ShuffleMask.size() % VF != 0) + return false; + ReplicationFactor = ShuffleMask.size() / VF; + + return isReplicationMaskWithParams(ShuffleMask, ReplicationFactor, VF); +} + //===----------------------------------------------------------------------===// // InsertValueInst Class //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index dce5d17c9eea0..90716d9c81a6c 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -351,9 +351,9 @@ std::unique_ptr<DiagnosticHandler> LLVMContext::getDiagnosticHandler() { void LLVMContext::enableOpaquePointers() const { assert(pImpl->PointerTypes.empty() && pImpl->ASPointerTypes.empty() && "Must be called before creating any pointer types"); - pImpl->OpaquePointers = true; + pImpl->setOpaquePointers(true); } bool LLVMContext::supportsTypedPointers() const { - return !pImpl->OpaquePointers; + return !pImpl->getOpaquePointers(); } diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp index 068bc58b6796b..ebbf382aea385 100644 --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -35,8 +35,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID), PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID), X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8), - Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128), - OpaquePointers(OpaquePointersCL) {} + Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {} LLVMContextImpl::~LLVMContextImpl() { // NOTE: We need to delete the contents of OwnedModules, but Module's dtor @@ -233,3 +232,11 @@ OptPassGate
&LLVMContextImpl::getOptPassGate() const { void LLVMContextImpl::setOptPassGate(OptPassGate& OPG) { this->OPG = &OPG; } + +bool LLVMContextImpl::getOpaquePointers() { + if (LLVM_UNLIKELY(!(OpaquePointers.hasValue()))) + OpaquePointers = OpaquePointersCL; + return *OpaquePointers; +} + +void LLVMContextImpl::setOpaquePointers(bool OP) { OpaquePointers = OP; } diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index b17f581faaa64..d84714d9b1f14 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1461,10 +1461,7 @@ class LLVMContextImpl { unsigned NamedStructTypesUniqueID = 0; DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes; - DenseMap<std::pair<Type *, ElementCount>, VectorType*> VectorTypes; - // TODO: clean up the following after we no longer support non-opaque pointer - // types. - bool OpaquePointers; + DenseMap<std::pair<Type *, ElementCount>, VectorType *> VectorTypes; DenseMap<Type *, PointerType *> PointerTypes; // Pointers in AddrSpace = 0 DenseMap<std::pair<Type *, unsigned>, PointerType*> ASPointerTypes; @@ -1544,6 +1541,14 @@ class LLVMContextImpl { /// The lifetime of the object must be guaranteed to extend as long as the /// LLVMContext is used by compilation. void setOptPassGate(OptPassGate&); + + // TODO: clean up the following after we no longer support non-opaque pointer + // types. + bool getOpaquePointers(); + void setOpaquePointers(bool OP); + +private: + Optional<bool> OpaquePointers; }; } // end namespace llvm diff --git a/llvm/lib/IR/PassManager.cpp b/llvm/lib/IR/PassManager.cpp index 4965a7d533437..d933003ccdf7a 100644 --- a/llvm/lib/IR/PassManager.cpp +++ b/llvm/lib/IR/PassManager.cpp @@ -15,17 +15,6 @@ using namespace llvm; namespace llvm { -// Experimental option to eagerly invalidate more analyses. This has the -// potential to decrease max memory usage in exchange for more compile time. -// This may affect codegen due to either passes using analyses only when -// cached, or invalidating and recalculating an analysis that was -// stale/imprecise but still valid. Currently this invalidates all function -// analyses after a module->function or cgscc->function adaptor. -// TODO: make this a PipelineTuningOption. -cl::opt<bool> EagerlyInvalidateAnalyses( - "eagerly-invalidate-analyses", cl::init(false), cl::Hidden, - cl::desc("Eagerly invalidate more analyses in default pipelines")); - // Explicit template instantiations and specialization definitions for core // template typedefs. template class AllAnalysesOn<Module>; @@ -105,7 +94,10 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate( void ModuleToFunctionPassAdaptor::printPipeline( raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { - OS << "function("; + OS << "function"; + if (EagerlyInvalidate) + OS << "<eager-inv>"; + OS << "("; Pass->printPipeline(OS, MapClassName2PassName); OS << ")"; } @@ -141,8 +133,7 @@ PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M, // We know that the function pass couldn't have invalidated any other // function's analyses (that's the contract of a function pass), so // directly handle the function analysis manager's invalidation here. - FAM.invalidate(F, EagerlyInvalidateAnalyses ? PreservedAnalyses::none() - : PassPA); + FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA); // Then intersect the preserved set so that invalidation of module // analyses will eventually occur when the module pass completes.
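A small usage sketch of the adaptor change above. The pass choice is illustrative; the boolean parameter is the one this patch moves onto the adaptor, so treat the exact signature as an assumption:

// Build a module pipeline whose function adaptor eagerly invalidates
// function analyses after running on each function.
FunctionPassManager FPM;
FPM.addPass(SimplifyCFGPass());
ModulePassManager MPM;
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
                                              /*EagerlyInvalidate=*/true));
// With the printPipeline change, this adaptor renders roughly as
// "function<eager-inv>(simplifycfg)".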
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp index 51f0d2738f7e7..cfd8deba5a53e 100644 --- a/llvm/lib/IR/ReplaceConstant.cpp +++ b/llvm/lib/IR/ReplaceConstant.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/NoFolder.h" +#include "llvm/IR/ValueMap.h" namespace llvm { @@ -33,7 +34,8 @@ void convertConstantExprsToInstructions( Instruction *I, std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths, SmallPtrSetImpl<Instruction *> *Insts) { - SmallPtrSet<ConstantExpr *, 4> Visited; + ValueMap<ConstantExpr *, Instruction *> Visited; + for (Use &U : I->operands()) { // The operand U is either not a constant expression operand or the // constant expression paths do not belong to U, ignore U. @@ -48,23 +50,47 @@ void convertConstantExprsToInstructions( BI = &(*(BB->getFirstInsertionPt())); } - // Go through the paths associated with operand U, and convert all the - // constant expressions along all paths to corresponding instructions. + // Go through all the paths associated with operand U, and convert all the + // constant expressions along all the paths to corresponding instructions. auto *II = I; auto &Paths = CEPaths[&U]; for (auto &Path : Paths) { for (auto *CE : Path) { - if (!Visited.insert(CE).second) - continue; - auto *NI = CE->getAsInstruction(BI); + // Instruction which is equivalent to CE. + Instruction *NI = nullptr; + + if (!Visited.count(CE)) { + // CE is encountered for the first time; convert it into a + // corresponding instruction NI, and appropriately insert NI before + // the parent instruction. + NI = CE->getAsInstruction(BI); + + // Mark CE as visited by mapping CE to NI. + Visited[CE] = NI; + + // If required, collect NI. + if (Insts) + Insts->insert(NI); + } else { + // We have already encountered CE; the corresponding instruction + // already exists, so use it to replace CE. + NI = Visited[CE]; + } + + assert(NI && "Expected an instruction corresponding to constant " + "expression."); + + // Replace all uses of constant expression CE by the corresponding + // instruction NI within the current parent instruction. II->replaceUsesOfWith(CE, NI); - CE->removeDeadConstantUsers(); BI = II = NI; - if (Insts) - Insts->insert(NI); } } } + + // Remove all converted constant expressions that are dead by now. + for (auto Item : Visited) + Item.first->removeDeadConstantUsers(); } void collectConstantExprPaths( diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 0a28a001ef0d7..d59d87ad631b0 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -733,7 +733,7 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) { LLVMContextImpl *CImpl = EltTy->getContext().pImpl; // Automatically convert typed pointers to opaque pointers. - if (CImpl->OpaquePointers) + if (CImpl->getOpaquePointers()) return get(EltTy->getContext(), AddressSpace); // Since AddressSpace #0 is the common case, we special case it. @@ -747,7 +747,7 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) { PointerType *PointerType::get(LLVMContext &C, unsigned AddressSpace) { LLVMContextImpl *CImpl = C.pImpl; - assert(CImpl->getOpaquePointers() && + assert(CImpl->getOpaquePointers() && "Can only create opaque pointers in opaque pointer mode"); // Since AddressSpace #0 is the common case, we special case it.
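Because the flag is now a lazily initialized Optional<bool>, the command-line default is only consulted on first query, so a context can opt in programmatically before any pointer type exists. A minimal sketch, assuming a freshly created context:

// With opaque pointers enabled up front, typed pointer requests are
// transparently converted by PointerType::get (see the Type.cpp hunk above).
LLVMContext Ctx;
Ctx.enableOpaquePointers();
PointerType *PT = PointerType::get(Type::getInt8Ty(Ctx), /*AddressSpace=*/0);
assert(PT->isOpaque() && "typed pointer request yielded an opaque pointer");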
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 4136a9afc9cf0..b475c83278744 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -546,9 +546,7 @@ void Value::replaceUsesWithIf(Value *New, SmallVector, 8> Consts; SmallPtrSet Visited; - for (use_iterator UI = use_begin(), E = use_end(); UI != E;) { - Use &U = *UI; - ++UI; + for (Use &U : llvm::make_early_inc_range(uses())) { if (!ShouldReplace(U)) continue; // Must handle Constants specially, we cannot call replaceUsesOfWith on a diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index de092ec632f4c..8985b9ca70875 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -415,15 +415,18 @@ class Verifier : public InstVisitor, VerifierSupport { for (const GlobalAlias &GA : M.aliases()) visitGlobalAlias(GA); + for (const GlobalIFunc &GI : M.ifuncs()) + visitGlobalIFunc(GI); + for (const NamedMDNode &NMD : M.named_metadata()) visitNamedMDNode(NMD); for (const StringMapEntry &SMEC : M.getComdatSymbolTable()) visitComdat(SMEC.getValue()); - visitModuleFlags(M); - visitModuleIdents(M); - visitModuleCommandLines(M); + visitModuleFlags(); + visitModuleIdents(); + visitModuleCommandLines(); verifyCompileUnits(); @@ -440,6 +443,7 @@ class Verifier : public InstVisitor, VerifierSupport { void visitGlobalValue(const GlobalValue &GV); void visitGlobalVariable(const GlobalVariable &GV); void visitGlobalAlias(const GlobalAlias &GA); + void visitGlobalIFunc(const GlobalIFunc &GI); void visitAliaseeSubExpr(const GlobalAlias &A, const Constant &C); void visitAliaseeSubExpr(SmallPtrSetImpl &Visited, const GlobalAlias &A, const Constant &C); @@ -448,9 +452,9 @@ class Verifier : public InstVisitor, VerifierSupport { void visitMetadataAsValue(const MetadataAsValue &MD, Function *F); void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F); void visitComdat(const Comdat &C); - void visitModuleIdents(const Module &M); - void visitModuleCommandLines(const Module &M); - void visitModuleFlags(const Module &M); + void visitModuleIdents(); + void visitModuleCommandLines(); + void visitModuleFlags(); void visitModuleFlag(const MDNode *Op, DenseMap &SeenIDs, SmallVectorImpl &Requirements); @@ -823,6 +827,21 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { visitGlobalValue(GA); } +void Verifier::visitGlobalIFunc(const GlobalIFunc &GI) { + // Pierce through ConstantExprs and GlobalAliases and check that the resolver + // has a Function + const Function *Resolver = GI.getResolverFunction(); + Assert(Resolver, "IFunc must have a Function resolver", &GI); + + // Check that the immediate resolver operand (prior to any bitcasts) has the + // correct type + const Type *ResolverTy = GI.getResolver()->getType(); + const Type *ResolverFuncTy = + GlobalIFunc::getResolverFunctionType(GI.getValueType()); + Assert(ResolverTy == ResolverFuncTy->getPointerTo(), + "IFunc resolver has incorrect type", &GI); +} + void Verifier::visitNamedMDNode(const NamedMDNode &NMD) { // There used to be various other llvm.dbg.* nodes, but we don't support // upgrading them and we want to reserve the namespace for future uses. 
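The IFunc verifier check added above enforces that the resolver, once aliases and casts are pierced, is a Function whose type matches GlobalIFunc::getResolverFunctionType of the ifunc's value type. A sketch of a well-formed pair; all names are illustrative and an existing Module M with context Ctx is assumed:

// The resolver returns a pointer to the ifunc's value type, so the new
// visitGlobalIFunc check accepts this ifunc.
FunctionType *IFuncValTy =
    FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
FunctionType *ResolverTy = GlobalIFunc::getResolverFunctionType(IFuncValTy);
Function *Resolver = Function::Create(
    ResolverTy, GlobalValue::ExternalLinkage, "example_resolver", M);
GlobalIFunc::create(IFuncValTy, /*AddressSpace=*/0,
                    GlobalValue::ExternalLinkage, "example_ifunc", Resolver,
                    &M);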
@@ -1497,7 +1516,7 @@ void Verifier::visitComdat(const Comdat &C) { "comdat global value has private linkage", GV); } -void Verifier::visitModuleIdents(const Module &M) { +void Verifier::visitModuleIdents() { const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident"); if (!Idents) return; @@ -1514,7 +1533,7 @@ void Verifier::visitModuleIdents(const Module &M) { } } -void Verifier::visitModuleCommandLines(const Module &M) { +void Verifier::visitModuleCommandLines() { const NamedMDNode *CommandLines = M.getNamedMetadata("llvm.commandline"); if (!CommandLines) return; @@ -1532,7 +1551,7 @@ void Verifier::visitModuleCommandLines(const Module &M) { } } -void Verifier::visitModuleFlags(const Module &M) { +void Verifier::visitModuleFlags() { const NamedMDNode *Flags = M.getModuleFlagsMetadata(); if (!Flags) return; diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index acfd27f6a6279..6ce2ed265739e 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -997,7 +997,7 @@ Error LTO::checkPartiallySplit() { return Error::success(); } -Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) { +Error LTO::run(AddStreamFn AddStream, FileCache Cache) { // Compute "dead" symbols, we don't want to import/export these! DenseSet GUIDPreservedSymbols; DenseMap GUIDPrevailingResolutions; @@ -1183,7 +1183,7 @@ namespace { class InProcessThinBackend : public ThinBackendProc { ThreadPool BackendThreadPool; AddStreamFn AddStream; - NativeObjectCache Cache; + FileCache Cache; std::set CfiFunctionDefs; std::set CfiFunctionDecls; @@ -1195,7 +1195,7 @@ class InProcessThinBackend : public ThinBackendProc { const Config &Conf, ModuleSummaryIndex &CombinedIndex, ThreadPoolStrategy ThinLTOParallelism, const StringMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, NativeObjectCache Cache) + AddStreamFn AddStream, FileCache Cache) : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries), BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)), Cache(std::move(Cache)) { @@ -1208,8 +1208,8 @@ class InProcessThinBackend : public ThinBackendProc { } Error runThinLTOBackendThread( - AddStreamFn AddStream, NativeObjectCache Cache, unsigned Task, - BitcodeModule BM, ModuleSummaryIndex &CombinedIndex, + AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM, + ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, const FunctionImporter::ExportSetTy &ExportList, const std::map &ResolvedODR, @@ -1239,7 +1239,11 @@ class InProcessThinBackend : public ThinBackendProc { computeLTOCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList, ExportList, ResolvedODR, DefinedGlobals, CfiFunctionDefs, CfiFunctionDecls); - if (AddStreamFn CacheAddStream = Cache(Task, Key)) + Expected CacheAddStreamOrErr = Cache(Task, Key); + if (Error Err = CacheAddStreamOrErr.takeError()) + return Err; + AddStreamFn &CacheAddStream = *CacheAddStreamOrErr; + if (CacheAddStream) return RunThinBackend(CacheAddStream); return Error::success(); @@ -1301,7 +1305,7 @@ class InProcessThinBackend : public ThinBackendProc { ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism) { return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, NativeObjectCache Cache) { + AddStreamFn AddStream, FileCache Cache) { return std::make_unique( Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream, Cache); @@ -1395,14 +1399,14 @@ ThinBackend 
lto::createWriteIndexesThinBackend( raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) { return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex, const StringMap &ModuleToDefinedGVSummaries, - AddStreamFn AddStream, NativeObjectCache Cache) { + AddStreamFn AddStream, FileCache Cache) { return std::make_unique( Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix, ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite); }; } -Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, +Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, const DenseSet &GUIDPreservedSymbols) { timeTraceProfilerBegin("ThinLink", StringRef("")); auto TimeTraceScopeExit = llvm::make_scope_exit([]() { diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index dfdd6bff8ad66..be06556b0c3bf 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -251,18 +251,16 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, TLII->disableAllFunctions(); FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); - AAManager AA; // Parse a custom AA pipeline if asked to. if (!Conf.AAPipeline.empty()) { + AAManager AA; if (auto Err = PB.parseAAPipeline(AA, Conf.AAPipeline)) { report_fatal_error(Twine("unable to parse AA pipeline description '") + Conf.AAPipeline + "': " + toString(std::move(Err))); } - } else { - AA = PB.buildDefaultAAPipeline(); + // Register the AA manager first so that our version is the one used. + FAM.registerPass([&] { return std::move(AA); }); } - // Register the AA manager first so that our version is the one used. - FAM.registerPass([&] { return std::move(AA); }); // Register all the basic analyses with the managers. PB.registerModuleAnalyses(MAM); @@ -411,7 +409,10 @@ static void codegen(const Config &Conf, TargetMachine *TM, EC.message()); } - auto Stream = AddStream(Task); + Expected> StreamOrErr = AddStream(Task); + if (Error Err = StreamOrErr.takeError()) + report_fatal_error(std::move(Err)); + std::unique_ptr &Stream = *StreamOrErr; legacy::PassManager CodeGenPasses; CodeGenPasses.add( createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index 7dca994e735c4..088e45c9e8dcc 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -245,7 +245,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { // make unique temp output file to put generated code SmallString<128> Filename; - auto AddStream = [&](size_t Task) -> std::unique_ptr { + auto AddStream = [&](size_t Task) -> std::unique_ptr { StringRef Extension(Config.CGFileType == CGFT_AssemblyFile ? "s" : "o"); int FD; @@ -254,7 +254,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { if (EC) emitError(EC.message()); - return std::make_unique( + return std::make_unique( std::make_unique(FD, true)); }; diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index b4c1836c678da..9474d8c9dafbf 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -291,11 +291,6 @@ static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM, TLII->disableAllFunctions(); FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); - AAManager AA = PB.buildDefaultAAPipeline(); - - // Register the AA manager first so that our version is the one used. 
- FAM.registerPass([&] { return std::move(AA); }); - // Register all the basic analyses with the managers. PB.registerModuleAnalyses(MAM); PB.registerCGSCCAnalyses(CGAM); diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp index 77e5c85cfc195..f9f51bf17d951 100644 --- a/llvm/lib/Linker/LinkModules.cpp +++ b/llvm/lib/Linker/LinkModules.cpp @@ -485,20 +485,14 @@ bool ModuleLinker::run() { // Alias have to go first, since we are not able to find their comdats // otherwise. - for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) { - GlobalAlias &GV = *I++; + for (GlobalAlias &GV : llvm::make_early_inc_range(DstM.aliases())) dropReplacedComdat(GV, ReplacedDstComdats); - } - for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) { - GlobalVariable &GV = *I++; + for (GlobalVariable &GV : llvm::make_early_inc_range(DstM.globals())) dropReplacedComdat(GV, ReplacedDstComdats); - } - for (auto I = DstM.begin(), E = DstM.end(); I != E;) { - Function &GV = *I++; + for (Function &GV : llvm::make_early_inc_range(DstM)) dropReplacedComdat(GV, ReplacedDstComdats); - } for (GlobalVariable &GV : SrcM->globals()) if (GV.hasLinkOnceLinkage()) diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp index ef5516c1afb2b..52b59185c6fca 100644 --- a/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/llvm/lib/MC/MCInstrAnalysis.cpp @@ -34,3 +34,9 @@ Optional MCInstrAnalysis::evaluateMemoryOperandAddress( uint64_t Size) const { return None; } + +Optional +MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const { + return None; +} \ No newline at end of file diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index 10ae27c2acc20..277d88cf1cd27 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -965,7 +965,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, // Write the section relocation entries, in reverse order to match 'as' // (approximately, the exact algorithm is more complicated than this). std::vector &Relocs = Relocations[&Sec]; - for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) { + for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) { W.write(Rel.MRE.r_word0); W.write(Rel.MRE.r_word1); } diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 015931ba4cac4..636c1d238932f 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1087,7 +1087,7 @@ uint32_t WasmObjectWriter::writeDataSection(const MCAsmLayout &Layout) { void WasmObjectWriter::writeRelocSection( uint32_t SectionIndex, StringRef Name, std::vector &Relocs) { - // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + // See: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md // for descriptions of the reloc sections. if (Relocs.empty()) diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 1eabc29ac5d45..84181ae5e501d 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -622,14 +622,14 @@ ELFFile::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const { } template -Expected> +Expected> ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec) const { Expected> ContentsOrErr = getSectionContents(Sec); if (!ContentsOrErr) return ContentsOrErr.takeError(); ArrayRef Content = *ContentsOrErr; DataExtractor Data(Content, isLE(), ELFT::Is64Bits ? 
8 : 4); - std::vector FunctionEntries; + std::vector FunctionEntries; DataExtractor::Cursor Cur(0); Error ULEBSizeErr = Error::success(); @@ -656,7 +656,7 @@ ELFFile::decodeBBAddrMap(const Elf_Shdr &Sec) const { while (!ULEBSizeErr && Cur && Cur.tell() < Content.size()) { uintX_t Address = static_cast(Data.getAddress(Cur)); uint32_t NumBlocks = ReadULEB128AsUInt32(); - std::vector BBEntries; + std::vector BBEntries; for (uint32_t BlockID = 0; !ULEBSizeErr && Cur && (BlockID < NumBlocks); ++BlockID) { uint32_t Offset = ReadULEB128AsUInt32(); diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index e84defb6786e8..7501661591f06 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -2048,6 +2048,46 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const { SectionName == "__swift_ast"; } +namespace { +template +ArrayRef getSegmentContents(const MachOObjectFile &Obj, + MachOObjectFile::LoadCommandInfo LoadCmd, + StringRef SegmentName) { + auto SegmentOrErr = getStructOrErr(Obj, LoadCmd.Ptr); + if (!SegmentOrErr) { + consumeError(SegmentOrErr.takeError()); + return {}; + } + auto &Segment = SegmentOrErr.get(); + if (StringRef(Segment.segname, 16).startswith(SegmentName)) + return arrayRefFromStringRef(Obj.getData().slice( + Segment.fileoff, Segment.fileoff + Segment.filesize)); + return {}; +} +} // namespace + +ArrayRef +MachOObjectFile::getSegmentContents(StringRef SegmentName) const { + for (auto LoadCmd : load_commands()) { + ArrayRef Contents; + switch (LoadCmd.C.cmd) { + case MachO::LC_SEGMENT: + Contents = ::getSegmentContents(*this, LoadCmd, + SegmentName); + break; + case MachO::LC_SEGMENT_64: + Contents = ::getSegmentContents(*this, LoadCmd, + SegmentName); + break; + default: + continue; + } + if (!Contents.empty()) + return Contents; + } + return {}; +} + unsigned MachOObjectFile::getSectionID(SectionRef Sec) const { return Sec.getRawDataRefImpl().d.a; } diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index fedfcae959122..6a19b159f3d57 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -359,7 +359,7 @@ Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) { Error WasmObjectFile::parseDylink0Section(ReadContext &Ctx) { // See - // https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md + // https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md HasDylinkSection = true; const uint8_t *OrigEnd = Ctx.End; diff --git a/llvm/lib/ObjectYAML/COFFYAML.cpp b/llvm/lib/ObjectYAML/COFFYAML.cpp index 5a85e22e4f49c..6e5cdce89060f 100644 --- a/llvm/lib/ObjectYAML/COFFYAML.cpp +++ b/llvm/lib/ObjectYAML/COFFYAML.cpp @@ -448,24 +448,24 @@ void MappingTraits::mapping(IO &IO, MappingNormalization NDC( IO, PH.Header.DLLCharacteristics); - IO.mapRequired("AddressOfEntryPoint", PH.Header.AddressOfEntryPoint); - IO.mapRequired("ImageBase", PH.Header.ImageBase); - IO.mapRequired("SectionAlignment", PH.Header.SectionAlignment); - IO.mapRequired("FileAlignment", PH.Header.FileAlignment); - IO.mapRequired("MajorOperatingSystemVersion", + IO.mapOptional("AddressOfEntryPoint", PH.Header.AddressOfEntryPoint); + IO.mapOptional("ImageBase", PH.Header.ImageBase); + IO.mapOptional("SectionAlignment", PH.Header.SectionAlignment, 1); + IO.mapOptional("FileAlignment", PH.Header.FileAlignment, 1); + IO.mapOptional("MajorOperatingSystemVersion", PH.Header.MajorOperatingSystemVersion); - 
IO.mapRequired("MinorOperatingSystemVersion", + IO.mapOptional("MinorOperatingSystemVersion", PH.Header.MinorOperatingSystemVersion); - IO.mapRequired("MajorImageVersion", PH.Header.MajorImageVersion); - IO.mapRequired("MinorImageVersion", PH.Header.MinorImageVersion); - IO.mapRequired("MajorSubsystemVersion", PH.Header.MajorSubsystemVersion); - IO.mapRequired("MinorSubsystemVersion", PH.Header.MinorSubsystemVersion); - IO.mapRequired("Subsystem", NWS->Subsystem); - IO.mapRequired("DLLCharacteristics", NDC->Characteristics); - IO.mapRequired("SizeOfStackReserve", PH.Header.SizeOfStackReserve); - IO.mapRequired("SizeOfStackCommit", PH.Header.SizeOfStackCommit); - IO.mapRequired("SizeOfHeapReserve", PH.Header.SizeOfHeapReserve); - IO.mapRequired("SizeOfHeapCommit", PH.Header.SizeOfHeapCommit); + IO.mapOptional("MajorImageVersion", PH.Header.MajorImageVersion); + IO.mapOptional("MinorImageVersion", PH.Header.MinorImageVersion); + IO.mapOptional("MajorSubsystemVersion", PH.Header.MajorSubsystemVersion); + IO.mapOptional("MinorSubsystemVersion", PH.Header.MinorSubsystemVersion); + IO.mapOptional("Subsystem", NWS->Subsystem); + IO.mapOptional("DLLCharacteristics", NDC->Characteristics); + IO.mapOptional("SizeOfStackReserve", PH.Header.SizeOfStackReserve); + IO.mapOptional("SizeOfStackCommit", PH.Header.SizeOfStackCommit); + IO.mapOptional("SizeOfHeapReserve", PH.Header.SizeOfHeapReserve); + IO.mapOptional("SizeOfHeapCommit", PH.Header.SizeOfHeapCommit); IO.mapOptional("NumberOfRvaAndSize", PH.Header.NumberOfRvaAndSize, COFF::NUM_DATA_DIRECTORIES + 1); diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index a9e63a68e45e0..fdf9aeae16228 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -155,6 +155,13 @@ void ScalarEnumerationTraits::enumeration( ECase(NT_FREEBSD_PROCSTAT_OSREL); ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS); ECase(NT_FREEBSD_PROCSTAT_AUXV); + // OpenBSD core note types. + ECase(NT_OPENBSD_PROCINFO); + ECase(NT_OPENBSD_AUXV); + ECase(NT_OPENBSD_REGS); + ECase(NT_OPENBSD_FPREGS); + ECase(NT_OPENBSD_XFPREGS); + ECase(NT_OPENBSD_WCOOKIE); // AMD specific notes. 
(Code Object V2) ECase(NT_AMD_HSA_CODE_OBJECT_VERSION); ECase(NT_AMD_HSA_HSAIL); diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index 63179ae614005..c653c29ec9a74 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -288,6 +288,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) { } Error MachOWriter::writeSectionData(raw_ostream &OS) { + uint64_t LinkEditOff = 0; for (auto &LC : Obj.LoadCommands) { switch (LC.Data.load_command_data.cmd) { case MachO::LC_SEGMENT: @@ -297,6 +298,9 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) { if (0 == strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) { FoundLinkEditSeg = true; + LinkEditOff = segOff; + if (Obj.RawLinkEditSegment) + continue; writeLinkEditData(OS); } for (auto &Sec : LC.Sections) { @@ -344,6 +348,13 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) { } } + if (Obj.RawLinkEditSegment) { + ZeroToOffset(OS, LinkEditOff); + if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff) + return createStringError(errc::invalid_argument, + "section offsets don't line up"); + Obj.RawLinkEditSegment->writeAsBinary(OS); + } return Error::success(); } diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp index 757e46cefc406..c9562bd72258a 100644 --- a/llvm/lib/ObjectYAML/MachOYAML.cpp +++ b/llvm/lib/ObjectYAML/MachOYAML.cpp @@ -110,6 +110,9 @@ void MappingTraits::mapping(IO &IO, Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 || Object.Header.magic == MachO::MH_CIGAM_64; IO.mapOptional("LoadCommands", Object.LoadCommands); + + if (Object.RawLinkEditSegment || !IO.outputting()) + IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment); if(!Object.LinkEdit.isEmpty() || !IO.outputting()) IO.mapOptional("LinkEditData", Object.LinkEdit); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 074c1f354d5bd..5b0f570247e13 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -322,6 +322,15 @@ class NoOpFunctionAnalysis : public AnalysisInfoMixin { static StringRef name() { return "NoOpFunctionAnalysis"; } }; +/// No-op loop nest pass which does nothing. +struct NoOpLoopNestPass : PassInfoMixin { + PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &, + LoopStandardAnalysisResults &, LPMUpdater &) { + return PreservedAnalyses::all(); + } + static StringRef name() { return "NoOpLoopNestPass"; } +}; + /// No-op loop pass which does nothing. struct NoOpLoopPass : PassInfoMixin { PreservedAnalyses run(Loop &L, LoopAnalysisManager &, @@ -381,6 +390,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, PIC->addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ + PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ @@ -416,6 +427,11 @@ void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) { } void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) { + // We almost always want the default alias analysis pipeline. + // If a user wants a different one, they can register their own before calling + // registerFunctionAnalyses(). 
+ FAM.registerPass([&] { return buildDefaultAAPipeline(); }); + #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ FAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" @@ -583,10 +599,6 @@ Expected<bool> parseLowerMatrixIntrinsicsPassOptions(StringRef Params) { return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics"); } -Expected<bool> parseModuleAddressSanitizerPassOptions(StringRef Params) { - return parseSinglePassOption(Params, "kernel", "ModuleAddressSanitizer"); -} - Expected<AddressSanitizerOptions> parseASanPassOptions(StringRef Params) { AddressSanitizerOptions Result; while (!Params.empty()) { @@ -834,7 +846,7 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) { return true; if (Name == "cgscc") return true; - if (Name == "function") + if (Name == "function" || Name == "function<eager-inv>") return true; // Explicitly handle custom-parsed pass names. @@ -860,7 +872,7 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "cgscc") return true; - if (Name == "function") + if (Name == "function" || Name == "function<eager-inv>") return true; // Explicitly handle custom-parsed pass names. @@ -886,7 +898,7 @@ template <typename CallbacksT> static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. - if (Name == "function") + if (Name == "function" || Name == "function<eager-inv>") return true; if (Name == "loop" || Name == "loop-mssa") return true; @@ -909,6 +921,28 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { return callbacksAcceptPassName(Name, Callbacks); } +template <typename CallbacksT> +static bool isLoopNestPassName(StringRef Name, CallbacksT &Callbacks, + bool &UseMemorySSA) { + UseMemorySSA = false; + + // Explicitly handle custom-parsed pass names.
+ if (parseRepeatPassName(Name)) + return true; + + if (Name == "lnicm") { + UseMemorySSA = true; + return true; + } + +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) \ + return true; +#include "PassRegistry.def" + + return callbacksAcceptPassName(Name, Callbacks); +} + template <typename CallbacksT> static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks, bool &UseMemorySSA) { @@ -1015,11 +1049,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); return Error::success(); } - if (Name == "function") { + if (Name == "function" || Name == "function<eager-inv>") { FunctionPassManager FPM; if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline)) return Err; - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), + Name != "function")); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { @@ -1141,6 +1176,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ return Error::success(); \ } +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(createModuleToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ + return Error::success(); \ + } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToFunctionPassAdaptor( \ @@ -1182,12 +1223,13 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, CGPM.addPass(std::move(NestedCGPM)); return Error::success(); } - if (Name == "function") { + if (Name == "function" || Name == "function<eager-inv>") { FunctionPassManager FPM; if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline)) return Err; // Add the nested pass manager with the appropriate adaptor. - CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + CGPM.addPass( + createCGSCCToFunctionPassAdaptor(std::move(FPM), Name != "function")); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { @@ -1256,6 +1298,12 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ return Error::success(); \ } +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ + return Error::success(); \ + } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ @@ -1360,6 +1408,11 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, // bool UseMemorySSA = !("canon-freeze" || "loop-predication" || // "guard-widening"); // The risk is that it may become obsolete if we're not careful. +#define LOOPNEST_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \ + return Error::success(); \ + } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \ @@ -1418,6 +1471,11 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM, } // Now expand the basic registered passes from the .inc file.
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + LPM.addPass(CREATE_PASS); \ + return Error::success(); \ + } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ LPM.addPass(CREATE_PASS); \ @@ -1545,6 +1603,10 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM, } else if (isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) { Pipeline = {{"function", std::move(*Pipeline)}}; + } else if (isLoopNestPassName(FirstName, LoopPipelineParsingCallbacks, + UseMemorySSA)) { + Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop", + std::move(*Pipeline)}}}}; } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks, UseMemorySSA)) { Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop", @@ -1739,6 +1801,10 @@ void PassBuilder::printPassNames(raw_ostream &OS) { OS << "Function alias analyses:\n"; #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS); +#include "PassRegistry.def" + + OS << "LoopNest passes:\n"; +#define LOOPNEST_PASS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Loop passes:\n"; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 2009a687ae7d6..8b95b4c53e1d6 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -162,6 +162,10 @@ static cl::opt EnableO3NonTrivialUnswitching( "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3")); +static cl::opt EnableEagerlyInvalidateAnalyses( + "eagerly-invalidate-analyses", cl::init(false), cl::Hidden, + cl::desc("Eagerly invalidate more analyses in default pipelines")); + PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; @@ -172,6 +176,7 @@ PipelineTuningOptions::PipelineTuningOptions() { LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; CallGraphProfile = true; MergeFunctions = false; + EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; } namespace llvm { @@ -596,7 +601,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, FPM.addPass(InstCombinePass()); // Combine silly sequences. invokePeepholeEPCallbacks(FPM, Level); - CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + CGPipeline.addPass(createCGSCCToFunctionPassAdaptor( + std::move(FPM), PTO.EagerlyInvalidateAnalyses)); MPM.addPass(std::move(MIWP)); @@ -623,7 +629,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, FPM.addPass(createFunctionToLoopPassAdaptor( LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), + PTO.EagerlyInvalidateAnalyses)); // Add the profile lowering pass. InstrProfOptions Options; @@ -723,7 +730,8 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level, // Lastly, add the core function simplification pipeline nested inside the // CGSCC walk. 
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, Phase))); + buildFunctionSimplificationPipeline(Level, Phase), + PTO.EagerlyInvalidateAnalyses)); MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); @@ -792,7 +800,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. if (LoadSampleProfile) EarlyFPM.addPass(InstCombinePass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM), + PTO.EagerlyInvalidateAnalyses)); if (LoadSampleProfile) { // Annotate sample profile right after early FPM to ensure freshness of @@ -832,7 +841,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, C(MPM, Level); // Specialize functions with IPSCCP. - if (EnableFunctionSpecialization) + if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) MPM.addPass(FunctionSpecializationPass()); // Interprocedural constant propagation now that basic cleanup has occurred @@ -866,7 +875,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, invokePeepholeEPCallbacks(GlobalCleanupPM, Level); GlobalCleanupPM.addPass(SimplifyCFGPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), + PTO.EagerlyInvalidateAnalyses)); // Add all the requested passes for instrumentation PGO, if requested. if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && @@ -1093,11 +1103,16 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, for (auto &C : VectorizerStartEPCallbacks) C(OptimizePM, Level); + LoopPassManager LPM; // First rotate loops that may have been un-rotated by prior passes. // Disable header duplication at -Oz. + LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink)); + // Some loops may have become dead by now. Try to delete them. + // FIXME: see discussion in https://reviews.llvm.org/D112851 + // this may need to be revisited once GVN is more powerful. + LPM.addPass(LoopDeletionPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor( - LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink), - /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); + std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is @@ -1149,7 +1164,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass(CoroCleanupPass()); // Add the core optimizing pipeline. - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), + PTO.EagerlyInvalidateAnalyses)); for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); @@ -1392,7 +1408,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, if (Level.getSpeedupLevel() > 1) { FunctionPassManager EarlyFPM; EarlyFPM.addPass(CallSiteSplittingPass()); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); + MPM.addPass(createModuleToFunctionPassAdaptor( + std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses)); // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets.
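The PTO.EagerlyInvalidateAnalyses argument threaded through the pipeline hunks above tells the module-to-function (and CGSCC-to-function) adaptors to drop cached function analyses as soon as the nested pipeline finishes with a function, trading later recomputation for lower peak memory. A toy model of that plumbing, with illustrative names rather than the real pass-manager types:

#include <functional>
#include <map>
#include <string>
#include <vector>

struct Function { std::string Name; };
using AnalysisCache = std::map<std::string, int>; // stand-in for cached results
using FunctionPass = std::function<void(Function &, AnalysisCache &)>;

// Mirrors createModuleToFunctionPassAdaptor(std::move(FPM), EagerlyInvalidate).
void runOnEachFunction(std::vector<Function> &Module,
                       std::vector<FunctionPass> &FPM, AnalysisCache &Cache,
                       bool EagerlyInvalidate) {
  for (Function &F : Module) {
    for (FunctionPass &P : FPM)
      P(F, Cache); // passes may populate or consult the cache
    if (EagerlyInvalidate)
      Cache.clear(); // free analysis results now instead of lazily later
  }
}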
@@ -1401,7 +1418,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); - if (EnableFunctionSpecialization) + if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) MPM.addPass(FunctionSpecializationPass()); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function @@ -1468,7 +1485,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, PeepholeFPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(PeepholeFPM, Level); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM), + PTO.EagerlyInvalidateAnalyses)); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead. It isn't clear this is @@ -1515,7 +1533,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, FPM.addPass(TailCallElimPass()); // Run a few AA driver optimizations here and now to cleanup the code. - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), + PTO.EagerlyInvalidateAnalyses)); MPM.addPass( createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); @@ -1572,7 +1591,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); - MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), + PTO.EagerlyInvalidateAnalyses)); // Lower type metadata and the type.test intrinsic. 
This pass supports // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 28470c5b3f2d5..7c2e9592f3d25 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -142,12 +142,10 @@ MODULE_PASS_WITH_PARAMS("hwasan", "kernel;recover") MODULE_PASS_WITH_PARAMS("asan-module", "ModuleAddressSanitizerPass", - [](bool CompileKernel) { - return ModuleAddressSanitizerPass(CompileKernel, - false, true, - false); + [](AddressSanitizerOptions Opts) { + return ModuleAddressSanitizerPass(Opts); }, - parseModuleAddressSanitizerPassOptions, + parseASanPassOptions, "kernel") #undef MODULE_PASS_WITH_PARAMS @@ -460,6 +458,16 @@ FUNCTION_PASS_WITH_PARAMS("print", "may;must") #undef FUNCTION_PASS_WITH_PARAMS +#ifndef LOOPNEST_PASS +#define LOOPNEST_PASS(NAME, CREATE_PASS) +#endif +LOOPNEST_PASS("lnicm", LNICMPass()) +LOOPNEST_PASS("loop-flatten", LoopFlattenPass()) +LOOPNEST_PASS("loop-interchange", LoopInterchangePass()) +LOOPNEST_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) +LOOPNEST_PASS("no-op-loopnest", NoOpLoopNestPass()) +#undef LOOPNEST_PASS + #ifndef LOOP_ANALYSIS #define LOOP_ANALYSIS(NAME, CREATE_PASS) #endif @@ -477,11 +485,8 @@ LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass()) LOOP_PASS("dot-ddg", DDGDotPrinterPass()) LOOP_PASS("invalidate", InvalidateAllAnalysesPass()) LOOP_PASS("licm", LICMPass()) -LOOP_PASS("lnicm", LNICMPass()) -LOOP_PASS("loop-flatten", LoopFlattenPass()) LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass()) -LOOP_PASS("loop-interchange", LoopInterchangePass()) LOOP_PASS("loop-rotate", LoopRotatePass()) LOOP_PASS("no-op-loop", NoOpLoopPass()) LOOP_PASS("print", PrintLoopPass(dbgs())) @@ -489,7 +494,6 @@ LOOP_PASS("loop-deletion", LoopDeletionPass()) LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) -LOOP_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp index 727d02afea17a..8e6be6730ea42 100644 --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -29,10 +29,14 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/GraphWriter.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Program.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" +#include #include +#include #include using namespace llvm; @@ -40,10 +44,11 @@ using namespace llvm; cl::opt PreservedCFGCheckerInstrumentation::VerifyPreservedCFG( "verify-cfg-preserved", cl::Hidden, #ifdef NDEBUG - cl::init(false)); + cl::init(false) #else - cl::init(true)); + cl::init(true) #endif + ); // An option that prints out the IR after passes, similar to // -print-after-all except that it only prints the IR after passes that @@ -79,7 +84,9 @@ enum class ChangePrinter { PrintChangedDiffVerbose, PrintChangedDiffQuiet, PrintChangedColourDiffVerbose, - PrintChangedColourDiffQuiet + PrintChangedColourDiffQuiet, + PrintChangedDotCfgVerbose, + PrintChangedDotCfgQuiet }; static cl::opt PrintChanged( 
"print-changed", cl::desc("Print changed IRs"), cl::Hidden, @@ -95,6 +102,10 @@ static cl::opt PrintChanged( "Display patch-like changes with color"), clEnumValN(ChangePrinter::PrintChangedColourDiffQuiet, "cdiff-quiet", "Display patch-like changes in quiet mode with color"), + clEnumValN(ChangePrinter::PrintChangedDotCfgVerbose, "dot-cfg", + "Create a website with graphical changes"), + clEnumValN(ChangePrinter::PrintChangedDotCfgQuiet, "dot-cfg-quiet", + "Create a website with graphical changes in quiet mode"), // Sentinel value for unspecified option. clEnumValN(ChangePrinter::PrintChangedVerbose, "", ""))); @@ -119,6 +130,40 @@ static cl::opt DiffBinary("print-changed-diff-path", cl::Hidden, cl::init("diff"), cl::desc("system diff used by change reporters")); +// An option for specifying the dot used by +// print-changed=[dot-cfg | dot-cfg-quiet] +static cl::opt + DotBinary("print-changed-dot-path", cl::Hidden, cl::init("dot"), + cl::desc("system dot used by change reporters")); + +// An option that determines the colour used for elements that are only +// in the before part. Must be a colour named in appendix J of +// https://graphviz.org/pdf/dotguide.pdf +cl::opt + BeforeColour("dot-cfg-before-color", + cl::desc("Color for dot-cfg before elements."), cl::Hidden, + cl::init("red")); +// An option that determines the colour used for elements that are only +// in the after part. Must be a colour named in appendix J of +// https://graphviz.org/pdf/dotguide.pdf +cl::opt AfterColour("dot-cfg-after-color", + cl::desc("Color for dot-cfg after elements."), + cl::Hidden, cl::init("forestgreen")); +// An option that determines the colour used for elements that are in both +// the before and after parts. Must be a colour named in appendix J of +// https://graphviz.org/pdf/dotguide.pdf +cl::opt + CommonColour("dot-cfg-common-color", + cl::desc("Color for dot-cfg common elements."), cl::Hidden, + cl::init("black")); + +// An option that determines where the generated website file (named +// passes.html) and the associated pdf files (named diff_*.pdf) are saved. +static cl::opt DotCfgDir( + "dot-cfg-dir", + cl::desc("Generate dot files into specified directory for changed IRs"), + cl::Hidden, cl::init("./")); + namespace { // Perform a system based diff between \p Before and \p After, using @@ -367,6 +412,21 @@ bool isIgnored(StringRef PassID) { "DevirtSCCRepeatedPass", "ModuleInlinerWrapperPass"}); } +std::string makeHTMLReady(StringRef SR) { + std::string S; + while (true) { + StringRef Clean = + SR.take_until([](char C) { return C == '<' || C == '>'; }); + S.append(Clean.str()); + SR = SR.drop_front(Clean.size()); + if (SR.size() == 0) + return S; + S.append(SR[0] == '<' ? "<" : ">"); + SR = SR.drop_front(); + } + llvm_unreachable("problems converting string to HTML"); +} + // Return the module when that is the appropriate level of comparison for \p IR. 
const Module *getModuleForComparison(Any IR) { if (any_isa(IR)) @@ -644,7 +704,7 @@ void IRComparer::compare( } unsigned Minor = 0; - FuncDataT Missing; + FuncDataT Missing(""); IRDataT::report(Before, After, [&](const FuncDataT *B, const FuncDataT *A) { assert((B || A) && "Both functions cannot be missing."); @@ -679,7 +739,7 @@ template void IRComparer::analyzeIR(Any IR, IRDataT &Data) { template bool IRComparer::generateFunctionData(IRDataT &Data, const Function &F) { if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) { - FuncDataT FD; + FuncDataT FD(F.getEntryBlock().getName().str()); for (const auto &B : F) { FD.getOrder().emplace_back(B.getName()); FD.getData().insert({B.getName(), B}); @@ -1200,8 +1260,862 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) { TextChangeReporter>::registerRequiredCallbacks(PIC); } +namespace { + +enum IRChangeDiffType { InBefore, InAfter, IsCommon, NumIRChangeDiffTypes }; + +// Describe where a given element exists. +std::string Colours[NumIRChangeDiffTypes]; + +class DisplayNode; +class DotCfgDiffDisplayGraph; + +// Base class for a node or edge in the dot-cfg-changes graph. +class DisplayElement { +public: + // Is this in before, after, or both? + IRChangeDiffType getType() const { return Type; } + +protected: + DisplayElement(IRChangeDiffType T) : Type(T) {} + const IRChangeDiffType Type; +}; + +// An edge representing a transition between basic blocks in the +// dot-cfg-changes graph. +class DisplayEdge : public DisplayElement { +public: + DisplayEdge(std::string V, DisplayNode &Node, IRChangeDiffType T) + : DisplayElement(T), Value(V), Node(Node) {} + // The value on which the transition is made. + std::string getValue() const { return Value; } + // The node (representing a basic block) reached by this transition. + const DisplayNode &getDestinationNode() const { return Node; } + +protected: + std::string Value; + const DisplayNode &Node; +}; + +// A node in the dot-cfg-changes graph which represents a basic block. +class DisplayNode : public DisplayElement { +public: + // \p C is the content for the node, \p T indicates the colour for the + // outline of the node + DisplayNode(std::string C, IRChangeDiffType T) + : DisplayElement(T), Content(C) {} + + // Iterator to the child nodes. Required by GraphWriter. + using ChildIterator = std::unordered_set::const_iterator; + ChildIterator children_begin() const { return Children.cbegin(); } + ChildIterator children_end() const { return Children.cend(); } + + // Iterator for the edges. Required by GraphWriter. + using EdgeIterator = std::vector::const_iterator; + EdgeIterator edges_begin() const { return EdgePtrs.cbegin(); } + EdgeIterator edges_end() const { return EdgePtrs.cend(); } + + // Create an edge to \p Node on value \p V, with type \p T. + void createEdge(StringRef V, DisplayNode &Node, IRChangeDiffType T); + + // Return the content of this node. + std::string getContent() const { return Content; } + + // Return the type of the edge to node \p S. + const DisplayEdge &getEdge(const DisplayNode &To) const { + assert(EdgeMap.find(&To) != EdgeMap.end() && "Expected to find edge."); + return *EdgeMap.find(&To)->second; + } + + // Return the value for the transition to basic block \p S. + // Required by GraphWriter. + std::string getEdgeSourceLabel(const DisplayNode &Sink) const { + return getEdge(Sink).getValue(); + } + + void createEdgeMap(); + +protected: + const std::string Content; + + // Place to collect all of the edges. 
Once they are all in the vector, + // the vector will not reallocate so then we can use pointers to them, + // which are required by the graph writing routines. + std::vector Edges; + + std::vector EdgePtrs; + std::unordered_set Children; + std::unordered_map EdgeMap; + + // Safeguard adding of edges. + bool AllEdgesCreated = false; +}; + +// Class representing a difference display (corresponds to a pdf file). +class DotCfgDiffDisplayGraph { +public: + DotCfgDiffDisplayGraph(std::string Name) : GraphName(Name) {} + + // Generate the file into \p DotFile. + void generateDotFile(StringRef DotFile); + + // Iterator to the nodes. Required by GraphWriter. + using NodeIterator = std::vector::const_iterator; + NodeIterator nodes_begin() const { + assert(NodeGenerationComplete && "Unexpected children iterator creation"); + return NodePtrs.cbegin(); + } + NodeIterator nodes_end() const { + assert(NodeGenerationComplete && "Unexpected children iterator creation"); + return NodePtrs.cend(); + } + + // Record the index of the entry node. At this point, we can build up + // vectors of pointers that are required by the graph routines. + void setEntryNode(unsigned N) { + // At this point, there will be no new nodes. + assert(!NodeGenerationComplete && "Unexpected node creation"); + NodeGenerationComplete = true; + for (auto &N : Nodes) + NodePtrs.emplace_back(&N); + + EntryNode = NodePtrs[N]; + } + + // Create a node. + void createNode(std::string C, IRChangeDiffType T) { + assert(!NodeGenerationComplete && "Unexpected node creation"); + Nodes.emplace_back(C, T); + } + // Return the node at index \p N to avoid problems with vectors reallocating. + DisplayNode &getNode(unsigned N) { + assert(N < Nodes.size() && "Node is out of bounds"); + return Nodes[N]; + } + unsigned size() const { + assert(NodeGenerationComplete && "Unexpected children iterator creation"); + return Nodes.size(); + } + + // Return the name of the graph. Required by GraphWriter. + std::string getGraphName() const { return GraphName; } + + // Return the string representing the differences for basic block \p Node. + // Required by GraphWriter. + std::string getNodeLabel(const DisplayNode &Node) const { + return Node.getContent(); + } + + // Return a string with colour information for Dot. Required by GraphWriter. + std::string getNodeAttributes(const DisplayNode &Node) const { + return attribute(Node.getType()); + } + + // Return a string with colour information for Dot. Required by GraphWriter. + std::string getEdgeColorAttr(const DisplayNode &From, + const DisplayNode &To) const { + return attribute(From.getEdge(To).getType()); + } + + // Get the starting basic block. Required by GraphWriter. + DisplayNode *getEntryNode() const { + assert(NodeGenerationComplete && "Unexpected children iterator creation"); + return EntryNode; + } + +protected: + // Return the string containing the colour to use as a Dot attribute. + std::string attribute(IRChangeDiffType T) const; + + bool NodeGenerationComplete = false; + const std::string GraphName; + std::vector Nodes; + std::vector NodePtrs; + DisplayNode *EntryNode = nullptr; +}; + +void DisplayNode::createEdge(StringRef V, DisplayNode &Node, + IRChangeDiffType T) { + assert(!AllEdgesCreated && "Expected to be able to still create edges."); + Edges.emplace_back(V.str(), Node, T); + Children.insert(&Node); +} + +void DisplayNode::createEdgeMap() { + // No more edges will be added so we can now use pointers to the edges + // as the vector will not grow and reallocate. 
+ AllEdgesCreated = true; + for (auto &E : Edges) + EdgeMap.insert({&E.getDestinationNode(), &E}); +} + +class DotCfgDiffNode; +class DotCfgDiff; + +// A class representing a basic block in the Dot difference graph. +class DotCfgDiffNode { +public: + DotCfgDiffNode() = delete; + + // Create a node in Dot difference graph \p G representing the basic block + // represented by \p BD with type \p T (where it exists). + DotCfgDiffNode(DotCfgDiff &G, unsigned N, const BlockDataT &BD, + IRChangeDiffType T) + : Graph(G), N(N), Data{&BD, nullptr}, Type(T) {} + DotCfgDiffNode(const DotCfgDiffNode &DN) + : Graph(DN.Graph), N(DN.N), Data{DN.Data[0], DN.Data[1]}, Type(DN.Type), + EdgesMap(DN.EdgesMap), Children(DN.Children), Edges(DN.Edges) {} + + unsigned getIndex() const { return N; } + + // The label of the basic block + StringRef getLabel() const { + assert(Data[0] && "Expected Data[0] to be set."); + return Data[0]->getLabel(); + } + // Return where this block exists. + IRChangeDiffType getType() const { return Type; } + // Change this basic block from being only in before to being common. + // Save the pointer to \p Other. + void setCommon(const BlockDataT &Other) { + assert(!Data[1] && "Expected only one block datum"); + Data[1] = &Other; + Type = IsCommon; + } + // Add an edge to \p E of type {\p Value, \p T}. + void addEdge(unsigned E, StringRef Value, IRChangeDiffType T) { + // This is a new edge or it is an edge being made common. + assert((EdgesMap.count(E) == 0 || T == IsCommon) && + "Unexpected edge count and type."); + EdgesMap[E] = {Value.str(), T}; + } + // Record the children and create edges. + void finalize(DotCfgDiff &G); + + // Return the type of the edge to node \p S. + std::pair getEdge(const unsigned S) const { + assert(EdgesMap.count(S) == 1 && "Expected to find edge."); + return EdgesMap.at(S); + } + + // Return the string representing the basic block. + std::string getBodyContent() const; + + void createDisplayEdges(DotCfgDiffDisplayGraph &Graph, unsigned DisplayNode, + std::map &NodeMap) const; + +protected: + DotCfgDiff &Graph; + const unsigned N; + const BlockDataT *Data[2]; + IRChangeDiffType Type; + std::map> EdgesMap; + std::vector Children; + std::vector Edges; +}; + +// Class representing the difference graph between two functions. +class DotCfgDiff { +public: + // \p Title is the title given to the graph. \p EntryNodeName is the + // entry node for the function. \p Before and \p After are the before + // after versions of the function, respectively. \p Dir is the directory + // in which to store the results. + DotCfgDiff(StringRef Title, const FuncDataT &Before, + const FuncDataT &After); + + DotCfgDiff(const DotCfgDiff &) = delete; + DotCfgDiff &operator=(const DotCfgDiff &) = delete; + + DotCfgDiffDisplayGraph createDisplayGraph(StringRef Title, + StringRef EntryNodeName); + + // Return a string consisting of the labels for the \p Source and \p Sink. + // The combination allows distinguishing changing transitions on the + // same value (ie, a transition went to X before and goes to Y after). + // Required by GraphWriter. + StringRef getEdgeSourceLabel(const unsigned &Source, + const unsigned &Sink) const { + std::string S = + getNode(Source).getLabel().str() + " " + getNode(Sink).getLabel().str(); + assert(EdgeLabels.count(S) == 1 && "Expected to find edge label."); + return EdgeLabels.find(S)->getValue(); + } + + // Return the number of basic blocks (nodes). Required by GraphWriter. 
+ unsigned size() const { return Nodes.size(); } + + const DotCfgDiffNode &getNode(unsigned N) const { + assert(N < Nodes.size() && "Unexpected index for node reference"); + return Nodes[N]; + } + +protected: + // Return the string surrounded by HTML to make it the appropriate colour. + std::string colourize(std::string S, IRChangeDiffType T) const; + + void createNode(StringRef Label, const BlockDataT &BD, + IRChangeDiffType T) { + unsigned Pos = Nodes.size(); + Nodes.emplace_back(*this, Pos, BD, T); + NodePosition.insert({Label, Pos}); + } + + // TODO Nodes should probably be a StringMap after the + // display graph is separated out, which would remove the need for + // NodePosition. + std::vector Nodes; + StringMap NodePosition; + const std::string GraphName; + + StringMap EdgeLabels; +}; + +std::string DotCfgDiffNode::getBodyContent() const { + if (Type == IsCommon) { + assert(Data[1] && "Expected Data[1] to be set."); + + StringRef SR[2]; + for (unsigned I = 0; I < 2; ++I) { + SR[I] = Data[I]->getBody(); + // drop initial '\n' if present + if (SR[I][0] == '\n') + SR[I] = SR[I].drop_front(); + // drop predecessors as they can be big and are redundant + SR[I] = SR[I].drop_until([](char C) { return C == '\n'; }).drop_front(); + } + + SmallString<80> OldLineFormat = formatv( + "%l
", Colours[InBefore]); + SmallString<80> NewLineFormat = formatv( + "%l
", Colours[InAfter]); + SmallString<80> UnchangedLineFormat = formatv( + "%l
", Colours[IsCommon]); + std::string Diff = Data[0]->getLabel().str(); + Diff += ":\n
" + + doSystemDiff(makeHTMLReady(SR[0]), makeHTMLReady(SR[1]), + OldLineFormat, NewLineFormat, UnchangedLineFormat); + + // Diff adds in some empty colour changes which are not valid HTML + // so remove them. Colours are all lowercase alpha characters (as + // listed in https://graphviz.org/pdf/dotguide.pdf). + Regex R(""); + while (true) { + std::string Error; + std::string S = R.sub("", Diff, &Error); + if (Error != "") + return Error; + if (S == Diff) + return Diff; + Diff = S; + } + llvm_unreachable("Should not get here"); + } + + // Put node out in the appropriate colour. + assert(!Data[1] && "Data[1] is set unexpectedly."); + std::string Body = makeHTMLReady(Data[0]->getBody()); + const StringRef BS = Body; + StringRef BS1 = BS; + // Drop leading newline, if present. + if (BS.front() == '\n') + BS1 = BS1.drop_front(1); + // Get label. + StringRef Label = BS1.take_until([](char C) { return C == ':'; }); + // drop predecessors as they can be big and are redundant + BS1 = BS1.drop_until([](char C) { return C == '\n'; }).drop_front(); + + std::string S = "" + Label.str() + ":"; + + // align each line to the left. + while (BS1.size()) { + S.append("
"); + StringRef Line = BS1.take_until([](char C) { return C == '\n'; }); + S.append(Line.str()); + BS1 = BS1.drop_front(Line.size() + 1); + } + S.append("
"); + return S; +} + +std::string DotCfgDiff::colourize(std::string S, IRChangeDiffType T) const { + if (S.length() == 0) + return S; + return "" + S + ""; +} + +std::string DotCfgDiffDisplayGraph::attribute(IRChangeDiffType T) const { + return "color=" + Colours[T]; +} + +DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT &Before, + const FuncDataT &After) + : GraphName(Title.str()) { + StringMap EdgesMap; + + // Handle each basic block in the before IR. + for (auto &B : Before.getData()) { + StringRef Label = B.getKey(); + const BlockDataT &BD = B.getValue(); + createNode(Label, BD, InBefore); + + // Create transitions with names made up of the from block label, the value + // on which the transition is made and the to block label. + for (StringMap::const_iterator Sink = BD.getData().begin(), + E = BD.getData().end(); + Sink != E; ++Sink) { + std::string Key = (Label + " " + Sink->getKey().str()).str() + " " + + BD.getData().getSuccessorLabel(Sink->getKey()).str(); + EdgesMap.insert({Key, InBefore}); + } + } + + // Handle each basic block in the after IR + for (auto &A : After.getData()) { + StringRef Label = A.getKey(); + const BlockDataT &BD = A.getValue(); + unsigned C = NodePosition.count(Label); + if (C == 0) + // This only exists in the after IR. Create the node. + createNode(Label, BD, InAfter); + else { + assert(C == 1 && "Unexpected multiple nodes."); + Nodes[NodePosition[Label]].setCommon(BD); + } + // Add in the edges between the nodes (as common or only in after). + for (StringMap::const_iterator Sink = BD.getData().begin(), + E = BD.getData().end(); + Sink != E; ++Sink) { + std::string Key = (Label + " " + Sink->getKey().str()).str() + " " + + BD.getData().getSuccessorLabel(Sink->getKey()).str(); + unsigned C = EdgesMap.count(Key); + if (C == 0) + EdgesMap.insert({Key, InAfter}); + else { + EdgesMap[Key] = IsCommon; + } + } + } + + // Now go through the map of edges and add them to the node. + for (auto &E : EdgesMap) { + // Extract the source, sink and value from the edge key. 
+ StringRef S = E.getKey(); + auto SP1 = S.rsplit(' '); + auto &SourceSink = SP1.first; + auto SP2 = SourceSink.split(' '); + StringRef Source = SP2.first; + StringRef Sink = SP2.second; + StringRef Value = SP1.second; + + assert(NodePosition.count(Source) == 1 && "Expected to find node."); + DotCfgDiffNode &SourceNode = Nodes[NodePosition[Source]]; + assert(NodePosition.count(Sink) == 1 && "Expected to find node."); + unsigned SinkNode = NodePosition[Sink]; + IRChangeDiffType T = E.second; + + // Look for an edge from Source to Sink + if (EdgeLabels.count(SourceSink) == 0) + EdgeLabels.insert({SourceSink, colourize(Value.str(), T)}); + else { + StringRef V = EdgeLabels.find(SourceSink)->getValue(); + std::string NV = colourize(V.str() + " " + Value.str(), T); + T = IsCommon; + EdgeLabels[SourceSink] = NV; + } + SourceNode.addEdge(SinkNode, Value, T); + } + for (auto &I : Nodes) + I.finalize(*this); +} + +DotCfgDiffDisplayGraph DotCfgDiff::createDisplayGraph(StringRef Title, + StringRef EntryNodeName) { + assert(NodePosition.count(EntryNodeName) == 1 && + "Expected to find entry block in map."); + unsigned Entry = NodePosition[EntryNodeName]; + assert(Entry < Nodes.size() && "Expected to find entry node"); + DotCfgDiffDisplayGraph G(Title.str()); + + std::map NodeMap; + + int EntryIndex = -1; + unsigned Index = 0; + for (auto &I : Nodes) { + if (I.getIndex() == Entry) + EntryIndex = Index; + G.createNode(I.getBodyContent(), I.getType()); + NodeMap.insert({I.getIndex(), Index++}); + } + assert(EntryIndex >= 0 && "Expected entry node index to be set."); + G.setEntryNode(EntryIndex); + + for (auto &I : NodeMap) { + unsigned SourceNode = I.first; + unsigned DisplayNode = I.second; + getNode(SourceNode).createDisplayEdges(G, DisplayNode, NodeMap); + } + return G; +} + +void DotCfgDiffNode::createDisplayEdges( + DotCfgDiffDisplayGraph &DisplayGraph, unsigned DisplayNodeIndex, + std::map &NodeMap) const { + + DisplayNode &SourceDisplayNode = DisplayGraph.getNode(DisplayNodeIndex); + + for (auto I : Edges) { + unsigned SinkNodeIndex = I; + IRChangeDiffType Type = getEdge(SinkNodeIndex).second; + const DotCfgDiffNode *SinkNode = &Graph.getNode(SinkNodeIndex); + + StringRef Label = Graph.getEdgeSourceLabel(getIndex(), SinkNodeIndex); + DisplayNode &SinkDisplayNode = DisplayGraph.getNode(SinkNode->getIndex()); + SourceDisplayNode.createEdge(Label, SinkDisplayNode, Type); + } + SourceDisplayNode.createEdgeMap(); +} + +void DotCfgDiffNode::finalize(DotCfgDiff &G) { + for (auto E : EdgesMap) { + Children.emplace_back(E.first); + Edges.emplace_back(E.first); + } +} + +} // namespace + +namespace llvm { + +template <> struct GraphTraits { + using NodeRef = const DisplayNode *; + using ChildIteratorType = DisplayNode::ChildIterator; + using nodes_iterator = DotCfgDiffDisplayGraph::NodeIterator; + using EdgeRef = const DisplayEdge *; + using ChildEdgeIterator = DisplayNode::EdgeIterator; + + static NodeRef getEntryNode(const DotCfgDiffDisplayGraph *G) { + return G->getEntryNode(); + } + static ChildIteratorType child_begin(NodeRef N) { + return N->children_begin(); + } + static ChildIteratorType child_end(NodeRef N) { return N->children_end(); } + static nodes_iterator nodes_begin(const DotCfgDiffDisplayGraph *G) { + return G->nodes_begin(); + } + static nodes_iterator nodes_end(const DotCfgDiffDisplayGraph *G) { + return G->nodes_end(); + } + static ChildEdgeIterator child_edge_begin(NodeRef N) { + return N->edges_begin(); + } + static ChildEdgeIterator child_edge_end(NodeRef N) { return N->edges_end(); } 
+ static NodeRef edge_dest(EdgeRef E) { return &E->getDestinationNode(); } + static unsigned size(const DotCfgDiffDisplayGraph *G) { return G->size(); } +}; + +template <> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool Simple = false) + : DefaultDOTGraphTraits(Simple) {} + + static bool renderNodesUsingHTML() { return true; } + static std::string getGraphName(const DotCfgDiffDisplayGraph *DiffData) { + return DiffData->getGraphName(); + } + static std::string + getGraphProperties(const DotCfgDiffDisplayGraph *DiffData) { + return "\tsize=\"190, 190\";\n"; + } + static std::string getNodeLabel(const DisplayNode *Node, + const DotCfgDiffDisplayGraph *DiffData) { + return DiffData->getNodeLabel(*Node); + } + static std::string getNodeAttributes(const DisplayNode *Node, + const DotCfgDiffDisplayGraph *DiffData) { + return DiffData->getNodeAttributes(*Node); + } + static std::string getEdgeSourceLabel(const DisplayNode *From, + DisplayNode::ChildIterator &To) { + return From->getEdgeSourceLabel(**To); + } + static std::string getEdgeAttributes(const DisplayNode *From, + DisplayNode::ChildIterator &To, + const DotCfgDiffDisplayGraph *DiffData) { + return DiffData->getEdgeColorAttr(*From, **To); + } +}; + +} // namespace llvm + +namespace { + +void DotCfgDiffDisplayGraph::generateDotFile(StringRef DotFile) { + std::error_code EC; + raw_fd_ostream OutStream(DotFile, EC); + if (EC) { + errs() << "Error: " << EC.message() << "\n"; + return; + } + WriteGraph(OutStream, this, false); + OutStream.flush(); + OutStream.close(); +} + +} // namespace + namespace llvm { +DCData::DCData(const BasicBlock &B) { + // Build up transition labels. + const Instruction *Term = B.getTerminator(); + if (const BranchInst *Br = dyn_cast(Term)) + if (Br->isUnconditional()) + addSuccessorLabel(Br->getSuccessor(0)->getName().str(), ""); + else { + addSuccessorLabel(Br->getSuccessor(0)->getName().str(), "true"); + addSuccessorLabel(Br->getSuccessor(1)->getName().str(), "false"); + } + else if (const SwitchInst *Sw = dyn_cast(Term)) { + addSuccessorLabel(Sw->case_default()->getCaseSuccessor()->getName().str(), + "default"); + for (auto &C : Sw->cases()) { + assert(C.getCaseValue() && "Expected to find case value."); + SmallString<20> Value = formatv("{0}", C.getCaseValue()->getSExtValue()); + addSuccessorLabel(C.getCaseSuccessor()->getName().str(), Value); + } + } else + for (const_succ_iterator I = succ_begin(&B), E = succ_end(&B); I != E; ++I) + addSuccessorLabel((*I)->getName().str(), ""); +} + +DotCfgChangeReporter::DotCfgChangeReporter(bool Verbose) + : ChangeReporter>(Verbose) { + // Set up the colours based on the hidden options. + Colours[InBefore] = BeforeColour; + Colours[InAfter] = AfterColour; + Colours[IsCommon] = CommonColour; +} + +void DotCfgChangeReporter::handleFunctionCompare( + StringRef Name, StringRef Prefix, StringRef PassID, StringRef Divider, + bool InModule, unsigned Minor, const FuncDataT &Before, + const FuncDataT &After) { + assert(HTML && "Expected outstream to be set"); + SmallString<8> Extender; + SmallString<8> Number; + // Handle numbering and file names. + if (InModule) { + Extender = formatv("{0}_{1}", N, Minor); + Number = formatv("{0}.{1}", N, Minor); + } else { + Extender = formatv("{0}", N); + Number = formatv("{0}", N); + } + // Create a temporary file name for the dot file. 
+ SmallVector SV; + sys::fs::createUniquePath("cfgdot-%%%%%%.dot", SV, true); + std::string DotFile = Twine(SV).str(); + + SmallString<20> PDFFileName = formatv("diff_{0}.pdf", Extender); + SmallString<200> Text; + + Text = formatv("{0}.{1}{2}{3}{4}", Number, Prefix, makeHTMLReady(PassID), + Divider, Name); + + DotCfgDiff Diff(Text, Before, After); + std::string EntryBlockName = After.getEntryBlockName(); + // Use the before entry block if the after entry block was removed. + if (EntryBlockName == "") + EntryBlockName = Before.getEntryBlockName(); + assert(EntryBlockName != "" && "Expected to find entry block"); + + DotCfgDiffDisplayGraph DG = Diff.createDisplayGraph(Text, EntryBlockName); + DG.generateDotFile(DotFile); + + *HTML << genHTML(Text, DotFile, PDFFileName); + std::error_code EC = sys::fs::remove(DotFile); + if (EC) + errs() << "Error: " << EC.message() << "\n"; +}
+
+std::string DotCfgChangeReporter::genHTML(StringRef Text, StringRef DotFile, + StringRef PDFFileName) { + SmallString<20> PDFFile = formatv("{0}/{1}", DotCfgDir, PDFFileName); + // Create the PDF file. + static ErrorOr DotExe = sys::findProgramByName(DotBinary); + if (!DotExe) + return "Unable to find dot executable."; + + StringRef Args[] = {DotBinary, "-Tpdf", "-o", PDFFile, DotFile}; + int Result = sys::ExecuteAndWait(*DotExe, Args, None); + if (Result < 0) + return "Error executing system dot."; + + // Create the HTML tag referring to the PDF file. + SmallString<200> S = formatv( + "  <a href=\"{0}\"><button>{1}</button></a><br/>\n", PDFFileName, Text); + return S.c_str(); +}
+
+void DotCfgChangeReporter::handleInitialIR(Any IR) { + assert(HTML && "Expected outstream to be set"); + *HTML << "<button type=\"button\" class=\"collapsible\">0. " + << "Initial IR (by function)</button>\n" + << "<div class=\"content\">\n" + << "  <p>\n"; + // Create representation of IR + IRDataT Data; + IRComparer::analyzeIR(IR, Data); + // Now compare it against itself, which will have everything the + // same and will generate the files. + IRComparer(Data, Data) + .compare(getModuleForComparison(IR), + [&](bool InModule, unsigned Minor, + const FuncDataT &Before, + const FuncDataT &After) -> void { + handleFunctionCompare("", " ", "Initial IR", "", InModule, + Minor, Before, After); + }); + *HTML << "  </p>\n" + << "</div><br/>\n"; + ++N; +}
+
+void DotCfgChangeReporter::generateIRRepresentation(Any IR, StringRef PassID, + IRDataT &Data) { + IRComparer::analyzeIR(IR, Data); +}
+
+void DotCfgChangeReporter::omitAfter(StringRef PassID, std::string &Name) { + assert(HTML && "Expected outstream to be set"); + SmallString<20> Banner = + formatv("  <a>{0}. Pass {1} on {2} omitted because no change</a><br/>\n", + N, makeHTMLReady(PassID), Name); + *HTML << Banner; + ++N; +}
+
+void DotCfgChangeReporter::handleAfter(StringRef PassID, std::string &Name, + const IRDataT &Before, + const IRDataT &After, Any IR) { + assert(HTML && "Expected outstream to be set"); + IRComparer(Before, After) + .compare(getModuleForComparison(IR), + [&](bool InModule, unsigned Minor, + const FuncDataT &Before, + const FuncDataT &After) -> void { + handleFunctionCompare(Name, " Pass ", PassID, " on ", InModule, + Minor, Before, After); + }); + *HTML << "</div><br/>\n"; + ++N; +}
+
+void DotCfgChangeReporter::handleInvalidated(StringRef PassID) { + assert(HTML && "Expected outstream to be set"); + SmallString<20> Banner = + formatv("  <a>{0}. {1} invalidated</a><br/>\n", N, makeHTMLReady(PassID)); + *HTML << Banner; + ++N; +}
+
+void DotCfgChangeReporter::handleFiltered(StringRef PassID, std::string &Name) { + assert(HTML && "Expected outstream to be set"); + SmallString<20> Banner = + formatv("  <a>{0}. Pass {1} on {2} filtered out</a><br/>\n", N, + makeHTMLReady(PassID), Name); + *HTML << Banner; + ++N; +}
+
+void DotCfgChangeReporter::handleIgnored(StringRef PassID, std::string &Name) { + assert(HTML && "Expected outstream to be set"); + SmallString<20> Banner = formatv("  <a>{0}. {1} on {2} ignored</a><br/>\n", N, + makeHTMLReady(PassID), Name); + *HTML << Banner; + ++N; +}
+
+bool DotCfgChangeReporter::initializeHTML() { + std::error_code EC; + HTML = std::make_unique(DotCfgDir + "/passes.html", EC); + if (EC) { + HTML = nullptr; + return false; + } + + *HTML << "<!doctype html>" + << "<html>" + << "<head>" + << "<title>" + << "passes.html" + << "</title></head>\n" + << "<body>"; + return true; +}
+
+DotCfgChangeReporter::~DotCfgChangeReporter() { + if (!HTML) + return; + *HTML + << "</body>" + << "</html>" + << "\n"; + HTML->flush(); + HTML->close(); +}
+
+void DotCfgChangeReporter::registerCallbacks( + PassInstrumentationCallbacks &PIC) { + if ((PrintChanged == ChangePrinter::PrintChangedDotCfgVerbose || + PrintChanged == ChangePrinter::PrintChangedDotCfgQuiet)) { + SmallString<128> OutputDir; + sys::fs::expand_tilde(DotCfgDir, OutputDir); + sys::fs::make_absolute(OutputDir); + assert(!OutputDir.empty() && "expected output dir to be non-empty"); + DotCfgDir = OutputDir.c_str(); + if (initializeHTML()) { + ChangeReporter>::registerRequiredCallbacks(PIC); + return; + } + dbgs() << "Unable to open output stream for -cfg-dot-changed\n"; + } +}
+ StandardInstrumentations::StandardInstrumentations( bool DebugLogging, bool VerifyEach, PrintPassOptions PrintPassOpts) : PrintPass(DebugLogging, PrintPassOpts), OptNone(DebugLogging), @@ -1211,6 +2125,8 @@ StandardInstrumentations::StandardInstrumentations( PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose, PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose || PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet), + WebsiteChangeReporter(PrintChanged == + ChangePrinter::PrintChangedDotCfgVerbose), Verify(DebugLogging), VerifyEach(VerifyEach) {} void StandardInstrumentations::registerCallbacks( @@ -1227,6 +2143,7 @@ void StandardInstrumentations::registerCallbacks( if (VerifyEach) Verify.registerCallbacks(PIC); PrintChangedDiff.registerCallbacks(PIC); + WebsiteChangeReporter.registerCallbacks(PIC); } template class ChangeReporter; diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp index cb5dd92913205..fd8fd3b675b78 100644 --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -245,9 +245,13 @@ const FunctionSamples *FunctionSamples::findFunctionSamples( else Discriminator = DIL->getBaseDiscriminator(); + // Use C++ linkage name if possible.
+ StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = PrevDIL->getScope()->getSubprogram()->getName(); + S.push_back( - std::make_pair(LineLocation(getOffset(DIL), Discriminator), - PrevDIL->getScope()->getSubprogram()->getLinkageName())); + std::make_pair(LineLocation(getOffset(DIL), Discriminator), Name)); PrevDIL = DIL; } if (S.size() == 0) diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp index fae5cac795653..c99a19020511f 100644 --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -1228,6 +1228,12 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) { if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) Flags.append("ordered,"); break; + case SecFuncMetadata: + if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased)) + Flags.append("probe,"); + if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute)) + Flags.append("attr,"); + break; default: break; } diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp index b4a50ede093bb..a2fe37a266177 100644 --- a/llvm/lib/Support/Caching.cpp +++ b/llvm/lib/Support/Caching.cpp @@ -1,4 +1,4 @@ -//===-Caching.cpp - LLVM File Cache Handling ------------------------------===// +//===-Caching.cpp - LLVM Local File Cache ---------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,18 +6,17 @@ // //===----------------------------------------------------------------------===// // -// This file implements the Caching used by ThinLTO. +// This file implements the localCache function, which simplifies creating, +// adding to, and querying a local file system cache. localCache takes care of +// periodically pruning older files from the cache using a CachePruningPolicy. // //===----------------------------------------------------------------------===// #include "llvm/Support/Caching.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" #if !defined(_MSC_VER) && !defined(__MINGW32__) #include @@ -27,10 +26,10 @@ using namespace llvm; -Expected llvm::localCache(Twine CacheNameRef, - Twine TempFilePrefixRef, - Twine CacheDirectoryPathRef, - AddBufferFn AddBuffer) { +Expected llvm::localCache(Twine CacheNameRef, + Twine TempFilePrefixRef, + Twine CacheDirectoryPathRef, + AddBufferFn AddBuffer) { if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPathRef)) return errorCodeToError(EC); @@ -40,7 +39,7 @@ Expected llvm::localCache(Twine CacheNameRef, TempFilePrefixRef.toVector(TempFilePrefix); CacheDirectoryPathRef.toVector(CacheDirectoryPath); - return [=](unsigned Task, StringRef Key) -> AddStreamFn { + return [=](unsigned Task, StringRef Key) -> Expected { // This choice of file name allows the cache to be pruned (see pruneCache() // in include/llvm/Support/CachePruning.h). SmallString<64> EntryPath; @@ -72,12 +71,12 @@ Expected llvm::localCache(Twine CacheNameRef, // Since the file is probably being deleted we handle it in the same way as // if the file did not exist at all. 
if (EC != errc::no_such_file_or_directory && EC != errc::permission_denied) - report_fatal_error(Twine("Failed to open cache file ") + EntryPath + - ": " + EC.message() + "\n"); + return createStringError(EC, Twine("Failed to open cache file ") + + EntryPath + ": " + EC.message() + "\n"); - // This native object stream is responsible for commiting the resulting - // file to the cache and calling AddBuffer to add it to the link. - struct CacheStream : NativeObjectStream { + // This file stream is responsible for committing the resulting file to the + // cache and calling AddBuffer to add it to the link. + struct CacheStream : CachedFileStream { AddBufferFn AddBuffer; sys::fs::TempFile TempFile; std::string EntryPath; @@ -86,11 +85,14 @@ Expected llvm::localCache(Twine CacheNameRef, CacheStream(std::unique_ptr OS, AddBufferFn AddBuffer, sys::fs::TempFile TempFile, std::string EntryPath, unsigned Task) - : NativeObjectStream(std::move(OS)), AddBuffer(std::move(AddBuffer)), + : CachedFileStream(std::move(OS)), AddBuffer(std::move(AddBuffer)), TempFile(std::move(TempFile)), EntryPath(std::move(EntryPath)), Task(Task) {} ~CacheStream() { + // TODO: Commit manually rather than in this non-trivial destructor, so + // that the report_fatal_error calls below can become returned Errors. + // Make sure the stream is closed before committing it. OS.reset(); @@ -138,17 +140,17 @@ Expected llvm::localCache(Twine CacheNameRef, } }; - return [=](size_t Task) -> std::unique_ptr { + return [=](size_t Task) -> Expected> { // Write to a temporary to avoid race condition SmallString<64> TempFilenameModel; sys::path::append(TempFilenameModel, CacheDirectoryPath, TempFilePrefix + "-%%%%%%.tmp.o"); Expected Temp = sys::fs::TempFile::create( TempFilenameModel, sys::fs::owner_read | sys::fs::owner_write); - if (!Temp) { - errs() << "Error: " << toString(Temp.takeError()) << "\n"; - report_fatal_error(CacheName + ": Can't get a temporary file"); - } + if (!Temp) + return createStringError(errc::io_error, + toString(Temp.takeError()) + ": " + CacheName + + ": Can't get a temporary file"); // This CacheStream will move the temporary file into the cache when done. return std::make_unique( diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp index a5045f6f1cb8c..3957547dfaaac 100644 --- a/llvm/lib/Support/Path.cpp +++ b/llvm/lib/Support/Path.cpp @@ -12,6 +12,7 @@ #include "llvm/Support/Path.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Config/config.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Errc.h" @@ -37,13 +38,16 @@ namespace { using llvm::sys::path::Style; inline Style real_style(Style style) { + if (style != Style::native) + return style; if (is_style_posix(style)) return Style::posix; - return Style::windows; + return LLVM_WINDOWS_PREFER_FORWARD_SLASH ? 
Style::windows_slash + : Style::windows_backslash; } inline const char *separators(Style style) { - if (real_style(style) == Style::windows) + if (is_style_windows(style)) return "\\/"; return "/"; } @@ -547,7 +551,9 @@ void native(SmallVectorImpl &Path, Style style) { if (Path.empty()) return; if (is_style_windows(style)) { - std::replace(Path.begin(), Path.end(), '/', '\\'); + for (char &Ch : Path) + if (is_separator(Ch, style)) + Ch = preferred_separator(style); if (Path[0] == '~' && (Path.size() == 1 || is_separator(Path[1], style))) { SmallString<128> PathHome; home_directory(PathHome); @@ -601,7 +607,7 @@ bool is_separator(char value, Style style) { } StringRef get_separator(Style style) { - if (is_style_windows(style)) + if (real_style(style) == Style::windows) return "\\"; return "/"; } @@ -1212,9 +1218,7 @@ Error TempFile::discard() { std::error_code RemoveEC; if (Remove && !TmpName.empty()) { RemoveEC = fs::remove(TmpName); -#ifndef _WIN32 sys::DontRemoveFileOnSignal(TmpName); -#endif if (!RemoveEC) TmpName = ""; } else { @@ -1260,8 +1264,8 @@ Error TempFile::keep(const Twine &Name) { if (RenameEC) remove(TmpName); } - sys::DontRemoveFileOnSignal(TmpName); #endif + sys::DontRemoveFileOnSignal(TmpName); if (!RenameEC) TmpName = ""; @@ -1283,9 +1287,8 @@ Error TempFile::keep() { auto H = reinterpret_cast(_get_osfhandle(FD)); if (std::error_code EC = setDeleteDisposition(H, false)) return errorCodeToError(EC); -#else - sys::DontRemoveFileOnSignal(TmpName); #endif + sys::DontRemoveFileOnSignal(TmpName); TmpName = ""; @@ -1309,17 +1312,20 @@ Expected TempFile::create(const Twine &Model, unsigned Mode, TempFile Ret(ResultPath, FD); #ifdef _WIN32 auto H = reinterpret_cast(_get_osfhandle(FD)); + bool SetSignalHandler = false; if (std::error_code EC = setDeleteDisposition(H, true)) { Ret.RemoveOnClose = true; + SetSignalHandler = true; } #else - if (sys::RemoveFileOnSignal(ResultPath)) { + bool SetSignalHandler = true; +#endif + if (SetSignalHandler && sys::RemoveFileOnSignal(ResultPath)) { // Make sure we delete the file when RemoveFileOnSignal fails. consumeError(Ret.discard()); std::error_code EC(errc::operation_not_permitted); return errorCodeToError(EC); } -#endif return std::move(Ret); } } // namespace fs diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 8bbfc757f0755..8e984002f90d2 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -64,6 +64,7 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = { {"zvamo", RISCVExtensionVersion{0, 10}}, {"zvlsseg", RISCVExtensionVersion{0, 10}}, + {"zfhmin", RISCVExtensionVersion{0, 1}}, {"zfh", RISCVExtensionVersion{0, 1}}, }; diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp index d69199133f6a6..08e1a8a0e0aa7 100644 --- a/llvm/lib/Support/Timer.cpp +++ b/llvm/lib/Support/Timer.cpp @@ -393,8 +393,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) { OS << " --- Name ---\n"; // Loop through all of the timing data, printing it out. 
- for (const PrintRecord &Record : make_range(TimersToPrint.rbegin(), - TimersToPrint.rend())) { + for (const PrintRecord &Record : llvm::reverse(TimersToPrint)) { Record.Time.print(Total, OS); OS << Record.Description << '\n'; } diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp index 127144364d86e..b224a4579876e 100644 --- a/llvm/lib/Support/Triple.cpp +++ b/llvm/lib/Support/Triple.cpp @@ -68,6 +68,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case sparcv9: return "sparcv9"; case spir64: return "spir64"; case spir: return "spir"; + case spirv32: return "spirv32"; + case spirv64: return "spirv64"; case systemz: return "s390x"; case tce: return "tce"; case tcele: return "tcele"; @@ -148,6 +150,10 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case spir: case spir64: return "spir"; + + case spirv32: + case spirv64: return "spirv"; + case kalimba: return "kalimba"; case lanai: return "lanai"; case shave: return "shave"; @@ -325,6 +331,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("amdil64", amdil64) .Case("hsail", hsail) .Case("hsail64", hsail64) + .StartsWith("spirv64", spirv64) + .StartsWith("spirv32", spirv32) .StartsWith("spir64", spir64) .StartsWith("spir", spir) .Case("kalimba", kalimba) @@ -459,6 +467,8 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("amdil64", Triple::amdil64) .Case("hsail", Triple::hsail) .Case("hsail64", Triple::hsail64) + .StartsWith("spirv64", Triple::spirv64) + .StartsWith("spirv32", Triple::spirv32) .StartsWith("spir64", Triple::spir64) .StartsWith("spir", Triple::spir) .StartsWith("kalimba", Triple::kalimba) @@ -779,6 +789,11 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::wasm32: case Triple::wasm64: return Triple::Wasm; + + case Triple::spirv32: + case Triple::spirv64: + // TODO: In future this will be Triple::SPIRV. 
+ return Triple::UnknownObjectFormat; } llvm_unreachable("unknown architecture"); } @@ -1349,6 +1364,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::spir: + case llvm::Triple::spirv32: case llvm::Triple::tce: case llvm::Triple::tcele: case llvm::Triple::thumb: @@ -1375,6 +1391,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::riscv64: case llvm::Triple::sparcv9: case llvm::Triple::spir64: + case llvm::Triple::spirv64: case llvm::Triple::systemz: case llvm::Triple::ve: case llvm::Triple::wasm64: @@ -1435,6 +1452,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::sparc: case Triple::sparcel: case Triple::spir: + case Triple::spirv32: case Triple::tce: case Triple::tcele: case Triple::thumb: @@ -1463,6 +1481,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::riscv64: T.setArch(Triple::riscv32); break; case Triple::sparcv9: T.setArch(Triple::sparc); break; case Triple::spir64: T.setArch(Triple::spir); break; + case Triple::spirv64: T.setArch(Triple::spirv32); break; case Triple::wasm64: T.setArch(Triple::wasm32); break; case Triple::x86_64: T.setArch(Triple::x86); break; } @@ -1508,6 +1527,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::riscv64: case Triple::sparcv9: case Triple::spir64: + case Triple::spirv64: case Triple::systemz: case Triple::ve: case Triple::wasm64: @@ -1534,6 +1554,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::riscv32: T.setArch(Triple::riscv64); break; case Triple::sparc: T.setArch(Triple::sparcv9); break; case Triple::spir: T.setArch(Triple::spir64); break; + case Triple::spirv32: T.setArch(Triple::spirv64); break; case Triple::thumb: T.setArch(Triple::aarch64); break; case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; case Triple::wasm32: T.setArch(Triple::wasm64); break; @@ -1570,6 +1591,8 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::shave: case Triple::spir64: case Triple::spir: + case Triple::spirv32: + case Triple::spirv64: case Triple::wasm32: case Triple::wasm64: case Triple::x86: @@ -1673,6 +1696,8 @@ bool Triple::isLittleEndian() const { case Triple::sparcel: case Triple::spir64: case Triple::spir: + case Triple::spirv32: + case Triple::spirv64: case Triple::tcele: case Triple::thumb: case Triple::ve: diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 433b071832468..a4abfe19bcbdc 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -1041,9 +1041,10 @@ static llvm::sys::path::Style getExistingStyle(llvm::StringRef Path) { // Detect the path style in use by checking the first separator. llvm::sys::path::Style style = llvm::sys::path::Style::native; const size_t n = Path.find_first_of("/\\"); + // Can't distinguish between posix and windows_slash here. if (n != static_cast(-1)) style = (Path[n] == '/') ? llvm::sys::path::Style::posix - : llvm::sys::path::Style::windows; + : llvm::sys::path::Style::windows_backslash; return style; } @@ -1117,6 +1118,7 @@ class llvm::vfs::RedirectingFSDirIterImpl } }; +namespace { /// Directory iterator implementation for \c RedirectingFileSystem's /// directory remap entries that maps the paths reported by the external /// file system's directory iterator back to the virtual directory's path. 
@@ -1155,6 +1157,7 @@ class RedirectingFSDirRemapIterImpl : public llvm::vfs::detail::DirIterImpl { return EC; } }; +} // namespace llvm::ErrorOr RedirectingFileSystem::getCurrentWorkingDirectory() const { @@ -1187,8 +1190,10 @@ std::error_code RedirectingFileSystem::isLocal(const Twine &Path_, } std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl &Path) const { + // is_absolute(..., Style::windows_*) accepts paths with both slash types. if (llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::posix) || - llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::windows)) + llvm::sys::path::is_absolute(Path, + llvm::sys::path::Style::windows_backslash)) return {}; auto WorkingDir = getCurrentWorkingDirectory(); @@ -1199,9 +1204,15 @@ std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl &Path) // is native and there is no way to override that. Since we know WorkingDir // is absolute, we can use it to determine which style we actually have and // append Path ourselves. - sys::path::Style style = sys::path::Style::windows; + sys::path::Style style = sys::path::Style::windows_backslash; if (sys::path::is_absolute(WorkingDir.get(), sys::path::Style::posix)) { style = sys::path::Style::posix; + } else { + // Distinguish between windows_backslash and windows_slash; getExistingStyle + // returns posix for a path with windows_slash. + if (getExistingStyle(WorkingDir.get()) != + sys::path::Style::windows_backslash) + style = sys::path::Style::windows_slash; } std::string Result = WorkingDir.get(); @@ -1619,8 +1630,9 @@ class llvm::vfs::RedirectingFileSystemParser { // which style we have, and use it consistently. if (sys::path::is_absolute(Name, sys::path::Style::posix)) { path_style = sys::path::Style::posix; - } else if (sys::path::is_absolute(Name, sys::path::Style::windows)) { - path_style = sys::path::Style::windows; + } else if (sys::path::is_absolute(Name, + sys::path::Style::windows_backslash)) { + path_style = sys::path::Style::windows_backslash; } else { assert(NameValueNode && "Name presence should be checked earlier"); error(NameValueNode, diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc index f4c2628b1a55d..b15e71a9ce2a2 100644 --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -74,6 +74,11 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl &Path16, SmallString Path8Str; Path8.toVector(Path8Str); + // If the path is a long path, mangled into forward slashes, normalize + // back to backslashes here. + if (Path8Str.startswith("//?/")) + llvm::sys::path::native(Path8Str, path::Style::windows_backslash); + if (std::error_code EC = UTF8ToUTF16(Path8Str, Path16)) return EC; @@ -100,8 +105,10 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl &Path16, } // Remove '.' and '..' because long paths treat these as real path components. + // Explicitly use the backslash form here, as we're prepending the \\?\ + // prefix. 
llvm::sys::path::native(Path8Str, path::Style::windows); - llvm::sys::path::remove_dots(Path8Str, true); + llvm::sys::path::remove_dots(Path8Str, true, path::Style::windows); const StringRef RootName = llvm::sys::path::root_name(Path8Str); assert(!RootName.empty() && @@ -145,6 +152,7 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) { if (UTF16ToUTF8(PathName.data(), PathName.size(), PathNameUTF8)) return ""; + llvm::sys::path::make_preferred(PathNameUTF8); return std::string(PathNameUTF8.data()); } @@ -207,7 +215,13 @@ std::error_code current_path(SmallVectorImpl &result) { // On success, GetCurrentDirectoryW returns the number of characters not // including the null-terminator. cur_path.set_size(len); - return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); + + if (std::error_code EC = + UTF16ToUTF8(cur_path.begin(), cur_path.size(), result)) + return EC; + + llvm::sys::path::make_preferred(result); + return std::error_code(); } std::error_code set_current_path(const Twine &path) { @@ -388,7 +402,11 @@ static std::error_code realPathFromHandle(HANDLE H, } // Convert the result from UTF-16 to UTF-8. - return UTF16ToUTF8(Data, CountChars, RealPath); + if (std::error_code EC = UTF16ToUTF8(Data, CountChars, RealPath)) + return EC; + + llvm::sys::path::make_preferred(RealPath); + return std::error_code(); } std::error_code is_local(int FD, bool &Result) { @@ -1407,6 +1425,8 @@ static bool getKnownFolderPath(KNOWNFOLDERID folderId, bool ok = !UTF16ToUTF8(path, ::wcslen(path), result); ::CoTaskMemFree(path); + if (ok) + llvm::sys::path::make_preferred(result); return ok; } @@ -1467,6 +1487,7 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl &Result) { // Fall back to a system default. const char *DefaultResult = "C:\\Temp"; Result.append(DefaultResult, DefaultResult + strlen(DefaultResult)); + llvm::sys::path::make_preferred(Result); } } // end namespace path diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc index a0d94e6e253b4..6732063b562e6 100644 --- a/llvm/lib/Support/Windows/Process.inc +++ b/llvm/lib/Support/Windows/Process.inc @@ -261,6 +261,7 @@ windows::GetCommandLineArguments(SmallVectorImpl &Args, EC = GetExecutableName(Filename); if (EC) return EC; + sys::path::make_preferred(Arg0); sys::path::append(Arg0, Filename); Args[0] = Saver.save(Arg0).data(); return std::error_code(); diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc index 824834c1cbbe9..a9cf2db7ec72d 100644 --- a/llvm/lib/Support/Windows/Program.inc +++ b/llvm/lib/Support/Windows/Program.inc @@ -103,6 +103,7 @@ ErrorOr sys::findProgramByName(StringRef Name, if (U8Result.empty()) return mapWindowsError(::GetLastError()); + llvm::sys::path::make_preferred(U8Result); return std::string(U8Result.begin(), U8Result.end()); } diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp index 0b1024648b66c..762255b43136a 100644 --- a/llvm/lib/TableGen/Main.cpp +++ b/llvm/lib/TableGen/Main.cpp @@ -55,6 +55,10 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed")); static cl::opt TimePhases("time-phases", cl::desc("Time phases of parser and backend")); +static cl::opt NoWarnOnUnusedTemplateArgs( + "no-warn-on-unused-template-args", + cl::desc("Disable unused template argument warnings.")); + static int reportError(const char *ProgName, Twine Msg) { errs() << ProgName << ": " << Msg; errs().flush(); @@ -107,7 +111,7 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn 
*MainFn) { // it later. SrcMgr.setIncludeDirs(IncludeDirs); - TGParser Parser(SrcMgr, MacroNames, Records); + TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs); if (Parser.ParseFile()) return 1; diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index aee8b853a0d90..a81014ef6157a 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -2660,6 +2660,16 @@ void Record::checkRecordAssertions() { } } +// Report a warning if the record has unused template arguments. +void Record::checkUnusedTemplateArgs() { + for (const Init *TA : getTemplateArgs()) { + const RecordVal *Arg = getValue(TA); + if (!Arg->isUsed()) + PrintWarning(Arg->getLoc(), + "unused template argument: " + Twine(Arg->getName())); + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void RecordKeeper::dump() const { errs() << *this; } #endif diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index ed7963031b242..6ccca4d69f40a 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -874,8 +874,9 @@ Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc, Record *TemplateRec = CurMultiClass ? &CurMultiClass->Rec : CurRec; if (TemplateRec->isTemplateArg(TemplateArgName)) { - const RecordVal *RV = TemplateRec->getValue(TemplateArgName); + RecordVal *RV = TemplateRec->getValue(TemplateArgName); assert(RV && "Template arg doesn't exist??"); + RV->setUsed(true); return VarInit::get(TemplateArgName, RV->getType()); } else if (Name->getValue() == "NAME") { return VarInit::get(TemplateArgName, StringRecTy::get()); @@ -3346,7 +3347,12 @@ bool TGParser::ParseClass() { if (ParseTemplateArgList(CurRec)) return true; - return ParseObjectBody(CurRec); + if (ParseObjectBody(CurRec)) + return true; + + if (!NoWarnOnUnusedTemplateArgs) + CurRec->checkUnusedTemplateArgs(); + return false; } /// ParseLetList - Parse a non-empty list of assignment expressions into a list @@ -3541,6 +3547,9 @@ bool TGParser::ParseMultiClass() { PopLocalScope(MulticlassScope); } + if (!NoWarnOnUnusedTemplateArgs) + CurMultiClass->Rec.checkUnusedTemplateArgs(); + CurMultiClass = nullptr; return false; } diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h index 6e3c5186e4f66..00883c858d581 100644 --- a/llvm/lib/TableGen/TGParser.h +++ b/llvm/lib/TableGen/TGParser.h @@ -160,10 +160,13 @@ class TGParser { // exist. }; + bool NoWarnOnUnusedTemplateArgs = false; + public: - TGParser(SourceMgr &SM, ArrayRef Macros, - RecordKeeper &records) - : Lex(SM, Macros), CurMultiClass(nullptr), Records(records) {} + TGParser(SourceMgr &SM, ArrayRef Macros, RecordKeeper &records, + const bool NoWarnOnUnusedTemplateArgs = false) + : Lex(SM, Macros), CurMultiClass(nullptr), Records(records), + NoWarnOnUnusedTemplateArgs(NoWarnOnUnusedTemplateArgs) {} /// ParseFile - Main entrypoint for parsing a tblgen file. These parser /// routines return true on error, or false on success. 
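The TableGen changes above thread a single boolean from the command line down to TGParser, so the warning can also be controlled by anything that constructs the parser directly. A minimal embedding sketch follows; it is illustrative only, since TGParser.h is an internal header under llvm/lib/TableGen rather than a public API, and the helper name parseTableGenBuffer is invented here:

  #include <memory>
  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/SourceMgr.h"
  #include "llvm/TableGen/Record.h"
  #include "TGParser.h" // Internal to llvm/lib/TableGen; illustrative only.

  // Hypothetical helper: parse an in-memory .td buffer, optionally silencing
  // the new unused-template-argument warnings, mirroring what the
  // -no-warn-on-unused-template-args flag does in TableGenMain.
  static bool parseTableGenBuffer(std::unique_ptr<llvm::MemoryBuffer> Buf,
                                  llvm::RecordKeeper &Records,
                                  bool NoUnusedTemplateArgWarnings) {
    llvm::SourceMgr SrcMgr;
    SrcMgr.AddNewSourceBuffer(std::move(Buf), llvm::SMLoc());
    llvm::TGParser Parser(SrcMgr, /*Macros=*/{}, Records,
                          NoUnusedTemplateArgWarnings);
    return Parser.ParseFile(); // True on error, false on success.
  }

Defaulting the new constructor parameter to false keeps every existing caller on the warning-enabled path, which is why only TableGenMain needs to change.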
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 9224f84fa84fa..7cb6cc8c84821 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -691,6 +691,12 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", FeatureFuseAES, FeaturePostRAScheduler]>; +def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", + "Cortex-X2 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion]>; + def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", "Fujitsu A64FX processors", [ FeaturePostRAScheduler, @@ -941,6 +947,10 @@ def ProcessorFeatures { list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureRCPC, FeaturePerfMon, FeatureSPE, FeatureFullFP16, FeatureDotProd]; + list X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, + FeatureMatMulInt8, FeatureBF16, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; list A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, FeatureSHA2, FeaturePerfMon, FeatureFullFP16, FeatureSVE, FeatureComplxNum]; @@ -1049,6 +1059,8 @@ def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, [TuneR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, [TuneX1]>; +def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2, + [TuneX2]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; def : ProcessorModel<"neoverse-n1", CortexA57Model, diff --git a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp index e80fe2cada09c..7fd51a98ad947 100644 --- a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp +++ b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp @@ -159,7 +159,7 @@ static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB, // If there is no non-pseudo in the current block, loop back around and try // the previous block (if there is one). while ((FMBB = getBBFallenThrough(FMBB, TII))) { - for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend())) + for (MachineInstr &I : llvm::reverse(*FMBB)) if (!I.isPseudo()) return &I; } diff --git a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index c996d2df8c381..cd67e058a9c15 100644 --- a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -377,8 +377,7 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) { // processMachineBasicBlock - Main optimzation loop. 
bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { bool Changed = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { - MachineInstr &MI = *I++; + for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) { if (isProfitableToTransform(MI)) { transformInstruction(MI); Changed = true; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 16aed245b80b4..1dd4f19a9fc6c 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2536,9 +2536,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( } return true; } - for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; - ++RPII) { - RegPairInfo RPI = *RPII; + for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) { unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; unsigned StrOpc; diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h index f8adaf36db845..e6d997f91b47b 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -67,8 +67,6 @@ class AArch64FrameLowering : public TargetFrameLowering { bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; - bool hasSwiftExtendedFrame(const MachineFunction &MF) const; - bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector &CSI, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c3624058b42c3..07011ab02070b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10912,7 +10912,7 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SDLoc DL(Op); EVT VT = Op.getValueType(); - if (!isTypeLegal(VT) || !VT.isInteger()) + if (!isTypeLegal(VT)) return SDValue(); SDValue Vec0 = Op.getOperand(0); @@ -10922,9 +10922,19 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2)) return SDValue(); - // Extend elements of smaller vector... - EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext())); - SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1); + EVT WideVT; + SDValue ExtVec; + + if (VT.isFloatingPoint()) { + // The InVT type should be legal. We can safely cast the unpacked + // subvector from InVT -> VT. + WideVT = VT; + ExtVec = getSVESafeBitCast(VT, Vec1, DAG); + } else { + // Extend elements of smaller vector... 
+ WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext())); + ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1); + } if (Idx == 0) { SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0); @@ -12056,6 +12066,18 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType( } static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) { + if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext())) + return ScalableVectorType::get(VTy->getElementType(), 2); + + if (VTy->getElementType() == Type::getFloatTy(VTy->getContext())) + return ScalableVectorType::get(VTy->getElementType(), 4); + + if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext())) + return ScalableVectorType::get(VTy->getElementType(), 8); + + if (VTy->getElementType() == Type::getHalfTy(VTy->getContext())) + return ScalableVectorType::get(VTy->getElementType(), 8); + if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext())) return ScalableVectorType::get(VTy->getElementType(), 2); @@ -12684,7 +12706,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, bool AArch64TargetLowering::generateFMAsInMachineCombiner( EVT VT, CodeGenOpt::Level OptLevel) const { - return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector(); + return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() && + !useSVEForFixedLengthVectorVT(VT); } const MCPhysReg * @@ -13089,7 +13112,7 @@ static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle, DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType), DAG.getConstant(0, DL, MVT::i64)); - std::vector ShuffleMask(TargetType.getVectorElementCount().getValue()); + std::vector ShuffleMask(TargetType.getVectorNumElements()); SDValue VectorShuffleNode = DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode, @@ -13131,12 +13154,44 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y, + // and in MachineCombiner pass, add+mul will be combined into madd. + // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X. + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue MulOper; + unsigned AddSubOpc; + + auto IsAddSubWith1 = [&](SDValue V) -> bool { + AddSubOpc = V->getOpcode(); + if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) { + SDValue Opnd = V->getOperand(1); + MulOper = V->getOperand(0); + if (AddSubOpc == ISD::SUB) + std::swap(Opnd, MulOper); + if (auto C = dyn_cast(Opnd)) + return C->isOne(); + } + return false; + }; + + if (IsAddSubWith1(N0)) { + SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper); + return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal); + } + + if (IsAddSubWith1(N1)) { + SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper); + return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal); + } + // The below optimizations require a constant RHS. - if (!isa(N->getOperand(1))) + if (!isa(N1)) return SDValue(); - SDValue N0 = N->getOperand(0); - ConstantSDNode *C = cast(N->getOperand(1)); + ConstantSDNode *C = cast(N1); const APInt &ConstValue = C->getAPIntValue(); // Allow the scaling to be folded into the `cnt` instruction by preventing @@ -13177,7 +13232,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, // and shift+add+shift. 
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes); - unsigned ShiftAmt, AddSubOpc; + unsigned ShiftAmt; // Is the shifted value the LHS operand of the add/sub? bool ShiftValUseIsN0 = true; // Do we need to negate the result? @@ -13214,8 +13269,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } - SDLoc DL(N); - EVT VT = N->getValueType(0); SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShiftAmt, DL, MVT::i64)); @@ -15560,6 +15613,18 @@ static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) { + assert((N->getOpcode() == AArch64ISD::UUNPKHI || + N->getOpcode() == AArch64ISD::UUNPKLO) && + "Unexpected Opcode!"); + + // uunpklo/hi undef -> undef + if (N->getOperand(0).isUndef()) + return DAG.getUNDEF(N->getValueType(0)); + + return SDValue(); +} + static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); SDValue Op0 = N->getOperand(0); @@ -17226,6 +17291,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performNVCASTCombine(N); case AArch64ISD::SPLICE: return performSpliceCombine(N, DAG); + case AArch64ISD::UUNPKLO: + case AArch64ISD::UUNPKHI: + return performUnpackCombine(N, DAG); case AArch64ISD::UZP1: return performUzpCombine(N, DAG); case AArch64ISD::SETCC_MERGE_ZERO: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6cfd2a51743a9..3d8826f6b2278 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3754,35 +3754,56 @@ defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; // AArch64's FCVT instructions saturate when out of range. 
multiclass FPToIntegerSatPats { + let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat f16:$Rn, i32)), (!cast(INST # UWHr) f16:$Rn)>; - def : Pat<(i32 (to_int_sat f32:$Rn, i32)), - (!cast(INST # UWSr) f32:$Rn)>; - def : Pat<(i32 (to_int_sat f64:$Rn, i32)), - (!cast(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int_sat f16:$Rn, i64)), (!cast(INST # UXHr) f16:$Rn)>; + } + def : Pat<(i32 (to_int_sat f32:$Rn, i32)), + (!cast(INST # UWSr) f32:$Rn)>; def : Pat<(i64 (to_int_sat f32:$Rn, i64)), (!cast(INST # UXSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat f64:$Rn, i32)), + (!cast(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int_sat f64:$Rn, i64)), (!cast(INST # UXDr) f64:$Rn)>; + + let Predicates = [HasFullFP16] in { + def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), + (!cast(INST # SWHri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)), + (!cast(INST # SXHri) $Rn, $scale)>; + } + def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)), + (!cast(INST # SWSri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)), + (!cast(INST # SXSri) $Rn, $scale)>; + def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)), + (!cast(INST # SWDri) $Rn, $scale)>; + def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)), + (!cast(INST # SXDri) $Rn, $scale)>; } defm : FPToIntegerSatPats; defm : FPToIntegerSatPats; multiclass FPToIntegerIntPats { + let Predicates = [HasFullFP16] in { def : Pat<(i32 (round f16:$Rn)), (!cast(INST # UWHr) $Rn)>; def : Pat<(i64 (round f16:$Rn)), (!cast(INST # UXHr) $Rn)>; + } def : Pat<(i32 (round f32:$Rn)), (!cast(INST # UWSr) $Rn)>; def : Pat<(i64 (round f32:$Rn)), (!cast(INST # UXSr) $Rn)>; def : Pat<(i32 (round f64:$Rn)), (!cast(INST # UWDr) $Rn)>; def : Pat<(i64 (round f64:$Rn)), (!cast(INST # UXDr) $Rn)>; + let Predicates = [HasFullFP16] in { def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), (!cast(INST # SWHri) $Rn, $scale)>; def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))), (!cast(INST # SXHri) $Rn, $scale)>; + } def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))), (!cast(INST # SWSri) $Rn, $scale)>; def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))), @@ -3807,10 +3828,12 @@ multiclass FPToIntegerPats(INST # UXDr) f64:$Rn)>; // These instructions saturate like fp_to_[su]int_sat. 
+ let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), (!cast(INST # UWHr) f16:$Rn)>; def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)), (!cast(INST # UXHr) f16:$Rn)>; + } def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)), (!cast(INST # UWSr) f32:$Rn)>; def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)), @@ -5753,7 +5776,7 @@ def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef, (v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))), (i64 0))), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (UADDLVv4i16v V64:$op), ssub), ssub)>; + (UADDLVv8i8v V64:$op), hsub), ssub)>; def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))), (i64 0))), (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp index 5d8af760f5acc..d2488f61eb4be 100644 --- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp +++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp @@ -176,20 +176,19 @@ bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() { } void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) { - for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end(); - UI != E;) { - MachineInstr *UseI = &*(UI++); - if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) { + for (MachineInstr &UseI : + llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) { + if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) { // FI operand is always the one before the immediate offset. - unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1; - if (UseI->getOperand(OpIdx).isReg() && - UseI->getOperand(OpIdx).getReg() == TaggedReg) { - UseI->getOperand(OpIdx).ChangeToFrameIndex(FI); - UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); + unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1; + if (UseI.getOperand(OpIdx).isReg() && + UseI.getOperand(OpIdx).getReg() == TaggedReg) { + UseI.getOperand(OpIdx).ChangeToFrameIndex(FI); + UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); } - } else if (UseI->isCopy() && - Register::isVirtualRegister(UseI->getOperand(0).getReg())) { - uncheckUsesOf(UseI->getOperand(0).getReg(), FI); + } else if (UseI.isCopy() && + Register::isVirtualRegister(UseI.getOperand(0).getReg())) { + uncheckUsesOf(UseI.getOperand(0).getReg(), FI); } } } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 3072b5a412192..27001d85158e7 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -104,6 +104,10 @@ void AArch64Subtarget::initializeProperties() { case CortexX1: PrefFunctionLogAlignment = 4; break; + case CortexX2: + PrefFunctionLogAlignment = 4; + VScaleForTuning = 1; + break; case A64FX: CacheLineSize = 256; PrefFunctionLogAlignment = 3; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index a34e14896192a..42ea63d8f5243 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -62,6 +62,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { CortexA78C, CortexR82, CortexX1, + CortexX2, ExynosM3, Falkor, Kryo, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e5bfe3dc76a0e..5182f4d204089 100644 --- 
a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -698,38 +698,29 @@ static Optional instCombineSVEPTest(InstCombiner &IC, static Optional instCombineSVEVectorFMLA(InstCombiner &IC, IntrinsicInst &II) { // fold (fadd p a (fmul p b c)) -> (fma p a b c) - Value *p, *FMul, *a, *b, *c; - auto m_SVEFAdd = [](auto p, auto w, auto x) { - return m_CombineOr(m_Intrinsic(p, w, x), - m_Intrinsic(p, x, w)); - }; - auto m_SVEFMul = [](auto p, auto y, auto z) { - return m_Intrinsic(p, y, z); - }; - if (!match(&II, m_SVEFAdd(m_Value(p), m_Value(a), - m_CombineAnd(m_Value(FMul), - m_SVEFMul(m_Deferred(p), m_Value(b), - m_Value(c)))))) + Value *P = II.getOperand(0); + Value *A = II.getOperand(1); + auto FMul = II.getOperand(2); + Value *B, *C; + if (!match(FMul, m_Intrinsic( + m_Specific(P), m_Value(B), m_Value(C)))) return None; if (!FMul->hasOneUse()) return None; llvm::FastMathFlags FAddFlags = II.getFastMathFlags(); - llvm::FastMathFlags FMulFlags = cast(FMul)->getFastMathFlags(); - // Don't combine when FMul & Fadd flags differ to prevent the loss of any - // additional important flags - if (FAddFlags != FMulFlags) + // Stop the combine when the flags on the inputs differ in case dropping flags + // would lead to us missing out on more beneficial optimizations. + if (FAddFlags != cast(FMul)->getFastMathFlags()) return None; - bool AllowReassoc = FAddFlags.allowReassoc() && FMulFlags.allowReassoc(); - bool AllowContract = FAddFlags.allowContract() && FMulFlags.allowContract(); - if (!AllowReassoc || !AllowContract) + if (!FAddFlags.allowContract()) return None; IRBuilder<> Builder(II.getContext()); Builder.SetInsertPoint(&II); auto FMLA = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmla, - {II.getType()}, {p, a, b, c}, &II); + {II.getType()}, {P, A, B, C}, &II); FMLA->setFastMathFlags(FAddFlags); return IC.replaceInstUsesWith(II, FMLA); } @@ -765,8 +756,7 @@ static Optional instCombineSVEVectorBinOp(InstCombiner &IC, static Optional instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { - auto FMLA = instCombineSVEVectorFMLA(IC, II); - if (FMLA) + if (auto FMLA = instCombineSVEVectorFMLA(IC, II)) return FMLA; return instCombineSVEVectorBinOp(IC, II); } @@ -911,6 +901,74 @@ static Optional instCombineSVEZip(InstCombiner &IC, return None; } +static Optional instCombineLD1GatherIndex(InstCombiner &IC, + IntrinsicInst &II) { + Value *Mask = II.getOperand(0); + Value *BasePtr = II.getOperand(1); + Value *Index = II.getOperand(2); + Type *Ty = II.getType(); + Type *BasePtrTy = BasePtr->getType(); + Value *PassThru = ConstantAggregateZero::get(Ty); + + // Contiguous gather => masked load. 
+ // (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1)) + // => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer) + Value *IndexBase; + if (match(Index, m_Intrinsic( + m_Value(IndexBase), m_SpecificInt(1)))) { + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); + + Align Alignment = + BasePtr->getPointerAlignment(II.getModule()->getDataLayout()); + + Type *VecPtrTy = PointerType::getUnqual(Ty); + Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr, + IndexBase); + Ptr = Builder.CreateBitCast(Ptr, VecPtrTy); + CallInst *MaskedLoad = + Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru); + MaskedLoad->takeName(&II); + return IC.replaceInstUsesWith(II, MaskedLoad); + } + + return None; +} + +static Optional instCombineST1ScatterIndex(InstCombiner &IC, + IntrinsicInst &II) { + Value *Val = II.getOperand(0); + Value *Mask = II.getOperand(1); + Value *BasePtr = II.getOperand(2); + Value *Index = II.getOperand(3); + Type *Ty = Val->getType(); + Type *BasePtrTy = BasePtr->getType(); + + // Contiguous scatter => masked store. + // (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1)) + // => (masked.store Value (gep BasePtr IndexBase) Align Mask) + Value *IndexBase; + if (match(Index, m_Intrinsic( + m_Value(IndexBase), m_SpecificInt(1)))) { + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); + + Align Alignment = + BasePtr->getPointerAlignment(II.getModule()->getDataLayout()); + + Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr, + IndexBase); + Type *VecPtrTy = PointerType::getUnqual(Ty); + Ptr = Builder.CreateBitCast(Ptr, VecPtrTy); + + (void)Builder.CreateMaskedStore(Val, Ptr, Alignment, Mask); + + return IC.eraseInstFromFunction(II); + } + + return None; +} + Optional AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { @@ -963,6 +1021,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_zip1: case Intrinsic::aarch64_sve_zip2: return instCombineSVEZip(IC, II); + case Intrinsic::aarch64_sve_ld1_gather_index: + return instCombineLD1GatherIndex(IC, II); + case Intrinsic::aarch64_sve_st1_scatter_index: + return instCombineST1ScatterIndex(IC, II); } return None; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index f7de7c0ee176d..c4e20bb12f8cf 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -125,6 +125,9 @@ class AArch64TTIImpl : public BasicTTIImplBase { return ST->getMinVectorRegisterBitWidth(); } + Optional getVScaleForTuning() const { + return ST->getVScaleForTuning(); + } /// Try to return an estimate cost factor that can be used as a multiplier /// when scalarizing an operation for a vector with ElementCount \p VF.
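To make the two combines above concrete, this is roughly the ACLE-level pattern they target. A hedged sketch: the function names are invented, it assumes <arm_sve.h> and an SVE-enabled target, and the combine only fires when the index operand is matched as sve.index with step 1:

  #include <arm_sve.h>

  // With a step-1 index vector, the "gather" is really a contiguous load from
  // &base[start]; instCombineLD1GatherIndex should rewrite the resulting
  // sve.ld1.gather.index intrinsic as a plain masked load.
  svfloat64_t gather_is_contiguous(svbool_t pg, const double *base,
                                   int64_t start) {
    svint64_t idx = svindex_s64(start, 1); // {start, start+1, start+2, ...}
    return svld1_gather_s64index_f64(pg, base, idx);
  }

  // Same idea on the store side: a step-1 scatter becomes a masked store via
  // instCombineST1ScatterIndex.
  void scatter_is_contiguous(svbool_t pg, double *base, int64_t start,
                             svfloat64_t data) {
    svint64_t idx = svindex_s64(start, 1);
    svst1_scatter_s64index_f64(pg, base, idx, data);
  }

Since a unit-stride index carries no more information than a base pointer plus an offset, lowering to llvm.masked.load/llvm.masked.store lets later passes emit ordinary contiguous loads and stores instead of gather/scatter operations.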
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index a0b4f4644426d..d3f4130d2ba14 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -220,6 +220,121 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI, return true; } +static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B, + CombinerHelper &Helper, + GISelChangeObserver &Observer) { + // Try to simplify a G_UADDO with 8- or 16-bit operands to a wide G_ADD and a + // TBNZ if the result is only used in the no-overflow case. It is restricted + // to cases where we know that the high bits of the operands are 0. If + // there's an overflow, then the 9th or 17th bit must be set, which can be + // checked using TBNZ. + // + // Change (for UADDOs on 8 and 16 bits): + // + // %z0 = G_ASSERT_ZEXT _ + // %op0 = G_TRUNC %z0 + // %z1 = G_ASSERT_ZEXT _ + // %op1 = G_TRUNC %z1 + // %val, %cond = G_UADDO %op0, %op1 + // G_BRCOND %cond, %error.bb + // + // error.bb: + // (no successors and no uses of %val) + // + // To: + // + // %z0 = G_ASSERT_ZEXT _ + // %z1 = G_ASSERT_ZEXT _ + // %add = G_ADD %z0, %z1 + // %val = G_TRUNC %add + // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1) + // %cond = G_ICMP NE, %bit, 0 + // G_BRCOND %cond, %error.bb + + auto &MRI = *B.getMRI(); + + MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg()); + MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg()); + Register Op0Wide; + Register Op1Wide; + if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) || + !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide)))) + return false; + LLT WideTy0 = MRI.getType(Op0Wide); + LLT WideTy1 = MRI.getType(Op1Wide); + Register ResVal = MI.getOperand(0).getReg(); + LLT OpTy = MRI.getType(ResVal); + MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide); + MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide); + + unsigned OpTySize = OpTy.getScalarSizeInBits(); + // First check that the G_TRUNCs feeding the G_UADDO are no-ops, because the + // inputs have been zero-extended. + if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT || + Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT || + OpTySize != Op0WideDef->getOperand(2).getImm() || + OpTySize != Op1WideDef->getOperand(2).getImm()) + return false; + + // Only scalar UADDOs with either 8- or 16-bit operands are handled. + if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 || + OpTySize >= WideTy0.getScalarSizeInBits() || + (OpTySize != 8 && OpTySize != 16)) + return false; + + // The overflow-status result must be used by a branch only. + Register ResStatus = MI.getOperand(1).getReg(); + if (!MRI.hasOneNonDBGUse(ResStatus)) + return false; + MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus); + if (CondUser->getOpcode() != TargetOpcode::G_BRCOND) + return false; + + // Make sure the computed result is only used in the no-overflow blocks. + MachineBasicBlock *CurrentMBB = MI.getParent(); + MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB(); + if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB) + return false; + if (any_of(MRI.use_nodbg_instructions(ResVal), + [&MI, FailMBB, CurrentMBB](MachineInstr &I) { + return &MI != &I && + (I.getParent() == FailMBB || I.getParent() == CurrentMBB); + })) + return false; + + // Remove the G_UADDO.
+ B.setInstrAndDebugLoc(*MI.getNextNode()); + MI.eraseFromParent(); + + // Emit wide add. + Register AddDst = MRI.cloneVirtualRegister(Op0Wide); + B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide}); + + // Emit check of the 9th or 17th bit and update users (the branch). This will + // later be folded to TBNZ. + Register CondBit = MRI.cloneVirtualRegister(Op0Wide); + B.buildAnd( + CondBit, AddDst, + B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16)); + B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit, + B.buildConstant(LLT::scalar(32), 0)); + + // Update ZEXts users of the result value. Because all uses are in the + // no-overflow case, we know that the top bits are 0 and we can ignore ZExts. + B.buildZExtOrTrunc(ResVal, AddDst); + for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) { + Register WideReg; + if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) { + auto OldR = U.getParent()->getOperand(0).getReg(); + Observer.erasingInstr(*U.getParent()); + U.getParent()->eraseFromParent(); + Helper.replaceRegWith(MRI, OldR, AddDst); + } + } + + return true; +} + class AArch64PreLegalizerCombinerHelperState { protected: CombinerHelper &Helper; @@ -272,6 +387,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); + case TargetOpcode::G_UADDO: + return tryToSimplifyUADDO(MI, B, Helper, Observer); case TargetOpcode::G_MEMCPY_INLINE: return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_MEMCPY: diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 70a0e9da6af3d..dbb8e85713cba 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -160,8 +160,11 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, return AdrImmBits(Value & 0x1fffffULL); case AArch64::fixup_aarch64_pcrel_adrp_imm21: assert(!IsResolved); - if (TheTriple.isOSBinFormatCOFF()) + if (TheTriple.isOSBinFormatCOFF()) { + if (!isInt<21>(SignedValue)) + Ctx.reportError(Fixup.getLoc(), "fixup value out of range"); return AdrImmBits(Value & 0x1fffffULL); + } return AdrImmBits((Value & 0x1fffff000ULL) >> 12); case AArch64::fixup_aarch64_ldr_pcrel_imm19: case AArch64::fixup_aarch64_pcrel_branch19: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index c962c2150b5f4..3242fea4edda1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -18,7 +18,6 @@ def p4 : PtrValueType; def p5 : PtrValueType; def p6 : PtrValueType; - class BoolToList { list ret = !if(Value, [1], []); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index c3b2f9462ce78..f0aadab3302ff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -112,6 +112,7 @@ static bool isDSAddress(const Constant *C) { return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; } +namespace { class AMDGPUInformationCache : public InformationCache { public: AMDGPUInformationCache(const Module &M, AnalysisGetter &AG, @@ -643,6 +644,7 @@ class AMDGPUAttributor : public ModulePass { TargetMachine *TM; static char ID; }; +} // namespace char AMDGPUAttributor::ID = 0; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 118a32580f669..43928d7c2a096 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -355,14 +355,23 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, auto const &ST = MF.getSubtarget(); - unsigned ReturnOpc = - IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return; + unsigned ReturnOpc = 0; + if (IsShader) + ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG; + else if (CC == CallingConv::AMDGPU_Gfx) + ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx; + else + ReturnOpc = AMDGPU::S_SETPC_B64_return; auto Ret = B.buildInstrNoInsert(ReturnOpc); Register ReturnAddrVReg; if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass); Ret.addUse(ReturnAddrVReg); + } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) { + ReturnAddrVReg = + MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass); + Ret.addUse(ReturnAddrVReg); } if (!FLI.CanLowerReturn) @@ -370,7 +379,8 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, else if (!lowerReturnVal(B, Val, VRegs, Ret)) return false; - if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { + if (ReturnOpc == AMDGPU::S_SETPC_B64_return || + ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) { const SIRegisterInfo *TRI = ST.getRegisterInfo(); Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF), &AMDGPU::SGPR_64RegClass); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 34fb7b840d316..1682d43ae671a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -20,11 +20,13 @@ def CC_SI_Gfx : CallingConv<[ // 0-3 are reserved for the stack buffer descriptor // 30-31 are reserved for the return address // 32 is reserved for the stack pointer + // 33 is reserved for the frame pointer + // 34 is reserved for the base pointer CCIfInReg>>, CCIfNotInReg>, CCIfType<[i1, i16], CCIfExtend>>, - // 0-3 are reserved for the stack buffer descriptor - // 32 is reserved for the stack pointer - CCIfInReg>>, - CCIfNotInReg; +def CSR_AMDGPU_SI_Gfx_SGPRs_4_29 : CalleeSavedRegs< + (sequence "SGPR%u", 4, 29) +>; + +def CSR_AMDGPU_SI_Gfx_SGPRs_64_105 : CalleeSavedRegs< + (sequence "SGPR%u", 64, 105) +>; + // Just to get the regmask, not for calling convention purposes. def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs< (sequence "VGPR%u", 0, 255) @@ -190,6 +189,14 @@ def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs< (add CSR_AMDGPU_HighRegs, CSR_AMDGPU_AGPRs_32_255) >; +def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs< + (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105) +>; + +def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs< + (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs_32_255) +>; + def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>; // Calling convention for leaf functions diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 20defbc883c18..cee56ee972945 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -641,8 +641,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { uint32_t OffsetVal = Offset->getZExtValue(); uint32_t WidthVal = Width->getZExtValue(); - ReplaceNode(N, getS_BFE(Signed ? 
AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, - SDLoc(N), N->getOperand(0), OffsetVal, WidthVal)); + ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal, + WidthVal)); return; } case AMDGPUISD::DIV_SCALE: { @@ -1947,9 +1947,17 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, return true; } -SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, +SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset, uint32_t Width) { + if (Val->isDivergent()) { + unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64; + SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32); + SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32); + + return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W); + } + unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; // Transformation function, pack the offset and width of a BFE into // the format expected by the S_BFE_I32 / S_BFE_U32. In the second // source, bits [5:0] contain the offset and bits [22:16] the width. @@ -1974,10 +1982,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { if (0 < BVal && BVal <= CVal && CVal < 32) { bool Signed = N->getOpcode() == ISD::SRA; - unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; - - ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, - 32 - CVal)); + ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal, + 32 - CVal)); return; } } @@ -2000,9 +2006,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { if (isMask_32(MaskVal)) { uint32_t WidthVal = countPopulation(MaskVal); - - ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), - Srl.getOperand(0), ShiftVal, WidthVal)); + ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal, + WidthVal)); return; } } @@ -2022,9 +2027,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { if (isMask_32(MaskVal)) { uint32_t WidthVal = countPopulation(MaskVal); - - ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N), - And.getOperand(0), ShiftVal, WidthVal)); + ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal, + WidthVal)); return; } } @@ -2051,7 +2055,7 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) { break; unsigned Width = cast(N->getOperand(1))->getVT().getSizeInBits(); - ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0), + ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0), Amt->getZExtValue(), Width)); return; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 6f41191658499..c1d9673f067ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -233,9 +233,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { void SelectMAD_64_32(SDNode *N); void SelectFMA_W_CHAIN(SDNode *N); void SelectFMUL_W_CHAIN(SDNode *N); - - SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, - uint32_t Offset, uint32_t Width); + SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset, + uint32_t Width); void SelectS_BFEFromShifts(SDNode *N); void SelectS_BFE(SDNode *N); bool isCBranchSCC(const SDNode *N) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 16d9b302348ce..2d42e7977365c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -49,11 +49,9 @@ unsigned 
AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { } unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - // In order for this to be a signed 24-bit value, bit 23, must // be a sign bit. - return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op) + 1; + return DAG.ComputeMinSignedBits(Op); } AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, @@ -4352,6 +4350,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_FLAG) + NODE_NAME_CASE(RET_GFX_FLAG) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) NODE_NAME_CASE(DWORDADDR) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 6bbcef2fe721c..03632ac18598a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -343,7 +343,7 @@ namespace AMDGPUISD { enum NodeType : unsigned { // AMDIL ISD Opcodes FIRST_NUMBER = ISD::BUILTIN_OP_END, - UMUL, // 32bit unsigned multiplication + UMUL, // 32bit unsigned multiplication BRANCH_COND, // End AMDIL ISD Opcodes @@ -366,6 +366,9 @@ enum NodeType : unsigned { // Return with values from a non-entry function. RET_FLAG, + // Return with values from a non-entry function (AMDGPU_Gfx CC). + RET_GFX_FLAG, + DWORDADDR, FRACT, @@ -422,10 +425,10 @@ enum NodeType : unsigned { DOT4, CARRY, BORROW, - BFE_U32, // Extract range of bits with zero extension to 32-bits. - BFE_I32, // Extract range of bits with sign extension to 32-bits. - BFI, // (src0 & src1) | (~src0 & src2) - BFM, // Insert a range of bits into a 32-bit word. + BFE_U32, // Extract range of bits with zero extension to 32-bits. + BFE_I32, // Extract range of bits with sign extension to 32-bits. + BFI, // (src0 & src1) | (~src0 & src2) + BFM, // Insert a range of bits into a 32-bit word. FFBH_U32, // ctlz with -1 if input is zero. FFBH_I32, FFBL_B32, // cttz with -1 if input is zero. 
@@ -534,7 +537,6 @@ enum NodeType : unsigned { LAST_AMDGPU_ISD_NUMBER }; - } // End namespace AMDGPUISD } // End namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 55e03ae35ce04..391dc84285392 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -359,6 +359,10 @@ def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPt [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; +def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] +>; + //===----------------------------------------------------------------------===// // Intrinsic/Custom node compatibility PatFrags diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 26d3e47215497..b70e6883bae23 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -132,7 +132,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector { bool hasVgprParts(ArrayRef AddrInfo) const; void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, SmallVectorImpl &AddrInfo) const; - bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const; void initM0(MachineInstr &I) const; bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp index 4971b010870d1..9e86bd0c2b970 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp @@ -95,10 +95,8 @@ bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) { bool Changed = false; for (auto &BB : F) - for (auto BI = BB.begin(), BE = BB.end(); BI != BE; /*EMPTY*/) { - Instruction *I = &*BI++; - Changed |= visit(*I); - } + for (Instruction &I : llvm::make_early_inc_range(BB)) + Changed |= visit(I); return Changed; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index 714e74faaf13d..b700dd5aa3018 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -76,9 +76,8 @@ bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) { Intrinsic::ID ID = F.getIntrinsicID(); bool Changed = false; - for (auto I = F.user_begin(), E = F.user_end(); I != E;) { - Instruction *Inst = cast(*I); - ++I; + for (User *U : llvm::make_early_inc_range(F.users())) { + Instruction *Inst = cast(U); switch (ID) { case Intrinsic::memcpy: { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 99e72815db2e8..ef86d760184aa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -120,7 +120,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We // need to select it to the subtarget specific version, and there's no way to // do that with a single pseudo source operation. 
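Several of the loops in the hunks above are rewritten with llvm::make_early_inc_range, which advances the iterator before the loop body runs so that the body may delete the current element. A minimal stand-in for the idiom, as a simplified sketch rather than LLVM's implementation:

// eraseIfEarlyInc is a hypothetical helper that captures the core of the
// early-increment pattern: grab the current node, step past it, then act.
#include <cassert>
#include <list>

template <typename List, typename Pred>
void eraseIfEarlyInc(List &L, Pred P) {
  for (auto It = L.begin(), End = L.end(); It != End;) {
    auto Cur = It++;  // advance first, like llvm::make_early_inc_range
    if (P(*Cur))
      L.erase(Cur);   // erasing the current node cannot invalidate It
  }
}

int main() {
  std::list<int> L = {1, 2, 3, 4, 5, 6};
  eraseIfEarlyInc(L, [](int V) { return V % 2 == 0; });
  assert(L == std::list<int>({1, 3, 5}));
}

The same shape covers the reverse case in GCNDPPCombine, where the adapter wraps llvm::reverse(MBB) instead of the container itself.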
- if (Opcode == AMDGPU::S_SETPC_B64_return) + if (Opcode == AMDGPU::S_SETPC_B64_return || + Opcode == AMDGPU::S_SETPC_B64_return_gfx) Opcode = AMDGPU::S_SETPC_B64; else if (Opcode == AMDGPU::SI_CALL) { // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp index 778c7fb7ca09f..095249d6dd36f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp @@ -748,10 +748,8 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, // If we have a successor with a PHI, source coming from this MBB we have to // add the register as live out - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); - SI != E; ++SI) { - for (auto &II : *(*SI)) { + for (MachineBasicBlock *Succ : MBB->successors()) { + for (auto &II : *Succ) { if (II.isPHI()) { MachineInstr &PHI = II; int numPreds = getPHINumInputs(PHI); @@ -760,7 +758,7 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, unsigned PHIReg = getPHISourceReg(PHI, i); LLVM_DEBUG(dbgs() << "Add LiveOut (PhiSource " << printMBBReference(*MBB) - << " -> " << printMBBReference(*(*SI)) + << " -> " << printMBBReference(*Succ) << "): " << printReg(PHIReg, TRI) << "\n"); addLiveOut(PHIReg); } @@ -1235,11 +1233,7 @@ bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { return false; } - for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), - E = Entry->succ_end(); - SI != E; ++SI) { - MachineBasicBlock *Current = *SI; - + for (MachineBasicBlock *Current : Entry->successors()) { if (Current == Succ) { FoundBypass = true; } else if ((Current->succ_size() == 1) && @@ -1277,10 +1271,7 @@ static void fixRegionTerminator(RegionMRT *Region) { auto Exit = LRegion->getExit(); SmallPtrSet Successors; - for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), - SE = Exit->succ_end(); - SI != SE; ++SI) { - MachineBasicBlock *Succ = *SI; + for (MachineBasicBlock *Succ : Exit->successors()) { if (LRegion->contains(Succ)) { // Do not allow re-assign assert(InternalSucc == nullptr); @@ -1774,27 +1765,20 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, unsigned SuccSize = StartMBB->succ_size(); if (SuccSize > 0) { MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); - for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), - E = EndMBB->succ_end(); - PI != E; ++PI) { + for (MachineBasicBlock *Succ : EndMBB->successors()) { // Either we have a back-edge to the entry block, or a back-edge to the // successor of the entry block since the block may be split. 
- if ((*PI) != StartMBB && - !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { + if (Succ != StartMBB && + !(Succ == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { Succs.insert( - std::pair(EndMBB, *PI)); + std::pair(EndMBB, Succ)); } } } - for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), - E = StartMBB->pred_end(); - PI != E; ++PI) { - if ((*PI) != EndMBB) { - Succs.insert( - std::pair(*PI, StartMBB)); - } - } + for (MachineBasicBlock *Pred : StartMBB->predecessors()) + if (Pred != EndMBB) + Succs.insert(std::make_pair(Pred, StartMBB)); for (auto SI : Succs) { std::pair Edge = SI; @@ -1812,14 +1796,9 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); if (InheritPreds) { - for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), - E = CodeBBStart->pred_end(); - PI != E; ++PI) { - if ((*PI) != CodeBBEnd) { - MachineBasicBlock *Pred = (*PI); + for (MachineBasicBlock *Pred : CodeBBStart->predecessors()) + if (Pred != CodeBBEnd) Pred->addSuccessor(IfBB); - } - } } removeExternalCFGEdges(CodeBBStart, CodeBBEnd); @@ -2566,11 +2545,9 @@ static void removeOldExitPreds(RegionMRT *Region) { static bool mbbHasBackEdge(MachineBasicBlock *MBB, SmallPtrSet &MBBs) { - for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { - if (MBBs.contains(*SI)) { + for (MachineBasicBlock *Succ : MBB->successors()) + if (MBBs.contains(Succ)) return true; - } - } return false; } @@ -2588,11 +2565,9 @@ static bool containsNewBackedge(MRT *Tree, } } else { RegionMRT *Region = Tree->getRegionMRT(); - SetVector *Children = Region->getChildren(); - for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { - if (containsNewBackedge(*CI, MBBs)) + for (MRT *C : llvm::reverse(*Region->getChildren())) + if (containsNewBackedge(C, MBBs)) return true; - } } return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp index 52ac401a2ccb3..e87dfd6bde7bf 100644 --- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp @@ -632,9 +632,8 @@ bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, MachineBasicBlock *SrcMBB) { - for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), - iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) - DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of + for (MachineBasicBlock *Succ : SrcMBB->successors()) + DstMBB->addSuccessor(Succ); // *iter's predecessor is also taken care of } MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { @@ -1323,12 +1322,9 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, insertInstrBefore(I, R600::ENDIF); // put initReg = 2 to other predecessors of landBlk - for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), - PE = LandBlk->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *MBB = *PI; + for (MachineBasicBlock *MBB : LandBlk->predecessors()) if (MBB != TrueMBB && MBB != FalseMBB) report_fatal_error("Extra register needed to handle CFG"); - } } LLVM_DEBUG( dbgs() << "result from improveSimpleJumpintoIf: "; diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 2bf365168048f..a8c85ec4e5eaa 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ 
b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -612,8 +612,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; for (auto &MBB : MF) { - for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) { - auto &MI = *I++; + for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) { if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) { Changed = true; ++NumDPPMovsCombined; diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp index e8298533f0dcd..f7eb3ad3e242e 100644 --- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp +++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp @@ -100,8 +100,9 @@ bool GCNPreRAOptimizations::processReg(Register Reg) { // Some subtargets cannot do an AGPR to AGPR copy directly, and need an // intermdiate temporary VGPR register. Try to find the defining // accvgpr_write to avoid temporary registers. + if (!IsAGPRDst) - break; + return false; Register SrcReg = I.getOperand(1).getReg(); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 3ec82fe48a240..73b4eb0e65728 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -125,8 +125,8 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, Register SpillReg, - int FI) { + MachineBasicBlock::iterator I, const DebugLoc &DL, + Register SpillReg, int FI) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; @@ -136,7 +136,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); LiveRegs.addReg(SpillReg); - TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true, + TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true, FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, &LiveRegs); LiveRegs.removeReg(SpillReg); @@ -147,8 +147,8 @@ static void buildEpilogRestore(const GCNSubtarget &ST, const SIMachineFunctionInfo &FuncInfo, LivePhysRegs &LiveRegs, MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, Register SpillReg, - int FI) { + MachineBasicBlock::iterator I, + const DebugLoc &DL, Register SpillReg, int FI) { unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; @@ -157,7 +157,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST, MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); - TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false, + TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, &LiveRegs); } @@ -776,7 +776,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR, + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR, *Reg.FI); } @@ -791,7 +791,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI); + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, + *FI); } if (ScratchExecCopy) { @@ -817,7 +818,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) .addReg(FramePtrReg); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, FramePtrFI); } @@ -835,7 +836,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR) .addReg(BasePtrReg); - buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, + buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR, BasePtrFI); } @@ -1031,8 +1032,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, MRI, LiveRegs, AMDGPU::VGPR_32RegClass); if (!TmpVGPR) report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, - FramePtrFI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + TmpVGPR, FramePtrFI); BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg) .addReg(TmpVGPR, RegState::Kill); } else { @@ -1057,8 +1058,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, MRI, LiveRegs, AMDGPU::VGPR_32RegClass); if (!TmpVGPR) report_fatal_error("failed to find free scratch register"); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR, - BasePtrFI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + TmpVGPR, BasePtrFI); BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg) .addReg(TmpVGPR, RegState::Kill); } else { @@ -1083,8 +1084,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR, - *Reg.FI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, + Reg.VGPR, *Reg.FI); } for (const auto &Reg : FuncInfo->WWMReservedRegs) { @@ -1097,7 +1098,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false); - buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI); + buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR, + *FI); } if (ScratchExecCopy) { @@ -1157,11 +1159,7 
@@ void SIFrameLowering::processFunctionBeforeFrameFinalized( bool SeenDbgInstr = false; for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock::iterator Next; - for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { - MachineInstr &MI = *I; - Next = std::next(I); - + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { if (MI.isDebugInstr()) SeenDbgInstr = true; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index a3be162432daa..877b4500364e8 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2616,9 +2616,12 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue ReturnAddrReg = CreateLiveInRegister( DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64); - SDValue ReturnAddrVirtualReg = DAG.getRegister( - MF.getRegInfo().createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass), - MVT::i64); + SDValue ReturnAddrVirtualReg = + DAG.getRegister(MF.getRegInfo().createVirtualRegister( + CallConv != CallingConv::AMDGPU_Gfx + ? &AMDGPU::CCR_SGPR_64RegClass + : &AMDGPU::Gfx_CCR_SGPR_64RegClass), + MVT::i64); Chain = DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag); Flag = Chain.getValue(1); @@ -2681,8 +2684,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetOps.push_back(Flag); unsigned Opc = AMDGPUISD::ENDPGM; - if (!IsWaveEnd) - Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG; + if (!IsWaveEnd) { + if (IsShader) + Opc = AMDGPUISD::RETURN_TO_EPILOG; + else if (CallConv == CallingConv::AMDGPU_Gfx) + Opc = AMDGPUISD::RET_GFX_FLAG; + else + Opc = AMDGPUISD::RET_FLAG; + } + return DAG.getNode(Opc, DL, MVT::Other, RetOps); } diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 7d6f79922d2e1..ea36ee46bea16 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -963,6 +963,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( // with knowledge of the called routines. if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || MI.getOpcode() == AMDGPU::S_SETPC_B64_return || + MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx || (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) { Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt())); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1de2854dc2986..778eefe92beec 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3471,6 +3471,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO, uint32_t Trunc = static_cast(Imm); return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm()); } + case AMDGPU::OPERAND_KIMM32: + case AMDGPU::OPERAND_KIMM16: + return false; default: llvm_unreachable("invalid bitwidth"); } @@ -3597,11 +3600,13 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI, // Additional verification is needed for sdst/src2. 
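A reduced model of the three-way return selection introduced in LowerReturn above, with hypothetical enums standing in for the AMDGPU opcodes and calling conventions (the real code builds SDNodes; this only shows the decision):

#include <cassert>

enum class CallConv { AMDGPU_PS, AMDGPU_Gfx, C };
enum class RetOpc { ReturnToEpilog, RetGfxFlag, RetFlag };

// Shaders return to the epilog; AMDGPU_Gfx callees use the new
// RET_GFX_FLAG node; everything else keeps RET_FLAG.
RetOpc selectReturn(CallConv CC, bool IsShader) {
  if (IsShader)
    return RetOpc::ReturnToEpilog;
  if (CC == CallConv::AMDGPU_Gfx)
    return RetOpc::RetGfxFlag;
  return RetOpc::RetFlag;
}

int main() {
  assert(selectReturn(CallConv::AMDGPU_PS, true) == RetOpc::ReturnToEpilog);
  assert(selectReturn(CallConv::AMDGPU_Gfx, false) == RetOpc::RetGfxFlag);
  assert(selectReturn(CallConv::C, false) == RetOpc::RetFlag);
}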
return true; } - case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_F16_e64: - case AMDGPU::V_FMAC_F32_e64: + case AMDGPU::V_MAC_F32_e64: + case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F32_e64: case AMDGPU::V_FMAC_F64_e64: + case AMDGPU::V_FMAC_LEGACY_F32_e64: if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) || hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) return false; @@ -5197,8 +5202,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, return; Register DstReg = MRI.createVirtualRegister(DstRC); - MachineInstr *Copy = - BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); + auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op); Op.setReg(DstReg); Op.setSubReg(0); @@ -5220,7 +5224,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, } if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) && !ImpDef) - Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true)); + Copy.addReg(AMDGPU::EXEC, RegState::Implicit); } // Emit the actual waterfall loop, executing the wrapped instruction for each @@ -7306,31 +7310,19 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { return Size; } - // 4-byte instructions may have a 32-bit literal encoded after them. Check - // operands that coud ever be literals. + // Instructions may have a 32-bit literal encoded after them. Check + // operands that could ever be literals. if (isVALU(MI) || isSALU(MI)) { - int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); - if (Src0Idx == -1) - return DescSize; // No operands. - - if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx])) - return isVOP3(MI) ? 12 : (DescSize + 4); - - int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); - if (Src1Idx == -1) - return DescSize; - - if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx])) - return isVOP3(MI) ? 12 : (DescSize + 4); - - int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); - if (Src2Idx == -1) + if (isDPP(MI)) return DescSize; - - if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx])) - return isVOP3(MI) ? 12 : (DescSize + 4); - - return DescSize; + bool HasLiteral = false; + for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) { + if (isLiteralConstant(MI, I)) { + HasLiteral = true; + break; + } + } + return HasLiteral ? DescSize + 4 : DescSize; } // Check whether we have extra NSA words. 
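The rewritten getInstSizeInBytes logic above replaces per-operand-name probing with a scan of all explicit operands: the encoded size grows by 4 bytes as soon as any operand needs a 32-bit literal, and at most one literal is possible. A sketch of that computation outside LLVM (the operand modeling is hypothetical):

#include <cassert>
#include <vector>

struct Operand {
  bool IsLiteral; // would not fit an inline constant
};

// Descriptor size plus 4 if any explicit operand is encoded as a literal.
unsigned instSizeInBytes(unsigned DescSize,
                         const std::vector<Operand> &Explicit) {
  for (const Operand &Op : Explicit)
    if (Op.IsLiteral)
      return DescSize + 4; // one 32-bit literal follows the instruction
  return DescSize;
}

int main() {
  assert(instSizeInBytes(4, {{false}, {false}}) == 4);
  assert(instSizeInBytes(8, {{false}, {true}}) == 12);
}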
@@ -7418,19 +7410,16 @@ void SIInstrInfo::convertNonUniformLoopRegion( Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC()); MachineInstrBuilder HeaderPHIBuilder = BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg); - for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(), - E = LoopEntry->pred_end(); - PI != E; ++PI) { - if (*PI == LoopEnd) { + for (MachineBasicBlock *PMBB : LoopEntry->predecessors()) { + if (PMBB == LoopEnd) { HeaderPHIBuilder.addReg(BackEdgeReg); } else { - MachineBasicBlock *PMBB = *PI; Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC()); materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(), ZeroReg, 0); HeaderPHIBuilder.addReg(ZeroReg); } - HeaderPHIBuilder.addMBB(*PI); + HeaderPHIBuilder.addMBB(PMBB); } MachineInstr *HeaderPhi = HeaderPHIBuilder; MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(), diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 750d1981e7638..d5f9cb8ba4936 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -1871,40 +1871,92 @@ def : GCNPat < // Conversion Patterns //===----------------------------------------------------------------------===// -def : GCNPat<(i32 (sext_inreg i32:$src, i1)), +class UniformSextInreg : PatFrag< + (ops node:$src), + (sext_inreg $src, VT), + [{ return !N->isDivergent(); }]>; + +def : GCNPat<(i32 (UniformSextInreg i32:$src)), (S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16 // Handle sext_inreg in i64 def : GCNPat < - (i64 (sext_inreg i64:$src, i1)), + (i64 (UniformSextInreg i64:$src)), (S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16 >; def : GCNPat < - (i16 (sext_inreg i16:$src, i1)), + (i16 (UniformSextInreg i16:$src)), (S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16 >; def : GCNPat < - (i16 (sext_inreg i16:$src, i8)), + (i16 (UniformSextInreg i16:$src)), (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16 >; def : GCNPat < - (i64 (sext_inreg i64:$src, i8)), + (i64 (UniformSextInreg i64:$src)), (S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16 >; def : GCNPat < - (i64 (sext_inreg i64:$src, i16)), + (i64 (UniformSextInreg i64:$src)), (S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16 >; def : GCNPat < - (i64 (sext_inreg i64:$src, i32)), + (i64 (UniformSextInreg i64:$src)), (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16 >; + +class DivergentSextInreg : PatFrag< + (ops node:$src), + (sext_inreg $src, VT), + [{ return N->isDivergent(); }]>; + +def : GCNPat<(i32 (DivergentSextInreg i32:$src)), + (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>; + +def : GCNPat < + (i16 (DivergentSextInreg i16:$src)), + (V_BFE_I32_e64 $src, (i32 0), (i32 1)) // 0 | 1 << 16 +>; + +def : GCNPat < + (i16 (DivergentSextInreg i16:$src)), + (V_BFE_I32_e64 $src, (i32 0), (i32 8)) // 0 | 8 << 16 +>; + +def : GCNPat < + (i64 (DivergentSextInreg i64:$src)), + (REG_SEQUENCE VReg_64, + (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 1)), sub0, + (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 1))), sub1) +>; + +def : GCNPat < + (i64 (DivergentSextInreg i64:$src)), + (REG_SEQUENCE VReg_64, + (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8)/* 0 | 8 << 16 */), sub0, + (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8))), sub1) +>; + +def : GCNPat < + (i64 (DivergentSextInreg i64:$src)), + (REG_SEQUENCE VReg_64, + (V_BFE_I32_e64 (i32 
(EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16)/* 0 | 16 << 16 */), sub0, + (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16))), sub1) +>; + +def : GCNPat < + (i64 (DivergentSextInreg i64:$src)), + (REG_SEQUENCE VReg_64, + (i32 (EXTRACT_SUBREG i64:$src, sub0)), sub0, + (V_ASHRREV_I32_e32 (i32 31), (i32 (EXTRACT_SUBREG i64:$src, sub0))), sub1) +>; + def : GCNPat < (i64 (zext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1) diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp index d560b477b8ba7..4fa8ec7111343 100644 --- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp @@ -140,11 +140,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) { bool MadeChange = false; for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock::iterator I, Next; - for (I = MBB.begin(); I != MBB.end(); I = Next) { - Next = std::next(I); - MachineInstr &MI = *I; - + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { switch (MI.getOpcode()) { case AMDGPU::S_BRANCH: // Optimize out branches to the next block. diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 58415ecc94327..1f5312e5cb148 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -2065,7 +2065,7 @@ SILoadStoreOptimizer::collectMergeableInsts( // adjacent to each other in the list, which will make it easier to find // matches. MergeList.sort( - [] (const CombineInfo &A, CombineInfo &B) { + [] (const CombineInfo &A, const CombineInfo &B) { return A.Offset < B.Offset; }); ++I; diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 10696939d369f..ec89de25ec11b 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -52,6 +52,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -70,6 +71,7 @@ class SILowerControlFlow : public MachineFunctionPass { const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; LiveIntervals *LIS = nullptr; + LiveVariables *LV = nullptr; MachineDominatorTree *MDT = nullptr; MachineRegisterInfo *MRI = nullptr; SetVector LoweredEndCf; @@ -237,6 +239,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp) .addReg(CopyReg) .add(Cond); + if (LV) + LV->replaceKillInstruction(Cond.getReg(), MI, *And); setImpSCCDefDead(*And, true); @@ -254,6 +258,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { MachineInstr *SetExec = BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec) .addReg(Tmp, RegState::Kill); + if (LV) + LV->getVarInfo(Tmp).Kills.push_back(SetExec); // Skip ahead to the unconditional branch in case there are other terminators // present. 
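The patterns above lower sext_inreg to a signed bit-field extract: V_BFE_I32_e64 takes offset and width as separate operands, while S_BFE_I32 packs them into one immediate as offset | width << 16, which is where the 0x80000-style constants in the uniform patterns come from. The underlying semantics in plain C++ (assumes two's-complement shift behavior):

#include <cassert>
#include <cstdint>

// Sign-extend the low Width bits of Src: shift them to the top, then
// arithmetic-shift back down so the sign bit is replicated.
int32_t signExtendInReg(int32_t Src, unsigned Width) {
  unsigned Shift = 32 - Width;
  return static_cast<int32_t>(static_cast<uint32_t>(Src) << Shift) >> Shift;
}

int main() {
  assert(signExtendInReg(0x01, 1) == -1);        // i1: low bit set => -1
  assert(signExtendInReg(0xFF, 8) == -1);        // i8: 0xFF => -1
  assert(signExtendInReg(0x7F, 8) == 127);       // i8: sign bit clear
  assert(signExtendInReg(0x8000, 16) == -32768); // i16
  // The packed S_BFE immediate for "offset 0, width 8" is 0 | 8 << 16.
  assert((0u | 8u << 16) == 0x80000);
}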
@@ -307,6 +313,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) { MachineInstr *OrSaveExec = BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg) .add(MI.getOperand(1)); // Saved EXEC + if (LV) + LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec); MachineBasicBlock *DestBB = MI.getOperand(2).getMBB(); @@ -380,15 +388,22 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) { And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg) .addReg(Exec) .add(MI.getOperand(1)); + if (LV) + LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *And); Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) .addReg(AndReg) .add(MI.getOperand(2)); if (LIS) LIS->createAndComputeVirtRegInterval(AndReg); - } else + } else { Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst) .add(MI.getOperand(1)) .add(MI.getOperand(2)); + if (LV) + LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *Or); + } + if (LV) + LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or); if (LIS) { if (And) @@ -490,6 +505,8 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) { BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec) .addReg(Exec) .add(MI.getOperand(0)); + if (LV) + LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *NewMI); LoweredEndCf.insert(NewMI); @@ -581,7 +598,12 @@ void SILowerControlFlow::optimizeEndCf() { LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump()); if (LIS) LIS->RemoveMachineInstrFromMaps(*MI); + Register Reg; + if (LV) + Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg(); MI->eraseFromParent(); + if (LV) + LV->recomputeForSingleDefVirtReg(Reg); removeMBBifRedundant(MBB); } } @@ -697,6 +719,8 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB, auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg) .addReg(InputReg) .addImm((MI.getOperand(1).getImm() & Mask) | 0x70000); + if (LV) + LV->recomputeForSingleDefVirtReg(InputReg); auto BfmMI = BuildMI(*MBB, FirstMI, DL, TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec) @@ -705,6 +729,8 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB, auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32)) .addReg(CountReg, RegState::Kill) .addImm(WavefrontSize); + if (LV) + LV->getVarInfo(CountReg).Kills.push_back(CmpMI); auto CmovMI = BuildMI(*MBB, FirstMI, DL, TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64), @@ -777,17 +803,14 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { } bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { - // FIXME: This pass causes verification failures. - // See: https://bugs.llvm.org/show_bug.cgi?id=52204 - MF.getProperties().set( - MachineFunctionProperties::Property::FailsVerification); - const GCNSubtarget &ST = MF.getSubtarget(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); // This doesn't actually need LiveIntervals, but we can preserve them. LIS = getAnalysisIfAvailable(); + // This doesn't actually need LiveVariables, but we can preserve them. 
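SILowerControlFlow now keeps LiveVariables up to date whenever an earlier pass happened to compute it, even though the pass itself does not require the analysis. The guard-on-availability pattern those hunks repeat, reduced to a standalone sketch with toy types (the struct below is a hypothetical stand-in, not LLVM's LiveVariables):

#include <cassert>
#include <set>

struct LiveVariables {
  std::set<int> Kills; // toy stand-in for per-vreg kill bookkeeping
  void replaceKill(int OldInst, int NewInst) {
    if (Kills.erase(OldInst))
      Kills.insert(NewInst);
  }
};

void rewriteInstruction(int OldInst, int NewInst, LiveVariables *LV) {
  // ... transform the code ...
  if (LV) // only maintain the analysis when it is actually available
    LV->replaceKill(OldInst, NewInst);
}

int main() {
  LiveVariables LV{{42}};
  rewriteInstruction(42, 99, &LV);   // analysis present: updated
  rewriteInstruction(7, 8, nullptr); // analysis absent: nothing to do
  assert(LV.Kills.count(99) == 1 && LV.Kills.count(42) == 0);
}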
+ LV = getAnalysisIfAvailable(); MDT = getAnalysisIfAvailable(); MRI = &MF.getRegInfo(); BoolRC = TRI->getBoolRC(); diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 193d106a1230b..fee3b7028de23 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -332,11 +332,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { BitVector SpillFIs(MFI.getObjectIndexEnd(), false); for (MachineBasicBlock &MBB : MF) { - MachineBasicBlock::iterator Next; - for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) { - MachineInstr &MI = *I; - Next = std::next(I); - + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { if (!TII->isSGPRSpill(MI)) continue; diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 125de947c61de..29f072ca1e6c3 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -368,7 +368,7 @@ class SIGfx6CacheControl : public SICacheControl { public: - SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}; + SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {} bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -409,7 +409,7 @@ class SIGfx6CacheControl : public SICacheControl { class SIGfx7CacheControl : public SIGfx6CacheControl { public: - SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}; + SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {} bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -421,7 +421,7 @@ class SIGfx7CacheControl : public SIGfx6CacheControl { class SIGfx90ACacheControl : public SIGfx7CacheControl { public: - SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}; + SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {} bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -470,7 +470,7 @@ class SIGfx10CacheControl : public SIGfx7CacheControl { public: - SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}; + SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {} bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -855,7 +855,7 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal( // instructions. The latter are always marked as volatile so cannot sensibly // handle it as do not want to pessimize all atomics. Also they do not support // the nontemporal attribute. - assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE); + assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE); bool Changed = false; @@ -1031,8 +1031,8 @@ bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering, Position Pos) const { - return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, - IsCrossAddrSpaceOrdering, Pos); + return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, + IsCrossAddrSpaceOrdering, Pos); } bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, @@ -1104,7 +1104,8 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass( // different CUs. Therefore need to bypass the L1 which is per CU. // Otherwise in non-threadgroup split mode all waves of a work-group are // on the same CU, and so the L1 does not need to be bypassed. 
- if (ST.isTgSplitEnabled()) Changed |= enableGLCBit(MI); + if (ST.isTgSplitEnabled()) + Changed |= enableGLCBit(MI); break; case SIAtomicScope::WAVEFRONT: case SIAtomicScope::SINGLETHREAD: @@ -1200,14 +1201,13 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal( // instructions. The latter are always marked as volatile so cannot sensibly // handle it as do not want to pessimize all atomics. Also they do not support // the nontemporal attribute. - assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE); + assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE); bool Changed = false; if (IsVolatile) { - if (Op == SIMemOp::LOAD) { + if (Op == SIMemOp::LOAD) Changed |= enableGLCBit(MI); - } // Ensure operation has completed at system scope to cause all volatile // operations to be visible outside the program in a global order. Do not @@ -1394,7 +1394,8 @@ bool SIGfx10CacheControl::enableLoadCacheBypass( // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in // CU mode all waves of a work-group are on the same CU, and so the L0 // does not need to be bypassed. - if (!ST.isCuModeEnabled()) Changed |= enableGLCBit(MI); + if (!ST.isCuModeEnabled()) + Changed |= enableGLCBit(MI); break; case SIAtomicScope::WAVEFRONT: case SIAtomicScope::SINGLETHREAD: @@ -1428,12 +1429,11 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal( // instructions. The latter are always marked as volatile so cannot sensibly // handle it as do not want to pessimize all atomics. Also they do not support // the nontemporal attribute. - assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE); + assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE); bool Changed = false; if (IsVolatile) { - if (Op == SIMemOp::LOAD) { Changed |= enableGLCBit(MI); Changed |= enableDLCBit(MI); diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp index 3d659eca47db3..2ba726118619d 100644 --- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp +++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp @@ -373,12 +373,8 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB, BlockInfo[ThisBlock]->Exit = TmpStatus; // Add the successors to the work list so we can propagate the changed exit // status. - for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(), - E = MBB.succ_end(); - S != E; S = std::next(S)) { - MachineBasicBlock &B = *(*S); - Phase2List.push(&B); - } + for (MachineBasicBlock *Succ : MBB.successors()) + Phase2List.push(Succ); } BlockInfo[ThisBlock]->ExitSet = ExitSet; if (RevisitRequired) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index bff98b304baf5..bfbe84f696f8a 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -360,10 +360,13 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( case CallingConv::C: case CallingConv::Fast: case CallingConv::Cold: - case CallingConv::AMDGPU_Gfx: return MF->getSubtarget().hasGFX90AInsts() ? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList : CSR_AMDGPU_HighRegs_SaveList; + case CallingConv::AMDGPU_Gfx: + return MF->getSubtarget().hasGFX90AInsts() + ? CSR_AMDGPU_SI_Gfx_With_AGPRs_SaveList + : CSR_AMDGPU_SI_Gfx_SaveList; default: { // Dummy to not crash RegisterClassInfo. 
static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister; @@ -383,10 +386,13 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF, case CallingConv::C: case CallingConv::Fast: case CallingConv::Cold: - case CallingConv::AMDGPU_Gfx: return MF.getSubtarget().hasGFX90AInsts() ? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask : CSR_AMDGPU_HighRegs_RegMask; + case CallingConv::AMDGPU_Gfx: + return MF.getSubtarget().hasGFX90AInsts() + ? CSR_AMDGPU_SI_Gfx_With_AGPRs_RegMask + : CSR_AMDGPU_SI_Gfx_RegMask; default: return nullptr; } @@ -1073,7 +1079,7 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, } void SIRegisterInfo::buildSpillLoadStore( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO, RegScavenger *RS, LivePhysRegs *LiveRegs) const { @@ -1085,7 +1091,6 @@ void SIRegisterInfo::buildSpillLoadStore( const SIMachineFunctionInfo *FuncInfo = MF->getInfo(); const MCInstrDesc *Desc = &TII->get(LoadStoreOp); - const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); bool IsStore = Desc->mayStore(); bool IsFlat = TII->isFLATScratch(LoadStoreOp); @@ -1349,12 +1354,12 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, if (IsLoad) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; - buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg, - Offset * SB.EltSize, MMO, SB.RS); + buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false, + FrameReg, Offset * SB.EltSize, MMO, SB.RS); } else { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; - buildSpillLoadStore(*SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, + buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill, FrameReg, Offset * SB.EltSize, MMO, SB.RS); // This only ever adds one VGPR spill SB.MFI.addToSpilledVGPRs(1); @@ -1747,7 +1752,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, : AMDGPU::BUFFER_STORE_DWORD_OFFSET; auto *MBB = MI->getParent(); buildSpillLoadStore( - *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, + *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), *MI->memoperands_begin(), RS); MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode())); @@ -1783,7 +1788,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; auto *MBB = MI->getParent(); buildSpillLoadStore( - *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, + *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg, TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), *MI->memoperands_begin(), RS); MI->eraseFromParent(); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index a4b0a5e13fec1..8d90ddb1cf4cf 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -371,10 +371,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { // For creating spill instructions during frame lowering, where no scavenger // is available, LiveRegs can be used. 
 void buildSpillLoadStore(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
-                          int Index, Register ValueReg, bool ValueIsKill,
-                          MCRegister ScratchOffsetReg, int64_t InstrOffset,
-                          MachineMemOperand *MMO, RegScavenger *RS,
+                          MachineBasicBlock::iterator MI, const DebugLoc &DL,
+                          unsigned LoadStoreOp, int Index, Register ValueReg,
+                          bool ValueIsKill, MCRegister ScratchOffsetReg,
+                          int64_t InstrOffset, MachineMemOperand *MMO,
+                          RegScavenger *RS,
                           LivePhysRegs *LiveRegs = nullptr) const;
 };
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 49dbb895ef2c5..efae899694870 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -691,6 +691,14 @@ def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
   let AllocationPriority = SGPR_64.AllocationPriority;
 }
 
+// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
+def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+                                    (add (trunc (shl SGPR_64, 15), 1), // s[30:31]
+                                         (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[62:63]
+  let CopyCost = SGPR_64.CopyCost;
+  let AllocationPriority = SGPR_64.AllocationPriority;
+}
+
 def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
                             (add TTMP_64Regs)> {
   let isAllocatable = 0;
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 1f6781301bfe1..6f63f686635a5 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1029,11 +1029,8 @@ void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {
   SmallVector SplitPoints;
   char State = BI.InitialState;
 
-  auto II = MBB.getFirstNonPHI(), IE = MBB.end();
-  while (II != IE) {
-    auto Next = std::next(II);
-    MachineInstr &MI = *II;
-
+  for (MachineInstr &MI : llvm::make_early_inc_range(
+           llvm::make_range(MBB.getFirstNonPHI(), MBB.end()))) {
     if (StateTransition.count(&MI))
       State = StateTransition[&MI];
 
@@ -1051,8 +1048,6 @@ void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {
     }
     if (SplitPoint)
       SplitPoints.push_back(SplitPoint);
-
-    II = Next;
   }
 
   // Perform splitting after instruction scan to simplify iteration.
@@ -1498,11 +1493,6 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
 }
 
 bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
-  // This pass is a convenient place to re-enable machine verification after the
-  // problems caused by SILowerControlFlow have been fixed.
-  MF.getProperties().reset(
-      MachineFunctionProperties::Property::FailsVerification);
-
   LLVM_DEBUG(dbgs() << "SI Whole Quad Mode on " << MF.getName()
                     << " ------------- \n");
   LLVM_DEBUG(MF.dump(););
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 2c2a31898ef88..61ecc13620a11 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -265,6 +265,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
 
 let isReturn = 1 in {
 // Define variant marked as return rather than branch.
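In the Gfx_CCR_SGPR_64 definition above, shl drops the leading elements of the SGPR_64 sequence and trunc keeps a prefix of what remains, so the two terms select s[30:31] and s[36:37] through s[62:63]. The same list arithmetic in plain C++, as a model of the TableGen dag set operators (the helper names mirror the operators, nothing more):

#include <cassert>
#include <string>
#include <vector>

std::vector<std::string> shl(std::vector<std::string> L, std::size_t N) {
  return {L.begin() + N, L.end()}; // drop the first N elements
}
std::vector<std::string> trunc(std::vector<std::string> L, std::size_t N) {
  return {L.begin(), L.begin() + N}; // keep the first N elements
}

int main() {
  // SGPR_64 enumerates the aligned pairs s[0:1], s[2:3], ..., s[104:105],
  // so element I is s[2I : 2I+1].
  std::vector<std::string> SGPR64;
  for (int I = 0; I <= 104; I += 2)
    SGPR64.push_back("s[" + std::to_string(I) + ":" +
                     std::to_string(I + 1) + "]");

  auto A = trunc(shl(SGPR64, 15), 1);  // -> { s[30:31] }
  auto B = trunc(shl(SGPR64, 18), 14); // -> s[36:37] .. s[62:63]
  assert(A.front() == "s[30:31]");
  assert(B.front() == "s[36:37]" && B.back() == "s[62:63]");
}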
def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>; +def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>; } } // End isTerminator = 1, isBarrier = 1 diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index f19cf84253f00..a94e1012b236c 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3265,9 +3265,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr( // live-out. If it is live-out, do not optimize. if (!isSafe) { MachineBasicBlock *MBB = CmpInstr.getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(ARM::CPSR)) + for (MachineBasicBlock *Succ : MBB->successors()) + if (Succ->isLiveIn(ARM::CPSR)) return false; } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e7747e4cf25db..f49d39a4a8386 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -10956,10 +10956,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, static MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I != Succ) - return *I; + for (MachineBasicBlock *S : MBB->successors()) + if (S != Succ) + return S; llvm_unreachable("Expecting a BB with two successors!"); } @@ -11457,13 +11456,9 @@ static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr, // If we hit the end of the block, check whether CPSR is live into a // successor. if (miI == BB->end()) { - for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), - sEnd = BB->succ_end(); - sItr != sEnd; ++sItr) { - MachineBasicBlock* succ = *sItr; - if (succ->isLiveIn(ARM::CPSR)) + for (MachineBasicBlock *Succ : BB->successors()) + if (Succ->isLiveIn(ARM::CPSR)) return false; - } } // We found a def, or hit the end of the basic block and CPSR wasn't live diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 35c09f8f50aaf..833c7effd31cd 100644 --- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -541,7 +541,6 @@ void ARMPassConfig::addPreSched2() { return !MF.getSubtarget().isThumb1Only(); })); } - addPass(createMVEVPTBlockPass()); addPass(createThumb2ITBlockPass()); // Add both scheduling passes to give the subtarget an opportunity to pick @@ -551,6 +550,7 @@ void ARMPassConfig::addPreSched2() { addPass(&PostRASchedulerID); } + addPass(createMVEVPTBlockPass()); addPass(createARMIndirectThunks()); addPass(createARMSLSHardeningPass()); } diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3aa9491001cf1..64d2e1bfa9b23 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -10961,7 +10961,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Only after the instruction is fully processed, we can validate it if (wasInITBlock && hasV8Ops() && isThumb() && - !isV8EligibleForIT(&Inst)) { + !isV8EligibleForIT(&Inst) && !getTargetOptions().MCNoDeprecatedWarn) { Warning(IDLoc, "deprecated instruction in IT block"); } } diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 
4981b8051657a..a911344b6b1b1 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -76,6 +76,7 @@ class MVEGatherScatterLowering : public FunctionPass { private: LoopInfo *LI = nullptr; + const DataLayout *DL; // Check this is a valid gather with correct alignment bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize, @@ -149,10 +150,10 @@ class MVEGatherScatterLowering : public FunctionPass { bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI); // Pushes the given add out of the loop void pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex); - // Pushes the given mul out of the loop - void pushOutMul(PHINode *&Phi, Value *IncrementPerRound, - Value *OffsSecondOperand, unsigned LoopIncrement, - IRBuilder<> &Builder); + // Pushes the given mul or shl out of the loop + void pushOutMulShl(unsigned Opc, PHINode *&Phi, Value *IncrementPerRound, + Value *OffsSecondOperand, unsigned LoopIncrement, + IRBuilder<> &Builder); }; } // end anonymous namespace @@ -335,14 +336,15 @@ int MVEGatherScatterLowering::computeScale(unsigned GEPElemSize, Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) { const Constant *C = dyn_cast<Constant>(V); - if (C != nullptr) + if (C && C->getSplatValue()) return Optional<int64_t>{C->getUniqueInteger().getSExtValue()}; if (!isa<Instruction>(V)) return Optional<int64_t>{}; const Instruction *I = cast<Instruction>(V); - if (I->getOpcode() == Instruction::Add || - I->getOpcode() == Instruction::Mul) { + if (I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::Mul || + I->getOpcode() == Instruction::Shl) { Optional<int64_t> Op0 = getIfConst(I->getOperand(0)); Optional<int64_t> Op1 = getIfConst(I->getOperand(1)); if (!Op0 || !Op1) @@ -351,18 +353,30 @@ Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) { return Optional<int64_t>{Op0.getValue() + Op1.getValue()}; if (I->getOpcode() == Instruction::Mul) return Optional<int64_t>{Op0.getValue() * Op1.getValue()}; + if (I->getOpcode() == Instruction::Shl) + return Optional<int64_t>{Op0.getValue() << Op1.getValue()}; + if (I->getOpcode() == Instruction::Or) + return Optional<int64_t>{Op0.getValue() | Op1.getValue()}; } return Optional<int64_t>{}; } +// Return true if I is an Or instruction that is equivalent to an add, due to +// the operands having no common bits set. +static bool isAddLikeOr(Instruction *I, const DataLayout &DL) { + return I->getOpcode() == Instruction::Or && + haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL); +} + std::pair<Value *, int64_t> MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) { std::pair<Value *, int64_t> ReturnFalse = std::pair<Value *, int64_t>(nullptr, 0); - // At this point, the instruction we're looking at must be an add or we - // bail out + // At this point, the instruction we're looking at must be an add or an + // add-like-or.
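// Editorial aside, not part of the patch: the add-like-or equivalence used
// here has a one-line worked case. For any X, (X << 2) leaves its low two
// bits clear, so
//   (X << 2) | 3  ==  (X << 2) + 3
// because no bit position can produce a carry. haveNoCommonBitsSet() proves
// exactly this disjointness via known-bits analysis, which lets every Or
// accepted by isAddLikeOr() be pushed around as if it were an Add.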
Instruction *Add = dyn_cast<Instruction>(Inst); - if (Add == nullptr || Add->getOpcode() != Instruction::Add) + if (Add == nullptr || + (Add->getOpcode() != Instruction::Add && !isAddLikeOr(Add, *DL))) return ReturnFalse; Value *Summand; @@ -737,10 +751,9 @@ Instruction *MVEGatherScatterLowering::tryCreateIncrementingGatScat( // The gep was in charge of making sure the offsets are scaled correctly // - calculate that factor so it can be applied by hand - DataLayout DT = I->getParent()->getParent()->getParent()->getDataLayout(); int TypeScale = - computeScale(DT.getTypeSizeInBits(GEP->getOperand(0)->getType()), - DT.getTypeSizeInBits(GEP->getType()) / + computeScale(DL->getTypeSizeInBits(GEP->getOperand(0)->getType()), + DL->getTypeSizeInBits(GEP->getType()) / cast<FixedVectorType>(GEP->getType())->getNumElements()); if (TypeScale == -1) return nullptr; @@ -888,11 +901,11 @@ void MVEGatherScatterLowering::pushOutAdd(PHINode *&Phi, Phi->removeIncomingValue(StartIndex); } -void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi, - Value *IncrementPerRound, - Value *OffsSecondOperand, - unsigned LoopIncrement, - IRBuilder<> &Builder) { +void MVEGatherScatterLowering::pushOutMulShl(unsigned Opcode, PHINode *&Phi, + Value *IncrementPerRound, + Value *OffsSecondOperand, + unsigned LoopIncrement, + IRBuilder<> &Builder) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising mul instruction\n"); // Create a new scalar add outside of the loop and transform it to a splat @@ -901,12 +914,13 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi, Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back()); // Create a new index - Value *StartIndex = BinaryOperator::Create( - Instruction::Mul, Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1), - OffsSecondOperand, "PushedOutMul", InsertionPoint); + Value *StartIndex = + BinaryOperator::Create((Instruction::BinaryOps)Opcode, + Phi->getIncomingValue(LoopIncrement == 1 ?
0 : 1), + OffsSecondOperand, "PushedOutMul", InsertionPoint); Instruction *Product = - BinaryOperator::Create(Instruction::Mul, IncrementPerRound, + BinaryOperator::Create((Instruction::BinaryOps)Opcode, IncrementPerRound, OffsSecondOperand, "Product", InsertionPoint); // Increment NewIndex by Product instead of the multiplication Instruction *NewIncrement = BinaryOperator::Create( @@ -923,7 +937,7 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi, // Check whether all usages of this instruction are as offsets of // gathers/scatters or simple arithmetic only used by gathers/scatters -static bool hasAllGatScatUsers(Instruction *I) { +static bool hasAllGatScatUsers(Instruction *I, const DataLayout &DL) { if (I->hasNUses(0)) { return false; } @@ -936,8 +950,10 @@ static bool hasAllGatScatUsers(Instruction *I) { return Gatscat; } else { unsigned OpCode = cast<Instruction>(U)->getOpcode(); - if ((OpCode == Instruction::Add || OpCode == Instruction::Mul) && - hasAllGatScatUsers(cast<Instruction>(U))) { + if ((OpCode == Instruction::Add || OpCode == Instruction::Mul || + OpCode == Instruction::Shl || + isAddLikeOr(cast<Instruction>(U), DL)) && + hasAllGatScatUsers(cast<Instruction>(U), DL)) { continue; } return false; @@ -955,14 +971,15 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, if (!isa<Instruction>(Offsets)) return false; Instruction *Offs = cast<Instruction>(Offsets); - if (Offs->getOpcode() != Instruction::Add && - Offs->getOpcode() != Instruction::Mul) + if (Offs->getOpcode() != Instruction::Add && !isAddLikeOr(Offs, *DL) && + Offs->getOpcode() != Instruction::Mul && + Offs->getOpcode() != Instruction::Shl) return false; Loop *L = LI->getLoopFor(BB); if (L == nullptr) return false; if (!Offs->hasOneUse()) { - if (!hasAllGatScatUsers(Offs)) + if (!hasAllGatScatUsers(Offs, *DL)) return false; } @@ -1060,11 +1077,13 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, switch (Offs->getOpcode()) { case Instruction::Add: + case Instruction::Or: pushOutAdd(NewPhi, OffsSecondOperand, IncrementingBlock == 1 ? 0 : 1); break; case Instruction::Mul: - pushOutMul(NewPhi, IncrementPerRound, OffsSecondOperand, IncrementingBlock, - Builder); + case Instruction::Shl: + pushOutMulShl(Offs->getOpcode(), NewPhi, IncrementPerRound, + OffsSecondOperand, IncrementingBlock, Builder); break; default: return false; @@ -1214,6 +1233,7 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) { if (!ST->hasMVEIntegerOps()) return false; LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DL = &F.getParent()->getDataLayout(); SmallVector<IntrinsicInst *, 4> Gathers; SmallVector<IntrinsicInst *, 4> Scatters; diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp index a4bde546442b3..672611ea22347 100644 --- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp +++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp @@ -303,16 +303,16 @@ static void fixStackStores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const TargetInstrInfo &TII, Register FP) { // Iterate through the BB until we hit a call instruction or we reach the end. - for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) { - MachineBasicBlock::iterator NextMI = std::next(I); - MachineInstr &MI = *I; - unsigned Opcode = I->getOpcode(); + for (MachineInstr &MI : + llvm::make_early_inc_range(llvm::make_range(MI, MBB.end()))) { + if (MI.isCall()) + break; + + unsigned Opcode = MI.getOpcode(); // Only care about pseudo store instructions where SP is the base pointer.
- if (Opcode != AVR::STDSPQRr && Opcode != AVR::STDWSPQRr) { - I = NextMI; + if (Opcode != AVR::STDSPQRr && Opcode != AVR::STDWSPQRr) continue; - } assert(MI.getOperand(0).getReg() == AVR::SP && "Invalid register, should be SP!"); @@ -324,8 +324,6 @@ static void fixStackStores(MachineBasicBlock &MBB, MI.setDesc(TII.get(STOpc)); MI.getOperand(0).setReg(FP); - - I = NextMI; } } diff --git a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp index 7088d55e1a714..64bedf4da1ddc 100644 --- a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp +++ b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp @@ -15,6 +15,7 @@ #include "BPFTargetMachine.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsBPF.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" @@ -66,6 +67,7 @@ class BPFAdjustOptImpl { Module *M; SmallVector<Instruction *, 16> PassThroughs; + bool adjustICmpToBuiltin(); void adjustBasicBlock(BasicBlock &BB); bool serializeICMPCrossBB(BasicBlock &BB); void adjustInst(Instruction &I); @@ -85,14 +87,72 @@ ModulePass *llvm::createBPFAdjustOpt() { return new BPFAdjustOpt(); } bool BPFAdjustOpt::runOnModule(Module &M) { return BPFAdjustOptImpl(&M).run(); } bool BPFAdjustOptImpl::run() { + bool Changed = adjustICmpToBuiltin(); + for (Function &F : *M) for (auto &BB : F) { adjustBasicBlock(BB); for (auto &I : BB) adjustInst(I); } + return insertPassThrough() || Changed; +} + +// Commit acabad9ff6bf ("[InstCombine] try to canonicalize icmp with +// trunc op into mask and cmp") added a transformation to +// convert "(conv)a < power_2_const" to "a & <const>" in certain +// cases, and the BPF kernel verifier has to handle the resulting code +// conservatively, which may reject otherwise legitimate programs. +// Here, we change the related icmp code to a builtin which will +// be restored to the original icmp code later, to prevent that +// InstCombine transformation.
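// Editorial sketch of the canonicalization being worked around (assumed IR
// shapes, not taken from the commit). InstCombine rewrites a compare of a
// truncated value against a power of two, e.g.
//   %t = trunc i64 %a to i8
//   %c = icmp ult i8 %t, 16
// into a mask-and-compare along the lines of
//   %m = and i64 %a, 240
//   %c = icmp eq i64 %m, 0
// The two forms are equivalent, but the BPF verifier tracks the and/eq pair
// less precisely than the original range check.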
+bool BPFAdjustOptImpl::adjustICmpToBuiltin() { + bool Changed = false; + ICmpInst *ToBeDeleted = nullptr; + for (Function &F : *M) + for (auto &BB : F) + for (auto &I : BB) { + if (ToBeDeleted) { + ToBeDeleted->eraseFromParent(); + ToBeDeleted = nullptr; + } + + auto *Icmp = dyn_cast(&I); + if (!Icmp) + continue; + + Value *Op0 = Icmp->getOperand(0); + if (!isa(Op0)) + continue; + + auto ConstOp1 = dyn_cast(Icmp->getOperand(1)); + if (!ConstOp1) + continue; + + auto ConstOp1Val = ConstOp1->getValue().getZExtValue(); + auto Op = Icmp->getPredicate(); + if (Op == ICmpInst::ICMP_ULT) { + if ((ConstOp1Val - 1) & ConstOp1Val) + continue; + } else if (Op == ICmpInst::ICMP_ULE) { + if (ConstOp1Val & (ConstOp1Val + 1)) + continue; + } else { + continue; + } + + Constant *Opcode = + ConstantInt::get(Type::getInt32Ty(BB.getContext()), Op); + Function *Fn = Intrinsic::getDeclaration( + M, Intrinsic::bpf_compare, {Op0->getType(), ConstOp1->getType()}); + auto *NewInst = CallInst::Create(Fn, {Opcode, Op0, ConstOp1}); + BB.getInstList().insert(I.getIterator(), NewInst); + Icmp->replaceAllUsesWith(NewInst); + Changed = true; + ToBeDeleted = Icmp; + } - return insertPassThrough(); + return Changed; } bool BPFAdjustOptImpl::insertPassThrough() { diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp index 5239218ad0035..cf1bc3f7c5bc0 100644 --- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp +++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp @@ -46,6 +46,7 @@ class BPFCheckAndAdjustIR final : public ModulePass { void checkIR(Module &M); bool adjustIR(Module &M); bool removePassThroughBuiltin(Module &M); + bool removeCompareBuiltin(Module &M); }; } // End anonymous namespace @@ -120,8 +121,50 @@ bool BPFCheckAndAdjustIR::removePassThroughBuiltin(Module &M) { return Changed; } +bool BPFCheckAndAdjustIR::removeCompareBuiltin(Module &M) { + // Remove __builtin_bpf_compare()'s which are used to prevent + // certain IR optimizations. Now major IR optimizations are done, + // remove them. 
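// Editorial sketch (assumed IR shapes, not from the commit): by this point a
// protected compare looks roughly like
//   %c = call i1 @llvm.bpf.compare.i64.i64(i32 36, i64 %a, i64 16)
// where 36 is CmpInst::ICMP_ULT, and the loop below rebuilds the plain
//   %c = icmp ult i64 %a, 16
// in its place before erasing the call.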
+ bool Changed = false; + CallInst *ToBeDeleted = nullptr; + for (Function &F : M) + for (auto &BB : F) + for (auto &I : BB) { + if (ToBeDeleted) { + ToBeDeleted->eraseFromParent(); + ToBeDeleted = nullptr; + } + + auto *Call = dyn_cast(&I); + if (!Call) + continue; + auto *GV = dyn_cast(Call->getCalledOperand()); + if (!GV) + continue; + if (!GV->getName().startswith("llvm.bpf.compare")) + continue; + + Changed = true; + Value *Arg0 = Call->getArgOperand(0); + Value *Arg1 = Call->getArgOperand(1); + Value *Arg2 = Call->getArgOperand(2); + + auto OpVal = cast(Arg0)->getValue().getZExtValue(); + CmpInst::Predicate Opcode = (CmpInst::Predicate)OpVal; + + auto *ICmp = new ICmpInst(Opcode, Arg1, Arg2); + BB.getInstList().insert(Call->getIterator(), ICmp); + + Call->replaceAllUsesWith(ICmp); + ToBeDeleted = Call; + } + return Changed; +} + bool BPFCheckAndAdjustIR::adjustIR(Module &M) { - return removePassThroughBuiltin(M); + bool Changed = removePassThroughBuiltin(M); + Changed = removeCompareBuiltin(M) || Changed; + return Changed; } bool BPFCheckAndAdjustIR::runOnModule(Module &M) { diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp index ae1f5ea21c127..7e829ea43e89f 100644 --- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp +++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp @@ -97,15 +97,13 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, // Go through all uses of %1 as in %1 = ADD_rr %2, %3 const MachineOperand Op0 = Inst->getOperand(0); - auto Begin = MRI->use_begin(Op0.getReg()), End = MRI->use_end(); - decltype(End) NextI; - for (auto I = Begin; I != End; I = NextI) { - NextI = std::next(I); + for (MachineOperand &MO : + llvm::make_early_inc_range(MRI->use_operands(Op0.getReg()))) { // The candidate needs to have a unique definition. - if (!MRI->getUniqueVRegDef(I->getReg())) + if (!MRI->getUniqueVRegDef(MO.getReg())) continue; - MachineInstr *DefInst = I->getParent(); + MachineInstr *DefInst = MO.getParent(); unsigned Opcode = DefInst->getOpcode(); unsigned COREOp; if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW || @@ -131,7 +129,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI, Opcode == BPF::STD || Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32) { const MachineOperand &Opnd = DefInst->getOperand(0); - if (Opnd.isReg() && Opnd.getReg() == I->getReg()) + if (Opnd.isReg() && Opnd.getReg() == MO.getReg()) continue; } diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def index aeb81c6fecaf5..0ae4194bc512e 100644 --- a/llvm/lib/Target/BPF/BTF.def +++ b/llvm/lib/Target/BPF/BTF.def @@ -32,5 +32,6 @@ HANDLE_BTF_KIND(14, VAR) HANDLE_BTF_KIND(15, DATASEC) HANDLE_BTF_KIND(16, FLOAT) HANDLE_BTF_KIND(17, DECL_TAG) +HANDLE_BTF_KIND(18, TYPE_TAG) #undef HANDLE_BTF_KIND diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h index 95a64609ae389..e54b97cd49a98 100644 --- a/llvm/lib/Target/BPF/BTF.h +++ b/llvm/lib/Target/BPF/BTF.h @@ -113,7 +113,7 @@ struct CommonType { /// "Size" tells the size of the type it is describing. /// /// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - /// FUNC, FUNC_PROTO, VAR and DECL_TAG. + /// FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG. /// "Type" is a type_id referring to another type. 
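// Editorial aside, not part of the patch: at the C level a TYPE_TAG entry is
// what kernel-style tagged pointer declarations lower to, e.g. the
// illustrative declaration
//   int __attribute__((btf_type_tag("user"))) *p;
// produces a BTF_KIND_TYPE_TAG node named "user" spliced into the pointer's
// type chain, with this Type field linking the chain together.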
union { uint32_t Size; diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index bbe8eb1071aeb..401bdfa47659e 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -408,6 +408,19 @@ void BTFTypeDeclTag::emitType(MCStreamer &OS) { OS.emitInt32(Info); } +BTFTypeTypeTag::BTFTypeTypeTag(uint32_t BaseTypeId, StringRef Tag) : Tag(Tag) { + Kind = BTF::BTF_KIND_TYPE_TAG; + BTFType.Info = Kind << 24; + BTFType.Type = BaseTypeId; +} + +void BTFTypeTypeTag::completeType(BTFDebug &BDebug) { + if (IsCompleted) + return; + IsCompleted = true; + BTFType.NameOff = BDebug.addString(Tag); +} + uint32_t BTFStringTable::addString(StringRef S) { // Check whether the string already exists. for (auto &OffsetM : OffsetToIdMap) { @@ -658,9 +671,41 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId, } } - if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef || - Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || - Tag == dwarf::DW_TAG_restrict_type) { + if (Tag == dwarf::DW_TAG_pointer_type) { + SmallVector MDStrs; + DINodeArray Annots = DTy->getAnnotations(); + if (Annots) { + for (const Metadata *Annotations : Annots->operands()) { + const MDNode *MD = cast(Annotations); + const MDString *Name = cast(MD->getOperand(0)); + if (!Name->getString().equals("btf_type_tag")) + continue; + MDStrs.push_back(cast(MD->getOperand(1))); + } + } + + if (MDStrs.size() > 0) { + auto TypeEntry = std::make_unique(DTy, Tag, false); + unsigned TmpTypeId = addType(std::move(TypeEntry)); + for (unsigned I = MDStrs.size(); I > 0; I--) { + const MDString *Value = MDStrs[I - 1]; + if (I != 1) { + auto TypeEntry = + std::make_unique(TmpTypeId, Value->getString()); + TmpTypeId = addType(std::move(TypeEntry)); + } else { + auto TypeEntry = + std::make_unique(TmpTypeId, Value->getString()); + TypeId = addType(std::move(TypeEntry), DTy); + } + } + } else { + auto TypeEntry = std::make_unique(DTy, Tag, false); + TypeId = addType(std::move(TypeEntry), DTy); + } + } else if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type || + Tag == dwarf::DW_TAG_volatile_type || + Tag == dwarf::DW_TAG_restrict_type) { auto TypeEntry = std::make_unique(DTy, Tag, false); TypeId = addType(std::move(TypeEntry), DTy); if (Tag == dwarf::DW_TAG_typedef) diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h index 9e0203f292b26..69b53f20ab717 100644 --- a/llvm/lib/Target/BPF/BTFDebug.h +++ b/llvm/lib/Target/BPF/BTFDebug.h @@ -216,6 +216,14 @@ class BTFTypeDeclTag : public BTFTypeBase { void emitType(MCStreamer &OS) override; }; +class BTFTypeTypeTag : public BTFTypeBase { + StringRef Tag; + +public: + BTFTypeTypeTag(uint32_t BaseTypeId, StringRef Tag); + void completeType(BTFDebug &BDebug) override; +}; + /// String table. class BTFStringTable { /// String table size in bytes. 
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp index df85c8cdb96da..ebc04b40d428d 100644 --- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp +++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp @@ -755,6 +755,10 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc(); return Error(ErrorLoc, "register is out of range"); } + case Match_InvalidSPOperand: { + SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc(); + return Error(ErrorLoc, "operand must be sp register"); + } case Match_RequiresSameSrcAndDst: { SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc(); return Error(ErrorLoc, "src and dst operand must be same"); diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td index 499ff4fa7b542..e26781ca6aa1d 100644 --- a/llvm/lib/Target/CSKY/CSKY.td +++ b/llvm/lib/Target/CSKY/CSKY.td @@ -12,6 +12,12 @@ include "llvm/Target/Target.td" // CSKY subtarget features and instruction predicates. //===----------------------------------------------------------------------===// +def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true", + "Use the 16-bit btsti instruction">; +def HasBTST16 : Predicate<"Subtarget->hasBTST16()">, + AssemblerPredicate<(all_of FeatureBTST16), + "Use the 16-bit btsti instruction">; + // Atomic Support def FeatureExtendLrw : SubtargetFeature<"elrw", "HasExtendLrw", "true", "Use the extend LRW instruction">; @@ -19,6 +25,12 @@ def HasExtendLrw : Predicate<"Subtarget->hasExtendLrw()">, AssemblerPredicate<(all_of FeatureExtendLrw), "Use the extend LRW instruction">; +def FeatureJAVA + : SubtargetFeature<"java", "HasJAVA", "true", "Enable java instructions">; +def HasJAVA : Predicate<"Subtarget->hasJAVA()">, + AssemblerPredicate<(all_of FeatureJAVA), + "Enable java instructions">; + def FeatureDoloop : SubtargetFeature<"doloop", "HasDoloop", "true", "Enable doloop instructions">; def HasDoloop : Predicate<"Subtarget->hasDoloop()">, diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td new file mode 100644 index 0000000000000..6d42bddcdd782 --- /dev/null +++ b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td @@ -0,0 +1,219 @@ +//===- CSKYInstrFormats16Instr.td - 16-bit Instr. Formats -*- tablegen --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +class J16 sop, string opstr, dag ins> + : CSKY16Inst { + bits<10> offset; + let Inst{15} = 0; + let Inst{14 - 10} = sop; + let Inst{9 - 0} = offset; +} + +class J16_B sop, string opstr> + : CSKY16Inst { + bits<10> offset; + let Inst{15} = 0; + let Inst{14 - 10} = sop; + let Inst{9 - 0} = offset; +} + +class R16_XYZ sop, string opstr, SDNode opnode> : CSKY16Inst { + bits<3> rz; + bits<3> rx; + bits<3> ry; + let Inst{15 - 11} = 0b01011; + let Inst{10 - 8} = rx; + let Inst{7 - 5} = rz; + let Inst{4 - 2} = ry; + let Inst{1, 0} = sop; +} + +class R16_XZ_BINOP op, bits<2> sop, string opstr, PatFrag opnode> : CSKY16Inst< + AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rZ, sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"), + [(set sGPR:$rz, (opnode sGPR:$rZ, sGPR:$rx))]> { + bits<4> rz; + bits<4> rx; + let Inst{15, 14} = 0b01; + let Inst{13 - 10} = op; + let Inst{9 - 6} = rz; + let Inst{5 - 2} = rx; + let Inst{1, 0} = sop; + let Constraints = "$rz = $rZ"; +} + +class R16_XZ_BINOP_NOPat op, bits<2> sop, string opstr> : CSKY16Inst< + AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rZ, sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"), + []> { + bits<4> rz; + bits<4> rx; + let Inst{15, 14} = 0b01; + let Inst{13 - 10} = op; + let Inst{9 - 6} = rz; + let Inst{5 - 2} = rx; + let Inst{1, 0} = sop; + let Constraints = "$rz = $rZ"; +} + +class R16_XZ_BINOP_C op, bits<2> sop, string opstr> : CSKY16Inst< + AddrModeNone, (outs sGPR:$rz, CARRY:$cout), + (ins sGPR:$rZ, sGPR:$rx, CARRY:$cin), !strconcat(opstr, "\t$rz, $rx"), []> { + bits<4> rz; + bits<4> rx; + let Inst{15, 14} = 0b01; + let Inst{13 - 10} = op; + let Inst{9 - 6} = rz; + let Inst{5 - 2} = rx; + let Inst{1, 0} = sop; + let Constraints = "$rz = $rZ"; +} + +class R16_XZ_UNOP op, bits<2> sop, string opstr> : CSKY16Inst< + AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"), + []> { + bits<4> rz; + bits<4> rx; + let Inst{15, 14} = 0b01; + let Inst{13 - 10} = op; + let Inst{9 - 6} = rz; + let Inst{5 - 2} = rx; + let Inst{1, 0} = sop; +} + +class R16_XY_CMP sop, string opstr> : CSKY16Inst< + AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx, sGPR:$ry), !strconcat(opstr, "\t$rx, $ry"), + []> { + bits<4> ry; + bits<4> rx; + let Inst{15, 14} = 0b01; + let Inst{13 - 10} = 0b1001; + let Inst{9 - 6} = ry; + let Inst{5 - 2} = rx; + let Inst{1, 0} = sop; + let isCompare = 1; +} + +class R16_X_J op_rz, bits<2> sop, string opstr> : CSKY16Inst< + AddrModeNone, (outs), (ins sGPR:$rx), !strconcat(opstr, "\t$rx"), []> { + bits<4> rx; + let Inst{15, 14} = 0b01; + let Inst{13 - 6} = op_rz; + let Inst{5 - 2} = rx; + let Inst{1, 0} = sop; +} + +class I16_Z_8 op, dag ins, string asmstr> + : CSKY16Inst { + bits<3> rz; + bits<8> imm8; + let Inst{15, 14} = 0b00; + let Inst{13 - 11} = op; + let Inst{10 - 8} = rz; + let Inst{7 - 0} = imm8; +} + +class I16_Z_5 sop, dag outs, dag ins,string opstr> + : CSKY16Inst { + bits<3> rz; + bits<5> imm5; + let Inst{15, 14} = 0b00; + let Inst{13 - 11} = 0b111; + let Inst{10 - 8} = rz; + let Inst{7 - 5} = sop; + let Inst{4 - 0} = imm5; +} + +class I16_X_CMP sop, string opstr, Operand Immoperand> : CSKY16Inst< + AddrModeNone, (outs CARRY:$ca), (ins mGPR:$rx, Immoperand:$imm5), + !strconcat(opstr, "\t$rx, $imm5"), []> { + bits<3> rx; + bits<5> imm5; + let Inst{15, 14} = 0b00; + let Inst{13 - 11} = 0b111; + let Inst{10 - 8} = rx; + let Inst{7 - 5} = sop; + let Inst{4 - 0} = imm5; + let isCompare = 1; 
+} + +class I16_SP_IMM7 sop, string opstr> : CSKY16Inst< + AddrModeNone, (outs SPOp:$sp2), (ins SPOp:$sp1, uimm7_2:$imm7), + !strconcat(opstr, "\t$sp2, $sp1, $imm7"), []> { + bits<7> imm7; + let Inst{15, 14} = 0b00; + let Inst{13 - 10} = 0b0101; + let Inst{9, 8} = imm7{6,5}; + let Inst{7 - 5} = sop; + let Inst{4 - 0} = imm7{4 - 0}; +} + +class I16_XZ_IMM5 sop, string opstr, SDNode opnode> : CSKY16Inst< + AddrModeNone, (outs mGPR:$rz), (ins mGPR:$rx, uimm5:$imm5), + !strconcat(opstr, "\t$rz, $rx, $imm5"), [(set mGPR:$rz, (opnode mGPR:$rx, uimm5:$imm5))]> { + bits<3> rx; + bits<3> rz; + bits<5> imm5; + let Inst{15, 14} = 0b01; + let Inst{13 - 11} = sop; + let Inst{10 - 8} = rx; + let Inst{7 - 5} = rz; + let Inst{4 - 0} = imm5; +} + +class I16_XZ_LDST sop, string opstr, dag outs, dag ins> + : CSKY16Inst { + bits<3> rx; + bits<3> rz; + bits<5> imm; + let Inst{15, 14} = 0b10; + let Inst{13 - 11} = sop; + let Inst{10 - 8} = rx; + let Inst{7 - 5} = rz; + let Inst{4 - 0} = imm; +} + +class I16_ZSP_LDST sop, string opstr, dag outs, dag ins> : CSKY16Inst< + am, outs, ins, !strconcat(opstr, "\t$rz, ($sp, ${addr})"), + []> { + bits<3> rz; + bits<8> addr; + let Inst{15, 14} = 0b10; + let Inst{13 - 11} = sop; + let Inst{10 - 8} = addr{7 - 5}; + let Inst{7 - 5} = rz; + let Inst{4 - 0} = addr{4 - 0}; +} + +class I16_XZ_IMM3 sop, string opstr, SDNode opnode> : CSKY16Inst< + AddrModeNone, (outs mGPR:$rz), (ins mGPR:$rx, oimm3:$oimm3), + !strconcat(opstr, "\t$rz, $rx, $oimm3"), [(set mGPR:$rz, (opnode mGPR:$rx, oimm3:$oimm3))]> { + bits<3> rx; + bits<3> rz; + bits<3> oimm3; + let Inst{15, 14} = 0b01; + let Inst{13 - 11} = 0b011; + let Inst{10 - 8} = rx; + let Inst{7 - 5} = rz; + let Inst{4 - 2} = oimm3; + let Inst{1, 0} = sop; +} + +class I16_BPushPop op, bits<2> uop, dag out, dag ins, string opstr> : + CSKY16Inst{ + bits<3> rz; + let Inst{15- 5} = op; + let Inst{4 -2} = rz; + let Inst{1,0} = uop; + let Predicates = [HasJAVA]; + let hasSideEffects = 1; +} diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td index 628d5b57c7e66..9dda3159e446e 100644 --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td @@ -947,3 +947,5 @@ def PseudoTLSLA32 : CSKYPseudo<(outs GPR:$dst1, GPR:$dst2), let hasSideEffects = 0, isNotDuplicable = 1 in def CONSTPOOL_ENTRY : CSKYPseudo<(outs), (ins i32imm:$instid, i32imm:$cpidx, i32imm:$size), "", []>; + +include "CSKYInstrInfo16Instr.td" diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td new file mode 100644 index 0000000000000..c98f436221557 --- /dev/null +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td @@ -0,0 +1,452 @@ +//===-- CSKYInstrInfo16Instr.td - CSKY 16-bit Instruction --*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the CSKY 16-bit instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// CSKY specific DAG Nodes. +//===----------------------------------------------------------------------===// + +// Target-dependent nodes. 
+def CSKY_NIE : SDNode<"CSKYISD::NIE", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; +def CSKY_NIR : SDNode<"CSKYISD::NIR", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. +//===----------------------------------------------------------------------===// + +def br_symbol_16bit : Operand { + let EncoderMethod = + "getBranchSymbolOpValue"; + let ParserMatchClass = CSKYSymbol; + let DecoderMethod = "decodeSImmOperand<10, 1>"; + let PrintMethod = "printCSKYSymbolOperand"; + let OperandType = "OPERAND_PCREL"; +} + +def SPOperand : AsmOperandClass { + let Name = "SPOperand"; + let RenderMethod = "addRegOperands"; + let DiagnosticType = !strconcat("Invalid", Name); +} + +def SPOp : RegisterOperand { + let ParserMatchClass = SPOperand; +} + +def constpool_symbol_16bit : Operand { + let ParserMatchClass = Constpool; + let EncoderMethod = + "getConstpoolSymbolOpValue"; + let DecoderMethod = "decodeLRW16Imm8"; + let PrintMethod = "printConstpool"; + let OperandType = "OPERAND_PCREL"; +} + +//===----------------------------------------------------------------------===// +// Instruction Formats +//===----------------------------------------------------------------------===// + +include "CSKYInstrFormats16Instr.td" + +//===----------------------------------------------------------------------===// +// Instruction definitions. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Basic ALU instructions. +//===----------------------------------------------------------------------===// + +let isCommutable = 1, isAdd = 1 in + def ADDU16 : R16_XYZ<0, "addu16", add>; +let Pattern = [(set mGPR:$rz, (sub mGPR:$rx, mGPR:$ry))] in + def SUBU16 : R16_XYZ<1, "subu16", sub>; + +let isCommutable = 1, isAdd = 1 in + def ADDC16 : R16_XZ_BINOP_C<0b1000, 0b01, "addc16">; +def SUBC16 : R16_XZ_BINOP_C<0b1000, 0b11, "subc16">; + +let isCommutable = 1 in { + let isAdd = 1 in + def ADDU16XZ : R16_XZ_BINOP<0b1000, 0b00, "addu16", BinOpFrag<(add node:$LHS, node:$RHS)>>; + def AND16 : R16_XZ_BINOP<0b1010, 0b00, "and16", BinOpFrag<(and node:$LHS, node:$RHS)>>; + def OR16 : R16_XZ_BINOP<0b1011, 0b00, "or16", BinOpFrag<(or node:$LHS, node:$RHS)>>; + def XOR16 : R16_XZ_BINOP<0b1011, 0b01, "xor16", BinOpFrag<(xor node:$LHS, node:$RHS)>>; + def NOR16 : R16_XZ_BINOP<0b1011, 0b10, "nor16", BinOpFrag<(not (or node:$LHS, node:$RHS))>>; + let isCodeGenOnly = 1 in + def NOT16 : R16_XZ_UNOP<0b1011, 0b10, "not16">; + def MULT16 : R16_XZ_BINOP<0b1111, 0b00, "mult16", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +} +def SUBU16XZ : R16_XZ_BINOP<0b1000, 0b10, "subu16", BinOpFrag<(sub node:$LHS, node:$RHS)>>; +def ANDN16 : R16_XZ_BINOP<0b1010, 0b01, "andn16", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +def LSL16 : R16_XZ_BINOP<0b1100, 0b00, "lsl16", BinOpFrag<(shl node:$LHS, node:$RHS)>>; +def LSR16 : R16_XZ_BINOP<0b1100, 0b01, "lsr16", BinOpFrag<(srl node:$LHS, node:$RHS)>>; +def ASR16 : R16_XZ_BINOP<0b1100, 0b10, "asr16", BinOpFrag<(sra node:$LHS, node:$RHS)>>; +def ROTL16 : R16_XZ_BINOP<0b1100, 0b11, "rotl16", BinOpFrag<(rotl node:$LHS, (and node:$RHS, 0x1f))>>; + +def MULSH16 : R16_XZ_BINOP_NOPat<0b1111, 0b01, "mulsh16">; + +def ZEXTB16 : R16_XZ_UNOP<0b1101, 0b00, "zextb16">; +def ZEXTH16 : R16_XZ_UNOP<0b1101, 0b01, "zexth16">; +def SEXTB16 : R16_XZ_UNOP<0b1101, 0b10, "sextb16">; +def SEXTH16 : 
R16_XZ_UNOP<0b1101, 0b11, "sexth16">; + +let Constraints = "$rZ = $rz", isReMaterializable = 1, isAsCheapAsAMove = 1 in { + let isAdd = 1, Pattern = [(set mGPR:$rz, (add mGPR:$rZ, oimm8:$imm8))] in + def ADDI16 : I16_Z_8<0b100, (ins mGPR:$rZ, oimm8:$imm8), "addi16\t$rz, $imm8">; + let Pattern = [(set mGPR:$rz, (sub mGPR:$rZ, oimm8:$imm8))] in + def SUBI16 : I16_Z_8<0b101, (ins mGPR:$rZ, oimm8:$imm8), "subi16\t$rz, $imm8">; +} + +let isAdd = 1 in +def ADDI16ZSP : I16_Z_8<0b011, (ins SPOp:$sp, uimm8_2:$imm8), + "addi16\t$rz, $sp, $imm8">; + +let isAdd = 1 in +def ADDI16SPSP : I16_SP_IMM7<0b000,"addi16">; +def SUBI16SPSP : I16_SP_IMM7<0b001,"subi16">; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def LSLI16 : I16_XZ_IMM5<0, "lsli16", shl>; + def LSRI16 : I16_XZ_IMM5<1, "lsri16", srl>; + def ASRI16 : I16_XZ_IMM5<2, "asri16", sra>; +} + +let isAdd = 1 in +def ADDI16XZ : I16_XZ_IMM3<0b10, "addi16", add>; +def SUBI16XZ : I16_XZ_IMM3<0b11, "subi16", sub>; + +let Size = 4 in +def NEG16 : CSKYPseudo<(outs mGPR:$rd), (ins mGPR:$rx), "neg16 $rd, $rx", []>; + +let Size = 4 in +def RSUBI16 : CSKYPseudo<(outs mGPR:$rd), + (ins mGPR:$rx, uimm8:$imm8), "rsubi16 $rd, $rx, $imm8", []>; + +//===----------------------------------------------------------------------===// +// Load & Store instructions. +//===----------------------------------------------------------------------===// + +def LD16B : I16_XZ_LDST; +def LD16H : I16_XZ_LDST; +def LD16W : I16_XZ_LDST; +def ST16B : I16_XZ_LDST; +def ST16H : I16_XZ_LDST; +def ST16W : I16_XZ_LDST; + +def LD16WSP : I16_ZSP_LDST; +def ST16WSP : I16_ZSP_LDST; + +//===----------------------------------------------------------------------===// +// Compare instructions. +//===----------------------------------------------------------------------===// + +def CMPHS16 : R16_XY_CMP<0, "cmphs16">; +def CMPLT16 : R16_XY_CMP<1, "cmplt16">; +let isCommutable = 1 in +def CMPNE16 : R16_XY_CMP<2, "cmpne16">; + + +def CMPHSI16 : I16_X_CMP<0, "cmphsi16", oimm5>; +def CMPLTI16 : I16_X_CMP<1, "cmplti16", oimm5>; +def CMPLEI16 : CSKYPseudo<(outs CARRY:$ca), (ins mGPR:$rx, uimm5:$imm5), + "cmplei16\t$rx, $imm5", []>; +def CMPNEI16 : I16_X_CMP<2, "cmpnei16", uimm5>; + +//===----------------------------------------------------------------------===// +// Data move instructions. +//===----------------------------------------------------------------------===// + + +def MOVI16 : I16_Z_8<0b110, (ins uimm8:$imm8), "movi16\t$rz, $imm8"> { + let isReMaterializable = 1; + let isAsCheapAsAMove = 1; + let isMoveImm = 1; + let Pattern = [(set mGPR:$rz, uimm8:$imm8)]; +} + +def MOV16 : CSKY16Inst { + bits<4> rz; + bits<4> rx; + let Inst{15,14} = 0b01; + let Inst{13 - 10} = 0b1011; + let Inst{9 - 6} = rz; + let Inst{5 - 2} = rx; + let Inst{1,0} = 0b11; +} + +// MVC16 is not in "cskyv2 instructions reference manual" +def MVCV16 : CSKY16Inst { + bits<4> rz; + let Inst{15,14} = 0b01; + let Inst{13 - 10} = 0b1001; + let Inst{9 - 6} = rz; + let Inst{5 - 2} = 0; + let Inst{1,0} = 0b11; +} + + +//===----------------------------------------------------------------------===// +// Branch and call instructions.
+//===----------------------------------------------------------------------===// + +let isBranch = 1, isTerminator = 1 in { + let isBarrier = 1, isPredicable = 1 in + def BR16 : J16<1, "br16", (ins br_symbol_16bit:$offset)>; + + def BT16 : J16_B<2, "bt16">; + def BF16 : J16_B<3, "bf16">; +} + +def JMP16 : R16_X_J<0b11100000, 0b00, "jmp16"> { + let isBranch = 1; + let isTerminator = 1; + let isBarrier = 1; + let isIndirectBranch = 1; + let Pattern = [(brind sGPR:$rx)]; +} + +def JSR16 : R16_X_J<0b11101111, 0b01, "jsr16"> { + let isCall = 1; + let Defs = [ R15 ]; +} + +def RTS16 : CSKY16Inst { + let isTerminator = 1; + let isReturn = 1; + let isBarrier = 1; + let Inst = 0b0111100000111100; + let Uses = [R15]; + let isCodeGenOnly = 1; +} + +def JMPIX16 : CSKY16Inst { + bits<3> rx; + bits<2> indeximm2; + let Inst{15,14} = 0b00; + let Inst{13 - 11} = 0b111; + let Inst{10 - 8} = rx; + let Inst{7 - 2} = 0b111000; + let Inst{1,0} = indeximm2; + let Predicates = [HasJAVA]; + let Uses = [R30]; +} + +//===----------------------------------------------------------------------===// +// Symbol address instructions. +//===----------------------------------------------------------------------===// + +def LRW16 : CSKY16Inst { + bits<3> rz; + bits<8> label; + let Inst{15 - 13} = 0b000; + let Inst{12} = label{7}; + let Inst{11,10} = 0b00; + let Inst{9,8} = label{6,5}; + let Inst{7 - 5} = rz; + let Inst{4 - 0} = label{4-0}; + let mayLoad = 1; + let mayStore = 0; +} + +def LRW16_Gen : CSKY16Inst { + bits<3> rz; + bits<8> label; + let Inst{15 - 13} = 0b000; + let Inst{12} = label{7}; + let Inst{11,10} = 0b00; + let Inst{9,8} = label{6,5}; + let Inst{7 - 5} = rz; + let Inst{4 - 0} = label{4-0}; + let mayLoad = 1; + let mayStore = 0; + let isCodeGenOnly = 1; +} + + +//===----------------------------------------------------------------------===// +// Other operation instructions. +//===----------------------------------------------------------------------===// + +def REVB16 : R16_XZ_UNOP<0b1110, 0b10, "revb16">; +def REVH16 : R16_XZ_UNOP<0b1110, 0b11, "revh16">; + +let isCodeGenOnly = 1 in +def SETC16 : CSKY16Inst { + let Inst{15, 14} = 0b01; + let Inst{13 - 10} = 0b1001; + let Inst{9 - 6} = 0; + let Inst{5 - 2} = 0; + let Inst{1, 0} = 0; + let isCompare = 1; +} + +let isCodeGenOnly = 1 in +def CLRC16 : CSKY16Inst { + let Inst{15, 14} = 0b01; + let Inst{13 - 10} = 0b1001; + let Inst{9 - 6} = 0; + let Inst{5 - 2} = 0; + let Inst{1, 0} = 2; + let isCompare = 1; +} + +let Constraints = "$rZ = $rz" in { + def BCLRI16 : I16_Z_5<0b100, (outs mGPR:$rz), (ins mGPR:$rZ, uimm5:$imm5), + "bclri16">; + def BSETI16 : I16_Z_5<0b101, (outs mGPR:$rz), (ins mGPR:$rZ, uimm5:$imm5), + "bseti16">; +} + +let Predicates = [HasBTST16] in + def BTSTI16 : I16_Z_5<0b110, (outs CARRY:$ca), (ins mGPR:$rz, uimm5:$imm5), + "btsti16">; + +def TST16 : CSKY16Inst { + bits<4> ry; + bits<4> rx; + let Inst{15,14} = 0b01; + let Inst{13 - 10} = 0b1010; + let Inst{9 - 6} = ry; + let Inst{5 - 2} = rx; + let Inst{1,0} = 0b10; + let isCompare = 1; +} + +def TSTNBZ16 : CSKY16Inst { + bits<4> rx; + let Inst{15,14} = 0b01; + let Inst{13 - 10} = 0b1010; + let Inst{9 - 6} = 0b0000; + let Inst{5 - 2} = rx; + let Inst{1,0} = 0b11; + let isCompare = 1; +} + +//===----------------------------------------------------------------------===// +// Special instructions. 
+//===----------------------------------------------------------------------===// + +def BKPT : CSKY16Inst { + let Inst = 0; +} + +let mayStore = 1 in { +def BPUSHH : I16_BPushPop<0b00010100111, 0, (outs), (ins mGPR:$rz), "bpush.h $rz">; +def BPUSHW : I16_BPushPop<0b00010100111, 0b10, (outs), (ins mGPR:$rz), "bpush.w $rz">; +} + +let mayLoad = 1 in { +def BPOPH : I16_BPushPop<0b00010100101, 0, (outs mGPR:$rz), (ins), "bpop.h $rz">; +def BPOPW : I16_BPushPop<0b00010100101, 0b10, (outs mGPR:$rz), (ins), "bpop.w $rz">; +} + +def NIE : CSKY16Inst { + let Inst = 0b0001010001100000; +} + +let isBarrier = 1, isReturn = 1, isTerminator = 1 in +def NIR : CSKY16Inst { + let Inst = 0b0001010001100001; +} + +def IPUSH16 : CSKY16Inst { + let Inst{15- 5} = 0b00010100011; + let Inst{4-0} = 0b00010; + let Predicates = [iHasE1]; + let Defs = [R14]; + let Uses = [R14, R0, R1, R2, R3, R12, R13]; + let mayStore = 1; +} + +def IPOP16 : CSKY16Inst { + let Inst{15- 5} = 0b00010100011; + let Inst{4-0} = 0b00011; + let Predicates = [iHasE1]; + let Defs = [R14, R0, R1, R2, R3, R12, R13]; + let Uses = [R14]; + let mayLoad = 1; +} + +def PUSH16 : CSKY16Inst { + bits<5> regs; + + let Inst{15- 5} = 0b00010100110; + let Inst{4-0} = regs; + let Predicates = [iHasE1]; + let Defs = [R14]; + let Uses = [R14]; + let mayStore = 1; +} + +def POP16 : CSKY16Inst { + bits<5> regs; + + let Inst{15- 5} = 0b00010100100; + let Inst{4-0} = regs; + let Predicates = [iHasE1]; + let Defs = [R14]; + let Uses = [R14]; + let mayLoad = 1; +} + +//===----------------------------------------------------------------------===// +// CSKYPseudo +//===----------------------------------------------------------------------===// + +let usesCustomInserter = 1 in { + def ISEL16 : CSKYPseudo<(outs sGPR:$dst), + (ins CARRY:$cond, sGPR:$src1, sGPR:$src2), + "!isel16\t$dst, $src1, src2", + [(set sGPR:$dst, (select CARRY:$cond, sGPR:$src1, sGPR:$src2))]>; +} + +class JBranchPseudo : + CSKYPseudo { + let isBranch = 1; + let isTerminator = 1; + let isIndirectBranch = 1; + let mayLoad = 1; + let Size = 2; +} + +let isBarrier = 1 in +def JBR16 : JBranchPseudo<(outs), + (ins br_symbol_16bit:$src1), "jbr16\t$src1">; +def JBT16 : JBranchPseudo<(outs), + (ins CARRY:$ca, br_symbol_16bit:$src1), "jbt16\t$src1">; +def JBF16 : JBranchPseudo<(outs), + (ins CARRY:$ca, br_symbol_16bit:$src1), "jbf16\t$src1">; + +let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in +def PseudoLRW16 : CSKYPseudo<(outs mGPR:$rz), + (ins bare_symbol:$src), "lrw16 $rz, $src", []>; diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td index aef4589a67f29..7548c22bb2c5e 100644 --- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td +++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td @@ -153,6 +153,21 @@ def GPR : RegisterClass<"CSKY", [i32], 32, let Size = 32; } +// Register class for R0 - R15. +// Some 16-bit integer instructions can only access R0 - R15. +def sGPR : RegisterClass<"CSKY", [i32], 32, + (add (sequence "R%u", 0, 3), (sequence "R%u", 12, 13), R15, + (sequence "R%u", 4, 11), R14)> { + let Size = 32; +} + +// Register class for R0 - R7. +// Some 16-bit integer instructions can only access R0 - R7. 
+def mGPR : RegisterClass<"CSKY", [i32], 32, + (add (sequence "R%u", 0, 7))> { + let Size = 32; +} + def GPRPair : RegisterClass<"CSKY", [untyped], 32, (add GPRTuple)> { let Size = 64; } diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp index dc0d2c40c265a..963c2ede9c441 100644 --- a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp +++ b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp @@ -36,6 +36,8 @@ CSKYSubtarget &CSKYSubtarget::initializeSubtargetDependencies( HasFPUv3SingleFloat = false; HasFPUv3DoubleFloat = false; + HasBTST16 = false; + HasJAVA = false; HasExtendLrw = false; HasDoloop = false; HasHighRegisters = false; @@ -70,4 +72,3 @@ bool CSKYSubtarget::useHardFloatABI() const { else return FloatABI == FloatABI::Hard; } - diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h b/llvm/lib/Target/CSKY/CSKYSubtarget.h index ece9450c8452f..4cd590e8e76e6 100644 --- a/llvm/lib/Target/CSKY/CSKYSubtarget.h +++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h @@ -43,6 +43,8 @@ class CSKYSubtarget : public CSKYGenSubtargetInfo { bool HasFPUv3SingleFloat; bool HasFPUv3DoubleFloat; + bool HasBTST16; + bool HasJAVA; bool HasExtendLrw; bool HasDoloop; bool HasHighRegisters; @@ -95,6 +97,8 @@ class CSKYSubtarget : public CSKYGenSubtargetInfo { bool hasFPUv3() const { return HasFPUv3SingleFloat || HasFPUv3DoubleFloat; } bool hasAnyFloatExt() const { return hasFPUv2() || hasFPUv3(); }; + bool hasBTST16() const { return HasBTST16; } + bool hasJAVA() const { return HasJAVA; } bool hasExtendLrw() const { return HasExtendLrw; } bool hasDoloop() const { return HasDoloop; } bool hasHighRegisters() const { return HasHighRegisters; } diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.h b/llvm/lib/Target/CSKY/CSKYTargetMachine.h index 3c8834417d49a..ecb9fe9530773 100644 --- a/llvm/lib/Target/CSKY/CSKYTargetMachine.h +++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H #define LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H +#include "CSKYSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp index 7f5cf157a2025..cf9e22bc5ebe7 100644 --- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1723,8 +1723,8 @@ bool CopyPropagation::propagateRegCopy(MachineInstr &MI) { bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) { std::vector Instrs; - for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) - Instrs.push_back(&*I); + for (MachineInstr &MI : llvm::reverse(B)) + Instrs.push_back(&MI); bool Changed = false; for (auto I : Instrs) { diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp index 3660fa4b06abb..7579a5e5f0b29 100644 --- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -1045,13 +1045,9 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) { // erase instructions during rewriting, so this needs to be delayed until // now. 
for (MachineBasicBlock &B : MF) { - MachineBasicBlock::iterator I = B.begin(), E = B.end(); - while (I != E) { - auto Next = std::next(I); - if (I->isBranch() && !InstrExec.count(&*I)) - B.erase(I); - I = Next; - } + for (MachineInstr &MI : llvm::make_early_inc_range(B)) + if (MI.isBranch() && !InstrExec.count(&MI)) + B.erase(&MI); } return Changed; } @@ -3132,11 +3128,9 @@ void HexagonConstEvaluator::replaceAllRegUsesWith(Register FromReg, Register ToReg) { assert(FromReg.isVirtual()); assert(ToReg.isVirtual()); - for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) { - MachineOperand &O = *I; - ++I; + for (MachineOperand &O : + llvm::make_early_inc_range(MRI->use_operands(FromReg))) O.setReg(ToReg); - } } bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI, diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp index fcc8804639255..c444cf557c217 100644 --- a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -1070,20 +1070,18 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B, std::set &UpdRegs) { bool Changed = false; - MachineBasicBlock::iterator I, E, NextI; - for (I = B.begin(), E = B.end(); I != E; I = NextI) { - NextI = std::next(I); - unsigned Opc = I->getOpcode(); + for (MachineInstr &MI : llvm::make_early_inc_range(B)) { + unsigned Opc = MI.getOpcode(); if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) { - bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs); + bool Done = predicate(MI, (Opc == Hexagon::A2_tfrt), UpdRegs); if (!Done) { // If we didn't predicate I, we may need to remove it in case it is // an "identity" copy, e.g. %1 = A2_tfrt %2, %1. - if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) { - for (auto &Op : I->operands()) + if (RegisterRef(MI.getOperand(0)) == RegisterRef(MI.getOperand(2))) { + for (auto &Op : MI.operands()) if (Op.isReg()) UpdRegs.insert(Op.getReg()); - removeInstr(*I); + removeInstr(MI); } } Changed |= Done; diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index c824bac30833f..1c40e7d7eefa1 100644 --- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -281,11 +281,10 @@ static unsigned getMaxCalleeSavedReg(ArrayRef CSI, /// frame to be already in place. static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, const HexagonRegisterInfo &HRI) { - for (auto &I : MBB) { - const MachineInstr *MI = &I; - if (MI->isCall()) + for (const MachineInstr &MI : MBB) { + if (MI.isCall()) return true; - unsigned Opc = MI->getOpcode(); + unsigned Opc = MI.getOpcode(); switch (Opc) { case Hexagon::PS_alloca: case Hexagon::PS_aligna: @@ -294,7 +293,7 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, break; } // Check individual operands. - for (const MachineOperand &MO : MI->operands()) { + for (const MachineOperand &MO : MI.operands()) { // While the presence of a frame index does not prove that a stack // frame will be required, all frame indexes should be within alloc- // frame/deallocframe. 
Otherwise, the code that translates a frame diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp index 07f85e69abbab..d0f36291201af 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp @@ -232,22 +232,19 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { CondsetMap CM; MuxInfoList ML; - MachineBasicBlock::iterator NextI, End = B.end(); - for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) { - MachineInstr *MI = &*I; - NextI = std::next(I); - unsigned Opc = MI->getOpcode(); + for (MachineInstr &MI : llvm::make_early_inc_range(B)) { + unsigned Opc = MI.getOpcode(); if (!isCondTransfer(Opc)) continue; - Register DR = MI->getOperand(0).getReg(); + Register DR = MI.getOperand(0).getReg(); if (isRegPair(DR)) continue; - MachineOperand &PredOp = MI->getOperand(1); + MachineOperand &PredOp = MI.getOperand(1); if (PredOp.isUndef()) continue; Register PR = PredOp.getReg(); - unsigned Idx = I2X.lookup(MI); + unsigned Idx = I2X.lookup(&MI); CondsetMap::iterator F = CM.find(DR); bool IfTrue = HII->isPredicatedTrue(Opc); @@ -360,21 +357,21 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { return true; return false; }; - for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) { - if (I->isDebugInstr()) + for (MachineInstr &I : llvm::reverse(B)) { + if (I.isDebugInstr()) continue; // This isn't 100% accurate, but it's safe. // It won't detect (as a kill) a case like this // r0 = add r0, 1 <-- r0 should be "killed" // ... = r0 - for (MachineOperand &Op : I->operands()) { + for (MachineOperand &Op : I.operands()) { if (!Op.isReg() || !Op.isUse()) continue; assert(Op.getSubReg() == 0 && "Should have physical registers only"); bool Live = IsLive(Op.getReg()); Op.setIsKill(!Live); } - LPR.stepBackward(*I); + LPR.stepBackward(I); } return Changed; diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 78e4eeaae546e..a4971ad712eb9 100644 --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -1094,15 +1094,15 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); - MachineRegisterInfo::use_iterator nextI; - for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), - E = MRI->use_end(); I != E; I = nextI) { - nextI = std::next(I); // I is invalidated by the setReg - MachineInstr *UseMI = I->getParent(); + // We use make_early_inc_range here because setReg below invalidates the + // iterator. 
+ for (MachineOperand &MO : + llvm::make_early_inc_range(MRI->use_operands(Reg))) { + MachineInstr *UseMI = MO.getParent(); if (UseMI == MI) continue; - if (I->isDebug()) - I->setReg(0U); + if (MO.isDebug()) + MO.setReg(0U); } } diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index d1c6f613ad83e..2679e399852f2 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -2247,8 +2247,8 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { } void HexagonDAGToDAGISel::rebalanceAddressTrees() { - for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) { - SDNode *N = &*I++; + for (SDNode &Node : llvm::make_early_inc_range(CurDAG->allnodes())) { + SDNode *N = &Node; if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE) continue; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 6a5051f699595..76220eff4d510 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -169,13 +169,13 @@ MachineInstr *HexagonInstrInfo::findLoopInstr(MachineBasicBlock *BB, continue; if (PB == BB) continue; - for (auto I = PB->instr_rbegin(), E = PB->instr_rend(); I != E; ++I) { - unsigned Opc = I->getOpcode(); + for (MachineInstr &I : llvm::reverse(PB->instrs())) { + unsigned Opc = I.getOpcode(); if (Opc == LOOPi || Opc == LOOPr) - return &*I; + return &I; // We've reached a different loop, which means the loop0 has been // removed. - if (Opc == EndLoopOp && I->getOperand(0).getMBB() != TargetBB) + if (Opc == EndLoopOp && I.getOperand(0).getMBB() != TargetBB) return nullptr; } // Check the predecessors for the LOOP instruction.
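(Editorial aside: the recurring rewrite in these Hexagon hunks, as in the AMDGPU, AVR and BPF hunks earlier, is one idiom: llvm::make_early_inc_range advances the iterator before the loop body runs, so the body may erase the current element or rewrite its use list without invalidating the traversal. A simplified stand-in for what the adaptor buys you, not the actual STLExtras implementation:)

// Sketch of the early-increment idiom that make_early_inc_range packages up.
template <typename ListT, typename PredT>
void eraseMatching(ListT &List, PredT ShouldErase) {
  for (auto It = List.begin(), End = List.end(); It != End;) {
    auto Cur = It++;       // step past Cur before the body touches it
    if (ShouldErase(*Cur))
      List.erase(Cur);     // safe: It no longer refers to Cur
  }
}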
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index 8ddd52df4aba4..04522480b0e7e 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -1490,10 +1490,8 @@ void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT})) I.replaceAllUsesWith(SV); - for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) { - N = std::next(I); - RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI); - } + for (Instruction &I : llvm::make_early_inc_range(*LoopB)) + RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI); } unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) { diff --git a/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index f9fb14c190ff7..4890c3dbb7bca 100644 --- a/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -70,9 +70,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { // Loop over all of the basic blocks for (MachineBasicBlock &B : Fn) { - for (auto I = B.begin(), E = B.end(); I != E; ) { - MachineInstr &MI = *I; - ++I; + for (MachineInstr &MI : llvm::make_early_inc_range(B)) { unsigned Opc = MI.getOpcode(); if (Opc == Hexagon::CONST32) { diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 191aef71386ff..1d325553f45a2 100644 --- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -234,16 +234,9 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { // dependence between Insn 0 and Insn 2. This can lead to incorrect // packetization for (MachineBasicBlock &MB : MF) { - auto End = MB.end(); - auto MI = MB.begin(); - while (MI != End) { - auto NextI = std::next(MI); - if (MI->isKill()) { - MB.erase(MI); - End = MB.end(); - } - MI = NextI; - } + for (MachineInstr &MI : llvm::make_early_inc_range(MB)) + if (MI.isKill()) + MB.erase(&MI); } // TinyCore with Duplexes: Translate to big-instructions. 
@@ -1160,12 +1153,9 @@ bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI, void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { for (auto &B : MF) { MachineBasicBlock::iterator BundleIt; - MachineBasicBlock::instr_iterator NextI; - for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { - NextI = std::next(I); - MachineInstr &MI = *I; + for (MachineInstr &MI : llvm::make_early_inc_range(B.instrs())) { if (MI.isBundle()) - BundleIt = I; + BundleIt = MI.getIterator(); if (!MI.isInsideBundle()) continue; diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp index 97062cf619a28..f6ec34c7f403d 100644 --- a/llvm/lib/Target/Mips/MipsCallLowering.cpp +++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp @@ -24,6 +24,7 @@ using namespace llvm; MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI) : CallLowering(&TLI) {} +namespace { struct MipsOutgoingValueAssigner : public CallLowering::OutgoingValueAssigner { /// This is the name of the function being called /// FIXME: Relying on this is unsound @@ -80,7 +81,6 @@ struct MipsIncomingValueAssigner : public CallLowering::IncomingValueAssigner { } }; -namespace { class MipsIncomingValueHandler : public CallLowering::IncomingValueHandler { const MipsSubtarget &STI; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index ec0c92ccf5c52..953d95e55f658 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -195,13 +195,12 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB, if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); else // Conditional branch - BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()) - .addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB); return 1; } // Two-way Conditional Branch. 
- BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB); BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); return 2; } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 8048c95673df6..0abf6957a79cf 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -2248,8 +2248,18 @@ class MoveParamInst : !strconcat("mov", asmstr, " \t$dst, $src;"), [(set regclass:$dst, (MoveParam regclass:$src))]>; +class MoveParamSymbolInst : + NVPTXInst<(outs regclass:$dst), (ins srcty:$src), + !strconcat("mov", asmstr, " \t$dst, $src;"), + [(set regclass:$dst, (MoveParam texternalsym:$src))]>; + def MoveParamI64 : MoveParamInst; def MoveParamI32 : MoveParamInst; + +def MoveParamSymbolI64 : MoveParamSymbolInst; +def MoveParamSymbolI32 : MoveParamSymbolInst; + def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), "cvt.u16.u32 \t$dst, $src;", diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 98724f37fceca..ec7307265bcaf 100644 --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -107,6 +107,14 @@ NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + for (unsigned Reg = NVPTX::ENVREG0; Reg <= NVPTX::ENVREG31; ++Reg) { + markSuperRegs(Reserved, Reg); + } + markSuperRegs(Reserved, NVPTX::VRFrame32); + markSuperRegs(Reserved, NVPTX::VRFrameLocal32); + markSuperRegs(Reserved, NVPTX::VRFrame64); + markSuperRegs(Reserved, NVPTX::VRFrameLocal64); + markSuperRegs(Reserved, NVPTX::VRDepot); return Reserved; } diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 8e983acb450b6..a1ff20bb36121 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -187,6 +187,22 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load", def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true", "Target supports store clustering", [FeatureFusion]>; +def FeatureArithAddFusion : + SubtargetFeature<"fuse-arith-add", "HasArithAddFusion", "true", + "Target supports Arithmetic Operations with Add fusion", + [FeatureFusion]>; +def FeatureAddLogicalFusion : + SubtargetFeature<"fuse-add-logical", "HasAddLogicalFusion", "true", + "Target supports Add with Logical Operations fusion", + [FeatureFusion]>; +def FeatureLogicalAddFusion : + SubtargetFeature<"fuse-logical-add", "HasLogicalAddFusion", "true", + "Target supports Logical with Add Operations fusion", + [FeatureFusion]>; +def FeatureLogicalFusion : + SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true", + "Target supports Logical Operations fusion", + [FeatureFusion]>; def FeatureUnalignedFloats : SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess", "true", "CPU does not trap on unaligned FP access">; @@ -375,7 +391,10 @@ def ProcessorFeatures { // Power10 // For P10 CPU we assume that all of the existing features from Power9 // still exist with the exception of those we know are Power9 specific. 
- list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion]; + list<SubtargetFeature> FusionFeatures = [ + FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion, + FeatureLogicalFusion, FeatureArithAddFusion + ]; list<SubtargetFeature> P10AdditionalFeatures = !listconcat(FusionFeatures, [ DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs, diff --git a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp index efed44d31b377..5a2c295d947e1 100644 --- a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -185,12 +185,9 @@ namespace { // nothing to do. if (MF.size() < 2) return Changed; - - // We can't use a range-based for loop due to clobbering the iterator. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) { - MachineBasicBlock &B = *I++; + + for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) Changed |= processBlock(B); - } return Changed; } diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 93fae891dd9b3..d63044c9760d6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -5825,6 +5825,69 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } } + case PPCISD::LD_SPLAT: { + // Here we want to handle splat load for type v16i8 and v8i16 when there is + // no direct move, since we don't need to use the stack in this case. If the + // target has direct move, we should get the best selection in the .td file. + if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove()) + break; + + EVT Type = N->getValueType(0); + if (Type != MVT::v16i8 && Type != MVT::v8i16) + break; + + SDValue ZeroReg = + CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO, + Subtarget->isPPC64() ? MVT::i64 : MVT::i32); + unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI; + // v16i8 LD_SPLAT addr + // ======> + // Mask = LVSR/LVSL 0, addr + // LoadLow = LVX 0, addr + // Perm = VPERM LoadLow, LoadLow, Mask + // Splat = VSPLTB 15/0, Perm + // + // v8i16 LD_SPLAT addr + // ======> + // Mask = LVSR/LVSL 0, addr + // LoadLow = LVX 0, addr + // LoadHigh = LVX (LI, 1), addr + // Perm = VPERM LoadLow, LoadHigh, Mask + // Splat = VSPLTH 7/0, Perm + unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH; + unsigned SplatElemIndex = + Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0; + + SDNode *Mask = CurDAG->getMachineNode( + Subtarget->isLittleEndian() ?
PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg, + N->getOperand(1)); + + SDNode *LoadLow = + CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other, + {ZeroReg, N->getOperand(1), N->getOperand(0)}); + + SDNode *LoadHigh = LoadLow; + if (Type == MVT::v8i16) { + LoadHigh = CurDAG->getMachineNode( + PPC::LVX, dl, MVT::v16i8, MVT::Other, + {SDValue(CurDAG->getMachineNode( + LIOpcode, dl, MVT::i32, + CurDAG->getTargetConstant(1, dl, MVT::i8)), + 0), + N->getOperand(1), SDValue(LoadLow, 1)}); + } + + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1)); + transferMemOperands(N, LoadHigh); + + SDNode *Perm = + CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0), + SDValue(LoadHigh, 0), SDValue(Mask, 0)); + CurDAG->SelectNodeTo(N, SplatOp, Type, + CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8), + SDValue(Perm, 0)); + return; + } } SelectCode(N); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 36d08415290a9..d8333023a0b59 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -601,6 +601,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); @@ -1712,6 +1714,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; + case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT"; + case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; case PPCISD::STRICT_FADDRTZ: return "PPCISD::STRICT_FADDRTZ"; @@ -9060,6 +9064,34 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) { return (!LosesInfo && !APFloatToConvert.isDenormal()); } +static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, + unsigned &Opcode) { + const SDNode *InputNode = Op.getOperand(0).getNode(); + if (!InputNode || !ISD::isUNINDEXEDLoad(InputNode)) + return false; + + if (!Subtarget.hasVSX()) + return false; + + EVT Ty = Op->getValueType(0); + if (Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32 || + Ty == MVT::v8i16 || Ty == MVT::v16i8) + return true; + + if (Ty == MVT::v2i64) { + // Check the extend type when the input type is i32 and the output vector + // type is v2i64. + if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) { + if (ISD::isZEXTLoad(InputNode)) + Opcode = PPCISD::ZEXT_LD_SPLAT; + if (ISD::isSEXTLoad(InputNode)) + Opcode = PPCISD::SEXT_LD_SPLAT; + } + return true; + } + return false; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op.
Otherwise, if we can codegen @@ -9123,17 +9155,26 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } if (!BVNIsConstantSplat || SplatBitSize > 32) { + unsigned NewOpcode = PPCISD::LD_SPLAT; - bool IsPermutedLoad = false; - const SDValue *InputLoad = - getNormalLoadInput(Op.getOperand(0), IsPermutedLoad); // Handle load-and-splat patterns as we have instructions that will do this // in one go. - if (InputLoad && DAG.isSplatValue(Op, true)) { + if (DAG.isSplatValue(Op, true) && + isValidSplatLoad(Subtarget, Op, NewOpcode)) { + const SDValue *InputLoad = &Op.getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(*InputLoad); - // We have handling for 4 and 8 byte elements. - unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits(); + // If the input load is an extending load, it will be an i32 -> i64 + // extending load and isValidSplatLoad() will update NewOpcode. + unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits(); + unsigned ElementSize = + MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2); + + assert(((ElementSize == 2 * MemorySize) + ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT || + NewOpcode == PPCISD::SEXT_LD_SPLAT) + : (NewOpcode == PPCISD::LD_SPLAT)) && + "Unmatched element size and opcode!\n"); // Checking for a single use of this load, we have to check for vector // width (128 bits) / ElementSize uses (since each operand of the @@ -9142,18 +9183,45 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, for (SDValue BVInOp : Op->ops()) if (BVInOp.isUndef()) NumUsesOfInputLD--; + + // Exclude some cases where LD_SPLAT is worse than scalar_to_vector: + // The cases below would also arise for "lfiwzx/lfiwax + LE target + index + // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index + // 15", but function isValidSplatLoad() currently only returns true when + // the load feeds the element at index 0, so we will not get into trouble + // for these cases. + // + // case 1 - lfiwzx/lfiwax + // 1.1: load result is i32 and is sign/zero extended to i64; + // 1.2: build a v2i64 vector type with the above loaded value; + // 1.3: the vector has only one value at index 0, others are all undef; + // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute. + if (NumUsesOfInputLD == 1 && + (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT && + !Subtarget.isLittleEndian() && Subtarget.hasVSX() && + Subtarget.hasLFIWAX())) + return SDValue(); + + // case 2 - lxvr[hb]x + // 2.1: load result is at most i16; + // 2.2: build a vector with the above loaded value; + // 2.3: the vector has only one value at index 0, others are all undef; + // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
+ if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() && + Subtarget.isISA3_1() && ElementSize <= 16) + return SDValue(); + assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?"); if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) && - ((Subtarget.hasVSX() && ElementSize == 64) || - (Subtarget.hasP9Vector() && ElementSize == 32))) { + Subtarget.hasVSX()) { SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr DAG.getValueType(Op.getValueType()) // VT }; SDValue LdSplt = DAG.getMemIntrinsicNode( - PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other), - Ops, LD->getMemoryVT(), LD->getMemOperand()); + NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops, + LD->getMemoryVT(), LD->getMemOperand()); // Replace all uses of the output chain of the original load with the // output chain of the new load. DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), @@ -10362,6 +10430,16 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } return DAG.getMergeValues(RetOps, dl); } + + case Intrinsic::ppc_unpack_longdouble: { + auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) && + "Argument of long double unpack must be 0 or 1!"); + return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1), + DAG.getConstant(!!(Idx->getSExtValue()), dl, + Idx->getValueType(0))); + } + case Intrinsic::ppc_compare_exp_lt: case Intrinsic::ppc_compare_exp_gt: case Intrinsic::ppc_compare_exp_eq: @@ -10406,6 +10484,17 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}), 0); } + case Intrinsic::ppc_convert_f128_to_ppcf128: + case Intrinsic::ppc_convert_ppcf128_to_f128: { + RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128 + ? RTLIB::CONVERT_PPCF128_F128 + : RTLIB::CONVERT_F128_PPCF128; + MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Result = + makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions, + dl, SDValue()); + return Result.first; + } } // If this is a lowered altivec predicate compare, CompareOpc is set to the @@ -11043,6 +11132,18 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(NewInt.getValue(1)); break; } + case ISD::INTRINSIC_WO_CHAIN: { + switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) { + case Intrinsic::ppc_pack_longdouble: + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, + N->getOperand(2), N->getOperand(1))); + break; + case Intrinsic::ppc_convert_f128_to_ppcf128: + Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG)); + break; + } + break; + } case ISD::VAARG: { if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) return; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index f557f4031667e..450bc48ec1439 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -559,6 +559,14 @@ namespace llvm { /// instructions such as LXVDSX, LXVWSX. LD_SPLAT, + /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load + /// that zero-extends. + ZEXT_LD_SPLAT, + + /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load + /// that sign-extends. + SEXT_LD_SPLAT, + /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. /// Maps directly to an stxvd2x instruction that will be preceded by /// an xxswapd.
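Stepping back from the mechanics, what the two new PPCISD nodes compute is simple to state in plain C++ (a semantic sketch only, with illustrative names; the real nodes are SelectionDAG memory intrinsics that the patterns below select to lfiwzx/lfiwax plus xxpermdi):

#include <array>
#include <cstdint>

// ZEXT_LD_SPLAT: load one 32-bit element, zero-extend it to 64 bits, and
// replicate it into both lanes of a v2i64.
static std::array<uint64_t, 2> zextLoadSplat(const uint32_t *Ptr) {
  uint64_t Elt = static_cast<uint64_t>(*Ptr); // zero-extending load
  return {Elt, Elt};                          // splat to both lanes
}

// SEXT_LD_SPLAT: the same, but sign-extending.
static std::array<int64_t, 2> sextLoadSplat(const int32_t *Ptr) {
  int64_t Elt = static_cast<int64_t>(*Ptr);   // sign-extending load
  return {Elt, Elt};
}

isValidSplatLoad() recognizes exactly this shape, a splatted BUILD_VECTOR of v2i64 fed by an extending i32 load, and picks the opcode from the extension kind.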
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 22f64641ce28c..3aaf5c389c8b3 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3108,14 +3108,14 @@ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), PPC970_DGroup_Single, PPC970_Unit_FPU; } -let Defs = [RM] in { +let Defs = [RM], hasSideEffects = 1 in { let isCodeGenOnly = 1 in def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT), "mtfsf $FM, $rT", IIC_IntMTFSB0, [(int_ppc_mtfsf timm:$FM, f64:$rT)]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } -let Uses = [RM] in { +let Uses = [RM], hasSideEffects = 1 in { def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins), "mffs $rT", IIC_IntMFFS, [(set f64:$rT, (PPCmffs))]>, @@ -4503,7 +4503,7 @@ def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA), // All MTFSF variants may change the rounding mode so conservatively set it // as an implicit def for all of them. let Predicates = [HasFPU] in { -let Defs = [RM] in { +let Defs = [RM], hasSideEffects = 1 in { let isCodeGenOnly = 1, Pattern = [(int_ppc_mtfsfi timm:$BF, timm:$U)], W = 0 in def MTFSFIb : XLForm_4<63, 134, (outs), (ins u3imm:$BF, u4imm:$U), diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index b00e58010b06b..506c7fa1bfd04 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -138,6 +138,10 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCzextldsplat : SDNode<"PPCISD::ZEXT_LD_SPLAT", SDT_PPCldsplat, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def PPCsextldsplat : SDNode<"PPCISD::SEXT_LD_SPLAT", SDT_PPCldsplat, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", SDTypeProfile<1, 1, []>, []>; @@ -1062,6 +1066,14 @@ let hasSideEffects = 0 in { [(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB, imm32SExt16:$DM))]>; let isCodeGenOnly = 1 in + // Note that the input register class for `$XA` of XXPERMDIs is `vsfrc` which + // is not the same as the input register class (`vsrc`) of the XXPERMDI instruction. + // We did this on purpose because: + // 1: The input is primarily for loads that load a partial vector (LFIWZX, + // etc.), so there is no need for SUBREG_TO_REG. + // 2: With the `vsfrc` register class, in the final assembly, float registers + // like `f0` are used instead of vector scalar registers like `vs0`. This + // helps readability. def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM), "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>; def XXSEL : XX4Form<60, 3, @@ -2827,10 +2839,20 @@ def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)), def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; + +// Splat loads.
def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)), (v2f64 (LXVDSX ForceXForm:$A))>; +def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)), + (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>; def : Pat<(v2i64 (PPCldsplat ForceXForm:$A)), (v2i64 (LXVDSX ForceXForm:$A))>; +def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)), + (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>; +def : Pat<(v2i64 (PPCzextldsplat ForceXForm:$A)), + (v2i64 (XXPERMDIs (LFIWZX ForceXForm:$A), 0))>; +def : Pat<(v2i64 (PPCsextldsplat ForceXForm:$A)), + (v2i64 (XXPERMDIs (LFIWAX ForceXForm:$A), 0))>; // Build vectors of floating point converted to i64. def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)), @@ -3540,6 +3562,12 @@ def : Pat<(v16i8 (PPCmtvsrz i32:$A)), def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A, immSExt5NonZero:$A, immSExt5NonZero:$A)), (v4i32 (VSPLTISW imm:$A))>; + +// Splat loads. +def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)), + (v8i16 (VSPLTHs 3, (MTVSRWZ (LHZX ForceXForm:$A))))>; +def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)), + (v16i8 (VSPLTBs 7, (MTVSRWZ (LBZX ForceXForm:$A))))>; } // HasVSX, HasDirectMove // Big endian VSX subtarget with direct moves. @@ -4087,6 +4115,10 @@ def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)), (v4f32 (LXVWSX ForceXForm:$A))>; def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)), (v4i32 (LXVWSX ForceXForm:$A))>; +def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)), + (v8i16 (VSPLTHs 3, (LXSIHZX ForceXForm:$A)))>; +def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)), + (v16i8 (VSPLTBs 7, (LXSIBZX ForceXForm:$A)))>; } // HasVSX, HasP9Vector // Any Power9 VSX subtarget with equivalent length but better Power10 VSX diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 9641a9af5063d..31b95cda5f61b 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -115,17 +115,18 @@ using namespace llvm; -static cl::opt MaxVarsPrep("ppc-formprep-max-vars", - cl::Hidden, cl::init(24), - cl::desc("Potential common base number threshold per function for PPC loop " - "prep")); +static cl::opt + MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24), + cl::ZeroOrMore, + cl::desc("Potential common base number threshold per function " + "for PPC loop prep")); static cl::opt PreferUpdateForm("ppc-formprep-prefer-update", cl::init(true), cl::Hidden, cl::desc("prefer update form when ds form is also a update form")); static cl::opt EnableChainCommoning( - "ppc-formprep-chain-commoning", cl::init(true), cl::Hidden, + "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden, cl::desc("Enable chain commoning in PPC loop prepare pass.")); // Sum of following 3 per loop thresholds for all loops can not be larger diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index 4bbb6ed85a6ce..b9311ba080655 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -322,8 +322,7 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII, SmallVectorImpl &PHIs, Register Dst) { DenseMap ChangedPHIMap; - for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) { - MachineInstr *PHI = *It; + for (MachineInstr *PHI : llvm::reverse(PHIs)) { SmallVector, 4> PHIOps; // We check if the current PHI node can be changed by looking at its // operands. 
If all the operands are either copies from primed @@ -603,14 +602,24 @@ bool PPCMIPeephole::simplifyCode(void) { ToErase = &MI; Simplified = true; } - } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs && + } else if ((Immed == 0 || Immed == 3 || Immed == 2) && + DefOpc == PPC::XXPERMDIs && (DefMI->getOperand(2).getImm() == 0 || DefMI->getOperand(2).getImm() == 3)) { + ToErase = &MI; + Simplified = true; + // Swap of a splat, convert to copy. + if (Immed == 2) { + LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): "); + LLVM_DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .add(MI.getOperand(1)); + break; + } // Splat fed by another splat - switch the output of the first // and remove the second. DefMI->getOperand(0).setReg(MI.getOperand(0).getReg()); - ToErase = &MI; - Simplified = true; LLVM_DEBUG(dbgs() << "Removing redundant splat: "); LLVM_DEBUG(MI.dump()); } diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp index d12c6d9cd406c..bdff5109c1e13 100644 --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp @@ -75,6 +75,19 @@ static bool matchingRegOps(const MachineInstr &FirstMI, return Op1.getReg() == Op2.getReg(); } +static bool matchingImmOps(const MachineInstr &MI, + int MIOpIndex, + int64_t Expect, + unsigned ExtendFrom = 64) { + const MachineOperand &Op = MI.getOperand(MIOpIndex); + if (!Op.isImm()) + return false; + int64_t Imm = Op.getImm(); + if (ExtendFrom < 64) + Imm = SignExtend64(Imm, ExtendFrom); + return Imm == Expect; +} + // Return true if the FirstMI meets the constraints of SecondMI according to // fusion specification. static bool checkOpConstraints(FusionFeature::FusionKind Kd, @@ -116,7 +129,7 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd, if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0)) return false; - // If si = 1111111111110000 and the msb of the d/ds field of the load equals + // If si = 1111111111110000 and the msb of the d/ds field of the load equals // 1, then fusion does not occur. 
if ((Imm & 0xFFF0) == 0xFFF0) { const MachineOperand &D = SecondMI.getOperand(1); @@ -132,6 +145,10 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd, } return true; } + + case FusionFeature::FK_SldiAdd: + return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) || + (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57)); } llvm_unreachable("All the cases should have been handled"); diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def index c7e4e7c22e0a6..469a24800423a 100644 --- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def +++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def @@ -41,5 +41,42 @@ FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \ FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8)) +// Power10 User Manual Section 19.1.5.4, Fusion +// {add, mulld} - add +FUSION_FEATURE(ArithAdd, hasArithAddFusion, -1, + FUSION_OP_SET(ADD4, ADD8, MULLD), FUSION_OP_SET(ADD4, ADD8)) + +// {add, subf} - {and, nand, nor, or} +FUSION_FEATURE(ArithLogical, hasAddLogicalFusion, -1, + FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8), + FUSION_OP_SET(AND, AND8, OR, OR8, NAND, NAND8, NOR, NOR8)) + +// {and, andc, eqv, nand, nor, or, orc, xor} - {add, subf} +FUSION_FEATURE(LogicalArith, hasLogicalAddFusion, -1, + FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8, + ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8), + FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8)) + +// Either of {and, andc, eqv, nand, nor, or, orc, xor} +FUSION_FEATURE(Logical, hasLogicalFusion, -1, + FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8, + ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8), + FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8, + ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8)) + +// vaddudm - vaddudm +FUSION_FEATURE(VecAdd, hasArithAddFusion, -1, FUSION_OP_SET(VADDUDM), + FUSION_OP_SET(VADDUDM)) + +// Either of {vand, vandc, veqv, vnand, vnor, vor, vorc, vxor} +FUSION_FEATURE(VecLogical, hasLogicalFusion, -1, + FUSION_OP_SET(VAND, VANDC, VEQV, VNAND, VNOR, VOR, VORC, VXOR), + FUSION_OP_SET(VAND, VANDC, VEQV, VNAND, VNOR, VOR, VORC, VXOR)) + +// sldi rx, ra, {3, 6} - {add, subf} +// sldi rx, ra, n is an alias of rldicr rx, ra, n, 63-n +FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32), + FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8)) + #undef FUSION_FEATURE #undef FUSION_OP_SET diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index b9684f61795a6..dfc29dbb10f19 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -127,6 +127,10 @@ void PPCSubtarget::initializeEnvironment() { HasStoreFusion = false; HasAddiLoadFusion = false; HasAddisLoadFusion = false; + HasArithAddFusion = false; + HasAddLogicalFusion = false; + HasLogicalAddFusion = false; + HasLogicalFusion = false; IsISA2_06 = false; IsISA2_07 = false; IsISA3_0 = false; diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h index e2502cae7681f..783ea121ccb83 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -147,6 +147,10 @@ class PPCSubtarget : public PPCGenSubtargetInfo { bool HasStoreFusion; bool HasAddiLoadFusion; bool HasAddisLoadFusion; + bool HasArithAddFusion; + bool HasAddLogicalFusion; + bool HasLogicalAddFusion; + bool HasLogicalFusion; bool IsISA2_06; bool IsISA2_07; bool IsISA3_0; @@ -332,6 +336,10 @@ class PPCSubtarget :
public PPCGenSubtargetInfo { bool hasStoreFusion() const { return HasStoreFusion; } bool hasAddiLoadFusion() const { return HasAddiLoadFusion; } bool hasAddisLoadFusion() const { return HasAddisLoadFusion; } + bool hasArithAddFusion() const { return HasArithAddFusion; } + bool hasAddLogicalFusion() const { return HasAddLogicalFusion; } + bool hasLogicalAddFusion() const { return HasLogicalAddFusion; } + bool hasLogicalFusion() const { return HasLogicalFusion; } bool needsSwapsForVSXMemOps() const { return hasVSX() && isLittleEndian() && !hasP9Vector(); } diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp index 3186d197931d0..fbd487fbcfd50 100644 --- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp +++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp @@ -208,11 +208,9 @@ namespace { bool Changed = false; - for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { - MachineBasicBlock &B = *I++; + for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) if (processBlock(B)) Changed = true; - } return Changed; } diff --git a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp index 3811f88e86b10..8120975c4fb25 100644 --- a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp +++ b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp @@ -131,11 +131,9 @@ namespace { bool runOnMachineFunction(MachineFunction &MF) override { bool Changed = false; - for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { - MachineBasicBlock &B = *I++; + for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) if (processBlock(B)) Changed = true; - } return Changed; } diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp index fd9bf96d38eba..7272e6edefc5e 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp @@ -148,11 +148,9 @@ namespace { bool Changed = false; - for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { - MachineBasicBlock &B = *I++; + for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) if (processBlock(B)) Changed = true; - } return Changed; } @@ -169,4 +167,3 @@ INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE, char PPCVSXCopy::ID = 0; FunctionPass* llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); } - diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index de39c9c5d1f1b..0be35adc35c72 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -361,11 +361,9 @@ namespace { if (DisableVSXFMAMutate) return Changed; - for (MachineFunction::iterator I = MF.begin(); I != MF.end();) { - MachineBasicBlock &B = *I++; + for (MachineBasicBlock &B : llvm::make_early_inc_range(MF)) if (processBlock(B)) Changed = true; - } return Changed; } diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 9a7a2f7551272..f00813f1301ab 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -2377,7 +2377,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, // masked va >= x, vd == v0 // // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt - // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt + // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt assert(Inst.getOperand(0).getReg() == RISCV::V0 && "The destination register should be V0."); 
assert(Inst.getOperand(1).getReg() != RISCV::V0 && @@ -2387,7 +2387,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, .addOperand(Inst.getOperand(2)) .addOperand(Inst.getOperand(3)) .addOperand(Inst.getOperand(4))); - emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM) + emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(1))); @@ -2395,7 +2395,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, // masked va >= x, any vd // // pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt - // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vt, v0, vt; vmandnot.mm vd, + // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd, // vd, v0; vmor.mm vd, vt, vd assert(Inst.getOperand(1).getReg() != RISCV::V0 && "The temporary vector register should not be V0."); @@ -2404,11 +2404,11 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, .addOperand(Inst.getOperand(2)) .addOperand(Inst.getOperand(3)) .addReg(RISCV::NoRegister)); - emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM) + emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM) .addOperand(Inst.getOperand(1)) .addReg(RISCV::V0) .addOperand(Inst.getOperand(1))); - emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM) + emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM) .addOperand(Inst.getOperand(0)) .addOperand(Inst.getOperand(0)) .addReg(RISCV::V0)); diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 48dbcfee886c2..772a4f8ecd535 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -41,10 +41,18 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">, AssemblerPredicate<(all_of FeatureStdExtD), "'D' (Double-Precision Floating-Point)">; +def FeatureStdExtZfhmin + : SubtargetFeature<"experimental-zfhmin", "HasStdExtZfhmin", "true", + "'Zfhmin' (Half-Precision Floating-Point Minimal)", + [FeatureStdExtF]>; +def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">, + AssemblerPredicate<(all_of FeatureStdExtZfhmin), + "'Zfhmin' (Half-Precision Floating-Point Minimal)">; + def FeatureStdExtZfh : SubtargetFeature<"experimental-zfh", "HasStdExtZfh", "true", "'Zfh' (Half-Precision Floating-Point)", - [FeatureStdExtF]>; + [FeatureStdExtZfhmin, FeatureStdExtF]>; def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">, AssemblerPredicate<(all_of FeatureStdExtZfh), "'Zfh' (Half-Precision Floating-Point)">; diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 74f56bf78a1c8..595c3cdfbb1d5 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1046,7 +1046,8 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters( // Insert the spill to the stack frame. 
Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI); + TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(), + RC, TRI); } return true; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 4a1d5770eb88e..66a34d73dd370 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -808,7 +808,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask; MVT Src1VT = Src1.getSimpleValueType(); - unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOTOpcode; + unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode; switch (RISCVTargetLowering::getLMUL(Src1VT)) { default: llvm_unreachable("Unexpected LMUL!"); @@ -861,31 +861,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { llvm_unreachable("Unexpected LMUL!"); case RISCVII::VLMUL::LMUL_F8: VMXOROpcode = RISCV::PseudoVMXOR_MM_MF8; - VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF8; + VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF8; break; case RISCVII::VLMUL::LMUL_F4: VMXOROpcode = RISCV::PseudoVMXOR_MM_MF4; - VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF4; + VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF4; break; case RISCVII::VLMUL::LMUL_F2: VMXOROpcode = RISCV::PseudoVMXOR_MM_MF2; - VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF2; + VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF2; break; case RISCVII::VLMUL::LMUL_1: VMXOROpcode = RISCV::PseudoVMXOR_MM_M1; - VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M1; + VMANDNOpcode = RISCV::PseudoVMANDN_MM_M1; break; case RISCVII::VLMUL::LMUL_2: VMXOROpcode = RISCV::PseudoVMXOR_MM_M2; - VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M2; + VMANDNOpcode = RISCV::PseudoVMANDN_MM_M2; break; case RISCVII::VLMUL::LMUL_4: VMXOROpcode = RISCV::PseudoVMXOR_MM_M4; - VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M4; + VMANDNOpcode = RISCV::PseudoVMANDN_MM_M4; break; case RISCVII::VLMUL::LMUL_8: VMXOROpcode = RISCV::PseudoVMXOR_MM_M8; - VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M8; + VMANDNOpcode = RISCV::PseudoVMANDN_MM_M8; break; } SDValue SEW = CurDAG->getTargetConstant( @@ -896,13 +896,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { SDValue MaskedOff = Node->getOperand(1); SDValue Mask = Node->getOperand(4); // If the MaskedOff value and the Mask are the same value use - // vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt + // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt // This avoids needing to copy v0 to vd before starting the next sequence. if (Mask == MaskedOff) { SDValue Cmp = SDValue( CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}), 0); - ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOTOpcode, DL, VT, + ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT, {Mask, Cmp, VL, MaskSEW})); return; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 5e1acc2950783..9d8d11e8b9807 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -533,6 +533,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom); + // Vectors implement MULHS/MULHU. 
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); @@ -1166,7 +1170,7 @@ bool RISCVTargetLowering::shouldSinkOperands( bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { - if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) + if (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin()) return false; if (VT == MVT::f32 && !Subtarget.hasStdExtF()) return false; @@ -1186,9 +1190,9 @@ bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still - // end up using a GPR but that will be decided based on ABI. - if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) + // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. + // We might still end up using a GPR but that will be decided based on ABI. + if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) return MVT::f32; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); @@ -1197,9 +1201,9 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still - // end up using a GPR but that will be decided based on ABI. - if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) + // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. + // We might still end up using a GPR but that will be decided based on ABI. + if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) return 1; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); @@ -4189,26 +4193,26 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, llvm_unreachable("Unhandled reduction"); case ISD::VECREDUCE_AND: case ISD::VP_REDUCE_AND: { - // vpopc ~x == 0 + // vcpop ~x == 0 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL); Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL); - Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL); + Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); CC = ISD::SETEQ; BaseOpc = ISD::AND; break; } case ISD::VECREDUCE_OR: case ISD::VP_REDUCE_OR: - // vpopc x != 0 - Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL); + // vcpop x != 0 + Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); CC = ISD::SETNE; BaseOpc = ISD::OR; break; case ISD::VECREDUCE_XOR: case ISD::VP_REDUCE_XOR: { - // ((vpopc x) & 1) != 0 + // ((vcpop x) & 1) != 0 SDValue One = DAG.getConstant(1, DL, XLenVT); - Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL); + Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL); Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One); CC = ISD::SETNE; BaseOpc = ISD::XOR; @@ -4223,7 +4227,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op, // Now include the start value in the operation. // Note that we must return the start value when no elements are operated - // upon. The vpopc instructions we've emitted in each case above will return + // upon. 
The vcpop instructions we've emitted in each case above will return // 0 for an inactive vector, and so we've already received the neutral value: // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we // can simply include the start value. @@ -4871,24 +4875,38 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op, PassThru = MLoad->getPassThru(); } + bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode()); + MVT XLenVT = Subtarget.getXLenVT(); MVT ContainerVT = VT; if (VT.isFixedLengthVector()) { ContainerVT = getContainerForFixedLengthVector(VT); - MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); - - Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget); + if (!IsUnmasked) { + MVT MaskVT = + MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount()); + Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget); + } } if (!VL) VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second; + unsigned IntID = + IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask; + SmallVector Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)}; + if (!IsUnmasked) + Ops.push_back(PassThru); + Ops.push_back(BasePtr); + if (!IsUnmasked) + Ops.push_back(Mask); + Ops.push_back(VL); + if (!IsUnmasked) + Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT)); + SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other}); - SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT); - SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT); - SDValue Ops[] = {Chain, IntID, PassThru, BasePtr, Mask, VL, Policy}; + SDValue Result = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO); Chain = Result.getValue(1); @@ -6425,7 +6443,19 @@ static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG, // Transform (add (mul x, c0), c1) -> // (add (mul (add x, c1/c0), c0), c1%c0). -// if c1/c0 and c1%c0 are simm12, while c1 is not. +// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case +// that should be excluded is when c0*(c1/c0) is simm12, which will lead +// to an infinite loop in DAGCombine if transformed. +// Or transform (add (mul x, c0), c1) -> +// (add (mul (add x, c1/c0+1), c0), c1%c0-c0), +// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner +// case that should be excluded is when c0*(c1/c0+1) is simm12, which will +// lead to an infinite loop in DAGCombine if transformed. +// Or transform (add (mul x, c0), c1) -> +// (add (mul (add x, c1/c0-1), c0), c1%c0+c0), +// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner +// case that should be excluded is when c0*(c1/c0-1) is simm12, which will +// lead to an infinite loop in DAGCombine if transformed. // Or transform (add (mul x, c0), c1) -> // (mul (add x, c1/c0), c0). // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. @@ -6446,35 +6476,37 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG, return SDValue(); int64_t C0 = N0C->getSExtValue(); int64_t C1 = N1C->getSExtValue(); - if (C0 == -1 || C0 == 0 || C0 == 1 || (C1 / C0) == 0 || isInt<12>(C1) || - !isInt<12>(C1 % C0) || !isInt<12>(C1 / C0)) + int64_t CA, CB; + if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1)) return SDValue(); - // If C0 * (C1 / C0) is a 12-bit integer, this transform will be reversed. 
- if (isInt<12>(C0 * (C1 / C0))) + // Search for proper CA (non-zero) and CB that both are simm12. + if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) && + !isInt<12>(C0 * (C1 / C0))) { + CA = C1 / C0; + CB = C1 % C0; + } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) && + isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) { + CA = C1 / C0 + 1; + CB = C1 % C0 - C0; + } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) && + isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) { + CA = C1 / C0 - 1; + CB = C1 % C0 + C0; + } else return SDValue(); // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0). SDLoc DL(N); SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0), - DAG.getConstant(C1 / C0, DL, VT)); + DAG.getConstant(CA, DL, VT)); SDValue New1 = DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT)); - if ((C1 % C0) == 0) - return New1; - return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(C1 % C0, DL, VT)); + return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT)); } static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - // Transform (add (mul x, c0), c1) -> - // (add (mul (add x, c1/c0), c0), c1%c0). - // if c1/c0 and c1%c0 are simm12, while c1 is not. - // Or transform (add (mul x, c0), c1) -> - // (mul (add x, c1/c0), c0). - // if c1%c0 is zero, and c1/c0 is simm12 while c1 is not. if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget)) return V; - // Fold (add (shl x, c0), (shl y, c1)) -> - // (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3]. if (SDValue V = transformAddShlImm(N, DAG, Subtarget)) return V; // fold (add (select lhs, rhs, cc, 0, y), x) -> @@ -9259,7 +9291,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(VRGATHEREI16_VV_VL) NODE_NAME_CASE(VSEXT_VL) NODE_NAME_CASE(VZEXT_VL) - NODE_NAME_CASE(VPOPC_VL) + NODE_NAME_CASE(VCPOP_VL) NODE_NAME_CASE(VLE_VL) NODE_NAME_CASE(VSE_VL) NODE_NAME_CASE(READ_CSR) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index dfec9166bc3b5..a30f904c1ad67 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -263,8 +263,8 @@ enum NodeType : unsigned { VSEXT_VL, VZEXT_VL, - // vpopc.m with additional mask and VL operands. - VPOPC_VL, + // vcpop.m with additional mask and VL operands. + VCPOP_VL, // Reads value of CSR. // The first operand is a chain pointer. 
The second specifies address of the diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index cbc344411a479..e3c8728c98ae0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -115,61 +115,68 @@ class VLFSched : Sched <[!cast("WriteVLDFF" # n), //===----------------------------------------------------------------------===// let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { -// load vd, (rs1) +// unit-stride load vd, (rs1), vm +class VUnitStrideLoad + : RVInstVLU<0b000, width.Value{3}, LUMOPUnitStride, width.Value{2-0}, + (outs VR:$vd), + (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">; + +let vm = 1, RVVConstraint = NoConstraint in { +// unit-stride whole register load vlr.v vd, (rs1) +class VWholeLoad nf, RISCVWidth width, string opcodestr, RegisterClass VRC> + : RVInstVLU { + let Uses = []; +} + +// unit-stride mask load vd, (rs1) class VUnitStrideLoadMask : RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0}, (outs VR:$vd), - (ins GPR:$rs1), opcodestr, "$vd, (${rs1})"> { - let vm = 1; - let RVVConstraint = NoConstraint; -} + (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">; +} // vm = 1, RVVConstraint = NoConstraint -// load vd, (rs1), vm -class VUnitStrideLoad - : RVInstVLU<0b000, width.Value{3}, lumop, width.Value{2-0}, +// unit-stride fault-only-first load vd, (rs1), vm +class VUnitStrideLoadFF + : RVInstVLU<0b000, width.Value{3}, LUMOPUnitStrideFF, width.Value{2-0}, (outs VR:$vd), (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">; -// load vd, (rs1), rs2, vm +// strided load vd, (rs1), rs2, vm class VStridedLoad : RVInstVLS<0b000, width.Value{3}, width.Value{2-0}, (outs VR:$vd), (ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr, "$vd, (${rs1}), $rs2$vm">; -// load vd, (rs1), vs2, vm +// indexed load vd, (rs1), vs2, vm class VIndexedLoad : RVInstVLX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs VR:$vd), (ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vd, (${rs1}), $vs2$vm">; -// vlr.v vd, (rs1) -class VWholeLoad nf, RISCVWidth width, string opcodestr, RegisterClass VRC> - : RVInstVLU { - let vm = 1; - let Uses = []; - let RVVConstraint = NoConstraint; -} +// unit-stride segment load vd, (rs1), vm +class VUnitStrideSegmentLoad nf, RISCVWidth width, string opcodestr> + : RVInstVLU; -// segment load vd, (rs1), vm -class VUnitStrideSegmentLoad nf, RISCVLSUMOP lumop, - RISCVWidth width, string opcodestr> - : RVInstVLU nf, RISCVWidth width, string opcodestr> + : RVInstVLU; -// segment load vd, (rs1), rs2, vm +// strided segment load vd, (rs1), rs2, vm class VStridedSegmentLoad nf, RISCVWidth width, string opcodestr> : RVInstVLS; -// segment load vd, (rs1), vs2, vm +// indexed segment load vd, (rs1), vs2, vm class VIndexedSegmentLoad nf, RISCVMOP mop, RISCVWidth width, string opcodestr> : RVInstVLX nf, RISCVMOP mop, RISCVWidth width, } // hasSideEffects = 0, mayLoad = 1, mayStore = 0 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { -// store vd, vs3, (rs1) +// unit-stride store vd, vs3, (rs1), vm +class VUnitStrideStore + : RVInstVSU<0b000, width.Value{3}, SUMOPUnitStride, width.Value{2-0}, + (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr, + "$vs3, (${rs1})$vm">; + +let vm = 1 in { +// vsr.v vd, (rs1) +class VWholeStore nf, string opcodestr, RegisterClass VRC> + : RVInstVSU { + let Uses = []; +} + +// unit-stride mask store vd, vs3, (rs1) class VUnitStrideStoreMask : RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, 
LSWidth8.Value{2-0}, (outs), (ins VR:$vs3, GPR:$rs1), opcodestr, - "$vs3, (${rs1})"> { - let vm = 1; -} - -// store vd, vs3, (rs1), vm -class VUnitStrideStore - : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0}, - (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr, - "$vs3, (${rs1})$vm">; + "$vs3, (${rs1})">; +} // vm = 1 -// store vd, vs3, (rs1), rs2, vm +// strided store vd, vs3, (rs1), rs2, vm class VStridedStore : RVInstVSS<0b000, width.Value{3}, width.Value{2-0}, (outs), (ins VR:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr, "$vs3, (${rs1}), $rs2$vm">; -// store vd, vs3, (rs1), vs2, vm +// indexed store vd, vs3, (rs1), vs2, vm class VIndexedStore : RVInstVSX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs), (ins VR:$vs3, GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr, "$vs3, (${rs1}), $vs2$vm">; -// vsr.v vd, (rs1) -class VWholeStore nf, string opcodestr, RegisterClass VRC> - : RVInstVSU { - let vm = 1; - let Uses = []; -} - // segment store vd, vs3, (rs1), vm class VUnitStrideSegmentStore nf, RISCVWidth width, string opcodestr> : RVInstVSU { def _UNWD : VAMONoWd; } -multiclass VWholeLoad1 { - def E8_V : VWholeLoad<0, LSWidth8, opcodestr # "e8.v", VRC>, - Sched<[WriteVLD1R8, ReadVLDX]>; - def E16_V : VWholeLoad<0, LSWidth16, opcodestr # "e16.v", VRC>, - Sched<[WriteVLD1R16, ReadVLDX]>; - def E32_V : VWholeLoad<0, LSWidth32, opcodestr # "e32.v", VRC>, - Sched<[WriteVLD1R32, ReadVLDX]>; - def E64_V : VWholeLoad<0, LSWidth64, opcodestr # "e64.v", VRC>, - Sched<[WriteVLD1R64, ReadVLDX]>; -} - -multiclass VWholeLoad2 { - def E8_V : VWholeLoad<1, LSWidth8, opcodestr # "e8.v", VRC>, - Sched<[WriteVLD2R8, ReadVLDX]>; - def E16_V : VWholeLoad<1, LSWidth16, opcodestr # "e16.v", VRC>, - Sched<[WriteVLD2R16, ReadVLDX]>; - def E32_V : VWholeLoad<1, LSWidth32, opcodestr # "e32.v", VRC>, - Sched<[WriteVLD2R32, ReadVLDX]>; - def E64_V : VWholeLoad<1, LSWidth64, opcodestr # "e64.v", VRC>, - Sched<[WriteVLD2R64, ReadVLDX]>; -} - -multiclass VWholeLoad4 { - def E8_V : VWholeLoad<3, LSWidth8, opcodestr # "e8.v", VRC>, - Sched<[WriteVLD4R8, ReadVLDX]>; - def E16_V : VWholeLoad<3, LSWidth16, opcodestr # "e16.v", VRC>, - Sched<[WriteVLD4R16, ReadVLDX]>; - def E32_V : VWholeLoad<3, LSWidth32, opcodestr # "e32.v", VRC>, - Sched<[WriteVLD4R32, ReadVLDX]>; - def E64_V : VWholeLoad<3, LSWidth64, opcodestr # "e64.v", VRC>, - Sched<[WriteVLD1R64, ReadVLDX]>; -} - -multiclass VWholeLoad8 { - def E8_V : VWholeLoad<7, LSWidth8, opcodestr # "e8.v", VRC>, - Sched<[WriteVLD8R8, ReadVLDX]>; - def E16_V : VWholeLoad<7, LSWidth16, opcodestr # "e16.v", VRC>, - Sched<[WriteVLD8R16, ReadVLDX]>; - def E32_V : VWholeLoad<7, LSWidth32, opcodestr # "e32.v", VRC>, - Sched<[WriteVLD8R32, ReadVLDX]>; - def E64_V : VWholeLoad<7, LSWidth64, opcodestr # "e64.v", VRC>, - Sched<[WriteVLD8R64, ReadVLDX]>; +multiclass VWholeLoadN nf, string opcodestr, RegisterClass VRC> { + foreach l = [8, 16, 32, 64] in { + defvar w = !cast("LSWidth" # l); + defvar s = !cast("WriteVLD" # !add(nf, 1) # "R" # l); + + def E # l # _V : VWholeLoad, + Sched<[s, ReadVLDX]>; + } } //===----------------------------------------------------------------------===// @@ -840,22 +811,23 @@ def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), } // hasSideEffects = 1, mayLoad = 0, mayStore = 0 // Vector Unit-Stride Instructions -def VLE8_V : VUnitStrideLoad, +def VLE8_V : VUnitStrideLoad, VLESched<8>; -def VLE16_V : VUnitStrideLoad, +def VLE16_V : VUnitStrideLoad, VLESched<16>; -def VLE32_V : VUnitStrideLoad, +def VLE32_V : VUnitStrideLoad, 
VLESched<32>; -def VLE64_V : VUnitStrideLoad, +def VLE64_V : VUnitStrideLoad, VLESched<64>; -def VLE8FF_V : VUnitStrideLoad, +// Vector Unit-Stride Fault-only-First Loads +def VLE8FF_V : VUnitStrideLoadFF, VLFSched<8>; -def VLE16FF_V : VUnitStrideLoad, +def VLE16FF_V : VUnitStrideLoadFF, VLFSched<16>; -def VLE32FF_V : VUnitStrideLoad, +def VLE32FF_V : VUnitStrideLoadFF, VLFSched<32>; -def VLE64FF_V : VUnitStrideLoad, +def VLE64FF_V : VUnitStrideLoadFF, VLFSched<64>; def VLM_V : VUnitStrideLoadMask<"vlm.v">, @@ -867,13 +839,13 @@ def : InstAlias<"vle1.v $vd, (${rs1})", def : InstAlias<"vse1.v $vs3, (${rs1})", (VSM_V VR:$vs3, GPR:$rs1), 0>; -def VSE8_V : VUnitStrideStore, +def VSE8_V : VUnitStrideStore, VSESched<8>; -def VSE16_V : VUnitStrideStore, +def VSE16_V : VUnitStrideStore, VSESched<16>; -def VSE32_V : VUnitStrideStore, +def VSE32_V : VUnitStrideStore, VSESched<32>; -def VSE64_V : VUnitStrideStore, +def VSE64_V : VUnitStrideStore, VSESched<64>; // Vector Strided Instructions @@ -914,10 +886,10 @@ def VSOXEI # n # _V : VSXSched; } -defm VL1R : VWholeLoad1<"vl1r", VR>; -defm VL2R : VWholeLoad2<"vl2r", VRM2>; -defm VL4R : VWholeLoad4<"vl4r", VRM4>; -defm VL8R : VWholeLoad8<"vl8r", VRM8>; +defm VL1R : VWholeLoadN<0, "vl1r", VR>; +defm VL2R : VWholeLoadN<1, "vl2r", VRM2>; +defm VL4R : VWholeLoadN<3, "vl4r", VRM4>; +defm VL8R : VWholeLoadN<7, "vl8r", VRM8>; def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>; def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>; @@ -1361,11 +1333,11 @@ let Predicates = [HasStdExtV] in { let RVVConstraint = NoConstraint in { defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">; defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">; -defm VMANDNOT_M : VMALU_MV_Mask<"vmandnot", 0b011000, "m">; +defm VMANDN_M : VMALU_MV_Mask<"vmandn", 0b011000, "m">; defm VMXOR_M : VMALU_MV_Mask<"vmxor", 0b011011, "m">; defm VMOR_M : VMALU_MV_Mask<"vmor", 0b011010, "m">; defm VMNOR_M : VMALU_MV_Mask<"vmnor", 0b011110, "m">; -defm VMORNOT_M : VMALU_MV_Mask<"vmornot", 0b011100, "m">; +defm VMORN_M : VMALU_MV_Mask<"vmorn", 0b011100, "m">; defm VMXNOR_M : VMALU_MV_Mask<"vmxnor", 0b011111, "m">; } @@ -1378,13 +1350,18 @@ def : InstAlias<"vmset.m $vd", def : InstAlias<"vmnot.m $vd, $vs", (VMNAND_MM VR:$vd, VR:$vs, VR:$vs)>; +def : InstAlias<"vmandnot.mm $vd, $vs2, $vs1", + (VMANDN_MM VR:$vd, VR:$vs2, VR:$vs1), 0>; +def : InstAlias<"vmornot.mm $vd, $vs2, $vs1", + (VMORN_MM VR:$vd, VR:$vs2, VR:$vs1), 0>; + let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { -// Vector mask population count vpopc -def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd), +// Vector mask population count vcpop +def VCPOP_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd), (ins VR:$vs2, VMaskOp:$vm), - "vpopc.m", "$vd, $vs2$vm">, + "vcpop.m", "$vd, $vs2$vm">, Sched<[WriteVMPopV, ReadVMPopV, ReadVMask]>; // vfirst find-first-set mask bit @@ -1395,6 +1372,9 @@ def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd), } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 +def : InstAlias<"vpopc.m $vd, $vs2$vm", + (VCPOP_M GPR:$vd, VR:$vs2, VMaskOp:$vm), 0>; + let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in { // vmsbf.m set-before-first mask bit @@ -1493,15 +1473,15 @@ foreach n = [1, 2, 4, 8] in { let Predicates = [HasStdExtZvlsseg] in { foreach nf=2-8 in { - def VLSEG#nf#E8_V : VUnitStrideSegmentLoad; - def VLSEG#nf#E16_V : VUnitStrideSegmentLoad; - def VLSEG#nf#E32_V : VUnitStrideSegmentLoad; - def 
VLSEG#nf#E64_V : VUnitStrideSegmentLoad; - - def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad; - def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad; - def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad; - def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad; + def VLSEG#nf#E8_V : VUnitStrideSegmentLoad; + def VLSEG#nf#E16_V : VUnitStrideSegmentLoad; + def VLSEG#nf#E32_V : VUnitStrideSegmentLoad; + def VLSEG#nf#E64_V : VUnitStrideSegmentLoad; + + def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoadFF; + def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoadFF; + def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoadFF; + def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoadFF; def VSSEG#nf#E8_V : VUnitStrideSegmentStore; def VSSEG#nf#E16_V : VUnitStrideSegmentStore; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index a33e0275a9ad4..a82e333e6bab5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -4007,11 +4007,11 @@ defm PseudoVFWREDOSUM : VPseudoReductionV_VS; defm PseudoVMAND: VPseudoBinaryM_MM; defm PseudoVMNAND: VPseudoBinaryM_MM; -defm PseudoVMANDNOT: VPseudoBinaryM_MM; +defm PseudoVMANDN: VPseudoBinaryM_MM; defm PseudoVMXOR: VPseudoBinaryM_MM; defm PseudoVMOR: VPseudoBinaryM_MM; defm PseudoVMNOR: VPseudoBinaryM_MM; -defm PseudoVMORNOT: VPseudoBinaryM_MM; +defm PseudoVMORN: VPseudoBinaryM_MM; defm PseudoVMXNOR: VPseudoBinaryM_MM; // Pseudo instructions @@ -4019,10 +4019,10 @@ defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">; defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">; //===----------------------------------------------------------------------===// -// 16.2. Vector mask population count vpopc +// 16.2. Vector mask population count vcpop //===----------------------------------------------------------------------===// -defm PseudoVPOPC: VPseudoUnaryS_M; +defm PseudoVCPOP: VPseudoUnaryS_M; //===----------------------------------------------------------------------===// // 16.3. vfirst find-first-set mask bit @@ -4676,11 +4676,11 @@ let Predicates = [HasVInstructions] in { //===----------------------------------------------------------------------===// defm : VPatBinaryM_MM<"int_riscv_vmand", "PseudoVMAND">; defm : VPatBinaryM_MM<"int_riscv_vmnand", "PseudoVMNAND">; -defm : VPatBinaryM_MM<"int_riscv_vmandnot", "PseudoVMANDNOT">; +defm : VPatBinaryM_MM<"int_riscv_vmandn", "PseudoVMANDN">; defm : VPatBinaryM_MM<"int_riscv_vmxor", "PseudoVMXOR">; defm : VPatBinaryM_MM<"int_riscv_vmor", "PseudoVMOR">; defm : VPatBinaryM_MM<"int_riscv_vmnor", "PseudoVMNOR">; -defm : VPatBinaryM_MM<"int_riscv_vmornot", "PseudoVMORNOT">; +defm : VPatBinaryM_MM<"int_riscv_vmorn", "PseudoVMORN">; defm : VPatBinaryM_MM<"int_riscv_vmxnor", "PseudoVMXNOR">; // pseudo instructions @@ -4688,9 +4688,9 @@ defm : VPatNullaryM<"int_riscv_vmclr", "PseudoVMCLR">; defm : VPatNullaryM<"int_riscv_vmset", "PseudoVMSET">; //===----------------------------------------------------------------------===// -// 16.2. Vector mask population count vpopc +// 16.2. Vector count population in mask vcpop.m //===----------------------------------------------------------------------===// -defm : VPatUnaryS_M<"int_riscv_vpopc", "PseudoVPOPC">; +defm : VPatUnaryS_M<"int_riscv_vcpop", "PseudoVCPOP">; //===----------------------------------------------------------------------===// // 16.3. 
vfirst find-first-set mask bit diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 9c61161c2dc32..711ad4335ecee 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -367,7 +367,7 @@ multiclass VPatNConvertFP2ISDNode_V { // Patterns. //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtV] in { +let Predicates = [HasVInstructions] in { // 7.4. Vector Unit-Stride Instructions foreach vti = !listconcat(FractionalGroupIntegerVectors, @@ -561,10 +561,10 @@ foreach mti = AllMasks in { VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>; def : Pat<(mti.Mask (and VR:$rs1, (rvv_vnot VR:$rs2))), - (!cast("PseudoVMANDNOT_MM_"#mti.LMul.MX) + (!cast("PseudoVMANDN_MM_"#mti.LMul.MX) VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>; def : Pat<(mti.Mask (or VR:$rs1, (rvv_vnot VR:$rs2))), - (!cast("PseudoVMORNOT_MM_"#mti.LMul.MX) + (!cast("PseudoVMORN_MM_"#mti.LMul.MX) VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>; // Handle rvv_vnot the same as the vmnot.m pseudoinstruction. @@ -573,9 +573,9 @@ foreach mti = AllMasks in { VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>; } -} // Predicates = [HasStdExtV] +} // Predicates = [HasVInstructions] -let Predicates = [HasStdExtV, HasStdExtF] in { +let Predicates = [HasVInstructionsAnyF] in { // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions defm : VPatBinaryFPSDNode_VV_VF; @@ -757,13 +757,13 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX) fwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>; } -} // Predicates = [HasStdExtV, HasStdExtF] +} // Predicates = [HasVInstructionsAnyF] //===----------------------------------------------------------------------===// // Vector Splats //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtV] in { +let Predicates = [HasVInstructions] in { foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (SplatPat GPR:$rs1)), (!cast("PseudoVMV_V_X_" # vti.LMul.MX) @@ -772,9 +772,9 @@ foreach vti = AllIntegerVectors in { (!cast("PseudoVMV_V_I_" # vti.LMul.MX) simm5:$rs1, vti.AVL, vti.Log2SEW)>; } -} // Predicates = [HasStdExtV] +} // Predicates = [HasVInstructions] -let Predicates = [HasStdExtV, HasStdExtF] in { +let Predicates = [HasVInstructionsAnyF] in { foreach fvti = AllFloatVectors in { def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)), (!cast("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) @@ -785,12 +785,12 @@ foreach fvti = AllFloatVectors in { (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) 0, fvti.AVL, fvti.Log2SEW)>; } -} // Predicates = [HasStdExtV, HasStdExtF] +} // Predicates = [HasVInstructionsAnyF] //===----------------------------------------------------------------------===// // Vector Element Extracts //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtV, HasStdExtF] in +let Predicates = [HasVInstructionsAnyF] in foreach vti = AllFloatVectors in { defvar vmv_f_s_inst = !cast(!strconcat("PseudoVFMV_", vti.ScalarSuffix, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 9656ccd36ed13..73b97e1c36759 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -199,7 +199,7 @@ def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>; def 
riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl), (riscv_vmxor_vl node:$rs, true_mask, node:$vl)>; -def riscv_vpopc_vl : SDNode<"RISCVISD::VPOPC_VL", +def riscv_vcpop_vl : SDNode<"RISCVISD::VCPOP_VL", SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>, SDTCisVec<1>, SDTCisInt<1>, SDTCVecEltisVT<2, i1>, @@ -606,7 +606,7 @@ multiclass VPatReductionVL { // Patterns. //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtV] in { +let Predicates = [HasVInstructions] in { // 7.4. Vector Unit-Stride Instructions foreach vti = AllVectors in { @@ -925,10 +925,10 @@ defm : VPatBinaryVL_VV_VX_VI; defm : VPatBinaryVL_VV_VX; defm : VPatBinaryVL_VV_VX; -} // Predicates = [HasStdExtV] +} // Predicates = [HasVInstructions] // 15.1. Vector Single-Width Integer Reduction Instructions -let Predicates = [HasStdExtV] in { +let Predicates = [HasVInstructions] in { defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; @@ -937,17 +937,17 @@ defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; -} // Predicates = [HasStdExtV] +} // Predicates = [HasVInstructions] // 15.3. Vector Single-Width Floating-Point Reduction Instructions -let Predicates = [HasStdExtV, HasStdExtF] in { +let Predicates = [HasVInstructionsAnyF] in { defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; defm : VPatReductionVL; -} // Predicates = [HasStdExtV, HasStdExtF] +} // Predicates = [HasVInstructionsAnyF] -let Predicates = [HasStdExtV, HasStdExtF] in { +let Predicates = [HasVInstructionsAnyF] in { // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions defm : VPatBinaryFPVL_VV_VF; @@ -1209,9 +1209,9 @@ foreach fvti = AllFloatVectors in { } } -} // Predicates = [HasStdExtV, HasStdExtF] +} // Predicates = [HasVInstructionsAnyF] -let Predicates = [HasStdExtV] in { +let Predicates = [HasVInstructions] in { foreach mti = AllMasks in { // 16.1 Vector Mask-Register Logical Instructions @@ -1233,12 +1233,12 @@ foreach mti = AllMasks in { def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1, (riscv_vmnot_vl VR:$rs2, VLOpFrag), VLOpFrag)), - (!cast("PseudoVMANDNOT_MM_" # mti.LMul.MX) + (!cast("PseudoVMANDN_MM_" # mti.LMul.MX) VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>; def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1, (riscv_vmnot_vl VR:$rs2, VLOpFrag), VLOpFrag)), - (!cast("PseudoVMORNOT_MM_" # mti.LMul.MX) + (!cast("PseudoVMORN_MM_" # mti.LMul.MX) VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>; // XOR is associative so we need 2 patterns for VMXNOR. 
def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmnot_vl VR:$rs1, @@ -1268,20 +1268,20 @@ foreach mti = AllMasks in { (!cast("PseudoVMNAND_MM_" # mti.LMul.MX) VR:$rs, VR:$rs, GPR:$vl, mti.Log2SEW)>; - // 16.2 Vector Mask Population Count vpopc - def : Pat<(XLenVT (riscv_vpopc_vl (mti.Mask VR:$rs2), (mti.Mask true_mask), + // 16.2 Vector count population in mask vcpop.m + def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVPOPC_M_" # mti.BX) + (!cast("PseudoVCPOP_M_" # mti.BX) VR:$rs2, GPR:$vl, mti.Log2SEW)>; - def : Pat<(XLenVT (riscv_vpopc_vl (mti.Mask VR:$rs2), (mti.Mask V0), + def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask V0), VLOpFrag)), - (!cast("PseudoVPOPC_M_" # mti.BX # "_MASK") + (!cast("PseudoVCPOP_M_" # mti.BX # "_MASK") VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>; } -} // Predicates = [HasStdExtV] +} // Predicates = [HasVInstructions] -let Predicates = [HasStdExtV] in { +let Predicates = [HasVInstructions] in { // 17.1. Integer Scalar Move Instructions // 17.4. Vector Register Gather Instruction foreach vti = AllIntegerVectors in { @@ -1361,9 +1361,9 @@ foreach vti = AllIntegerVectors in { } } -} // Predicates = [HasStdExtV] +} // Predicates = [HasVInstructions] -let Predicates = [HasStdExtV, HasStdExtF] in { +let Predicates = [HasVInstructionsAnyF] in { // 17.2. Floating-Point Scalar Move Instructions foreach vti = AllFloatVectors in { @@ -1443,7 +1443,7 @@ foreach vti = AllFloatVectors in { } } -} // Predicates = [HasStdExtV, HasStdExtF] +} // Predicates = [HasVInstructionsAnyF] //===----------------------------------------------------------------------===// // Miscellaneous RISCVISD SDNodes @@ -1467,7 +1467,7 @@ def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>; def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>; def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>; -let Predicates = [HasStdExtV] in { +let Predicates = [HasVInstructions] in { foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask true_mask), @@ -1520,4 +1520,4 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in { GPR:$vl, vti.Log2SEW)>; } -} // Predicates = [HasStdExtV] +} // Predicates = [HasVInstructions] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 012a751fe2c7c..a33494461869e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -66,7 +66,7 @@ class FPCmpH_rr funct3, string opcodestr> // Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZfh] in { +let Predicates = [HasStdExtZfhmin] in { let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd), (ins GPR:$rs1, simm12:$imm12), @@ -81,7 +81,9 @@ def FSH : RVInstS<0b001, OPC_STORE_FP, (outs), (ins FPR16:$rs2, GPR:$rs1, simm12:$imm12), "fsh", "$rs2, ${imm12}(${rs1})">, Sched<[WriteFST16, ReadStoreData, ReadFMemBase]>; +} // Predicates = [HasStdExtZfhmin] +let Predicates = [HasStdExtZfh] in { def FMADD_H : FPFMAH_rrr_frm, Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>; def : FPFMAHDynFrmAlias; @@ -148,7 +150,9 @@ def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">, let rs2 = 0b00001; } def : FPUnaryOpDynFrmAlias; +} // Predicates = [HasStdExtZfh] +let Predicates = [HasStdExtZfhmin] in { def FCVT_H_S : 
FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">, Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]> { let rs2 = 0b00000; @@ -169,7 +173,9 @@ def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">, Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]> { let rs2 = 0b00000; } +} // Predicates = [HasStdExtZfhmin] +let Predicates = [HasStdExtZfh] in { def FEQ_H : FPCmpH_rr<0b010, "feq.h">; def FLT_H : FPCmpH_rr<0b001, "flt.h">; def FLE_H : FPCmpH_rr<0b000, "fle.h">; @@ -206,7 +212,7 @@ def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">, def : FPUnaryOpDynFrmAlias; } // Predicates = [HasStdExtZfh, IsRV64] -let Predicates = [HasStdExtZfh, HasStdExtD] in { +let Predicates = [HasStdExtZfhmin, HasStdExtD] in { def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">, Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]> { let rs2 = 0b00001; @@ -217,16 +223,18 @@ def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">, Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]> { let rs2 = 0b00010; } -} // Predicates = [HasStdExtZfh, HasStdExtD] +} // Predicates = [HasStdExtZfhmin, HasStdExtD] //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20) //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZfh] in { +let Predicates = [HasStdExtZfhmin] in { def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>; def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>; +} // Predicates = [HasStdExtZfhmin] +let Predicates = [HasStdExtZfh] in { def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>; def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>; def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>; @@ -237,10 +245,12 @@ def : InstAlias<"fgt.h $rd, $rs, $rt", (FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; def : InstAlias<"fge.h $rd, $rs, $rt", (FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; +} // Predicates = [HasStdExtZfh] +let Predicates = [HasStdExtZfhmin] in { def PseudoFLH : PseudoFloatLoad<"flh", FPR16>; def PseudoFSH : PseudoStore<"fsh", FPR16>; -} // Predicates = [HasStdExtZfh] +} // Predicates = [HasStdExtZfhmin] //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns @@ -313,7 +323,9 @@ def : PatFpr16Fpr16; def : PatFpr16Fpr16; def Select_FPR16_Using_CC_GPR : SelectCC_rrirr; +} // Predicates = [HasStdExtZfh] +let Predicates = [HasStdExtZfhmin] in { /// Loads defm : LdPat; @@ -331,7 +343,7 @@ def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; // Moves (no conversion) def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>; def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>; -} // Predicates = [HasStdExtZfh] +} // Predicates = [HasStdExtZfhmin] let Predicates = [HasStdExtZfh, IsRV32] in { // half->[u]int. Round-to-zero must be used. 
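The RISCVInstrInfoZfh.td hunks above and below re-gate the half-precision instructions: loads/stores (FLH/FSH), moves (FMV.H.X/FMV.X.H), and conversions (FCVT.H.S/FCVT.S.H, plus FCVT.H.D/FCVT.D.H with D) now require only the new Zfhmin extension, while the arithmetic, FMA, compare, and select patterns stay behind full Zfh. A minimal C++ sketch of the resulting capability split, with hypothetical helper names and assuming (per the RISC-V spec) that Zfh is a superset of Zfhmin:

  // Sketch only; these helpers are illustrative, not part of this patch.
  struct RISCVFeatures {
    bool HasStdExtZfhmin = false; // f16 load/store, move, and convert only
    bool HasStdExtZfh = false;    // full f16 support; superset of Zfhmin
  };

  // FLH, FSH, FMV.H.X/FMV.X.H, and FCVT.H.S/FCVT.S.H need only Zfhmin.
  static bool hasF16MemMoveConvert(const RISCVFeatures &F) {
    return F.HasStdExtZfhmin || F.HasStdExtZfh;
  }

  // FADD.H, FMADD.H, FEQ.H, etc. still require full Zfh.
  static bool hasF16Arithmetic(const RISCVFeatures &F) {
    return F.HasStdExtZfh;
  }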
@@ -383,7 +395,7 @@ def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV64] -let Predicates = [HasStdExtZfh, HasStdExtD] in { +let Predicates = [HasStdExtZfhmin, HasStdExtD] in { /// Float conversion operations // f64 -> f16, f16 -> f64 def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; @@ -393,4 +405,4 @@ def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2), (FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>; def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>; -} +} // Predicates = [HasStdExtZfhmin, HasStdExtD] diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 0a9a8eff027af..deb2a11f98f10 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -52,6 +52,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool HasStdExtV = false; bool HasStdExtZvlsseg = false; bool HasStdExtZvamo = false; + bool HasStdExtZfhmin = false; bool HasStdExtZfh = false; bool HasRV64 = false; bool IsRV32E = false; @@ -118,6 +119,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool hasStdExtV() const { return HasStdExtV; } bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; } bool hasStdExtZvamo() const { return HasStdExtZvamo; } + bool hasStdExtZfhmin() const { return HasStdExtZfhmin; } bool hasStdExtZfh() const { return HasStdExtZfh; } bool is64Bit() const { return HasRV64; } bool isRV32E() const { return IsRV32E; } @@ -138,8 +140,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { bool hasVInstructionsF16() const { return HasStdExtV && hasStdExtZfh(); } bool hasVInstructionsF32() const { return HasStdExtV && hasStdExtF(); } bool hasVInstructionsF64() const { return HasStdExtV && hasStdExtD(); } - // D and Zfh imply F. - bool hasVInstructionsAnyF() const { return HasStdExtV && hasStdExtF(); } + // F16 and F64 both require F32. + bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); } unsigned getMaxInterleaveFactor() const { return hasVInstructions() ? MaxInterleaveFactor : 1; } diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp index 3d27b70d6ef9d..254e5e92449b2 100644 --- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -211,8 +211,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { LiveRegs.addLiveOuts(MBB); // Iterate backwards through the block looking for instructions to change. 
- for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { - MachineInstr &MI = *MBBI; + for (MachineInstr &MI : llvm::reverse(MBB)) { switch (MI.getOpcode()) { case SystemZ::IILF: Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH); diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp index 76824335239b7..9f29fc092c697 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp @@ -37,5 +37,4 @@ VEELFMCAsmInfo::VEELFMCAsmInfo(const Triple &TheTriple) { UsesELFSectionDirectiveForBSS = true; SupportsDebugInformation = true; - UseIntegratedAssembler = false; } diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp index 77898238b2632..7d1e6c553f812 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp @@ -1114,6 +1114,8 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser { void onEndOfFunction(SMLoc ErrorLoc) { TC.endOfFunction(ErrorLoc); + // Reset the type checker state. + TC.Clear(); // Automatically output a .size directive, so it becomes optional for the // user. diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp index 9e4162a969cb9..a6b5d4252f2f9 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp @@ -74,6 +74,9 @@ bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) { // which are mostly not helpful. if (TypeErrorThisFunction) return true; + // If we're currently in unreachable code, we suppress errors as well. + if (Unreachable) + return true; TypeErrorThisFunction = true; dumpTypeStack("current stack: "); return Parser.Error(ErrorLoc, Msg); @@ -170,17 +173,18 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst, return false; } -void WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) { +bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) { // Check the return types. for (auto RVT : llvm::reverse(ReturnTypes)) { - popType(ErrorLoc, RVT); + if (popType(ErrorLoc, RVT)) + return true; } if (!Stack.empty()) { - typeError(ErrorLoc, - std::to_string(Stack.size()) + " superfluous return values"); + return typeError(ErrorLoc, std::to_string(Stack.size()) + + " superfluous return values"); } - // Reset the type checker state. - Clear(); + Unreachable = true; + return false; } bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) { @@ -219,10 +223,17 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) { Name == "else" || Name == "end_try") { if (checkEnd(ErrorLoc)) return true; + if (Name == "end_block") + Unreachable = false; + } else if (Name == "return") { + if (endOfFunction(ErrorLoc)) + return true; } else if (Name == "call_indirect" || Name == "return_call_indirect") { // Function value.
if (popType(ErrorLoc, wasm::ValType::I32)) return true; if (checkSig(ErrorLoc, LastSig)) return true; + if (Name == "return_call_indirect" && endOfFunction(ErrorLoc)) + return true; } else if (Name == "call" || Name == "return_call") { const MCSymbolRefExpr *SymRef; if (getSymRef(ErrorLoc, Inst, SymRef)) @@ -233,6 +244,8 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) { return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() + " missing .functype"); if (checkSig(ErrorLoc, *Sig)) return true; + if (Name == "return_call" && endOfFunction(ErrorLoc)) + return true; } else if (Name == "catch") { const MCSymbolRefExpr *SymRef; if (getSymRef(ErrorLoc, Inst, SymRef)) @@ -248,6 +261,8 @@ } else if (Name == "ref.null") { auto VT = static_cast(Inst.getOperand(0).getImm()); Stack.push_back(VT); + } else if (Name == "unreachable") { + Unreachable = true; } else { // The current instruction is a stack instruction which doesn't have // explicit operands that indicate push/pop types, so we get those from diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h index a15a69b504180..aa35213ccca3b 100644 --- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h +++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h @@ -32,15 +32,9 @@ class WebAssemblyAsmTypeCheck final { SmallVector ReturnTypes; wasm::WasmSignature LastSig; bool TypeErrorThisFunction = false; + bool Unreachable = false; bool is64; - void Clear() { - Stack.clear(); - LocalTypes.clear(); - ReturnTypes.clear(); - TypeErrorThisFunction = false; - } - void dumpTypeStack(Twine Msg); bool typeError(SMLoc ErrorLoc, const Twine &Msg); bool popType(SMLoc ErrorLoc, Optional EVT); @@ -57,8 +51,16 @@ class WebAssemblyAsmTypeCheck final { void funcDecl(const wasm::WasmSignature &Sig); void localDecl(const SmallVector &Locals); void setLastSig(const wasm::WasmSignature &Sig) { LastSig = Sig; } - void endOfFunction(SMLoc ErrorLoc); + bool endOfFunction(SMLoc ErrorLoc); bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst); + + void Clear() { + Stack.clear(); + LocalTypes.clear(); + ReturnTypes.clear(); + TypeErrorThisFunction = false; + Unreachable = false; + } }; } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/README.txt b/llvm/lib/Target/WebAssembly/README.txt index 934a3ba3bc4aa..ab1cd8f0f84a2 100644 --- a/llvm/lib/Target/WebAssembly/README.txt +++ b/llvm/lib/Target/WebAssembly/README.txt @@ -2,11 +2,11 @@ The object format emitted by the WebAssembly backend is documented in: - * https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md + * https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md The C ABI is described in: - * https://github.com/WebAssembly/tool-conventions/blob/master/BasicCABI.md + * https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md For more information on WebAssembly itself, see the home page: @@ -31,8 +31,8 @@ For more information, see: The following documents contain some information on the semantics and binary encoding of WebAssembly itself: - * https://github.com/WebAssembly/design/blob/master/Semantics.md - * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md + *
https://github.com/WebAssembly/design/blob/main/Semantics.md + * https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md Some notes on ways that the generated code could be improved follow: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 3851300f615e7..0d3f516932618 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -506,7 +506,7 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) { // This is an "architecture", not a "feature", but we emit it as such for // the benefit of tools like Binaryen and consistency with other producers. // FIXME: Subtarget is null here, so can't Subtarget->hasAddr64() ? - if (M.getDataLayout().getMaxPointerSize() == 8) { + if (M.getDataLayout().getPointerSize() == 8) { // Can't use EmitFeature since "wasm-feature-memory64" is not a module // flag. EmittedFeatures.push_back({wasm::WASM_FEATURE_PREFIX_USED, "memory64"}); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index 7507344037871..c137f990ac582 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -1670,8 +1670,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) { SmallVector Stack; SmallVector EHPadStack; for (auto &MBB : reverse(MF)) { - for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) { - MachineInstr &MI = *I; + for (MachineInstr &MI : llvm::reverse(MBB)) { switch (MI.getOpcode()) { case WebAssembly::BLOCK: case WebAssembly::TRY: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp index a933d1a4f4215..910a4e5e0d1ac 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp @@ -379,9 +379,14 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) { const TargetRegisterClass *RC = MRI.getRegClass(OldReg); Register NewReg = MRI.createVirtualRegister(RC); unsigned Opc = getLocalGetOpcode(RC); - InsertPt = - BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg) - .addImm(LocalId); + // Use InsertPt as our DebugLoc, since MI may be discontinuous from + // where this local is being inserted, causing non-linear stepping + // in the debugger or at function entry points where variables aren't live + // yet. The alternative is the previous instruction, but that is strictly + // worse since it can point at the previous statement.
+ // See crbug.com/1251909, crbug.com/1249745 + InsertPt = BuildMI(MBB, InsertPt, InsertPt->getDebugLoc(), + TII->get(Opc), NewReg).addImm(LocalId); MO.setReg(NewReg); MFI.stackifyVReg(MRI, NewReg); Changed = true; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index d1d7c2d74a8bf..7e75989d3def9 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -66,7 +66,6 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; - bool SelectExternRefAddr(const SDValue &Addr, const SDValue &Base); // Include the pieces autogenerated from the target description. #include "WebAssemblyGenDAGISel.inc" diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 511d263abbaab..f4a553365b02f 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1758,8 +1758,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm( // It is widely common for MS InlineAsm to use a global variable and one/two // registers in a memory expression, even though it is inaccessible via rip/eip. if (IsGlobalLV && (BaseReg || IndexReg)) { - Operands.push_back( - X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size)); + Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, + End, Size, Identifier, Decl)); return false; } // Otherwise, we set the base register to a non-zero value @@ -2551,6 +2551,8 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) { StringRef ErrMsg; unsigned BaseReg = SM.getBaseReg(); unsigned IndexReg = SM.getIndexReg(); + if (IndexReg && BaseReg == X86::RIP) + BaseReg = 0; unsigned Scale = SM.getScale(); if (!PtrInOperand) Size = SM.getElementSize() << 3; @@ -4093,12 +4095,12 @@ void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) { // be found here: // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions switch (Inst.getOpcode()) { - case X86::RETW: - case X86::RETL: - case X86::RETQ: - case X86::RETIL: - case X86::RETIQ: - case X86::RETIW: { + case X86::RET16: + case X86::RET32: + case X86::RET64: + case X86::RETI16: + case X86::RETI32: + case X86::RETI64: { MCInst ShlInst, FenceInst; bool Parse32 = is32BitMode() || Code16GCC; unsigned Basereg = diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index c358776a08105..d4f39b5713941 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -116,13 +116,6 @@ cl::opt X86PadForBranchAlign( "x86-pad-for-branch-align", cl::init(true), cl::Hidden, cl::desc("Pad previous instructions to implement branch alignment")); -class X86ELFObjectWriter : public MCELFObjectTargetWriter { -public: - X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine, - bool HasRelocationAddend, bool foobar) - : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {} -}; - class X86AsmBackend : public MCAsmBackend { const MCSubtargetInfo &STI; std::unique_ptr MCII; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index aeea0aa872bdc..9da0a8129f230 100644 ---
a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -408,6 +408,9 @@ class X86MCInstrAnalysis : public MCInstrAnalysis { const MCSubtargetInfo *STI, uint64_t Addr, uint64_t Size) const override; + Optional + getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const override; }; #define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS @@ -557,6 +560,30 @@ Optional X86MCInstrAnalysis::evaluateMemoryOperandAddress( return None; } +Optional +X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst, + uint64_t Size) const { + if (Inst.getOpcode() != X86::LEA64r) + return None; + const MCInstrDesc &MCID = Info->get(Inst.getOpcode()); + int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags); + if (MemOpStart == -1) + return None; + MemOpStart += X86II::getOperandBias(MCID); + const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg); + const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg); + const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg); + const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt); + const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp); + // Must be a simple rip-relative address. + if (BaseReg.getReg() != X86::RIP || SegReg.getReg() != 0 || + IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm()) + return None; + // rip-relative ModR/M immediate is 32 bits. + assert(Size > 4 && "invalid instruction size for rip-relative lea"); + return Size - 4; +} + } // end of namespace X86_MC } // end of namespace llvm diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp index 4a3206f370955..04931afdec51c 100644 --- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp +++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp @@ -360,22 +360,17 @@ findPotentialBlockers(MachineInstr *LoadInst) { if (BlockCount < InspectionLimit) { MachineBasicBlock *MBB = LoadInst->getParent(); int LimitLeft = InspectionLimit - BlockCount; - for (MachineBasicBlock::pred_iterator PB = MBB->pred_begin(), - PE = MBB->pred_end(); - PB != PE; ++PB) { - MachineBasicBlock *PMBB = *PB; + for (MachineBasicBlock *PMBB : MBB->predecessors()) { int PredCount = 0; - for (MachineBasicBlock::reverse_iterator PBInst = PMBB->rbegin(), - PME = PMBB->rend(); - PBInst != PME; ++PBInst) { - if (PBInst->isMetaInstruction()) + for (MachineInstr &PBInst : llvm::reverse(*PMBB)) { + if (PBInst.isMetaInstruction()) continue; PredCount++; if (PredCount >= LimitLeft) break; - if (PBInst->getDesc().isCall()) + if (PBInst.getDesc().isCall()) break; - PotentialBlockers.push_back(&*PBInst); + PotentialBlockers.push_back(&PBInst); } } } diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp index 05349a7c01f89..863438793acfd 100644 --- a/llvm/lib/Target/X86/X86CmovConversion.cpp +++ b/llvm/lib/Target/X86/X86CmovConversion.cpp @@ -582,10 +582,9 @@ static bool checkEFLAGSLive(MachineInstr *MI) { } // We hit the end of the block, check whether EFLAGS is live into a successor. 
- for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) { - if ((*I)->isLiveIn(X86::EFLAGS)) + for (MachineBasicBlock *Succ : BB->successors()) + if (Succ->isLiveIn(X86::EFLAGS)) return true; - } return false; } @@ -797,8 +796,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( MOp.setIsKill(false); } } - MBB->erase(MachineBasicBlock::iterator(MI), - std::next(MachineBasicBlock::iterator(MI))); + MBB->erase(&MI); // Add this PHI to the rewrite table. FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg; diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index ca7dfcdbb9cae..01dc509df7956 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -394,10 +394,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineInstrBuilder MIB; if (StackAdj == 0) { MIB = BuildMI(MBB, MBBI, DL, - TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL)); + TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32)); } else if (isUInt<16>(StackAdj)) { MIB = BuildMI(MBB, MBBI, DL, - TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL)) + TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32)) .addImm(StackAdj); } else { assert(!STI->is64Bit() && @@ -407,7 +407,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r)).addReg(X86::ECX, RegState::Define); X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true); BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX); - MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL)); + MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32)); } for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I) MIB.add(MBBI->getOperand(I)); @@ -648,35 +648,24 @@ void X86ExpandPseudo::ExpandVastartSaveXmmRegs( EntryBlk->end()); TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk); - int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm(); - Register BaseReg; - uint64_t FrameOffset = - X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed(); - uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm(); + uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm(); + uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm(); // TODO: add support for YMM and ZMM here. unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; // In the XMM save block, save all the XMM argument registers. 
- for (int64_t OpndIdx = 3, RegIdx = 0; + for (int64_t OpndIdx = 7, RegIdx = 0; OpndIdx < VAStartPseudoInstr->getNumOperands() - 1; OpndIdx++, RegIdx++) { - - int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16; - - MachineMemOperand *MMO = Func->getMachineMemOperand( - MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset), - MachineMemOperand::MOStore, - /*Size=*/16, Align(16)); - - BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc)) - .addReg(BaseReg) - .addImm(/*Scale=*/1) - .addReg(/*IndexReg=*/0) - .addImm(/*Disp=*/Offset) - .addReg(/*Segment=*/0) - .addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg()) - .addMemOperand(MMO); + auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc)); + for (int i = 0; i < X86::AddrNumOperands; ++i) { + if (i == X86::AddrDisp) + NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16); + else + NewMI.add(VAStartPseudoInstr->getOperand(i + 1)); + } + NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg()); assert(Register::isPhysicalRegister( VAStartPseudoInstr->getOperand(OpndIdx).getReg())); } diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index d5e7e2f10820d..d87e9782699b5 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -1304,11 +1304,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { MachineInstrBuilder MIB; if (X86MFInfo->getBytesToPopOnReturn()) { MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL)) + TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32)) .addImm(X86MFInfo->getBytesToPopOnReturn()); } else { MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL)); + TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32)); } for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) MIB.addReg(RetRegs[i], RegState::Implicit); diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 67314b1dad442..8ee503e58e470 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -1733,16 +1733,14 @@ void FPS::setKillFlags(MachineBasicBlock &MBB) const { LPR.addLiveOuts(MBB); - for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); - I != E; ++I) { - if (I->isDebugInstr()) + for (MachineInstr &MI : llvm::reverse(MBB)) { + if (MI.isDebugInstr()) continue; std::bitset<8> Defs; SmallVector Uses; - MachineInstr &MI = *I; - for (auto &MO : I->operands()) { + for (auto &MO : MI.operands()) { if (!MO.isReg()) continue; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 6370190a8726c..29bf71ba1f3ed 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -435,6 +435,18 @@ namespace { return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); } + SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth, + const SDLoc &DL) { + assert(VecWidth == 128 && "Unexpected vector width"); + uint64_t Index = N->getConstantOperandVal(2); + MVT VecVT = N->getSimpleValueType(0); + uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth; + assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index"); + // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub) + // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub) + return getI8Imm(InsertIdx ? 
0x02 : 0x30, DL); + } + // Helper to detect unneeded 'and' instructions on shift amounts. Called // from PatFrags in tablegen. bool isUnneededShiftMask(SDNode *N, unsigned Width) const { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9d46892dd20b7..aa60a8cbb3bf6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1682,6 +1682,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); } + // With BWI, expanding (and promoting the shifts) is the better option. + if (!Subtarget.useBWIRegs()) + setOperationAction(ISD::ROTL, MVT::v32i16, Custom); + for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom); setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom); @@ -3742,13 +3746,19 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); SaveXMMOps.push_back(ALVal); - SaveXMMOps.push_back( - DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32)); + SaveXMMOps.push_back(RSFIN); SaveXMMOps.push_back( DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32)); llvm::append_range(SaveXMMOps, LiveXMMRegs); - MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL, - MVT::Other, SaveXMMOps)); + MachineMemOperand *StoreMMO = + DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(), + Offset), + MachineMemOperand::MOStore, 128, Align(16)); + MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS, + DL, DAG.getVTList(MVT::Other), + SaveXMMOps, MVT::i8, StoreMMO)); } if (!MemOps.empty()) @@ -4531,7 +4541,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // address into a register. Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true); } else if (Subtarget.isTarget64BitILP32() && - Callee->getValueType(0) == MVT::i32) { + Callee.getValueType() == MVT::i32) { // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); } @@ -6869,40 +6879,17 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, } // Extract constant bits from build vector.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { + if (auto *BV = dyn_cast(Op)) { + BitVector Undefs; + SmallVector SrcEltBits; unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); - unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; - - APInt UndefSrcElts(NumSrcElts, 0); - SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { - const SDValue &Src = Op.getOperand(i); - if (Src.isUndef()) { - UndefSrcElts.setBit(i); - continue; - } - auto *Cst = cast(Src); - SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); + if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) { + APInt UndefSrcElts = APInt::getNullValue(SrcEltBits.size()); + for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I) + if (Undefs[I]) + UndefSrcElts.setBit(I); + return CastBitData(UndefSrcElts, SrcEltBits); } - return CastBitData(UndefSrcElts, SrcEltBits); - } - if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) { - unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); - unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; - - APInt UndefSrcElts(NumSrcElts, 0); - SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { - const SDValue &Src = Op.getOperand(i); - if (Src.isUndef()) { - UndefSrcElts.setBit(i); - continue; - } - auto *Cst = cast(Src); - APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); - SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits); - } - return CastBitData(UndefSrcElts, SrcEltBits); } // Extract constant bits from constant pool vector. @@ -15583,14 +15570,28 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false); if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() && !Subtarget.hasVLX()) { - SmallVector DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32)); - for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1)) - DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32); - SDValue DWordClearMask = DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps); - V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1), - DWordClearMask); - V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2), - DWordClearMask); + // Check if this is part of a 256-bit vector truncation. + if (NumEvenDrops == 2 && Subtarget.hasAVX2() && + peekThroughBitcasts(V1).getOpcode() == ISD::EXTRACT_SUBVECTOR && + peekThroughBitcasts(V2).getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SDValue V1V2 = concatSubVectors(V1, V2, DAG, DL); + V1V2 = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1V2, + getZeroVector(MVT::v16i16, Subtarget, DAG, DL), + DAG.getTargetConstant(0xEE, DL, MVT::i8)); + V1V2 = DAG.getBitcast(MVT::v8i32, V1V2); + V1 = extract128BitVector(V1V2, 0, DAG, DL); + V2 = extract128BitVector(V1V2, 4, DAG, DL); + } else { + SmallVector DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32)); + for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1)) + DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32); + SDValue DWordClearMask = + DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps); + V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1), + DWordClearMask); + V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2), + DWordClearMask); + } // Now pack things back together. 
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2); if (NumEvenDrops == 2) { @@ -16432,21 +16433,12 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1); if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() && X86::mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) { + MVT MemVT = VT.getHalfNumVectorElementsVT(); + unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize(); auto *Ld = cast(peekThroughOneUseBitcasts(V1)); - if (!Ld->isNonTemporal()) { - MVT MemVT = VT.getHalfNumVectorElementsVT(); - unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize(); - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), - TypeSize::Fixed(Ofs), DL); - SDValue Ops[] = {Ld->getChain(), Ptr}; - SDValue BcastLd = DAG.getMemIntrinsicNode( - X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT, - DAG.getMachineFunction().getMachineMemOperand( - Ld->getMemOperand(), Ofs, MemVT.getStoreSize())); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1)); - return BcastLd; - } + if (SDValue BcstLd = getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL, + VT, MemVT, Ld, Ofs, DAG)) + return BcstLd; } // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding. @@ -23098,16 +23090,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // For equality comparisons try to use SIGN_EXTEND if the input was // truncated from something with enough sign bits. if (Op0.getOpcode() == ISD::TRUNCATE) { - SDValue In = Op0.getOperand(0); - unsigned EffBits = - In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1; - if (EffBits <= 16) + if (DAG.ComputeMinSignedBits(Op0.getOperand(0)) <= 16) ExtendOp = ISD::SIGN_EXTEND; } else if (Op1.getOpcode() == ISD::TRUNCATE) { - SDValue In = Op1.getOperand(0); - unsigned EffBits = - In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1; - if (EffBits <= 16) + if (DAG.ComputeMinSignedBits(Op1.getOperand(0)) <= 16) ExtendOp = ISD::SIGN_EXTEND; } } @@ -29130,7 +29116,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl, MVT VT = Amt.getSimpleValueType(); if (!(VT == MVT::v8i16 || VT == MVT::v4i32 || (Subtarget.hasInt256() && VT == MVT::v16i16) || - (Subtarget.hasVBMI2() && VT == MVT::v32i16) || + (Subtarget.hasAVX512() && VT == MVT::v32i16) || (!Subtarget.hasAVX512() && VT == MVT::v16i8))) return SDValue(); @@ -29842,9 +29828,11 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(ISD::OR, DL, VT, SHL, SRL); } - // As with shifts, convert the rotation amount to a multiplication factor. + // As with shifts, attempt to convert the rotation amount to a multiplication + // factor, falling back to general expansion. SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG); - assert(Scale && "Failed to convert ROTL amount to scale"); + if (!Scale) + return SDValue(); // v8i16/v16i16: perform unsigned multiply hi/lo and OR the results. if (EltSizeInBits == 16) { @@ -33092,13 +33080,9 @@ static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, // If we hit the end of the block, check whether EFLAGS is live into a // successor.
- for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), - sEnd = BB->succ_end(); - sItr != sEnd; ++sItr) { - MachineBasicBlock* succ = *sItr; - if (succ->isLiveIn(X86::EFLAGS)) + for (MachineBasicBlock *Succ : BB->successors()) + if (Succ->isLiveIn(X86::EFLAGS)) return true; - } return false; } @@ -33835,14 +33819,11 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI, } // Transfer any debug instructions inside the CMOV sequence to the sunk block. - auto DbgEnd = MachineBasicBlock::iterator(LastCMOV); - auto DbgIt = MachineBasicBlock::iterator(MI); - while (DbgIt != DbgEnd) { - auto Next = std::next(DbgIt); - if (DbgIt->isDebugInstr()) - SinkMBB->push_back(DbgIt->removeFromParent()); - DbgIt = Next; - } + auto DbgRange = llvm::make_range(MachineBasicBlock::iterator(MI), + MachineBasicBlock::iterator(LastCMOV)); + for (MachineInstr &MI : llvm::make_early_inc_range(DbgRange)) + if (MI.isDebugInstr()) + SinkMBB->push_back(MI.removeFromParent()); // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. SinkMBB->splice(SinkMBB->end(), ThisMBB, @@ -36888,15 +36869,17 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, 256); } - // If we're splatting the low subvector, an insert-subvector 'concat' + // If we're inserting the low subvector, an insert-subvector 'concat' // pattern is quicker than VPERM2X128. // TODO: Add AVX2 support instead of VPERMQ/VPERMPD. - if (Mask[0] == 0 && Mask[1] == 0 && !Subtarget.hasAVX2()) { + if (BaseMask[0] == 0 && (BaseMask[1] == 0 || BaseMask[1] == 2) && + !Subtarget.hasAVX2()) { if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) return SDValue(); // Nothing to do! - Res = CanonicalizeShuffleInput(RootVT, V1); - Res = extractSubVector(Res, 0, DAG, DL, 128); - return concatSubVectors(Res, Res, DAG, DL); + SDValue Lo = CanonicalizeShuffleInput(RootVT, V1); + SDValue Hi = CanonicalizeShuffleInput(RootVT, BaseMask[1] == 0 ? V1 : V2); + Hi = extractSubVector(Hi, 0, DAG, DL, 128); + return insertSubVector(Lo, Hi, NumRootElts / 2, DAG, DL, 128); } if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) @@ -39743,17 +39726,24 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue RHS = Op.getOperand(1); APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, 2 * NumElts); - APInt DemandedRHSElts = DemandedSrcElts; - if (SimplifyDemandedVectorElts(RHS, DemandedRHSElts, RHSUndef, RHSZero, TLO, + if (SimplifyDemandedVectorElts(LHS, DemandedSrcElts, LHSUndef, LHSZero, TLO, + Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(RHS, DemandedSrcElts, RHSUndef, RHSZero, TLO, Depth + 1)) return true; - // If RHS elements are known zero then we don't need the LHS equivalent. + // TODO: Multiply by zero. + + // If RHS/LHS elements are known zero then we don't need the LHS/RHS equivalent. APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero; if (SimplifyDemandedVectorElts(LHS, DemandedLHSElts, LHSUndef, LHSZero, TLO, Depth + 1)) return true; - // TODO: Multiply by zero. 
+ APInt DemandedRHSElts = DemandedSrcElts & ~LHSZero; + if (SimplifyDemandedVectorElts(RHS, DemandedRHSElts, RHSUndef, RHSZero, TLO, + Depth + 1)) + return true; break; } case X86ISD::PSADBW: { @@ -40129,15 +40119,11 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDLoc DL(Op); EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), ExtSizeInBits / VT.getScalarSizeInBits()); - SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other); - SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)}; - SDValue Bcst = - TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, - Ops, MemVT, MemIntr->getMemOperand()); - TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), - Bcst.getValue(1)); - return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, - TLO.DAG, DL, ExtSizeInBits)); + if (SDValue BcstLd = + getBROADCAST_LOAD(Opc, DL, BcstVT, MemVT, MemIntr, 0, TLO.DAG)) + return TLO.CombineTo(Op, + insertSubVector(TLO.DAG.getUNDEF(VT), BcstLd, 0, + TLO.DAG, DL, ExtSizeInBits)); } break; } @@ -44480,8 +44466,8 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, N1.getOperand(0).getScalarValueSizeInBits() <= 8)) return SDValue(); - // Sign bits must extend through the upper 17 bits. - if (DAG.ComputeNumSignBits(N1) < 17 || DAG.ComputeNumSignBits(N0) < 17) + // Sign bits must extend down to the lowest i16. + if (DAG.ComputeMinSignedBits(N1) > 16 || DAG.ComputeMinSignedBits(N0) > 16) return SDValue(); // At least one of the elements must be zero in the upper 17 bits, or can be @@ -51676,36 +51662,21 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT, if (Op0.getOpcode() == X86ISD::VBROADCAST) return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0)); - // If this scalar/subvector broadcast_load is inserted into both halves, use - // a larger broadcast_load. Update other uses to use an extracted subvector. - if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD || + // If this simple subvector or scalar/subvector broadcast_load is inserted + // into both halves, use a larger broadcast_load. Update other uses to use + // an extracted subvector. + if (ISD::isNormalLoad(Op0.getNode()) || + Op0.getOpcode() == X86ISD::VBROADCAST_LOAD || Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { - auto *MemIntr = cast(Op0); - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()}; - SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops, - MemIntr->getMemoryVT(), - MemIntr->getMemOperand()); - DAG.ReplaceAllUsesOfValueWith( - Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits())); - DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1)); - return BcastLd; - } - - // If this is a simple subvector load repeated across multiple lanes, then - // broadcast the load. Update other uses to use an extracted subvector. 
- if (auto *Ld = dyn_cast(Op0)) { - if (Ld->isSimple() && !Ld->isNonTemporal() && - Ld->getExtensionType() == ISD::NON_EXTLOAD) { - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()}; - SDValue BcastLd = - DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, - Ld->getMemoryVT(), Ld->getMemOperand()); - DAG.ReplaceAllUsesOfValueWith( - Op0, - extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits())); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1)); + auto *Mem = cast(Op0); + unsigned Opc = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD + ? X86ISD::VBROADCAST_LOAD + : X86ISD::SUBV_BROADCAST_LOAD; + if (SDValue BcastLd = + getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) { + SDValue BcastSrc = + extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()); + DAG.ReplaceAllUsesOfValueWith(Op0, BcastSrc); return BcastLd; } } @@ -52468,6 +52439,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, // Simplify VPMADDUBSW/VPMADDWD operations. static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -52475,7 +52447,14 @@ static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG, // Don't return LHS/RHS as it may contain UNDEFs. if (ISD::isBuildVectorAllZeros(LHS.getNode()) || ISD::isBuildVectorAllZeros(RHS.getNode())) - return DAG.getConstant(0, SDLoc(N), N->getValueType(0)); + return DAG.getConstant(0, SDLoc(N), VT); + + APInt KnownUndef, KnownZero; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); + if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, + KnownZero, DCI)) + return SDValue(N, 0); return SDValue(); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f6f1b4356446b..6805cb75f0f22 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -650,10 +650,6 @@ namespace llvm { // packed single precision. DPBF16PS, - // Save xmm argument registers to the stack, according to %al. An operator - // is needed so that this can be expanded with control flow. - VASTART_SAVE_XMM_REGS, - // A stack checking function call. On Windows it's _chkstk call. DYN_ALLOCA, @@ -871,6 +867,10 @@ namespace llvm { AESENCWIDE256KL, AESDECWIDE256KL, + // Save xmm argument registers to the stack, according to %al. An operator + // is needed so that this can be expanded with control flow. + VASTART_SAVE_XMM_REGS, + // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all // opcodes will be thought as target memory ops! diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp index 3d96d198b4096..e08b4b7c03c65 100644 --- a/llvm/lib/Target/X86/X86IndirectThunks.cpp +++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp @@ -212,7 +212,7 @@ void RetpolineThunkInserter::populateThunk(MachineFunction &MF) { MF.push_back(CallTarget); const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; - const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL; + const unsigned RetOpc = Is64Bit ? 
X86::RET64 : X86::RET32; Entry->addLiveIn(ThunkReg); BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym); diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index bb878660231e5..ba52283b570db 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -69,16 +69,12 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2), let SchedRW = [WriteSystem] in { // x86-64 va_start lowering magic. -let hasSideEffects = 1, Defs = [EFLAGS] in { +let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in { def VASTART_SAVE_XMM_REGS : I<0, Pseudo, (outs), - (ins GR8:$al, - i32imm:$regsavefi, i32imm:$offset, - variable_ops), - "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset", - [(X86vastart_save_xmm_regs GR8:$al, - timm:$regsavefi, - timm:$offset), + (ins GR8:$al, i8mem:$regsavefi, variable_ops), + "#VASTART_SAVE_XMM_REGS $al, $regsavefi", + [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi), (implicit EFLAGS)]>; } diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td index 680389662aa87..6d969962afffa 100644 --- a/llvm/lib/Target/X86/X86InstrControl.td +++ b/llvm/lib/Target/X86/X86InstrControl.td @@ -20,30 +20,30 @@ // ST1 arguments when returning values on the x87 stack. let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in { - def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops), + def RET32 : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret{l}", []>, OpSize32, Requires<[Not64BitMode]>; - def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops), + def RET64 : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret{q}", []>, OpSize32, Requires<[In64BitMode]>; - def RETW : I <0xC3, RawFrm, (outs), (ins), + def RET16 : I <0xC3, RawFrm, (outs), (ins), "ret{w}", []>, OpSize16; - def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + def RETI32 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>; - def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), + def RETI64 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops), "ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>; - def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), + def RETI16 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt), "ret{w}\t$amt", []>, OpSize16; - def LRETL : I <0xCB, RawFrm, (outs), (ins), + def LRET32 : I <0xCB, RawFrm, (outs), (ins), "{l}ret{l|f}", []>, OpSize32; - def LRETQ : RI <0xCB, RawFrm, (outs), (ins), + def LRET64 : RI <0xCB, RawFrm, (outs), (ins), "{l}ret{|f}q", []>, Requires<[In64BitMode]>; - def LRETW : I <0xCB, RawFrm, (outs), (ins), + def LRET16 : I <0xCB, RawFrm, (outs), (ins), "{l}ret{w|f}", []>, OpSize16; - def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "{l}ret{l|f}\t$amt", []>, OpSize32; - def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>; - def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), - "{l}ret{w|f}\t$amt", []>, OpSize16; + def LRETI32 : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "{l}ret{l|f}\t$amt", []>, OpSize32; + def LRETI64 : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>; + def LRETI16 : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), + "{l}ret{w|f}\t$amt", []>, OpSize16; // The machine return from interrupt instruction, but sometimes we need to // perform a 
post-epilogue stack adjustment. Codegen emits the pseudo form diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 258b2a15d5e4a..166f1f8c3251c 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -1058,6 +1058,12 @@ def INSERT_get_vinsert128_imm : SDNodeXForm; +// INSERT_get_vperm2x128_imm xform function: convert insert_subvector index to +// commuted VPERM2F128/VPERM2I128 imm. +def INSERT_get_vperm2x128_commutedimm : SDNodeXForm; + // EXTRACT_get_vextract256_imm xform function: convert extract_subvector index // to VEXTRACTF64x4 imm. def EXTRACT_get_vextract256_imm : SDNodeXFormsucc_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { - if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB)) + for (MachineBasicBlock *Succ : MBB->successors()) { + if (Succ->isEHPad() || (Succ == TBB && FallthroughBB)) continue; // Return a nullptr if we found more than one fallthrough successor. if (FallthroughBB && FallthroughBB != TBB) return nullptr; - FallthroughBB = *SI; + FallthroughBB = Succ; } return FallthroughBB; } @@ -3257,13 +3257,13 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, MachineInstr *ConditionDef = nullptr; bool SingleUseCondition = true; - for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) { - if (I->modifiesRegister(X86::EFLAGS, TRI)) { - ConditionDef = &*I; + for (MachineInstr &MI : llvm::drop_begin(llvm::reverse(MBB))) { + if (MI.modifiesRegister(X86::EFLAGS, TRI)) { + ConditionDef = &MI; break; } - if (I->readsRegister(X86::EFLAGS, TRI)) + if (MI.readsRegister(X86::EFLAGS, TRI)) SingleUseCondition = false; } @@ -4411,7 +4411,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // It is safe to remove CmpInstr if EFLAGS is redefined or killed. // If we are done with the basic block, we need to check whether EFLAGS is // live-out. - bool IsSafe = false; + bool FlagsMayLiveOut = true; SmallVector, 4> OpsToUpdate; MachineBasicBlock::iterator AfterCmpInstr = std::next(MachineBasicBlock::iterator(CmpInstr)); @@ -4421,7 +4421,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, // We should check the usage if this instruction uses and updates EFLAGS. if (!UseEFLAGS && ModifyEFLAGS) { // It is safe to remove CmpInstr if EFLAGS is updated again. - IsSafe = true; + FlagsMayLiveOut = false; break; } if (!UseEFLAGS && !ModifyEFLAGS) @@ -4542,14 +4542,14 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, } if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) { // It is safe to remove CmpInstr if EFLAGS is updated again or killed. - IsSafe = true; + FlagsMayLiveOut = false; break; } } - // If EFLAGS is not killed nor re-defined, we should check whether it is - // live-out. If it is live-out, do not optimize. - if ((MI || IsSwapped) && !IsSafe) { + // If we have to update users but EFLAGS is live-out abort, since we cannot + // easily find all of the users. + if (ShouldUpdateCC && FlagsMayLiveOut) { for (MachineBasicBlock *Successor : CmpMBB.successors()) if (Successor->isLiveIn(X86::EFLAGS)) return false; @@ -9363,7 +9363,7 @@ void X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB, // We're a normal call, so our sequence doesn't have a return instruction. // Add it in. 
- MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ)); + MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RET64)); MBB.insert(MBB.end(), retq); } diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 6a619aff0a45c..fee9939b8dfc7 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -91,8 +91,7 @@ def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>, - SDTCisVT<1, iPTR>, - SDTCisVT<2, iPTR>]>; + SDTCisPtrTy<1>]>; def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, @@ -184,7 +183,7 @@ def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret, def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, - [SDNPHasChain, SDNPVariadic]>; + [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>; def X86vaarg64 : SDNode<"X86ISD::VAARG_64", SDT_X86VAARG, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index eeed43089f242..035f139e6f332 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7111,29 +7111,37 @@ let Predicates = [HasAVX1Only] in { def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>; } -multiclass vinsert_lowering { +multiclass vinsert_lowering { def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2), (iPTR imm)), (!cast(InstrStr#rr) VR256:$src1, VR128:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; def : Pat<(vinsert128_insert:$ins (To VR256:$src1), - (From (memop_frag addr:$src2)), + (From (frommemop_frag addr:$src2)), (iPTR imm)), (!cast(InstrStr#rm) VR256:$src1, addr:$src2, (INSERT_get_vinsert128_imm VR256:$ins))>; + // Folding "To" vector - convert to perm2x128 and commute inputs. 
+ def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)), + (From VR128:$src2), + (iPTR imm)), + (!cast(PermStr#rm) + (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm), + addr:$src1, (INSERT_get_vperm2x128_commutedimm VR256:$ins))>; } let Predicates = [HasAVX, NoVLX] in { - defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>; - defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>; } let Predicates = [HasAVX1Only] in { - defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>; - defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>; - defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>; - defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>; + defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>; } //===----------------------------------------------------------------------===// @@ -7622,6 +7630,10 @@ let Predicates = [HasAVX1Only] in { (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm), (v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>; + def : Pat<(v8f32 (X86VBroadcast v4f32:$src)), + (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), + (v4f32 (VPERMILPSri VR128:$src, 0)), sub_xmm), + (v4f32 (VPERMILPSri VR128:$src, 0)), 1)>; def : Pat<(v4f64 (X86VBroadcast FR64:$src)), (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), (v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm), @@ -7742,10 +7754,10 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), } let Predicates = [HasAVX2, NoVLX] in { - defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>; - defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>; - defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>; - defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>; + defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp index 7b6276c1d87e0..e562748c98fe8 100644 --- a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp +++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp @@ -76,7 +76,7 @@ bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction( bool Modified = false; for (auto &MBB : MF) { for (auto MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI) { - if (MBBI->getOpcode() != X86::RETQ) + if (MBBI->getOpcode() != X86::RET64) continue; unsigned ClobberReg = TRI->findDeadCallerSavedReg(MBB, MBBI); diff --git 
a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp index 248069f4deb49..243ae213d665e 100644 --- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp @@ -498,8 +498,8 @@ X86LowerAMXIntrinsics::lowerTileDP(Instruction *TileDP) { Value *ResAMX = Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext())); // Delete TileDP intrinsic and do some clean-up. - for (auto UI = TileDP->use_begin(), UE = TileDP->use_end(); UI != UE;) { - Instruction *I = cast((UI++)->getUser()); + for (Use &U : llvm::make_early_inc_range(TileDP->uses())) { + Instruction *I = cast(U.getUser()); Value *Vec; if (match(I, m_BitCast(m_Value(Vec)))) { I->replaceAllUsesWith(ResVec); @@ -542,9 +542,8 @@ bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) { Value *ResAMX = Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext())); // Delete tileloadd6 intrinsic and do some clean-up - for (auto UI = TileLoadStore->use_begin(), UE = TileLoadStore->use_end(); - UI != UE;) { - Instruction *I = cast((UI++)->getUser()); + for (Use &U : llvm::make_early_inc_range(TileLoadStore->uses())) { + Instruction *I = cast(U.getUser()); Value *Vec; if (match(I, m_BitCast(m_Value(Vec)))) { I->replaceAllUsesWith(ResVec); @@ -631,6 +630,7 @@ bool X86LowerAMXIntrinsics::visit() { return C; } +namespace { class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass { public: static char ID; @@ -665,6 +665,7 @@ class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass { AU.addRequired(); } }; +} // namespace static const char PassName[] = "Lower AMX intrinsics"; char X86LowerAMXIntrinsicsLegacyPass::ID = 0; diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp index 719cd532fc01b..8e82119f71a38 100644 --- a/llvm/lib/Target/X86/X86LowerAMXType.cpp +++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp @@ -302,9 +302,7 @@ bool X86LowerAMXType::visit() { Col2Row.clear(); for (BasicBlock *BB : post_order(&Func)) { - for (BasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend(); - II != IE;) { - Instruction &Inst = *II++; + for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*BB))) { auto *Bitcast = dyn_cast(&Inst); if (!Bitcast) continue; diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp index 03692d1957686..d6b42145859d8 100644 --- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp +++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp @@ -75,9 +75,7 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; for (MachineBasicBlock &MBB : MF) { - for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); - MII != MIE;) { - MachineInstr &MI = *MII++; + for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { if (!MI.isCopy()) continue; MachineOperand &DstMO = MI.getOperand(0); diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 9e1812a9c8b46..882f7af224a1d 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -421,7 +421,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst, } static unsigned getRetOpcode(const X86Subtarget &Subtarget) { - return Subtarget.is64Bit() ? X86::RETQ : X86::RETL; + return Subtarget.is64Bit() ? 
X86::RET64 : X86::RET32; } Optional diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index 659fb632cc50d..6967a96ce83bf 100644 --- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -653,9 +653,8 @@ bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) { // isReplaceable function. Register FirstVReg = First.getOperand(0).getReg(); Register LastVReg = Last.getOperand(0).getReg(); - for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end(); - UI != UE;) { - MachineOperand &MO = *UI++; + for (MachineOperand &MO : + llvm::make_early_inc_range(MRI->use_operands(LastVReg))) { MachineInstr &MI = *MO.getParent(); if (MI.isDebugValue()) { diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp index e10dab72078d3..47ae517ae76d7 100644 --- a/llvm/lib/Target/X86/X86PadShortFunction.cpp +++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp @@ -174,12 +174,9 @@ void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) { } // Follow branches in BB and look for returns - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(); - I != MBB->succ_end(); ++I) { - if (*I == MBB) - continue; - findReturns(*I, Cycles); - } + for (MachineBasicBlock *Succ : MBB->successors()) + if (Succ != MBB) + findReturns(Succ, Cycles); } /// cyclesUntilReturn - return true if the MBB has a return instruction, diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index c4748423baeaf..130cb61cdde24 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -816,10 +816,10 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg( return 0; case TargetOpcode::PATCHABLE_RET: case X86::RET: - case X86::RETL: - case X86::RETQ: - case X86::RETIL: - case X86::RETIQ: + case X86::RET32: + case X86::RET64: + case X86::RETI32: + case X86::RETI64: case X86::TCRETURNdi: case X86::TCRETURNri: case X86::TCRETURNmi: diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 8dcef8900be2f..2827981b7fb0a 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -1110,7 +1110,7 @@ def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup84], (instrs LRETQ, RETQ)>; +def: InstRW<[BWWriteResGroup84], (instrs LRET64, RET64)>; def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 4feb8a866ffd1..68961d6245abd 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -710,7 +710,7 @@ def HWWriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> { let NumMicroOps = 4; let ResourceCycles = [1, 2, 1]; } -def : InstRW<[HWWriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>; +def : InstRW<[HWWriteRETI], (instregex "RETI(16|32|64)", "LRETI(16|32|64)")>; // BOUND. // r,m. 
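The scheduler-model hunks above and below only rename opcodes, but a stale instregex pattern fails silently rather than erroring, which is why every .td file must be updated in lockstep. A quick sanity sketch with plain std::regex (only an approximation of TableGen's instregex matcher) showing that the updated pattern covers exactly the new RETI names and none of the old ones:

#include <cassert>
#include <regex>
#include <string>

int main() {
  const std::regex RetI("RETI(16|32|64)");
  // The renamed opcodes all match the updated pattern.
  for (const std::string New : {"RETI16", "RETI32", "RETI64"})
    assert(std::regex_match(New, RetI));
  // The old L/Q/W-suffixed names no longer match, so an un-updated pattern
  // would simply stop applying to any instruction.
  for (const std::string Old : {"RETIL", "RETIQ", "RETIW"})
    assert(!std::regex_match(Old, RetI));
  return 0;
}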
@@ -1188,7 +1188,7 @@ def HWWriteResGroup41 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup41], (instrs LRETQ, RETL, RETQ)>; +def: InstRW<[HWWriteResGroup41], (instrs LRET64, RET32, RET64)>; def HWWriteResGroup44 : SchedWriteRes<[HWPort4,HWPort6,HWPort237,HWPort0156]> { let Latency = 3; diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td index 5561ccf0361f3..889b9b7fa6666 100644 --- a/llvm/lib/Target/X86/X86SchedIceLake.td +++ b/llvm/lib/Target/X86/X86SchedIceLake.td @@ -1444,7 +1444,7 @@ def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[ICXWriteResGroup104], (instrs LRETQ, RETQ)>; +def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>; def ICXWriteResGroup106 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 1a55f7cda70d0..c8d7b0f72c1c9 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -606,7 +606,7 @@ def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> { def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP, LD_Frr, ST_Frr, ST_FPrr)>; def: InstRW<[SBWriteResGroup2], (instrs LOOP, LOOPE, LOOPNE)>; // FIXME: This seems wrong compared to other Intel CPUs. -def: InstRW<[SBWriteResGroup2], (instrs RETQ)>; +def: InstRW<[SBWriteResGroup2], (instrs RET64)>; def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> { let Latency = 1; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index ba0f4a7088d9d..7d3229c3b023b 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -1175,7 +1175,7 @@ def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup98], (instrs LRETQ, RETQ)>; +def: InstRW<[SKLWriteResGroup98], (instrs LRET64, RET64)>; def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 0287e00c4bfa2..1d8417aef41e3 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1436,7 +1436,7 @@ def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKXWriteResGroup104], (instrs LRETQ, RETQ)>; +def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>; def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 97dcef4ed380e..6fd98280f560c 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -540,7 +540,7 @@ def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r, PUSH16rmr, PUSH32rmr, PUSH64rmr, PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32, XCH_F)>; -def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$", +def : InstRW<[AtomWrite0_1_1], (instregex "RETI(16|32|64)$", "IRET(16|32|64)?")>; def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> { @@ -819,8 +819,8 @@ def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> { let 
Latency = 79; let ResourceCycles = [79]; } -def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$", - "LRETI?(L|Q|W)")>; +def : InstRW<[AtomWrite01_79], (instregex "RET(16|32|64)?$", + "LRETI?(16|32|64)")>; def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> { let Latency = 92; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 1fb015c7fc538..8e30e5e10ca80 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -697,7 +697,7 @@ def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>; def ZnWriteRET : SchedWriteRes<[ZnALU03]> { let NumMicroOps = 2; } -def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)", +def : InstRW<[ZnWriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)", "IRET(16|32|64)")>; //-- Logic instructions --// diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 44d873f763b05..a83c89e2f28a0 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -697,7 +697,7 @@ def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>; def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]> { let NumMicroOps = 2; } -def : InstRW<[Zn2WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)", +def : InstRW<[Zn2WriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)", "IRET(16|32|64)")>; //-- Logic instructions --// diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index af7cf984c5937..ebde29f0ba6e6 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2706,8 +2706,8 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, static const CostTblEntry AVX512CostTbl[] = { { ISD::ABS, MVT::v8i64, 1 }, { ISD::ABS, MVT::v16i32, 1 }, - { ISD::ABS, MVT::v32i16, 2 }, // FIXME: include split - { ISD::ABS, MVT::v64i8, 2 }, // FIXME: include split + { ISD::ABS, MVT::v32i16, 2 }, + { ISD::ABS, MVT::v64i8, 2 }, { ISD::ABS, MVT::v4i64, 1 }, { ISD::ABS, MVT::v2i64, 1 }, { ISD::BITREVERSE, MVT::v8i64, 36 }, @@ -2731,26 +2731,26 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::CTTZ, MVT::v64i8, 18 }, { ISD::SMAX, MVT::v8i64, 1 }, { ISD::SMAX, MVT::v16i32, 1 }, - { ISD::SMAX, MVT::v32i16, 2 }, // FIXME: include split - { ISD::SMAX, MVT::v64i8, 2 }, // FIXME: include split + { ISD::SMAX, MVT::v32i16, 2 }, + { ISD::SMAX, MVT::v64i8, 2 }, { ISD::SMAX, MVT::v4i64, 1 }, { ISD::SMAX, MVT::v2i64, 1 }, { ISD::SMIN, MVT::v8i64, 1 }, { ISD::SMIN, MVT::v16i32, 1 }, - { ISD::SMIN, MVT::v32i16, 2 }, // FIXME: include split - { ISD::SMIN, MVT::v64i8, 2 }, // FIXME: include split + { ISD::SMIN, MVT::v32i16, 2 }, + { ISD::SMIN, MVT::v64i8, 2 }, { ISD::SMIN, MVT::v4i64, 1 }, { ISD::SMIN, MVT::v2i64, 1 }, { ISD::UMAX, MVT::v8i64, 1 }, { ISD::UMAX, MVT::v16i32, 1 }, - { ISD::UMAX, MVT::v32i16, 2 }, // FIXME: include split - { ISD::UMAX, MVT::v64i8, 2 }, // FIXME: include split + { ISD::UMAX, MVT::v32i16, 2 }, + { ISD::UMAX, MVT::v64i8, 2 }, { ISD::UMAX, MVT::v4i64, 1 }, { ISD::UMAX, MVT::v2i64, 1 }, { ISD::UMIN, MVT::v8i64, 1 }, { ISD::UMIN, MVT::v16i32, 1 }, - { ISD::UMIN, MVT::v32i16, 2 }, // FIXME: include split - { ISD::UMIN, MVT::v64i8, 2 }, // FIXME: include split + { ISD::UMIN, MVT::v32i16, 2 }, + { ISD::UMIN, MVT::v64i8, 2 }, { ISD::UMIN, MVT::v4i64, 1 }, { ISD::UMIN, MVT::v2i64, 1 }, { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd @@ -2761,14 
+2761,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, { ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq { ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq { ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq - { ISD::SADDSAT, MVT::v32i16, 2 }, // FIXME: include split - { ISD::SADDSAT, MVT::v64i8, 2 }, // FIXME: include split - { ISD::SSUBSAT, MVT::v32i16, 2 }, // FIXME: include split - { ISD::SSUBSAT, MVT::v64i8, 2 }, // FIXME: include split - { ISD::UADDSAT, MVT::v32i16, 2 }, // FIXME: include split - { ISD::UADDSAT, MVT::v64i8, 2 }, // FIXME: include split - { ISD::USUBSAT, MVT::v32i16, 2 }, // FIXME: include split - { ISD::USUBSAT, MVT::v64i8, 2 }, // FIXME: include split + { ISD::SADDSAT, MVT::v32i16, 2 }, + { ISD::SADDSAT, MVT::v64i8, 2 }, + { ISD::SSUBSAT, MVT::v32i16, 2 }, + { ISD::SSUBSAT, MVT::v64i8, 2 }, + { ISD::UADDSAT, MVT::v32i16, 2 }, + { ISD::UADDSAT, MVT::v64i8, 2 }, + { ISD::USUBSAT, MVT::v32i16, 2 }, + { ISD::USUBSAT, MVT::v64i8, 2 }, { ISD::FMAXNUM, MVT::f32, 2 }, { ISD::FMAXNUM, MVT::v4f32, 2 }, { ISD::FMAXNUM, MVT::v8f32, 2 }, @@ -5053,12 +5053,46 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( // Get the cost of one memory operation. auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(), LegalVT.getVectorNumElements()); - InstructionCost MemOpCost = getMemoryOpCost( - Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind); + InstructionCost MemOpCost; + if (UseMaskForCond || UseMaskForGaps) + MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment, + AddressSpace, CostKind); + else + MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, MaybeAlign(Alignment), + AddressSpace, CostKind); unsigned VF = VecTy->getNumElements() / Factor; MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); + // FIXME: this is the most conservative estimate for the mask cost. + InstructionCost MaskCost; + if (UseMaskForCond || UseMaskForGaps) { + APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements()); + for (unsigned Index : Indices) { + assert(Index < Factor && "Invalid index for interleaved memory op"); + for (unsigned Elm = 0; Elm < VF; Elm++) + DemandedLoadStoreElts.setBit(Index + Elm * Factor); + } + + Type *I8Type = Type::getInt8Ty(VecTy->getContext()); + + MaskCost = getReplicationShuffleCost( + I8Type, Factor, VF, APInt::getAllOnes(VF), + UseMaskForGaps ? DemandedLoadStoreElts + : APInt::getAllOnes(VecTy->getNumElements()), + CostKind); + + // The Gaps mask is invariant and created outside the loop, therefore the + // cost of creating it is not accounted for here. However if we have both + // a MaskForGaps and some other mask that guards the execution of the + // memory access, we need to account for the cost of And-ing the two masks + // inside the loop. + if (UseMaskForGaps) { + auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements()); + MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind); + } + } + if (Opcode == Instruction::Load) { // The tables (AVX512InterleavedLoadTbl and AVX512InterleavedStoreTbl) // contain the cost of the optimized shuffle sequence that the @@ -5074,7 +5108,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( if (const auto *Entry = CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT)) - return NumOfMemOps * MemOpCost + Entry->Cost; + return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost; //If an entry does not exist, fallback to the default implementation. 
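The DemandedLoadStoreElts mask built in the AVX-512 interleaving hunk above marks, for each requested member Index of a stride-Factor group, the elements Index, Index + Factor, ..., Index + (VF-1)*Factor. A small self-contained sketch of the resulting bit pattern, with std::vector<bool> standing in for APInt and made-up values for Factor, VF, and the member set:

#include <cstdio>
#include <vector>

int main() {
  // Illustrative shape: a stride-3 interleaved group with 4 lanes per
  // member, of which only members 0 and 2 are requested.
  const unsigned Factor = 3, VF = 4;
  const unsigned NumElts = Factor * VF;
  std::vector<bool> Demanded(NumElts, false);
  for (unsigned Index : {0u, 2u})
    for (unsigned Elm = 0; Elm < VF; ++Elm)
      Demanded[Index + Elm * Factor] = true; // mirrors setBit() above
  for (unsigned I = 0; I < NumElts; ++I)
    std::putchar(Demanded[I] ? '1' : '0');   // prints 101101101101
  std::putchar('\n');
  return 0;
}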
// Kind of shuffle depends on number of loaded values. @@ -5111,7 +5145,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2; InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost + - NumOfUnfoldedLoads * MemOpCost + NumOfMoves; + MaskCost + NumOfUnfoldedLoads * MemOpCost + + NumOfMoves; return Cost; } @@ -5133,7 +5168,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( if (const auto *Entry = CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT)) - return NumOfMemOps * MemOpCost + Entry->Cost; + return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost; //If an entry does not exist, fallback to the default implementation. // There is no strided stores meanwhile. And store can't be folded in @@ -5147,6 +5182,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( // We need additional instructions to keep sources. unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2; InstructionCost Cost = + MaskCost + NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) + NumOfMoves; return Cost; @@ -5157,10 +5193,6 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { auto *VecTy = cast(BaseTy); - if (UseMaskForCond || UseMaskForGaps) - return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, CostKind, - UseMaskForCond, UseMaskForGaps); auto isSupportedOnAVX512 = [&](Type *VecTy, bool HasBW) { Type *EltTy = cast(VecTy)->getElementType(); @@ -5177,6 +5209,11 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); + if (UseMaskForCond || UseMaskForGaps) + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace, CostKind, + UseMaskForCond, UseMaskForGaps); + // Get estimation for interleaved load/store operations for SSE-AVX2. // As opposed to AVX-512, SSE-AVX2 do not have generic shuffles that allow // computing the cost using a generic formula as a function of generic diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86VZeroUpper.cpp index c3031b6985520..59b5dc111ce34 100644 --- a/llvm/lib/Target/X86/X86VZeroUpper.cpp +++ b/llvm/lib/Target/X86/X86VZeroUpper.cpp @@ -271,10 +271,8 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) { << getBlockExitStateName(CurState) << '\n'); if (CurState == EXITS_DIRTY) - for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); - SI != SE; ++SI) - addDirtySuccessor(**SI); + for (MachineBasicBlock *Succ : MBB.successors()) + addDirtySuccessor(*Succ); BlockStates[MBB.getNumber()].ExitState = CurState; } diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 79147b1439ec1..7243e39c90293 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -378,10 +378,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) { // Also, we want to avoid matching partial patterns. // TODO: It would be more efficient if we removed dead instructions // iteratively in this loop rather than waiting until the end. 
- for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { + for (Instruction &I : llvm::reverse(BB)) { MadeChange |= foldAnyOrAllBitsSet(I); MadeChange |= foldGuardedFunnelShift(I, DT); - MadeChange |= tryToRecognizePopCount(I); + MadeChange |= tryToRecognizePopCount(I); } } diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index f9f3f234a7dad..ac3d078714ce1 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -2245,12 +2245,7 @@ static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call, /// intrinsics and attempting to MemToReg the alloca away. static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca, coro::Shape &Shape) { - for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) { - // We're likely changing the use list, so use a mutation-safe - // iteration pattern. - auto &Use = *UI; - ++UI; - + for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) { // swifterror values can only be used in very specific ways. // We take advantage of that here. auto User = Use.getUser(); diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 118b2a3fa8849..1c45267effba5 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -520,8 +520,8 @@ void CoroCloner::replaceRetconOrAsyncSuspendUses() { } // Try to peephole extracts of an aggregate return. - for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) { - auto EVI = dyn_cast((UI++)->getUser()); + for (Use &U : llvm::make_early_inc_range(NewS->uses())) { + auto *EVI = dyn_cast(U.getUser()); if (!EVI || EVI->getNumIndices() != 1) continue; @@ -1974,9 +1974,9 @@ static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG, // %2 = bitcast %1 to [[TYPE]] // ==> // %2 = @some_function - for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); UI != UE;) { + for (Use &U : llvm::make_early_inc_range(Prepare->uses())) { // Look for bitcasts back to the original function type. - auto *Cast = dyn_cast((UI++)->getUser()); + auto *Cast = dyn_cast(U.getUser()); if (!Cast || Cast->getType() != Fn->getType()) continue; @@ -2016,10 +2016,9 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) { // %2 = bitcast %1 to [[TYPE]] // ==> // %2 = @some_function - for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); - UI != UE; ) { + for (Use &U : llvm::make_early_inc_range(Prepare->uses())) { // Look for bitcasts back to the original function type. - auto *Cast = dyn_cast((UI++)->getUser()); + auto *Cast = dyn_cast(U.getUser()); if (!Cast || Cast->getType() != Fn->getType()) continue; // Check whether the replacement will introduce new direct calls. @@ -2056,9 +2055,9 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) { static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG, LazyCallGraph::SCC &C) { bool Changed = false; - for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); PI != PE;) { + for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) { // Intrinsics can only be used in calls. - auto *Prepare = cast((PI++)->getUser()); + auto *Prepare = cast(P.getUser()); replacePrepare(Prepare, CG, C); Changed = true; } @@ -2074,10 +2073,9 @@ static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG, /// switch coroutines, which are lowered in multiple stages). 
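Most of the loop rewrites in the surrounding hunks replace manual iterator bumping with llvm::make_early_inc_range, which is needed whenever the loop body may erase the current use, user, or instruction. A reduced illustration of the underlying pattern, with std::list standing in for an LLVM use list:

#include <cassert>
#include <list>

template <typename Container, typename Pred>
void eraseMatching(Container &C, Pred P) {
  for (auto It = C.begin(), E = C.end(); It != E;) {
    auto Cur = It++; // increment first, as llvm::make_early_inc_range does
    if (P(*Cur))
      C.erase(Cur);  // safe: It already points past the erased element
  }
}

int main() {
  std::list<int> L{1, 2, 3, 4, 5, 6};
  eraseMatching(L, [](int V) { return V % 2 == 0; });
  assert((L == std::list<int>{1, 3, 5}));
  return 0;
}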
static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) { bool Changed = false; - for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); - PI != PE; ) { + for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) { // Intrinsics can only be used in calls. - auto *Prepare = cast((PI++)->getUser()); + auto *Prepare = cast(P.getUser()); replacePrepare(Prepare, CG); Changed = true; } diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index dd31073d1343a..0d1062e6b94b3 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -1051,7 +1051,10 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, if (!Changed) return PreservedAnalyses::all(); - return PreservedAnalyses::none(); + PreservedAnalyses PA; + // We've cleared out analyses for deleted functions. + PA.preserve(); + return PA; } namespace { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index badb118ec2a44..cd1d0f3163fd2 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -483,6 +483,7 @@ static void clampReturnedValueStates( S ^= *T; } +namespace { /// Helper class for generic deduction: return value -> returned position. template @@ -1031,6 +1033,7 @@ struct AA::PointerInfo::State : public AbstractState { BooleanState BS; }; +namespace { struct AAPointerInfoImpl : public StateWrapper { using BaseTy = StateWrapper; @@ -5074,6 +5077,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl { STATS_DECLTRACK_CSRET_ATTR(nocapture) } }; +} // namespace /// ------------------ Value Simplify Attribute ---------------------------- @@ -5094,6 +5098,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional Other) { return true; } +namespace { struct AAValueSimplifyImpl : AAValueSimplify { AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A) : AAValueSimplify(IRP, A) {} @@ -7373,6 +7378,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U, if (UserI->mayWriteToMemory()) removeAssumedBits(NO_WRITES); } +} // namespace /// -------------------- Memory Locations Attributes --------------------------- /// Includes read-none, argmemonly, inaccessiblememonly, diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index a71c93f21e3d2..e6d8398766b8f 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -206,8 +206,8 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // to pass in a smaller number of arguments into the new function. // std::vector Args; - for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) { - CallBase *CB = dyn_cast(*I++); + for (User *U : llvm::make_early_inc_range(Fn.users())) { + CallBase *CB = dyn_cast(U); if (!CB) continue; diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp index ba0efd46af165..387f114f6ffab 100644 --- a/llvm/lib/Transforms/IPO/ExtractGV.cpp +++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp @@ -121,32 +121,27 @@ namespace { } // Visit the Aliases. 
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E;) { - Module::alias_iterator CurI = I; - ++I; - - bool Delete = deleteStuff == (bool)Named.count(&*CurI); - makeVisible(*CurI, Delete); + for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) { + bool Delete = deleteStuff == (bool)Named.count(&GA); + makeVisible(GA, Delete); if (Delete) { - Type *Ty = CurI->getValueType(); + Type *Ty = GA.getValueType(); - CurI->removeFromParent(); + GA.removeFromParent(); llvm::Value *Declaration; if (FunctionType *FTy = dyn_cast(Ty)) { - Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage, - CurI->getAddressSpace(), - CurI->getName(), &M); + Declaration = + Function::Create(FTy, GlobalValue::ExternalLinkage, + GA.getAddressSpace(), GA.getName(), &M); } else { Declaration = - new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, - nullptr, CurI->getName()); - + new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, + nullptr, GA.getName()); } - CurI->replaceAllUsesWith(Declaration); - delete &*CurI; + GA.replaceAllUsesWith(Declaration); + delete &GA; } } diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 8918d01f3ec27..935375e89393c 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1752,7 +1752,8 @@ static SCCNodesResult createSCCNodeSet(ArrayRef Functions) { SCCNodesResult Res; Res.HasUnknownCall = false; for (Function *F : Functions) { - if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) { + if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) || + F->isPresplitCoroutine()) { // Treat any function we're trying not to optimize as if it were an // indirect call and omit it from the node set used below. Res.HasUnknownCall = true; diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index fdb240d581252..fbd083bb9bbfd 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -229,6 +229,7 @@ static void removeSSACopy(Module &M) { removeSSACopy(F); } +namespace { class FunctionSpecializer { /// The IPSCCP Solver. @@ -737,6 +738,7 @@ class FunctionSpecializer { } } }; +} // namespace bool llvm::runFunctionSpecialization( Module &M, const DataLayout &DL, diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 1d40a3b52aae2..b2c2efed7db88 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -2605,12 +2605,11 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { // and remove them. bool Changed = false; - for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end(); - I != E;) { + for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) { // We're only interested in calls. Theoretically, we could handle invoke // instructions as well, but neither llvm-gcc nor clang generate invokes // to __cxa_atexit. 
- CallInst *CI = dyn_cast(*I++); + CallInst *CI = dyn_cast(U); if (!CI) continue; diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp index 365b269dc3bf6..e7d698c42fcf3 100644 --- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp +++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp @@ -154,11 +154,8 @@ static bool splitGlobals(Module &M) { return false; bool Changed = false; - for (auto I = M.global_begin(); I != M.global_end();) { - GlobalVariable &GV = *I; - ++I; + for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) Changed |= splitGlobal(GV); - } return Changed; } diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index e079bc1db11f2..b8a314c54f18c 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -135,12 +135,8 @@ struct OutlinableGroup { /// \param SourceBB - the BasicBlock to pull Instructions from. /// \param TargetBB - the BasicBlock to put Instruction into. static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) { - BasicBlock::iterator BBCurr, BBEnd, BBNext; - for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd; - BBCurr = BBNext) { - BBNext = std::next(BBCurr); - BBCurr->moveBefore(TargetBB, TargetBB.end()); - } + for (Instruction &I : llvm::make_early_inc_range(SourceBB)) + I.moveBefore(TargetBB, TargetBB.end()); } /// A function to sort the keys of \p Map, which must be a mapping of constant @@ -527,23 +523,20 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group, /// \param [out] NewEnds - The return blocks of the new overall function. static void moveFunctionData(Function &Old, Function &New, DenseMap &NewEnds) { - Function::iterator CurrBB, NextBB, FinalBB; - for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB; - CurrBB = NextBB) { - NextBB = std::next(CurrBB); - CurrBB->removeFromParent(); - CurrBB->insertInto(&New); - Instruction *I = CurrBB->getTerminator(); + for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) { + CurrBB.removeFromParent(); + CurrBB.insertInto(&New); + Instruction *I = CurrBB.getTerminator(); // For each block we find a return instruction is, it is a potential exit // path for the function. We keep track of each block based on the return // value here. if (ReturnInst *RI = dyn_cast(I)) - NewEnds.insert(std::make_pair(RI->getReturnValue(), &(*CurrBB))); + NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB)); std::vector DebugInsts; - for (Instruction &Val : *CurrBB) { + for (Instruction &Val : CurrBB) { // We must handle the scoping of called functions differently than // other outlined instructions. 
if (!isa(&Val)) { @@ -2213,6 +2206,7 @@ bool IROutliner::run(Module &M) { } // Pass Manager Boilerplate +namespace { class IROutlinerLegacyPass : public ModulePass { public: static char ID; @@ -2228,6 +2222,7 @@ class IROutlinerLegacyPass : public ModulePass { bool runOnModule(Module &M) override; }; +} // namespace bool IROutlinerLegacyPass::runOnModule(Module &M) { if (skipModule(M)) diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 26aa858cb8279..f78971f0e586f 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1773,11 +1773,7 @@ static bool isDirectCall(Use& U) { void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical) { SmallSetVector Constants; - auto UI = Old->use_begin(), E = Old->use_end(); - for (; UI != E;) { - Use &U = *UI; - ++UI; - + for (Use &U : llvm::make_early_inc_range(Old->uses())) { // Skip block addresses if (isa(U.getUser())) continue; @@ -1814,12 +1810,11 @@ bool LowerTypeTestsModule::lower() { M.getFunction(Intrinsic::getName(Intrinsic::type_test)); if (DropTypeTests && TypeTestFunc) { - for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); - UI != UE;) { - auto *CI = cast((*UI++).getUser()); + for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { + auto *CI = cast(U.getUser()); // Find and erase llvm.assume intrinsics for this llvm.type.test call. - for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;) - if (auto *Assume = dyn_cast((*CIU++).getUser())) + for (Use &CIU : llvm::make_early_inc_range(CI->uses())) + if (auto *Assume = dyn_cast(CIU.getUser())) Assume->eraseFromParent(); // If the assume was merged with another assume, we might have a use on a // phi (which will feed the assume). Simply replace the use on the phi @@ -1857,13 +1852,9 @@ bool LowerTypeTestsModule::lower() { return false; if (ImportSummary) { - if (TypeTestFunc) { - for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end(); - UI != UE;) { - auto *CI = cast((*UI++).getUser()); - importTypeTest(CI); - } - } + if (TypeTestFunc) + for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) + importTypeTest(cast(U.getUser())); if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty()) report_fatal_error( diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index 9e6dd879ac01d..97ef872c54999 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -463,17 +463,15 @@ bool MergeFunctions::runOnModule(Module &M) { // Replace direct callers of Old with New. void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); - for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) { - Use *U = &*UI; - ++UI; - CallBase *CB = dyn_cast(U->getUser()); - if (CB && CB->isCallee(U)) { + for (Use &U : llvm::make_early_inc_range(Old->uses())) { + CallBase *CB = dyn_cast(U.getUser()); + if (CB && CB->isCallee(&U)) { // Do not copy attributes from the called function to the call-site. // Function comparison ensures that the attributes are the same up to // type congruences in byval(), in which case we need to keep the byval // type of the call-site, not the callee function. 
remove(CB->getFunction()); - U->set(BitcastNew); + U.set(BitcastNew); } } } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 5957661eed3ff..33ca121c71fef 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -43,6 +44,8 @@ #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/CodeExtractor.h" +#include + using namespace llvm; using namespace omp; @@ -594,6 +597,10 @@ struct KernelInfoState : AbstractState { /// See AbstractState::indicateOptimisticFixpoint(...) ChangeStatus indicateOptimisticFixpoint() override { IsAtFixpoint = true; + ReachingKernelEntries.indicateOptimisticFixpoint(); + SPMDCompatibilityTracker.indicateOptimisticFixpoint(); + ReachedKnownParallelRegions.indicateOptimisticFixpoint(); + ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); return ChangeStatus::UNCHANGED; } @@ -633,13 +640,15 @@ struct KernelInfoState : AbstractState { KernelInfoState operator^=(const KernelInfoState &KIS) { // Do not merge two different _init and _deinit call sites. if (KIS.KernelInitCB) { - if(KernelInitCB && KernelInitCB != KIS.KernelInitCB) - llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt assumptions."); + if (KernelInitCB && KernelInitCB != KIS.KernelInitCB) + llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " + "assumptions."); KernelInitCB = KIS.KernelInitCB; } if (KIS.KernelDeinitCB) { - if(KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB) - llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt assumptions."); + if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB) + llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " + "assumptions."); KernelDeinitCB = KIS.KernelDeinitCB; } SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; @@ -1872,6 +1881,8 @@ struct OpenMPOpt { OMPRTL___kmpc_kernel_end_parallel); ExternalizationRAII BarrierSPMD(OMPInfoCache, OMPRTL___kmpc_barrier_simple_spmd); + ExternalizationRAII BarrierGeneric(OMPInfoCache, + OMPRTL___kmpc_barrier_simple_generic); ExternalizationRAII ThreadId(OMPInfoCache, OMPRTL___kmpc_get_hardware_thread_id_in_block); @@ -2941,7 +2952,7 @@ struct AAKernelInfoFunction : AAKernelInfo { // state. As long as we are not in an invalid state, we will create a // custom state machine so the value should be a `i1 false`. If we are // in an invalid state, we won't change the value that is in the IR. - if (!isValidState()) + if (!ReachedKnownParallelRegions.isValidState()) return nullptr; // If we have disabled state machine rewrites, don't make a custom one. if (DisableOpenMPOptStateMachineRewrite) @@ -3031,6 +3042,18 @@ struct AAKernelInfoFunction : AAKernelInfo { SPMDCompatibilityTracker.indicatePessimisticFixpoint(); } + /// Sanitize the string \p S such that it is a suitable global symbol name. 
+ static std::string sanitizeForGlobalName(std::string S) { + std::replace_if( + S.begin(), S.end(), + [](const char C) { + return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9') || C == '_'); + }, + '.'); + return S; + } + /// Modify the IR based on the KernelInfoState as the fixpoint iteration is /// finished now. ChangeStatus manifest(Attributor &A) override { @@ -3039,19 +3062,16 @@ struct AAKernelInfoFunction : AAKernelInfo { if (!KernelInitCB || !KernelDeinitCB) return ChangeStatus::UNCHANGED; - // Known SPMD-mode kernels need no manifest changes. - if (SPMDCompatibilityTracker.isKnown()) - return ChangeStatus::UNCHANGED; - // If we can we change the execution mode to SPMD-mode otherwise we build a // custom state machine. - if (!mayContainParallelRegion() || !changeToSPMDMode(A)) + ChangeStatus Changed = ChangeStatus::UNCHANGED; + if (!changeToSPMDMode(A, Changed)) return buildCustomStateMachine(A); - return ChangeStatus::CHANGED; + return Changed; } - bool changeToSPMDMode(Attributor &A) { + bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) { auto &OMPInfoCache = static_cast(A.getInfoCache()); if (!SPMDCompatibilityTracker.isAssumed()) { @@ -3083,6 +3103,24 @@ struct AAKernelInfoFunction : AAKernelInfo { return false; } + // Check if the kernel is already in SPMD mode, if so, return success. + Function *Kernel = getAnchorScope(); + GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( + (Kernel->getName() + "_exec_mode").str()); + assert(ExecMode && "Kernel without exec mode?"); + assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!"); + + // Set the global exec mode flag to indicate SPMD-Generic mode. + assert(isa(ExecMode->getInitializer()) && + "ExecMode is not an integer!"); + const int8_t ExecModeVal = + cast(ExecMode->getInitializer())->getSExtValue(); + if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) + return true; + + // We will now unconditionally modify the IR, indicate a change. + Changed = ChangeStatus::CHANGED; + auto CreateGuardedRegion = [&](Instruction *RegionStartI, Instruction *RegionEndI) { LoopInfo *LI = nullptr; @@ -3161,8 +3199,9 @@ struct AAKernelInfoFunction : AAKernelInfo { auto *SharedMem = new GlobalVariable( M, I.getType(), /* IsConstant */ false, GlobalValue::InternalLinkage, UndefValue::get(I.getType()), - I.getName() + ".guarded.output.alloc", nullptr, - GlobalValue::NotThreadLocal, + sanitizeForGlobalName( + (I.getName() + ".guarded.output.alloc").str()), + nullptr, GlobalValue::NotThreadLocal, static_cast(AddressSpace::Shared)); // Emit a store instruction to update the value. @@ -3173,11 +3212,8 @@ struct AAKernelInfoFunction : AAKernelInfo { RegionBarrierBB->getTerminator()); // Emit a load instruction and replace uses of the output value. - for (Instruction *UsrI : OutsideUsers) { - assert(UsrI->getParent() == RegionExitBB && - "Expected escaping users in exit region"); + for (Instruction *UsrI : OutsideUsers) UsrI->replaceUsesOfWith(&I, LoadI); - } } auto &OMPInfoCache = static_cast(A.getInfoCache()); @@ -3295,17 +3331,6 @@ struct AAKernelInfoFunction : AAKernelInfo { // Adjust the global exec mode flag that tells the runtime what mode this // kernel is executed in. 
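The sanitizeForGlobalName helper added above is self-contained, so it can be exercised directly. A standalone copy with a usage check (the second input string is an invented example): every character outside [A-Za-z0-9_] becomes '.', which remains valid in an LLVM symbol name but cannot collide with the original identifier syntax:

#include <algorithm>
#include <cassert>
#include <string>

static std::string sanitizeForGlobalName(std::string S) {
  std::replace_if(
      S.begin(), S.end(),
      [](const char C) {
        return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
                 (C >= '0' && C <= '9') || C == '_');
      },
      '.');
  return S;
}

int main() {
  // Already-clean names pass through unchanged.
  assert(sanitizeForGlobalName("x.guarded.output.alloc") ==
         "x.guarded.output.alloc");
  // Template-like or operator characters are each replaced with '.'.
  assert(sanitizeForGlobalName("a<b>*c") == "a.b..c");
  return 0;
}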
- Function *Kernel = getAnchorScope(); - GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( - (Kernel->getName() + "_exec_mode").str()); - assert(ExecMode && "Kernel without exec mode?"); - assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!"); - - // Set the global exec mode flag to indicate SPMD-Generic mode. - assert(isa(ExecMode->getInitializer()) && - "ExecMode is not an integer!"); - const int8_t ExecModeVal = - cast(ExecMode->getInitializer())->getSExtValue(); assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!"); ExecMode->setInitializer( @@ -3433,7 +3458,7 @@ struct AAKernelInfoFunction : AAKernelInfo { // InitCB = __kmpc_target_init(...) // bool IsWorker = InitCB >= 0; // if (IsWorker) { - // SMBeginBB: __kmpc_barrier_simple_spmd(...); + // SMBeginBB: __kmpc_barrier_simple_generic(...); // void *WorkFn; // bool Active = __kmpc_kernel_parallel(&WorkFn); // if (!WorkFn) return; @@ -3447,7 +3472,7 @@ struct AAKernelInfoFunction : AAKernelInfo { // ((WorkFnTy*)WorkFn)(...); // SMEndParallelBB: __kmpc_kernel_end_parallel(...); // } - // SMDoneBB: __kmpc_barrier_simple_spmd(...); + // SMDoneBB: __kmpc_barrier_simple_generic(...); // goto SMBeginBB; // } // UserCodeEntryBB: // user code @@ -3515,7 +3540,7 @@ struct AAKernelInfoFunction : AAKernelInfo { FunctionCallee BarrierFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( - M, OMPRTL___kmpc_barrier_simple_spmd); + M, OMPRTL___kmpc_barrier_simple_generic); CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB) ->setDebugLoc(DLoc); @@ -3651,7 +3676,7 @@ struct AAKernelInfoFunction : AAKernelInfo { // Check for AAHeapToStack moved objects which must not be guarded. auto &HS = A.getAAFor( *this, IRPosition::function(*I.getFunction()), - DepClassTy::REQUIRED); + DepClassTy::OPTIONAL); if (llvm::all_of(Objects, [&HS](const Value *Obj) { auto *CB = dyn_cast(Obj); if (!CB) @@ -3682,6 +3707,7 @@ struct AAKernelInfoFunction : AAKernelInfo { } // Callback to check a call instruction. + bool AllParallelRegionStatesWereFixed = true; bool AllSPMDStatesWereFixed = true; auto CheckCallInst = [&](Instruction &I) { auto &CB = cast(I); @@ -3689,16 +3715,38 @@ struct AAKernelInfoFunction : AAKernelInfo { *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); getState() ^= CBAA.getState(); AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint(); + AllParallelRegionStatesWereFixed &= + CBAA.ReachedKnownParallelRegions.isAtFixpoint(); + AllParallelRegionStatesWereFixed &= + CBAA.ReachedUnknownParallelRegions.isAtFixpoint(); return true; }; bool UsedAssumedInformationInCheckCallInst = false; if (!A.checkForAllCallLikeInstructions( CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) { - LLVM_DEBUG(dbgs() << TAG << "Failed to visit all call-like instructions!\n";); + LLVM_DEBUG(dbgs() << TAG + << "Failed to visit all call-like instructions!\n";); return indicatePessimisticFixpoint(); } + // If we haven't used any assumed information for the reached parallel + // region states we can fix it. + if (!UsedAssumedInformationInCheckCallInst && + AllParallelRegionStatesWereFixed) { + ReachedKnownParallelRegions.indicateOptimisticFixpoint(); + ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); + } + + // If we are sure there are no parallel regions in the kernel we do not + // want SPMD mode. 
+ if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() && + ReachedKnownParallelRegions.isAtFixpoint() && + ReachedUnknownParallelRegions.isValidState() && + ReachedKnownParallelRegions.isValidState() && + !mayContainParallelRegion()) + SPMDCompatibilityTracker.indicatePessimisticFixpoint(); + // If we haven't used any assumed information for the SPMD state we can fix // it. if (!UsedAssumedInformationInCheckRWInst && diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 32fcad4ea83b7..1819c3c720092 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1031,7 +1031,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); // Propagate constant function arguments by specializing the functions. - if (EnableFunctionSpecialization) + if (EnableFunctionSpecialization && OptLevel > 2) PM.add(createFunctionSpecializationPass()); // Propagate constants at call sites into the functions they call. This diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp index 081398a390fad..5779553ee7324 100644 --- a/llvm/lib/Transforms/IPO/SCCP.cpp +++ b/llvm/lib/Transforms/IPO/SCCP.cpp @@ -135,6 +135,7 @@ PreservedAnalyses FunctionSpecializationPass::run(Module &M, return PA; } +namespace { struct FunctionSpecializationLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid FunctionSpecializationLegacyPass() : ModulePass(ID) {} @@ -175,6 +176,7 @@ struct FunctionSpecializationLegacyPass : public ModulePass { return runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis); } }; +} // namespace char FunctionSpecializationLegacyPass::ID = 0; diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index a7e68a6767a6b..61054e7ae46fe 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1786,10 +1786,8 @@ void DevirtModule::scanTypeTestUsers( // points to a member of the type identifier %md. Group calls by (type ID, // offset) pair (effectively the identity of the virtual function) and store // to CallSlots.
- for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end(); - I != E;) { - auto CI = dyn_cast(I->getUser()); - ++I; + for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) { + auto *CI = dyn_cast(U.getUser()); if (!CI) continue; @@ -1858,11 +1856,8 @@ void DevirtModule::scanTypeTestUsers( void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) { Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test); - for (auto I = TypeCheckedLoadFunc->use_begin(), - E = TypeCheckedLoadFunc->use_end(); - I != E;) { - auto CI = dyn_cast(I->getUser()); - ++I; + for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) { + auto *CI = dyn_cast(U.getUser()); if (!CI) continue; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 6c93969df0ab3..2c43158890f99 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2168,6 +2168,13 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return replaceInstUsesWith( I, Builder.CreateIntrinsic(Intrinsic::umin, {I.getType()}, {Op0, Y})); + // umax(X, Op1) - Op1 --> usub.sat(X, Op1) + // TODO: The one-use restriction is not strictly necessary, but it may + // require improving other pattern matching and/or codegen. + if (match(Op0, m_OneUse(m_UMax(m_Value(X), m_Specific(Op1))))) + return replaceInstUsesWith( + I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1})); + // C - ctpop(X) => ctpop(~X) if C is bitwidth if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) && match(Op1, m_OneUse(m_Intrinsic(m_Value(X))))) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 56b6f7d156744..7e40e358b6a22 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2292,18 +2292,18 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) { /// vector composed of all-zeros or all-ones values and is the bitwise 'not' of /// B, it can be used as the condition operand of a select instruction. Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) { - // Step 1: We may have peeked through bitcasts in the caller. + // We may have peeked through bitcasts in the caller. // Exit immediately if we don't have (vector) integer types. Type *Ty = A->getType(); if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy()) return nullptr; - // Step 2: We need 0 or all-1's bitmasks. + // We need 0 or all-1's bitmasks. if (ComputeNumSignBits(A) != Ty->getScalarSizeInBits()) return nullptr; - // Step 3: If B is the 'not' value of A, we have our answer. - if (match(A, m_Not(m_Specific(B)))) { + // If B is the 'not' value of A, we have our answer. + if (match(B, m_Not(m_Specific(A)))) { // If these are scalars or vectors of i1, A can be used directly. 
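The new `umax(X, Op1) - Op1 --> usub.sat(X, Op1)` fold in InstCombineAddSub above is a pure value identity, so it can be checked exhaustively at 8 bits. A standalone check (plain C++ standing in for the IR-level intrinsics):

#include <algorithm>
#include <cassert>
#include <cstdint>

// usub.sat(x, y): unsigned subtraction clamped at zero.
static uint8_t usub_sat(uint8_t X, uint8_t Y) {
  return X > Y ? uint8_t(X - Y) : uint8_t(0);
}

int main() {
  // umax(X, Y) - Y == usub.sat(X, Y) for all 8-bit inputs: if X >= Y the
  // subtraction never wraps, otherwise umax picks Y and the result is 0.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned Y = 0; Y < 256; ++Y)
      assert(uint8_t(std::max(X, Y) - Y) == usub_sat(uint8_t(X), uint8_t(Y)));
  return 0;
}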
if (Ty->isIntOrIntVectorTy(1)) return A; @@ -2704,60 +2704,50 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - // (A & MaskC0) | (B & MaskC1) - const APInt *MaskC0, *MaskC1; - if (match(C, m_APInt(MaskC0)) && match(D, m_APInt(MaskC1)) && - *MaskC0 == ~*MaskC1) { + // (A & C0) | (B & C1) + const APInt *C0, *C1; + if (match(C, m_APInt(C0)) && match(D, m_APInt(C1))) { Value *X; + if (*C0 == ~*C1) { + // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B + if (match(A, m_c_Or(m_Value(X), m_Specific(B)))) + return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C0), B); + // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A + if (match(B, m_c_Or(m_Specific(A), m_Value(X)))) + return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C1), A); + + // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B + if (match(A, m_c_Xor(m_Value(X), m_Specific(B)))) + return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C0), B); + // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A + if (match(B, m_c_Xor(m_Specific(A), m_Value(X)))) + return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C1), A); + } - // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B - if (match(A, m_c_Or(m_Value(X), m_Specific(B)))) - return BinaryOperator::CreateOr(Builder.CreateAnd(X, *MaskC0), B); - // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A - if (match(B, m_c_Or(m_Specific(A), m_Value(X)))) - return BinaryOperator::CreateOr(Builder.CreateAnd(X, *MaskC1), A); - - // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B - if (match(A, m_c_Xor(m_Value(X), m_Specific(B)))) - return BinaryOperator::CreateXor(Builder.CreateAnd(X, *MaskC0), B); - // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A - if (match(B, m_c_Xor(m_Specific(A), m_Value(X)))) - return BinaryOperator::CreateXor(Builder.CreateAnd(X, *MaskC1), A); - } - - // (A & C1)|(B & C2) - ConstantInt *C1, *C2; - if (match(C, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2))) { - Value *V1 = nullptr, *V2 = nullptr; - if ((C1->getValue() & C2->getValue()).isZero()) { - // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) - // iff (C1&C2) == 0 and (N&~C1) == 0 - if (match(A, m_Or(m_Value(V1), m_Value(V2))) && - ((V1 == B && - MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) || // (V|N) - (V2 == B && - MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V) - return BinaryOperator::CreateAnd(A, - Builder.getInt(C1->getValue()|C2->getValue())); - // Or commutes, try both ways. - if (match(B, m_Or(m_Value(V1), m_Value(V2))) && - ((V1 == A && - MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) || // (V|N) - (V2 == A && - MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V) - return BinaryOperator::CreateAnd(B, - Builder.getInt(C1->getValue()|C2->getValue())); - - // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) - // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. 
- ConstantInt *C3 = nullptr, *C4 = nullptr; - if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && - (C3->getValue() & ~C1->getValue()).isZero() && - match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && - (C4->getValue() & ~C2->getValue()).isZero()) { - V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); - return BinaryOperator::CreateAnd(V2, - Builder.getInt(C1->getValue()|C2->getValue())); + if ((*C0 & *C1).isZero()) { + // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1) + // iff (C0 & C1) == 0 and (X & ~C0) == 0 + if (match(A, m_c_Or(m_Value(X), m_Specific(B))) && + MaskedValueIsZero(X, ~*C0, 0, &I)) { + Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1); + return BinaryOperator::CreateAnd(A, C01); + } + // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1) + // iff (C0 & C1) == 0 and (X & ~C1) == 0 + if (match(B, m_c_Or(m_Value(X), m_Specific(A))) && + MaskedValueIsZero(X, ~*C1, 0, &I)) { + Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1); + return BinaryOperator::CreateAnd(B, C01); + } + // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1) + // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0. + const APInt *C2, *C3; + if (match(A, m_Or(m_Value(X), m_APInt(C2))) && + match(B, m_Or(m_Specific(X), m_APInt(C3))) && + (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) { + Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield"); + Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1); + return BinaryOperator::CreateAnd(Or, C01); } } } @@ -3611,7 +3601,7 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (match(Op0, m_OneUse(m_TruncOrSelf( m_AShr(m_Value(X), m_APIntAllowUndef(CA))))) && *CA == X->getType()->getScalarSizeInBits() - 1 && - !C1->isAllOnesValue()) { + !match(C1, m_AllOnes())) { assert(!C1->isZeroValue() && "Unexpected xor with 0"); Value *ICmp = Builder.CreateICmpSGT(X, Constant::getAllOnesValue(X->getType())); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6f73c4e3f4235..b697ddd37aa03 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1270,9 +1270,8 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // This is only really a signed overflow check if the inputs have been // sign-extended; check for that condition. For example, if CI2 is 2^31 and // the operands of the add are 64 bits wide, we need at least 33 sign bits. - unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; - if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits || - IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits) + if (IC.ComputeMinSignedBits(A, 0, &I) > NewWidth || + IC.ComputeMinSignedBits(B, 0, &I) > NewWidth) return nullptr; // In order to replace the original add with a narrower @@ -2749,6 +2748,14 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp, return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C), ConstantExpr::getNeg(cast(Y))); + // The range test idiom can use either ult or ugt. Arbitrarily canonicalize + // to the ult form. 
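The rewritten `(A & C0) | (B & C1)` folds above rest on two bitwise identities for the `C1 == ~C0` case. They can be verified exhaustively at 8 bits; a standalone check, not the InstCombine code itself:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned C0 = 0; C0 < 256; ++C0) {
        uint8_t C1 = uint8_t(~C0); // the C1 == ~C0 case
        // ((X | B) & C0) | (B & C1)  ==  (X & C0) | B
        assert((((X | B) & C0) | (B & C1)) == uint8_t((X & C0) | B));
        // ((X ^ B) & C0) | (B & C1)  ==  (X & C0) ^ B
        assert((((X ^ B) & C0) | (B & C1)) == uint8_t((X & C0) ^ B));
      }
  return 0;
}

Per bit: where C0 is set, B contributes nothing on the right of the mask pair, and where C0 is clear only B survives, which is exactly what the shorter forms compute.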
+ // X+C2 >u C -> X+(C2-C-1) (X & MaskC) == 0 - unsigned SrcBits = X->getType()->getScalarSizeInBits(); - unsigned DstBits = Op0->getType()->getScalarSizeInBits(); - APInt MaskC = APInt::getOneBitSet(SrcBits, DstBits) - C->zext(SrcBits); + if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C))) + return nullptr; + + unsigned SrcBits = X->getType()->getScalarSizeInBits(); + if (Pred == ICmpInst::ICMP_ULT) { + if (C->isPowerOf2()) { + // If C is a power-of-2: + // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?) + Constant *MaskC = ConstantInt::get(X->getType(), (-*C).zext(SrcBits)); Value *And = Builder.CreateAnd(X, MaskC); Constant *Zero = ConstantInt::getNullValue(X->getType()); return new ICmpInst(ICmpInst::ICMP_EQ, And, Zero); } - // TODO: Handle ugt. + // TODO: Handle C is negative-power-of-2. } + // TODO: Handle ugt. return nullptr; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index c19da8a01c262..4a1e82ae9c1d0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1298,15 +1298,23 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, // Said condition must be one-use. if (!Cmp0.hasOneUse()) return nullptr; + ICmpInst::Predicate Pred0 = Cmp0.getPredicate(); Value *Cmp00 = Cmp0.getOperand(0); Constant *C0; if (!match(Cmp0.getOperand(1), m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0)))) return nullptr; - // Canonicalize Cmp0 into the form we expect. + + if (!isa(Sel1)) { + Pred0 = ICmpInst::getInversePredicate(Pred0); + std::swap(X, Sel1); + } + + // Canonicalize Cmp0 into ult or uge. // FIXME: we shouldn't care about lanes that are 'undef' in the end? - switch (Cmp0.getPredicate()) { + switch (Pred0) { case ICmpInst::Predicate::ICMP_ULT: + case ICmpInst::Predicate::ICMP_UGE: // Although icmp ult %x, 0 is an unusual thing to try and should generally // have been simplified, it does not verify with undef inputs so ensure we // are not in a strange state. @@ -1316,25 +1324,16 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, return nullptr; break; // Great! case ICmpInst::Predicate::ICMP_ULE: - // We'd have to increment C0 by one, and for that it must not have all-ones - // element, but then it would have been canonicalized to 'ult' before - // we get here. So we can't do anything useful with 'ule'. - return nullptr; case ICmpInst::Predicate::ICMP_UGT: - // We want to canonicalize it to 'ult', so we'll need to increment C0, - // which again means it must not have any all-ones elements. + // We want to canonicalize it to 'ult' or 'uge', so we'll need to increment + // C0, which again means it must not have any all-ones elements. if (!match(C0, m_SpecificInt_ICMP( ICmpInst::Predicate::ICMP_NE, APInt::getAllOnes(C0->getType()->getScalarSizeInBits())))) return nullptr; // Can't do, have all-ones element[s]. C0 = InstCombiner::AddOne(C0); - std::swap(X, Sel1); break; - case ICmpInst::Predicate::ICMP_UGE: - // The only way we'd get this predicate if this `icmp` has extra uses, - // but then we won't be able to do this fold. - return nullptr; default: return nullptr; // Unknown predicate. } @@ -1407,6 +1406,8 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0, // The thresholds of this clamp-like pattern. 
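The power-of-two case added above, `(trunc X) u< C --> (X & -C) == 0` with `-C` computed in the narrow type and zero-extended, is another identity that checks out exhaustively. A standalone sketch using an i16-to-i8 truncation:

#include <cassert>
#include <cstdint>

int main() {
  // C ranges over the i8 powers of two; MaskC has the high bits of the
  // narrow type set, zero-extended into the wide type.
  for (unsigned X = 0; X < 65536; ++X)
    for (unsigned K = 0; K < 8; ++K) {
      uint8_t C = uint8_t(1u << K);
      uint16_t MaskC = uint8_t(-C);    // -C in i8, zero-extended to i16
      bool Cmp = uint8_t(X) < C;       // (trunc X) u< C
      bool Fold = (X & MaskC) == 0;    // are all masked-high-bits clear?
      assert(Cmp == Fold);
    }
  return 0;
}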
auto *ThresholdLowIncl = ConstantExpr::getNeg(C1); auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1); + if (Pred0 == ICmpInst::Predicate::ICMP_UGE) + std::swap(ThresholdLowIncl, ThresholdHighExcl); // The fold has a precondition 1: C2 s>= ThresholdLow auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2, @@ -2304,16 +2305,6 @@ Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) { // Create the new type (which can be a vector type) Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth); - // Match the two extends from the add/sub - Value *A, *B; - if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B))))) - return nullptr; - // And check the incoming values are of a type smaller than or equal to the - // size of the saturation. Otherwise the higher bits can cause different - // results. - if (A->getType()->getScalarSizeInBits() > NewBitWidth || - B->getType()->getScalarSizeInBits() > NewBitWidth) - return nullptr; Intrinsic::ID IntrinsicID; if (AddSub->getOpcode() == Instruction::Add) @@ -2323,10 +2314,16 @@ Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) { else return nullptr; + // The two operands of the add/sub must be nsw-truncatable to the NewTy. This + // is usually achieved via a sext from a smaller type. + if (ComputeMinSignedBits(AddSub->getOperand(0), 0, AddSub) > NewBitWidth || + ComputeMinSignedBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth) + return nullptr; + // Finally create and return the sat intrinsic, truncated to the new type Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy); - Value *AT = Builder.CreateSExt(A, NewTy); - Value *BT = Builder.CreateSExt(B, NewTy); + Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy); + Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy); Value *Sat = Builder.CreateCall(F, {AT, BT}); return CastInst::Create(Instruction::SExt, Sat, Ty); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index d6d39587e0985..32e5378971408 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2268,12 +2268,8 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, SmallVector Mask; Shuf.getShuffleMask(Mask); - // The shuffle must not change vector sizes. - // TODO: This restriction could be removed if the insert has only one use - // (because the transform would require a new length-changing shuffle). int NumElts = Mask.size(); - if (NumElts != (int)(cast(V0->getType())->getNumElements())) - return nullptr; + int InpNumElts = cast(V0->getType())->getNumElements(); // This is a specialization of a fold in SimplifyDemandedVectorElts. We may // not be able to handle it there if the insertelement has >1 use. @@ -2290,11 +2286,16 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf, if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) { // Offset the index constant by the vector width because we are checking for // accesses to the 2nd vector input of the shuffle. - IdxC += NumElts; + IdxC += InpNumElts; // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask if (!is_contained(Mask, (int)IdxC)) return IC.replaceOperand(Shuf, 1, X); } + // For the rest of the transform, the shuffle must not change vector sizes. 
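matchSAddSubSat now only requires the add/sub operands to be nsw-truncatable to the narrow type (ComputeMinSignedBits <= NewBitWidth) instead of literal sext operands. The underlying equivalence, that saturating at i8 equals doing the add wide and clamping, holds whenever both inputs already fit in 8 signed bits. A standalone check; the explicit clamp here is a stand-in for the smin/smax pattern the pass actually matches:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Reference sadd.sat.i8 implemented directly with branches.
static int8_t sadd_sat_i8(int8_t A, int8_t B) {
  int Sum = int(A) + int(B);
  if (Sum > INT8_MAX) return INT8_MAX;
  if (Sum < INT8_MIN) return INT8_MIN;
  return int8_t(Sum);
}

int main() {
  // trunc(smin(smax(a + b, -128), 127)) computed in i16 equals
  // sadd.sat(trunc a, trunc b) when a and b fit in i8's signed range.
  for (int A = INT8_MIN; A <= INT8_MAX; ++A)
    for (int B = INT8_MIN; B <= INT8_MAX; ++B) {
      int16_t Clamped = std::min<int16_t>(
          std::max<int16_t>(int16_t(A + B), INT8_MIN), INT8_MAX);
      assert(int8_t(Clamped) == sadd_sat_i8(int8_t(A), int8_t(B)));
    }
  return 0;
}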
+ // TODO: This restriction could be removed if the insert has only one use + // (because the transform would require a new length-changing shuffle). + if (NumElts != InpNumElts) + return nullptr; // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC' auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) { diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 1b401566dad75..4892b06735eab 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3138,26 +3138,21 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { // checking for overflow. const APInt *C; if (match(WO->getRHS(), m_APInt(C))) { - // Compute the no-wrap range [X,Y) for LHS given RHS=C, then - // check for the inverted range using range offset trick (i.e. - // use a subtract to shift the range to bottom of either the - // signed or unsigned domain and then use a single compare to - // check range membership). + // Compute the no-wrap range for LHS given RHS=C, then construct an + // equivalent icmp, potentially using an offset. ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C, WO->getNoWrapKind()); - APInt Min = WO->isSigned() ? NWR.getSignedMin() : NWR.getUnsignedMin(); - NWR = NWR.subtract(Min); CmpInst::Predicate Pred; - APInt NewRHSC; - if (NWR.getEquivalentICmp(Pred, NewRHSC)) { - auto *OpTy = WO->getRHS()->getType(); - auto *NewLHS = Builder.CreateSub(WO->getLHS(), - ConstantInt::get(OpTy, Min)); - return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS, - ConstantInt::get(OpTy, NewRHSC)); - } + APInt NewRHSC, Offset; + NWR.getEquivalentICmp(Pred, NewRHSC, Offset); + auto *OpTy = WO->getRHS()->getType(); + auto *NewLHS = WO->getLHS(); + if (Offset != 0) + NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset)); + return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS, + ConstantInt::get(OpTy, NewRHSC)); } } } diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 5563fc14d151b..b56329ad76ae6 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/BinaryFormat/MachO.h" @@ -47,6 +48,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" @@ -211,6 +213,11 @@ static cl::opt ClInstrumentWrites( "asan-instrument-writes", cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); +static cl::opt + ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(false), + cl::desc("Use Stack Safety analysis results"), + cl::Optional); + static cl::opt ClInstrumentAtomics( "asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, @@ -647,6 +654,7 @@ char ASanGlobalsMetadataWrapperPass::ID = 0; /// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer { AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, + const StackSafetyGlobalInfo *SSGI, bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false, AsanDetectStackUseAfterReturnMode UseAfterReturn = @@ -657,7 +665,7 @@ struct AddressSanitizer { UseAfterScope(UseAfterScope || ClUseAfterScope), UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? ClUseAfterReturn : UseAfterReturn), - GlobalsMD(*GlobalsMD) { + GlobalsMD(*GlobalsMD), SSGI(SSGI) { C = &(M.getContext()); LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -686,7 +694,7 @@ struct AddressSanitizer { /// Check if we want (and can) handle this alloca. bool isInterestingAlloca(const AllocaInst &AI); - bool ignoreAccess(Value *Ptr); + bool ignoreAccess(Instruction *Inst, Value *Ptr); void getInterestingMemoryOperands( Instruction *I, SmallVectorImpl &Interesting); @@ -771,6 +779,7 @@ struct AddressSanitizer { FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset; Value *LocalDynamicShadow = nullptr; const GlobalsMetadata &GlobalsMD; + const StackSafetyGlobalInfo *SSGI; DenseMap ProcessedAllocas; FunctionCallee AMDGPUAddressShared; @@ -797,16 +806,22 @@ class AddressSanitizerLegacyPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + if (ClUseStackSafety) + AU.addRequired(); AU.addRequired(); } bool runOnFunction(Function &F) override { GlobalsMetadata &GlobalsMD = getAnalysis().getGlobalsMD(); + const StackSafetyGlobalInfo *const SSGI = + ClUseStackSafety + ? &getAnalysis().getResult() + : nullptr; const TargetLibraryInfo *TLI = &getAnalysis().getTLI(F); - AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover, - UseAfterScope, UseAfterReturn); + AddressSanitizer ASan(*F.getParent(), &GlobalsMD, SSGI, CompileKernel, + Recover, UseAfterScope, UseAfterReturn); return ASan.instrumentFunction(F, TLI); } @@ -1260,8 +1275,9 @@ PreservedAnalyses AddressSanitizerPass::run(Function &F, Module &M = *F.getParent(); if (auto *R = MAMProxy.getCachedResult(M)) { const TargetLibraryInfo *TLI = &AM.getResult(F); - AddressSanitizer Sanitizer(M, R, Options.CompileKernel, Options.Recover, - Options.UseAfterScope, Options.UseAfterReturn); + AddressSanitizer Sanitizer(M, R, nullptr, Options.CompileKernel, + Options.Recover, Options.UseAfterScope, + Options.UseAfterReturn); if (Sanitizer.instrumentFunction(F, TLI)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -1288,26 +1304,36 @@ void ModuleAddressSanitizerPass::printPipeline( static_cast *>(this)->printPipeline( OS, MapClassName2PassName); OS << "<"; - if (CompileKernel) + if (Options.CompileKernel) OS << "kernel"; OS << ">"; } ModuleAddressSanitizerPass::ModuleAddressSanitizerPass( - bool CompileKernel, bool Recover, bool UseGlobalGC, bool UseOdrIndicator, - AsanDtorKind DestructorKind) - : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC), + const AddressSanitizerOptions &Options, bool UseGlobalGC, + bool UseOdrIndicator, AsanDtorKind DestructorKind) + : Options(Options), UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator), DestructorKind(DestructorKind) {} PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, - AnalysisManager &AM) { - GlobalsMetadata &GlobalsMD = AM.getResult(M); - ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover, - UseGlobalGC, UseOdrIndicator, - DestructorKind); - if (Sanitizer.instrumentModule(M)) - return PreservedAnalyses::none(); 
- return PreservedAnalyses::all(); + ModuleAnalysisManager &MAM) { + GlobalsMetadata &GlobalsMD = MAM.getResult(M); + ModuleAddressSanitizer ModuleSanitizer(M, &GlobalsMD, Options.CompileKernel, + Options.Recover, UseGlobalGC, + UseOdrIndicator, DestructorKind); + bool Modified = false; + auto &FAM = MAM.getResult(M).getManager(); + const StackSafetyGlobalInfo *const SSGI = + ClUseStackSafety ? &MAM.getResult(M) : nullptr; + for (Function &F : M) { + AddressSanitizer FunctionSanitizer( + M, &GlobalsMD, SSGI, Options.CompileKernel, Options.Recover, + Options.UseAfterScope, Options.UseAfterReturn); + const TargetLibraryInfo &TLI = FAM.getResult(F); + Modified |= FunctionSanitizer.instrumentFunction(F, &TLI); + } + Modified |= ModuleSanitizer.instrumentModule(M); + return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all(); } INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md", @@ -1322,6 +1348,7 @@ INITIALIZE_PASS_BEGIN( "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass) +INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( AddressSanitizerLegacyPass, "asan", @@ -1460,7 +1487,7 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { return IsInteresting; } -bool AddressSanitizer::ignoreAccess(Value *Ptr) { +bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { // Instrument accesses from different address spaces only for AMDGPU. Type *PtrTy = cast(Ptr->getType()->getScalarType()); if (PtrTy->getPointerAddressSpace() != 0 && @@ -1481,6 +1508,10 @@ bool AddressSanitizer::ignoreAccess(Value *Ptr) { if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI)) return true; + if (SSGI != nullptr && SSGI->stackAccessIsSafe(*Inst) && + findAllocaForValue(Ptr)) + return true; + return false; } @@ -1495,22 +1526,22 @@ void AddressSanitizer::getInterestingMemoryOperands( return; if (LoadInst *LI = dyn_cast(I)) { - if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand())) + if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand())) return; Interesting.emplace_back(I, LI->getPointerOperandIndex(), false, LI->getType(), LI->getAlign()); } else if (StoreInst *SI = dyn_cast(I)) { - if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand())) + if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand())) return; Interesting.emplace_back(I, SI->getPointerOperandIndex(), true, SI->getValueOperand()->getType(), SI->getAlign()); } else if (AtomicRMWInst *RMW = dyn_cast(I)) { - if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand())) + if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand())) return; Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true, RMW->getValOperand()->getType(), None); } else if (AtomicCmpXchgInst *XCHG = dyn_cast(I)) { - if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand())) + if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand())) return; Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true, XCHG->getCompareOperand()->getType(), None); @@ -1525,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands( return; auto BasePtr = CI->getOperand(OpOffset); - if (ignoreAccess(BasePtr)) + if (ignoreAccess(I, BasePtr)) return; auto Ty = cast(BasePtr->getType())->getElementType(); MaybeAlign Alignment = Align(1); @@ -1537,7 +1568,7 @@ void
AddressSanitizer::getInterestingMemoryOperands( } else { for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) { if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) || - ignoreAccess(CI->getArgOperand(ArgNo))) + ignoreAccess(I, CI->getArgOperand(ArgNo))) continue; Type *Ty = CI->getParamByValType(ArgNo); Interesting.emplace_back(I, ArgNo, false, Ty, Align(1)); @@ -2841,6 +2872,8 @@ bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) { bool AddressSanitizer::instrumentFunction(Function &F, const TargetLibraryInfo *TLI) { + if (F.empty()) + return false; if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false; if (F.getName().startswith("__asan_")) return false; diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index ea9f61db1ced3..38c219ce34654 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1363,28 +1363,25 @@ bool DataFlowSanitizer::runImpl(Module &M) { // Give function aliases prefixes when necessary, and build wrappers where the // instrumentedness is inconsistent. - for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); - AI != AE;) { - GlobalAlias *GA = &*AI; - ++AI; + for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) { // Don't stop on weak. We assume people aren't playing games with the // instrumentedness of overridden weak aliases. - auto *F = dyn_cast(GA->getAliaseeObject()); + auto *F = dyn_cast(GA.getAliaseeObject()); if (!F) continue; - bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); + bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F); if (GAInst && FInst) { - addGlobalNameSuffix(GA); + addGlobalNameSuffix(&GA); } else if (GAInst != FInst) { // Non-instrumented alias of an instrumented function, or vice versa. // Replace the alias with a native-ABI wrapper of the aliasee. The pass // below will take care of instrumenting it. Function *NewF = - buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); - GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType())); - NewF->takeName(GA); - GA->eraseFromParent(); + buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType()); + GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType())); + NewF->takeName(&GA); + GA.eraseFromParent(); FnsToInstrument.push_back(NewF); } } diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index eeb71ed67cc9f..62c265e40dab1 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -1771,9 +1771,10 @@ void HWAddressSanitizer::instrumentGlobals() { Hasher.update(M.getSourceFileName()); MD5::MD5Result Hash; Hasher.final(Hash); - uint8_t Tag = Hash[0] & TagMaskByte; + uint8_t Tag = Hash[0]; for (GlobalVariable *GV : Globals) { + Tag &= TagMaskByte; // Skip tag 0 in order to avoid collisions with untagged memory.
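The HWASan hunk above moves the `TagMaskByte` masking inside the loop so that tags incremented past the mask wrap back into range instead of escaping it, while tag 0 keeps being skipped. A toy analogue of the resulting tag sequence; the increment-per-global step is implied by the surrounding code rather than shown in this hunk, so treat it as an assumption:

#include <cassert>
#include <cstdint>
#include <vector>

// Produce N per-global tags starting from a hash-derived seed, masking on
// every iteration and skipping tag 0 (reserved for untagged memory).
static std::vector<uint8_t> makeTags(uint8_t Seed, unsigned N,
                                     uint8_t TagMaskByte) {
  std::vector<uint8_t> Tags;
  uint8_t Tag = Seed;
  for (unsigned I = 0; I < N; ++I) {
    Tag &= TagMaskByte;
    if (Tag == 0)
      Tag = 1;
    Tags.push_back(Tag);
    ++Tag; // assumed: the next global gets the next tag
  }
  return Tags;
}

int main() {
  // With a 4-bit mask, a seed of 0xFE wraps into valid tags rather than
  // walking out of the mask's range.
  for (uint8_t T : makeTags(0xFE, 4, 0x0F))
    assert(T != 0 && T <= 0x0F);
  return 0;
}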
if (Tag == 0) Tag = 1; diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp index 113615a8e3f01..95de59fa8262c 100644 --- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -467,7 +467,7 @@ static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallBase &CB, BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr; SmallVector, 2> PredsCS; - for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) { + for (auto *Pred : llvm::reverse(Preds)) { ConditionsTy Conditions; // Record condition on edge BB(CS) <- Pred recordCondition(CB, Pred, CB.getParent(), Conditions); diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 6dbd3da240597..ca9567dc7ac8a 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -67,6 +67,7 @@ STATISTIC(NumUDivURemsNarrowed, STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumSExt, "Number of sext converted to zext"); +STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned"); STATISTIC(NumAnd, "Number of ands removed"); STATISTIC(NumNW, "Number of no-wrap deductions"); STATISTIC(NumNSW, "Number of no-signed-wrap deductions"); @@ -295,11 +296,34 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) { return true; } +static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) { + // Only for signed relational comparisons of scalar integers. + if (Cmp->getType()->isVectorTy() || + !Cmp->getOperand(0)->getType()->isIntegerTy()) + return false; + + if (!Cmp->isSigned()) + return false; + + ICmpInst::Predicate UnsignedPred = + ConstantRange::getEquivalentPredWithFlippedSignedness( + Cmp->getPredicate(), LVI->getConstantRange(Cmp->getOperand(0), Cmp), + LVI->getConstantRange(Cmp->getOperand(1), Cmp)); + + if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE) + return false; + + ++NumSICmps; + Cmp->setPredicate(UnsignedPred); + + return true; +} + /// See if LazyValueInfo's ability to exploit edge conditions or range /// information is sufficient to prove this comparison. Even for local /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. -static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { +static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) { Value *Op0 = Cmp->getOperand(0); auto *C = dyn_cast(Cmp->getOperand(1)); if (!C) @@ -318,6 +342,17 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { return true; } +static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) { + if (constantFoldCmp(Cmp, LVI)) + return true; + + if (auto *ICmp = dyn_cast(Cmp)) + if (processICmp(ICmp, LVI)) + return true; + + return false; +} + /// Simplify a switch instruction by removing cases which can never fire. If the /// uselessness of a case could be determined locally then constant propagation /// would already have figured it out. Instead, walk the predecessors and @@ -341,7 +376,13 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, // ConstantFoldTerminator() as the underlying SwitchInst can be changed. 
SwitchInstProfUpdateWrapper SI(*I); - for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) { + APInt Low = + APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits()); + APInt High = + APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits()); + + SwitchInst::CaseIt CI = SI->case_begin(); + for (auto CE = SI->case_end(); CI != CE;) { ConstantInt *Case = CI->getCaseValue(); LazyValueInfo::Tristate State = LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I, @@ -374,9 +415,28 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI, break; } + // Get Lower/Upper bound from switch cases. + Low = APIntOps::smin(Case->getValue(), Low); + High = APIntOps::smax(Case->getValue(), High); + // Increment the case iterator since we didn't delete it. ++CI; } + + // Try to simplify default case as unreachable + if (CI == SI->case_end() && SI->getNumCases() != 0 && + !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg())) { + const ConstantRange SIRange = + LVI->getConstantRange(SI->getCondition(), SI); + + // If the numbered switch cases cover the entire range of the condition, + // then the default case is not reachable. + if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High && + SI->getNumCases() == High - Low + 1) { + createUnreachableSwitchDefault(SI, &DTU); + Changed = true; + } + } } if (Changed) diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index b1f393765cb9d..82b81003ef217 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -518,27 +518,20 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1, ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue()); - // SubsetIntersect is a subset of the actual mathematical intersection of - // CR0 and CR1, while SupersetIntersect is a superset of the actual - // mathematical intersection. If these two ConstantRanges are equal, then - // we know we were able to represent the actual mathematical intersection - // of CR0 and CR1, and can use the same to generate an icmp instruction. - // // Given what we're doing here and the semantics of guards, it would - // actually be correct to just use SubsetIntersect, but that may be too + // be correct to use a subset intersection, but that may be too // aggressive in cases we care about. 
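The CorrelatedValuePropagation change above tracks the smallest and largest case values and, when the cases exactly cover the condition's known range, rewrites the default destination as unreachable. The counting test works because distinct case values with `NumCases == High - Low + 1` are necessarily contiguous. A standalone model of that decision (LVI and SwitchInst details elided):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// The default is provably dead when the known range of the switch condition
// is exactly covered by a contiguous set of case values.
static bool defaultIsDead(const std::vector<int64_t> &Cases,
                          int64_t RangeMin, int64_t RangeMax) {
  if (Cases.empty())
    return false;
  int64_t Low = *std::min_element(Cases.begin(), Cases.end());
  int64_t High = *std::max_element(Cases.begin(), Cases.end());
  // Distinct values with count == High - Low + 1 fill [Low, High], so with
  // matching bounds every possible condition value hits some case.
  return RangeMin == Low && RangeMax == High &&
         (int64_t)Cases.size() == High - Low + 1;
}

int main() {
  // Condition known to be in [0, 2]; cases 0, 1, 2 -> default unreachable.
  assert(defaultIsDead({0, 1, 2}, 0, 2));
  // A hole at 1 leaves the default alive.
  assert(!defaultIsDead({0, 2}, 0, 2));
  return 0;
}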
- auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse(); - auto SupersetIntersect = CR0.intersectWith(CR1); - - APInt NewRHSAP; - CmpInst::Predicate Pred; - if (SubsetIntersect == SupersetIntersect && - SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) { - if (InsertPt) { - ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP); - Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); + if (Optional Intersect = CR0.exactIntersectWith(CR1)) { + APInt NewRHSAP; + CmpInst::Predicate Pred; + if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) { + if (InsertPt) { + ConstantInt *NewRHS = + ConstantInt::get(Cond0->getContext(), NewRHSAP); + Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk"); + } + return true; } - return true; } } } diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 678d341ddcbd4..817e7682cff85 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -156,7 +156,8 @@ class IndVarSimplify { bool rewriteNonIntegerIVs(Loop *L); bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); - /// See if we can convert an exit condition from signed to unsigned. + /// Try to improve our exit conditions by converting the condition from + /// signed to unsigned or rotating computation out of the loop. /// (See inline comment about why this is duplicated from simplifyAndExtend) bool canonicalizeExitCondition(Loop *L); /// Try to eliminate loop exits based on analyzable exit counts @@ -1438,8 +1439,12 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { // For the range reasoning, avoid computing SCEVs in the loop to avoid // poisoning cache with sub-optimal results. For the must-execute case, // this is a necessary precondition for correctness. - if (!L->isLoopInvariant(RHS)) - continue; + if (!L->isLoopInvariant(RHS)) { + if (!L->isLoopInvariant(LHS)) + continue; + // Same logic applies for the inverse case + std::swap(LHS, RHS); + } // Match (icmp signed-cond zext, RHS) Value *LHSOp = nullptr; @@ -1451,7 +1456,8 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); auto FullCR = ConstantRange::getFull(InnerBitWidth); FullCR = FullCR.zeroExtend(OuterBitWidth); - if (FullCR.contains(SE->getUnsignedRange(SE->getSCEV(RHS)))) { + auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); + if (FullCR.contains(RHSCR)) { // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus // replace the signed condition with the unsigned version. ICmp->setPredicate(ICmp->getUnsignedPredicate()); @@ -1460,28 +1466,82 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { // have not changed exit counts, or the values produced by the compare. continue; } + } - // If we have a loop which would be undefined if infinite, and it has at - // most one possible dynamic exit, then we can conclude that exit must - // be taken. If that exit must be taken, and we know the LHS can only - // take values in the positive domain, then we can conclude RHS must - // also be in that same range, and replace a signed compare with an - // unsigned one. - // If the exit might not be taken in a well defined program.
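The zext-based range check above justifies flipping a signed exit comparison to unsigned: `zext(X)` is non-negative by construction, and once RHS provably fits the inner width's unsigned range it is non-negative too, so signed and unsigned orderings agree. A small exhaustive check of that claim (standalone, i8 zero-extended into i16):

#include <cassert>
#include <cstdint>

int main() {
  // LHS = zext i8 -> i16, always in [0, 255]; RHS constrained to the same
  // unsigned range, i.e. FullCR.zeroExtend(16) contains it.
  for (unsigned X = 0; X < 256; ++X)
    for (int RHS = 0; RHS < 256; ++RHS) {
      int16_t L = int16_t(X); // zext result: non-negative as signed
      int16_t R = int16_t(RHS);
      assert((L < R) == (uint16_t(L) < uint16_t(R)));
    }
  return 0;
}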
- if (ExitingBlocks.size() == 1 && SE->loopHasNoAbnormalExits(L) && - SE->loopIsFiniteByAssumption(L)) { - // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus - // replace the signed condition with the unsigned version. - ICmp->setPredicate(ICmp->getUnsignedPredicate()); - Changed = true; + // Now that we've canonicalized the condition to match the extend, + // see if we can rotate the extend out of the loop. + for (auto *ExitingBB : ExitingBlocks) { + auto *BI = dyn_cast(ExitingBB->getTerminator()); + if (!BI) + continue; + assert(BI->isConditional() && "exit branch must be conditional"); - // Given we've changed exit counts, notify SCEV. - // Some nested loops may share same folded exit basic block, - // thus we need to notify top most loop. - SE->forgetTopmostLoop(L); + auto *ICmp = dyn_cast(BI->getCondition()); + if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned()) + continue; + + bool Swapped = false; + auto *LHS = ICmp->getOperand(0); + auto *RHS = ICmp->getOperand(1); + if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS)) + // Nothing to rotate + continue; + if (L->isLoopInvariant(LHS)) { + // Same logic applies for the inverse case until we actually pick + // which operand of the compare to update. + Swapped = true; + std::swap(LHS, RHS); + } + assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS)); + + // Match (icmp unsigned-cond zext, RHS) + // TODO: Extend to handle corresponding sext/signed-cmp case + // TODO: Extend to other invertible functions + Value *LHSOp = nullptr; + if (!match(LHS, m_ZExt(m_Value(LHSOp)))) + continue; + + // In general, we only rotate if we can do so without increasing the number + // of instructions. The exception is when we have a zext(add-rec). The + // reason for allowing this exception is that we know we need to get rid + // of the zext for SCEV to be able to compute a trip count for said loops; + // we consider the new trip count valuable enough to increase instruction + // count by one. + if (!LHS->hasOneUse() && !isa(SE->getSCEV(LHSOp))) + continue; + + // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS, + // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS) + // when zext is loop varying and RHS is loop invariant. This converts + // loop varying work to loop-invariant work. + auto doRotateTransform = [&]() { + assert(ICmp->isUnsigned() && "must have proven unsigned already"); + auto *NewRHS = + CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "", + L->getLoopPreheader()->getTerminator()); + ICmp->setOperand(Swapped ? 1 : 0, LHSOp); + ICmp->setOperand(Swapped ? 0 : 1, NewRHS); + if (LHS->use_empty()) + DeadInsts.push_back(LHS); + }; + + + const DataLayout &DL = ExitingBB->getModule()->getDataLayout(); + const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType()); + const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType()); + auto FullCR = ConstantRange::getFull(InnerBitWidth); + FullCR = FullCR.zeroExtend(OuterBitWidth); + auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L)); + if (FullCR.contains(RHSCR)) { + doRotateTransform(); + Changed = true; + // Note, we are leaving SCEV in an unfortunately imprecise case here + // as rotation tends to reveal information about trip counts not + // previously visible. continue; } } + return Changed; } @@ -1866,8 +1926,8 @@ bool IndVarSimplify::run(Loop *L) { // Eliminate redundant IV cycles.
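The rotation itself replaces `icmp ult (zext X), RHS` with `icmp ult X, (trunc RHS)` once RHS is known to fit the narrow type, trading loop-varying extension work for a single loop-invariant trunc. The equivalence under that precondition, checked standalone:

#include <cassert>
#include <cstdint>

int main() {
  // Precondition the pass establishes via SCEV: zext(trunc(RHS)) == RHS,
  // i.e. RHS already fits in the narrow (i8) type.
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned RHS = 0; RHS < 256; ++RHS) {
      bool Wide = uint16_t(X) < uint16_t(RHS);  // icmp ult (zext X), RHS
      bool Narrow = uint8_t(X) < uint8_t(RHS);  // icmp ult X, (trunc RHS)
      assert(Wide == Narrow);
    }
  return 0;
}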
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); - // Try to convert exit conditions to unsigned - // Note: Handles invalidation internally if needed. + // Try to convert exit conditions to unsigned and rotate computation + // out of the loop. Note: Handles invalidation internally if needed. Changed |= canonicalizeExitCondition(L); // Try to eliminate loop exits based on analyzable exit counts diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 1d09bb1d5ced7..9f605b4ac4ad8 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1763,6 +1763,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() { return Changed; } +namespace { /// Main LoopInterchange Pass. struct LoopInterchangeLegacyPass : public LoopPass { static char ID; @@ -1791,6 +1792,7 @@ struct LoopInterchangeLegacyPass : public LoopPass { return LoopInterchange(SE, LI, DI, DT, ORE).run(L); } }; +} // namespace char LoopInterchangeLegacyPass::ID = 0; diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp index a01287f587d7a..c9c9e60d09218 100644 --- a/llvm/lib/Transforms/Scalar/LoopSink.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -323,15 +323,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, // Traverse preheader's instructions in reverse order because if A depends // on B (A appears after B), A needs to be sunk first before B can be // sunk. - for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) { - Instruction *I = &*II++; + for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) { // No need to check whether the instruction's operands are loop invariant. - assert(L.hasLoopInvariantOperands(I) && + assert(L.hasLoopInvariantOperands(&I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false, + if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false, LICMFlags.get())) continue; - if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, + if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, MSSAU.get())) Changed = true; } diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 10b47c3585a9c..4ffcdba1ae1e8 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5868,6 +5868,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); } +namespace { struct SCEVDbgValueBuilder { SCEVDbgValueBuilder() = default; SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { @@ -6115,6 +6116,7 @@ struct DVIRecoveryRec { Metadata *LocationOp; const llvm::SCEV *SCEV; }; +} // namespace static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI, const SCEVDbgValueBuilder &IterationCount, diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index ead8082f30360..1c186e9a04889 100644 --- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -357,11 +357,10 @@ static bool lowerExpectIntrinsic(Function &F) { // Remove llvm.expect intrinsics. Iterate backwards in order // to process select instructions before the intrinsic gets // removed.
- for (auto BI = BB.rbegin(), BE = BB.rend(); BI != BE;) { - Instruction *Inst = &*BI++; - CallInst *CI = dyn_cast(Inst); + for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(BB))) { + CallInst *CI = dyn_cast(&Inst); if (!CI) { - if (SelectInst *SI = dyn_cast(Inst)) { + if (SelectInst *SI = dyn_cast(&Inst)) { if (handleBrSelExpect(*SI)) ExpectIntrinsicsHandled++; } diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 5f2e7bfe378bd..6b7419abe1d1f 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -66,6 +66,15 @@ static cl::opt namespace { +BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { + BasicBlock *BB = Itr->getParent(); + if (isa(Itr)) + Itr = BB->getFirstInsertionPt(); + if (Itr != BB->end()) + Itr = skipDebugIntrinsics(Itr); + return Itr; +} + // Used to store the scattered form of a vector. using ValueVector = SmallVector; @@ -371,10 +380,11 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) { return Scatterer(Point->getParent(), Point->getIterator(), UndefValue::get(V->getType())); // Put the scattered form of an instruction directly after the - // instruction. + // instruction, skipping over PHI nodes and debug intrinsics. BasicBlock *BB = VOp->getParent(); - return Scatterer(BB, std::next(BasicBlock::iterator(VOp)), - V, &Scattered[V]); + return Scatterer( + BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, + &Scattered[V]); } // In the fallback case, just put the scattered before Point and // keep the result local to Point. diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index fe221326b652f..ffa2f9adb9788 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1167,8 +1167,8 @@ bool SeparateConstOffsetFromGEP::run(Function &F) { if (!DT->isReachableFromEntry(&B)) continue; - for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;) - if (GetElementPtrInst *GEP = dyn_cast(I++)) + for (Instruction &I : llvm::make_early_inc_range(B)) + if (GetElementPtrInst *GEP = dyn_cast(&I)) Changed |= splitGEP(GEP); // No need to split GEP ConstantExprs because all its indices are constant // already. 
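Many hunks in this patch (WholeProgramDevirt, DataFlowSanitizer, LoopSink, LowerExpectIntrinsic, SeparateConstOffsetFromGEP, and the Utils files below) replace hand-rolled "grab, then pre-increment" iterator loops with `llvm::make_early_inc_range`, which advances the iterator before the body runs so the current element can be erased safely. A minimal standalone analogue of the idea over a std::list (not LLVM's implementation):

#include <cassert>
#include <list>

int main() {
  std::list<int> L = {1, 2, 3, 4, 5};
  // Early-increment by hand: hold the next iterator before possibly erasing
  // the current element; make_early_inc_range packages exactly this pattern.
  for (auto It = L.begin(); It != L.end();) {
    auto Cur = It++;        // advance first, like make_early_inc_range
    if (*Cur % 2 == 0)
      L.erase(Cur);         // safe: It no longer refers to *Cur
  }
  assert(L == std::list<int>({1, 3, 5}));
  return 0;
}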
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 9752f521bb241..a27da047bfd39 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -50,7 +50,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 1a07697009eaa..6469c899feead 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -52,6 +53,12 @@ using namespace llvm; #define DEBUG_TYPE "basicblock-utils" +static cl::opt MaxDeoptOrUnreachableSuccessorCheckDepth( + "max-deopt-or-unreachable-succ-check-depth", cl::init(8), cl::Hidden, + cl::desc("Set the maximum path length when checking whether a basic block " + "is followed by a block that either has a terminating " + "deoptimizing call or is terminated with an unreachable")); + void llvm::DetatchDeadBlocks( ArrayRef BBs, SmallVectorImpl *Updates, @@ -485,6 +492,20 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, BI = New; } +bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) { + // Remember visited blocks to avoid infinite loop + SmallPtrSet VisitedBlocks; + unsigned Depth = 0; + while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth && + VisitedBlocks.insert(BB).second) { + if (BB->getTerminatingDeoptimizeCall() || + isa(BB->getTerminator())) + return true; + BB = BB->getUniqueSuccessor(); + } + return false; +} + void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) { BasicBlock::iterator BI(From); ReplaceInstWithInst(From->getParent()->getInstList(), BI, To); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 8bd09198ee745..96aff563aa9b6 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -1046,9 +1046,8 @@ static void eraseLifetimeMarkersOnInputs(const SetVector &Blocks, const SetVector &SunkAllocas, SetVector &LifetimesStart) { for (BasicBlock *BB : Blocks) { - for (auto It = BB->begin(), End = BB->end(); It != End;) { - auto *II = dyn_cast(&*It); - ++It; + for (Instruction &I : llvm::make_early_inc_range(*BB)) { + auto *II = dyn_cast(&I); if (!II || !II->isLifetimeStartOrEnd()) continue; @@ -1619,11 +1618,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC, // Remove @llvm.assume calls that will be moved to the new function from the // old function's assumption cache. 
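The new IsBlockFollowedByDeoptOrUnreachable above walks unique successors under a depth cap, with a visited set to cut cycles. A standalone model over a toy CFG node (the struct and names here are hypothetical, for illustration only):

#include <cassert>
#include <set>

struct Block {
  Block *UniqueSucc = nullptr;        // non-null only with exactly one succ
  bool EndsInDeoptOrUnreachable = false;
};

// Mirrors the helper: bounded walk down unique successors, stopping on
// revisits, looking for a deopt call or unreachable terminator.
static bool followedByDeoptOrUnreachable(const Block *BB,
                                         unsigned MaxDepth = 8) {
  std::set<const Block *> Visited;
  unsigned Depth = 0;
  while (BB && Depth++ < MaxDepth && Visited.insert(BB).second) {
    if (BB->EndsInDeoptOrUnreachable)
      return true;
    BB = BB->UniqueSucc;
  }
  return false;
}

int main() {
  Block A, B, C;
  A.UniqueSucc = &B;
  B.UniqueSucc = &C;
  C.EndsInDeoptOrUnreachable = true;
  assert(followedByDeoptOrUnreachable(&A));
  // A self-loop terminates thanks to the visited set.
  Block Loop;
  Loop.UniqueSucc = &Loop;
  assert(!followedByDeoptOrUnreachable(&Loop));
  return 0;
}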
for (BasicBlock *Block : Blocks) { - for (auto It = Block->begin(), End = Block->end(); It != End;) { - Instruction *I = &*It; - ++It; - - if (auto *AI = dyn_cast(I)) { + for (Instruction &I : llvm::make_early_inc_range(*Block)) { + if (auto *AI = dyn_cast(&I)) { if (AC) AC->unregisterAssumption(AI); AI->eraseFromParent(); diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp index ea29d4ec5beaf..648f4e64a4d2a 100644 --- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -410,11 +410,9 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB, DominatorTree &DT, const PostDominatorTree &PDT, DependenceInfo &DI) { - for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) { + for (Instruction &I : + llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(FromBB)))) { Instruction *MovePos = ToBB.getFirstNonPHIOrDbg(); - Instruction &I = *It; - // Increment the iterator before modifying FromBB. - ++It; if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI)) I.moveBefore(MovePos); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 5b3d13bafa2ac..9dc303c40c223 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -828,6 +828,7 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, } } +namespace { /// Utility for cloning !noalias and !alias.scope metadata. When a code region /// using scoped alias metadata is inlined, the aliasing relationships may not /// hold between the two versions. It is necessary to create a deep clone of the @@ -849,6 +850,7 @@ class ScopedAliasMetadataDeepCloner { /// metadata. void remap(Function::iterator FStart, Function::iterator FEnd); }; +} // namespace ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( const Function *F) { @@ -1672,21 +1674,19 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, for (auto *RI : Returns) { Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0)); - BasicBlock::reverse_iterator I = ++(RI->getIterator().getReverse()); - BasicBlock::reverse_iterator EI = RI->getParent()->rend(); bool InsertRetainCall = IsRetainRV; IRBuilder<> Builder(RI->getContext()); // Walk backwards through the basic block looking for either a matching // autoreleaseRV call or an unannotated call. - for (; I != EI;) { - auto CurI = I++; - + auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()), + RI->getParent()->rend()); + for (Instruction &I : llvm::make_early_inc_range(InstRange)) { // Ignore casts. - if (isa(*CurI)) + if (isa(I)) continue; - if (auto *II = dyn_cast(&*CurI)) { + if (auto *II = dyn_cast(&I)) { if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue || !II->hasNUses(0) || objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd) @@ -1709,7 +1709,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, break; } - auto *CI = dyn_cast(&*CurI); + auto *CI = dyn_cast(&I); if (!CI) break; @@ -2298,8 +2298,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, BB != E; ++BB) { // Add bundle operands to any top-level call sites.
SmallVector OpBundles; - for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { - CallBase *I = dyn_cast(&*BBI++); + for (Instruction &II : llvm::make_early_inc_range(*BB)) { + CallBase *I = dyn_cast(&II); if (!I) continue; diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 3e36f498523d6..74ab37fadf36e 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2190,6 +2190,26 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) { DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}}); } +void llvm::createUnreachableSwitchDefault(SwitchInst *Switch, + DomTreeUpdater *DTU) { + LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + auto *BB = Switch->getParent(); + auto *OrigDefaultBlock = Switch->getDefaultDest(); + OrigDefaultBlock->removePredecessor(BB); + BasicBlock *NewDefaultBlock = BasicBlock::Create( + BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), + OrigDefaultBlock); + new UnreachableInst(Switch->getContext(), NewDefaultBlock); + Switch->setDefaultDest(&*NewDefaultBlock); + if (DTU) { + SmallVector Updates; + Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); + if (!is_contained(successors(BB), OrigDefaultBlock)) + Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); + DTU->applyUpdates(Updates); + } +} + BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, BasicBlock *UnwindEdge, DomTreeUpdater *DTU) { diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index b2fe5648eedb1..d6bdc1bd028b8 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -103,15 +103,15 @@ bool llvm::canPeel(Loop *L) { SmallVector Exits; L->getUniqueNonLatchExitBlocks(Exits); // The latch must either be the only exiting block or all non-latch exit - // blocks have either a deopt or unreachable terminator. Both deopt and - // unreachable terminators are a strong indication they are not taken. Note - // that this is a profitability check, not a legality check. Also note that - // LoopPeeling currently can only update the branch weights of latch blocks - // and branch weights to blocks with deopt or unreachable do not need + // blocks have either a deopt or unreachable terminator or compose a chain of + // blocks where the last one is either deopt or unreachable terminated. Both + // deopt and unreachable terminators are a strong indication they are not + // taken. Note that this is a profitability check, not a legality check. Also + // note that LoopPeeling currently can only update the branch weights of latch + // blocks and branch weights to blocks with deopt or unreachable do not need // updating. return all_of(Exits, [](const BasicBlock *BB) { - return BB->getTerminatingDeoptimizeCall() || - isa(BB->getTerminator()); + return IsBlockFollowedByDeoptOrUnreachable(BB); }); } diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index 692e60a9701e0..c66fd7bb05881 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -134,15 +134,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); // Visit each use of the OrigHeader instruction. 
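  // [Editor's note on the LoopPeel change above -- illustrative example.]
  // With IsBlockFollowedByDeoptOrUnreachable, a non-latch exit no longer has
  // to end in deopt/unreachable itself; it may head a chain of unique
  // successors whose final block does. A hypothetical CFG accepted by the
  // new check:
  //
  //   exit:       br label %exit.tail    ; unique successor
  //   exit.tail:  unreachable
  //
  // The walk follows at most max-deopt-or-unreachable-succ-check-depth
  // (default 8) blocks and rejects cycles via the visited set.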
- for (Value::use_iterator UI = OrigHeaderVal->use_begin(), - UE = OrigHeaderVal->use_end(); - UI != UE;) { - // Grab the use before incrementing the iterator. - Use &U = *UI; - - // Increment the iterator before removing the use from the list. - ++UI; - + for (Use &U : llvm::make_early_inc_range(OrigHeaderVal->uses())) { // SSAUpdater can't handle a non-PHI use in the same block as an // earlier def. We can easily handle those cases manually. Instruction *UserInst = cast(U.getUser()); @@ -404,9 +396,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { D->getExpression()}; }; SmallDenseSet DbgIntrinsics; - for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend(); - I != E; ++I) { - if (auto *DII = dyn_cast(&*I)) + for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) { + if (auto *DII = dyn_cast(&I)) DbgIntrinsics.insert(makeHash(DII)); else break; diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index e2eae484f5907..f0f0793356839 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -612,10 +612,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, for (auto *Block : L->blocks()) for (Instruction &I : *Block) { auto *Undef = UndefValue::get(I.getType()); - for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); - UI != E;) { - Use &U = *UI; - ++UI; + for (Use &U : llvm::make_early_inc_range(I.uses())) { if (auto *Usr = dyn_cast(U.getUser())) if (L->contains(Usr->getParent())) continue; diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index 4475474e6b16f..c7401d79446c5 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -808,6 +808,9 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { return; ValueLatticeElement OpSt = getValueState(I.getOperand(0)); + if (OpSt.isUnknownOrUndef()) + return; + if (Constant *OpC = getConstant(OpSt)) { // Fold the constant as we build. Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL); @@ -815,9 +818,14 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { return; // Propagate constant value markConstant(&I, C); - } else if (OpSt.isConstantRange() && I.getDestTy()->isIntegerTy()) { + } else if (I.getDestTy()->isIntegerTy()) { auto &LV = getValueState(&I); - ConstantRange OpRange = OpSt.getConstantRange(); + ConstantRange OpRange = + OpSt.isConstantRange() + ? OpSt.getConstantRange() + : ConstantRange::getFull( + I.getOperand(0)->getType()->getScalarSizeInBits()); + Type *DestTy = I.getDestTy(); // Vectors where all elements have the same known constant range are treated // as a single constant range in the lattice. 
When bitcasting such vectors, @@ -832,7 +840,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) { ConstantRange Res = OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy)); mergeInValue(LV, &I, ValueLatticeElement::getRange(Res)); - } else if (!OpSt.isUnknownOrUndef()) + } else markOverdefined(&I); } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 7b49f47778e07..3eab293b433e9 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4782,26 +4782,6 @@ static bool CasesAreContiguous(SmallVectorImpl &Cases) { return true; } -static void createUnreachableSwitchDefault(SwitchInst *Switch, - DomTreeUpdater *DTU) { - LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); - auto *BB = Switch->getParent(); - auto *OrigDefaultBlock = Switch->getDefaultDest(); - OrigDefaultBlock->removePredecessor(BB); - BasicBlock *NewDefaultBlock = BasicBlock::Create( - BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), - OrigDefaultBlock); - new UnreachableInst(Switch->getContext(), NewDefaultBlock); - Switch->setDefaultDest(&*NewDefaultBlock); - if (DTU) { - SmallVector Updates; - Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); - if (!is_contained(successors(BB), OrigDefaultBlock)) - Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); - DTU->applyUpdates(Updates); - } -} - /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index bd30be011472d..5b7fd4349c6c8 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -942,6 +942,7 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, } // namespace llvm +namespace { //===----------------------------------------------------------------------===// // Widen Induction Variables - Extend the width of an IV to cover its // widest uses. @@ -1072,7 +1073,7 @@ class WidenIV { private: SmallVector NarrowIVUsers; }; - +} // namespace /// Determine the insertion point for this user. By default, insert immediately /// before the user. SCEVExpander or LICM will hoist loop invariants out of the diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 5a2eb809b6812..a7d6609f8c569 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -303,12 +303,9 @@ class LoopVectorizationPlanner { /// Look through the existing plans and return true if we have one with all /// the vectorization factors in question. - bool hasPlanWithVFs(const ArrayRef VFs) const { - return any_of(VPlans, [&](const VPlanPtr &Plan) { - return all_of(VFs, [&](const ElementCount &VF) { - return Plan->hasVF(VF); - }); - }); + bool hasPlanWithVF(ElementCount VF) const { + return any_of(VPlans, + [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); }); } /// Test a \p Predicate on a \p Range of VF's. 
Return the value of applying diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 909788c5a87bd..b7e077c461df3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1103,11 +1103,10 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName, } /// Return a value for Step multiplied by VF. -static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) { - assert(isa(Step) && "Expected an integer step"); - Constant *StepVal = ConstantInt::get( - Step->getType(), - cast(Step)->getSExtValue() * VF.getKnownMinValue()); +static Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF, + int64_t Step) { + assert(Ty->isIntegerTy() && "Expected an integer step"); + Constant *StepVal = ConstantInt::get(Ty, Step * VF.getKnownMinValue()); return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal; } @@ -1123,7 +1122,7 @@ static Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) { assert(FTy->isFloatingPointTy() && "Expected floating point type!"); Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits()); Value *RuntimeVF = getRuntimeVF(B, IntTy, VF); - return B.CreateSIToFP(RuntimeVF, FTy); + return B.CreateUIToFP(RuntimeVF, FTy); } void reportVectorizationFailure(const StringRef DebugMsg, @@ -2034,7 +2033,6 @@ class GeneratedRTChecks { if (MemCheckExp.isInsertedInstruction(&I)) continue; SE.forgetValue(&I); - SE.eraseValueFromMap(&I); I.eraseFromParent(); } } @@ -2294,12 +2292,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI( Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType); } - Value *Zero; - if (Start->getType()->isFloatingPointTy()) - Zero = ConstantFP::get(Start->getType(), 0); - else - Zero = ConstantInt::get(Start->getType(), 0); - + Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0); Value *SplatStart = Builder.CreateVectorSplat(VF, Start); Value *SteppedStart = getStepVector(SplatStart, Zero, Step, II.getInductionOpcode()); @@ -2626,8 +2619,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, } for (unsigned Part = 0; Part < UF; ++Part) { - Value *StartIdx0 = - createStepForVF(Builder, ConstantInt::get(IntStepTy, Part), VF); + Value *StartIdx0 = createStepForVF(Builder, IntStepTy, VF, Part); if (!IsUniform && VF.isScalable()) { auto *SplatStartIdx = Builder.CreateVectorSplat(VF, StartIdx0); @@ -2969,7 +2961,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction( if (isMaskRequired) // Reverse of a null all-one mask is a null mask. BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]); } else { - Value *Increment = createStepForVF(Builder, Builder.getInt32(Part), VF); + Value *Increment = + createStepForVF(Builder, Builder.getInt32Ty(), VF, Part); PartPtr = cast( Builder.CreateGEP(ScalarDataTy, Ptr, Increment)); PartPtr->setIsInBounds(InBounds); @@ -3188,7 +3181,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { Type *Ty = TC->getType(); // This is where we can make the step a runtime constant. - Value *Step = createStepForVF(Builder, ConstantInt::get(Ty, UF), VF); + Value *Step = createStepForVF(Builder, Ty, VF, UF); // If the tail is to be folded by masking, round the number of iterations N // up to a multiple of Step instead of rounding down. 
This is done by first @@ -3278,8 +3271,7 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, // If tail is to be folded, vector loop takes care of all iterations. Value *CheckMinIters = Builder.getFalse(); if (!Cost->foldTailByMasking()) { - Value *Step = - createStepForVF(Builder, ConstantInt::get(Count->getType(), UF), VF); + Value *Step = createStepForVF(Builder, Count->getType(), VF, UF); CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check"); } // Create new preheader for vector loop. @@ -3755,7 +3747,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() { // The loop step is equal to the vectorization factor (num of SIMD elements) // times the unroll factor (num of SIMD instructions). Builder.SetInsertPoint(&*Lp->getHeader()->getFirstInsertionPt()); - Value *Step = createStepForVF(Builder, ConstantInt::get(IdxTy, UF), VF); + Value *Step = createStepForVF(Builder, IdxTy, VF, UF); Value *CountRoundDown = getOrCreateVectorTripCount(Lp); Induction = createInductionVariable(Lp, StartIdx, CountRoundDown, Step, @@ -4355,7 +4347,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, RdxDesc.getOpcode(), PhiTy, TargetTransformInfo::ReductionFlags())) { auto *VecRdxPhi = - cast(State.get(PhiR->getVPSingleValue(), Part)); + cast(State.get(PhiR, Part)); VecRdxPhi->setIncomingValueForBlock( LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel); } @@ -4376,13 +4368,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR, Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy); Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) : Builder.CreateZExt(Trunc, VecTy); - for (Value::user_iterator UI = RdxParts[Part]->user_begin(); - UI != RdxParts[Part]->user_end();) - if (*UI != Trunc) { - (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd); + for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users())) + if (U != Trunc) { + U->replaceUsesOfWith(RdxParts[Part], Extnd); RdxParts[Part] = Extnd; - } else { - ++UI; } } Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); @@ -4760,8 +4749,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, } for (unsigned Part = 0; Part < UF; ++Part) { - Value *PartStart = createStepForVF( - Builder, ConstantInt::get(PtrInd->getType(), Part), VF); + Value *PartStart = + createStepForVF(Builder, PtrInd->getType(), VF, Part); if (NeedsVectorIndex) { // Here we cache the whole vector, which means we can support the @@ -5384,12 +5373,14 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) { return (!I || !TheLoop->contains(I)); }; + // Worklist containing uniform instructions demanding lane 0. SetVector Worklist; BasicBlock *Latch = TheLoop->getLoopLatch(); - // Instructions that are scalar with predication must not be considered - // uniform after vectorization, because that would create an erroneous - // replicating region where only a single instance out of VF should be formed. + // Add uniform instructions demanding lane 0 to the worklist. Instructions + // that are scalar with predication must not be considered uniform after + // vectorization, because that would create an erroneous replicating region + // where only a single instance out of VF should be formed. // TODO: optimize such seldom cases if found important, see PR40816. 
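  // [Editor's note -- illustrative worked example, not part of the patch.]
  // The reworked createStepForVF above takes the step as a plain integer:
  // for a fixed VF of 8 with Step = 2 it folds to the constant 16, while for
  // a scalable VF of <vscale x 4> it emits vscale * 8 via B.CreateVScale().
  // The related SIToFP -> UIToFP switch in getRuntimeVFAsFloat is sound
  // because a runtime VF (vscale times the known minimum width) is always
  // non-negative, so the unsigned conversion loses nothing.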
auto addToWorklistIfAllowed = [&](Instruction *I) -> void { if (isOutOfScope(I)) { @@ -6030,19 +6021,27 @@ bool LoopVectorizationCostModel::isMoreProfitable( return RTCostA < RTCostB; } - // When set to preferred, for now assume vscale may be larger than 1, so - // that scalable vectorization is slightly favorable over fixed-width - // vectorization. + // Improve estimate for the vector width if it is scalable. + unsigned EstimatedWidthA = A.Width.getKnownMinValue(); + unsigned EstimatedWidthB = B.Width.getKnownMinValue(); + if (Optional VScale = TTI.getVScaleForTuning()) { + if (A.Width.isScalable()) + EstimatedWidthA *= VScale.getValue(); + if (B.Width.isScalable()) + EstimatedWidthB *= VScale.getValue(); + } + + // When set to preferred, for now assume vscale may be larger than 1 (or the + // one being tuned for), so that scalable vectorization is slightly favorable + // over fixed-width vectorization. if (Hints->isScalableVectorizationPreferred()) if (A.Width.isScalable() && !B.Width.isScalable()) - return (CostA * B.Width.getKnownMinValue()) <= - (CostB * A.Width.getKnownMinValue()); + return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA); // To avoid the need for FP division: // (CostA / A.Width) < (CostB / B.Width) // <=> (CostA * B.Width) < (CostB * A.Width) - return (CostA * B.Width.getKnownMinValue()) < - (CostB * A.Width.getKnownMinValue()); + return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA); } VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( @@ -6072,11 +6071,22 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor( VectorizationCostTy C = expectedCost(i, &InvalidCosts); VectorizationFactor Candidate(i, C.first); - LLVM_DEBUG( - dbgs() << "LV: Vector loop of width " << i << " costs: " - << (Candidate.Cost / Candidate.Width.getKnownMinValue()) - << (i.isScalable() ? " (assuming a minimum vscale of 1)" : "") - << ".\n"); + +#ifndef NDEBUG + unsigned AssumedMinimumVscale = 1; + if (Optional VScale = TTI.getVScaleForTuning()) + AssumedMinimumVscale = VScale.getValue(); + unsigned Width = + Candidate.Width.isScalable() + ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale + : Candidate.Width.getFixedValue(); + LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i + << " costs: " << (Candidate.Cost / Width)); + if (i.isScalable()) + LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of " + << AssumedMinimumVscale << ")"); + LLVM_DEBUG(dbgs() << ".\n"); +#endif if (!C.second && !ForceVectorization) { LLVM_DEBUG( @@ -6240,15 +6250,6 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( return Result; } - // FIXME: This can be fixed for scalable vectors later, because at this stage - // the LoopVectorizer will only consider vectorizing a loop with scalable - // vectors when the loop has a hint to enable vectorization for a given VF. - if (MainLoopVF.isScalable()) { - LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization for scalable vectors not " - "yet supported.\n"); - return Result; - } - // Not really a cost consideration, but check for unsupported cases here to // simplify the logic. 
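  // [Editor's note -- illustrative worked example for isMoreProfitable
  // above.] Suppose TTI.getVScaleForTuning() returns 2, candidate A is
  // {VF = vscale x 4, Cost = 12}, and candidate B is {VF = 8 fixed,
  // Cost = 16}. Then EstimatedWidthA = 4 * 2 = 8 and EstimatedWidthB = 8,
  // and the division-free test (CostA * EstimatedWidthB) <
  // (CostB * EstimatedWidthA) becomes 12 * 8 < 16 * 8, i.e. 96 < 128, so
  // the scalable candidate A is chosen.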
if (!isCandidateForEpilogueVectorization(*TheLoop, MainLoopVF)) { @@ -6261,7 +6262,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( if (EpilogueVectorizationForceVF > 1) { LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";); ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF); - if (LVP.hasPlanWithVFs({MainLoopVF, ForcedEC})) + if (LVP.hasPlanWithVF(ForcedEC)) return {ForcedEC, 0}; else { LLVM_DEBUG( @@ -6279,14 +6280,24 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor( return Result; } - if (!isEpilogueVectorizationProfitable(MainLoopVF)) + auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue()); + if (MainLoopVF.isScalable()) + LLVM_DEBUG( + dbgs() << "LEV: Epilogue vectorization using scalable vectors not " + "yet supported. Converting to fixed-width (VF=" + << FixedMainLoopVF << ") instead\n"); + + if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) { + LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for " + "this loop\n"); return Result; + } for (auto &NextVF : ProfitableVFs) - if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) && + if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) && (Result.Width.getFixedValue() == 1 || isMoreProfitable(NextVF, Result)) && - LVP.hasPlanWithVFs({MainLoopVF, NextVF.Width})) + LVP.hasPlanWithVF(NextVF.Width)) Result = NextVF; if (Result != VectorizationFactor::Disabled()) @@ -8415,7 +8426,9 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() { OldInduction = Legal->getPrimaryInduction(); Type *IdxTy = Legal->getWidestInductionType(); Value *StartIdx = ConstantInt::get(IdxTy, 0); - Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF); + + IRBuilder<> B(&*Lp->getLoopPreheader()->getFirstInsertionPt()); + Value *Step = getRuntimeVF(B, IdxTy, VF * UF); Value *CountRoundDown = getOrCreateVectorTripCount(Lp); EPI.VectorTripCount = CountRoundDown; Induction = @@ -8464,7 +8477,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck( ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; Value *CheckMinIters = Builder.CreateICmp( - P, Count, getRuntimeVF(Builder, Count->getType(), VFactor * UFactor), + P, Count, createStepForVF(Builder, Count->getType(), VFactor, UFactor), "min.iters.check"); if (!ForEpilogue) @@ -8616,10 +8629,11 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ? 
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT; - Value *CheckMinIters = Builder.CreateICmp( - P, Count, - getRuntimeVF(Builder, Count->getType(), EPI.EpilogueVF * EPI.EpilogueUF), - "min.epilog.iters.check"); + Value *CheckMinIters = + Builder.CreateICmp(P, Count, + createStepForVF(Builder, Count->getType(), + EPI.EpilogueVF, EPI.EpilogueUF), + "min.epilog.iters.check"); ReplaceInstWithInst( Insert->getTerminator(), @@ -8745,9 +8759,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { if (Legal->getPrimaryInduction()) IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); else { - auto IVRecipe = new VPWidenCanonicalIVRecipe(); + auto *IVRecipe = new VPWidenCanonicalIVRecipe(); Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint); - IV = IVRecipe->getVPSingleValue(); + IV = IVRecipe; } VPValue *BTC = Plan->getOrCreateBackedgeTakenCount(); bool TailFolded = !CM.isScalarEpilogueAllowed(); @@ -9475,16 +9489,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( if (!RecurPhi) continue; + VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe(); + VPBasicBlock *InsertBlock = PrevRecipe->getParent(); + auto *Region = GetReplicateRegion(PrevRecipe); + if (Region) + InsertBlock = cast(Region->getSingleSuccessor()); + if (Region || PrevRecipe->isPhi()) + Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi()); + else + Builder.setInsertPoint(InsertBlock, std::next(PrevRecipe->getIterator())); + auto *RecurSplice = cast( Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice, {RecurPhi, RecurPhi->getBackedgeValue()})); - VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe(); - if (auto *Region = GetReplicateRegion(PrevRecipe)) { - VPBasicBlock *Succ = cast(Region->getSingleSuccessor()); - RecurSplice->moveBefore(*Succ, Succ->getFirstNonPhi()); - } else - RecurSplice->moveAfter(PrevRecipe); RecurPhi->replaceAllUsesWith(RecurSplice); // Set the first operand of RecurSplice to RecurPhi again, after replacing // all users. @@ -9776,6 +9794,9 @@ void VPReductionRecipe::execute(VPTransformState &State) { Value *PrevInChain = State.get(getChainOp(), 0); RecurKind Kind = RdxDesc->getRecurrenceKind(); bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc); + // Propagate the fast-math flags carried by the underlying instruction. + IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); + State.Builder.setFastMathFlags(RdxDesc->getFastMathFlags()); for (unsigned Part = 0; Part < State.UF; ++Part) { Value *NewVecOp = State.get(getVecOp(), Part); if (VPValue *Cond = getCondOp()) { @@ -10422,7 +10443,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { F->getParent()->getDataLayout()); if (!VF.Width.isScalar() || IC > 1) Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate()); - VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); using namespace ore; if (!VectorizeLoop) { @@ -10431,6 +10451,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { // interleave it. 
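      // [Editor's note -- illustrative sketch, not part of the patch.] Two
      // changes above deserve unpacking. First, LVP.getBestPlanFor() moves
      // into the individual branches below because the epilogue path needs
      // separate plans for EPI.MainLoopVF and EPI.EpilogueVF. Second, the
      // FastMathFlagGuard added to VPReductionRecipe::execute is RAII: it
      // saves the builder's fast-math flags and restores them on scope exit,
      // so the reduction's FMF apply only to instructions created inside:
      //
      //   {
      //     IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      //     Builder.setFastMathFlags(RdxDesc->getFastMathFlags());
      //     // instructions built here carry the reduction's FMF
      //   } // previous flags restored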
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM, BFI, PSI, Checks); + + VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT); ORE->emit([&]() { @@ -10454,7 +10476,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestPlan, MainILV, DT); + VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF); + LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV, + DT); ++LoopsVectorized; simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */); @@ -10467,7 +10491,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks); - LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestPlan, EpilogILV, + + VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF); + LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT); ++LoopsEpilogueVectorized; @@ -10476,6 +10502,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, &LVL, &CM, BFI, PSI, Checks); + + VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); LVP.executePlan(VF.Width, IC, BestPlan, LB, DT); ++LoopsVectorized; diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ec0f1111985d3..4521e502e5248 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -588,7 +588,6 @@ static void addMask(SmallVectorImpl &Mask, ArrayRef SubMask) { /// values 3 and 7 respectively: /// before: 6 9 5 4 9 2 1 0 /// after: 6 3 5 4 7 2 1 0 -/// \returns Fixed ordering. static void fixupOrderingIndices(SmallVectorImpl &Order) { const unsigned Sz = Order.size(); SmallBitVector UsedIndices(Sz); @@ -6683,28 +6682,28 @@ void BoUpSLP::optimizeGatherSequence() { "Worklist not sorted properly!"); BasicBlock *BB = (*I)->getBlock(); // For all instructions in blocks containing gather sequences: - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { - Instruction *In = &*it++; - if (isDeleted(In)) + for (Instruction &In : llvm::make_early_inc_range(*BB)) { + if (isDeleted(&In)) continue; - if (!isa(In) && !isa(In) && - !isa(In)) + if (!isa(&In) && !isa(&In) && + !isa(&In)) continue; // Check if we can replace this instruction with any of the // visited instructions. + bool Replaced = false; for (Instruction *v : Visited) { - if (In->isIdenticalTo(v) && - DT->dominates(v->getParent(), In->getParent())) { - In->replaceAllUsesWith(v); - eraseInstruction(In); - In = nullptr; + if (In.isIdenticalTo(v) && + DT->dominates(v->getParent(), In.getParent())) { + In.replaceAllUsesWith(v); + eraseInstruction(&In); + Replaced = true; break; } } - if (In) { - assert(!is_contained(Visited, In)); - Visited.push_back(In); + if (!Replaced) { + assert(!is_contained(Visited, &In)); + Visited.push_back(&In); } } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 7646255b7b134..638467f94e1c1 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1264,7 +1264,7 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { VF.isScalar() ? 
Indices.back() : ConstantVector::get(Indices); // Add the consecutive indices to the vector value. Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); - State.set(getVPSingleValue(), CanonicalVectorIV, Part); + State.set(this, CanonicalVectorIV, Part); } } @@ -1272,7 +1272,7 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "EMIT "; - getVPSingleValue()->printAsOperand(O, SlotTracker); + printAsOperand(O, SlotTracker); O << " = WIDEN-CANONICAL-INDUCTION"; } #endif diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index df65541ce7fc5..00ee31007cb7f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1593,11 +1593,11 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { }; /// A Recipe for widening the canonical induction variable of the vector loop. -class VPWidenCanonicalIVRecipe : public VPRecipeBase { +class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue { public: - VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC, {}) { - new VPValue(nullptr, this); - } + VPWidenCanonicalIVRecipe() + : VPRecipeBase(VPWidenCanonicalIVSC, {}), + VPValue(VPValue::VPVWidenCanonicalIVSC, nullptr, this) {} ~VPWidenCanonicalIVRecipe() override = default; diff --git a/llvm/test/Analysis/BasicAA/assume-index-positive.ll b/llvm/test/Analysis/BasicAA/assume-index-positive.ll index b7ae7060bab40..13f51aba5db70 100644 --- a/llvm/test/Analysis/BasicAA/assume-index-positive.ll +++ b/llvm/test/Analysis/BasicAA/assume-index-positive.ll @@ -145,12 +145,12 @@ define void @shl_of_non_negative(i8* %ptr, i64 %a) { ret void } -; TODO: Unlike the previous case, %ptr.neg and %ptr.shl can't alias, because +; Unlike the previous case, %ptr.neg and %ptr.shl can't alias, because ; shl nsw of non-negative is non-negative. define void @shl_nsw_of_non_negative(i8* %ptr, i64 %a) { ; CHECK-LABEL: Function: shl_nsw_of_non_negative ; CHECK: NoAlias: i8* %ptr.a, i8* %ptr.neg -; CHECK: MayAlias: i8* %ptr.neg, i8* %ptr.shl +; CHECK: NoAlias: i8* %ptr.neg, i8* %ptr.shl %a.cmp = icmp sge i64 %a, 0 call void @llvm.assume(i1 %a.cmp) %ptr.neg = getelementptr i8, i8* %ptr, i64 -2 diff --git a/llvm/test/Analysis/BasicAA/index-size.ll b/llvm/test/Analysis/BasicAA/index-size.ll new file mode 100644 index 0000000000000..da365014dd6bb --- /dev/null +++ b/llvm/test/Analysis/BasicAA/index-size.ll @@ -0,0 +1,18 @@ +; RUN: opt -basic-aa -aa-eval -print-all-alias-modref-info -disable-output %s 2>&1 | FileCheck %s + +target datalayout = "p:64:64:64:32" + +; gep.1 and gep.2 must alias, because they are truncated to the index size +; (32-bit), not the pointer size (64-bit). 
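; [Editor's note -- illustrative arithmetic for the test below.] In the
; datalayout "p:64:64:64:32" the fourth value is the 32-bit index size, so a
; GEP offset is truncated modulo 2^32 before the address computation. The
; offset 4294967296 below equals 2^32 and therefore wraps to 0, making
; %gep.1 compute the same address as %ptr (and as %gep.2, whose offset is 0).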
+define void @mustalias_due_to_index_size(i8* %ptr) { +; CHECK-LABEL: Function: mustalias_due_to_index_size +; CHECK-NEXT: MustAlias: i8* %gep.1, i8* %ptr +; CHECK-NEXT: MustAlias: i8* %gep.2, i8* %ptr +; CHECK-NEXT: MustAlias: i8* %gep.1, i8* %gep.2 +; + %gep.1 = getelementptr i8, i8* %ptr, i64 4294967296 + store i8 0, i8* %gep.1 + %gep.2 = getelementptr i8, i8* %ptr, i64 0 + store i8 1, i8* %gep.2 + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll new file mode 100644 index 0000000000000..5b7a7bdbadfeb --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-accesses-with-gaps.ll @@ -0,0 +1,204 @@ +; RUN: opt -loop-vectorize -enable-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=DISABLED_MASKED_STRIDED +; RUN: opt -loop-vectorize -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -mcpu=skx --debug-only=loop-vectorize < %s 2>&1 | FileCheck %s --check-prefix=ENABLED_MASKED_STRIDED +; REQUIRES: asserts + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; (1) Interleave-group with factor 4, storing only 2 members out of the 4. +; Check that when we allow masked-memops to support interleave-group with gaps, +; the store is vectorized using a wide masked store, with a 1,1,0,0,1,1,0,0,... mask. +; Check that when we don't allow masked-memops to support interleave-group with gaps, +; the store is scalarized. +; The input IR was generated from this source: +; for(i=0;i<1024;i++){ +; points[i*4] = x[i]; +; points[i*4 + 1] = y[i]; +; } +; (relates to the testcase in PR50566) + +; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 110 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 228 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 + +; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 
%2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 36 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 73 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 + +define void @test1(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readonly %y) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv + %0 = load i16, i16* %arrayidx, align 2 + %1 = shl nuw nsw i64 %indvars.iv, 2 + %arrayidx2 = getelementptr inbounds i16, i16* %points, i64 %1 + store i16 %0, i16* %arrayidx2, align 2 + %arrayidx4 = getelementptr inbounds i16, i16* %y, i64 %indvars.iv + %2 = load i16, i16* %arrayidx4, align 2 + %3 = or i64 %1, 1 + %arrayidx7 = getelementptr inbounds i16, i16* %points, i64 %3 + store i16 %2, i16* %arrayidx7, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} + +; (2) Same as above, but this time the gaps mask of the store is also And-ed with the +; fold-tail mask. If using masked memops to vectorize interleaved-group with gaps is +; not allowed, the store is scalarized and predicated. +; The input IR was generated from this source: +; for(i=0;i 0). +; If using masked memops to vectorize interleaved-group with gaps is +; not allowed, the store is scalarized and predicated. +; Here the Interleave-group is with factor 3, storing only 1 member out of the 3. 
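; [Editor's note -- illustrative detail.] For this factor-3 interleave group
; that stores only member 0, the gaps mask of a wide masked store would be
; the repeating pattern <1,0,0, 1,0,0, ...>, and-ed with the vectorized
; condition (x[i] > 0) since the store sits under that guard.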
+; The input IR was generated from this source: +; for(i=0;i<1024;i++){ +; if (x[i] > 0) +; points[i*3] = x[i]; +; } + +; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 + +; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 + +define void @test(i16* noalias nocapture %points, i16* noalias nocapture readonly %x, i16* noalias nocapture readnone %y) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] + %arrayidx = getelementptr inbounds i16, i16* %x, i64 %indvars.iv + %0 = load i16, i16* %arrayidx, align 2 + %cmp1 = icmp sgt i16 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %1 = mul nuw nsw i64 %indvars.iv, 3 + %arrayidx6 = getelementptr inbounds i16, i16* %points, i64 %1 + store i16 %0, i16* %arrayidx6, align 2 + br label %for.inc + +for.inc: + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll new file mode 100644 index 0000000000000..702f4eaa4ff66 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll @@ -0,0 +1,465 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse4.1| FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu 
-analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 + +define void @replication_i32_stride2() nounwind { +; SSE2-LABEL: 'replication_i32_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 
for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i32_stride2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> + ret void +} + +define void @replication_i32_stride3() nounwind { +; SSE2-LABEL: 'replication_i32_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i32_stride3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> + ret void +} + +define void @replication_i32_stride4() nounwind { +; SSE2-LABEL: 'replication_i32_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; 
SSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void +; +; AVX512-LABEL: 'replication_i32_stride4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> + ret void +} + +define void @replication_i32_stride5() nounwind { +; SSE2-LABEL: 'replication_i32_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> 
poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i32_stride5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> + ret void +} + +define void @replication_i32_stride6() nounwind { +; SSE2-LABEL: 'replication_i32_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x 
i32> undef, <16 x i32> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i32_stride6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> + ret void +} + +define void @replication_i32_stride7() nounwind { +; SSE2-LABEL: 'replication_i32_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE2-NEXT: Cost Model: 
Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 472 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: 
%vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i32_stride7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> + ret void +} + +define void @replication_i32_stride8() nounwind { +; SSE2-LABEL: 'replication_i32_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i32_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x 
i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i32_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i32_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i32_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i32_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost 
of 184 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i32_stride8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> + %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> + %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> + %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> + %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll new file mode 100644 index 0000000000000..73cd261676808 --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll @@ -0,0 +1,409 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 + +define void @replication_i64_stride2() nounwind { +; SSE2-LABEL: 'replication_i64_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 72
for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i64_stride2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> + ret void +} + +define void @replication_i64_stride3() nounwind { +; SSE2-LABEL: 'replication_i64_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for 
instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i64_stride3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> + ret void +} + +define void @replication_i64_stride4() nounwind { +; SSE2-LABEL: 'replication_i64_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x 
i64> undef, <2 x i64> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i64_stride4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 
for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> + ret void +} + +define void @replication_i64_stride5() nounwind { +; SSE2-LABEL: 'replication_i64_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i64_stride5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> + ret void +} + +define void @replication_i64_stride6() nounwind { +; SSE2-LABEL: 'replication_i64_stride6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector 
<8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i64_stride6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 172 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> + ret void +} + +define void @replication_i64_stride7() nounwind { +; SSE2-LABEL: 'replication_i64_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 
32 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i64_stride7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> + ret void +} + +define void @replication_i64_stride8() nounwind { +; SSE2-LABEL: 'replication_i64_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i64_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void +; +; SSSE3-LABEL: 'replication_i64_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i64_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i64_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i64_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i64_stride8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> 
poison, <16 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> + ret void +} diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll new file mode 100644 index 0000000000000..7e3e59749643b --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll @@ -0,0 +1,579 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=SSSE3 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=AVX +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 +; RUN: opt < %s -cost-model -mtriple=x86_64-pc-linux-gnu -analyze -mattr=+avx512f,+avx512vl,+avx512vbmi | FileCheck %s --check-prefixes=AVX512 + +define void @replication_i8_stride2() nounwind { +; SSE2-LABEL: 'replication_i8_stride2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride2' +; SSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 
x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride2' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 372 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 744 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride2' +; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an 
estimated cost of 96 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride2' +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i8_stride2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 496 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> + ret void +} + +define void @replication_i8_stride3() nounwind { +; SSE2-LABEL: 'replication_i8_stride3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> 
poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride3' +; SSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 278 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 556 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1112 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride3' +; SSE41-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost 
of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride3' +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i8_stride3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> 
poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 632 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> + ret void +} + +define void @replication_i8_stride4() nounwind { +; SSE2-LABEL: 'replication_i8_stride4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride4' +; SSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride4' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 77 for 
instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 155 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 310 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 620 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1240 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride4' +; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride4' +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> 
poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i8_stride4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> + ret void +} + +define void @replication_i8_stride5() nounwind { +; SSE2-LABEL: 'replication_i8_stride5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride5' +; SSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost 
of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride5' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride5' +; SSE41-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i8> 
undef, <8 x i8> poison, <40 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 768 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride5' +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i8_stride5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 452 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 904 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> + ret void +} + +define void @replication_i8_stride6() nounwind { +; SSE2-LABEL: 'replication_i8_stride6' +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride6' +; SSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride6' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 494 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 988 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1976 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride6' +; SSE41-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector 
<4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 448 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 896 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride6' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i8_stride6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512-NEXT: Cost 
Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 520 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1040 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> + ret void +} + +define void @replication_i8_stride7() nounwind { +; SSE2-LABEL: 'replication_i8_stride7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride7' +; SSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride7' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf2 = 
shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2104 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride7' +; SSE41-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 512 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1024 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride7' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 
x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1144 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i8_stride7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> + ret void +} + +define void @replication_i8_stride8() nounwind { +; SSE2-LABEL: 'replication_i8_stride8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void +; +; SSE3-LABEL: 'replication_i8_stride8' +; SSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'replication_i8_stride8' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 139 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 558 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1116 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2232 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE41-LABEL: 'replication_i8_stride8' +; SSE41-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'replication_i8_stride8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 
x i8> undef, <2 x i8> poison, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1152 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX-LABEL: 'replication_i8_stride8' +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; AVX512-LABEL: 'replication_i8_stride8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; + %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> + %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> + %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> + %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> + %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, 
<256 x i32> + %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> + %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> + ret void +} diff --git a/llvm/test/Analysis/LazyCallGraph/blockaddress.ll b/llvm/test/Analysis/LazyCallGraph/blockaddress.ll new file mode 100644 index 0000000000000..cf2d00bce6db8 --- /dev/null +++ b/llvm/test/Analysis/LazyCallGraph/blockaddress.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes="cgscc(function(sccp,simplifycfg))" < %s -S | FileCheck %s + +define i32 @baz(i32 %y, i1 %b) { +; CHECK-LABEL: @baz( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[B:%.*]], label [[LAB:%.*]], label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[P_0:%.*]] = phi i8* [ null, [[FOR_COND]] ], [ blockaddress(@baz, [[LAB]]), [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* [[P_0]], i64 1 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: lab: +; CHECK-NEXT: ret i32 0 +; +entry: + br i1 %b, label %lab, label %for.cond.preheader + +for.cond.preheader: + br label %for.cond + +for.cond: + %p.0 = phi i8* [ null, %for.cond ], [ blockaddress(@baz, %lab), %for.cond.preheader ] + %incdec.ptr = getelementptr inbounds i8, i8* %p.0, i64 1 + br label %for.cond + +lab: + ret i32 0 +} diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll index f7e978e1faf4d..1d9babddedc3a 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-implied-addrec.ll @@ -279,11 +279,11 @@ for.end: ; preds = %for.body, %entry define void @rhs_narrow_range(i16 %n.raw) { ; CHECK-LABEL: 'rhs_narrow_range' ; CHECK-NEXT: Determining loop execution counts for: @rhs_narrow_range -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16)))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 253 ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (2 * (zext i7 (trunc i16 (%n.raw /u 2) to i7) to i16)))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {1,+,1}<%for.body> Added Flags: +; CHECK: Loop %for.body: Trip multiple is 1 ; entry: %n = and i16 %n.raw, 254 @@ -301,6 +301,150 @@ for.end: ; preds = %for.body, %entry ret void } +define void @ugt_constant_rhs(i16 %n.raw, i8 %start) mustprogress { +; +; CHECK-LABEL: 'ugt_constant_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ugt_constant_rhs +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable predicated backedge-taken count. 
+; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ugt i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_constant_rhs(i16 %n.raw, i8 %start) { +; +; CHECK-LABEL: 'ult_constant_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs +; CHECK-NEXT: Loop %for.body: backedge-taken count is (255 + (-1 * (zext i8 (1 + %start) to i16))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 255 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (255 + (-1 * (zext i8 (1 + %start) to i16))) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 255 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_constant_rhs_stride2(i16 %n.raw, i8 %start) { +; +; CHECK-LABEL: 'ult_constant_rhs_stride2' +; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs_stride2 +; CHECK-NEXT: Loop %for.body: backedge-taken count is ((1 + (-1 * (zext i8 (2 + %start) to i16)) + (254 umax (zext i8 (2 + %start) to i16))) /u 2) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 127 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((1 + (-1 * (zext i8 (2 + %start) to i16)) + (254 umax (zext i8 (2 + %start) to i16))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 2 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_constant_rhs_stride2_neg(i16 %n.raw, i8 %start) { +; +; CHECK-LABEL: 'ult_constant_rhs_stride2_neg' +; CHECK-NEXT: Determining loop execution counts for: @ult_constant_rhs_stride2_neg +; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %for.body: Unpredictable max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is ((256 + (-1 * (zext i8 (2 + %start) to i16))) /u 2) +; CHECK-NEXT: Predicates: +; CHECK-NEXT: {(2 + %start),+,2}<%for.body> Added Flags: +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 2 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 255 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + +define void @ult_restricted_rhs(i16 %n.raw) { +; CHECK-LABEL: 'ult_restricted_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ult_restricted_rhs +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax (zext i8 (trunc i16 %n.raw to i8) to i16))) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is 254 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax (zext i8 (trunc i16 %n.raw to i8) to i16))) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + %n = and i16 %n.raw, 255 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_guarded_rhs(i16 %n) { +; CHECK-LABEL: 'ult_guarded_rhs' +; CHECK-NEXT: Determining loop execution counts for: @ult_guarded_rhs +; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (1 umax %n)) +; CHECK-NEXT: Loop %for.body: max backedge-taken count is -2 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (1 umax %n)) +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 1 +; +entry: + %in_range = icmp ult i16 %n, 256 + br i1 %in_range, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + declare void @llvm.assume(i1) diff --git a/llvm/test/Assembler/ifunc-asm.ll b/llvm/test/Assembler/ifunc-asm.ll index bef243a25dd0a..e32587fdc25a8 100644 --- a/llvm/test/Assembler/ifunc-asm.ll +++ b/llvm/test/Assembler/ifunc-asm.ll @@ -2,11 +2,20 @@ target triple = "x86_64-unknown-linux-gnu" -@foo = ifunc i32 (i32), i64 ()* @foo_ifunc -; CHECK: @foo = ifunc i32 (i32), i64 ()* @foo_ifunc +@foo = ifunc i32 (i32), i32 (i32)* ()* @foo_ifunc +; CHECK: @foo = ifunc i32 (i32), i32 (i32)* ()* @foo_ifunc -define internal i64 @foo_ifunc() { +@strlen = ifunc i64 (i8*), bitcast (i64 (i32*)* ()* @mistyped_strlen_resolver to i64 (i8*)* ()*) +; CHECK: strlen = ifunc i64 (i8*), bitcast (i64 (i32*)* ()* @mistyped_strlen_resolver to i64 (i8*)* ()*) + +define internal i32 (i32)* @foo_ifunc() { +entry: + ret i32 (i32)* null +} +; CHECK: define internal i32 (i32)* @foo_ifunc() + +define internal i64 (i32*)* @mistyped_strlen_resolver() { entry: - ret i64 0 + ret i64 (i32*)* null } -; CHECK: define internal i64 @foo_ifunc() +; CHECK: define internal i64 (i32*)* @mistyped_strlen_resolver() diff --git a/llvm/test/Assembler/ifunc-dsolocal.ll b/llvm/test/Assembler/ifunc-dsolocal.ll index 63242cb3f24fb..f8e2c3af1500e 100644 --- a/llvm/test/Assembler/ifunc-dsolocal.ll +++ b/llvm/test/Assembler/ifunc-dsolocal.ll @@ -1,9
+1,9 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s -@foo = dso_local ifunc i32 (i32), i64 ()* @foo_ifunc -; CHECK: @foo = dso_local ifunc i32 (i32), i64 ()* @foo_ifunc +@foo = dso_local ifunc i32 (i32), i32 (i32)* ()* @foo_ifunc +; CHECK: @foo = dso_local ifunc i32 (i32), i32 (i32)* ()* @foo_ifunc -define internal i64 @foo_ifunc() { +define internal i32 (i32)* @foo_ifunc() { entry: - ret i64 0 + ret i32 (i32)* null } diff --git a/llvm/test/Assembler/ifunc-use-list-order.ll b/llvm/test/Assembler/ifunc-use-list-order.ll index efd8dcc3f8791..167406a3be255 100644 --- a/llvm/test/Assembler/ifunc-use-list-order.ll +++ b/llvm/test/Assembler/ifunc-use-list-order.ll @@ -6,11 +6,11 @@ ; Alias for ifunc. @alias_foo = alias void (), void ()* @foo_ifunc -@foo_ifunc = ifunc void (), i8* ()* @foo_resolver +@foo_ifunc = ifunc void (), void ()* ()* @foo_resolver -define i8* @foo_resolver() { +define void ()* @foo_resolver() { entry: - ret i8* null + ret void ()* null } ; Function referencing ifunc. @@ -26,12 +26,11 @@ entry: ; Alias for function. @alias_bar = alias void (), void ()* @bar -@bar_ifunc = ifunc void (), i8* ()* @bar2_ifunc -@bar2_ifunc = ifunc i8* (), i8* ()* @bar_resolver +@bar_ifunc = ifunc void (), void ()* ()* @bar_resolver -define i8* @bar_resolver() { +define void ()* @bar_resolver() { entry: - ret i8* null + ret void ()* null } ; Function referencing bar. diff --git a/llvm/test/Bindings/llvm-c/echo.ll b/llvm/test/Bindings/llvm-c/echo.ll index 64e516c1970fc..c2fc7b108bf3b 100644 --- a/llvm/test/Bindings/llvm-c/echo.ll +++ b/llvm/test/Bindings/llvm-c/echo.ll @@ -29,11 +29,11 @@ module asm "classical GAS" @aliased4 = weak alias i32, i32* @var @aliased5 = weak_odr alias i32, i32* @var -@ifunc = ifunc i32 (i32), i64 ()* @ifunc_resolver +@ifunc = ifunc i32 (i32), i32 (i32)* ()* @ifunc_resolver -define i64 @ifunc_resolver() { +define i32 (i32)* @ifunc_resolver() { entry: - ret i64 0 + ret i32 (i32)* null } define { i64, %S* } @unpackrepack(%S %s) { diff --git a/llvm/test/Bitcode/attr-btf_type_tag.ll b/llvm/test/Bitcode/attr-btf_type_tag.ll new file mode 100644 index 0000000000000..ab0634bb147a0 --- /dev/null +++ b/llvm/test/Bitcode/attr-btf_type_tag.ll @@ -0,0 +1,36 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s +; +; Source: +; #define __tag1 __attribute__((btf_type_tag("tag1"))) +; int __tag1 *g; +; Compilation flag: +; clang -S -g -emit-llvm test.c + +@g = dso_local global i32* null, align 8, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!9, !10, !11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 248122328bfefe82608a2e110af3a3ff04279ddf)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/llvm/btf_tag_type") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64, annotations: !7) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{!8} +!8 = !{!"btf_type_tag", !"tag1"} + +; CHECK: distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 2, type: !5 +; CHECK: !5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64, annotations: !7) +; 
CHECK-NEXT: !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; CHECK-NEXT: !7 = !{!8} +; CHECK-NEXT: !8 = !{!"btf_type_tag", !"tag1"} + +!9 = !{i32 7, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{i32 1, !"wchar_size", i32 4} +!12 = !{i32 7, !"uwtable", i32 1} +!13 = !{i32 7, !"frame-pointer", i32 2} +!14 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 248122328bfefe82608a2e110af3a3ff04279ddf)"} diff --git a/llvm/test/Bitcode/compatibility-3.9.ll b/llvm/test/Bitcode/compatibility-3.9.ll index 6c0d827f80625..a203717993f21 100644 --- a/llvm/test/Bitcode/compatibility-3.9.ll +++ b/llvm/test/Bitcode/compatibility-3.9.ll @@ -256,19 +256,19 @@ declare void @g.f1() ; IFunc -- Linkage @ifunc.external = external ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.external = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.external = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.private = private ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.internal = internal ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) ; IFunc -- Visibility @ifunc.default = default ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.default = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.default = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.hidden = hidden ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.protected = protected ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) define i8* @ifunc_resolver() { entry: diff --git a/llvm/test/Bitcode/compatibility-4.0.ll b/llvm/test/Bitcode/compatibility-4.0.ll index c17ece7f5c899..c0953cbbbcdc7 100644 --- a/llvm/test/Bitcode/compatibility-4.0.ll +++ b/llvm/test/Bitcode/compatibility-4.0.ll @@ -256,19 +256,19 @@ declare void @g.f1() ; IFunc -- Linkage @ifunc.external = external ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.external = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.external = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.private = private ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.internal = internal ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) ; IFunc -- Visibility @ifunc.default = default ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.default = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.default = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver -; 
CHECK: @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.hidden = hidden ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.protected = protected ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) define i8* @ifunc_resolver() { entry: diff --git a/llvm/test/Bitcode/compatibility-5.0.ll b/llvm/test/Bitcode/compatibility-5.0.ll index 6da717f053971..abc3cb3ae9485 100644 --- a/llvm/test/Bitcode/compatibility-5.0.ll +++ b/llvm/test/Bitcode/compatibility-5.0.ll @@ -256,19 +256,19 @@ declare void @g.f1() ; IFunc -- Linkage @ifunc.external = external ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.external = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.external = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.private = private ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.internal = internal ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) ; IFunc -- Visibility @ifunc.default = default ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.default = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.default = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.hidden = hidden ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.protected = protected ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) define i8* @ifunc_resolver() { entry: diff --git a/llvm/test/Bitcode/compatibility-6.0.ll b/llvm/test/Bitcode/compatibility-6.0.ll index 467b75abd5a89..a9a114f577af3 100644 --- a/llvm/test/Bitcode/compatibility-6.0.ll +++ b/llvm/test/Bitcode/compatibility-6.0.ll @@ -255,19 +255,19 @@ declare void @g.f1() ; IFunc -- Linkage @ifunc.external = external ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.external = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.external = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.private = private ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.internal = internal ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) ; IFunc -- Visibility @ifunc.default = default ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.default = ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.default = ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.hidden = hidden ifunc 
void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.hidden = hidden ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver +; CHECK: @ifunc.protected = protected ifunc void (), bitcast (i8* ()* @ifunc_resolver to void ()* ()*) define i8* @ifunc_resolver() { entry: diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 2281938c6d834..2e73810380f65 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -264,28 +264,28 @@ declare void @g.f1() ; * @ ; IFunc -- Linkage -@ifunc.external = external ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.external = ifunc void (), i8* ()* @ifunc_resolver -@ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.private = private ifunc void (), i8* ()* @ifunc_resolver -@ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.internal = internal ifunc void (), i8* ()* @ifunc_resolver +@ifunc.external = external ifunc void (), void ()* ()* @ifunc_resolver +; CHECK: @ifunc.external = ifunc void (), void ()* ()* @ifunc_resolver +@ifunc.private = private ifunc void (), void ()* ()* @ifunc_resolver +; CHECK: @ifunc.private = private ifunc void (), void ()* ()* @ifunc_resolver +@ifunc.internal = internal ifunc void (), void ()* ()* @ifunc_resolver +; CHECK: @ifunc.internal = internal ifunc void (), void ()* ()* @ifunc_resolver ; IFunc -- Visibility -@ifunc.default = default ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.default = ifunc void (), i8* ()* @ifunc_resolver -@ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.hidden = hidden ifunc void (), i8* ()* @ifunc_resolver -@ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver -; CHECK: @ifunc.protected = protected ifunc void (), i8* ()* @ifunc_resolver +@ifunc.default = default ifunc void (), void ()* ()* @ifunc_resolver +; CHECK: @ifunc.default = ifunc void (), void ()* ()* @ifunc_resolver +@ifunc.hidden = hidden ifunc void (), void ()* ()* @ifunc_resolver +; CHECK: @ifunc.hidden = hidden ifunc void (), void ()* ()* @ifunc_resolver +@ifunc.protected = protected ifunc void (), void ()* ()* @ifunc_resolver +; CHECK: @ifunc.protected = protected ifunc void (), void ()* ()* @ifunc_resolver ; IFunc -- partition -; CHECK: @ifunc.partition = ifunc void (), i8* ()* @ifunc_resolver, partition "part" -@ifunc.partition = ifunc void (), i8* ()* @ifunc_resolver, partition "part" +; CHECK: @ifunc.partition = ifunc void (), void ()* ()* @ifunc_resolver, partition "part" +@ifunc.partition = ifunc void (), void ()* ()* @ifunc_resolver, partition "part" -define i8* @ifunc_resolver() { +define void ()* @ifunc_resolver() { entry: - ret i8* null + ret void ()* null } ;; Functions diff --git a/llvm/test/Bitcode/dso_local_equivalent.ll b/llvm/test/Bitcode/dso_local_equivalent.ll index 13eec78020b7c..819586ea6c531 100644 --- a/llvm/test/Bitcode/dso_local_equivalent.ll +++ b/llvm/test/Bitcode/dso_local_equivalent.ll @@ -65,12 +65,12 @@ define void @call_dso_local_alias_func() { ret void } -@ifunc_func = ifunc void (), i64 ()* @resolver -@dso_local_ifunc_func = dso_local ifunc void (), i64 ()* @resolver +@ifunc_func = ifunc void (), void ()* ()* @resolver +@dso_local_ifunc_func = dso_local ifunc void (), void ()* ()* @resolver -define internal i64 @resolver() { +define internal void ()* @resolver() { entry: 
- ret i64 0 + ret void ()* null } define void @call_ifunc_func() { diff --git a/llvm/test/Bitcode/dso_location.ll b/llvm/test/Bitcode/dso_location.ll index 3ad511bad430b..43f96780fbc20 100644 --- a/llvm/test/Bitcode/dso_location.ll +++ b/llvm/test/Bitcode/dso_location.ll @@ -27,8 +27,8 @@ @preemptable_alias = dso_preemptable alias i32, i32* @hidden_local_global ; CHECK-DAG: @preemptable_alias = alias i32, i32* @hidden_local_global -@preemptable_ifunc = dso_preemptable ifunc void (), i8* ()* @ifunc_resolver -; CHECK-DAG: @preemptable_ifunc = ifunc void (), i8* ()* @ifunc_resolver +@preemptable_ifunc = dso_preemptable ifunc void (), void ()* ()* @ifunc_resolver +; CHECK-DAG: @preemptable_ifunc = ifunc void (), void ()* ()* @ifunc_resolver declare dso_local default void @default_local() ; CHECK: declare dso_local void @default_local() @@ -41,7 +41,7 @@ entry: ret void } -define i8* @ifunc_resolver() { +define void ()* @ifunc_resolver() { entry: - ret i8* null + ret void ()* null } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir index 26c9f579b8219..f2f99133c032b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-with-debug-info.mir @@ -20,7 +20,7 @@ !llvm.module.flags = !{!4} !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) - !1 = !DIFile(filename: "/Users/vsk/src/llvm-backup-master/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir", directory: "/") + !1 = !DIFile(filename: "/Users/vsk/src/llvm-backup-main/llvm/test/CodeGen/AArch64/GlobalISel/legalize-memcpy-et-al.mir", directory: "/") !2 = !{} !3 = !{i32 6} !4 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir b/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir index c52deada5cdac..8a235764e6156 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/uaddo-8-16-bits.mir @@ -13,14 +13,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 8 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s8), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[UADDO1]], [[C]] - ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASSERT_ZEXT]], [[ASSERT_ZEXT1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -29,8 +29,7 @@ body: | ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO]](s8) - ; 
CHECK-NEXT: $w0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1.entry: successors: %bb.2(0x00000800), %bb.3(0x7ffff800) @@ -68,14 +67,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 16 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s16), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[UADDO1]], [[C]] - ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASSERT_ZEXT]], [[ASSERT_ZEXT1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -84,8 +83,7 @@ body: | ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO]](s16) - ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1: successors: %bb.2(0x00000800), %bb.3(0x7ffff800) @@ -123,14 +121,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 16 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s16), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[UADDO1]], [[C]] - ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASSERT_ZEXT]], [[ASSERT_ZEXT1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -142,12 +140,11 @@ body: | ; CHECK-NEXT: liveins: $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: BLR renamable $x2, implicit-def dead $lr, implicit $sp, implicit $w0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: BLR killed renamable $x2, implicit-def dead $lr, implicit $sp, implicit 
$w0 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: RET_ReallyLR @@ -196,14 +193,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 16 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s16), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[UADDO1]], [[C]] - ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASSERT_ZEXT]], [[ASSERT_ZEXT1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -212,8 +209,7 @@ body: | ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDO]](s16) - ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1: successors: %bb.2(0x00000800), %bb.3(0x7ffff800) @@ -251,15 +247,15 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 16 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s16), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: DBG_VALUE [[UADDO1]](s1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[UADDO1]], [[C]] - ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASSERT_ZEXT]], [[ASSERT_ZEXT1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK-NEXT: DBG_VALUE [[ICMP]](s1) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -268,8 +264,7 @@ body: | ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO]](s16) - ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1: successors: %bb.2(0x00000800), %bb.3(0x7ffff800) @@ -308,15 +303,15 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 ; 
CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY1]], 16 - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT1]](s32) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s16), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: DBG_VALUE [[UADDO]](s16) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[UADDO1]], [[C]] - ; CHECK-NEXT: G_BRCOND [[XOR]](s1), %bb.2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASSERT_ZEXT]], [[ASSERT_ZEXT1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[C1]] + ; CHECK-NEXT: DBG_VALUE $noreg + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -325,8 +320,7 @@ body: | ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO]](s16) - ; CHECK-NEXT: $w0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 bb.1: successors: %bb.2(0x00000800), %bb.3(0x7ffff800) diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll index ccb3616b70bfa..79978af6f80ed 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -1,195 +1,994 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 -O0 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO16 +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 + +; fptosi + +define i32 @fcvtzs_f32_i32_7(float %flt) { +; CHECK-LABEL: fcvtzs_f32_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, s0, #7 +; CHECK-NEXT: ret + %fix = fmul float %flt, 128.0 + %cvt = fptosi float %fix to i32 + ret i32 %cvt +} + +define i32 @fcvtzs_f32_i32_32(float %flt) { +; CHECK-LABEL: fcvtzs_f32_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, s0, #32 +; CHECK-NEXT: ret + %fix = fmul float %flt, 4294967296.0 + %cvt = fptosi float %fix to i32 + ret i32 %cvt +} + +define i64 @fcvtzs_f32_i64_7(float %flt) { +; CHECK-LABEL: fcvtzs_f32_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, s0, #7 +; CHECK-NEXT: ret + %fix = fmul float %flt, 128.0 + %cvt = fptosi float %fix to i64 + ret i64 %cvt +} + +define i64 @fcvtzs_f32_i64_64(float %flt) { +; CHECK-LABEL: fcvtzs_f32_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, s0, #64 +; CHECK-NEXT: ret + %fix = fmul float %flt, 18446744073709551616.0 + %cvt = fptosi float %fix to i64 + ret i64 %cvt +} + +define i32 @fcvtzs_f64_i32_7(double %dbl) { +; CHECK-LABEL: fcvtzs_f64_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = fptosi double %fix to i32 + ret i32 %cvt +} + +define i32 @fcvtzs_f64_i32_32(double %dbl) { +; CHECK-LABEL: fcvtzs_f64_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, d0, #32 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 4294967296.0 + %cvt = fptosi double %fix to i32 + ret i32 %cvt +} + +define i64
@fcvtzs_f64_i64_7(double %dbl) { +; CHECK-LABEL: fcvtzs_f64_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = fptosi double %fix to i64 + ret i64 %cvt +} + +define i64 @fcvtzs_f64_i64_64(double %dbl) { +; CHECK-LABEL: fcvtzs_f64_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, d0, #64 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 18446744073709551616.0 + %cvt = fptosi double %fix to i64 + ret i64 %cvt +} + +define i32 @fcvtzs_f16_i32_7(half %flt) { +; CHECK-NO16-LABEL: fcvtzs_f16_i32_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzs w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_f16_i32_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 128.0 + %cvt = fptosi half %fix to i32 + ret i32 %cvt +} + +define i32 @fcvtzs_f16_i32_15(half %flt) { +; CHECK-NO16-LABEL: fcvtzs_f16_i32_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzs w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_f16_i32_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 32768.0 + %cvt = fptosi half %fix to i32 + ret i32 %cvt +} + +define i64 @fcvtzs_f16_i64_7(half %flt) { +; CHECK-NO16-LABEL: fcvtzs_f16_i64_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzs x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_f16_i64_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs x0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 128.0 + %cvt = fptosi half %fix to i64 + ret i64 %cvt +} + +define i64 @fcvtzs_f16_i64_15(half %flt) { +; CHECK-NO16-LABEL: fcvtzs_f16_i64_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzs x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_f16_i64_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs x0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 32768.0 + %cvt = fptosi half %fix to i64 + ret i64 %cvt +} + +; fptoui + +define i32 @fcvtzu_f32_i32_7(float %flt) { +; CHECK-LABEL: fcvtzu_f32_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, s0, #7 +; CHECK-NEXT: ret + %fix = fmul float %flt, 128.0 + %cvt = fptoui float %fix to i32 + ret i32 %cvt +} + +define i32 @fcvtzu_f32_i32_32(float %flt) { +; CHECK-LABEL: fcvtzu_f32_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, s0, #32 +; CHECK-NEXT: ret + %fix = fmul float %flt, 4294967296.0 + %cvt = fptoui float %fix to i32 + ret i32 %cvt +} + +define i64 @fcvtzu_f32_i64_7(float %flt) { +; CHECK-LABEL: fcvtzu_f32_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, s0, #7 +; CHECK-NEXT: ret + %fix = fmul float %flt, 128.0 + %cvt = fptoui float %fix to i64 + ret i64 %cvt +} 
+ +define i64 @fcvtzu_f32_i64_64(float %flt) { +; CHECK-LABEL: fcvtzu_f32_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, s0, #64 +; CHECK-NEXT: ret + %fix = fmul float %flt, 18446744073709551616.0 + %cvt = fptoui float %fix to i64 + ret i64 %cvt +} + +define i32 @fcvtzu_f64_i32_7(double %dbl) { +; CHECK-LABEL: fcvtzu_f64_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = fptoui double %fix to i32 + ret i32 %cvt +} -; (The O0 test is to make sure FastISel still constrains its operands properly -; and the verifier doesn't trigger). +define i32 @fcvtzu_f64_i32_32(double %dbl) { +; CHECK-LABEL: fcvtzu_f64_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, d0, #32 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 4294967296.0 + %cvt = fptoui double %fix to i32 + ret i32 %cvt +} -@var32 = global i32 0 -@var64 = global i64 0 +define i64 @fcvtzu_f64_i64_7(double %dbl) { +; CHECK-LABEL: fcvtzu_f64_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = fptoui double %fix to i64 + ret i64 %cvt +} -define void @test_fcvtzs(float %flt, double %dbl) { -; CHECK-LABEL: test_fcvtzs: +define i64 @fcvtzu_f64_i64_64(double %dbl) { +; CHECK-LABEL: fcvtzu_f64_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, d0, #64 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 18446744073709551616.0 + %cvt = fptoui double %fix to i64 + ret i64 %cvt +} - %fix1 = fmul float %flt, 128.0 - %cvt1 = fptosi float %fix1 to i32 -; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #7 - store volatile i32 %cvt1, i32* @var32 +define i32 @fcvtzu_f16_i32_7(half %flt) { +; CHECK-NO16-LABEL: fcvtzu_f16_i32_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_f16_i32_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 128.0 + %cvt = fptoui half %fix to i32 + ret i32 %cvt +} - %fix2 = fmul float %flt, 4294967296.0 - %cvt2 = fptosi float %fix2 to i32 -; CHECK: fcvtzs {{w[0-9]+}}, {{s[0-9]+}}, #32 - store volatile i32 %cvt2, i32* @var32 +define i32 @fcvtzu_f16_i32_15(half %flt) { +; CHECK-NO16-LABEL: fcvtzu_f16_i32_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_f16_i32_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 32768.0 + %cvt = fptoui half %fix to i32 + ret i32 %cvt +} - %fix3 = fmul float %flt, 128.0 - %cvt3 = fptosi float %fix3 to i64 -; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #7 - store volatile i64 %cvt3, i64* @var64 +define i64 @fcvtzu_f16_i64_7(half %flt) { +; CHECK-NO16-LABEL: fcvtzu_f16_i64_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_f16_i64_7: +; CHECK-FP16: // 
%bb.0: +; CHECK-FP16-NEXT: fcvtzu x0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 128.0 + %cvt = fptoui half %fix to i64 + ret i64 %cvt +} - %fix4 = fmul float %flt, 18446744073709551616.0 - %cvt4 = fptosi float %fix4 to i64 -; CHECK: fcvtzs {{x[0-9]+}}, {{s[0-9]+}}, #64 - store volatile i64 %cvt4, i64* @var64 - - %fix5 = fmul double %dbl, 128.0 - %cvt5 = fptosi double %fix5 to i32 -; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #7 - store volatile i32 %cvt5, i32* @var32 - - %fix6 = fmul double %dbl, 4294967296.0 - %cvt6 = fptosi double %fix6 to i32 -; CHECK: fcvtzs {{w[0-9]+}}, {{d[0-9]+}}, #32 - store volatile i32 %cvt6, i32* @var32 - - %fix7 = fmul double %dbl, 128.0 - %cvt7 = fptosi double %fix7 to i64 -; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #7 - store volatile i64 %cvt7, i64* @var64 - - %fix8 = fmul double %dbl, 18446744073709551616.0 - %cvt8 = fptosi double %fix8 to i64 -; CHECK: fcvtzs {{x[0-9]+}}, {{d[0-9]+}}, #64 - store volatile i64 %cvt8, i64* @var64 - - ret void +define i64 @fcvtzu_f16_i64_15(half %flt) { +; CHECK-NO16-LABEL: fcvtzu_f16_i64_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_f16_i64_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu x0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %flt, 32768.0 + %cvt = fptoui half %fix to i64 + ret i64 %cvt } -define void @test_fcvtzu(float %flt, double %dbl) { -; CHECK-LABEL: test_fcvtzu: +; sitofp - %fix1 = fmul float %flt, 128.0 - %cvt1 = fptoui float %fix1 to i32 -; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #7 - store volatile i32 %cvt1, i32* @var32 +define float @scvtf_f32_i32_7(i32 %int) { +; CHECK-LABEL: scvtf_f32_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, w0, #7 +; CHECK-NEXT: ret + %cvt = sitofp i32 %int to float + %fix = fdiv float %cvt, 128.0 + ret float %fix +} - %fix2 = fmul float %flt, 4294967296.0 - %cvt2 = fptoui float %fix2 to i32 -; CHECK: fcvtzu {{w[0-9]+}}, {{s[0-9]+}}, #32 - store volatile i32 %cvt2, i32* @var32 - - %fix3 = fmul float %flt, 128.0 - %cvt3 = fptoui float %fix3 to i64 -; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #7 - store volatile i64 %cvt3, i64* @var64 - - %fix4 = fmul float %flt, 18446744073709551616.0 - %cvt4 = fptoui float %fix4 to i64 -; CHECK: fcvtzu {{x[0-9]+}}, {{s[0-9]+}}, #64 - store volatile i64 %cvt4, i64* @var64 - - %fix5 = fmul double %dbl, 128.0 - %cvt5 = fptoui double %fix5 to i32 -; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #7 - store volatile i32 %cvt5, i32* @var32 - - %fix6 = fmul double %dbl, 4294967296.0 - %cvt6 = fptoui double %fix6 to i32 -; CHECK: fcvtzu {{w[0-9]+}}, {{d[0-9]+}}, #32 - store volatile i32 %cvt6, i32* @var32 - - %fix7 = fmul double %dbl, 128.0 - %cvt7 = fptoui double %fix7 to i64 -; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #7 - store volatile i64 %cvt7, i64* @var64 - - %fix8 = fmul double %dbl, 18446744073709551616.0 - %cvt8 = fptoui double %fix8 to i64 -; CHECK: fcvtzu {{x[0-9]+}}, {{d[0-9]+}}, #64 - store volatile i64 %cvt8, i64* @var64 - - ret void -} - -@varfloat = global float 0.0 -@vardouble = global double 0.0 - -define void @test_scvtf(i32 %int, i64 %long) { -; CHECK-LABEL: test_scvtf: - - %cvt1 = sitofp i32 %int to float - %fix1 = fdiv float %cvt1, 128.0 -; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #7 - store volatile float %fix1, float* @varfloat - - %cvt2 = 
sitofp i32 %int to float - %fix2 = fdiv float %cvt2, 4294967296.0 -; CHECK: scvtf {{s[0-9]+}}, {{w[0-9]+}}, #32 - store volatile float %fix2, float* @varfloat - - %cvt3 = sitofp i64 %long to float - %fix3 = fdiv float %cvt3, 128.0 -; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #7 - store volatile float %fix3, float* @varfloat - - %cvt4 = sitofp i64 %long to float - %fix4 = fdiv float %cvt4, 18446744073709551616.0 -; CHECK: scvtf {{s[0-9]+}}, {{x[0-9]+}}, #64 - store volatile float %fix4, float* @varfloat - - %cvt5 = sitofp i32 %int to double - %fix5 = fdiv double %cvt5, 128.0 -; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #7 - store volatile double %fix5, double* @vardouble - - %cvt6 = sitofp i32 %int to double - %fix6 = fdiv double %cvt6, 4294967296.0 -; CHECK: scvtf {{d[0-9]+}}, {{w[0-9]+}}, #32 - store volatile double %fix6, double* @vardouble - - %cvt7 = sitofp i64 %long to double - %fix7 = fdiv double %cvt7, 128.0 -; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #7 - store volatile double %fix7, double* @vardouble - - %cvt8 = sitofp i64 %long to double - %fix8 = fdiv double %cvt8, 18446744073709551616.0 -; CHECK: scvtf {{d[0-9]+}}, {{x[0-9]+}}, #64 - store volatile double %fix8, double* @vardouble - - ret void -} - -define void @test_ucvtf(i32 %int, i64 %long) { -; CHECK-LABEL: test_ucvtf: - - %cvt1 = uitofp i32 %int to float - %fix1 = fdiv float %cvt1, 128.0 -; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #7 - store volatile float %fix1, float* @varfloat - - %cvt2 = uitofp i32 %int to float - %fix2 = fdiv float %cvt2, 4294967296.0 -; CHECK: ucvtf {{s[0-9]+}}, {{w[0-9]+}}, #32 - store volatile float %fix2, float* @varfloat - - %cvt3 = uitofp i64 %long to float - %fix3 = fdiv float %cvt3, 128.0 -; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #7 - store volatile float %fix3, float* @varfloat - - %cvt4 = uitofp i64 %long to float - %fix4 = fdiv float %cvt4, 18446744073709551616.0 -; CHECK: ucvtf {{s[0-9]+}}, {{x[0-9]+}}, #64 - store volatile float %fix4, float* @varfloat - - %cvt5 = uitofp i32 %int to double - %fix5 = fdiv double %cvt5, 128.0 -; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #7 - store volatile double %fix5, double* @vardouble - - %cvt6 = uitofp i32 %int to double - %fix6 = fdiv double %cvt6, 4294967296.0 -; CHECK: ucvtf {{d[0-9]+}}, {{w[0-9]+}}, #32 - store volatile double %fix6, double* @vardouble - - %cvt7 = uitofp i64 %long to double - %fix7 = fdiv double %cvt7, 128.0 -; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #7 - store volatile double %fix7, double* @vardouble +define float @scvtf_f32_i32_32(i32 %int) { +; CHECK-LABEL: scvtf_f32_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, w0, #32 +; CHECK-NEXT: ret + %cvt = sitofp i32 %int to float + %fix = fdiv float %cvt, 4294967296.0 + ret float %fix +} + +define float @scvtf_f32_i64_7(i64 %long) { +; CHECK-LABEL: scvtf_f32_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, x0, #7 +; CHECK-NEXT: ret + %cvt = sitofp i64 %long to float + %fix = fdiv float %cvt, 128.0 + ret float %fix +} + +define float @scvtf_f32_i64_64(i64 %long) { +; CHECK-LABEL: scvtf_f32_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf s0, x0, #64 +; CHECK-NEXT: ret + %cvt = sitofp i64 %long to float + %fix = fdiv float %cvt, 18446744073709551616.0 + ret float %fix +} + +define double @scvtf_f64_i32_7(i32 %int) { +; CHECK-LABEL: scvtf_f64_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, w0, #7 +; CHECK-NEXT: ret + %cvt = sitofp i32 %int to double + %fix = fdiv double %cvt, 128.0 + ret double %fix +} + +define double @scvtf_f64_i32_32(i32 %int) { +; CHECK-LABEL: scvtf_f64_i32_32: +; 
CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, w0, #32 +; CHECK-NEXT: ret + %cvt = sitofp i32 %int to double + %fix = fdiv double %cvt, 4294967296.0 + ret double %fix +} - %cvt8 = uitofp i64 %long to double - %fix8 = fdiv double %cvt8, 18446744073709551616.0 -; CHECK: ucvtf {{d[0-9]+}}, {{x[0-9]+}}, #64 - store volatile double %fix8, double* @vardouble +define double @scvtf_f64_i64_7(i64 %long) { +; CHECK-LABEL: scvtf_f64_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, x0, #7 +; CHECK-NEXT: ret + %cvt = sitofp i64 %long to double + %fix = fdiv double %cvt, 128.0 + ret double %fix +} + +define double @scvtf_f64_i64_64(i64 %long) { +; CHECK-LABEL: scvtf_f64_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: scvtf d0, x0, #64 +; CHECK-NEXT: ret + %cvt = sitofp i64 %long to double + %fix = fdiv double %cvt, 18446744073709551616.0 + ret double %fix +} + +define half @scvtf_f16_i32_7(i32 %int) { +; CHECK-NO16-LABEL: scvtf_f16_i32_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: scvtf s0, w0 +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: scvtf_f16_i32_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, w0, #7 +; CHECK-FP16-NEXT: ret + %cvt = sitofp i32 %int to half + %fix = fdiv half %cvt, 128.0 + ret half %fix +} + +define half @scvtf_f16_i32_15(i32 %int) { +; CHECK-NO16-LABEL: scvtf_f16_i32_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: scvtf s0, w0 +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: scvtf_f16_i32_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, w0, #15 +; CHECK-FP16-NEXT: ret + %cvt = sitofp i32 %int to half + %fix = fdiv half %cvt, 32768.0 + ret half %fix +} + +define half @scvtf_f16_i64_7(i64 %long) { +; CHECK-NO16-LABEL: scvtf_f16_i64_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: scvtf s0, x0 +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: scvtf_f16_i64_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, x0, #7 +; CHECK-FP16-NEXT: ret + %cvt = sitofp i64 %long to half + %fix = fdiv half %cvt, 128.0 + ret half %fix +} + +define half @scvtf_f16_i64_15(i64 %long) { +; CHECK-NO16-LABEL: scvtf_f16_i64_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: scvtf s0, x0 +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: scvtf_f16_i64_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, x0, #15 +; CHECK-FP16-NEXT: ret + %cvt = sitofp i64 %long to half + %fix = fdiv half %cvt, 32768.0 + ret half %fix +} + +; uitofp + +define float @ucvtf_f32_i32_7(i32 %int) { +; CHECK-LABEL: ucvtf_f32_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, w0, #7 +; CHECK-NEXT: ret + %cvt = uitofp i32 %int to float + %fix = fdiv float %cvt, 128.0 + ret float %fix +} + +define float @ucvtf_f32_i32_32(i32 %int) { +; CHECK-LABEL: ucvtf_f32_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, w0, 
#32 +; CHECK-NEXT: ret + %cvt = uitofp i32 %int to float + %fix = fdiv float %cvt, 4294967296.0 + ret float %fix +} + +define float @ucvtf_f32_i64_7(i64 %long) { +; CHECK-LABEL: ucvtf_f32_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, x0, #7 +; CHECK-NEXT: ret + %cvt = uitofp i64 %long to float + %fix = fdiv float %cvt, 128.0 + ret float %fix +} + +define float @ucvtf_f32_i64_64(i64 %long) { +; CHECK-LABEL: ucvtf_f32_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf s0, x0, #64 +; CHECK-NEXT: ret + %cvt = uitofp i64 %long to float + %fix = fdiv float %cvt, 18446744073709551616.0 + ret float %fix +} + +define double @ucvtf_f64_i32_7(i32 %int) { +; CHECK-LABEL: ucvtf_f64_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, w0, #7 +; CHECK-NEXT: ret + %cvt = uitofp i32 %int to double + %fix = fdiv double %cvt, 128.0 + ret double %fix +} + +define double @ucvtf_f64_i32_32(i32 %int) { +; CHECK-LABEL: ucvtf_f64_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, w0, #32 +; CHECK-NEXT: ret + %cvt = uitofp i32 %int to double + %fix = fdiv double %cvt, 4294967296.0 + ret double %fix +} + +define double @ucvtf_f64_i64_7(i64 %long) { +; CHECK-LABEL: ucvtf_f64_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, x0, #7 +; CHECK-NEXT: ret + %cvt = uitofp i64 %long to double + %fix = fdiv double %cvt, 128.0 + ret double %fix +} + +define double @ucvtf_f64_i64_64(i64 %long) { +; CHECK-LABEL: ucvtf_f64_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: ucvtf d0, x0, #64 +; CHECK-NEXT: ret + %cvt = uitofp i64 %long to double + %fix = fdiv double %cvt, 18446744073709551616.0 + ret double %fix +} + +define half @ucvtf_f16_i32_7(i32 %int) { +; CHECK-NO16-LABEL: ucvtf_f16_i32_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: ucvtf s0, w0 +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: ucvtf_f16_i32_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, w0, #7 +; CHECK-FP16-NEXT: ret + %cvt = uitofp i32 %int to half + %fix = fdiv half %cvt, 128.0 + ret half %fix +} + +define half @ucvtf_f16_i32_15(i32 %int) { +; CHECK-NO16-LABEL: ucvtf_f16_i32_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: ucvtf s0, w0 +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: ucvtf_f16_i32_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, w0, #15 +; CHECK-FP16-NEXT: ret + %cvt = uitofp i32 %int to half + %fix = fdiv half %cvt, 32768.0 + ret half %fix +} + +define half @ucvtf_f16_i64_7(i64 %long) { +; CHECK-NO16-LABEL: ucvtf_f16_i64_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: ucvtf s0, x0 +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: ucvtf_f16_i64_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, x0, #7 +; CHECK-FP16-NEXT: ret + %cvt = uitofp i64 %long to half + %fix = fdiv half %cvt, 128.0 + ret half %fix +} + +define half @ucvtf_f16_i64_15(i64 %long) { +; CHECK-NO16-LABEL: ucvtf_f16_i64_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: ucvtf s0, x0 +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fmov s1, 
w8 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: ucvtf_f16_i64_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, x0, #15 +; CHECK-FP16-NEXT: ret + %cvt = uitofp i64 %long to half + %fix = fdiv half %cvt, 32768.0 + ret half %fix +} + + +; fptosi.sat + +declare i32 @llvm.fptosi.sat.i32.f32(float) +declare i64 @llvm.fptosi.sat.i64.f32(float) +declare i32 @llvm.fptosi.sat.i32.f64(double) +declare i64 @llvm.fptosi.sat.i64.f64(double) +declare i32 @llvm.fptosi.sat.i32.f16(half) +declare i64 @llvm.fptosi.sat.i64.f16(half) + +define i32 @fcvtzs_sat_f32_i32_7(float %flt) { +; CHECK-LABEL: fcvtzs_sat_f32_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, s0, #7 +; CHECK-NEXT: ret + %fix = fmul float %flt, 128.0 + %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) + ret i32 %cvt +} + +define i32 @fcvtzs_sat_f32_i32_32(float %flt) { +; CHECK-LABEL: fcvtzs_sat_f32_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, s0, #32 +; CHECK-NEXT: ret + %fix = fmul float %flt, 4294967296.0 + %cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix) + ret i32 %cvt +} + +define i64 @fcvtzs_sat_f32_i64_64(float %flt) { +; CHECK-LABEL: fcvtzs_sat_f32_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, s0, #64 +; CHECK-NEXT: ret + %fix = fmul float %flt, 18446744073709551616.0 + %cvt = call i64 @llvm.fptosi.sat.i64.f32(float %fix) + ret i64 %cvt +} + +define i32 @fcvtzs_sat_f64_i32_7(double %dbl) { +; CHECK-LABEL: fcvtzs_sat_f64_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) + ret i32 %cvt +} + +define i32 @fcvtzs_sat_f64_i32_32(double %dbl) { +; CHECK-LABEL: fcvtzs_sat_f64_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w0, d0, #32 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 4294967296.0 + %cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix) + ret i32 %cvt +} + +define i64 @fcvtzs_sat_f64_i64_7(double %dbl) { +; CHECK-LABEL: fcvtzs_sat_f64_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) + ret i64 %cvt +} + +define i64 @fcvtzs_sat_f64_i64_64(double %dbl) { +; CHECK-LABEL: fcvtzs_sat_f64_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs x0, d0, #64 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 18446744073709551616.0 + %cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix) + ret i64 %cvt +} + +define i32 @fcvtzs_sat_f16_i32_7(half %dbl) { +; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzs w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 128.0 + %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) + ret i32 %cvt +} + +define i32 @fcvtzs_sat_f16_i32_15(half %dbl) { +; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: 
fcvtzs w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 32768.0 + %cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix) + ret i32 %cvt +} + +define i64 @fcvtzs_sat_f16_i64_7(half %dbl) { +; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzs x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs x0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 128.0 + %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) + ret i64 %cvt +} + +define i64 @fcvtzs_sat_f16_i64_15(half %dbl) { +; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzs x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs x0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 32768.0 + %cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix) + ret i64 %cvt +} + +; fptoui.sat + +declare i32 @llvm.fptoui.sat.i32.f32(float) +declare i64 @llvm.fptoui.sat.i64.f32(float) +declare i32 @llvm.fptoui.sat.i32.f64(double) +declare i64 @llvm.fptoui.sat.i64.f64(double) +declare i32 @llvm.fptoui.sat.i32.f16(half) +declare i64 @llvm.fptoui.sat.i64.f16(half) + +define i32 @fcvtzu_sat_f32_i32_7(float %flt) { +; CHECK-LABEL: fcvtzu_sat_f32_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, s0, #7 +; CHECK-NEXT: ret + %fix = fmul float %flt, 128.0 + %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) + ret i32 %cvt +} + +define i32 @fcvtzu_sat_f32_i32_32(float %flt) { +; CHECK-LABEL: fcvtzu_sat_f32_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, s0, #32 +; CHECK-NEXT: ret + %fix = fmul float %flt, 4294967296.0 + %cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix) + ret i32 %cvt +} + +define i64 @fcvtzu_sat_f32_i64_64(float %flt) { +; CHECK-LABEL: fcvtzu_sat_f32_i64_64: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, s0, #64 +; CHECK-NEXT: ret + %fix = fmul float %flt, 18446744073709551616.0 + %cvt = call i64 @llvm.fptoui.sat.i64.f32(float %fix) + ret i64 %cvt +} + +define i32 @fcvtzu_sat_f64_i32_7(double %dbl) { +; CHECK-LABEL: fcvtzu_sat_f64_i32_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) + ret i32 %cvt +} + +define i32 @fcvtzu_sat_f64_i32_32(double %dbl) { +; CHECK-LABEL: fcvtzu_sat_f64_i32_32: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu w0, d0, #32 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 4294967296.0 + %cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix) + ret i32 %cvt +} + +define i64 @fcvtzu_sat_f64_i64_7(double %dbl) { +; CHECK-LABEL: fcvtzu_sat_f64_i64_7: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu x0, d0, #7 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 128.0 + %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) + ret i64 %cvt +} + +define i64 @fcvtzu_sat_f64_i64_64(double %dbl) { +; CHECK-LABEL: fcvtzu_sat_f64_i64_64: +; CHECK: // %bb.0: +; 
CHECK-NEXT: fcvtzu x0, d0, #64 +; CHECK-NEXT: ret + %fix = fmul double %dbl, 18446744073709551616.0 + %cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix) + ret i64 %cvt +} + +define i32 @fcvtzu_sat_f16_i32_7(half %dbl) { +; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 128.0 + %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) + ret i32 %cvt +} + +define i32 @fcvtzu_sat_f16_i32_15(half %dbl) { +; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu w0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 32768.0 + %cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix) + ret i32 %cvt +} + +define i64 @fcvtzu_sat_f16_i64_7(half %dbl) { +; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_7: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu x0, h0, #7 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 128.0 + %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix) + ret i64 %cvt +} - ret void +define i64 @fcvtzu_sat_f16_i64_15(half %dbl) { +; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-NO16: // %bb.0: +; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmov s1, w8 +; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h0, s0 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvtzu x0, s0 +; CHECK-NO16-NEXT: ret +; +; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_15: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu x0, h0, #15 +; CHECK-FP16-NEXT: ret + %fix = fmul half %dbl, 32768.0 + %cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix) + ret i64 %cvt } diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll index c199ad0f76c4d..51dc7ce2d061d 100644 --- a/llvm/test/CodeGen/AArch64/funnel-shift.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll @@ -46,29 +46,19 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind { ; CHECK-LABEL: fshl_i128: ; CHECK: // %bb.0: +; CHECK-NEXT: tst x4, #0x40 ; CHECK-NEXT: mvn w8, w4 -; CHECK-NEXT: extr x9, x3, x2, #1 -; CHECK-NEXT: lsr x10, x3, #1 -; CHECK-NEXT: and x12, x8, #0x7f -; CHECK-NEXT: lsl x11, x10, #1 -; CHECK-NEXT: tst x12, #0x40 -; CHECK-NEXT: lsl x11, x11, x4 +; CHECK-NEXT: csel x9, x2, x3, ne +; CHECK-NEXT: csel x10, x3, x0, ne +; CHECK-NEXT: lsr x9, x9, #1 +; CHECK-NEXT: lsl x11, x10, x4 +; CHECK-NEXT: csel x12, x0, x1, ne +; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: lsr x9, x9, x8 -; 
CHECK-NEXT: orr x9, x11, x9 -; CHECK-NEXT: lsr x11, x0, #1 -; CHECK-NEXT: lsr x10, x10, x8 -; CHECK-NEXT: lsl x12, x1, x4 -; CHECK-NEXT: lsr x8, x11, x8 -; CHECK-NEXT: and x11, x4, #0x7f -; CHECK-NEXT: csel x9, x10, x9, ne -; CHECK-NEXT: csel x10, xzr, x10, ne -; CHECK-NEXT: orr x8, x12, x8 -; CHECK-NEXT: lsl x12, x0, x4 -; CHECK-NEXT: tst x11, #0x40 -; CHECK-NEXT: csel x8, x12, x8, ne -; CHECK-NEXT: csel x11, xzr, x12, ne -; CHECK-NEXT: orr x1, x8, x10 +; CHECK-NEXT: lsl x12, x12, x4 +; CHECK-NEXT: lsr x8, x10, x8 ; CHECK-NEXT: orr x0, x11, x9 +; CHECK-NEXT: orr x1, x12, x8 ; CHECK-NEXT: ret %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z) ret i128 %f diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll index 3532881a2223e..f24534d007431 100644 --- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll @@ -139,4 +139,21 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind { ret i1 %cmp } +define i1 @opt_setcc_shl_ne_zero_i256(i256 %a) nounwind { +; CHECK-LABEL: opt_setcc_shl_ne_zero_i256: +; CHECK: // %bb.0: +; CHECK-NEXT: extr x8, x3, x2, #47 +; CHECK-NEXT: extr x9, x2, x1, #47 +; CHECK-NEXT: extr x10, x1, x0, #47 +; CHECK-NEXT: orr x9, x9, x0, lsl #17 +; CHECK-NEXT: orr x8, x10, x8 +; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret + %shl = shl i256 %a, 17 + %cmp = icmp ne i256 %shl, 0 + ret i1 %cmp +} + declare void @use(i128 %a) diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll index 8a3b5fdcee877..07fbcddb307e8 100644 --- a/llvm/test/CodeGen/AArch64/madd-combiner.ll +++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll @@ -1,20 +1,25 @@ -; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ISEL +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FAST ; Test that we use the correct register class. define i32 @mul_add_imm(i32 %a, i32 %b) { -; CHECK-LABEL: mul_add_imm -; CHECK: orr [[REG:w[0-9]+]], wzr, #0x4 -; CHECK-NEXT: madd {{w[0-9]+}}, w0, w1, [[REG]] +; CHECK-LABEL: mul_add_imm: +; CHECK: ; %bb.0: +; CHECK-NEXT: orr w8, wzr, #0x4 +; CHECK-NEXT: madd w0, w0, w1, w8 +; CHECK-NEXT: ret %1 = mul i32 %a, %b %2 = add i32 %1, 4 ret i32 %2 } define i32 @mul_sub_imm1(i32 %a, i32 %b) { -; CHECK-LABEL: mul_sub_imm1 -; CHECK: mov [[REG:w[0-9]+]], #4 -; CHECK-NEXT: msub {{w[0-9]+}}, w0, w1, [[REG]] +; CHECK-LABEL: mul_sub_imm1: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: msub w0, w0, w1, w8 +; CHECK-NEXT: ret %1 = mul i32 %a, %b %2 = sub i32 4, %1 ret i32 %2 @@ -22,6 +27,29 @@ define i32 @mul_sub_imm1(i32 %a, i32 %b) { ; bugpoint reduced test case. This only tests that we pass the MI verifier. 
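
[Editor's note] The add1_mul_val*/sub1_mul_val* tests added further down in this file exercise the DAG combine that folds a multiply by (add y, 1) or (sub 1, y) into a single accumulating multiply: x * (y + 1) == x*y + x, so normal ISel can emit one madd (or one msub for the subtract form), while FastISel keeps the separate add/mul pair. A minimal illustrative sketch, not part of the patch (the function name is invented):

define i32 @mul_of_add1_sketch(i32 %x, i32 %y) {
  %y1 = add i32 %y, 1   ; x * (y + 1) == x*y + x
  %r = mul i32 %x, %y1  ; expected selection: madd w0, w0, w1, w0
  ret i32 %r
}
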
define void @mul_add_imm2() { +; CHECK-ISEL-LABEL: mul_add_imm2: +; CHECK-ISEL: ; %bb.0: ; %entry +; CHECK-ISEL-NEXT: mov w8, #1 +; CHECK-ISEL-NEXT: LBB2_1: ; %for.body8 +; CHECK-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-ISEL-NEXT: cbnz w8, LBB2_1 +; CHECK-ISEL-NEXT: ; %bb.2: ; %for.end20 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: mul_add_imm2: +; CHECK-FAST: ; %bb.0: ; %entry +; CHECK-FAST-NEXT: mov x8, #-3 +; CHECK-FAST-NEXT: orr x9, xzr, #0xfffffffffffffffd +; CHECK-FAST-NEXT: madd x8, x8, x8, x9 +; CHECK-FAST-NEXT: mov x9, #45968 +; CHECK-FAST-NEXT: movk x9, #48484, lsl #16 +; CHECK-FAST-NEXT: movk x9, #323, lsl #32 +; CHECK-FAST-NEXT: LBB2_1: ; %for.body8 +; CHECK-FAST-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-FAST-NEXT: cmp x8, x9 +; CHECK-FAST-NEXT: b.lt LBB2_1 +; CHECK-FAST-NEXT: ; %bb.2: ; %for.end20 +; CHECK-FAST-NEXT: ret entry: br label %for.body for.body: @@ -35,3 +63,141 @@ for.end20: ret void } +define i32 @add1_mul_val1(i32 %a, i32 %b) { +; CHECK-ISEL-LABEL: add1_mul_val1: +; CHECK-ISEL: ; %bb.0: +; CHECK-ISEL-NEXT: madd w0, w1, w0, w1 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: add1_mul_val1: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: add w8, w0, #1 +; CHECK-FAST-NEXT: mul w0, w8, w1 +; CHECK-FAST-NEXT: ret + %1 = add i32 %a, 1 + %2 = mul i32 %1, %b + ret i32 %2 +} + +define i32 @add1_mul_val2(i32 %a, i32 %b) { +; CHECK-ISEL-LABEL: add1_mul_val2: +; CHECK-ISEL: ; %bb.0: +; CHECK-ISEL-NEXT: madd w0, w0, w1, w0 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: add1_mul_val2: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: add w8, w1, #1 +; CHECK-FAST-NEXT: mul w0, w0, w8 +; CHECK-FAST-NEXT: ret + %1 = add i32 %b, 1 + %2 = mul i32 %a, %1 + ret i32 %2 +} + +define i64 @add1_mul_val3(i64 %a, i64 %b) { +; CHECK-ISEL-LABEL: add1_mul_val3: +; CHECK-ISEL: ; %bb.0: +; CHECK-ISEL-NEXT: madd x0, x0, x1, x0 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: add1_mul_val3: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: add x8, x1, #1 +; CHECK-FAST-NEXT: mul x0, x0, x8 +; CHECK-FAST-NEXT: ret + %1 = add i64 %b, 1 + %2 = mul i64 %a, %1 + ret i64 %2 +} + +define i64 @add1_mul_val4(i64 %a, i64 %b, i64 %c) { +; CHECK-ISEL-LABEL: add1_mul_val4: +; CHECK-ISEL: ; %bb.0: +; CHECK-ISEL-NEXT: add x8, x0, x2 +; CHECK-ISEL-NEXT: madd x0, x8, x1, x8 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: add1_mul_val4: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: add x8, x1, #1 +; CHECK-FAST-NEXT: add x9, x0, x2 +; CHECK-FAST-NEXT: mul x0, x9, x8 +; CHECK-FAST-NEXT: ret + %1 = add i64 %a, %c + %2 = add i64 %b, 1 + %3 = mul i64 %1, %2 + ret i64 %3 +} + +define i32 @sub1_mul_val1(i32 %a, i32 %b) { +; CHECK-ISEL-LABEL: sub1_mul_val1: +; CHECK-ISEL: ; %bb.0: +; CHECK-ISEL-NEXT: msub w0, w1, w0, w1 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: sub1_mul_val1: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: mov w8, #1 +; CHECK-FAST-NEXT: sub w8, w8, w0 +; CHECK-FAST-NEXT: mul w0, w8, w1 +; CHECK-FAST-NEXT: ret + %1 = sub i32 1, %a + %2 = mul i32 %1, %b + ret i32 %2 +} + +define i32 @sub1_mul_val2(i32 %a, i32 %b) { +; CHECK-ISEL-LABEL: sub1_mul_val2: +; CHECK-ISEL: ; %bb.0: +; CHECK-ISEL-NEXT: msub w0, w0, w1, w0 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: sub1_mul_val2: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: mov w8, #1 +; CHECK-FAST-NEXT: sub w8, w8, w1 +; CHECK-FAST-NEXT: mul w0, w0, w8 +; CHECK-FAST-NEXT: ret + %1 = sub i32 1, %b + %2 = mul i32 %a, %1 + ret i32 %2 +} + +define i64 @sub1_mul_val3(i64 %a, i64 %b) { +; CHECK-ISEL-LABEL: sub1_mul_val3: +; CHECK-ISEL: ; %bb.0: +; 
CHECK-ISEL-NEXT: msub x0, x0, x1, x0 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: sub1_mul_val3: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: mov x8, #1 +; CHECK-FAST-NEXT: sub x8, x8, x1 +; CHECK-FAST-NEXT: mul x0, x0, x8 +; CHECK-FAST-NEXT: ret + %1 = sub i64 1, %b + %2 = mul i64 %a, %1 + ret i64 %2 +} + +define i64 @sub1_mul_val4(i64 %a, i64 %b) { +; CHECK-ISEL-LABEL: sub1_mul_val4: +; CHECK-ISEL: ; %bb.0: +; CHECK-ISEL-NEXT: sub x8, x0, #1 +; CHECK-ISEL-NEXT: msub x0, x8, x1, x8 +; CHECK-ISEL-NEXT: ret +; +; CHECK-FAST-LABEL: sub1_mul_val4: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: mov x8, #1 +; CHECK-FAST-NEXT: sub x9, x0, #1 +; CHECK-FAST-NEXT: sub x8, x8, x1 +; CHECK-FAST-NEXT: mul x0, x9, x8 +; CHECK-FAST-NEXT: ret + %1 = sub i64 %a, 1 + %2 = sub i64 1, %b + %3 = mul i64 %1, %2 + ret i64 %3 +} + diff --git a/llvm/test/CodeGen/AArch64/neon-uaddlv.ll b/llvm/test/CodeGen/AArch64/neon-uaddlv.ll index 3bc55f49f27eb..bfc288109b0c2 100644 --- a/llvm/test/CodeGen/AArch64/neon-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/neon-uaddlv.ll @@ -17,7 +17,7 @@ define i16 @uaddlv4h_from_v8i8(<8 x i8>* %A) nounwind { ; CHECK-LABEL: uaddlv4h_from_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: uaddlv s0, v0.4h +; CHECK-NEXT: uaddlv h0, v0.8b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll new file mode 100644 index 0000000000000..b226636617969 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll @@ -0,0 +1,309 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O3 -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; FMA +; + +; Don't use SVE for 64-bit vectors. +define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) #0 { +; CHECK-LABEL: fma_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla v2.4h, v0.4h, v1.4h +; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ret + %mul = fmul contract <4 x half> %op1, %op2 + %res = fadd contract <4 x half> %mul, %op3 + ret <4 x half> %res +} + +; Don't use SVE for 128-bit vectors. 
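
[Editor's note] Every test in this new file tags the fmul/fadd pair with the contract fast-math flag; that flag is what licenses the backend to fuse the two operations into a single fmla (NEON) or fmad (SVE) with one rounding step. A minimal sketch of the pattern being matched, illustrative only (the function name is invented):

define <4 x float> @contract_fma_sketch(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  %mul = fmul contract <4 x float> %a, %b   ; 'contract' permits fusing this
  %res = fadd contract <4 x float> %mul, %c ; with this into one fmla/fmad
  ret <4 x float> %res
}

Without contract (or an equivalent global fast-math setting) the multiply and add must stay separate to preserve the intermediate rounding.
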
+define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) #0 { +; CHECK-LABEL: fma_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla v2.8h, v0.8h, v1.8h +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mul = fmul contract <8 x half> %op1, %op2 + %res = fadd contract <8 x half> %mul, %op3 + ret <8 x half> %res +} + +define void @fma_v16f16(<16 x half>* %a, <16 x half>* %b, <16 x half>* %c) #0 { +; CHECK-LABEL: fma_v16f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x2] +; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <16 x half>, <16 x half>* %a + %op2 = load <16 x half>, <16 x half>* %b + %op3 = load <16 x half>, <16 x half>* %c + %mul = fmul contract <16 x half> %op1, %op2 + %res = fadd contract <16 x half> %mul, %op3 + store <16 x half> %res, <16 x half>* %a + ret void +} + +define void @fma_v32f16(<32 x half>* %a, <32 x half>* %b, <32 x half>* %c) #0 { +; CHECK-LABEL: fma_v32f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl32 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x2] +; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <32 x half>, <32 x half>* %a + %op2 = load <32 x half>, <32 x half>* %b + %op3 = load <32 x half>, <32 x half>* %c + %mul = fmul contract <32 x half> %op1, %op2 + %res = fadd contract <32 x half> %mul, %op3 + store <32 x half> %res, <32 x half>* %a + ret void +} + +define void @fma_v64f16(<64 x half>* %a, <64 x half>* %b, <64 x half>* %c) #0 { +; CHECK-LABEL: fma_v64f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl64 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x2] +; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <64 x half>, <64 x half>* %a + %op2 = load <64 x half>, <64 x half>* %b + %op3 = load <64 x half>, <64 x half>* %c + %mul = fmul contract <64 x half> %op1, %op2 + %res = fadd contract <64 x half> %mul, %op3 + store <64 x half> %res, <64 x half>* %a + ret void +} + +define void @fma_v128f16(<128 x half>* %a, <128 x half>* %b, <128 x half>* %c) #0 { +; CHECK-LABEL: fma_v128f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl128 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x2] +; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h +; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <128 x half>, <128 x half>* %a + %op2 = load <128 x half>, <128 x half>* %b + %op3 = load <128 x half>, <128 x half>* %c + %mul = fmul contract <128 x half> %op1, %op2 + %res = fadd contract <128 x half> %mul, %op3 + store <128 x half> %res, <128 x half>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) #0 { +; CHECK-LABEL: fma_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s +; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ret + %mul = fmul contract <2 x float> %op1, %op2 + %res = fadd contract <2 x float> %mul, %op3 + ret <2 x float> %res +} + +; Don't use SVE for 128-bit vectors. 
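
[Editor's note] The wide cases above reach SVE only because of two ingredients: the "+sve" target feature (attributes #0 at the end of this file) and -aarch64-sve-vector-bits-min=2048 on the RUN line, which guarantees the scalable registers are wide enough to hold the whole fixed-length vector. The lowering then builds a predicate for the exact element count (ptrue p0.h, vl16 and so on) and uses predicated ld1/fmad/st1. A hedged sketch of the minimal ingredients (invented function name; not from the patch):

define void @needs_wide_sve(<32 x float>* %p) "target-features"="+sve" {
  %v = load <32 x float>, <32 x float>* %p  ; 1024 bits, too wide for one NEON register
  %r = fadd <32 x float> %v, %v             ; with vector-bits-min >= 1024 this can be
  store <32 x float> %r, <32 x float>* %p   ; a single predicated SVE fadd at vl32
  ret void
}
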
+define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) #0 { +; CHECK-LABEL: fma_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mul = fmul contract <4 x float> %op1, %op2 + %res = fadd contract <4 x float> %mul, %op3 + ret <4 x float> %res +} + +define void @fma_v8f32(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) #0 { +; CHECK-LABEL: fma_v8f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2] +; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <8 x float>, <8 x float>* %a + %op2 = load <8 x float>, <8 x float>* %b + %op3 = load <8 x float>, <8 x float>* %c + %mul = fmul contract <8 x float> %op1, %op2 + %res = fadd contract <8 x float> %mul, %op3 + store <8 x float> %res, <8 x float>* %a + ret void +} + +define void @fma_v16f32(<16 x float>* %a, <16 x float>* %b, <16 x float>* %c) #0 { +; CHECK-LABEL: fma_v16f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl16 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2] +; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <16 x float>, <16 x float>* %a + %op2 = load <16 x float>, <16 x float>* %b + %op3 = load <16 x float>, <16 x float>* %c + %mul = fmul contract <16 x float> %op1, %op2 + %res = fadd contract <16 x float> %mul, %op3 + store <16 x float> %res, <16 x float>* %a + ret void +} + +define void @fma_v32f32(<32 x float>* %a, <32 x float>* %b, <32 x float>* %c) #0 { +; CHECK-LABEL: fma_v32f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl32 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2] +; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <32 x float>, <32 x float>* %a + %op2 = load <32 x float>, <32 x float>* %b + %op3 = load <32 x float>, <32 x float>* %c + %mul = fmul contract <32 x float> %op1, %op2 + %res = fadd contract <32 x float> %mul, %op3 + store <32 x float> %res, <32 x float>* %a + ret void +} + +define void @fma_v64f32(<64 x float>* %a, <64 x float>* %b, <64 x float>* %c) #0 { +; CHECK-LABEL: fma_v64f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl64 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2] +; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <64 x float>, <64 x float>* %a + %op2 = load <64 x float>, <64 x float>* %b + %op3 = load <64 x float>, <64 x float>* %c + %mul = fmul contract <64 x float> %op1, %op2 + %res = fadd contract <64 x float> %mul, %op3 + store <64 x float> %res, <64 x float>* %a + ret void +} + +; Don't use SVE for 64-bit vectors. +define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double> %op3) #0 { +; CHECK-LABEL: fma_v1f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmadd d0, d0, d1, d2 +; CHECK-NEXT: ret + %mul = fmul contract <1 x double> %op1, %op2 + %res = fadd contract <1 x double> %mul, %op3 + ret <1 x double> %res +} + +; Don't use SVE for 128-bit vectors. 
+define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) #0 { +; CHECK-LABEL: fma_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmla v2.2d, v0.2d, v1.2d +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ret + %mul = fmul contract <2 x double> %op1, %op2 + %res = fadd contract <2 x double> %mul, %op3 + ret <2 x double> %res +} + +define void @fma_v4f64(<4 x double>* %a, <4 x double>* %b, <4 x double>* %c) #0 { +; CHECK-LABEL: fma_v4f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x2] +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <4 x double>, <4 x double>* %a + %op2 = load <4 x double>, <4 x double>* %b + %op3 = load <4 x double>, <4 x double>* %c + %mul = fmul contract <4 x double> %op1, %op2 + %res = fadd contract <4 x double> %mul, %op3 + store <4 x double> %res, <4 x double>* %a + ret void +} + +define void @fma_v8f64(<8 x double>* %a, <8 x double>* %b, <8 x double>* %c) #0 { +; CHECK-LABEL: fma_v8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x2] +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <8 x double>, <8 x double>* %a + %op2 = load <8 x double>, <8 x double>* %b + %op3 = load <8 x double>, <8 x double>* %c + %mul = fmul contract <8 x double> %op1, %op2 + %res = fadd contract <8 x double> %mul, %op3 + store <8 x double> %res, <8 x double>* %a + ret void +} + +define void @fma_v16f64(<16 x double>* %a, <16 x double>* %b, <16 x double>* %c) #0 { +; CHECK-LABEL: fma_v16f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl16 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x2] +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <16 x double>, <16 x double>* %a + %op2 = load <16 x double>, <16 x double>* %b + %op3 = load <16 x double>, <16 x double>* %c + %mul = fmul contract <16 x double> %op1, %op2 + %res = fadd contract <16 x double> %mul, %op3 + store <16 x double> %res, <16 x double>* %a + ret void +} + +define void @fma_v32f64(<32 x double>* %a, <32 x double>* %b, <32 x double>* %c) #0 { +; CHECK-LABEL: fma_v32f64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl32 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x2] +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: ret + %op1 = load <32 x double>, <32 x double>* %a + %op2 = load <32 x double>, <32 x double>* %b + %op3 = load <32 x double>, <32 x double>* %c + %mul = fmul contract <32 x double> %op1, %op2 + %res = fadd contract <32 x double> %mul, %op3 + store <32 x double> %res, <32 x double>* %a + ret void +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll index 68e34dcd2940f..b2ce25b7eb1b0 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -404,8 +404,7 @@ declare <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half>, <vscale x 2 x half>, i64) define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32(<vscale x 2 x i32> %sv0) { ; CHECK-LABEL: insert_nxv3i32_nxv2i32: ; CHECK: // 
%bb.0: -; CHECK-NEXT: uunpkhi z1.d, z0.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: ret %v0 = call <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32> undef, <vscale x 2 x i32> %sv0, i64 0) ret <vscale x 3 x i32> %v0 @@ -425,14 +424,7 @@ define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32_2(<vscale x 2 x i32> %sv0, define <vscale x 3 x float> @insert_nxv3f32_nxv2f32(<vscale x 2 x float> %sv0) nounwind { ; CHECK-LABEL: insert_nxv3f32_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1w { z0.d }, p0, [sp] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: ret %v0 = call <vscale x 3 x float> @llvm.experimental.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float> undef, <vscale x 2 x float> %sv0, i64 0) ret <vscale x 3 x float> %v0 @@ -443,13 +435,9 @@ define <vscale x 6 x i32> @insert_nxv6i32_nxv2i32(<vscale x 2 x i32> %sv0, @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) { ret <4 x i32> %add } +define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: signbit_mask_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <16 x i8> %a, zeroinitializer + %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer + ret <16 x i8> %r +} + +define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: signbit_mask_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <8 x i16> %a, zeroinitializer + %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer + ret <8 x i16> %r +} + +define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: signbit_mask_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <4 x i32> %a, zeroinitializer + %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer + ret <4 x i32> %r +} + +define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: signbit_mask_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <2 x i64> %a, zeroinitializer + %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer + ret <2 x i64> %r +} + +define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: signbit_setmask_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <16 x i8> %a, zeroinitializer + %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b + ret <16 x i8> %r +} + +define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: signbit_setmask_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.8h, v0.8h, #15 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <8 x i16> %a, zeroinitializer + %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b + ret <8 x i16> %r +} + +define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: signbit_setmask_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <4 x i32> %a, zeroinitializer + %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b + ret <4 x i32> %r +} + +define <2 x i64> @signbit_setmask_v2i64(<2 
x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: signbit_setmask_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: ret + %cond = icmp slt <2 x i64> %a, zeroinitializer + %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b + ret <2 x i64> %r +} + +define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: not_signbit_mask_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v0.16b, v0.16b, v2.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ret + %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer + ret <16 x i8> %r +} + +define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: not_signbit_mask_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v0.8h, v0.8h, v2.8h +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ret + %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer + ret <8 x i16> %r +} + +define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: not_signbit_mask_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ret + %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> + %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer + ret <4 x i32> %r +} + +define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: not_signbit_mask_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v0.2d, v0.2d, v2.2d +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ret + %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1> + %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer + ret <2 x i64> %r +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index e097fcb90ffbd..67cb7683fbf5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -18,10 +18,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] call amdgpu_gfx void @external_gfx_void_func_void() ret void } @@ -39,10 +39,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), 
@external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -60,10 +60,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -88,10 +88,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -118,10 +118,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index c70a348a0cb8e..55449b9cca495 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -141,12 +141,12 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1) ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY5]] + ; GCN-NEXT: [[COPY5:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY5]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) store volatile i32 %val, i32 addrspace(1)* %out ret void @@ -219,13 +219,13 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY3]] + ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() store volatile i1 %val, i1 addrspace(1)* undef ret void @@ -415,14 +415,14 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; 
GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY3]] + ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() store volatile i8 %val, i8 addrspace(1)* undef ret void @@ -784,12 +784,12 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY3]] + ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -2480,7 +2480,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) - ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 + ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2 @@ -2488,8 +2488,8 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY6]] + ; GCN-NEXT: [[COPY6:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY6]] %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = 
extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index bd068cd3b45e9..0071ca9275e7c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -156,10 +156,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] call amdgpu_gfx void @external_gfx_void_func_void() ret void } @@ -899,10 +899,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -920,10 +920,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -3893,10 +3893,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL 
[[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -3923,10 +3923,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_highregs, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index ee15b1a0249f9..e635c5b3649a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -66,10 +66,10 @@ define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) - ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY4]] call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll index 1b2409e2ba73a..f5d7abd1e36bb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll @@ -272,7 +272,7 @@ define float @v_mad_legacy_f32(float %a, float %b, 
float %c) #2 { ; GFX6-LABEL: v_mad_legacy_f32: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_mac_legacy_f32_e64 v2, v0, v1 +; GFX6-NEXT: v_mac_legacy_f32_e32 v2, v0, v1 ; GFX6-NEXT: v_mov_b32_e32 v0, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -292,7 +292,7 @@ define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 { ; GFX101: ; %bb.0: ; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX101-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX101-NEXT: v_mac_legacy_f32_e64 v2, v0, v1 +; GFX101-NEXT: v_mac_legacy_f32_e32 v2, v0, v1 ; GFX101-NEXT: v_mov_b32_e32 v0, v2 ; GFX101-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir b/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir index c61f92d55d31c..a459ead8e674f 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/agpr-to-agpr-copy.mir @@ -10,20 +10,21 @@ body: | liveins: $sgpr0_sgpr1 ; GFX908-LABEL: name: test_mfma_f32_4x4x1f32_propagate_vgpr ; GFX908: liveins: $sgpr0_sgpr1 - ; GFX908: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GFX908: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; GFX908: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec - ; GFX908: undef %4.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B32_e32_1]], implicit $exec - ; GFX908: %4.sub1:areg_128 = COPY [[V_MOV_B32_e32_1]] - ; GFX908: %4.sub2:areg_128 = COPY [[V_MOV_B32_e32_1]] - ; GFX908: %4.sub3:areg_128 = COPY [[V_MOV_B32_e32_1]] - ; GFX908: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec - ; GFX908: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; GFX908: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_3]], [[V_MOV_B32_e32_2]], %4, 0, 0, 0, implicit $mode, implicit $exec - ; GFX908: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_F32_4X4X1F32_e64_]] - ; GFX908: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; GFX908: S_ENDPGM 0 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1123418112, implicit $exec + ; GFX908-NEXT: undef %4.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[V_MOV_B32_e32_1]], implicit $exec + ; GFX908-NEXT: %4.sub1:areg_128 = COPY [[V_MOV_B32_e32_1]] + ; GFX908-NEXT: %4.sub2:areg_128 = COPY [[V_MOV_B32_e32_1]] + ; GFX908-NEXT: %4.sub3:areg_128 = COPY [[V_MOV_B32_e32_1]] + ; GFX908-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX908-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_3]], [[V_MOV_B32_e32_2]], %4, 0, 0, 0, implicit $mode, implicit $exec + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_F32_4X4X1F32_e64_]] + ; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], 
[[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; GFX908-NEXT: S_ENDPGM 0 %1:sgpr_64(p4) = COPY $sgpr0_sgpr1 %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -48,19 +49,20 @@ body: | liveins: $sgpr0_sgpr1 ; GFX908-LABEL: name: test_mfma_f32_4x4x1f32_no_propagate_imm ; GFX908: liveins: $sgpr0_sgpr1 - ; GFX908: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 - ; GFX908: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) - ; GFX908: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec - ; GFX908: undef %3.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec - ; GFX908: %3.sub1:areg_128 = COPY %3.sub0 - ; GFX908: %3.sub2:areg_128 = COPY %3.sub0 - ; GFX908: %3.sub3:areg_128 = COPY %3.sub0 - ; GFX908: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec - ; GFX908: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec - ; GFX908: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_2]], [[V_MOV_B32_e32_1]], %3, 0, 0, 0, implicit $mode, implicit $exec - ; GFX908: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_F32_4X4X1F32_e64_]] - ; GFX908: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1) - ; GFX908: S_ENDPGM 0 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1 + ; GFX908-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) + ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX908-NEXT: undef %3.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 1073741824, implicit $exec + ; GFX908-NEXT: %3.sub1:areg_128 = COPY %3.sub0 + ; GFX908-NEXT: %3.sub2:areg_128 = COPY %3.sub0 + ; GFX908-NEXT: %3.sub3:areg_128 = COPY %3.sub0 + ; GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1073741824, implicit $exec + ; GFX908-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec + ; GFX908-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_4X4X1F32_e64 [[V_MOV_B32_e32_2]], [[V_MOV_B32_e32_1]], %3, 0, 0, 0, implicit $mode, implicit $exec + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_F32_4X4X1F32_e64_]] + ; GFX908-NEXT: GLOBAL_STORE_DWORDX4_SADDR [[V_MOV_B32_e32_]], [[COPY1]], [[S_LOAD_DWORDX2_IMM]], 0, 0, implicit $exec :: (store (s128), addrspace 1) + ; GFX908-NEXT: S_ENDPGM 0 %1:sgpr_64(p4) = COPY $sgpr0_sgpr1 %4:sreg_64_xexec = S_LOAD_DWORDX2_IMM %1:sgpr_64(p4), 36, 0 :: (dereferenceable invariant load (s64), align 4, addrspace 4) %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec @@ -84,12 +86,13 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX908-LABEL: name: test_vgpr_subreg_propagate ; GFX908: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX908: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec - ; GFX908: undef %1.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec - ; GFX908: %1.sub1:areg_128 = COPY [[COPY]].sub0 - ; GFX908: %1.sub2:areg_128 = COPY [[COPY]].sub0 - ; GFX908: %1.sub3:areg_128 = COPY [[COPY]].sub0 - ; GFX908: S_ENDPGM 0, implicit [[COPY]], implicit %1 + ; GFX908-NEXT: {{ $}} + ; 
GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec + ; GFX908-NEXT: undef %1.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec + ; GFX908-NEXT: %1.sub1:areg_128 = COPY [[COPY]].sub0 + ; GFX908-NEXT: %1.sub2:areg_128 = COPY [[COPY]].sub0 + ; GFX908-NEXT: %1.sub3:areg_128 = COPY [[COPY]].sub0 + ; GFX908-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit %1 %0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec undef %1.sub0:areg_128 = V_ACCVGPR_WRITE_B32_e64 %0.sub0, implicit $exec %1.sub1:areg_128 = COPY %1.sub0:areg_128 @@ -106,11 +109,12 @@ body: | liveins: $vgpr0_vgpr1 ; GFX908-LABEL: name: test_nonmatching_agpr_subreg_no_propagate ; GFX908: liveins: $vgpr0_vgpr1 - ; GFX908: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec - ; GFX908: undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec - ; GFX908: %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub1, implicit $exec - ; GFX908: [[COPY1:%[0-9]+]]:areg_64 = COPY %1 - ; GFX908: S_ENDPGM 0, implicit [[COPY]], implicit %1, implicit [[COPY1]] + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec + ; GFX908-NEXT: undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec + ; GFX908-NEXT: %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub1, implicit $exec + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:areg_64 = COPY %1 + ; GFX908-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit %1, implicit [[COPY1]] %0:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub0, implicit $exec %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub1, implicit $exec @@ -126,11 +130,12 @@ body: | liveins: $vgpr0_vgpr1 ; GFX908-LABEL: name: test_subreg_to_single_agpr_reg_propagate ; GFX908: liveins: $vgpr0_vgpr1 - ; GFX908: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec - ; GFX908: undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec - ; GFX908: %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub1, implicit $exec - ; GFX908: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]].sub1 - ; GFX908: S_ENDPGM 0, implicit [[COPY]], implicit %1, implicit [[COPY1]] + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec + ; GFX908-NEXT: undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub0, implicit $exec + ; GFX908-NEXT: %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 [[COPY]].sub1, implicit $exec + ; GFX908-NEXT: [[COPY1:%[0-9]+]]:agpr_32 = COPY [[COPY]].sub1 + ; GFX908-NEXT: S_ENDPGM 0, implicit [[COPY]], implicit %1, implicit [[COPY1]] %0:vreg_64 = COPY $vgpr0_vgpr1, implicit $exec undef %1.sub0:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub0, implicit $exec %1.sub1:areg_64 = V_ACCVGPR_WRITE_B32_e64 %0.sub1, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index c86cdfce05df7..accc9a1026360 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -153,14 +153,14 @@ attributes #0 = { nounwind } ; GCN-NEXT: .shader_functions: ; GCN-NEXT: dynamic_stack: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .sgpr_count: 0x28{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; SDAG-NEXT: .vgpr_count: 0x2{{$}} ; GISEL-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: dynamic_stack_loop: ; GCN-NEXT: .lds_size: 0{{$}} -; SDAG-NEXT: .sgpr_count: 0x22{{$}} -; GISEL-NEXT: .sgpr_count: 
0x24{{$}} +; SDAG-NEXT: .sgpr_count: 0x25{{$}} +; GISEL-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; SDAG-NEXT: .vgpr_count: 0x3{{$}} ; GISEL-NEXT: .vgpr_count: 0x4{{$}} @@ -176,26 +176,26 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x21{{$}} +; GCN-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: no_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x24{{$}} -; GFX9-NEXT: .sgpr_count: 0x28{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x29{{$}} ; GCN-NEXT: no_stack_extern_call_many_args: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x24{{$}} -; GFX9-NEXT: .sgpr_count: 0x28{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}} ; SDAG-NEXT: .vgpr_count: 0x2a{{$}} ; GISEL-NEXT: .vgpr_count: 0x34{{$}} ; GCN-NEXT: no_stack_indirect_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x24{{$}} -; GFX9-NEXT: .sgpr_count: 0x28{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; SDAG-NEXT: .vgpr_count: 0x2a{{$}} ; GISEL-NEXT: .vgpr_count: 0x34{{$}} @@ -206,7 +206,7 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: simple_lds_recurse: ; GCN-NEXT: .lds_size: 0x100{{$}} -; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x29{{$}} ; GCN-NEXT: simple_stack: @@ -216,25 +216,25 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x22{{$}} +; GCN-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: simple_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x24{{$}} -; GFX9-NEXT: .sgpr_count: 0x28{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2a{{$}} ; GCN-NEXT: simple_stack_indirect_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GFX8-NEXT: .sgpr_count: 0x24{{$}} -; GFX9-NEXT: .sgpr_count: 0x28{{$}} +; GFX8-NEXT: .sgpr_count: 0x28{{$}} +; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; SDAG-NEXT: .vgpr_count: 0x2b{{$}} ; GISEL-NEXT: .vgpr_count: 0x34{{$}} ; GCN-NEXT: simple_stack_recurse: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2a{{$}} ; GCN-NEXT: ... 
diff --git a/llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir b/llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir index 5f49f6d4ea8fc..b1df3aca226b4 100644 --- a/llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir +++ b/llvm/test/CodeGen/AMDGPU/combine-sreg64-inits.mir @@ -1,36 +1,87 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass=liveintervals,amdgpu-pre-ra-optimizations %s -o - | FileCheck -check-prefix=GCN %s --- -# GCN-LABEL: name: combine_sreg64_inits -# GCN: %0:sgpr_64 = S_MOV_B64_IMM_PSEUDO 8589934593 -# GCN: S_NOP 0 name: combine_sreg64_inits tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: combine_sreg64_inits + ; GCN: dead %0:sgpr_64 = S_MOV_B64_IMM_PSEUDO 8589934593 + ; GCN-NEXT: S_NOP 0 undef %0.sub0:sgpr_64 = S_MOV_B32 1 S_NOP 0 %0.sub1:sgpr_64 = S_MOV_B32 2 ... --- -# GCN-LABEL: name: combine_sreg64_inits_swap -# GCN: %0:sgpr_64 = S_MOV_B64_IMM_PSEUDO 8589934593 -# GCN: S_NOP 0 name: combine_sreg64_inits_swap tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: combine_sreg64_inits_swap + ; GCN: dead %0:sgpr_64 = S_MOV_B64_IMM_PSEUDO 8589934593 + ; GCN-NEXT: S_NOP 0 undef %0.sub1:sgpr_64 = S_MOV_B32 2 S_NOP 0 %0.sub0:sgpr_64 = S_MOV_B32 1 ... --- -# GCN-LABEL: name: sreg64_inits_different_blocks -# GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 -# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 +name: sreg64_subreg_copy_0 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: sreg64_subreg_copy_0 + ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: undef %1.sub0:sgpr_64 = COPY [[DEF]] + ; GCN-NEXT: %1.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: dead %1.sub1:sgpr_64 = S_MOV_B32 2 + %0:sgpr_32 = IMPLICIT_DEF + undef %1.sub0:sgpr_64 = COPY %0:sgpr_32 + %1.sub0:sgpr_64 = S_MOV_B32 1 + %1.sub1:sgpr_64 = S_MOV_B32 2 +... +--- +name: sreg64_subreg_copy_1 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: sreg64_subreg_copy_1 + ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: undef %1.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: %1.sub1:sgpr_64 = COPY [[DEF]] + ; GCN-NEXT: dead %1.sub1:sgpr_64 = S_MOV_B32 2 + %0:sgpr_32 = IMPLICIT_DEF + undef %1.sub0:sgpr_64 = S_MOV_B32 1 + %1.sub1:sgpr_64 = COPY %0:sgpr_32 + %1.sub1:sgpr_64 = S_MOV_B32 2 +... +--- +name: sreg64_subreg_copy_2 +tracksRegLiveness: true +body: | + bb.0: + ; GCN-LABEL: name: sreg64_subreg_copy_2 + ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: undef %1.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: %1.sub1:sgpr_64 = S_MOV_B32 2 + ; GCN-NEXT: dead %1.sub0:sgpr_64 = COPY [[DEF]] + %0:sgpr_32 = IMPLICIT_DEF + undef %1.sub0:sgpr_64 = S_MOV_B32 1 + %1.sub1:sgpr_64 = S_MOV_B32 2 + %1.sub0:sgpr_64 = COPY %0:sgpr_32 +... +--- name: sreg64_inits_different_blocks tracksRegLiveness: true body: | + ; GCN-LABEL: name: sreg64_inits_different_blocks + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: undef %0.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: dead %0.sub1:sgpr_64 = S_MOV_B32 2 bb.0: undef %0.sub0:sgpr_64 = S_MOV_B32 1 @@ -38,61 +89,61 @@ body: | %0.sub1:sgpr_64 = S_MOV_B32 2 ... 
--- -# GCN-LABEL: name: sreg64_inits_two_defs_sub1 -# GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 -# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 -# GCN: %0.sub1:sgpr_64 = S_MOV_B32 3 name: sreg64_inits_two_defs_sub1 tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: sreg64_inits_two_defs_sub1 + ; GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: %0.sub1:sgpr_64 = S_MOV_B32 2 + ; GCN-NEXT: dead %0.sub1:sgpr_64 = S_MOV_B32 3 undef %0.sub0:sgpr_64 = S_MOV_B32 1 %0.sub1:sgpr_64 = S_MOV_B32 2 %0.sub1:sgpr_64 = S_MOV_B32 3 ... --- -# GCN-LABEL: name: sreg64_inits_two_defs_sub0 -# GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 -# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 -# GCN: %0.sub0:sgpr_64 = S_MOV_B32 3 name: sreg64_inits_two_defs_sub0 tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: sreg64_inits_two_defs_sub0 + ; GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: %0.sub1:sgpr_64 = S_MOV_B32 2 + ; GCN-NEXT: dead %0.sub0:sgpr_64 = S_MOV_B32 3 undef %0.sub0:sgpr_64 = S_MOV_B32 1 %0.sub1:sgpr_64 = S_MOV_B32 2 %0.sub0:sgpr_64 = S_MOV_B32 3 ... --- -# GCN-LABEL: name: sreg64_inits_full_def -# GCN: undef %1.sub0:sgpr_64 = S_MOV_B32 1 -# GCN: %0:sgpr_64 = S_MOV_B64 3 name: sreg64_inits_full_def tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: sreg64_inits_full_def + ; GCN: dead undef %1.sub0:sgpr_64 = S_MOV_B32 1 + ; GCN-NEXT: dead %0:sgpr_64 = S_MOV_B64 3 undef %0.sub0:sgpr_64 = S_MOV_B32 1 %0:sgpr_64 = S_MOV_B64 3 ... --- -# GCN-LABEL: name: sreg64_inits_imp_use -# GCN: %0.sub0:sgpr_64 = S_MOV_B32 1, implicit $m0 -# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 name: sreg64_inits_imp_use tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: sreg64_inits_imp_use + ; GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1, implicit $m0 + ; GCN-NEXT: dead %0.sub1:sgpr_64 = S_MOV_B32 2 undef %0.sub0:sgpr_64 = S_MOV_B32 1, implicit $m0 %0.sub1:sgpr_64 = S_MOV_B32 2 ... --- -# GCN-LABEL: name: sreg64_inits_imp_def -# GCN: %0.sub0:sgpr_64 = S_MOV_B32 1, implicit-def $scc -# GCN: %0.sub1:sgpr_64 = S_MOV_B32 2 name: sreg64_inits_imp_def tracksRegLiveness: true body: | bb.0: + ; GCN-LABEL: name: sreg64_inits_imp_def + ; GCN: undef %0.sub0:sgpr_64 = S_MOV_B32 1, implicit-def $scc + ; GCN-NEXT: dead %0.sub1:sgpr_64 = S_MOV_B32 2 undef %0.sub0:sgpr_64 = S_MOV_B32 1, implicit-def $scc %0.sub1:sgpr_64 = S_MOV_B32 2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll new file mode 100644 index 0000000000000..ead58214bdf99 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: @bfe_uniform +; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x40010 +define amdgpu_kernel void @bfe_uniform(i32 %val, i32 addrspace(1)* %out) { + %hibits = lshr i32 %val, 16 + %masked = and i32 %hibits, 15 + store i32 %masked, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: @bfe_divergent +; GCN: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 4 +define amdgpu_kernel void @bfe_divergent(i32 %val, i32 addrspace(1)* %out) { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %divergent = add i32 %val, %tid + %hibits = lshr i32 %divergent, 16 + %masked = and i32 %hibits, 15 + store i32 %masked, i32 addrspace(1)* %out + ret void +} + + +declare i32 @llvm.amdgcn.workitem.id.x() + diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll index 50a505d5e14ba..b66ab4e577aaf 100644 --- a/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_vector_dynelt.ll @@ -234,7 +234,7 @@ entry: ; GCN-LABEL: {{^}}double8_extelt: ; GCN-NOT: buffer_ ; GCN-NOT: s_or_b32 -; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0 +; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] @@ -250,7 +250,7 @@ entry: ; GCN-LABEL: {{^}}double7_extelt: ; GCN-NOT: buffer_ ; GCN-NOT: s_or_b32 -; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0 +; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] @@ -294,7 +294,7 @@ entry: ; GCN-LABEL: {{^}}double15_extelt: ; GCN-NOT: buffer_ ; GCN-NOT: s_or_b32 -; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0 +; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] @@ -310,7 +310,7 @@ entry: ; GCN-LABEL: {{^}}double16_extelt: ; GCN-NOT: buffer_ ; GCN-NOT: s_or_b32 -; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0 +; GCN-DAG: s_mov_b32 [[ZERO:s[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[#BASE:]], [[ZERO]] ; GCN-DAG: s_mov_b32 m0, [[IND:s[0-9]+]] ; GCN-DAG: v_movrels_b32_e32 v[[RES_LO:[0-9]+]], v[[#BASE]] diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index 16022e33b84c9..29d1518ba19b6 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -96,59 +96,59 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_i1_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40,
s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i1_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -169,16 +169,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i1(i1 true) ret void } @@ -187,9 +187,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i1_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 @@ -197,53 +197,53 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i1_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12 ; GFX10-NEXT: v_and_b32_e32 v0, 1, 
v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_signext: ; GFX10-SCRATCH: ; %bb.0: @@ -266,16 +266,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i1, i1 addrspace(1)* undef call amdgpu_gfx void @external_void_func_i1_signext(i1 signext%var) ret void @@ -285,9 +285,9 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i1_zeroext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 @@ -295,53 +295,53 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; 
GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i1_zeroext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_zeroext: ; GFX10-SCRATCH: ; %bb.0: @@ -364,16 +364,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i1, i1 addrspace(1)* undef call amdgpu_gfx void @external_void_func_i1_zeroext(i1 zeroext %var) ret void @@ -383,57 +383,57 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX9-LABEL: 
test_call_external_void_func_i8_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i8_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -453,16 +453,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i8(i8 123) ret void } @@ -471,9 +471,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 @@ -481,49 +481,49 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i8_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_sbyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 ; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_signext: ; GFX10-SCRATCH: ; %bb.0: @@ -544,16 +544,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i8, i8 addrspace(1)* undef call amdgpu_gfx void @external_void_func_i8_signext(i8 signext %var) ret void @@ -563,9 +563,9 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_zeroext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 @@ -573,49 +573,49 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i8_zeroext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_zeroext: ; GFX10-SCRATCH: ; %bb.0: @@ -636,16 +636,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i8, i8 addrspace(1)* undef call amdgpu_gfx void @external_void_func_i8_zeroext(i8 zeroext %var) ret void @@ -655,57 +655,57 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-LABEL: 
test_call_external_void_func_i16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -725,16 +725,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i16(i16 123) ret void } @@ -743,9 +743,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i16_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 @@ -753,49 +753,49 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i16_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, 
external_void_func_i16_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_signext: ; GFX10-SCRATCH: ; %bb.0: @@ -816,16 +816,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i16, i16 addrspace(1)* undef call amdgpu_gfx void @external_void_func_i16_signext(i16 signext %var) ret void @@ -835,9 +835,9 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i16_zeroext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 @@ -845,49 +845,49 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 
s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i16_zeroext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_ushort v0, v[0:1], off glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_zeroext: ; GFX10-SCRATCH: ; %bb.0: @@ -908,16 +908,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %var = load volatile i16, i16 addrspace(1)* undef call amdgpu_gfx void @external_void_func_i16_zeroext(i16 zeroext %var) ret void @@ -927,57 +927,57 
@@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -997,16 +997,16 @@ define amdgpu_gfx void 
@test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i32(i32 42) ret void } @@ -1015,9 +1015,9 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_i64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -1025,49 +1025,49 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 
s5, s5, external_void_func_i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -1088,16 +1088,16 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i64(i64 123) ret void } @@ -1106,9 +1106,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -1117,50 +1117,50 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: 
v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64: ; GFX10-SCRATCH: ; %bb.0: @@ -1182,16 +1182,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i64>, <2 x i64> addrspace(1)* null call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> %val) ret void @@ -1201,9 +1201,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -1213,28 +1213,28 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2i64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: v_mov_b32_e32 v1, 2 @@ -1243,21 +1243,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm: ; GFX10-SCRATCH: ; %bb.0: @@ 
-1280,16 +1280,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>) ret void } @@ -1298,9 +1298,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -1311,28 +1311,28 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 @@ -1342,21 +1342,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT:
v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64: ; GFX10-SCRATCH: ; %bb.0: @@ -1380,16 +1380,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %load = load <2 x i64>, <2 x i64> addrspace(1)* null %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2> call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val) ret void } @@ -1401,9 +1401,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -1416,28 +1416,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v6, 3 ; GFX9-NEXT: v_mov_b32_e32 v7, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4 +; GFX9-NEXT:
s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v4i64: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 @@ -1449,21 +1449,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: v_mov_b32_e32 v7, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64: ; GFX10-SCRATCH: ; %bb.0: @@ -1489,16 +1489,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %load = load <2 x 
i64>, <2 x i64> addrspace(1)* null %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3> call amdgpu_gfx void @external_void_func_v4i64(<4 x i64> %val) ret void } @@ -1509,57 +1509,57 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_f16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_f16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT:
s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -1579,16 +1579,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f16(half 4.0) ret void } @@ -1597,57 +1597,57 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, 
s5, external_void_func_f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -1667,16 +1667,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f32(float 4.0) ret void } @@ -1685,9 +1685,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -1695,49 +1695,49 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; 
GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -1758,16 +1758,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>) ret void } @@ -1776,9 +1776,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT:
s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -1787,28 +1787,28 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 @@ -1816,21 +1816,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -1852,16 +1852,16 @@ define amdgpu_gfx 
void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>) ret void } @@ -1870,9 +1870,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v5f32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -1883,28 +1883,28 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v5f32_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 @@ -1914,21 +1914,21 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT:
s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -1952,16 +1952,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>) ret void } @@ -1970,9 +1970,9 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_f64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -1980,49 +1980,49 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64
s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_f64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -2043,16 +2043,16 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f64(double 4.0) ret void } @@ -2061,9 +2061,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-LABEL: 
test_call_external_void_func_v2f64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -2073,28 +2073,28 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2f64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 @@ -2103,21 +2103,21 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -2140,16 +2140,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2f64(<2 x double> ) ret void } @@ -2158,9 +2158,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f64_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -2172,28 +2172,28 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3f64_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_mov_b32_e32 v1, 2.0 @@ -2205,20 +2205,20 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -2243,16 +2243,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f64(<3 x double> ) ret void } @@ -2261,57 +2261,57 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, 
external_void_func_v2i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16: ; GFX10-SCRATCH: ; %bb.0: @@ -2331,16 +2331,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i16>, <2 x i16> addrspace(1)* undef call amdgpu_gfx void @external_void_func_v2i16(<2 x i16> %val) ret void @@ -2350,57 +2350,57 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3i16() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16: ; GFX10-SCRATCH: ; %bb.0: @@ -2420,16 +2420,16 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x i16>, <3 x i16> addrspace(1)* undef call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> %val) ret void @@ -2439,57 +2439,57 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16: ; GFX10-SCRATCH: ; %bb.0: @@ -2509,16 +2509,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x half>, <3 x half> addrspace(1)* undef call amdgpu_gfx void @external_void_func_v3f16(<3 x half> %val) ret void @@ -2528,9 +2528,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -2538,49 +2538,49 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 
; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3i16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -2601,16 +2601,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i16(<3 x i16> ) ret void } @@ -2619,9 +2619,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: 
s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -2629,49 +2629,49 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3f16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -2692,16 +2692,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: 
s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f16(<3 x half> ) ret void } @@ -2710,57 +2710,57 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v4i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 
s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16: ; GFX10-SCRATCH: ; %bb.0: @@ -2780,16 +2780,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <4 x i16>, <4 x i16> addrspace(1)* undef call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> %val) ret void @@ -2799,9 +2799,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -2809,49 +2809,49 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; 
GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v4i16_imm: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm: ; GFX10-SCRATCH: ; %bb.0: @@ -2872,16 +2872,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v4i16(<4 x i16> ) ret void } @@ -2890,57 +2890,57 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2f16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16: ; GFX10-SCRATCH: ; %bb.0: @@ -2960,16 +2960,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, 
v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x half>, <2 x half> addrspace(1)* undef call amdgpu_gfx void @external_void_func_v2f16(<2 x half> %val) ret void @@ -2979,57 +2979,57 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, 
external_void_func_v2i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32: ; GFX10-SCRATCH: ; %bb.0: @@ -3049,16 +3049,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i32>, <2 x i32> addrspace(1)* undef call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> %val) ret void @@ -3068,9 +3068,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32_imm: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -3078,49 +3078,49 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, 
s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v2i32_imm:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3141,16 +3141,16 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
   ret void
 }
@@ -3159,9 +3159,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
 ; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
@@ -3170,28 +3170,28 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
 ; GFX9-NEXT: v_mov_b32_e32 v1, 4
 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v3i32_imm:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
@@ -3199,21 +3199,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3235,16 +3235,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
   ret void
 }
@@ -3253,9 +3253,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
 ; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
@@ -3265,28 +3265,28 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
 ; GFX9-NEXT: v_mov_b32_e32 v3, 6
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v3i32_i32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: v_mov_b32_e32 v0, 3
 ; GFX10-NEXT: v_mov_b32_e32 v1, 4
@@ -3295,21 +3295,21 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3332,16 +3332,16 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
   ret void
 }
@@ -3350,57 +3350,57 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v4i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v4i32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3420,16 +3420,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 {
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
   call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> %val)
   ret void
@@ -3439,9 +3439,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
@@ -3451,28 +3451,28 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v4i32_imm:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
@@ -3481,21 +3481,21 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3518,16 +3518,16 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
   ret void
 }
@@ -3536,9 +3536,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
@@ -3549,28 +3549,28 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
 ; GFX9-NEXT: v_mov_b32_e32 v3, 4
 ; GFX9-NEXT: v_mov_b32_e32 v4, 5
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v5i32_imm:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
@@ -3580,21 +3580,21 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
 ; GFX10-NEXT: v_mov_b32_e32 v4, 5
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3618,16 +3618,16 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
   ret void
 }
@@ -3636,66 +3636,67 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v8i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v8, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v8, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[4:5] offset:16
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v8i32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v8, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v8, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[4:5] offset:16
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3720,16 +3721,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
   %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
   call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> %val)
@@ -3740,9 +3741,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
@@ -3756,28 +3757,28 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
 ; GFX9-NEXT: v_mov_b32_e32 v6, 7
 ; GFX9-NEXT: v_mov_b32_e32 v7, 8
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v8i32_imm:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: v_mov_b32_e32 v0, 1
 ; GFX10-NEXT: v_mov_b32_e32 v1, 2
@@ -3791,20 +3792,20 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3831,16 +3832,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 {
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
   ret void
 }
@@ -3849,70 +3850,71 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v16i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v16, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v16, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[4:5] offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[4:5] offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[4:5] offset:48
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32
+; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v16i32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v16, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v16, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x3
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[4:5] offset:16
-; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[4:5] offset:32
-; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[4:5] offset:48
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x3
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32
+; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -3939,16 +3941,16 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
   %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  call amdgpu_gfx void @external_void_func_v16i32(<16 x i32> %val)
@@ -3959,79 +3961,80 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v32i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v28, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v28, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[4:5] offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[4:5] offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[4:5] offset:48
-; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[4:5] offset:64
-; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[4:5] offset:80
-; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32
+; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48
+; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64
+; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80
+; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96
 ; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v32i32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v32, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v32, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x7
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[4:5] offset:16
-; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[4:5] offset:32
-; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[4:5] offset:48
-; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[4:5] offset:64
-; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[4:5] offset:80
-; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[4:5] offset:96
-; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[4:5] offset:112
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v32i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x7
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32
+; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48
+; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64
+; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80
+; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96
+; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -4062,16 +4065,16 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
   %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
   call amdgpu_gfx void @external_void_func_v32i32(<32 x i32> %val)
@@ -4082,86 +4085,87 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
 ; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v28, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v28, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[4:5]
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[4:5] offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[4:5] offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[4:5] offset:48
-; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[4:5] offset:64
-; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[4:5] offset:80
-; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[4:5] offset:96
+; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31]
+; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16
+; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32
+; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48
+; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64
+; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80
+; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96
 ; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[4:5] offset:112
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
+; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12
 ; GFX9-NEXT: s_waitcnt vmcnt(7)
 ; GFX9-NEXT: global_load_dword v32, v[0:1], off
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v32i32_i32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v32, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v32, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
 ; GFX10-NEXT: global_load_dword v33, v[0:1], off
 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT: s_clause 0x7
-; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[4:5]
-; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[4:5] offset:16
-; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[4:5] offset:32
-; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[4:5] offset:48
-; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[4:5] offset:64
-; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[4:5] offset:80
-; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[4:5] offset:96
-; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[4:5] offset:112
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31]
+; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16
+; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32
+; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48
+; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64
+; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80
+; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96
+; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12
 ; GFX10-NEXT: s_waitcnt vmcnt(8)
 ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -4195,16 +4199,16 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 {
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8)
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
   %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0
   %val1 = load i32, i32 addrspace(1)* undef
@@ -4216,9 +4220,9 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o
 ; GFX9-LABEL: test_call_external_i32_func_i32_imm:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
@@ -4229,32 +4233,32 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o
 ; GFX9-NEXT: v_mov_b32_e32 v0, 42
 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX9-NEXT: v_mov_b32_e32 v42, v1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
 ; GFX9-NEXT: global_store_dword v[41:42], v0, off
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
 ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_i32_func_i32_imm:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
@@ -4264,26 +4268,26 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o
 ; GFX10-NEXT: v_mov_b32_e32 v0, 42
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
 ; GFX10-NEXT: v_mov_b32_e32 v42, v1
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
 ; GFX10-NEXT: global_store_dword v[41:42], v0, off
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT: s_clause 0x1
 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33
 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_i32_func_i32_imm:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -4312,16 +4316,16 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o
 ; GFX10-SCRATCH-NEXT: s_clause 0x1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %val = call amdgpu_gfx i32 @external_i32_func_i32(i32 42)
   store volatile i32 %val, i32 addrspace(1)* %out
   ret void
@@ -4331,66 +4335,67 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
 ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 1
+; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-NEXT: global_load_dword v1, v2, s[4:5] offset:4
-; GFX9-NEXT: global_load_ubyte v0, v2, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[4:5]
-; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: global_load_dword v1, v2, s[30:31] offset:4
+; GFX9-NEXT: global_load_ubyte v0, v2, s[30:31]
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 0
+; GFX9-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_struct_i8_i32:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
-; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_clause 0x1
-; GFX10-NEXT: global_load_ubyte v0, v2, s[4:5]
-; GFX10-NEXT: global_load_dword v1, v2, s[4:5] offset:4
 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[4:5]
-; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_ubyte v0, v2, s[30:31]
+; GFX10-NEXT: global_load_dword v1, v2, s[30:31] offset:4
+; GFX10-NEXT: s_waitcnt_depctr 0xffe3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_struct_i8_i32:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -4415,16 +4420,16 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 {
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12
 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef
   %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0
   call amdgpu_gfx void @external_void_func_struct_i8_i32({ i8, i32 } %val)
@@ -4435,9 +4440,9 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
 ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: v_mov_b32_e32 v0, 3
@@ -4448,28 +4453,28 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 {
 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6,
s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 @@ -4479,21 +4484,21 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX10-SCRATCH: ; %bb.0: @@ -4517,16 +4522,16 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = alloca { i8, i32 }, align 4, addrspace(5) %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1 @@ -4540,9 +4545,9 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 @@ -4555,14 +4560,14 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4570,20 +4575,20 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v1, off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; 
GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_mov_b32_e32 v0, 3 ; GFX10-NEXT: v_mov_b32_e32 v1, 8 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 @@ -4594,17 +4599,17 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -4612,12 +4617,12 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: global_store_dword v[0:1], v1, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX10-SCRATCH: ; %bb.0: @@ -4646,8 +4651,8 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -4655,12 +4660,12 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-SCRATCH-NEXT: global_store_dword v[0:1], v1, off ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0 @@ -4682,21 +4687,21 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-LABEL: test_call_external_void_func_v16i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4716,38 +4721,38 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, v16 ; GFX9-NEXT: v_mov_b32_e32 v2, v17 ; GFX9-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v16i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: 
s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[4:5] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4767,17 +4772,17 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v1, v16 ; GFX10-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-NEXT: v_mov_b32_e32 v3, v18 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i8: ; GFX10-SCRATCH: ; %bb.0: @@ -4820,16 +4825,16 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr call amdgpu_gfx void @external_void_func_v16i8(<16 x i8> %val) @@ -4840,44 +4845,269 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-LABEL: tail_call_byval_align16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12 -; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: v_writelane_b32 v40, s33, 30 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:8 +; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:12 +; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s40, 6 +; GFX9-NEXT: v_writelane_b32 v40, s41, 7 +; GFX9-NEXT: v_writelane_b32 v40, s42, 8 +; GFX9-NEXT: v_writelane_b32 v40, s43, 9 +; GFX9-NEXT: v_writelane_b32 v40, s44, 10 +; GFX9-NEXT: v_writelane_b32 v40, s45, 11 +; GFX9-NEXT: v_writelane_b32 v40, s46, 12 +; GFX9-NEXT: v_writelane_b32 v40, s47, 13 +; GFX9-NEXT: v_writelane_b32 v40, s48, 14 +; GFX9-NEXT: v_writelane_b32 v40, s49, 15 +; GFX9-NEXT: v_writelane_b32 v40, s50, 16 +; GFX9-NEXT: v_writelane_b32 v40, s51, 17 +; GFX9-NEXT: v_writelane_b32 v40, s52, 18 +; GFX9-NEXT: v_writelane_b32 v40, s53, 19 +; GFX9-NEXT: v_writelane_b32 v40, s54, 20 +; GFX9-NEXT: v_writelane_b32 v40, s55, 21 +; GFX9-NEXT: v_writelane_b32 v40, s56, 22 +; GFX9-NEXT: v_writelane_b32 v40, s57, 23 +; GFX9-NEXT: v_writelane_b32 v40, s58, 24 +; GFX9-NEXT: v_writelane_b32 v40, s59, 25 +; GFX9-NEXT: v_writelane_b32 v40, s60, 26 +; GFX9-NEXT: v_writelane_b32 v40, s61, 27 +; GFX9-NEXT: s_addk_i32 s32, 0x800 +; GFX9-NEXT: v_writelane_b32 v40, s62, 28 +; GFX9-NEXT: v_writelane_b32 v40, s63, 29 +; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: v_readlane_b32 s63, v40, 29 +; GFX9-NEXT: v_readlane_b32 s62, v40, 28 +; GFX9-NEXT: v_readlane_b32 s61, v40, 27 +; GFX9-NEXT: v_readlane_b32 s60, v40, 26 +; GFX9-NEXT: v_readlane_b32 s59, v40, 25 +; GFX9-NEXT: v_readlane_b32 s58, v40, 24 +; GFX9-NEXT: v_readlane_b32 s57, v40, 23 +; GFX9-NEXT: v_readlane_b32 s56, v40, 22 +; GFX9-NEXT: v_readlane_b32 s55, v40, 21 +; GFX9-NEXT: v_readlane_b32 s54, v40, 20 +; GFX9-NEXT: v_readlane_b32 s53, v40, 19 +; GFX9-NEXT: v_readlane_b32 s52, v40, 18 +; GFX9-NEXT: v_readlane_b32 s51, v40, 17 +; GFX9-NEXT: v_readlane_b32 s50, v40, 16 +; GFX9-NEXT: v_readlane_b32 s49, v40, 15 +; GFX9-NEXT: v_readlane_b32 s48, v40, 14 +; GFX9-NEXT: v_readlane_b32 s47, v40, 13 +; GFX9-NEXT: v_readlane_b32 s46, v40, 12 +; GFX9-NEXT: v_readlane_b32 s45, v40, 11 +; GFX9-NEXT: v_readlane_b32 s44, v40, 10 +; GFX9-NEXT: v_readlane_b32 s43, v40, 9 +; GFX9-NEXT: v_readlane_b32 s42, v40, 8 +; GFX9-NEXT: v_readlane_b32 s41, v40, 7 +; GFX9-NEXT: v_readlane_b32 s40, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 +; GFX9-NEXT: v_readlane_b32 s37, v40, 3 +; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: s_addk_i32 s32, 0xf800 +; GFX9-NEXT: v_readlane_b32 s33, v40, 30 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_waitcnt 
vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[4:5] ; ; GFX10-LABEL: tail_call_byval_align16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12 -; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 +; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:12 +; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:8 +; GFX10-NEXT: s_addk_i32 s32, 0x400 +; GFX10-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[6:7] +; GFX10-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 +; GFX10-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-NEXT: v_writelane_b32 v40, s38, 4 +; GFX10-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-NEXT: v_writelane_b32 v40, s40, 6 +; GFX10-NEXT: v_writelane_b32 v40, s41, 7 +; GFX10-NEXT: v_writelane_b32 v40, s42, 8 +; GFX10-NEXT: v_writelane_b32 v40, s43, 9 +; GFX10-NEXT: v_writelane_b32 v40, s44, 10 +; GFX10-NEXT: v_writelane_b32 v40, s45, 11 +; GFX10-NEXT: v_writelane_b32 v40, s46, 12 +; GFX10-NEXT: v_writelane_b32 v40, s47, 13 +; GFX10-NEXT: v_writelane_b32 v40, s48, 14 +; GFX10-NEXT: v_writelane_b32 v40, s49, 15 +; GFX10-NEXT: v_writelane_b32 v40, s50, 16 +; GFX10-NEXT: v_writelane_b32 v40, s51, 17 +; GFX10-NEXT: v_writelane_b32 v40, s52, 18 +; GFX10-NEXT: v_writelane_b32 v40, s53, 19 +; GFX10-NEXT: v_writelane_b32 v40, s54, 20 +; GFX10-NEXT: v_writelane_b32 v40, s55, 21 +; GFX10-NEXT: v_writelane_b32 v40, s56, 22 +; GFX10-NEXT: v_writelane_b32 v40, s57, 23 +; GFX10-NEXT: v_writelane_b32 v40, s58, 24 +; GFX10-NEXT: v_writelane_b32 v40, s59, 25 +; GFX10-NEXT: v_writelane_b32 v40, s60, 26 +; GFX10-NEXT: v_writelane_b32 v40, s61, 27 +; GFX10-NEXT: v_writelane_b32 v40, s62, 28 +; GFX10-NEXT: v_writelane_b32 v40, s63, 29 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: v_readlane_b32 s63, v40, 29 +; GFX10-NEXT: v_readlane_b32 s62, v40, 28 +; GFX10-NEXT: v_readlane_b32 s61, v40, 27 +; GFX10-NEXT: v_readlane_b32 s60, v40, 26 +; GFX10-NEXT: v_readlane_b32 s59, v40, 25 +; GFX10-NEXT: v_readlane_b32 s58, v40, 24 +; GFX10-NEXT: v_readlane_b32 s57, v40, 23 +; GFX10-NEXT: v_readlane_b32 s56, v40, 22 +; GFX10-NEXT: v_readlane_b32 s55, v40, 21 +; GFX10-NEXT: v_readlane_b32 s54, v40, 20 +; GFX10-NEXT: v_readlane_b32 s53, v40, 19 +; GFX10-NEXT: v_readlane_b32 s52, v40, 18 +; GFX10-NEXT: v_readlane_b32 s51, v40, 17 +; GFX10-NEXT: v_readlane_b32 s50, v40, 16 +; GFX10-NEXT: v_readlane_b32 s49, v40, 15 +; GFX10-NEXT: v_readlane_b32 s48, v40, 14 +; GFX10-NEXT: v_readlane_b32 s47, v40, 13 +; GFX10-NEXT: v_readlane_b32 s46, v40, 12 +; GFX10-NEXT: v_readlane_b32 s45, v40, 11 +; 
GFX10-NEXT: v_readlane_b32 s44, v40, 10 +; GFX10-NEXT: v_readlane_b32 s43, v40, 9 +; GFX10-NEXT: v_readlane_b32 s42, v40, 8 +; GFX10-NEXT: v_readlane_b32 s41, v40, 7 +; GFX10-NEXT: v_readlane_b32 s40, v40, 6 +; GFX10-NEXT: v_readlane_b32 s39, v40, 5 +; GFX10-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-NEXT: v_readlane_b32 s37, v40, 3 +; GFX10-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-NEXT: s_addk_i32 s32, 0xfc00 +; GFX10-NEXT: v_readlane_b32 s33, v40, 30 +; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[4:5] ; ; GFX10-SCRATCH-LABEL: tail_call_byval_align16: ; GFX10-SCRATCH: ; %bb.0: ; %entry ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s32 offset:8 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:16 ; 4-byte Folded Spill +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:8 +; GFX10-SCRATCH-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 23 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 27 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 28 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 29 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, 
v40, 29 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 28 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 25 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 24 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 23 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 22 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 21 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 20 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 19 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 18 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 30 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 +; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:16 ; 4-byte Folded Reload +; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[4:5] entry: %alloca = alloca double, align 8, addrspace(5) tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) @@ -4889,59 +5119,59 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_i1_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_inreg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: 
s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i1_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_i1_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_i1_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i1_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -4962,16 +5192,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i1_inreg(i1 inreg true) ret void } @@ -4980,57 +5210,61 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_i8_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; 
GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i8_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i8_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i8_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i8_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; 
GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i8_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -5040,26 +5274,28 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i8_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i8_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_i8_inreg(i8 inreg 123) ret void } @@ -5068,57 +5304,61 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_i16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; 
GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_i16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_movk_i32 s4, 0x7b +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -5128,26 +5368,28 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_i16_inreg(i16 inreg 123)
   ret void
 }
@@ -5156,57 +5398,61 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
 ; GFX9-LABEL: test_call_external_void_func_i32_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: s_mov_b32 s4, 42
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_i32_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 42
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 42
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i32_imm_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5216,26 +5462,28 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i32_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 42
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_i32_inreg(i32 inreg 42)
   ret void
 }
@@ -5244,59 +5492,67 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_i64_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: s_movk_i32 s4, 0x7b
 ; GFX9-NEXT: s_mov_b32 s5, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_i64_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_movk_i32 s4, 0x7b
-; GFX10-NEXT: s_mov_b32 s5, 0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_i64_imm_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5306,27 +5562,31 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_i64_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x7b
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_i64_inreg(i64 inreg 123)
   ret void
 }
@@ -5335,59 +5595,75 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v2i64_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_mov_b64 s[30:31], 0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v2i64_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_mov_b64 s[4:5], 0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_mov_b64 s[30:31], 0
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5397,27 +5673,35 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %val = load <2 x i64>, <2 x i64> addrspace(4)* null
   call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg %val)
   ret void
@@ -5427,63 +5711,79 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v2i64_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 6
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 4
 ; GFX9-NEXT: s_mov_b32 s4, 1
 ; GFX9-NEXT: s_mov_b32 s5, 2
 ; GFX9-NEXT: s_mov_b32 s6, 3
 ; GFX9-NEXT: s_mov_b32 s7, 4
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 5
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 4
+; GFX9-NEXT: v_readlane_b32 s31, v40, 5
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 6
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v2i64_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 6
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 2
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: s_mov_b32 s6, 3
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
 ; GFX10-NEXT: s_mov_b32 s7, 4
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-NEXT: v_writelane_b32 v40, s31, 5
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i64_imm_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5493,29 +5793,37 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
-; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i64_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v2i64_inreg(<2 x i64> inreg <i64 8589934593, i64 17179869187>)
   ret void
 }
@@ -5524,63 +5832,87 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v3i64_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 8
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s30, 6
+; GFX9-NEXT: v_writelane_b32 v40, s31, 7
+; GFX9-NEXT: s_mov_b64 s[30:31], 0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: s_mov_b32 s8, 1
 ; GFX9-NEXT: s_mov_b32 s9, 2
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[10:11]
-; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 6
+; GFX9-NEXT: v_readlane_b32 s31, v40, 7
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 8
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v3i64_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_mov_b64 s[4:5], 0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT: s_mov_b32 s8, 1
-; GFX10-NEXT: s_mov_b32 s9, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 8
 ; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[10:11]
-; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-NEXT: s_mov_b32 s8, 1
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-NEXT: s_mov_b32 s9, 2
+; GFX10-NEXT: v_writelane_b32 v40, s30, 6
+; GFX10-NEXT: v_writelane_b32 v40, s31, 7
+; GFX10-NEXT: s_mov_b64 s[30:31], 0
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 8
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i64_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5590,29 +5922,41 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8
 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i64_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %load = load <2 x i64>, <2 x i64> addrspace(4)* null
   %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
   call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
@@ -5624,67 +5968,99 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v4i64_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: s_mov_b64 s[4:5], 0
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 10
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
+; GFX9-NEXT: v_writelane_b32 v40, s9, 5
+; GFX9-NEXT: v_writelane_b32 v40, s10, 6
+; GFX9-NEXT: v_writelane_b32 v40, s11, 7
+; GFX9-NEXT: v_writelane_b32 v40, s30, 8
+; GFX9-NEXT: v_writelane_b32 v40, s31, 9
+; GFX9-NEXT: s_mov_b64 s[30:31], 0
+; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX9-NEXT: s_mov_b32 s8, 1
 ; GFX9-NEXT: s_mov_b32 s9, 2
 ; GFX9-NEXT: s_mov_b32 s10, 3
 ; GFX9-NEXT: s_mov_b32 s11, 4
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[12:13]
-; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: s_getpc_b64 s[34:35]
+; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 8
+; GFX9-NEXT: v_readlane_b32 s31, v40, 9
+; GFX9-NEXT: v_readlane_b32 s11, v40, 7
+; GFX9-NEXT: v_readlane_b32 s10, v40, 6
+; GFX9-NEXT: v_readlane_b32 s9, v40, 5
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 10
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v4i64_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: s_mov_b64 s[4:5], 0
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 10
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX10-NEXT: s_mov_b32 s8, 1
+; GFX10-NEXT: v_writelane_b32 v40, s9, 5
 ; GFX10-NEXT: s_mov_b32 s9, 2
+; GFX10-NEXT: v_writelane_b32 v40, s10, 6
 ; GFX10-NEXT: s_mov_b32 s10, 3
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s11, 7
 ; GFX10-NEXT: s_mov_b32 s11, 4
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[12:13]
-; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v4i64_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-NEXT: v_writelane_b32 v40, s31, 9
+; GFX10-NEXT: s_mov_b64 s[30:31], 0
+; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i64_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5694,31 +6070,47 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10
 ; GFX10-SCRATCH-NEXT: s_mov_b64 s[0:1], 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
-; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3
+; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i64_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4
+; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5
+; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6
+; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7
+; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 4
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   %load = load <2 x i64>, <2 x i64> addrspace(4)* null
   %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   call amdgpu_gfx void @external_void_func_v4i64_inreg(<4 x i64> inreg %val)
@@ -5729,57 +6121,61 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_f16_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: s_movk_i32 s4, 0x4400
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f16_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_f16_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f16_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f16_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f16_imm_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5789,26 +6185,28 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f16_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f16_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_movk_i32 s4, 0x4400
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_f16_inreg(half inreg 4.0)
   ret void
 }
@@ -5817,57 +6215,61 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_f32_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 3
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 1
 ; GFX9-NEXT: s_mov_b32 s4, 4.0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX9-NEXT: v_writelane_b32 v40, s31, 2
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 1
+; GFX9-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0
-; GFX9-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 3
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_f32_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 4.0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 4.0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-NEXT: v_writelane_b32 v40, s31, 2
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-NEXT: v_readlane_b32 s31, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0
-; GFX10-NEXT: v_readlane_b32 s5, v40, 1
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f32_imm_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5877,26 +6279,28 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f32_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_f32_inreg(float inreg 4.0)
   ret void
 }
@@ -5905,59 +6309,67 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v2f32_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 4
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 2
 ; GFX9-NEXT: s_mov_b32 s4, 1.0
 ; GFX9-NEXT: s_mov_b32 s5, 2.0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[6:7]
-; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 3
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 2
+; GFX9-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 4
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v2f32_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-NEXT: s_mov_b32 s4, 1.0
-; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-NEXT: s_mov_b32 s33, s32
 ; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_getpc_b64 s[6:7]
-; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-NEXT: s_mov_b32 s4, 1.0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-NEXT: v_writelane_b32 v40, s31, 3
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-NEXT: v_readlane_b32 s31, v40, 3
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f32_imm_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -5967,27 +6379,31 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f32_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v2f32_inreg(<2 x float> inreg <float 1.0, float 2.0>)
   ret void
 }
@@ -5996,61 +6412,73 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v3f32_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 5
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 3
 ; GFX9-NEXT: s_mov_b32 s4, 1.0
 ; GFX9-NEXT: s_mov_b32 s5, 2.0
 ; GFX9-NEXT: s_mov_b32 s6, 4.0
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[8:9]
-; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 4
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 3
+; GFX9-NEXT: v_readlane_b32 s31, v40, 4
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 5
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v3f32_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 5
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: s_mov_b32 s6, 4.0
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[8:9]
-; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v3f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 3
+; GFX10-NEXT: v_writelane_b32 v40, s31, 4
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-NEXT: s_or_saveexec_b32 s6, -1
+; GFX10-NEXT: v_readlane_b32 s33, v40, 5
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s6
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
 ; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: s_setpc_b64 s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f32_imm_inreg:
 ; GFX10-SCRATCH: ; %bb.0:
@@ -6060,28 +6488,34 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 {
 ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2
-; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
-; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5
 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16
 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
 ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f32_inreg@rel32@lo+4
 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32_inreg@rel32@hi+12
-; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0
+; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1
+; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2
+; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3
+; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4
 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16
-; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2
-; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5
+; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1
 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload
 ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2
+; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0
 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1]
+; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
   call amdgpu_gfx void @external_void_func_v3f32_inreg(<3 x float> inreg <float 1.0, float 2.0, float 4.0>)
   ret void
 }
@@ -6090,65 +6524,85 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 {
 ; GFX9-LABEL: test_call_external_void_func_v5f32_imm_inreg:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX9-NEXT: s_mov_b64 exec, s[4:5]
-; GFX9-NEXT: v_writelane_b32 v40, s33, 2
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
+; GFX9-NEXT: v_writelane_b32 v40, s33, 7
+; GFX9-NEXT: v_writelane_b32 v40, s4, 0
+; GFX9-NEXT: v_writelane_b32 v40, s5, 1
+; GFX9-NEXT: v_writelane_b32 v40, s6, 2
+; GFX9-NEXT: v_writelane_b32 v40, s7, 3
+; GFX9-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX9-NEXT: s_mov_b32 s33, s32
 ; GFX9-NEXT: s_addk_i32 s32, 0x400
-; GFX9-NEXT: v_writelane_b32 v40, s30, 0
+; GFX9-NEXT: v_writelane_b32 v40, s30, 5
 ; GFX9-NEXT: s_mov_b32 s4, 1.0
 ; GFX9-NEXT: s_mov_b32 s5, 2.0
 ; GFX9-NEXT: s_mov_b32 s6, 4.0
 ; GFX9-NEXT: s_mov_b32 s7, -1.0
 ; GFX9-NEXT: s_mov_b32 s8, 0.5
-; GFX9-NEXT: v_writelane_b32 v40, s31, 1
-; GFX9-NEXT: s_getpc_b64 s[10:11]
-; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX9-NEXT: v_readlane_b32 s4, v40, 0
+; GFX9-NEXT: v_writelane_b32 v40, s31, 6
+; GFX9-NEXT: s_getpc_b64 s[30:31]
+; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
+; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX9-NEXT: v_readlane_b32 s30, v40, 5
+; GFX9-NEXT: v_readlane_b32 s31, v40, 6
+; GFX9-NEXT: v_readlane_b32 s8, v40, 4
+; GFX9-NEXT: v_readlane_b32 s7, v40, 3
+; GFX9-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1
+; GFX9-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00
-; GFX9-NEXT: v_readlane_b32 s33, v40, 2
-; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
+; GFX9-NEXT: v_readlane_b32 s33, v40, 7
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1
 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX9-NEXT: s_mov_b64 exec, s[6:7]
+; GFX9-NEXT: s_mov_b64 exec, s[34:35]
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: test_call_external_void_func_v5f32_imm_inreg:
 ; GFX10: ; %bb.0:
 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: s_or_saveexec_b32 s4, -1
+; GFX10-NEXT: s_or_saveexec_b32 s34, -1
 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3
-; GFX10-NEXT: s_mov_b32 exec_lo, s4
-; GFX10-NEXT: v_writelane_b32 v40, s33, 2
+; GFX10-NEXT: s_mov_b32 exec_lo, s34
+; GFX10-NEXT: v_writelane_b32 v40, s33, 7
+; GFX10-NEXT: s_mov_b32 s33, s32
+; GFX10-NEXT: s_addk_i32 s32, 0x200
+; GFX10-NEXT: v_writelane_b32 v40, s4, 0
 ; GFX10-NEXT: s_mov_b32 s4, 1.0
+; GFX10-NEXT: v_writelane_b32 v40, s5, 1
 ; GFX10-NEXT: s_mov_b32 s5, 2.0
+; GFX10-NEXT: v_writelane_b32 v40, s6, 2
 ; GFX10-NEXT: s_mov_b32 s6, 4.0
+; GFX10-NEXT: v_writelane_b32 v40, s7, 3
 ; GFX10-NEXT: s_mov_b32 s7, -1.0
-; GFX10-NEXT: v_writelane_b32 v40, s30, 0
+; GFX10-NEXT: v_writelane_b32 v40, s8, 4
 ; GFX10-NEXT: s_mov_b32 s8, 0.5
-; GFX10-NEXT: s_mov_b32 s33, s32
-; GFX10-NEXT: s_addk_i32 s32, 0x200
-; GFX10-NEXT: s_getpc_b64 s[10:11]
-; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5f32_inreg@rel32@lo+4
-; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5f32_inreg@rel32@hi+12
-; GFX10-NEXT: v_writelane_b32 v40, s31, 1
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11]
-; GFX10-NEXT: v_readlane_b32 s4, v40, 0
+; GFX10-NEXT: v_writelane_b32 v40, s30, 5
+; GFX10-NEXT: v_writelane_b32 v40, s31, 6
+; GFX10-NEXT: s_getpc_b64 s[30:31]
+; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4
+; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31]
+; GFX10-NEXT: v_readlane_b32 s30, v40, 5
+; GFX10-NEXT: v_readlane_b32 s31, v40, 6
+; GFX10-NEXT: v_readlane_b32 s8, v40, 4
+; GFX10-NEXT: v_readlane_b32 s7, v40, 3
+; GFX10-NEXT: v_readlane_b32 s6, v40, 2
 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1
+; GFX10-NEXT: v_readlane_b32 s4, v40, 0
 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00
-; GFX10-NEXT:
v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 7 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5f32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6158,30 +6612,40 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5f32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1.0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 4.0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, -1.0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0.5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v5f32_inreg(<5 x float> inreg ) ret void } @@ -6190,59 +6654,67 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_f64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; 
GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_f64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-NEXT: s_mov_b32 s4, 0 -; GFX10-NEXT: s_mov_b32 s5, 0x40100000 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_f64_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_mov_b32 s5, 0x40100000 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 
4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_f64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6252,27 +6724,31 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_f64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40100000 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_f64_inreg(double inreg 4.0) ret void } @@ -6281,63 +6757,79 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 4 ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: 
s_mov_b32 s7, 0x40100000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2f64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 0 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[8:9] -; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 
exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6347,29 +6839,37 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f64_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2f64_inreg(<2 x double> inreg ) ret void } @@ -6378,67 +6878,91 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f64_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 8 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s8, 4 +; GFX9-NEXT: v_writelane_b32 
v40, s9, 5 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 6 ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 ; GFX9-NEXT: s_mov_b32 s8, 0 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[10:11] -; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v3f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 7 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_readlane_b32 s31, v40, 7 +; GFX9-NEXT: v_readlane_b32 s9, v40, 5 +; GFX9-NEXT: v_readlane_b32 s8, v40, 4 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 8 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3f64_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 8 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 0 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: s_mov_b32 s9, 0x40200000 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[10:11] -; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v3f64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 6 +; GFX10-NEXT: v_writelane_b32 v40, s31, 7 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; 
GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 8 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f64_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6448,31 +6972,43 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 8 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4 +; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2.0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 0x40100000 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 0x40200000 -; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 8 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; 
GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f64_inreg(<3 x double> inreg ) ret void } @@ -6481,57 +7017,61 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2i16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4 +; GFX10-NEXT: 
s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i16_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6541,26 +7081,28 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i16>, <2 x i16> addrspace(4)* undef call amdgpu_gfx void @external_void_func_v2i16_inreg(<2 x i16> inreg %val) ret void @@ -6570,57 +7112,65 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: 
v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3i16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 
s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6630,26 +7180,30 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x i16>, <3 x i16> addrspace(4)* undef call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg %val) ret void @@ -6659,57 +7213,65 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, 
external_void_func_v3f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6719,26 +7281,30 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 
v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <3 x half>, <3 x half> addrspace(4)* undef call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg %val) ret void @@ -6748,59 +7314,67 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3i16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 3 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, 
s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3i16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-NEXT: s_mov_b32 s5, 3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_mov_b32 s5, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6810,27 +7384,31 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], 
s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i16_inreg(<3 x i16> inreg ) ret void } @@ -6839,59 +7417,67 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v3f16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX9-NEXT: s_movk_i32 s5, 0x4400 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3f16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 -; GFX10-NEXT: s_movk_i32 s5, 
0x4400 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_movk_i32 s5, 0x4400 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3f16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6901,27 +7487,31 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 -; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x40003c00 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_movk_i32 s5, 0x4400 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, 
s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3f16_inreg(<3 x half> inreg ) ret void } @@ -6930,57 +7520,65 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v4i16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; 
GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -6990,26 +7588,30 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <4 x i16>, <4 x i16> addrspace(4)* undef call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg %val) ret void @@ -7019,59 +7621,67 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i16_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; 
GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 0x40003 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v4i16_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-NEXT: s_mov_b32 s5, 0x40003 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_mov_b32 s5, 0x40003 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; 
GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i16_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7081,27 +7691,31 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 0x20001 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 0x40003 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v4i16_inreg(<4 x i16> inreg ) ret void } @@ -7110,57 +7724,61 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2f16_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: 
s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2f16_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2f16_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7170,26 +7788,28 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 -; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_load_dword s4, s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2f16_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x half>, <2 x half> addrspace(4)* undef call amdgpu_gfx void @external_void_func_v2f16_inreg(<2 x half> inreg %val) ret void @@ -7199,57 +7819,65 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; 
GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7259,26 +7887,30 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; 
GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <2 x i32>, <2 x i32> addrspace(4)* undef call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg %val) ret void @@ -7288,59 +7920,67 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v2i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 4 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v2i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 
v40, s33, 2 -; GFX10-NEXT: s_mov_b32 s4, 1 -; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: v_writelane_b32 v40, s31, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v2i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7350,27 +7990,31 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v2i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 4 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: 
scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v2i32_inreg(<2 x i32> inreg ) ret void } @@ -7379,61 +8023,73 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v3i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 5 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 3 ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 4 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: v_readlane_b32 s31, v40, 4 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 5 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 5 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 4 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 5 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[8:9] -; GFX10-NEXT: 
s_add_u32 s8, s8, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 3 +; GFX10-NEXT: v_writelane_b32 v40, s31, 4 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 5 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7443,28 +8099,34 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 5 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; 
GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i32_inreg(<3 x i32> inreg ) ret void } @@ -7473,63 +8135,79 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v3i32_i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 4 ; GFX9-NEXT: s_mov_b32 s4, 3 ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: s_mov_b32 s7, 6 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v3i32_i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 4 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 5 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 6 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[8:9] -; GFX10-NEXT: s_add_u32 s8, s8, 
external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v3i32_i32_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7539,29 +8217,37 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3i32_i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, 
off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v3i32_i32_inreg(<3 x i32> inreg , i32 inreg 6) ret void } @@ -7570,57 +8256,73 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[8:9] -; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, 
s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7630,26 +8332,34 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %val = load <4 x i32>, <4 x i32> addrspace(4)* undef call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg %val) ret void @@ -7659,63 +8369,79 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v4i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 6 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 4 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[8:9] -; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 6 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v4i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 3 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: 
s_getpc_b64 s[8:9] -; GFX10-NEXT: s_add_u32 s8, s8, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[8:9] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: v_writelane_b32 v40, s31, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v4i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7725,29 +8451,37 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v4i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 6 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: 
scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v4i32_inreg(<4 x i32> inreg <i32 1, i32 2, i32 3, i32 4>) ret void } @@ -7756,65 +8490,85 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v5i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 7 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 5 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: s_mov_b32 s8, 5 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[10:11] -; GFX9-NEXT: s_add_u32 s10, s10, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s11, s11, external_void_func_v5i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[10:11] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 6 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: v_readlane_b32 s31, v40, 6 +; GFX9-NEXT: v_readlane_b32 s8, v40, 4 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 7 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v5i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 7 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: 
v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 3 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 5 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[10:11] -; GFX10-NEXT: s_add_u32 s10, s10, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s11, s11, external_void_func_v5i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[10:11] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-NEXT: v_writelane_b32 v40, s31, 6 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 7 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v5i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7824,30 +8578,40 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 -; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 -; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 -; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v5i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 7 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v5i32_inreg(<5 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5>) ret void } @@ -7856,61 +8620,93 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 10 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s8, 4 +; GFX9-NEXT: v_writelane_b32 v40, s9, 5 +; GFX9-NEXT: v_writelane_b32 v40, s10, 6 +; GFX9-NEXT: v_writelane_b32 v40, s11, 7 +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[12:13] -; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: v_readlane_b32 s11, v40, 7 +; GFX9-NEXT: v_readlane_b32 s10, v40, 6 +; GFX9-NEXT: v_readlane_b32 s9, v40, 5 +; GFX9-NEXT: v_readlane_b32 s8, v40, 4 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 10 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: 
s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 10 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[12:13] -; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 10 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -7920,28 +8716,44 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <8 x i32> addrspace(4)*, <8 x i32> addrspace(4)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(4)* %ptr call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg %val) @@ -7952,13 +8764,21 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v8i32_imm_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 10 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s8, 4 +; GFX9-NEXT: v_writelane_b32 v40, s9, 5 +; GFX9-NEXT: v_writelane_b32 v40, s10, 6 +; GFX9-NEXT: v_writelane_b32 v40, s11, 7 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: s_mov_b32 s6, 3 @@ -7967,56 +8787,80 @@ define amdgpu_gfx void 
@test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s9, 6 ; GFX9-NEXT: s_mov_b32 s10, 7 ; GFX9-NEXT: s_mov_b32 s11, 8 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[12:13] -; GFX9-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[12:13] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: v_readlane_b32 s11, v40, 7 +; GFX9-NEXT: v_readlane_b32 s10, v40, 6 +; GFX9-NEXT: v_readlane_b32 s9, v40, 5 +; GFX9-NEXT: v_readlane_b32 s8, v40, 4 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 10 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v8i32_imm_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: s_mov_b32 s6, 3 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-NEXT: s_mov_b32 s7, 4 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 5 +; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: s_mov_b32 s9, 6 +; GFX10-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-NEXT: s_mov_b32 s10, 7 +; GFX10-NEXT: v_writelane_b32 v40, s11, 7 ; GFX10-NEXT: s_mov_b32 s11, 8 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[12:13] -; GFX10-NEXT: s_add_u32 s12, s12, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s13, s13, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[12:13] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-NEXT: v_writelane_b32 v40, s31, 9 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; 
GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 10 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v8i32_imm_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -8026,33 +8870,49 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 +; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 +; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s4, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-SCRATCH-NEXT: s_mov_b32 s5, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s6, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 ; GFX10-SCRATCH-NEXT: s_mov_b32 s7, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-SCRATCH-NEXT: s_mov_b32 s8, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-SCRATCH-NEXT: s_mov_b32 s9, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-SCRATCH-NEXT: s_mov_b32 s10, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 ; GFX10-SCRATCH-NEXT: s_mov_b32 s11, 8 -; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 -; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] -; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: 
s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 10 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_v8i32_inreg(<8 x i32> inreg <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>) ret void } @@ -8061,61 +8921,125 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v16i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 18 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s8, 4 +; GFX9-NEXT: v_writelane_b32 v40, s9, 5 +; GFX9-NEXT: v_writelane_b32 v40, s10, 6 +; GFX9-NEXT: v_writelane_b32 v40, s11, 7 +; GFX9-NEXT: v_writelane_b32 v40, s12, 8 +; GFX9-NEXT: v_writelane_b32 v40, s13, 9 +; GFX9-NEXT: v_writelane_b32 v40, s14, 10 +; GFX9-NEXT: v_writelane_b32 v40, s15, 11 +; GFX9-NEXT: v_writelane_b32 v40, s16, 12 +; GFX9-NEXT: v_writelane_b32 v40, s17, 13 +; GFX9-NEXT: v_writelane_b32 v40, s18, 14 +; GFX9-NEXT: v_writelane_b32 v40, s19, 15 +; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 17 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[20:21] -; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v16i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s30, v40, 16 +; GFX9-NEXT: v_readlane_b32 s31, v40, 17 +; GFX9-NEXT: v_readlane_b32 s19, v40, 15 +; GFX9-NEXT: v_readlane_b32 s18, v40, 14 +; GFX9-NEXT: v_readlane_b32 s17, v40, 13 +; GFX9-NEXT: v_readlane_b32 s16, v40, 12 +; GFX9-NEXT: v_readlane_b32 s15, v40, 11 +; GFX9-NEXT: v_readlane_b32 s14, v40, 10 +; GFX9-NEXT: v_readlane_b32 s13, v40, 9 +; GFX9-NEXT: v_readlane_b32 s12, v40, 8 +; GFX9-NEXT: v_readlane_b32 s11, v40, 7 +; GFX9-NEXT: v_readlane_b32 s10, v40, 6 +; GFX9-NEXT: v_readlane_b32 s9, v40, 5 +; GFX9-NEXT: v_readlane_b32 s8, v40, 4 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 
1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 18 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v16i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 18 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[20:21] -; GFX10-NEXT: s_add_u32 s20, s20, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s21, s21, external_void_func_v16i32_inreg@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-NEXT: v_writelane_b32 v40, s12, 8 +; GFX10-NEXT: v_writelane_b32 v40, s13, 9 +; GFX10-NEXT: v_writelane_b32 v40, s14, 10 +; GFX10-NEXT: v_writelane_b32 v40, s15, 11 +; GFX10-NEXT: v_writelane_b32 v40, s16, 12 +; GFX10-NEXT: v_writelane_b32 v40, s17, 13 +; GFX10-NEXT: v_writelane_b32 v40, s18, 14 +; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s30, 16 +; GFX10-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[4:5], 0x0 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[20:21] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-NEXT: v_readlane_b32 s31, v40, 17 +; GFX10-NEXT: v_readlane_b32 s19, v40, 15 +; GFX10-NEXT: v_readlane_b32 s18, v40, 14 +; GFX10-NEXT: v_readlane_b32 s17, v40, 13 +; GFX10-NEXT: v_readlane_b32 s16, v40, 12 +; GFX10-NEXT: v_readlane_b32 s15, v40, 11 +; GFX10-NEXT: v_readlane_b32 s14, v40, 10 +; GFX10-NEXT: v_readlane_b32 s13, v40, 9 +; GFX10-NEXT: v_readlane_b32 s12, v40, 8 +; GFX10-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 
s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 18 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v16i32_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -8125,28 +9049,60 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v16i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: 
s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <16 x i32> addrspace(4)*, <16 x i32> addrspace(4)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(4)* %ptr call amdgpu_gfx void @external_void_func_v16i32_inreg(<16 x i32> inreg %val) @@ -8157,34 +9113,47 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-LABEL: test_call_external_void_func_v32i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 18 -; GFX9-NEXT: v_writelane_b32 v40, s36, 0 -; GFX9-NEXT: v_writelane_b32 v40, s37, 1 -; GFX9-NEXT: v_writelane_b32 v40, s38, 2 -; GFX9-NEXT: v_writelane_b32 v40, s39, 3 -; GFX9-NEXT: v_writelane_b32 v40, s40, 4 -; GFX9-NEXT: v_writelane_b32 v40, s41, 5 -; GFX9-NEXT: v_writelane_b32 v40, s42, 6 -; GFX9-NEXT: v_writelane_b32 v40, s43, 7 -; GFX9-NEXT: v_writelane_b32 v40, s44, 8 -; GFX9-NEXT: v_writelane_b32 v40, s45, 9 -; GFX9-NEXT: v_writelane_b32 v40, s46, 10 -; GFX9-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s47, 11 -; GFX9-NEXT: v_writelane_b32 v40, s48, 12 -; GFX9-NEXT: v_writelane_b32 v40, s49, 13 -; GFX9-NEXT: v_writelane_b32 v40, s50, 14 -; GFX9-NEXT: v_writelane_b32 v40, s51, 15 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 28 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s8, 4 +; GFX9-NEXT: v_writelane_b32 v40, s9, 5 +; GFX9-NEXT: v_writelane_b32 v40, s10, 6 +; GFX9-NEXT: v_writelane_b32 v40, s11, 7 +; GFX9-NEXT: v_writelane_b32 v40, s12, 8 +; GFX9-NEXT: v_writelane_b32 v40, s13, 9 +; GFX9-NEXT: v_writelane_b32 v40, s14, 10 +; GFX9-NEXT: v_writelane_b32 v40, s15, 11 +; GFX9-NEXT: v_writelane_b32 v40, s16, 12 +; GFX9-NEXT: v_writelane_b32 v40, s17, 13 +; GFX9-NEXT: v_writelane_b32 v40, s18, 14 +; GFX9-NEXT: v_writelane_b32 v40, s19, 15 +; GFX9-NEXT: v_writelane_b32 v40, s20, 16 +; GFX9-NEXT: v_writelane_b32 v40, s21, 17 +; GFX9-NEXT: v_writelane_b32 v40, s22, 18 +; GFX9-NEXT: v_writelane_b32 v40, s23, 19 +; GFX9-NEXT: v_writelane_b32 v40, s24, 20 +; GFX9-NEXT: v_writelane_b32 v40, s25, 21 +; GFX9-NEXT: v_writelane_b32 v40, s26, 22 +; GFX9-NEXT: v_writelane_b32 v40, s27, 23 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 17 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: 
s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s46 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 @@ -8208,70 +9177,87 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s4, v40, 16 -; GFX9-NEXT: v_readlane_b32 s5, v40, 17 -; GFX9-NEXT: v_readlane_b32 s51, v40, 15 -; GFX9-NEXT: v_readlane_b32 s50, v40, 14 -; GFX9-NEXT: v_readlane_b32 s49, v40, 13 -; GFX9-NEXT: v_readlane_b32 s48, v40, 12 -; GFX9-NEXT: v_readlane_b32 s47, v40, 11 -; GFX9-NEXT: v_readlane_b32 s46, v40, 10 -; GFX9-NEXT: v_readlane_b32 s45, v40, 9 -; GFX9-NEXT: v_readlane_b32 s44, v40, 8 -; GFX9-NEXT: v_readlane_b32 s43, v40, 7 -; GFX9-NEXT: v_readlane_b32 s42, v40, 6 -; GFX9-NEXT: v_readlane_b32 s41, v40, 5 -; GFX9-NEXT: v_readlane_b32 s40, v40, 4 -; GFX9-NEXT: v_readlane_b32 s39, v40, 3 -; GFX9-NEXT: v_readlane_b32 s38, v40, 2 -; GFX9-NEXT: v_readlane_b32 s37, v40, 1 -; GFX9-NEXT: v_readlane_b32 s36, v40, 0 +; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: v_readlane_b32 s29, v40, 25 +; GFX9-NEXT: v_readlane_b32 s28, v40, 24 +; GFX9-NEXT: v_readlane_b32 s27, v40, 23 +; GFX9-NEXT: v_readlane_b32 s26, v40, 22 +; GFX9-NEXT: v_readlane_b32 s25, v40, 21 +; GFX9-NEXT: v_readlane_b32 s24, v40, 20 +; GFX9-NEXT: v_readlane_b32 s23, v40, 19 +; GFX9-NEXT: v_readlane_b32 s22, v40, 18 +; GFX9-NEXT: v_readlane_b32 s21, v40, 17 +; GFX9-NEXT: v_readlane_b32 s20, v40, 16 +; GFX9-NEXT: v_readlane_b32 s19, v40, 15 +; GFX9-NEXT: v_readlane_b32 s18, v40, 14 +; GFX9-NEXT: v_readlane_b32 s17, v40, 13 +; GFX9-NEXT: v_readlane_b32 s16, v40, 12 +; GFX9-NEXT: v_readlane_b32 s15, v40, 11 +; GFX9-NEXT: v_readlane_b32 s14, v40, 10 +; GFX9-NEXT: v_readlane_b32 s13, v40, 9 +; GFX9-NEXT: v_readlane_b32 s12, v40, 8 +; GFX9-NEXT: v_readlane_b32 s11, v40, 7 +; GFX9-NEXT: v_readlane_b32 s10, v40, 6 +; GFX9-NEXT: v_readlane_b32 s9, v40, 5 +; GFX9-NEXT: v_readlane_b32 s8, v40, 4 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 +; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 18 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 28 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v32i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; 
GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 18 -; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 28 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s36, 0 -; GFX10-NEXT: v_writelane_b32 v40, s37, 1 -; GFX10-NEXT: v_writelane_b32 v40, s38, 2 -; GFX10-NEXT: v_writelane_b32 v40, s39, 3 -; GFX10-NEXT: v_writelane_b32 v40, s40, 4 -; GFX10-NEXT: v_writelane_b32 v40, s41, 5 -; GFX10-NEXT: v_writelane_b32 v40, s42, 6 -; GFX10-NEXT: v_writelane_b32 v40, s43, 7 -; GFX10-NEXT: v_writelane_b32 v40, s44, 8 -; GFX10-NEXT: v_writelane_b32 v40, s45, 9 -; GFX10-NEXT: v_writelane_b32 v40, s46, 10 -; GFX10-NEXT: v_writelane_b32 v40, s47, 11 -; GFX10-NEXT: v_writelane_b32 v40, s48, 12 -; GFX10-NEXT: v_writelane_b32 v40, s49, 13 -; GFX10-NEXT: v_writelane_b32 v40, s50, 14 -; GFX10-NEXT: v_writelane_b32 v40, s51, 15 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-NEXT: v_writelane_b32 v40, s12, 8 +; GFX10-NEXT: v_writelane_b32 v40, s13, 9 +; GFX10-NEXT: v_writelane_b32 v40, s14, 10 +; GFX10-NEXT: v_writelane_b32 v40, s15, 11 +; GFX10-NEXT: v_writelane_b32 v40, s16, 12 +; GFX10-NEXT: v_writelane_b32 v40, s17, 13 +; GFX10-NEXT: v_writelane_b32 v40, s18, 14 +; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 ; GFX10-NEXT: s_getpc_b64 s[30:31] ; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 @@ -8299,32 +9285,42 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s4, v40, 16 -; GFX10-NEXT: v_readlane_b32 s5, v40, 17 -; GFX10-NEXT: v_readlane_b32 s51, v40, 15 -; GFX10-NEXT: v_readlane_b32 s50, v40, 14 -; GFX10-NEXT: v_readlane_b32 s49, v40, 13 -; GFX10-NEXT: v_readlane_b32 s48, v40, 12 -; GFX10-NEXT: v_readlane_b32 s47, v40, 11 -; GFX10-NEXT: v_readlane_b32 s46, v40, 10 -; GFX10-NEXT: v_readlane_b32 s45, v40, 9 -; GFX10-NEXT: v_readlane_b32 s44, v40, 8 -; 
GFX10-NEXT: v_readlane_b32 s43, v40, 7 -; GFX10-NEXT: v_readlane_b32 s42, v40, 6 -; GFX10-NEXT: v_readlane_b32 s41, v40, 5 -; GFX10-NEXT: v_readlane_b32 s40, v40, 4 -; GFX10-NEXT: v_readlane_b32 s39, v40, 3 -; GFX10-NEXT: v_readlane_b32 s38, v40, 2 -; GFX10-NEXT: v_readlane_b32 s37, v40, 1 -; GFX10-NEXT: v_readlane_b32 s36, v40, 0 +; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-NEXT: v_readlane_b32 s29, v40, 25 +; GFX10-NEXT: v_readlane_b32 s28, v40, 24 +; GFX10-NEXT: v_readlane_b32 s27, v40, 23 +; GFX10-NEXT: v_readlane_b32 s26, v40, 22 +; GFX10-NEXT: v_readlane_b32 s25, v40, 21 +; GFX10-NEXT: v_readlane_b32 s24, v40, 20 +; GFX10-NEXT: v_readlane_b32 s23, v40, 19 +; GFX10-NEXT: v_readlane_b32 s22, v40, 18 +; GFX10-NEXT: v_readlane_b32 s21, v40, 17 +; GFX10-NEXT: v_readlane_b32 s20, v40, 16 +; GFX10-NEXT: v_readlane_b32 s19, v40, 15 +; GFX10-NEXT: v_readlane_b32 s18, v40, 14 +; GFX10-NEXT: v_readlane_b32 s17, v40, 13 +; GFX10-NEXT: v_readlane_b32 s16, v40, 12 +; GFX10-NEXT: v_readlane_b32 s15, v40, 11 +; GFX10-NEXT: v_readlane_b32 s14, v40, 10 +; GFX10-NEXT: v_readlane_b32 s13, v40, 9 +; GFX10-NEXT: v_readlane_b32 s12, v40, 8 +; GFX10-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 18 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 28 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -8334,26 +9330,26 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40 @@ -8361,54 +9357,74 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 17 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 6 -; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s41, v40, 5 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(4)* %ptr call amdgpu_gfx void @external_void_func_v32i32_inreg(<32 x i32> inreg %val) @@ -8419,36 +9435,53 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-LABEL: test_call_external_void_func_v32i32_i32_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 18 -; GFX9-NEXT: v_writelane_b32 v40, s36, 0 -; GFX9-NEXT: v_writelane_b32 v40, s37, 1 -; GFX9-NEXT: v_writelane_b32 v40, s38, 2 -; GFX9-NEXT: v_writelane_b32 v40, s39, 3 -; GFX9-NEXT: v_writelane_b32 v40, s40, 4 -; GFX9-NEXT: v_writelane_b32 v40, s41, 5 -; GFX9-NEXT: v_writelane_b32 v40, s42, 6 -; GFX9-NEXT: v_writelane_b32 v40, s43, 7 -; GFX9-NEXT: v_writelane_b32 v40, s44, 8 -; GFX9-NEXT: v_writelane_b32 v40, s45, 9 -; GFX9-NEXT: v_writelane_b32 v40, s46, 10 -; GFX9-NEXT: v_writelane_b32 v40, s47, 11 -; GFX9-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword 
s22, s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s48, 12 -; GFX9-NEXT: v_writelane_b32 v40, s49, 13 -; GFX9-NEXT: v_writelane_b32 v40, s50, 14 -; GFX9-NEXT: v_writelane_b32 v40, s51, 15 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 28 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 +; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: v_writelane_b32 v40, s8, 4 +; GFX9-NEXT: v_writelane_b32 v40, s9, 5 +; GFX9-NEXT: v_writelane_b32 v40, s10, 6 +; GFX9-NEXT: v_writelane_b32 v40, s11, 7 +; GFX9-NEXT: v_writelane_b32 v40, s12, 8 +; GFX9-NEXT: v_writelane_b32 v40, s13, 9 +; GFX9-NEXT: v_writelane_b32 v40, s14, 10 +; GFX9-NEXT: v_writelane_b32 v40, s15, 11 +; GFX9-NEXT: v_writelane_b32 v40, s16, 12 +; GFX9-NEXT: v_writelane_b32 v40, s17, 13 +; GFX9-NEXT: v_writelane_b32 v40, s18, 14 +; GFX9-NEXT: v_writelane_b32 v40, s19, 15 +; GFX9-NEXT: v_writelane_b32 v40, s20, 16 +; GFX9-NEXT: v_writelane_b32 v40, s21, 17 +; GFX9-NEXT: v_writelane_b32 v40, s22, 18 +; GFX9-NEXT: v_writelane_b32 v40, s23, 19 +; GFX9-NEXT: v_writelane_b32 v40, s24, 20 +; GFX9-NEXT: v_writelane_b32 v40, s25, 21 +; GFX9-NEXT: v_writelane_b32 v40, s26, 22 +; GFX9-NEXT: v_writelane_b32 v40, s27, 23 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_mov_b32_e32 v0, s22 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s34, s[30:31], 0x0 +; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 +; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s34 +; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, s46 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -8458,7 +9491,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, s49 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 -; GFX9-NEXT: v_writelane_b32 v40, s30, 16 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 ; GFX9-NEXT: s_mov_b32 s20, s36 @@ -8471,83 +9503,100 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 -; GFX9-NEXT: v_writelane_b32 v40, s31, 17 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s4, v40, 16 
-; GFX9-NEXT: v_readlane_b32 s5, v40, 17 -; GFX9-NEXT: v_readlane_b32 s51, v40, 15 -; GFX9-NEXT: v_readlane_b32 s50, v40, 14 -; GFX9-NEXT: v_readlane_b32 s49, v40, 13 -; GFX9-NEXT: v_readlane_b32 s48, v40, 12 -; GFX9-NEXT: v_readlane_b32 s47, v40, 11 -; GFX9-NEXT: v_readlane_b32 s46, v40, 10 -; GFX9-NEXT: v_readlane_b32 s45, v40, 9 -; GFX9-NEXT: v_readlane_b32 s44, v40, 8 -; GFX9-NEXT: v_readlane_b32 s43, v40, 7 -; GFX9-NEXT: v_readlane_b32 s42, v40, 6 -; GFX9-NEXT: v_readlane_b32 s41, v40, 5 -; GFX9-NEXT: v_readlane_b32 s40, v40, 4 -; GFX9-NEXT: v_readlane_b32 s39, v40, 3 -; GFX9-NEXT: v_readlane_b32 s38, v40, 2 -; GFX9-NEXT: v_readlane_b32 s37, v40, 1 -; GFX9-NEXT: v_readlane_b32 s36, v40, 0 +; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: v_readlane_b32 s29, v40, 25 +; GFX9-NEXT: v_readlane_b32 s28, v40, 24 +; GFX9-NEXT: v_readlane_b32 s27, v40, 23 +; GFX9-NEXT: v_readlane_b32 s26, v40, 22 +; GFX9-NEXT: v_readlane_b32 s25, v40, 21 +; GFX9-NEXT: v_readlane_b32 s24, v40, 20 +; GFX9-NEXT: v_readlane_b32 s23, v40, 19 +; GFX9-NEXT: v_readlane_b32 s22, v40, 18 +; GFX9-NEXT: v_readlane_b32 s21, v40, 17 +; GFX9-NEXT: v_readlane_b32 s20, v40, 16 +; GFX9-NEXT: v_readlane_b32 s19, v40, 15 +; GFX9-NEXT: v_readlane_b32 s18, v40, 14 +; GFX9-NEXT: v_readlane_b32 s17, v40, 13 +; GFX9-NEXT: v_readlane_b32 s16, v40, 12 +; GFX9-NEXT: v_readlane_b32 s15, v40, 11 +; GFX9-NEXT: v_readlane_b32 s14, v40, 10 +; GFX9-NEXT: v_readlane_b32 s13, v40, 9 +; GFX9-NEXT: v_readlane_b32 s12, v40, 8 +; GFX9-NEXT: v_readlane_b32 s11, v40, 7 +; GFX9-NEXT: v_readlane_b32 s10, v40, 6 +; GFX9-NEXT: v_readlane_b32 s9, v40, 5 +; GFX9-NEXT: v_readlane_b32 s8, v40, 4 +; GFX9-NEXT: v_readlane_b32 s7, v40, 3 +; GFX9-NEXT: v_readlane_b32 s6, v40, 2 +; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 18 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 28 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_v32i32_i32_inreg: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 18 -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx2 s[20:21], s[4:5], 0x0 -; GFX10-NEXT: s_load_dword s22, s[4:5], 0x0 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 28 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s36, 0 -; GFX10-NEXT: v_writelane_b32 v40, s37, 1 -; GFX10-NEXT: v_writelane_b32 v40, s38, 2 -; GFX10-NEXT: v_writelane_b32 v40, s39, 3 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-NEXT: v_writelane_b32 v40, s9, 5 +; GFX10-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-NEXT: v_writelane_b32 v40, 
s11, 7 +; GFX10-NEXT: v_writelane_b32 v40, s12, 8 +; GFX10-NEXT: v_writelane_b32 v40, s13, 9 +; GFX10-NEXT: v_writelane_b32 v40, s14, 10 +; GFX10-NEXT: v_writelane_b32 v40, s15, 11 +; GFX10-NEXT: v_writelane_b32 v40, s16, 12 +; GFX10-NEXT: v_writelane_b32 v40, s17, 13 +; GFX10-NEXT: v_writelane_b32 v40, s18, 14 +; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s22 -; GFX10-NEXT: v_writelane_b32 v40, s40, 4 -; GFX10-NEXT: v_writelane_b32 v40, s41, 5 -; GFX10-NEXT: v_writelane_b32 v40, s42, 6 -; GFX10-NEXT: v_writelane_b32 v40, s43, 7 -; GFX10-NEXT: v_writelane_b32 v40, s44, 8 -; GFX10-NEXT: v_writelane_b32 v40, s45, 9 -; GFX10-NEXT: v_writelane_b32 v40, s46, 10 -; GFX10-NEXT: v_writelane_b32 v40, s47, 11 -; GFX10-NEXT: v_writelane_b32 v40, s48, 12 -; GFX10-NEXT: v_writelane_b32 v40, s49, 13 -; GFX10-NEXT: v_writelane_b32 v40, s50, 14 -; GFX10-NEXT: v_writelane_b32 v40, s51, 15 -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[20:21], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 -; GFX10-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-NEXT: s_clause 0x2 +; GFX10-NEXT: s_load_dword s34, s[30:31], 0x0 +; GFX10-NEXT: ; meta instruction +; GFX10-NEXT: ; meta instruction +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 ; GFX10-NEXT: s_getpc_b64 s[30:31] ; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 ; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s46 +; GFX10-NEXT: v_mov_b32_e32 v0, s34 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-NEXT: v_mov_b32_e32 v2, s48 +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 +; GFX10-NEXT: v_mov_b32_e32 v0, s46 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-NEXT: s_mov_b32 s20, s36 ; GFX10-NEXT: s_mov_b32 s21, s37 @@ -8568,32 +9617,42 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s4, v40, 16 -; GFX10-NEXT: v_readlane_b32 s5, v40, 17 -; GFX10-NEXT: v_readlane_b32 s51, v40, 15 -; GFX10-NEXT: v_readlane_b32 s50, v40, 14 -; GFX10-NEXT: v_readlane_b32 s49, v40, 13 -; GFX10-NEXT: v_readlane_b32 s48, v40, 12 -; GFX10-NEXT: v_readlane_b32 s47, v40, 11 -; GFX10-NEXT: v_readlane_b32 s46, v40, 10 -; GFX10-NEXT: v_readlane_b32 s45, v40, 9 -; GFX10-NEXT: v_readlane_b32 s44, v40, 8 -; GFX10-NEXT: v_readlane_b32 s43, v40, 7 -; GFX10-NEXT: v_readlane_b32 s42, v40, 6 -; GFX10-NEXT: v_readlane_b32 s41, v40, 5 -; GFX10-NEXT: v_readlane_b32 s40, v40, 4 -; GFX10-NEXT: 
v_readlane_b32 s39, v40, 3 -; GFX10-NEXT: v_readlane_b32 s38, v40, 2 -; GFX10-NEXT: v_readlane_b32 s37, v40, 1 -; GFX10-NEXT: v_readlane_b32 s36, v40, 0 +; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-NEXT: v_readlane_b32 s29, v40, 25 +; GFX10-NEXT: v_readlane_b32 s28, v40, 24 +; GFX10-NEXT: v_readlane_b32 s27, v40, 23 +; GFX10-NEXT: v_readlane_b32 s26, v40, 22 +; GFX10-NEXT: v_readlane_b32 s25, v40, 21 +; GFX10-NEXT: v_readlane_b32 s24, v40, 20 +; GFX10-NEXT: v_readlane_b32 s23, v40, 19 +; GFX10-NEXT: v_readlane_b32 s22, v40, 18 +; GFX10-NEXT: v_readlane_b32 s21, v40, 17 +; GFX10-NEXT: v_readlane_b32 s20, v40, 16 +; GFX10-NEXT: v_readlane_b32 s19, v40, 15 +; GFX10-NEXT: v_readlane_b32 s18, v40, 14 +; GFX10-NEXT: v_readlane_b32 s17, v40, 13 +; GFX10-NEXT: v_readlane_b32 s16, v40, 12 +; GFX10-NEXT: v_readlane_b32 s15, v40, 11 +; GFX10-NEXT: v_readlane_b32 s14, v40, 10 +; GFX10-NEXT: v_readlane_b32 s13, v40, 9 +; GFX10-NEXT: v_readlane_b32 s12, v40, 8 +; GFX10-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 18 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 28 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: test_call_external_void_func_v32i32_i32_inreg: ; GFX10-SCRATCH: ; %bb.0: @@ -8603,26 +9662,26 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 28 ; GFX10-SCRATCH-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s6, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s8, 4 +; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s9, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s10, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s12, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s13, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s14, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s15, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s16, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s17, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s18, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s19, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: s_clause 0x2 ; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0 @@ -8633,56 +9692,76 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v32i32_i32_inreg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32_i32_inreg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v6, s2 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v4, s50 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v0, s46 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, s48 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-SCRATCH-NEXT: s_mov_b32 s20, s36 ; GFX10-SCRATCH-NEXT: s_mov_b32 s21, s37 ; GFX10-SCRATCH-NEXT: s_mov_b32 s22, s38 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-SCRATCH-NEXT: s_mov_b32 s23, s39 ; GFX10-SCRATCH-NEXT: s_mov_b32 s24, s40 ; GFX10-SCRATCH-NEXT: s_mov_b32 s25, s41 +; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s32 offset:24 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16 +; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s26, 22 ; GFX10-SCRATCH-NEXT: s_mov_b32 s26, s42 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s27, 23 ; GFX10-SCRATCH-NEXT: s_mov_b32 s27, s43 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s28, 24 ; GFX10-SCRATCH-NEXT: s_mov_b32 s28, s44 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s29, 25 ; GFX10-SCRATCH-NEXT: s_mov_b32 s29, s45 -; GFX10-SCRATCH-NEXT: scratch_store_dword off, v6, s32 offset:24 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[4:5], s32 offset:16 -; GFX10-SCRATCH-NEXT: scratch_store_dwordx4 off, v[0:3], s32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 17 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 6 -; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 5 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 2 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s26, v40, 22 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s25, v40, 21 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s24, v40, 20 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s23, v40, 19 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s22, v40, 18 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s21, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s20, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s16, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s15, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s14, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s13, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s12, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 18 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 28 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] %ptr0 = load <32 x i32> addrspace(4)*, <32 x i32> addrspace(4)* addrspace(4)* undef %val0 = load <32 x i32>, <32 x i32> addrspace(4)* %ptr0 %val1 = load i32, i32 addrspace(4)* undef @@ -8694,9 +9773,9 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 @@ -8704,32 +9783,32 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, 
stack_passed_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_clause 0x1 @@ -8737,25 +9816,25 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -8777,16 +9856,16 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 
+; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:8 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx void @stack_passed_f64_arg(<32 x i32> %val, double %tmp) ret void @@ -8796,9 +9875,9 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-LABEL: stack_12xv3i32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -8844,28 +9923,28 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: stack_12xv3i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 12 ; GFX10-NEXT: v_mov_b32_e32 v1, 13 @@ -8911,20 +9990,20 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 10 ; GFX10-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 +; 
GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: stack_12xv3i32: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -8980,16 +10059,16 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx void @external_void_func_12xv3i32( <3 x i32>, @@ -9011,9 +10090,9 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-LABEL: stack_8xv5i32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -9067,28 +10146,28 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX10-LABEL: stack_8xv5i32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_mov_b32_e32 v0, 8 ; GFX10-NEXT: v_mov_b32_e32 v1, 9 ; GFX10-NEXT: v_mov_b32_e32 v2, 10 @@ -9142,20 +10221,20 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 6 ; GFX10-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: stack_8xv5i32: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -9216,16 +10295,16 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx void @external_void_func_8xv5i32( <5 x i32>, @@ -9243,9 +10322,9 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-LABEL: stack_8xv5f32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -9299,28 +10378,28 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 
v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: stack_8xv5f32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-NEXT: v_mov_b32_e32 v1, 0x41100000 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x41200000 @@ -9374,20 +10453,20 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: stack_8xv5f32: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -9448,16 +10527,16 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s0, v40, 0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s1, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s2, -1 +; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s2 +; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[0:1] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx void @external_void_func_8xv5f32( <5 x float>, diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 2f5f1485c98c2..fa8f51cf412fa 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -8,69 +8,69 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 -; GFX9-NEXT: v_writelane_b32 v40, s34, 0 -; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[4:5] +; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: v_readlane_b32 s5, v40, 3 -; GFX9-NEXT: v_readlane_b32 s35, v40, 1 -; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, 
s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s34, 0 -; GFX10-NEXT: v_writelane_b32 v40, s35, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_getpc_b64 s[4:5] +; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s4, v40, 2 -; GFX10-NEXT: v_readlane_b32 s5, v40, 3 -; GFX10-NEXT: v_readlane_b32 s35, v40, 1 -; GFX10-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 4 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @external_void_func_void() call void asm sideeffect "", ""() #0 call amdgpu_gfx void @external_void_func_void() @@ -81,21 +81,21 @@ define amdgpu_gfx void @void_func_void_clobber_s30_s31() #1 { ; GFX9-LABEL: void_func_void_clobber_s30_s31: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: void_func_void_clobber_s30_s31: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[36:37] call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 ret void } @@ -104,75 +104,75 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX9-LABEL: test_call_void_func_void_mayclobber_s31: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 -; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; 
GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_mov_b32 s34, s31 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 1 -; GFX9-NEXT: s_mov_b32 s31, s34 +; GFX9-NEXT: s_mov_b32 s4, s31 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_mov_b32 s31, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s31 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s5, v40, 2 -; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_void_func_void_mayclobber_s31: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s31 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_mov_b32 s34, s31 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 1 -; GFX10-NEXT: s_mov_b32 s31, s34 +; GFX10-NEXT: s_mov_b32 s4, s31 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_mov_b32 s31, s4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s31 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s5, v40, 2 -; GFX10-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] %s31 = call i32 asm sideeffect "; def $0", "={s31}"() call amdgpu_gfx void 
@external_void_func_void() call void asm sideeffect "; use $0", "{s31}"(i32 %s31) @@ -183,9 +183,9 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX9-LABEL: test_call_void_func_void_mayclobber_v31: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -196,33 +196,33 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v31 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_mov_b32_e32 v31, v41 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_void_func_void_mayclobber_v31: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -232,26 +232,26 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, v31 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_mov_b32_e32 v31, v41 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: 
v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] %v31 = call i32 asm sideeffect "; def $0", "={v31}"() call amdgpu_gfx void @external_void_func_void() call void asm sideeffect "; use $0", "{v31}"(i32 %v31) @@ -263,67 +263,75 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX9-LABEL: test_call_void_func_void_preserves_s33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s33, 3 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s33 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: s_mov_b32 s4, s33 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_mov_b32 s33, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s33 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 3 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_void_func_void_preserves_s33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; 
GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, s33 +; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: v_writelane_b32 v40, s31, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_mov_b32 s33, s4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s33 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 3 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] %s33 = call i32 asm sideeffect "; def $0", "={s33}"() call amdgpu_gfx void @external_void_func_void() call void asm sideeffect "; use $0", "{s33}"(i32 %s33) @@ -334,11 +342,11 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX9-LABEL: test_call_void_func_void_preserves_s34: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 -; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -346,59 +354,63 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s34 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 1 +; GFX9-NEXT: s_mov_b32 s4, s34 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_mov_b32 s34, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s34 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s5, v40, 2 -; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; 
GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_void_func_void_preserves_s34: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s34, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, s34 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_mov_b32 s34, s4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s5, v40, 2 -; GFX10-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] %s34 = call i32 asm sideeffect "; def $0", "={s34}"() call amdgpu_gfx void @external_void_func_void() call void asm sideeffect "; use $0", "{s34}"(i32 %s34) @@ -409,9 +421,9 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX9-LABEL: test_call_void_func_void_preserves_v40: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v41, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 @@ -421,32 +433,32 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; 
GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s4, v41, 0 -; GFX9-NEXT: v_readlane_b32 s5, v41, 1 +; GFX9-NEXT: v_readlane_b32 s30, v41, 0 +; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v41, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_void_func_void_preserves_v40: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v41, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 @@ -455,25 +467,25 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s4, v41, 0 -; GFX10-NEXT: v_readlane_b32 s5, v41, 1 +; GFX10-NEXT: v_readlane_b32 s30, v41, 0 +; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v41, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] %v40 = call i32 asm sideeffect "; def $0", "={v40}"() call amdgpu_gfx void @external_void_func_void() call void asm sideeffect "; use $0", "{v40}"(i32 %v40) @@ -568,55 +580,55 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX9-LABEL: test_call_void_func_void_clobber_s33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; 
GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_void_func_void_clobber_s33: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s33@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s33@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @void_func_void_clobber_s33() ret void } @@ -625,55 +637,55 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-LABEL: test_call_void_func_void_clobber_s34: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, 
void_func_void_clobber_s34@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 0 -; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_call_void_func_void_clobber_s34: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, void_func_void_clobber_s34@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, void_func_void_clobber_s34@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 0 -; GFX10-NEXT: v_readlane_b32 s5, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void @void_func_void_clobber_s34() ret void } @@ -682,11 +694,11 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-LABEL: callee_saved_sgpr_kernel: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 -; GFX9-NEXT: v_writelane_b32 v40, s40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -694,59 +706,61 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: 
s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s4, v40, 1 +; GFX9-NEXT: s_mov_b32 s4, s40 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s40 +; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s5, v40, 2 -; GFX9-NEXT: v_readlane_b32 s40, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: callee_saved_sgpr_kernel: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s40, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s4, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use s40 +; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s5, v40, 2 -; GFX10-NEXT: v_readlane_b32 s40, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0 call amdgpu_gfx void @external_void_func_void() call void asm sideeffect "; use $0", "s"(i32 %s40) #0 @@ -757,11 +771,11 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-LABEL: callee_saved_sgpr_vgpr_kernel: ; 
GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 -; GFX9-NEXT: v_writelane_b32 v40, s40, 0 +; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 @@ -770,76 +784,78 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: s_mov_b32 s4, s40 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v32 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v32 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: ;;#ASMSTART -; GFX9-NEXT: ; use s40 +; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v41 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s4, v40, 1 -; GFX9-NEXT: v_readlane_b32 s5, v40, 2 -; GFX9-NEXT: v_readlane_b32 s40, v40, 0 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: callee_saved_sgpr_vgpr_kernel: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s40, 0 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND +; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v41, v32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, 
s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: ;;#ASMSTART -; GFX10-NEXT: ; use s40 +; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v41 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s4, v40, 1 -; GFX10-NEXT: v_readlane_b32 s5, v40, 2 -; GFX10-NEXT: v_readlane_b32 s40, v40, 0 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0 %v32 = call i32 asm sideeffect "; def v32", "={v32}"() #0 call amdgpu_gfx void @external_void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index d5c769a9f12e1..7eca95becc00a 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -23,27 +23,27 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-LABEL: call_i1: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, return_i1@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, return_i1@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_i1: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, return_i1@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, return_i1@gotpcrel32@hi+12 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX10-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx i1 @return_i1() ret void @@ -70,27 +70,27 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX9-LABEL: call_i16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, return_i16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, 
return_i16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_i16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, return_i16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, return_i16@gotpcrel32@hi+12 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX10-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx i16 @return_i16() ret void @@ -117,27 +117,27 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-LABEL: call_2xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, return_2xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, return_2xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_2xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, return_2xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, return_2xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX10-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx <2 x i16> @return_2xi16() ret void @@ -166,27 +166,27 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX9-LABEL: call_3xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, return_3xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, 
return_3xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_3xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, return_3xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, return_3xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX10-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx <3 x i16> @return_3xi16() ret void @@ -1241,41 +1241,41 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-LABEL: call_512xi32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s8, s33 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000 -; GFX9-NEXT: s_mov_b32 s33, s8 -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_mov_b32 s33, s34 +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_512xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s8, s33 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, return_512xi32@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, return_512xi32@gotpcrel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_load_dwordx2 
s[6:7], s[6:7], 0x0 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000 -; GFX10-NEXT: s_mov_b32 s33, s8 -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_mov_b32 s33, s34 +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx <512 x i32> @return_512xi32() ret void diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index 7749499153492..43a306d9c5946 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -1074,41 +1074,90 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 6 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 ; GCN-NEXT: v_writelane_b32 v40, s36, 2 ; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s30, 4 -; GCN-NEXT: v_writelane_b32 v40, s31, 5 -; GCN-NEXT: s_mov_b64 s[34:35], exec -; GCN-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s6, v0 -; GCN-NEXT: v_readfirstlane_b32 s7, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[36:37], vcc +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[6:7], s[30:31] +; GCN-NEXT: s_mov_b64 s[8:9], exec ; GCN-NEXT: s_movk_i32 s4, 0x7b -; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GCN-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 +; GCN-NEXT: v_readfirstlane_b32 s13, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[36:37] +; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] ; GCN-NEXT: s_cbranch_execnz BB6_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: v_readlane_b32 s4, v40, 4 -; GCN-NEXT: v_readlane_b32 s5, v40, 5 +; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 
s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 ; GCN-NEXT: v_readlane_b32 s37, v40, 3 ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 6 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[6:7] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: ; GISEL: ; %bb.0: @@ -1116,41 +1165,90 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 6 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 ; GISEL-NEXT: v_writelane_b32 v40, s36, 2 ; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s30, 4 -; GISEL-NEXT: v_writelane_b32 v40, s31, 5 -; GISEL-NEXT: s_mov_b64 s[34:35], exec -; GISEL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v0 -; GISEL-NEXT: v_readfirstlane_b32 s7, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: 
v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31] ; GISEL-NEXT: s_movk_i32 s4, 0x7b -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL-NEXT: s_mov_b64 s[8:9], exec +; GISEL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1 +; GISEL-NEXT: v_readfirstlane_b32 s10, v0 +; GISEL-NEXT: v_readfirstlane_b32 s11, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37] +; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13] ; GISEL-NEXT: s_cbranch_execnz BB6_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[34:35] -; GISEL-NEXT: v_readlane_b32 s4, v40, 4 -; GISEL-NEXT: v_readlane_b32 s5, v40, 5 +; GISEL-NEXT: s_mov_b64 exec, s[8:9] +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 ; GISEL-NEXT: v_readlane_b32 s37, v40, 3 ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 6 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[6:7] call amdgpu_gfx void %fptr(i32 inreg 123) ret void } @@ -1162,7 +1260,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 6 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill @@ -1170,32 +1268,81 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GCN-NEXT: v_writelane_b32 v40, s35, 1 ; GCN-NEXT: v_writelane_b32 v40, s36, 2 ; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s30, 4 -; GCN-NEXT: v_writelane_b32 v40, s31, 5 +; 
GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] ; GCN-NEXT: v_mov_b32_e32 v41, v0 -; GCN-NEXT: s_mov_b64 s[34:35], exec +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s4, v1 -; GCN-NEXT: v_readfirstlane_b32 s5, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[36:37], vcc +; GCN-NEXT: v_readfirstlane_b32 s10, v1 +; GCN-NEXT: v_readfirstlane_b32 s11, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[36:37] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz BB7_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: v_readlane_b32 s4, v40, 4 -; GCN-NEXT: v_readlane_b32 s5, v40, 5 +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 ; GCN-NEXT: v_readlane_b32 s37, v40, 3 ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 6 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 ; GCN-NEXT: 
s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] @@ -1208,7 +1355,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 6 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill @@ -1216,32 +1363,81 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 ; GISEL-NEXT: v_writelane_b32 v40, s36, 2 ; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 ; GISEL-NEXT: v_mov_b32_e32 v41, v0 -; GISEL-NEXT: v_writelane_b32 v40, s30, 4 -; GISEL-NEXT: v_writelane_b32 v40, s31, 5 -; GISEL-NEXT: s_mov_b64 s[34:35], exec +; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s4, v1 -; GISEL-NEXT: v_readfirstlane_b32 s5, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc +; GISEL-NEXT: v_readfirstlane_b32 s8, v1 +; GISEL-NEXT: v_readfirstlane_b32 s9, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz BB7_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[34:35] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: v_readlane_b32 s4, v40, 4 -; GISEL-NEXT: v_readlane_b32 s5, v40, 5 +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; 
GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 ; GISEL-NEXT: v_readlane_b32 s37, v40, 3 ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 6 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[6:7] @@ -1262,38 +1458,87 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 6 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 ; GCN-NEXT: v_writelane_b32 v40, s36, 2 ; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s30, 4 -; GCN-NEXT: v_writelane_b32 v40, s31, 5 -; GCN-NEXT: s_mov_b64 s[34:35], exec +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s4, v1 -; GCN-NEXT: v_readfirstlane_b32 s5, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[36:37], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GCN-NEXT: v_readfirstlane_b32 s10, v1 +; GCN-NEXT: v_readfirstlane_b32 s11, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] +; 
GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: v_mov_b32_e32 v3, v0 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GCN-NEXT: ; implicit-def: $vgpr0 -; GCN-NEXT: s_xor_b64 exec, exec, s[36:37] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz BB8_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s4, v40, 4 -; GCN-NEXT: v_readlane_b32 s5, v40, 5 +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 ; GCN-NEXT: v_readlane_b32 s37, v40, 3 ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 6 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] @@ -1306,38 +1551,87 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 6 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 ; GISEL-NEXT: v_writelane_b32 v40, s36, 2 ; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s30, 4 -; GISEL-NEXT: v_writelane_b32 v40, s31, 5 -; GISEL-NEXT: s_mov_b64 s[34:35], exec +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; 
GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s4, v1 -; GISEL-NEXT: v_readfirstlane_b32 s5, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: v_readfirstlane_b32 s8, v1 +; GISEL-NEXT: v_readfirstlane_b32 s9, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: v_mov_b32_e32 v3, v0 ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz BB8_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[34:35] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_readlane_b32 s4, v40, 4 -; GISEL-NEXT: v_readlane_b32 s5, v40, 5 +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 ; GISEL-NEXT: v_readlane_b32 s37, v40, 3 ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 6 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[6:7] @@ -1355,35 +1649,84 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 6 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s34, 0 ; GCN-NEXT: v_writelane_b32 v40, s35, 1 ; GCN-NEXT: v_writelane_b32 v40, s36, 2 ; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s30, 4 -; GCN-NEXT: v_writelane_b32 v40, s31, 5 -; 
GCN-NEXT: s_mov_b64 s[34:35], exec +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s4, v0 -; GCN-NEXT: v_readfirstlane_b32 s5, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[36:37], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GCN-NEXT: v_readfirstlane_b32 s10, v0 +; GCN-NEXT: v_readfirstlane_b32 s11, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[36:37] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz BB9_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: v_readlane_b32 s4, v40, 4 -; GCN-NEXT: v_readlane_b32 s5, v40, 5 +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 ; GCN-NEXT: v_readlane_b32 s37, v40, 3 ; GCN-NEXT: v_readlane_b32 s36, v40, 2 ; GCN-NEXT: v_readlane_b32 s35, v40, 1 ; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 6 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 ; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] @@ -1396,35 +1739,84 @@ define void 
@test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 6 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: v_writelane_b32 v40, s34, 0 ; GISEL-NEXT: v_writelane_b32 v40, s35, 1 ; GISEL-NEXT: v_writelane_b32 v40, s36, 2 ; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s30, 4 -; GISEL-NEXT: v_writelane_b32 v40, s31, 5 -; GISEL-NEXT: s_mov_b64 s[34:35], exec +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s4, v0 -; GISEL-NEXT: v_readfirstlane_b32 s5, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[36:37], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GISEL-NEXT: v_readfirstlane_b32 s8, v0 +; GISEL-NEXT: v_readfirstlane_b32 s9, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[36:37] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz BB9_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[34:35] -; GISEL-NEXT: v_readlane_b32 s4, v40, 4 -; GISEL-NEXT: v_readlane_b32 s5, v40, 5 +; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, 
v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 ; GISEL-NEXT: v_readlane_b32 s37, v40, 3 ; GISEL-NEXT: v_readlane_b32 s36, v40, 2 ; GISEL-NEXT: v_readlane_b32 s35, v40, 1 ; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 6 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 ; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll index ebb05a0086819..3a5654f63a428 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,15 +8,15 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:SGPR_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:SGPR_128 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:SGPR_128 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() { define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:VReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5046282 /* regdef:VReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5046281 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4849674 /* regdef:VReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5177354 /* regdef:VReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4849673 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: 
INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5177353 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() { define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4653066 /* regdef:AReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:AReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4653065 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:AReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5111818 /* regdef:AReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5111817 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll index 5c333f0ce97d3..11035f1ddbf40 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll @@ -7,13 +7,25 @@ define float @v_fma(float %a, float %b, float %c) { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_waitcnt_vscnt null, 0x0 -; GCN-NEXT: v_fmac_legacy_f32_e64 v2, v0, v1 +; GCN-NEXT: v_fmac_legacy_f32_e32 v2, v0, v1 ; GCN-NEXT: v_mov_b32_e32 v0, v2 ; GCN-NEXT: s_setpc_b64 s[30:31] %fma = call float @llvm.amdgcn.fma.legacy(float %a, float %b, float %c) ret float %fma } +define float @v_fma_imm(float %a, float %c) { +; GCN-LABEL: v_fma_imm: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: v_fmac_legacy_f32_e32 v1, 0x41200000, v0 +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_setpc_b64 s[30:31] + %fma = call float @llvm.amdgcn.fma.legacy(float %a, float 10.0, float %c) + ret float %fma +} + define float @v_fabs_fma(float %a, float %b, float %c) { ; GCN-LABEL: v_fabs_fma: ; GCN: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll index ea06ffeac80c9..7d30163d6ea61 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -51,7 +51,7 @@ define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, flo } ; GCN-LABEL: {{^}}test_mad_legacy_f32: -; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX6: v_mac_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; 
GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} @@ -64,6 +64,20 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float % ret void } +; GCN-LABEL: {{^}}test_mad_legacy_f32_imm: +; GFX6: v_mac_legacy_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX9: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} +; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} +; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} +define amdgpu_kernel void @test_mad_legacy_f32_imm(float addrspace(1)* %out, float %a, float %c) #2 { + %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float 10.0) + %add = fadd float %mul, %c + store float %add, float addrspace(1)* %out, align 4 + ret void +} + ; GCN-LABEL: {{^}}test_mad_legacy_fneg_f32: ; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}} ; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll index e68b93bed96fe..0220940f2934c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -79,14 +79,14 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 s[0:1], s[6:7] ; SI-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 -; SI-NEXT: s_movk_i32 s7, 0xfc01 +; SI-NEXT: s_movk_i32 s6, 0xfc01 ; SI-NEXT: s_mov_b32 s0, -1 ; SI-NEXT: s_mov_b32 s1, 0xfffff -; SI-NEXT: s_brev_b32 s6, -2 +; SI-NEXT: s_brev_b32 s7, -2 ; SI-NEXT: v_mov_b32_e32 v8, 0x3ff00000 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: v_bfe_u32 v4, v3, 20, 11 -; SI-NEXT: v_add_i32_e32 v6, vcc, s7, v4 +; SI-NEXT: v_add_i32_e32 v6, vcc, s6, v4 ; SI-NEXT: v_lshr_b64 v[4:5], s[0:1], v6 ; SI-NEXT: v_and_b32_e32 v7, 0x80000000, v3 ; SI-NEXT: v_not_b32_e32 v4, v4 @@ -100,7 +100,7 @@ define amdgpu_kernel void @v_round_f64(double addrspace(1)* %out, double addrspa ; SI-NEXT: v_cndmask_b32_e32 v5, v5, v3, vcc ; SI-NEXT: v_cndmask_b32_e32 v4, v4, v2, vcc ; SI-NEXT: v_add_f64 v[6:7], v[2:3], -v[4:5] -; SI-NEXT: v_bfi_b32 v2, s6, v8, v3 +; SI-NEXT: v_bfi_b32 v2, s7, v8, v3 ; SI-NEXT: v_cmp_ge_f64_e64 vcc, |v[6:7]|, 0.5 ; SI-NEXT: s_mov_b64 s[6:7], s[2:3] ; SI-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll index b50d75aa682a2..ae55b1a5521d0 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernel-lds-constexpr.ll @@ -10,6 +10,7 @@ ; CHECK: %llvm.amdgcn.kernel.k3.lds.t = type { [32 x i8] } ; CHECK: %llvm.amdgcn.kernel.k4.lds.t = type { [2 x i8] } ; CHECK: %llvm.amdgcn.kernel.k5.lds.t = type { [505 x i32] } +; CHECK: %llvm.amdgcn.kernel.k6.lds.t = type { [4 x i32] } ; Use constant from different kernels ;. 
@@ -19,6 +20,7 @@ ; CHECK: @llvm.amdgcn.kernel.k3.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k3.lds.t undef, align 16 ; CHECK: @llvm.amdgcn.kernel.k4.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k4.lds.t undef, align 2 ; CHECK: @llvm.amdgcn.kernel.k5.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k5.lds.t undef, align 16 +; CHECK: @llvm.amdgcn.kernel.k6.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k6.lds.t undef, align 16 ;. define amdgpu_kernel void @k0(i64 %x) { ; CHECK-LABEL: @k0( @@ -112,3 +114,19 @@ define amdgpu_kernel void @k5() { call void undef(i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0), i32* getelementptr inbounds ([505 x i32], [505 x i32]* addrspacecast ([505 x i32] addrspace(3)* @lds.4 to [505 x i32]*), i64 0, i64 0)) ret void } + +@lds.5 = internal addrspace(3) global [4 x i32] undef, align 4 + +; Both the *value* and *pointer* operands of the store instruction are constant expressions, and +; both of these constant expression paths use the same LDS global, @lds.5. Hence both of these +; constant expression operands of the store should be replaced by a corresponding instruction sequence. +define amdgpu_kernel void @k6() { +; CHECK-LABEL: @k6( +; CHECK-NEXT: %1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* getelementptr inbounds (%llvm.amdgcn.kernel.k6.lds.t, %llvm.amdgcn.kernel.k6.lds.t addrspace(3)* @llvm.amdgcn.kernel.k6.lds, i32 0, i32 0), i32 0, i32 2 +; CHECK-NEXT: %2 = ptrtoint i32 addrspace(3)* %1 to i32 +; CHECK-NEXT: store i32 %2, i32 addrspace(3)* %1, align 8 +; CHECK-NEXT: ret void +; + store i32 ptrtoint (i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @lds.5, i32 0, i32 2) to i32), i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @lds.5, i32 0, i32 2) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll new file mode 100644 index 0000000000000..84d4fd3b90363 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs < %s | FileCheck %s + +; Test that source locations (.loc directives) are not added to the code within the prologue.
+ +; Function Attrs: convergent mustprogress nounwind +define hidden void @_ZL3barv() #0 !dbg !1644 { +; CHECK-LABEL: _ZL3barv: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .file 0 "/tmp" "lane-info.cpp" md5 0x4ab9b75a30baffdf0f6f536a80e3e382 +; CHECK-NEXT: .loc 0 30 0 ; lane-info.cpp:30:0 +; CHECK-NEXT: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: v_writelane_b32 v40, s33, 2 +; CHECK-NEXT: s_mov_b32 s33, s32 +; CHECK-NEXT: s_add_i32 s32, s32, 0x400 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_add_u32 s4, s4, _ZL13sleep_foreverv@gotpcrel32@lo+4 +; CHECK-NEXT: s_addc_u32 s5, s5, _ZL13sleep_foreverv@gotpcrel32@hi+12 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] +; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 +; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 +; CHECK-NEXT: v_readlane_b32 s33, v40, 2 +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: .Ltmp2: +entry: + call void @_ZL13sleep_foreverv(), !dbg !1646 + ret void, !dbg !1647 +} + +; Function Attrs: convergent nounwind +declare void @_ZL13sleep_foreverv() #0 + +attributes #0 = { nounwind "frame-pointer"="all" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!1638, !1639, !1640, !1641} +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_11, file: !1, producer: "clang version 13.0.0)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "lane-info.cpp", directory: "/tmp", checksumkind: CSK_MD5, checksum: "4ab9b75a30baffdf0f6f536a80e3e382") +!371 = !DISubroutineType(types: !372) +!372 = !{null} +!1638 = !{i32 7, !"Dwarf Version", i32 5} +!1639 = !{i32 2, !"Debug Info Version", i32 3} +!1640 = !{i32 1, !"wchar_size", i32 4} +!1641 = !{i32 7, !"PIC Level", i32 1} +!1644 = distinct !DISubprogram(name: "bar", linkageName: "_ZL3barv", scope: !1, file: !1, line: 29, type: !371, scopeLine: 30, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !1645) +!1645 = !{} +!1646 = !DILocation(line: 31, column: 3, scope: !1644) +!1647 = !DILocation(line: 32, column: 1, scope: !1644) diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll index f890ab2625f1b..d9d511f72ae8e 100644 --- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll @@ -28,31 +28,32 @@ define amdgpu_cs void @test_simple_indirect_call() { ; Attributor adds work-group-size attribute. This should be ok. 
; GFX9-LABEL: test_simple_indirect_call: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_getpc_b64 s[36:37] -; GFX9-NEXT: s_mov_b32 s36, s0 -; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x10 +; GFX9-NEXT: s_getpc_b64 s[8:9] +; GFX9-NEXT: s_mov_b32 s8, s0 +; GFX9-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s36, s36, s0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_add_u32 s8, s8, s0 +; GFX9-NEXT: s_addc_u32 s9, s9, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX9-NEXT: s_mov_b64 s[2:3], s[10:11] ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm +; ; GFX10-LABEL: test_simple_indirect_call: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_getpc_b64 s[36:37] -; GFX10-NEXT: s_mov_b32 s36, s0 +; GFX10-NEXT: s_getpc_b64 s[8:9] +; GFX10-NEXT: s_mov_b32 s8, s0 ; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x10 +; GFX10-NEXT: s_load_dwordx4 s[8:11], s[8:9], 0x10 ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_bitset0_b32 s39, 21 -; GFX10-NEXT: s_add_u32 s36, s36, s0 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_bitset0_b32 s11, 21 +; GFX10-NEXT: s_add_u32 s8, s8, s0 +; GFX10-NEXT: s_addc_u32 s9, s9, 0 +; GFX10-NEXT: s_mov_b64 s[0:1], s[8:9] +; GFX10-NEXT: s_mov_b64 s[2:3], s[10:11] ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll index 36e02000608da..9f79364cf40f6 100644 --- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll @@ -1,17 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Extract the high bit of the 1st quarter -; GCN-LABEL: {{^}}v_uextract_bit_31_i128: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} - -; GCN: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] - -; GCN: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { +; GCN-LABEL: v_uextract_bit_31_i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 +; GCN-NEXT: s_ashr_i32 s3, s2, 31 +; GCN-NEXT: s_lshl_b64 s[0:1], s[2:3], 4 +; GCN-NEXT: v_mov_b32_e32 v5, s1 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[4:5], s[10:11] +; GCN-NEXT: v_mov_b32_e32 v4, s0 +; GCN-NEXT: buffer_load_dword v0, v[4:5], s[4:7], 0 addr64 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v2, v1 +; GCN-NEXT: s_mov_b64 s[10:11], s[6:7] +; GCN-NEXT: v_mov_b32_e32 v3, v1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshrrev_b32_e32 v0, 31, v0 +; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[8:11], 0 addr64 +; GCN-NEXT: s_endpgm %id.x = tail call i32 
@llvm.amdgcn.workgroup.id.x() %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x @@ -23,18 +34,26 @@ define amdgpu_kernel void @v_uextract_bit_31_i128(i128 addrspace(1)* %out, i128 } ; Extract the high bit of the 2nd quarter -; GCN-LABEL: {{^}}v_uextract_bit_63_i128: -; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4{{$}} - -; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} -; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] - -; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { +; GCN-LABEL: v_uextract_bit_63_i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[8:9], s[2:3] +; GCN-NEXT: s_mov_b64 s[10:11], s[6:7] +; GCN-NEXT: buffer_load_dword v0, v[4:5], s[8:11], 0 addr64 offset:4 +; GCN-NEXT: v_mov_b32_e32 v1, v5 +; GCN-NEXT: v_mov_b32_e32 v2, v5 +; GCN-NEXT: s_mov_b64 s[4:5], s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, v5 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshrrev_b32_e32 v0, 31, v0 +; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64 +; GCN-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x @@ -46,17 +65,28 @@ define amdgpu_kernel void @v_uextract_bit_63_i128(i128 addrspace(1)* %out, i128 } ; Extract the high bit of the 3rd quarter -; GCN-LABEL: {{^}}v_uextract_bit_95_i128: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}} - -; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} -; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] - -; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO2]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { +; GCN-LABEL: v_uextract_bit_95_i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GCN-NEXT: s_ashr_i32 s3, s2, 31 +; GCN-NEXT: s_lshl_b64 s[0:1], s[2:3], 4 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, 0 +; GCN-NEXT: v_mov_b32_e32 v5, s1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[8:9], s[6:7] +; GCN-NEXT: s_mov_b64 s[10:11], s[2:3] +; GCN-NEXT: v_mov_b32_e32 v4, s0 +; GCN-NEXT: buffer_load_dword v0, v[4:5], s[8:11], 0 addr64 offset:8 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v2, v1 +; GCN-NEXT: s_mov_b64 s[0:1], s[4:5] +; GCN-NEXT: v_mov_b32_e32 v3, v1 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshrrev_b32_e32 v0, 31, v0 +; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 +; GCN-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workgroup.id.x() %in.gep = 
getelementptr i128, i128 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x @@ -68,18 +98,26 @@ define amdgpu_kernel void @v_uextract_bit_95_i128(i128 addrspace(1)* %out, i128 } ; Extract the high bit of the 4th quarter -; GCN-LABEL: {{^}}v_uextract_bit_127_i128: -; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:12{{$}} - -; GCN-DAG: v_mov_b32_e32 v[[ZERO0:[0-9]+]], 0{{$}} -; GCN: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO2:[0-9]+]], v[[ZERO0]]{{$}} -; GCN: v_mov_b32_e32 v[[ZERO3:[0-9]+]], v[[ZERO0]]{{$}} -; GCN-DAG: v_lshrrev_b32_e32 v[[SHIFT:[0-9]+]], 31, [[VAL]] - -; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[SHIFT]]:[[ZERO3]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { +; GCN-LABEL: v_uextract_bit_127_i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, 0 +; GCN-NEXT: v_lshlrev_b32_e32 v4, 4, v0 +; GCN-NEXT: v_mov_b32_e32 v5, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[8:9], s[2:3] +; GCN-NEXT: s_mov_b64 s[10:11], s[6:7] +; GCN-NEXT: buffer_load_dword v0, v[4:5], s[8:11], 0 addr64 offset:12 +; GCN-NEXT: v_mov_b32_e32 v1, v5 +; GCN-NEXT: v_mov_b32_e32 v2, v5 +; GCN-NEXT: s_mov_b64 s[4:5], s[0:1] +; GCN-NEXT: v_mov_b32_e32 v3, v5 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshrrev_b32_e32 v0, 31, v0 +; GCN-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64 +; GCN-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x @@ -91,19 +129,26 @@ define amdgpu_kernel void @v_uextract_bit_127_i128(i128 addrspace(1)* %out, i128 } ; Spans more than 2 dword boundaries -; GCN-LABEL: {{^}}v_uextract_bit_34_100_i128: -; GCN-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} - -; GCN-DAG: v_lshl_b64 v{{\[}}[[SHLLO:[0-9]+]]:[[SHLHI:[0-9]+]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, 30 -; GCN-DAG: v_lshrrev_b32_e32 v[[ELT1PART:[0-9]+]], 2, v{{[0-9]+}} -; GCN-DAG: v_bfe_u32 v[[ELT2PART:[0-9]+]], v[[VAL3]], 2, 2{{$}} -; GCN-DAG: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}} -; GCN-DAG: v_or_b32_e32 v[[OR0:[0-9]+]], v[[ELT1PART]], v[[SHLLO]] -; GCN-DAG: v_mov_b32_e32 v[[ZERO1:[0-9]+]], v[[ZERO]]{{$}} - -; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[OR0]]:[[ZERO1]]{{\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: s_endpgm define amdgpu_kernel void @v_uextract_bit_34_100_i128(i128 addrspace(1)* %out, i128 addrspace(1)* %in) #1 { +; GCN-LABEL: v_uextract_bit_34_100_i128: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, 0 +; GCN-NEXT: v_lshlrev_b32_e32 v8, 4, v0 +; GCN-NEXT: v_mov_b32_e32 v9, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[0:1], s[6:7] +; GCN-NEXT: buffer_load_dwordx4 v[0:3], v[8:9], s[0:3], 0 addr64 +; GCN-NEXT: s_mov_b64 s[6:7], s[2:3] +; GCN-NEXT: v_mov_b32_e32 v7, v9 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_lshl_b64 v[4:5], v[2:3], 30 +; GCN-NEXT: v_lshrrev_b32_e32 v0, 2, v1 +; GCN-NEXT: v_bfe_u32 v6, v3, 2, 2 +; GCN-NEXT: v_or_b32_e32 v4, v0, v4 +; GCN-NEXT: buffer_store_dwordx4 v[4:7], 
v[8:9], s[4:7], 0 addr64 +; GCN-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %in.gep = getelementptr i128, i128 addrspace(1)* %in, i32 %id.x %out.gep = getelementptr i128, i128 addrspace(1)* %out, i32 %id.x diff --git a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll index 9bafe57b786e2..9b24fc80e6f45 100644 --- a/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AMDGPU/srem-seteq-illegal-types.ll @@ -62,13 +62,13 @@ define <3 x i1> @test_srem_vec(<3 x i31> %X) nounwind { ; CHECK-NEXT: v_bfe_i32 v3, v2, 0, 31 ; CHECK-NEXT: v_bfe_i32 v4, v1, 0, 31 ; CHECK-NEXT: v_bfe_i32 v5, v0, 0, 31 -; CHECK-NEXT: s_mov_b32 s6, 0x38e38e39 -; CHECK-NEXT: s_mov_b32 s7, 0xc71c71c7 -; CHECK-NEXT: s_brev_b32 s4, -2 -; CHECK-NEXT: s_mov_b32 s5, 0x7ffffffd -; CHECK-NEXT: v_mul_hi_i32 v5, v5, s6 -; CHECK-NEXT: v_mul_hi_i32 v4, v4, s6 -; CHECK-NEXT: v_mul_hi_i32 v3, v3, s7 +; CHECK-NEXT: s_mov_b32 s4, 0x38e38e39 +; CHECK-NEXT: s_mov_b32 s5, 0xc71c71c7 +; CHECK-NEXT: s_brev_b32 s6, -2 +; CHECK-NEXT: s_mov_b32 s7, 0x7ffffffd +; CHECK-NEXT: v_mul_hi_i32 v5, v5, s4 +; CHECK-NEXT: v_mul_hi_i32 v4, v4, s4 +; CHECK-NEXT: v_mul_hi_i32 v3, v3, s5 ; CHECK-NEXT: v_lshrrev_b32_e32 v6, 31, v5 ; CHECK-NEXT: v_lshrrev_b32_e32 v5, 1, v5 ; CHECK-NEXT: v_lshrrev_b32_e32 v7, 31, v4 @@ -84,12 +84,12 @@ define <3 x i1> @test_srem_vec(<3 x i31> %X) nounwind { ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 -; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 -; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 -; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 +; CHECK-NEXT: v_and_b32_e32 v2, s6, v2 +; CHECK-NEXT: v_and_b32_e32 v1, s6, v1 +; CHECK-NEXT: v_and_b32_e32 v0, s6, v0 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 3, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s5, v1 +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s7, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 3, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll index e6de73faefbe5..4894eeca58253 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll @@ -17,12 +17,28 @@ define amdgpu_gfx float @caller(float %arg0) { ; GCN-LABEL: caller: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: v_writelane_b32 v1, s33, 1 +; GCN-NEXT: s_mov_b32 s33, s32 +; GCN-NEXT: s_addk_i32 s32, 0x400 +; GCN-NEXT: v_writelane_b32 v1, s4, 0 ; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 2.0 -; GCN-NEXT: s_getpc_b64 s[6:7] -; GCN-NEXT: s_add_u32 s6, s6, callee@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s7, s7, callee@rel32@hi+12 -; GCN-NEXT: s_setpc_b64 s[6:7] +; GCN-NEXT: s_mov_b64 s[36:37], s[30:31] +; GCN-NEXT: s_getpc_b64 s[30:31] +; GCN-NEXT: s_add_u32 s30, s30, callee@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s31, s31, callee@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GCN-NEXT: v_readlane_b32 s4, v1, 0 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 +; GCN-NEXT: v_readlane_b32 s33, v1, 1 +; GCN-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-NEXT: 
s_mov_b64 exec, s[30:31] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_setpc_b64 s[36:37] %add = fadd float %arg0, 1.0 %call = tail call amdgpu_gfx float @callee(float %add, float inreg 2.0) ret float %call diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll index 9b6929d29c0cd..4061eda1b74f1 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -261,7 +261,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] ; SI-NEXT: $vgpr0 = COPY killed [[PHI5]] - ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 + ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc @@ -294,7 +294,7 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] ; SI-NEXT: $vgpr0 = COPY killed [[PHI7]] - ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 + ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc @@ -374,7 +374,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY6]] ; SI-NEXT: $vgpr0 = COPY [[COPY4]] - ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 + ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc @@ -406,7 +406,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float( ; SI-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed [[COPY9]] ; SI-NEXT: $vgpr0 = COPY [[COPY4]] - ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_highregs, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
killed $vgpr0, implicit-def $vgpr0 + ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll index 3087fdf9d5474..57841c00355d4 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll @@ -158,61 +158,61 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, float(float)* % ; SI-LABEL: loop: ; SI: ; %bb.0: ; %main_body ; SI-NEXT: v_mov_b32_e32 v6, v0 -; SI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s38, -1 +; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_mov_b32 s14, -1 ; SI-NEXT: v_mov_b32_e32 v0, v1 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v6 -; SI-NEXT: s_mov_b32 s39, 0x31c16000 -; SI-NEXT: s_add_u32 s36, s36, s1 -; SI-NEXT: s_addc_u32 s37, s37, 0 +; SI-NEXT: s_mov_b32 s15, 0x31c16000 +; SI-NEXT: s_add_u32 s12, s12, s1 +; SI-NEXT: s_addc_u32 s13, s13, 0 ; SI-NEXT: s_mov_b32 s32, 0 ; SI-NEXT: ; implicit-def: $vgpr1 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo -; SI-NEXT: s_xor_b32 s33, exec_lo, s0 +; SI-NEXT: s_xor_b32 s4, exec_lo, s0 ; SI-NEXT: s_cbranch_execz BB3_4 ; SI-NEXT: ; %bb.1: ; %else -; SI-NEXT: s_mov_b32 s34, exec_lo +; SI-NEXT: s_mov_b32 s5, exec_lo ; SI-NEXT: BB3_2: ; =>This Inner Loop Header: Depth=1 -; SI-NEXT: v_readfirstlane_b32 s4, v4 -; SI-NEXT: v_readfirstlane_b32 s5, v5 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo -; SI-NEXT: s_mov_b64 s[0:1], s[36:37] -; SI-NEXT: s_mov_b64 s[2:3], s[38:39] -; SI-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SI-NEXT: v_readfirstlane_b32 s6, v4 +; SI-NEXT: v_readfirstlane_b32 s7, v5 +; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[4:5] +; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo +; SI-NEXT: s_mov_b64 s[0:1], s[12:13] +; SI-NEXT: s_mov_b64 s[2:3], s[14:15] +; SI-NEXT: s_swappc_b64 s[30:31], s[6:7] ; SI-NEXT: v_mov_b32_e32 v1, v0 ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5 ; SI-NEXT: ; implicit-def: $vgpr0 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35 +; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz BB3_2 ; SI-NEXT: ; %bb.3: -; SI-NEXT: s_mov_b32 exec_lo, s34 +; SI-NEXT: s_mov_b32 exec_lo, s5 ; SI-NEXT: ; implicit-def: $vgpr0 ; SI-NEXT: ; implicit-def: $vgpr2 ; SI-NEXT: BB3_4: ; %Flow -; SI-NEXT: s_or_saveexec_b32 s33, s33 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s33 +; SI-NEXT: s_or_saveexec_b32 s4, s4 +; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; SI-NEXT: s_cbranch_execz BB3_8 ; SI-NEXT: ; %bb.5: ; %if -; SI-NEXT: s_mov_b32 s34, exec_lo +; SI-NEXT: s_mov_b32 s5, exec_lo ; SI-NEXT: BB3_6: ; =>This Inner Loop Header: Depth=1 -; SI-NEXT: v_readfirstlane_b32 s4, v2 -; SI-NEXT: v_readfirstlane_b32 s5, v3 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo -; SI-NEXT: s_mov_b64 s[0:1], s[36:37] -; SI-NEXT: s_mov_b64 s[2:3], s[38:39] -; SI-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SI-NEXT: v_readfirstlane_b32 s6, v2 +; SI-NEXT: v_readfirstlane_b32 s7, v3 +; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], 
v[2:3] +; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo +; SI-NEXT: s_mov_b64 s[0:1], s[12:13] +; SI-NEXT: s_mov_b64 s[2:3], s[14:15] +; SI-NEXT: s_swappc_b64 s[30:31], s[6:7] ; SI-NEXT: v_mov_b32_e32 v1, v0 ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3 ; SI-NEXT: ; implicit-def: $vgpr0 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35 +; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz BB3_6 ; SI-NEXT: ; %bb.7: -; SI-NEXT: s_mov_b32 exec_lo, s34 +; SI-NEXT: s_mov_b32 exec_lo, s5 ; SI-NEXT: BB3_8: ; %end -; SI-NEXT: s_or_b32 exec_lo, exec_lo, s33 +; SI-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; SI-NEXT: v_mov_b32_e32 v0, v1 ; SI-NEXT: ; return to shader part epilog main_body: @@ -236,58 +236,58 @@ end: define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, float(float)* %extern_func, float(float)* %extern_func2) #0 { ; SI-LABEL: loop_with_use: ; SI: ; %bb.0: ; %main_body -; SI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s38, -1 +; SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; SI-NEXT: s_mov_b32 s14, -1 ; SI-NEXT: v_mov_b32_e32 v40, v1 ; SI-NEXT: v_cmp_gt_i32_e32 vcc_lo, 6, v0 -; SI-NEXT: s_mov_b32 s39, 0x31c16000 -; SI-NEXT: s_add_u32 s36, s36, s1 -; SI-NEXT: s_addc_u32 s37, s37, 0 +; SI-NEXT: s_mov_b32 s15, 0x31c16000 +; SI-NEXT: s_add_u32 s12, s12, s1 +; SI-NEXT: s_addc_u32 s13, s13, 0 ; SI-NEXT: s_mov_b32 s32, 0 ; SI-NEXT: ; implicit-def: $vgpr0 ; SI-NEXT: s_and_saveexec_b32 s0, vcc_lo -; SI-NEXT: s_xor_b32 s33, exec_lo, s0 +; SI-NEXT: s_xor_b32 s4, exec_lo, s0 ; SI-NEXT: s_cbranch_execz BB4_4 ; SI-NEXT: ; %bb.1: ; %else -; SI-NEXT: s_mov_b32 s34, exec_lo +; SI-NEXT: s_mov_b32 s5, exec_lo ; SI-NEXT: BB4_2: ; =>This Inner Loop Header: Depth=1 -; SI-NEXT: v_readfirstlane_b32 s4, v4 -; SI-NEXT: v_readfirstlane_b32 s5, v5 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo +; SI-NEXT: v_readfirstlane_b32 s6, v4 +; SI-NEXT: v_readfirstlane_b32 s7, v5 +; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[4:5] +; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo ; SI-NEXT: v_mov_b32_e32 v0, v40 -; SI-NEXT: s_mov_b64 s[0:1], s[36:37] -; SI-NEXT: s_mov_b64 s[2:3], s[38:39] -; SI-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SI-NEXT: s_mov_b64 s[0:1], s[12:13] +; SI-NEXT: s_mov_b64 s[2:3], s[14:15] +; SI-NEXT: s_swappc_b64 s[30:31], s[6:7] ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35 +; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz BB4_2 ; SI-NEXT: ; %bb.3: -; SI-NEXT: s_mov_b32 exec_lo, s34 +; SI-NEXT: s_mov_b32 exec_lo, s5 ; SI-NEXT: ; implicit-def: $vgpr2 ; SI-NEXT: BB4_4: ; %Flow -; SI-NEXT: s_or_saveexec_b32 s33, s33 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s33 +; SI-NEXT: s_or_saveexec_b32 s4, s4 +; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; SI-NEXT: s_cbranch_execz BB4_8 ; SI-NEXT: ; %bb.5: ; %if -; SI-NEXT: s_mov_b32 s34, exec_lo +; SI-NEXT: s_mov_b32 s5, exec_lo ; SI-NEXT: BB4_6: ; =>This Inner Loop Header: Depth=1 -; SI-NEXT: v_readfirstlane_b32 s4, v2 -; SI-NEXT: v_readfirstlane_b32 s5, v3 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; SI-NEXT: s_and_saveexec_b32 s35, vcc_lo +; SI-NEXT: v_readfirstlane_b32 s6, v2 +; SI-NEXT: v_readfirstlane_b32 s7, v3 +; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[6:7], v[2:3] +; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo ; SI-NEXT: v_mov_b32_e32 v0, v40 -; SI-NEXT: s_mov_b64 s[0:1], s[36:37] -; SI-NEXT: s_mov_b64 s[2:3], s[38:39] -; SI-NEXT: s_swappc_b64 
s[30:31], s[4:5] +; SI-NEXT: s_mov_b64 s[0:1], s[12:13] +; SI-NEXT: s_mov_b64 s[2:3], s[14:15] +; SI-NEXT: s_swappc_b64 s[30:31], s[6:7] ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s35 +; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz BB4_6 ; SI-NEXT: ; %bb.7: -; SI-NEXT: s_mov_b32 exec_lo, s34 +; SI-NEXT: s_mov_b32 exec_lo, s5 ; SI-NEXT: BB4_8: ; %end -; SI-NEXT: s_or_b32 exec_lo, exec_lo, s33 +; SI-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; SI-NEXT: v_add_f32_e32 v0, v0, v40 ; SI-NEXT: ; return to shader part epilog main_body: diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index 3d655d23e73c6..f796910ee6015 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -6,77 +6,75 @@ define amdgpu_gfx void @strict_wwm_no_cfg(<4 x i32> inreg %tmp14) { ; GFX9-O0-LABEL: strict_wwm_no_cfg: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[8:9] -; GFX9-O0-NEXT: s_mov_b32 s8, s7 -; GFX9-O0-NEXT: s_mov_b32 s9, s6 -; GFX9-O0-NEXT: s_mov_b32 s10, s5 -; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b32 s5, s10 -; GFX9-O0-NEXT: s_mov_b32 s6, s9 -; GFX9-O0-NEXT: s_mov_b32 s7, s8 -; GFX9-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10_sgpr11 killed $sgpr4_sgpr5_sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b32 s8, 0 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[5:6], off, s[4:7], s8 +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b32 s36, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s37, s5 +; GFX9-O0-NEXT: s_mov_b32 s38, s6 +; GFX9-O0-NEXT: s_mov_b32 s39, s7 +; GFX9-O0-NEXT: ; kill: def $sgpr40_sgpr41_sgpr42_sgpr43 killed $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[5:6], off, s[36:39], s34 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[40:41], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v0 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v0, v0, v2 -; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[40:41], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v0, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; 
GFX9-O0-NEXT: v_add_u32_e64 v0, v1, v0 -; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, v4 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[10:11] -; GFX9-O0-NEXT: s_mov_b32 s9, 1 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s9, v3 -; GFX9-O0-NEXT: s_mov_b32 s9, 2 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[40:41], v3, v4 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[40:41] +; GFX9-O0-NEXT: s_mov_b32 s35, 1 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s35, v3 +; GFX9-O0-NEXT: s_mov_b32 s35, 2 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s35 ; GFX9-O0-NEXT: v_and_b32_e32 v3, v3, v4 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[4:7], s8 offset:4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-O3-LABEL: strict_wwm_no_cfg: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 @@ -84,12 +82,12 @@ define amdgpu_gfx void @strict_wwm_no_cfg(<4 x i32> inreg %tmp14) { ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O3-NEXT: v_mov_b32_dpp v0, v3 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 ; GFX9-O3-NEXT: v_add_u32_e32 v0, v3, v0 -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-O3-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 @@ -97,14 +95,14 @@ define amdgpu_gfx void @strict_wwm_no_cfg(<4 x i32> inreg %tmp14) { ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v4, 1, v4 ; GFX9-O3-NEXT: v_and_b32_e32 v4, 2, v4 ; GFX9-O3-NEXT: buffer_store_dword v4, off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte 
Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %tmp100 = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %tmp14, i32 0, i32 0, i32 0) @@ -136,52 +134,51 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-LABEL: strict_wwm_cfg: ; GFX9-O0: ; %bb.0: ; %entry ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 1 -; GFX9-O0-NEXT: s_mov_b32 s8, s4 -; GFX9-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 -; GFX9-O0-NEXT: s_mov_b32 s9, s5 -; GFX9-O0-NEXT: s_mov_b32 s10, s6 -; GFX9-O0-NEXT: s_mov_b32 s11, s7 -; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[8:9] -; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[10:11] -; GFX9-O0-NEXT: v_writelane_b32 v5, s4, 2 -; GFX9-O0-NEXT: v_writelane_b32 v5, s5, 3 -; GFX9-O0-NEXT: v_writelane_b32 v5, s6, 4 -; GFX9-O0-NEXT: v_writelane_b32 v5, s7, 5 -; GFX9-O0-NEXT: s_mov_b32 s4, 0 -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], s4 +; GFX9-O0-NEXT: s_mov_b32 s36, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s37, s5 +; GFX9-O0-NEXT: s_mov_b32 s38, s6 +; GFX9-O0-NEXT: s_mov_b32 s39, s7 +; GFX9-O0-NEXT: s_mov_b64 s[42:43], s[38:39] +; GFX9-O0-NEXT: s_mov_b64 s[40:41], s[36:37] +; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 3 +; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 5 +; GFX9-O0-NEXT: s_mov_b32 s30, 0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s30 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 +; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s30 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s4 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s30 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 -; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; 
GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, s4 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, s30 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s30 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec -; GFX9-O0-NEXT: v_writelane_b32 v5, s4, 6 -; GFX9-O0-NEXT: v_writelane_b32 v5, s5, 7 -; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] -; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 s[30:31], exec +; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 6 +; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 7 +; GFX9-O0-NEXT: s_and_b64 s[30:31], s[30:31], s[34:35] +; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O0-NEXT: s_cbranch_execz BB1_2 ; GFX9-O0-NEXT: ; %bb.1: ; %if ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload @@ -189,100 +186,100 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O0-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1 -; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: BB1_2: ; %merge -; GFX9-O0-NEXT: v_readlane_b32 s6, v5, 6 -; GFX9-O0-NEXT: v_readlane_b32 s7, v5, 7 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX9-O0-NEXT: v_readlane_b32 s4, v5, 0 -; GFX9-O0-NEXT: v_readlane_b32 s5, v5, 1 -; GFX9-O0-NEXT: v_readlane_b32 s8, v5, 2 -; GFX9-O0-NEXT: v_readlane_b32 s9, v5, 3 -; GFX9-O0-NEXT: v_readlane_b32 s10, v5, 4 -; GFX9-O0-NEXT: v_readlane_b32 s11, v5, 5 +; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 6 +; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 7 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35] +; GFX9-O0-NEXT: v_readlane_b32 s30, v5, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v5, 1 +; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 2 +; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 3 +; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 4 +; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 5 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, v3 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[6:7] -; GFX9-O0-NEXT: s_mov_b32 s6, 1 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s6, v0 -; GFX9-O0-NEXT: s_mov_b32 s6, 2 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s6 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, v3 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] +; GFX9-O0-NEXT: s_mov_b32 s34, 1 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s34, v0 +; GFX9-O0-NEXT: s_mov_b32 s34, 2 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s34 ; GFX9-O0-NEXT: v_and_b32_e32 v0, v0, v3 -; GFX9-O0-NEXT: s_mov_b32 s6, 0 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[8:11], 
s6 offset:4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: s_setpc_b64 s[4:5] +; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-O3-LABEL: strict_wwm_cfg: ; GFX9-O3: ; %bb.0: ; %entry ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O3-NEXT: v_add_u32_e32 v1, v2, v1 -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-O3-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GFX9-O3-NEXT: s_and_saveexec_b64 s[34:35], vcc ; GFX9-O3-NEXT: ; %bb.1: ; %if -; GFX9-O3-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O3-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v3, 0 ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-O3-NEXT: v_mov_b32_dpp v1, v3 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O3-NEXT: v_add_u32_e32 v1, v3, v1 -; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O3-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O3-NEXT: v_mov_b32_e32 v5, v1 ; GFX9-O3-NEXT: ; %bb.2: ; %merge -; GFX9-O3-NEXT: s_or_b64 exec, exec, s[8:9] +; GFX9-O3-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-O3-NEXT: v_cmp_eq_u32_e32 vcc, v4, v5 ; GFX9-O3-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX9-O3-NEXT: v_and_b32_e32 v0, 2, v0 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: 
buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: s_setpc_b64 s[30:31] entry: @@ -343,92 +340,89 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O0-LABEL: strict_wwm_call: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_writelane_b32 v3, s33, 2 ; GFX9-O0-NEXT: s_mov_b32 s33, s32 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x400 ; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v3, s31, 1 -; GFX9-O0-NEXT: s_mov_b32 s9, s8 -; GFX9-O0-NEXT: s_mov_b32 s8, s7 -; GFX9-O0-NEXT: s_mov_b32 s10, s6 -; GFX9-O0-NEXT: s_mov_b32 s11, s5 -; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b32 s5, s11 -; GFX9-O0-NEXT: s_mov_b32 s6, s10 -; GFX9-O0-NEXT: s_mov_b32 s7, s8 -; GFX9-O0-NEXT: ; kill: def $sgpr12_sgpr13_sgpr14_sgpr15 killed $sgpr4_sgpr5_sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b32 s8, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s9 +; GFX9-O0-NEXT: s_mov_b32 s36, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s37, s5 +; GFX9-O0-NEXT: s_mov_b32 s38, s6 +; GFX9-O0-NEXT: s_mov_b32 s39, s7 +; GFX9-O0-NEXT: ; kill: def $sgpr40_sgpr41_sgpr42_sgpr43 killed $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GFX9-O0-NEXT: s_getpc_b64 s[12:13] -; GFX9-O0-NEXT: s_add_u32 s12, s12, strict_wwm_called@rel32@lo+4 -; GFX9-O0-NEXT: s_addc_u32 s13, s13, strict_wwm_called@rel32@hi+12 -; GFX9-O0-NEXT: s_mov_b64 s[18:19], s[2:3] -; GFX9-O0-NEXT: s_mov_b64 s[16:17], s[0:1] -; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[16:17] -; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[18:19] +; GFX9-O0-NEXT: s_or_saveexec_b64 s[40:41], -1 +; GFX9-O0-NEXT: s_getpc_b64 s[30:31] +; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12 +; GFX9-O0-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-O0-NEXT: s_mov_b64 s[44:45], s[0:1] +; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[44:45] +; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[12:13] +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 -; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] +; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[4:7], s8 offset:4 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; GFX9-O0-NEXT: 
v_readlane_b32 s33, v3, 2 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-O3-LABEL: strict_wwm_call: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] -; GFX9-O3-NEXT: s_mov_b32 s14, s33 +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O3-NEXT: s_mov_b32 s38, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x400 -; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O3-NEXT: s_getpc_b64 s[12:13] -; GFX9-O3-NEXT: s_add_u32 s12, s12, strict_wwm_called@rel32@lo+4 -; GFX9-O3-NEXT: s_addc_u32 s13, s13, strict_wwm_called@rel32@hi+12 -; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[12:13] +; GFX9-O3-NEXT: s_getpc_b64 s[30:31] +; GFX9-O3-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12 +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2 -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-O3-NEXT: s_mov_b32 s33, s14 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: s_mov_b32 s33, s38 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: s_setpc_b64 s[10:11] +; GFX9-O3-NEXT: s_setpc_b64 s[36:37] %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) %tmp134 = call amdgpu_gfx i32 @strict_wwm_called(i32 %tmp107) %tmp136 = add i32 %tmp134, %tmp107 @@ -449,32 +443,32 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline { ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-O0-NEXT: v_add_co_u32_e64 v4, s[4:5], v2, v3 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v0, s[4:5], v0, v1, s[4:5] +; GFX9-O0-NEXT: v_add_co_u32_e64 v4, s[34:35], v2, v3 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v0, s[34:35], v0, v1, s[34:35] ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 -; GFX9-O0-NEXT: s_mov_b32 s4, 32 +; GFX9-O0-NEXT: s_mov_b32 s34, 32 ; 
GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] +; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s34, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-O0-NEXT: v_mul_lo_u32 v2, v0, v1 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mul_hi_u32 v1, v0, v6 -; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[4:5] +; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s34, v[4:5] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 ; GFX9-O0-NEXT: v_mul_lo_u32 v3, v3, v6 ; GFX9-O0-NEXT: v_add3_u32 v1, v1, v2, v3 -; GFX9-O0-NEXT: ; implicit-def: $sgpr5 -; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-O0-NEXT: ; implicit-def: $sgpr35 +; GFX9-O0-NEXT: ; implicit-def: $sgpr36 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s35 ; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s4, v[1:2] +; GFX9-O0-NEXT: v_lshlrev_b64 v[1:2], s34, v[1:2] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 ; GFX9-O0-NEXT: v_mul_lo_u32 v6, v0, v6 -; GFX9-O0-NEXT: s_mov_b32 s5, 0 +; GFX9-O0-NEXT: s_mov_b32 s35, 0 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 @@ -489,12 +483,12 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline { ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: v_sub_co_u32_e64 v1, s[6:7], v1, v3 -; GFX9-O0-NEXT: v_subb_co_u32_e64 v0, s[6:7], v0, v2, s[6:7] +; GFX9-O0-NEXT: v_sub_co_u32_e64 v1, s[36:37], v1, v3 +; GFX9-O0-NEXT: v_subb_co_u32_e64 v0, s[36:37], v0, v2, s[36:37] ; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 -; GFX9-O0-NEXT: v_lshrrev_b64 v[1:2], s4, v[1:2] +; GFX9-O0-NEXT: v_lshrrev_b64 v[1:2], s34, v[1:2] ; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; @@ -521,7 +515,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-LABEL: strict_wwm_call_i64: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) @@ -536,78 +530,75 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11] -; GFX9-O0-NEXT: v_writelane_b32 v11, s33, 9 +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: v_writelane_b32 v11, s33, 8 ; GFX9-O0-NEXT: s_mov_b32 s33, s32 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xc00 ; GFX9-O0-NEXT: v_writelane_b32 v11, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v11, s31, 1 -; GFX9-O0-NEXT: v_writelane_b32 v11, s9, 2 -; GFX9-O0-NEXT: v_writelane_b32 v11, s8, 3 -; GFX9-O0-NEXT: s_mov_b32 s8, s6 -; GFX9-O0-NEXT: 
v_readlane_b32 s6, v11, 3 -; GFX9-O0-NEXT: v_writelane_b32 v11, s8, 4 -; GFX9-O0-NEXT: s_mov_b32 s12, s5 -; GFX9-O0-NEXT: v_readlane_b32 s5, v11, 4 -; GFX9-O0-NEXT: s_mov_b32 s8, s4 -; GFX9-O0-NEXT: v_readlane_b32 s4, v11, 2 -; GFX9-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 -; GFX9-O0-NEXT: s_mov_b32 s9, s12 -; GFX9-O0-NEXT: s_mov_b32 s10, s5 -; GFX9-O0-NEXT: s_mov_b32 s11, s7 -; GFX9-O0-NEXT: v_writelane_b32 v11, s8, 5 -; GFX9-O0-NEXT: v_writelane_b32 v11, s9, 6 -; GFX9-O0-NEXT: v_writelane_b32 v11, s10, 7 -; GFX9-O0-NEXT: v_writelane_b32 v11, s11, 8 -; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b32 s7, s4 -; GFX9-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b64 s[4:5], 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-O0-NEXT: s_mov_b32 s34, s8 +; GFX9-O0-NEXT: s_mov_b32 s36, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s37, s5 +; GFX9-O0-NEXT: s_mov_b32 s38, s6 +; GFX9-O0-NEXT: s_mov_b32 s39, s7 +; GFX9-O0-NEXT: v_writelane_b32 v11, s36, 2 +; GFX9-O0-NEXT: v_writelane_b32 v11, s37, 3 +; GFX9-O0-NEXT: v_writelane_b32 v11, s38, 4 +; GFX9-O0-NEXT: v_writelane_b32 v11, s39, 5 +; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35 +; GFX9-O0-NEXT: s_mov_b32 s35, s9 +; GFX9-O0-NEXT: ; kill: def $sgpr30_sgpr31 killed $sgpr34_sgpr35 +; GFX9-O0-NEXT: s_mov_b64 s[30:31], 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35 ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s4 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s30 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s31 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O0-NEXT: v_writelane_b32 v11, s30, 6 +; GFX9-O0-NEXT: v_writelane_b32 v11, s31, 7 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 -; GFX9-O0-NEXT: s_mov_b32 s4, 32 -; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 -; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[9:10] -; GFX9-O0-NEXT: s_getpc_b64 s[4:5] -; GFX9-O0-NEXT: s_add_u32 s4, s4, strict_wwm_called_i64@gotpcrel32@lo+4 -; GFX9-O0-NEXT: s_addc_u32 s5, s5, strict_wwm_called_i64@gotpcrel32@hi+12 -; GFX9-O0-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[2:3] -; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[0:1] -; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[12:13] -; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[14:15] +; GFX9-O0-NEXT: s_mov_b32 s30, 32 +; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35 +; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s30, v[9:10] +; GFX9-O0-NEXT: s_getpc_b64 s[30:31] +; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O0-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-O0-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-O0-NEXT: s_mov_b64 s[36:37], s[0:1] +; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-O0-NEXT: v_readlane_b32 s4, v11, 5 -; GFX9-O0-NEXT: v_readlane_b32 s5, v11, 6 -; GFX9-O0-NEXT: v_readlane_b32 s6, v11, 7 -; GFX9-O0-NEXT: v_readlane_b32 s7, v11, 8 +; 
GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-O0-NEXT: v_readlane_b32 s34, v11, 6 +; GFX9-O0-NEXT: v_readlane_b32 s35, v11, 7 +; GFX9-O0-NEXT: v_readlane_b32 s36, v11, 2 +; GFX9-O0-NEXT: v_readlane_b32 s37, v11, 3 +; GFX9-O0-NEXT: v_readlane_b32 s38, v11, 4 +; GFX9-O0-NEXT: v_readlane_b32 s39, v11, 5 ; GFX9-O0-NEXT: v_readlane_b32 s30, v11, 0 ; GFX9-O0-NEXT: v_readlane_b32 s31, v11, 1 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v9 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 -; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[10:11], v2, v4 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[10:11], v3, v5, s[10:11] -; GFX9-O0-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O0-NEXT: s_mov_b32 s8, 0 -; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], s8 offset:4 +; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff400 -; GFX9-O0-NEXT: v_readlane_b32 s33, v11, 9 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O0-NEXT: v_readlane_b32 s33, v11, 8 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 ; 4-byte Folded Reload @@ -628,14 +619,14 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-O3-LABEL: strict_wwm_call_i64: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[10:11], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill @@ -644,37 +635,37 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11] -; GFX9-O3-NEXT: s_mov_b32 s14, s33 +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O3-NEXT: s_mov_b32 s38, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x800 -; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31] +; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GFX9-O3-NEXT: s_getpc_b64 s[12:13] -; GFX9-O3-NEXT: s_add_u32 s12, s12, strict_wwm_called_i64@gotpcrel32@lo+4 -; GFX9-O3-NEXT: s_addc_u32 s13, s13, strict_wwm_called_i64@gotpcrel32@hi+12 -; GFX9-O3-NEXT: 
s_load_dwordx2 s[12:13], s[12:13], 0x0 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_getpc_b64 s[30:31] +; GFX9-O3-NEXT: s_add_u32 s30, s30, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s31, s31, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O3-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7 ; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[12:13] +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 ; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-O3-NEXT: s_mov_b32 s33, s14 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: s_mov_b32 s33, s38 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -685,9 +676,9 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: s_setpc_b64 s[10:11] +; GFX9-O3-NEXT: s_setpc_b64 s[36:37] %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) %tmp134 = call amdgpu_gfx i64 @strict_wwm_called_i64(i64 %tmp107) %tmp136 = add i64 %tmp134, %tmp107 @@ -701,38 +692,36 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O0-LABEL: strict_wwm_amdgpu_cs_main: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[8:9] -; GFX9-O0-NEXT: s_mov_b32 s8, s7 -; GFX9-O0-NEXT: s_mov_b32 s9, s6 -; GFX9-O0-NEXT: s_mov_b32 s10, s5 -; GFX9-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b32 s5, s10 -; GFX9-O0-NEXT: s_mov_b32 s6, s9 -; GFX9-O0-NEXT: s_mov_b32 s7, s8 -; GFX9-O0-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10_sgpr11 killed $sgpr4_sgpr5_sgpr6_sgpr7 -; GFX9-O0-NEXT: s_mov_b32 s8, 5 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s8, v0 -; GFX9-O0-NEXT: s_mov_b32 s8, 0 -; GFX9-O0-NEXT: buffer_load_dwordx4 v[10:13], v0, s[4:7], s8 offen -; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], v0, s[4:7], s8 offen offset:16 +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b32 s36, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s37, s5 +; GFX9-O0-NEXT: s_mov_b32 s38, s6 +; GFX9-O0-NEXT: s_mov_b32 s39, s7 +; GFX9-O0-NEXT: ; kill: def $sgpr40_sgpr41_sgpr42_sgpr43 killed $sgpr36_sgpr37_sgpr38_sgpr39 +; 
GFX9-O0-NEXT: s_mov_b32 s34, 5 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s34, v0 +; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: buffer_load_dwordx4 v[10:13], v0, s[36:39], s34 offen +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], v0, s[36:39], s34 offen offset:16 ; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: s_mov_b32 s9, 0x7fffffff -; GFX9-O0-NEXT: s_mov_b32 s10, -1 -; GFX9-O0-NEXT: ; kill: def $sgpr10 killed $sgpr10 def $sgpr10_sgpr11 -; GFX9-O0-NEXT: s_mov_b32 s11, s9 +; GFX9-O0-NEXT: s_mov_b32 s35, 0x7fffffff +; GFX9-O0-NEXT: s_mov_b32 s40, -1 +; GFX9-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41 +; GFX9-O0-NEXT: s_mov_b32 s41, s35 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s40 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s41 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 @@ -743,8 +732,8 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s40 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s41 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 @@ -752,8 +741,8 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s40 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s41 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 @@ -765,20 +754,20 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v9 -; GFX9-O0-NEXT: buffer_store_dwordx4 v[5:8], v0, s[4:7], s8 offen -; GFX9-O0-NEXT: buffer_store_dwordx2 v[3:4], v0, s[4:7], s8 offen offset:16 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O0-NEXT: buffer_store_dwordx4 v[5:8], v0, s[36:39], s34 offen +; GFX9-O0-NEXT: buffer_store_dwordx2 v[3:4], v0, s[36:39], s34 offen offset:16 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-O3-LABEL: strict_wwm_amdgpu_cs_main: ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O3-NEXT: s_or_saveexec_b64 s[8:9], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded 
Spill @@ -787,25 +776,25 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O3-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; GFX9-O3-NEXT: buffer_load_dwordx4 v[1:4], v0, s[4:7], 0 offen ; GFX9-O3-NEXT: buffer_load_dwordx2 v[5:6], v0, s[4:7], 0 offen offset:16 -; GFX9-O3-NEXT: s_mov_b32 s8, -1 -; GFX9-O3-NEXT: s_brev_b32 s9, -2 +; GFX9-O3-NEXT: s_mov_b32 s34, -1 +; GFX9-O3-NEXT: s_brev_b32 s35, -2 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_waitcnt vmcnt(1) -; GFX9-O3-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-O3-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-O3-NEXT: v_mov_b32_e32 v1, s34 +; GFX9-O3-NEXT: v_mov_b32_e32 v2, s35 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_not_b64 exec, exec -; GFX9-O3-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-O3-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-O3-NEXT: v_mov_b32_e32 v3, s34 +; GFX9-O3-NEXT: v_mov_b32_e32 v4, s35 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O3-NEXT: v_mov_b32_e32 v6, s9 +; GFX9-O3-NEXT: v_mov_b32_e32 v5, s34 +; GFX9-O3-NEXT: v_mov_b32_e32 v6, s35 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v7, v1 ; GFX9-O3-NEXT: v_mov_b32_e32 v9, v3 @@ -815,7 +804,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O3-NEXT: v_mov_b32_e32 v12, v6 ; GFX9-O3-NEXT: buffer_store_dwordx4 v[7:10], v0, s[4:7], 0 offen ; GFX9-O3-NEXT: buffer_store_dwordx2 v[11:12], v0, s[4:7], 0 offen offset:16 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -827,7 +816,7 @@ define amdgpu_gfx void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %in ; GFX9-O3-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %tmp17 = shl i32 %index, 5 diff --git a/llvm/test/CodeGen/ARM/O3-pipeline.ll b/llvm/test/CodeGen/ARM/O3-pipeline.ll index 885632838a83b..d0ef18240205c 100644 --- a/llvm/test/CodeGen/ARM/O3-pipeline.ll +++ b/llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -151,12 +151,12 @@ ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: If Converter -; CHECK-NEXT: MVE VPT block insertion pass ; CHECK-NEXT: Thumb IT blocks insertion pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: PostRA Machine Instruction Scheduler ; CHECK-NEXT: Post RA top-down list latency scheduler +; CHECK-NEXT: MVE VPT block insertion pass ; CHECK-NEXT: ARM Indirect Thunks ; CHECK-NEXT: ARM sls hardening pass ; CHECK-NEXT: Analyze Machine Code For Garbage Collection diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll index 
55157875d355f..de5bd2a7040b9 100644 --- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll @@ -67,61 +67,24 @@ define i32 @rotl_i32(i32 %x, i32 %z) { } define i64 @rotl_i64(i64 %x, i64 %z) { -; SCALAR-LABEL: rotl_i64: -; SCALAR: @ %bb.0: -; SCALAR-NEXT: .save {r4, r5, r11, lr} -; SCALAR-NEXT: push {r4, r5, r11, lr} -; SCALAR-NEXT: rsb r3, r2, #0 -; SCALAR-NEXT: and r4, r2, #63 -; SCALAR-NEXT: and lr, r3, #63 -; SCALAR-NEXT: rsb r3, lr, #32 -; SCALAR-NEXT: lsl r2, r0, r4 -; SCALAR-NEXT: lsr r12, r0, lr -; SCALAR-NEXT: orr r3, r12, r1, lsl r3 -; SCALAR-NEXT: subs r12, lr, #32 -; SCALAR-NEXT: lsrpl r3, r1, r12 -; SCALAR-NEXT: subs r5, r4, #32 -; SCALAR-NEXT: movwpl r2, #0 -; SCALAR-NEXT: cmp r5, #0 -; SCALAR-NEXT: orr r2, r2, r3 -; SCALAR-NEXT: rsb r3, r4, #32 -; SCALAR-NEXT: lsr r3, r0, r3 -; SCALAR-NEXT: orr r3, r3, r1, lsl r4 -; SCALAR-NEXT: lslpl r3, r0, r5 -; SCALAR-NEXT: lsr r0, r1, lr -; SCALAR-NEXT: cmp r12, #0 -; SCALAR-NEXT: movwpl r0, #0 -; SCALAR-NEXT: orr r1, r3, r0 -; SCALAR-NEXT: mov r0, r2 -; SCALAR-NEXT: pop {r4, r5, r11, pc} -; -; NEON-LABEL: rotl_i64: -; NEON: @ %bb.0: -; NEON-NEXT: .save {r4, r5, r11, lr} -; NEON-NEXT: push {r4, r5, r11, lr} -; NEON-NEXT: and r12, r2, #63 -; NEON-NEXT: rsb r2, r2, #0 -; NEON-NEXT: rsb r3, r12, #32 -; NEON-NEXT: and r4, r2, #63 -; NEON-NEXT: subs lr, r12, #32 -; NEON-NEXT: lsr r3, r0, r3 -; NEON-NEXT: lsr r2, r1, r4 -; NEON-NEXT: orr r3, r3, r1, lsl r12 -; NEON-NEXT: lslpl r3, r0, lr -; NEON-NEXT: subs r5, r4, #32 -; NEON-NEXT: movwpl r2, #0 -; NEON-NEXT: cmp r5, #0 -; NEON-NEXT: orr r2, r3, r2 -; NEON-NEXT: lsr r3, r0, r4 -; NEON-NEXT: rsb r4, r4, #32 -; NEON-NEXT: lsl r0, r0, r12 -; NEON-NEXT: orr r3, r3, r1, lsl r4 -; NEON-NEXT: lsrpl r3, r1, r5 -; NEON-NEXT: cmp lr, #0 -; NEON-NEXT: movwpl r0, #0 -; NEON-NEXT: mov r1, r2 -; NEON-NEXT: orr r0, r0, r3 -; NEON-NEXT: pop {r4, r5, r11, pc} +; CHECK-LABEL: rotl_i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: ands r3, r2, #32 +; CHECK-NEXT: and r12, r2, #31 +; CHECK-NEXT: mov r3, r0 +; CHECK-NEXT: mov r4, #31 +; CHECK-NEXT: movne r3, r1 +; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: bic r2, r4, r2 +; CHECK-NEXT: lsl lr, r3, r12 +; CHECK-NEXT: lsr r0, r1, #1 +; CHECK-NEXT: lsl r1, r1, r12 +; CHECK-NEXT: lsr r3, r3, #1 +; CHECK-NEXT: orr r0, lr, r0, lsr r2 +; CHECK-NEXT: orr r1, r1, r3, lsr r2 +; CHECK-NEXT: pop {r4, pc} %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } @@ -243,31 +206,21 @@ define i32 @rotr_i32(i32 %x, i32 %z) { define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK-LABEL: rotr_i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r11, lr} -; CHECK-NEXT: push {r4, r5, r11, lr} -; CHECK-NEXT: and lr, r2, #63 -; CHECK-NEXT: rsb r2, r2, #0 -; CHECK-NEXT: rsb r3, lr, #32 -; CHECK-NEXT: and r4, r2, #63 -; CHECK-NEXT: lsr r12, r0, lr -; CHECK-NEXT: orr r3, r12, r1, lsl r3 -; CHECK-NEXT: subs r12, lr, #32 -; CHECK-NEXT: lsl r2, r0, r4 -; CHECK-NEXT: lsrpl r3, r1, r12 -; CHECK-NEXT: subs r5, r4, #32 -; CHECK-NEXT: movwpl r2, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: orr r2, r3, r2 -; CHECK-NEXT: rsb r3, r4, #32 -; CHECK-NEXT: lsr r3, r0, r3 -; CHECK-NEXT: orr r3, r3, r1, lsl r4 -; CHECK-NEXT: lslpl r3, r0, r5 -; CHECK-NEXT: lsr r0, r1, lr -; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: movwpl r0, #0 -; CHECK-NEXT: orr r1, r0, r3 -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: pop {r4, r5, r11, pc} +; CHECK-NEXT: ands r3, r2, #32 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: moveq r3, r0 +; CHECK-NEXT: moveq 
r0, r1 +; CHECK-NEXT: mov r1, #31 +; CHECK-NEXT: lsl r12, r0, #1 +; CHECK-NEXT: bic r1, r1, r2 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r12, r12, r1 +; CHECK-NEXT: orr r12, r12, r3, lsr r2 +; CHECK-NEXT: lsl r3, r3, #1 +; CHECK-NEXT: lsl r1, r3, r1 +; CHECK-NEXT: orr r1, r1, r0, lsr r2 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: bx lr %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll index 54c93b493c981..25e210d819147 100644 --- a/llvm/test/CodeGen/ARM/funnel-shift.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift.ll @@ -45,46 +45,69 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { ; Verify that weird types are minimally supported. declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { -; CHECK-LABEL: fshl_i37: -; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: ldr r0, [sp, #24] -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: ldr r1, [sp, #28] -; CHECK-NEXT: mov r7, r2 -; CHECK-NEXT: mov r2, #37 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: bl __aeabi_uldivmod -; CHECK-NEXT: mov r0, #63 -; CHECK-NEXT: bic r1, r0, r2 -; CHECK-NEXT: lsl r0, r6, #27 -; CHECK-NEXT: lsl r3, r7, #27 -; CHECK-NEXT: orr r0, r0, r7, lsr #5 -; CHECK-NEXT: and r2, r2, #63 -; CHECK-NEXT: lsrs r7, r0, #1 -; CHECK-NEXT: rrx r0, r3 -; CHECK-NEXT: rsb r3, r1, #32 -; CHECK-NEXT: lsr r0, r0, r1 -; CHECK-NEXT: lsl r6, r4, r2 -; CHECK-NEXT: orr r0, r0, r7, lsl r3 -; CHECK-NEXT: subs r3, r1, #32 -; CHECK-NEXT: lsr r1, r7, r1 -; CHECK-NEXT: lsrpl r0, r7, r3 -; CHECK-NEXT: subs r5, r2, #32 -; CHECK-NEXT: movwpl r6, #0 -; CHECK-NEXT: orr r0, r6, r0 -; CHECK-NEXT: rsb r6, r2, #32 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: lsr r6, r4, r6 -; CHECK-NEXT: orr r2, r6, r8, lsl r2 -; CHECK-NEXT: lslpl r2, r4, r5 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movwpl r1, #0 -; CHECK-NEXT: orr r1, r2, r1 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; SCALAR-LABEL: fshl_i37: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr} +; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr} +; SCALAR-NEXT: mov r4, r1 +; SCALAR-NEXT: mov r8, r0 +; SCALAR-NEXT: ldr r0, [sp, #24] +; SCALAR-NEXT: mov r5, r3 +; SCALAR-NEXT: ldr r1, [sp, #28] +; SCALAR-NEXT: mov r6, r2 +; SCALAR-NEXT: mov r2, #37 +; SCALAR-NEXT: mov r3, #0 +; SCALAR-NEXT: bl __aeabi_uldivmod +; SCALAR-NEXT: lsl r1, r5, #27 +; SCALAR-NEXT: ands r12, r2, #32 +; SCALAR-NEXT: orr r1, r1, r6, lsr #5 +; SCALAR-NEXT: mov r3, r8 +; SCALAR-NEXT: and r5, r2, #31 +; SCALAR-NEXT: mov r0, #31 +; SCALAR-NEXT: movne r3, r1 +; SCALAR-NEXT: cmp r12, #0 +; SCALAR-NEXT: bic r2, r0, r2 +; SCALAR-NEXT: lslne r1, r6, #27 +; SCALAR-NEXT: movne r4, r8 +; SCALAR-NEXT: lsl r7, r3, r5 +; SCALAR-NEXT: lsr r0, r1, #1 +; SCALAR-NEXT: lsl r1, r4, r5 +; SCALAR-NEXT: lsr r3, r3, #1 +; SCALAR-NEXT: orr r0, r7, r0, lsr r2 +; SCALAR-NEXT: orr r1, r1, r3, lsr r2 +; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc} +; +; NEON-LABEL: fshl_i37: +; NEON: @ %bb.0: +; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} +; NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; NEON-NEXT: mov r4, r1 +; NEON-NEXT: mov r5, r0 +; NEON-NEXT: ldr r0, [sp, #24] +; NEON-NEXT: mov r7, r3 +; NEON-NEXT: ldr r1, [sp, #28] +; NEON-NEXT: mov r6, r2 +; NEON-NEXT: mov r2, #37 +; NEON-NEXT: mov r3, #0 +; NEON-NEXT: bl __aeabi_uldivmod +; NEON-NEXT: mov r0, #31 +; NEON-NEXT: bic r1, r0, r2 +; NEON-NEXT: lsl r0, r7, #27 +; 
NEON-NEXT: ands r12, r2, #32 +; NEON-NEXT: orr r0, r0, r6, lsr #5 +; NEON-NEXT: mov r7, r5 +; NEON-NEXT: and r2, r2, #31 +; NEON-NEXT: movne r7, r0 +; NEON-NEXT: lslne r0, r6, #27 +; NEON-NEXT: cmp r12, #0 +; NEON-NEXT: lsl r3, r7, r2 +; NEON-NEXT: lsr r0, r0, #1 +; NEON-NEXT: movne r4, r5 +; NEON-NEXT: orr r0, r3, r0, lsr r1 +; NEON-NEXT: lsr r3, r7, #1 +; NEON-NEXT: lsl r2, r4, r2 +; NEON-NEXT: orr r1, r2, r3, lsr r1 +; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } @@ -157,8 +180,8 @@ define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) { define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { ; CHECK-LABEL: fshl_i64_const_overshift: ; CHECK: @ %bb.0: -; CHECK-NEXT: lsr r1, r2, #23 -; CHECK-NEXT: orr r2, r1, r3, lsl #9 +; CHECK-NEXT: lsl r1, r3, #9 +; CHECK-NEXT: orr r2, r1, r2, lsr #23 ; CHECK-NEXT: lsl r0, r0, #9 ; CHECK-NEXT: orr r1, r0, r3, lsr #23 ; CHECK-NEXT: mov r0, r2 @@ -212,46 +235,36 @@ declare i37 @llvm.fshr.i37(i37, i37, i37) define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LABEL: fshr_i37: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: ldr r0, [sp, #32] -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: ldr r1, [sp, #36] +; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: ldr r0, [sp, #24] +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: ldr r1, [sp, #28] ; CHECK-NEXT: mov r7, r2 ; CHECK-NEXT: mov r2, #37 ; CHECK-NEXT: mov r3, #0 ; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: lsl r3, r5, #27 ; CHECK-NEXT: add r0, r2, #27 -; CHECK-NEXT: lsl r6, r6, #27 -; CHECK-NEXT: and r1, r0, #63 -; CHECK-NEXT: lsl r2, r7, #27 -; CHECK-NEXT: orr r7, r6, r7, lsr #5 -; CHECK-NEXT: mov r6, #63 -; CHECK-NEXT: rsb r3, r1, #32 -; CHECK-NEXT: lsr r2, r2, r1 -; CHECK-NEXT: subs r12, r1, #32 -; CHECK-NEXT: bic r6, r6, r0 -; CHECK-NEXT: orr r2, r2, r7, lsl r3 -; CHECK-NEXT: lsl r5, r9, #1 -; CHECK-NEXT: lsrpl r2, r7, r12 -; CHECK-NEXT: lsl r0, r5, r6 -; CHECK-NEXT: subs r4, r6, #32 -; CHECK-NEXT: lsl r3, r8, #1 -; CHECK-NEXT: movwpl r0, #0 -; CHECK-NEXT: orr r3, r3, r9, lsr #31 -; CHECK-NEXT: orr r0, r0, r2 -; CHECK-NEXT: rsb r2, r6, #32 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: lsr r1, r7, r1 -; CHECK-NEXT: lsr r2, r5, r2 -; CHECK-NEXT: orr r2, r2, r3, lsl r6 -; CHECK-NEXT: lslpl r2, r5, r4 +; CHECK-NEXT: orr r3, r3, r7, lsr #5 +; CHECK-NEXT: mov r1, #31 +; CHECK-NEXT: ands r12, r0, #32 +; CHECK-NEXT: mov r5, r6 +; CHECK-NEXT: moveq r5, r3 +; CHECK-NEXT: bic r1, r1, r0 +; CHECK-NEXT: lsl r2, r5, #1 +; CHECK-NEXT: lsleq r3, r7, #27 ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: movwpl r1, #0 -; CHECK-NEXT: orr r1, r2, r1 -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: and r7, r0, #31 +; CHECK-NEXT: lsl r2, r2, r1 +; CHECK-NEXT: moveq r4, r6 +; CHECK-NEXT: orr r0, r2, r3, lsr r7 +; CHECK-NEXT: lsl r2, r4, #1 +; CHECK-NEXT: lsl r1, r2, r1 +; CHECK-NEXT: orr r1, r1, r5, lsr r7 +; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll index d2514b1608016..1d0701875cedc 100644 --- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll +++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll @@ -148,4 +148,23 @@ define i1 @opt_setcc_expanded_shl_wrong_shifts(i32 %a, i32 
%b) nounwind { ret i1 %cmp } +define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind { +; CHECK-LABEL: opt_setcc_shl_ne_zero_i128: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r3, r3, #17 +; CHECK-NEXT: orr r12, r3, r2, lsr #15 +; CHECK-NEXT: lsl r3, r1, #17 +; CHECK-NEXT: lsl r2, r2, #17 +; CHECK-NEXT: orr r3, r3, r0, lsr #15 +; CHECK-NEXT: orr r1, r2, r1, lsr #15 +; CHECK-NEXT: orr r3, r3, r12 +; CHECK-NEXT: orr r0, r1, r0, lsl #17 +; CHECK-NEXT: orrs r0, r0, r3 +; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: bx lr + %shl = shl i128 %a, 17 + %cmp = icmp ne i128 %shl, 0 + ret i1 %cmp +} + declare void @use(i64 %a) diff --git a/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll b/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll index 8ae9f704fb9f4..61e2872071a71 100644 --- a/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll +++ b/llvm/test/CodeGen/ARM/inlineasm-operand-implicit-cast.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple armv7-arm-linux-gnueabihf -O2 -mcpu=cortex-a7 < %s | FileCheck %s +; RUN: llc -mtriple armv7-arm-linux-gnueabihf -O2 -mcpu=cortex-a7 -early-live-intervals < %s | FileCheck %s %struct.twofloat = type { float, float } %struct.twodouble = type { double, double } diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll index dbd9a4183039a..de1beb740bfbd 100644 --- a/llvm/test/CodeGen/ARM/shift-combine.ll +++ b/llvm/test/CodeGen/ARM/shift-combine.ll @@ -1,20 +1,42 @@ -; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-ARM --check-prefix=CHECK-COMMON -; RUN: llc -mtriple=armv7eb-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-BE -; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-COMMON -; RUN: llc -mtriple=thumbv7m %s -o - | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-COMMON -; RUN: llc -mtriple=thumbv7m -mattr=+strict-align %s -o - | FileCheck %s --check-prefix=CHECK-ALIGN --check-prefix=CHECK-COMMON +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-ARM +; RUN: llc -mtriple=armv7eb-linux-gnueabihf %s -o - | FileCheck %s --check-prefixes=CHECK-BE +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-THUMB +; RUN: llc -mtriple=thumbv7m %s -o - | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-THUMB +; RUN: llc -mtriple=thumbv7m -mattr=+strict-align %s -o - | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-ALIGN ; RUN: llc -mtriple=thumbv6m %s -o - | FileCheck %s --check-prefix=CHECK-V6M @array = weak global [4 x i32] zeroinitializer define i32 @test_lshr_and1(i32 %x) { +; CHECK-COMMON-LABEL: test_lshr_and1: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: movw r1, :lower16:array +; CHECK-COMMON-NEXT: and r0, r0, #12 +; CHECK-COMMON-NEXT: movt r1, :upper16:array +; CHECK-COMMON-NEXT: ldr r0, [r1, r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_and1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: movw r1, :lower16:array +; CHECK-BE-NEXT: and r0, r0, #12 +; CHECK-BE-NEXT: movt r1, :upper16:array +; CHECK-BE-NEXT: ldr r0, [r1, r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_and1: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: movs r1, #12 +; CHECK-V6M-NEXT: ands r1, r0 +; CHECK-V6M-NEXT: ldr r0, .LCPI0_0 +; CHECK-V6M-NEXT: ldr r0, [r0, r1] +; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: 
.p2align 2 +; CHECK-V6M-NEXT: @ %bb.1: +; CHECK-V6M-NEXT: .LCPI0_0: +; CHECK-V6M-NEXT: .long array entry: -;CHECK-LABEL: test_lshr_and1: -;CHECK-COMMON: movw r1, :lower16:array -;CHECK-COMMON-NEXT: and r0, r0, #12 -;CHECK-COMMON-NEXT: movt r1, :upper16:array -;CHECK-COMMON-NEXT: ldr r0, [r1, r0] -;CHECK-COMMON-NEXT: bx lr %tmp2 = lshr i32 %x, 2 %tmp3 = and i32 %tmp2, 3 %tmp4 = getelementptr [4 x i32], [4 x i32]* @array, i32 0, i32 %tmp3 @@ -22,12 +44,37 @@ entry: ret i32 %tmp5 } define i32 @test_lshr_and2(i32 %x) { +; CHECK-ARM-LABEL: test_lshr_and2: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ubfx r0, r0, #1, #15 +; CHECK-ARM-NEXT: add r0, r0, r0 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_and2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ubfx r0, r0, #1, #15 +; CHECK-BE-NEXT: add r0, r0, r0 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_and2: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ubfx r0, r0, #1, #15 +; CHECK-THUMB-NEXT: add r0, r0 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_and2: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ubfx r0, r0, #1, #15 +; CHECK-ALIGN-NEXT: add r0, r0 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_and2: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: lsls r0, r0, #16 +; CHECK-V6M-NEXT: lsrs r0, r0, #17 +; CHECK-V6M-NEXT: adds r0, r0, r0 +; CHECK-V6M-NEXT: bx lr entry: -;CHECK-LABEL: test_lshr_and2: -;CHECK-COMMON: ubfx r0, r0, #1, #15 -;CHECK-ARM: add r0, r0, r0 -;CHECK-THUMB: add r0, r0 -;CHECK-COMMON: bx lr %a = and i32 %x, 65534 %b = lshr i32 %a, 1 %c = and i32 %x, 65535 @@ -36,11 +83,21 @@ entry: ret i32 %e } -; CHECK-LABEL: test_lshr_load1 -; CHECK-BE: ldrb r0, [r0] -; CHECK-COMMON: ldrb r0, [r0, #1] -; CHECK-COMMON-NEXT: bx define arm_aapcscc i32 @test_lshr_load1(i16* %a) { +; CHECK-COMMON-LABEL: test_lshr_load1: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrb r0, [r0, #1] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrb r0, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load1: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrb r0, [r0, #1] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i16, i16* %a, align 2 %conv1 = zext i16 %0 to i32 @@ -48,13 +105,37 @@ entry: ret i32 %1 } -; CHECK-LABEL: test_lshr_load1_sext -; CHECK-ARM: ldrsh r0, [r0] -; CHECK-ARM-NEXT: lsr r0, r0, #8 -; CHECK-THUMB: ldrsh.w r0, [r0] -; CHECK-THUMB-NEXT: lsrs r0, r0, #8 -; CHECK-COMMON: bx define arm_aapcscc i32 @test_lshr_load1_sext(i16* %a) { +; CHECK-ARM-LABEL: test_lshr_load1_sext: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldrsh r0, [r0] +; CHECK-ARM-NEXT: lsr r0, r0, #8 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load1_sext: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrsh r0, [r0] +; CHECK-BE-NEXT: lsr r0, r0, #8 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_load1_sext: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldrsh.w r0, [r0] +; CHECK-THUMB-NEXT: lsrs r0, r0, #8 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_load1_sext: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldrsh.w r0, [r0] +; CHECK-ALIGN-NEXT: lsrs r0, r0, #8 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load1_sext: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: movs r1, #0 +; CHECK-V6M-NEXT: ldrsh r0, [r0, r1] +; CHECK-V6M-NEXT: lsrs r0, r0, #8 +; CHECK-V6M-NEXT: bx lr entry: %0 = load i16, i16* %a, align 2 %conv1 = sext i16 
%0 to i32 @@ -62,12 +143,36 @@ entry: ret i32 %1 } -; CHECK-LABEL: test_lshr_load1_fail -; CHECK-COMMON: ldrh r0, [r0] -; CHECK-ARM: lsr r0, r0, #9 -; CHECK-THUMB: lsrs r0, r0, #9 -; CHECK-COMMON: bx define arm_aapcscc i32 @test_lshr_load1_fail(i16* %a) { +; CHECK-ARM-LABEL: test_lshr_load1_fail: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldrh r0, [r0] +; CHECK-ARM-NEXT: lsr r0, r0, #9 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load1_fail: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrh r0, [r0] +; CHECK-BE-NEXT: lsr r0, r0, #9 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_load1_fail: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldrh r0, [r0] +; CHECK-THUMB-NEXT: lsrs r0, r0, #9 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_load1_fail: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldrh r0, [r0] +; CHECK-ALIGN-NEXT: lsrs r0, r0, #9 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load1_fail: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrh r0, [r0] +; CHECK-V6M-NEXT: lsrs r0, r0, #9 +; CHECK-V6M-NEXT: bx lr entry: %0 = load i16, i16* %a, align 2 %conv1 = zext i16 %0 to i32 @@ -75,69 +180,152 @@ entry: ret i32 %1 } -; CHECK-LABEL: test_lshr_load32 -; CHECK-COMMON: ldr r0, [r0] -; CHECK-ARM: lsr r0, r0, #8 -; CHECK-THUMB: lsrs r0, r0, #8 -; CHECK-COMMON: bx define arm_aapcscc i32 @test_lshr_load32(i32* %a) { +; CHECK-ARM-LABEL: test_lshr_load32: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldr r0, [r0] +; CHECK-ARM-NEXT: lsr r0, r0, #8 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load32: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: lsr r0, r0, #8 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_load32: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldr r0, [r0] +; CHECK-THUMB-NEXT: lsrs r0, r0, #8 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_load32: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldr r0, [r0] +; CHECK-ALIGN-NEXT: lsrs r0, r0, #8 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load32: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r0, [r0] +; CHECK-V6M-NEXT: lsrs r0, r0, #8 +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %a, align 4 %1 = lshr i32 %0, 8 ret i32 %1 } -; CHECK-LABEL: test_lshr_load32_2 -; CHECK-BE: ldrh r0, [r0] -; CHECK-COMMON: ldrh r0, [r0, #2] -; CHECK-COMMON-NEXT: bx define arm_aapcscc i32 @test_lshr_load32_2(i32* %a) { +; CHECK-COMMON-LABEL: test_lshr_load32_2: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrh r0, [r0, #2] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load32_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrh r0, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load32_2: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrh r0, [r0, #2] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %a, align 4 %1 = lshr i32 %0, 16 ret i32 %1 } -; CHECK-LABEL: test_lshr_load32_1 -; CHECK-BE: ldrb r0, [r0] -; CHECK-COMMON: ldrb r0, [r0, #3] -; CHECK-COMMON-NEXT: bx define arm_aapcscc i32 @test_lshr_load32_1(i32* %a) { +; CHECK-COMMON-LABEL: test_lshr_load32_1: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrb r0, [r0, #3] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load32_1: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrb r0, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load32_1: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrb r0, [r0, #3] +; 
CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %a, align 4 %1 = lshr i32 %0, 24 ret i32 %1 } -; CHECK-LABEL: test_lshr_load32_fail -; CHECK-BE: ldr r0, [r0] -; CHECK-BE-NEXT: lsr r0, r0, #15 -; CHECK-COMMON: ldr r0, [r0] -; CHECK-ARM: lsr r0, r0, #15 -; CHECK-THUMB: lsrs r0, r0, #15 -; CHECK-COMMON: bx define arm_aapcscc i32 @test_lshr_load32_fail(i32* %a) { +; CHECK-ARM-LABEL: test_lshr_load32_fail: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldr r0, [r0] +; CHECK-ARM-NEXT: lsr r0, r0, #15 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load32_fail: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: lsr r0, r0, #15 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_load32_fail: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldr r0, [r0] +; CHECK-THUMB-NEXT: lsrs r0, r0, #15 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_load32_fail: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldr r0, [r0] +; CHECK-ALIGN-NEXT: lsrs r0, r0, #15 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load32_fail: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r0, [r0] +; CHECK-V6M-NEXT: lsrs r0, r0, #15 +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %a, align 4 %1 = lshr i32 %0, 15 ret i32 %1 } -; CHECK-LABEL: test_lshr_load64_4_unaligned -; CHECK-BE: ldr [[HIGH:r[0-9]+]], [r0] -; CHECK-BE-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #4] -; CHECK-BE-NEXT: orr r0, [[LOW]], [[HIGH]], lsl #16 -; CHECK-V6M: ldrh [[LOW:r[0-9]+]], [r0, #2] -; CHECK-V6M: ldr [[HIGH:r[0-9]+]], [r0, #4] -; CHECK-V6M-NEXT: lsls [[HIGH]], [[HIGH]], #16 -; CHECK-V6M-NEXT: adds r0, r1, r0 -; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4] -; CHECK-ALIGN-NEXT: ldrh [[LOW:r[0-9]+]], [r0, #2] -; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #16 -; CHECK-ARM: ldr r0, [r0, #2] -; CHECK-THUMB: ldr.w r0, [r0, #2] -; CHECK-COMMON: bx define arm_aapcscc i32 @test_lshr_load64_4_unaligned(i64* %a) { +; CHECK-ARM-LABEL: test_lshr_load64_4_unaligned: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldr r0, [r0, #2] +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load64_4_unaligned: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: ldrh r0, [r0, #4] +; CHECK-BE-NEXT: orr r0, r0, r1, lsl #16 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_load64_4_unaligned: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldr.w r0, [r0, #2] +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_load64_4_unaligned: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldr r1, [r0, #4] +; CHECK-ALIGN-NEXT: ldrh r0, [r0, #2] +; CHECK-ALIGN-NEXT: orr.w r0, r0, r1, lsl #16 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load64_4_unaligned: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrh r1, [r0, #2] +; CHECK-V6M-NEXT: ldr r0, [r0, #4] +; CHECK-V6M-NEXT: lsls r0, r0, #16 +; CHECK-V6M-NEXT: adds r0, r1, r0 +; CHECK-V6M-NEXT: bx lr entry: %0 = load i64, i64* %a, align 8 %1 = lshr i64 %0, 16 @@ -145,17 +333,38 @@ entry: ret i32 %conv } -; CHECK-LABEL: test_lshr_load64_1_lsb -; CHECK-BE: ldr r1, [r0] +define arm_aapcscc i32 @test_lshr_load64_1_lsb(i64* %a) { +; CHECK-ARM-LABEL: test_lshr_load64_1_lsb: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldr r0, [r0, #3] +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load64_1_lsb: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r1, [r0] ; CHECK-BE-NEXT: ldrb r0, [r0, #4] ; CHECK-BE-NEXT: orr r0, r0, r1, lsl #8 -; CHECK-ARM: ldr r0, [r0, 
#3] -; CHECK-THUMB: ldr.w r0, [r0, #3] -; CHECK-ALIGN: ldr [[HIGH:r[0-9]+]], [r0, #4] -; CHECK-ALIGN-NEXT: ldrb [[LOW:r[0-9]+]], [r0, #3] -; CHECK-ALIGN-NEXT: orr.w r0, [[LOW]], [[HIGH]], lsl #8 -; CHECK-COMMON: bx -define arm_aapcscc i32 @test_lshr_load64_1_lsb(i64* %a) { +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_load64_1_lsb: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldr.w r0, [r0, #3] +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_load64_1_lsb: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldr r1, [r0, #4] +; CHECK-ALIGN-NEXT: ldrb r0, [r0, #3] +; CHECK-ALIGN-NEXT: orr.w r0, r0, r1, lsl #8 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load64_1_lsb: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrb r1, [r0, #3] +; CHECK-V6M-NEXT: ldr r0, [r0, #4] +; CHECK-V6M-NEXT: lsls r0, r0, #8 +; CHECK-V6M-NEXT: adds r0, r1, r0 +; CHECK-V6M-NEXT: bx lr entry: %0 = load i64, i64* %a, align 8 %1 = lshr i64 %0, 24 @@ -163,12 +372,21 @@ entry: ret i32 %conv } -; CHECK-LABEL: test_lshr_load64_1_msb -; CHECK-BE: ldrb r0, [r0] -; CHECK-BE-NEXT: bx -; CHECK-COMMON: ldrb r0, [r0, #7] -; CHECK-COMMON-NEXT: bx define arm_aapcscc i32 @test_lshr_load64_1_msb(i64* %a) { +; CHECK-COMMON-LABEL: test_lshr_load64_1_msb: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrb r0, [r0, #7] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load64_1_msb: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrb r0, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load64_1_msb: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrb r0, [r0, #7] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i64, i64* %a, align 8 %1 = lshr i64 %0, 56 @@ -176,12 +394,21 @@ entry: ret i32 %conv } -; CHECK-LABEL: test_lshr_load64_4 -; CHECK-BE: ldr r0, [r0] -; CHECK-BE-NEXT: bx -; CHECK-COMMON: ldr r0, [r0, #4] -; CHECK-COMMON-NEXT: bx define arm_aapcscc i32 @test_lshr_load64_4(i64* %a) { +; CHECK-COMMON-LABEL: test_lshr_load64_4: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldr r0, [r0, #4] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load64_4: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r0, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load64_4: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r0, [r0, #4] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i64, i64* %a, align 8 %1 = lshr i64 %0, 32 @@ -189,12 +416,21 @@ entry: ret i32 %conv } -; CHECK-LABEL: test_lshr_load64_2 -; CHECK-BE: ldrh r0, [r0] -; CHECK-BE-NEXT: bx -; CHECK-COMMON: ldrh r0, [r0, #6] -; CHECK-COMMON-NEXT:bx define arm_aapcscc i32 @test_lshr_load64_2(i64* %a) { +; CHECK-COMMON-LABEL: test_lshr_load64_2: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrh r0, [r0, #6] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load64_2: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrh r0, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load64_2: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrh r0, [r0, #6] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i64, i64* %a, align 8 %1 = lshr i64 %0, 48 @@ -202,14 +438,43 @@ entry: ret i32 %conv } -; CHECK-LABEL: test_lshr_load4_fail -; CHECK-COMMON: ldrd r0, r1, [r0] -; CHECK-ARM: lsr r0, r0, #8 -; CHECK-ARM-NEXT: orr r0, r0, r1, lsl #24 -; CHECK-THUMB: lsrs r0, r0, #8 -; CHECK-THUMB-NEXT: orr.w r0, r0, r1, lsl #24 -; CHECK-COMMON: bx define arm_aapcscc i32 @test_lshr_load4_fail(i64* %a) { +; CHECK-ARM-LABEL: test_lshr_load4_fail: +; CHECK-ARM: @ %bb.0: @ 
%entry +; CHECK-ARM-NEXT: ldrd r0, r1, [r0] +; CHECK-ARM-NEXT: lsr r0, r0, #8 +; CHECK-ARM-NEXT: orr r0, r0, r1, lsl #24 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_lshr_load4_fail: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrd r0, r1, [r0] +; CHECK-BE-NEXT: lsr r1, r1, #8 +; CHECK-BE-NEXT: orr r0, r1, r0, lsl #24 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_lshr_load4_fail: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldrd r0, r1, [r0] +; CHECK-THUMB-NEXT: lsrs r0, r0, #8 +; CHECK-THUMB-NEXT: orr.w r0, r0, r1, lsl #24 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_lshr_load4_fail: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldrd r0, r1, [r0] +; CHECK-ALIGN-NEXT: lsrs r0, r0, #8 +; CHECK-ALIGN-NEXT: orr.w r0, r0, r1, lsl #24 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_lshr_load4_fail: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r1, [r0] +; CHECK-V6M-NEXT: ldr r0, [r0, #4] +; CHECK-V6M-NEXT: lsls r0, r0, #24 +; CHECK-V6M-NEXT: lsrs r1, r1, #8 +; CHECK-V6M-NEXT: adds r0, r1, r0 +; CHECK-V6M-NEXT: bx lr entry: %0 = load i64, i64* %a, align 8 %1 = lshr i64 %0, 8 @@ -217,12 +482,28 @@ entry: ret i32 %conv } -; CHECK-LABEL: test_shift7_mask8 -; CHECK-BE: ldr r1, [r0] -; CHECK-COMMON: ldr r1, [r0] -; CHECK-COMMON: ubfx r1, r1, #7, #8 -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift7_mask8(i32* nocapture %p) { +; CHECK-COMMON-LABEL: test_shift7_mask8: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldr r1, [r0] +; CHECK-COMMON-NEXT: ubfx r1, r1, #7, #8 +; CHECK-COMMON-NEXT: str r1, [r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift7_mask8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: ubfx r1, r1, #7, #8 +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift7_mask8: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r1, [r0] +; CHECK-V6M-NEXT: lsrs r1, r1, #7 +; CHECK-V6M-NEXT: uxtb r1, r1 +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 7 @@ -231,11 +512,24 @@ entry: ret void } -; CHECK-LABEL: test_shift8_mask8 -; CHECK-BE: ldrb r1, [r0, #2] -; CHECK-COMMON: ldrb r1, [r0, #1] -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) { +; CHECK-COMMON-LABEL: test_shift8_mask8: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrb r1, [r0, #1] +; CHECK-COMMON-NEXT: str r1, [r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift8_mask8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrb r1, [r0, #2] +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift8_mask8: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrb r1, [r0, #1] +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 8 @@ -244,12 +538,28 @@ entry: ret void } -; CHECK-LABEL: test_shift8_mask7 -; CHECK-BE: ldr r1, [r0] -; CHECK-COMMON: ldr r1, [r0] -; CHECK-COMMON: ubfx r1, r1, #8, #7 -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift8_mask7(i32* nocapture %p) { +; CHECK-COMMON-LABEL: test_shift8_mask7: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldr r1, [r0] +; CHECK-COMMON-NEXT: ubfx r1, r1, #8, #7 +; CHECK-COMMON-NEXT: str r1, [r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift8_mask7: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: ubfx r1, r1, #8, #7 +; 
CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift8_mask7: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r1, [r0] +; CHECK-V6M-NEXT: lsls r1, r1, #17 +; CHECK-V6M-NEXT: lsrs r1, r1, #25 +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 8 @@ -258,12 +568,28 @@ entry: ret void } -; CHECK-LABEL: test_shift9_mask8 -; CHECK-BE: ldr r1, [r0] -; CHECK-COMMON: ldr r1, [r0] -; CHECK-COMMON: ubfx r1, r1, #9, #8 -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift9_mask8(i32* nocapture %p) { +; CHECK-COMMON-LABEL: test_shift9_mask8: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldr r1, [r0] +; CHECK-COMMON-NEXT: ubfx r1, r1, #9, #8 +; CHECK-COMMON-NEXT: str r1, [r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift9_mask8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: ubfx r1, r1, #9, #8 +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift9_mask8: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r1, [r0] +; CHECK-V6M-NEXT: lsrs r1, r1, #9 +; CHECK-V6M-NEXT: uxtb r1, r1 +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 9 @@ -272,14 +598,39 @@ entry: ret void } -; CHECK-LABEL: test_shift8_mask16 -; CHECK-ALIGN: ldr r1, [r0] -; CHECK-ALIGN: ubfx r1, r1, #8, #16 -; CHECK-BE: ldrh r1, [r0, #1] -; CHECK-ARM: ldrh r1, [r0, #1] -; CHECK-THUMB: ldrh.w r1, [r0, #1] -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) { +; CHECK-ARM-LABEL: test_shift8_mask16: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldrh r1, [r0, #1] +; CHECK-ARM-NEXT: str r1, [r0] +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift8_mask16: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrh r1, [r0, #1] +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_shift8_mask16: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldrh.w r1, [r0, #1] +; CHECK-THUMB-NEXT: str r1, [r0] +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_shift8_mask16: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldr r1, [r0] +; CHECK-ALIGN-NEXT: ubfx r1, r1, #8, #16 +; CHECK-ALIGN-NEXT: str r1, [r0] +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift8_mask16: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r1, [r0] +; CHECK-V6M-NEXT: lsrs r1, r1, #8 +; CHECK-V6M-NEXT: uxth r1, r1 +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 8 @@ -288,11 +639,28 @@ entry: ret void } -; CHECK-LABEL: test_shift15_mask16 -; CHECK-COMMON: ldr r1, [r0] -; CHECK-COMMON: ubfx r1, r1, #15, #16 -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift15_mask16(i32* nocapture %p) { +; CHECK-COMMON-LABEL: test_shift15_mask16: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldr r1, [r0] +; CHECK-COMMON-NEXT: ubfx r1, r1, #15, #16 +; CHECK-COMMON-NEXT: str r1, [r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift15_mask16: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: ubfx r1, r1, #15, #16 +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift15_mask16: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r1, [r0] +; CHECK-V6M-NEXT: lsrs r1, r1, #15 +; CHECK-V6M-NEXT: uxth r1, r1 +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: 
bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 15 @@ -301,12 +669,32 @@ entry: ret void } -; CHECK-LABEL: test_shift16_mask15 -; CHECK-BE: ldrh r1, [r0] -; CHECK-COMMON: ldrh r1, [r0, #2] -; CHECK-COMMON: bfc r1, #15, #17 -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift16_mask15(i32* nocapture %p) { +; CHECK-COMMON-LABEL: test_shift16_mask15: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrh r1, [r0, #2] +; CHECK-COMMON-NEXT: bfc r1, #15, #17 +; CHECK-COMMON-NEXT: str r1, [r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift16_mask15: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrh r1, [r0] +; CHECK-BE-NEXT: bfc r1, #15, #17 +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift16_mask15: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrh r1, [r0, #2] +; CHECK-V6M-NEXT: ldr r2, .LCPI21_0 +; CHECK-V6M-NEXT: ands r2, r1 +; CHECK-V6M-NEXT: str r2, [r0] +; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .p2align 2 +; CHECK-V6M-NEXT: @ %bb.1: +; CHECK-V6M-NEXT: .LCPI21_0: +; CHECK-V6M-NEXT: .long 32767 @ 0x7fff entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 16 @@ -315,13 +703,41 @@ entry: ret void } -; CHECK-LABEL: test_shift8_mask24 -; CHECK-BE: ldr r1, [r0] -; CHECK-COMMON: ldr r1, [r0] -; CHECK-ARM: lsr r1, r1, #8 -; CHECK-THUMB: lsrs r1, r1, #8 -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift8_mask24(i32* nocapture %p) { +; CHECK-ARM-LABEL: test_shift8_mask24: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldr r1, [r0] +; CHECK-ARM-NEXT: lsr r1, r1, #8 +; CHECK-ARM-NEXT: str r1, [r0] +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift8_mask24: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldr r1, [r0] +; CHECK-BE-NEXT: lsr r1, r1, #8 +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_shift8_mask24: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldr r1, [r0] +; CHECK-THUMB-NEXT: lsrs r1, r1, #8 +; CHECK-THUMB-NEXT: str r1, [r0] +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_shift8_mask24: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldr r1, [r0] +; CHECK-ALIGN-NEXT: lsrs r1, r1, #8 +; CHECK-ALIGN-NEXT: str r1, [r0] +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift8_mask24: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldr r1, [r0] +; CHECK-V6M-NEXT: lsrs r1, r1, #8 +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 8 @@ -330,11 +746,24 @@ entry: ret void } -; CHECK-LABEL: test_shift24_mask16 -; CHECK-BE: ldrb r1, [r0] -; CHECK-COMMON: ldrb r1, [r0, #3] -; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift24_mask16(i32* nocapture %p) { +; CHECK-COMMON-LABEL: test_shift24_mask16: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrb r1, [r0, #3] +; CHECK-COMMON-NEXT: str r1, [r0] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_shift24_mask16: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrb r1, [r0] +; CHECK-BE-NEXT: str r1, [r0] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_shift24_mask16: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrb r1, [r0, #3] +; CHECK-V6M-NEXT: str r1, [r0] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i32, i32* %p, align 4 %shl = lshr i32 %0, 24 @@ -343,11 +772,24 @@ entry: ret void } -; CHECK-LABEL: test_sext_shift8_mask8 -; CHECK-BE: ldrb r0, [r0] -; CHECK-COMMON: ldrb r0, [r0, #1] -; CHECK-COMMON: str r0, [r1] define arm_aapcscc void 
@test_sext_shift8_mask8(i16* %p, i32* %q) { +; CHECK-COMMON-LABEL: test_sext_shift8_mask8: +; CHECK-COMMON: @ %bb.0: @ %entry +; CHECK-COMMON-NEXT: ldrb r0, [r0, #1] +; CHECK-COMMON-NEXT: str r0, [r1] +; CHECK-COMMON-NEXT: bx lr +; +; CHECK-BE-LABEL: test_sext_shift8_mask8: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrb r0, [r0] +; CHECK-BE-NEXT: str r0, [r1] +; CHECK-BE-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_sext_shift8_mask8: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrb r0, [r0, #1] +; CHECK-V6M-NEXT: str r0, [r1] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i16, i16* %p, align 4 %1 = sext i16 %0 to i32 @@ -357,13 +799,43 @@ entry: ret void } -; CHECK-LABEL: test_sext_shift8_mask16 -; CHECK-ARM: ldrsh r0, [r0] -; CHECK-BE: ldrsh r0, [r0] -; CHECK-THUMB: ldrsh.w r0, [r0] -; CHECK-COMMON: ubfx r0, r0, #8, #16 -; CHECK-COMMON: str r0, [r1] define arm_aapcscc void @test_sext_shift8_mask16(i16* %p, i32* %q) { +; CHECK-ARM-LABEL: test_sext_shift8_mask16: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldrsh r0, [r0] +; CHECK-ARM-NEXT: ubfx r0, r0, #8, #16 +; CHECK-ARM-NEXT: str r0, [r1] +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: test_sext_shift8_mask16: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrsh r0, [r0] +; CHECK-BE-NEXT: ubfx r0, r0, #8, #16 +; CHECK-BE-NEXT: str r0, [r1] +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: test_sext_shift8_mask16: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldrsh.w r0, [r0] +; CHECK-THUMB-NEXT: ubfx r0, r0, #8, #16 +; CHECK-THUMB-NEXT: str r0, [r1] +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: test_sext_shift8_mask16: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldrsh.w r0, [r0] +; CHECK-ALIGN-NEXT: ubfx r0, r0, #8, #16 +; CHECK-ALIGN-NEXT: str r0, [r1] +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: test_sext_shift8_mask16: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: movs r2, #0 +; CHECK-V6M-NEXT: ldrsh r0, [r0, r2] +; CHECK-V6M-NEXT: lsrs r0, r0, #8 +; CHECK-V6M-NEXT: uxth r0, r0 +; CHECK-V6M-NEXT: str r0, [r1] +; CHECK-V6M-NEXT: bx lr entry: %0 = load i16, i16* %p, align 4 %1 = sext i16 %0 to i32 @@ -373,10 +845,56 @@ entry: ret void } -; CHECK-LABEL: trunc_i64_mask_srl -; CHECK-ARM: ldrh r2, [r1, #4] -; CHECK-BE: ldrh r2, [r1, #2] define i1 @trunc_i64_mask_srl(i32 zeroext %AttrArgNo, i64* %ptr) { +; CHECK-ARM-LABEL: trunc_i64_mask_srl: +; CHECK-ARM: @ %bb.0: @ %entry +; CHECK-ARM-NEXT: ldrh r2, [r1, #4] +; CHECK-ARM-NEXT: mov r1, #0 +; CHECK-ARM-NEXT: cmp r2, r0 +; CHECK-ARM-NEXT: movwhi r1, #1 +; CHECK-ARM-NEXT: mov r0, r1 +; CHECK-ARM-NEXT: bx lr +; +; CHECK-BE-LABEL: trunc_i64_mask_srl: +; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: ldrh r2, [r1, #2] +; CHECK-BE-NEXT: mov r1, #0 +; CHECK-BE-NEXT: cmp r2, r0 +; CHECK-BE-NEXT: movwhi r1, #1 +; CHECK-BE-NEXT: mov r0, r1 +; CHECK-BE-NEXT: bx lr +; +; CHECK-THUMB-LABEL: trunc_i64_mask_srl: +; CHECK-THUMB: @ %bb.0: @ %entry +; CHECK-THUMB-NEXT: ldrh r2, [r1, #4] +; CHECK-THUMB-NEXT: movs r1, #0 +; CHECK-THUMB-NEXT: cmp r2, r0 +; CHECK-THUMB-NEXT: it hi +; CHECK-THUMB-NEXT: movhi r1, #1 +; CHECK-THUMB-NEXT: mov r0, r1 +; CHECK-THUMB-NEXT: bx lr +; +; CHECK-ALIGN-LABEL: trunc_i64_mask_srl: +; CHECK-ALIGN: @ %bb.0: @ %entry +; CHECK-ALIGN-NEXT: ldrh r2, [r1, #4] +; CHECK-ALIGN-NEXT: movs r1, #0 +; CHECK-ALIGN-NEXT: cmp r2, r0 +; CHECK-ALIGN-NEXT: it hi +; CHECK-ALIGN-NEXT: movhi r1, #1 +; CHECK-ALIGN-NEXT: mov r0, r1 +; CHECK-ALIGN-NEXT: bx lr +; +; CHECK-V6M-LABEL: trunc_i64_mask_srl: +; CHECK-V6M: @ %bb.0: @ %entry +; CHECK-V6M-NEXT: ldrh 
r1, [r1, #4] +; CHECK-V6M-NEXT: cmp r1, r0 +; CHECK-V6M-NEXT: bhi .LBB26_2 +; CHECK-V6M-NEXT: @ %bb.1: @ %entry +; CHECK-V6M-NEXT: movs r0, #0 +; CHECK-V6M-NEXT: bx lr +; CHECK-V6M-NEXT: .LBB26_2: +; CHECK-V6M-NEXT: movs r0, #1 +; CHECK-V6M-NEXT: bx lr entry: %bf.load.i = load i64, i64* %ptr, align 8 %bf.lshr.i = lshr i64 %bf.load.i, 32 diff --git a/llvm/test/CodeGen/BPF/BTF/type-tag-var.ll b/llvm/test/CodeGen/BPF/BTF/type-tag-var.ll new file mode 100644 index 0000000000000..1acf2c6db83fd --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/type-tag-var.ll @@ -0,0 +1,63 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source: +; #define __tag1 __attribute__((btf_type_tag("tag1"))) +; #define __tag2 __attribute__((btf_type_tag("tag2"))) +; int __tag1 * __tag1 __tag2 *g; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm test.c + +@g = dso_local local_unnamed_addr global i32** null, align 8, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!12, !13, !14, !15} +!llvm.ident = !{!16} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 3, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 077b2e0cf1e97c4d97ca5ceab3ec0192ed11c66e)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.c", directory: "/tmp/home/yhs/work/tests/llvm/btf_tag_type") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64, annotations: !10) +!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, annotations: !8) +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !{!"btf_type_tag", !"tag1"} +!10 = !{!9, !11} +!11 = !{!"btf_type_tag", !"tag2"} + +; CHECK: .long 0 # BTF_KIND_PTR(id = 1) +; CHECK-NEXT: .long 33554432 # 0x2000000 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 1 # BTF_KIND_TYPE_TAG(id = 2) +; CHECK-NEXT: .long 301989888 # 0x12000000 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 6 # BTF_KIND_TYPE_TAG(id = 3) +; CHECK-NEXT: .long 301989888 # 0x12000000 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 4) +; CHECK-NEXT: .long 33554432 # 0x2000000 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long 6 # BTF_KIND_TYPE_TAG(id = 5) +; CHECK-NEXT: .long 301989888 # 0x12000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 11 # BTF_KIND_INT(id = 6) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 0x1000020 +; CHECK-NEXT: .long 15 # BTF_KIND_VAR(id = 7) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 1 + +; CHECK: .ascii "tag2" # string offset=1 +; CHECK: .ascii "tag1" # string offset=6 +; CHECK: .ascii "int" # string offset=11 +; CHECK: .byte 103 # string offset=15 + +!12 = !{i32 7, !"Dwarf Version", i32 4} +!13 = !{i32 2, !"Debug Info Version", i32 3} +!14 = !{i32 1, !"wchar_size", i32 4} +!15 = !{i32 7, !"frame-pointer", i32 2} +!16 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 077b2e0cf1e97c4d97ca5ceab3ec0192ed11c66e)"} diff --git a/llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll b/llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll index 
6eea4d2f4618c..a2e1b03b711ab 100644 --- a/llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll +++ b/llvm/test/CodeGen/BPF/adjust-opt-icmp1.ll @@ -39,10 +39,11 @@ entry: ; CHECK: if [[REG2]] s> [[REG1]] goto ; CHECK: if [[REG1]] s> 7 goto -; CHECK-DISABLE: [[REG1:r[0-9]+]] += -1 +; CHECK-DISABLE: [[REG1:r[0-9]+]] += -8 ; CHECK-DISABLE: [[REG1]] <<= 32 ; CHECK-DISABLE: [[REG1]] >>= 32 -; CHECK-DISABLE: if [[REG1]] > 6 goto +; CHECK-DISABLE: [[REG2:r[0-9]+]] = 4294967289 +; CHECK-DISABLE: if [[REG2]] > [[REG1]] goto lor.lhs.false: ; preds = %entry %2 = load i32, i32* %ret, align 4, !tbaa !2 diff --git a/llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll b/llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll index a264422d2b762..61071f50d8506 100644 --- a/llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll +++ b/llvm/test/CodeGen/BPF/adjust-opt-icmp2.ll @@ -37,10 +37,11 @@ entry: ; CHECK: if [[REG2]] s> [[REG1]] goto ; CHECK: if [[REG1]] s> 7 goto -; CHECK-DISABLE: [[REG1:r[0-9]+]] += -1 +; CHECK-DISABLE: [[REG1:r[0-9]+]] += -8 ; CHECK-DISABLE: [[REG1]] <<= 32 ; CHECK-DISABLE: [[REG1]] >>= 32 -; CHECK-DISABLE: if [[REG1]] > 6 goto +; CHECK-DISABLE: [[REG2:r[0-9]+]] = 4294967289 +; CHECK-DISABLE: if [[REG2]] > [[REG1]] goto if.then: ; preds = %entry store i32 0, i32* %retval, align 4 diff --git a/llvm/test/CodeGen/BPF/adjust-opt-icmp3.ll b/llvm/test/CodeGen/BPF/adjust-opt-icmp3.ll new file mode 100644 index 0000000000000..4df88c70bb35d --- /dev/null +++ b/llvm/test/CodeGen/BPF/adjust-opt-icmp3.ll @@ -0,0 +1,85 @@ +; RUN: opt -O2 -S -mtriple=bpf-pc-linux %s -o %t1 +; RUN: llc %t1 -o - | FileCheck -check-prefixes=CHECK,CHECK-V1 %s +; RUN: opt -O2 -S -mtriple=bpf-pc-linux %s -o %t1 +; RUN: llc %t1 -mcpu=v3 -o - | FileCheck -check-prefixes=CHECK,CHECK-V3 %s +; +; Source: +; int test1(unsigned long a) { +; if ((unsigned)a <= 3) return 2; +; return 3; +; } +; int test2(unsigned long a) { +; if ((unsigned)a < 4) return 2; +; return 3; +; } +; Compilation flag: +; clang -target bpf -O2 -S -emit-llvm -Xclang -disable-llvm-passes test.c + +; Function Attrs: nounwind +define dso_local i32 @test1(i64 %a) #0 { +entry: + %retval = alloca i32, align 4 + %a.addr = alloca i64, align 8 + store i64 %a, i64* %a.addr, align 8, !tbaa !3 + %0 = load i64, i64* %a.addr, align 8, !tbaa !3 + %conv = trunc i64 %0 to i32 + %cmp = icmp ule i32 %conv, 3 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %retval, align 4 + br label %return + +if.end: ; preds = %entry + store i32 3, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %1 = load i32, i32* %retval, align 4 + ret i32 %1 +} + +; CHECK-LABEL: test1 +; CHECK-V1: if r[[#]] > r[[#]] goto +; CHECK-V3: if w[[#]] < 4 goto + +; Function Attrs: nounwind +define dso_local i32 @test2(i64 %a) #0 { +entry: + %retval = alloca i32, align 4 + %a.addr = alloca i64, align 8 + store i64 %a, i64* %a.addr, align 8, !tbaa !3 + %0 = load i64, i64* %a.addr, align 8, !tbaa !3 + %conv = trunc i64 %0 to i32 + %cmp = icmp ult i32 %conv, 4 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 2, i32* %retval, align 4 + br label %return + +if.end: ; preds = %entry + store i32 3, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %1 = load i32, i32* %retval, align 4 + ret i32 %1 +} + +; CHECK-LABEL: test2 +; CHECK-V1: if r[[#]] > r[[#]] goto +; CHECK-V3: if w[[#]] < 4 goto + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"frame-pointer", i32 2} +!2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git b7892f95881c891032742e0cd81861b845512653)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"long", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir index 3c412a19fecc7..d1fb9cd409fea 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir @@ -62,7 +62,7 @@ body: | MOV32mr $noreg, 1, $noreg, @ga, $noreg, killed %8 :: (store (s32) into @ga) %5:gr32 = MOV32rm %stack.2.c, 1, $noreg, 0, $noreg :: (load (s32) from %ir.c) $eax = COPY %5 - RETQ implicit $eax + RET64 implicit $eax ;CHECK: WARNING: Missing line 9 ;CHECK-NEXT: Machine IR debug info check: FAIL diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll index 1721c996fdb0a..4230bf1b41072 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-concat-vectors-bool.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s ; Check for successful compilation. ; CHECK: sfcmp diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-const-splat-bitcast.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-const-splat-bitcast.ll index f446b63bf5353..4afc60b1d451d 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-const-splat-bitcast.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-const-splat-bitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s ; The generation of a constant vector in the selection step resulted in ; a VSPLAT, which, deeper in the expression tree had an unrelated BITCAST. diff --git a/llvm/test/CodeGen/Hexagon/bit-extract-off.ll b/llvm/test/CodeGen/Hexagon/bit-extract-off.ll index d696800671acd..032fb806e3d77 100644 --- a/llvm/test/CodeGen/Hexagon/bit-extract-off.ll +++ b/llvm/test/CodeGen/Hexagon/bit-extract-off.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s ; CHECK: extractu(r1,#31,#0) ; In the IR this was an extract of 31 bits starting at position 32 in r1:0. diff --git a/llvm/test/CodeGen/MIR/X86/auto-successor.mir b/llvm/test/CodeGen/MIR/X86/auto-successor.mir index 22128b3724dfb..ef9ed5c7c231c 100644 --- a/llvm/test/CodeGen/MIR/X86/auto-successor.mir +++ b/llvm/test/CodeGen/MIR/X86/auto-successor.mir @@ -12,7 +12,7 @@ # CHECK-NOT: successors # CHECK: JCC_1 %bb.1, 4, implicit undef $eflags # CHECK: bb.3: -# CHECK: RETQ undef $eax +# CHECK: RET64 undef $eax name: func0 body: | bb.0: @@ -28,7 +28,7 @@ body: | JCC_1 %bb.4, 4, implicit undef $eflags ; condjump+fallthrough to same block bb.4: - RETQ undef $eax + RET64 undef $eax ... --- # Some cases that need explicit successors: @@ -56,6 +56,6 @@ body: | bb.3: ; CHECK: bb.3: - ; CHECK: RETQ undef $eax - RETQ undef $eax + ; CHECK: RET64 undef $eax + RET64 undef $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/basic-block-liveins.mir b/llvm/test/CodeGen/MIR/X86/basic-block-liveins.mir index 7212dceb744fe..40bd1a86990be 100644 --- a/llvm/test/CodeGen/MIR/X86/basic-block-liveins.mir +++ b/llvm/test/CodeGen/MIR/X86/basic-block-liveins.mir @@ -31,7 +31,7 @@ body: | liveins: $edi, $esi $eax = LEA64_32r killed $rdi, 1, killed $rsi, 0, _ - RETQ $eax + RET64 $eax ... --- name: test2 @@ -47,7 +47,7 @@ body: | liveins: $esi $eax = LEA64_32r killed $rdi, 1, killed $rsi, 0, _ - RETQ $eax + RET64 $eax ... --- name: test3 @@ -61,5 +61,5 @@ body: | liveins: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir b/llvm/test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir index ee10a174fba56..9358c7c19c418 100644 --- a/llvm/test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir +++ b/llvm/test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir @@ -31,11 +31,11 @@ body: | ; CHECK: [[@LINE+1]]:8: basic block definition should be located at the start of the line less bb.1: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/block-address-operands.mir b/llvm/test/CodeGen/MIR/X86/block-address-operands.mir index 4d72fe84c0b24..397cea0bdd447 100644 --- a/llvm/test/CodeGen/MIR/X86/block-address-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/block-address-operands.mir @@ -63,7 +63,7 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1.block (address-taken): - RETQ + RET64 ... --- name: test2 @@ -77,7 +77,7 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1 (address-taken): - RETQ + RET64 ... --- name: slot_in_other_function @@ -89,7 +89,7 @@ body: | ; CHECK: $rax = LEA64r $rip, 1, $noreg, blockaddress(@test3, %ir-block.0), $noreg $rax = LEA64r $rip, 1, _, blockaddress(@test3, %ir-block.0), _ MOV64mr killed $rdi, 1, _, 0, _, killed $rax - RETQ + RET64 ... --- name: test3 @@ -104,7 +104,7 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1 (address-taken): - RETQ + RET64 ... --- name: test4 @@ -117,5 +117,5 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1.block (address-taken): - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/branch-probabilities.mir b/llvm/test/CodeGen/MIR/X86/branch-probabilities.mir index 40e463850ef2b..6732b5c509588 100644 --- a/llvm/test/CodeGen/MIR/X86/branch-probabilities.mir +++ b/llvm/test/CodeGen/MIR/X86/branch-probabilities.mir @@ -14,5 +14,5 @@ body: | NOOP bb.2: - RETQ undef $eax + RET64 undef $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/callee-saved-info.mir b/llvm/test/CodeGen/MIR/X86/callee-saved-info.mir index a6b2ea1c9e723..606abdd156578 100644 --- a/llvm/test/CodeGen/MIR/X86/callee-saved-info.mir +++ b/llvm/test/CodeGen/MIR/X86/callee-saved-info.mir @@ -39,7 +39,7 @@ body: | liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... --- name: func @@ -92,5 +92,5 @@ body: | $eax = MOV32r0 implicit-def dead $eflags $rsp = ADD64ri8 $rsp, 16, implicit-def dead $eflags $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... 
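# A minimal sketch (illustration only, not part of the patch) of the livein
# convention that basic-block-liveins.mir above exercises: any physical
# register a block reads before defining it must be declared on a `liveins:`
# line, with the renamed return opcode used the same way as elsewhere here:
#
#   bb.0.body:
#     liveins: $edi, $esi
#     $eax = LEA64_32r killed $rdi, 1, killed $rsi, 0, _
#     RET64 $eax
#
# The expected-named-register-livein.mir test further down checks that the
# parser rejects a livein entry that is not a named physical register.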
diff --git a/llvm/test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir b/llvm/test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir index e6a36bb3dd58a..2ef5eb3ecc371 100644 --- a/llvm/test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir +++ b/llvm/test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir @@ -24,6 +24,6 @@ body: | ; CHECK: CFI_INSTRUCTION def_cfa_offset 4048 CFI_INSTRUCTION def_cfa_offset 4048 $rsp = ADD64ri32 $rsp, 4040, implicit-def dead $eflags - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/cfi-offset.mir b/llvm/test/CodeGen/MIR/X86/cfi-offset.mir index b8d9e3150810f..1e1a8ad2011bd 100644 --- a/llvm/test/CodeGen/MIR/X86/cfi-offset.mir +++ b/llvm/test/CodeGen/MIR/X86/cfi-offset.mir @@ -42,6 +42,6 @@ body: | CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp $eax = LEA64_32r killed $rbx, 1, $rbx, 0, _ $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir b/llvm/test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir index 2f016a7599e36..acae12b07de31 100644 --- a/llvm/test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir +++ b/llvm/test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir @@ -20,6 +20,6 @@ constants: body: | bb.0.entry: %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _ - RETQ %xmm0 + RET64 %xmm0 ... diff --git a/llvm/test/CodeGen/MIR/X86/constant-pool.mir b/llvm/test/CodeGen/MIR/X86/constant-pool.mir index 85b2071632f7a..e367c2658cbaa 100644 --- a/llvm/test/CodeGen/MIR/X86/constant-pool.mir +++ b/llvm/test/CodeGen/MIR/X86/constant-pool.mir @@ -67,7 +67,7 @@ body: | $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr - RETQ $xmm0 + RET64 $xmm0 ... --- # Verify that alignment can be inferred: @@ -93,7 +93,7 @@ body: | $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr - RETQ $xmm0 + RET64 $xmm0 ... --- # Verify that the non-standard alignments are respected: @@ -123,7 +123,7 @@ body: | $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.1, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr - RETQ $xmm0 + RET64 $xmm0 ... --- # CHECK: name: test4 @@ -141,5 +141,5 @@ body: | $xmm1 = ADDSSrm killed $xmm1, $rip, 1, _, %const.0 + 8, _, implicit $mxcsr $xmm1 = CVTSS2SDrr killed $xmm1, implicit $mxcsr $xmm0 = MULSDrr killed $xmm0, killed $xmm1, implicit $mxcsr - RETQ $xmm0 + RET64 $xmm0 ... diff --git a/llvm/test/CodeGen/MIR/X86/constant-value-error.mir b/llvm/test/CodeGen/MIR/X86/constant-value-error.mir index baf933a87105e..ca5b78984c81e 100644 --- a/llvm/test/CodeGen/MIR/X86/constant-value-error.mir +++ b/llvm/test/CodeGen/MIR/X86/constant-value-error.mir @@ -20,6 +20,6 @@ constants: body: | bb.0.entry: %xmm0 = ADDSDrm killed %xmm0, %rip, 1, _, %const.0, _ - RETQ %xmm0 + RET64 %xmm0 ... 
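# For context (a hedged sketch; the literal values below are assumptions, not
# copied from the test), the constant-pool tests above pair each `%const.<id>`
# machine operand with an entry in the function's `constants:` block:
#
#   constants:
#     - id: 0
#       value: 'double 3.25'
#       alignment: 8
#     - id: 1
#       value: 'float 6.25'
#
# Operands may also carry an offset into an entry, as in the `%const.0 + 8`
# reference that constant-pool.mir's test4 parses above.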
diff --git a/llvm/test/CodeGen/MIR/X86/dbg-value-list.mir b/llvm/test/CodeGen/MIR/X86/dbg-value-list.mir index 268a8d9e21e67..c419638be6697 100644 --- a/llvm/test/CodeGen/MIR/X86/dbg-value-list.mir +++ b/llvm/test/CodeGen/MIR/X86/dbg-value-list.mir @@ -58,7 +58,7 @@ body: | DBG_VALUE $esi, $noreg, !13, !DIExpression(), debug-location !15 DBG_VALUE $eax, $noreg, !12, !DIExpression(), debug-location !15 renamable $eax = nsw IMUL32rr killed renamable $eax, killed renamable $esi, implicit-def dead $eflags, debug-location !16 - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/CodeGen/MIR/X86/dead-register-flag.mir b/llvm/test/CodeGen/MIR/X86/dead-register-flag.mir index 19920e9560199..e01fd8d2b0824 100644 --- a/llvm/test/CodeGen/MIR/X86/dead-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/dead-register-flag.mir @@ -20,5 +20,5 @@ body: | bb.0.body: ; CHECK: $eax = IMUL32rri8 $edi, 11, implicit-def dead $eflags $eax = IMUL32rri8 $edi, 11, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/def-register-already-tied-error.mir b/llvm/test/CodeGen/MIR/X86/def-register-already-tied-error.mir index 53fb4c6e59bc1..765d2e09ba823 100644 --- a/llvm/test/CodeGen/MIR/X86/def-register-already-tied-error.mir +++ b/llvm/test/CodeGen/MIR/X86/def-register-already-tied-error.mir @@ -20,5 +20,5 @@ body: | ; CHECK: [[@LINE+1]]:83: the tied-def operand #3 is already tied with another register operand INLINEASM &"$foo", 1, 2818058, def $rdi, 2147483657, killed $rdi(tied-def 3), killed $rdi(tied-def 3) $rax = COPY killed $rdi - RETQ killed $rax + RET64 killed $rax ... diff --git a/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir b/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir index 13229dc70db74..6f6d8377b8517 100644 --- a/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir @@ -23,5 +23,5 @@ body: | $eax = MOV32rm $rdi, 1, _, 0, _ :: (volatile volatile load (s32) from %ir.x) $eax = INC32r killed $eax, implicit-def dead $eflags MOV32mr killed $rdi, 1, _, 0, _, $eax :: (volatile store (s32) into %ir.x) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/duplicate-register-flag-error.mir b/llvm/test/CodeGen/MIR/X86/duplicate-register-flag-error.mir index b43dd2689b290..b7d7221fb790f 100644 --- a/llvm/test/CodeGen/MIR/X86/duplicate-register-flag-error.mir +++ b/llvm/test/CodeGen/MIR/X86/duplicate-register-flag-error.mir @@ -27,9 +27,9 @@ body: | bb.1.less: $eax = MOV32r0 implicit-def $eflags - RETQ $eax + RET64 $eax bb.2.exit: $eax = COPY $edi - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir b/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir index 87ea82623ee9f..aff96f49c9aa2 100644 --- a/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir @@ -40,5 +40,5 @@ body: | $edi = COPY killed $ecx CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp $rax = POP64r implicit-def $rsp, implicit $rsp - RETQ + RET64 ... 
diff --git a/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir index 2cda984eba8c0..94c06fea3419b 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir @@ -26,5 +26,5 @@ body: | $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec, align 32) MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 32) - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir index db09b558fdbcf..bc7eb83cbc2d0 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir @@ -26,5 +26,5 @@ body: | $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec, align 32) MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 32) - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir b/llvm/test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir index a712fb189664a..a4e19a4ec96b6 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir @@ -30,11 +30,11 @@ body: | bb.1.less: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir index cad3d529df1ac..1d7f69df1a30a 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir @@ -26,5 +26,5 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1.block (address-taken): - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir b/llvm/test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir index f861689b5e9f8..a0d9dcd84ee5b 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir @@ -38,5 +38,5 @@ body: | CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp $eax = LEA64_32r killed $rbx, 1, $rbx, 0, _ $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir index 5a32c4f58faff..eba584e129eb3 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir @@ -21,5 +21,5 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:91: expected ',' before the next machine memory operand INC32m killed $rdi, 1, _, 0, _, implicit-def dead $eflags :: (store (s32) into %ir.a) (load (s32) from %ir.a) - RETQ + RET64 ... 
diff --git a/llvm/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir index 0e4dcf4dd94b3..98a6173937028 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-different-implicit-operand.mir @@ -30,5 +30,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2.exit: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir b/llvm/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir index f185c39ff6848..5bfff0a776a03 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-different-implicit-register-flag.mir @@ -30,5 +30,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2.exit: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir index 067cc5230e4f0..a67faa601497c 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir @@ -26,5 +26,5 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1.block (address-taken): - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir index a7e8763039515..d13c7d5072867 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir @@ -26,5 +26,5 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1.block (address-taken): - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir b/llvm/test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir index 3f492a94e2c2b..6870b860c191d 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir @@ -20,5 +20,5 @@ body: | $rax = MOV64rm $rip, 1, _, @G + , _ $eax = MOV32rm $rax, 1, _, 0, _ $eax = INC32r $eax, implicit-def $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir b/llvm/test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir index 23f64aee32ebf..f8b6baddd41d5 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir @@ -20,5 +20,5 @@ body: | ; CHECK: [[@LINE+1]]:78: expected tied-def or low-level type after '(' INLINEASM &"$foo", 1, 2818058, def $rdi, 2147483657, killed $rdi(tied-def) $rax = COPY killed $rdi - RETQ killed $rax + RET64 killed $rax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir b/llvm/test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir index 0db170a94dbe3..4a9c425fbfda0 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir @@ -28,11 +28,11 @@ body: | bb.1.less: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... 
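# A hedged illustration of the successor-list syntax behind the
# successor-related tests in this patch (the probabilities are picked for the
# example and must sum to 0x80000000): each successor may carry an explicit
# probability, and expected-integer-in-successor-weight.mir above verifies
# that a non-integer inside the parentheses is rejected:
#
#   bb.0.entry:
#     successors: %bb.1(0x30000000), %bb.2(0x50000000)
#     JCC_1 %bb.2, 4, implicit undef $eflags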
diff --git a/llvm/test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir index 6be0a902ec908..4d262ef398e38 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir @@ -19,5 +19,5 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:48: expected 'load' or 'store' memory operation $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (4 from %ir.a) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-machine-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-machine-operand.mir index 89bec0e5bb3bf..db927be80bc02 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-machine-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-machine-operand.mir @@ -14,6 +14,6 @@ body: | bb.0.entry: ; CHECK: [[@LINE+1]]:20: expected a machine operand $eax = XOR32rr = - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir index b21a12c3ee645..9478786d55678 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir @@ -53,5 +53,5 @@ body: | DBG_VALUE _, 0, !12, !13, debug-location 14 MOV32mr $stack.x.addr, 1, _, 0, _, %0 $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir index 418127078497a..d04ef11cbfb84 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir @@ -53,5 +53,5 @@ body: | DBG_VALUE _, 0, !12, ! _ MOV32mr %stack.0.x.addr, 1, _, 0, _, %0 $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir index d35bf538f871a..fe5785038beac 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir @@ -22,5 +22,5 @@ body: | MOV32mr $rsp, 1, _, -4, _, $edi :: (store (s32) into %ir.xa) $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir b/llvm/test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir index bca4aa1729f30..7ed390570adc7 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir @@ -26,5 +26,5 @@ body: | %2 = COPY $edi %2 = IMUL32rr %2, %1, implicit-def dead $eflags $eax = COPY %2 - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir b/llvm/test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir index 2b83732e585d8..05836e7bab2eb 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir @@ -37,7 +37,7 @@ body: | liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... 
--- name: func @@ -84,5 +84,5 @@ body: | $eax = MOV32r0 implicit-def dead $eflags $rsp = ADD64ri8 $rsp, 16, implicit-def dead $eflags $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir b/llvm/test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir index af563bd672add..b0c5b979277e2 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir @@ -22,5 +22,5 @@ body: | %0 = COPY %edi %eax = COPY %0 - RETQ %eax + RET64 %eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-named-register-livein.mir b/llvm/test/CodeGen/MIR/X86/expected-named-register-livein.mir index f2e2eaea6516f..9257807d2879d 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-named-register-livein.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-named-register-livein.mir @@ -16,5 +16,5 @@ body: | liveins: %0 $eax = COPY $edi - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir b/llvm/test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir index f40dc2f6dd17b..dd0f4d39b09ac 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir @@ -31,11 +31,11 @@ body: | bb.1.less: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-number-after-bb.mir b/llvm/test/CodeGen/MIR/X86/expected-number-after-bb.mir index f4a20d688487c..e9a1a9e4ee875 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-number-after-bb.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-number-after-bb.mir @@ -29,5 +29,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2.nah: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir index 1cadbecac1f08..44ebcbc8a4461 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir @@ -22,6 +22,6 @@ body: | ; CHECK: [[@LINE+1]]:36: expected a cfi offset CFI_INSTRUCTION def_cfa_offset _ $rsp = ADD64ri32 $rsp, 4040, implicit-def dead $eflags - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir index 3951943b98f55..d94dc3ffb7e0c 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir @@ -19,6 +19,6 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:64: expected a pointer IR value $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from %ir.b) - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir b/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir index 1596fe3813734..1f033c5d7d52b 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir @@ -26,5 +26,5 @@ body: | $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec, align 32) MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 32) - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir index f47fcee4d33cd..65f2c0b63879e 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir @@ -38,5 +38,5 @@ body: | CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp $eax = LEA64_32r killed $rbx, 1, $rbx, 0, _ $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-register-after-flags.mir b/llvm/test/CodeGen/MIR/X86/expected-register-after-flags.mir index cc2cd4af19fe3..d507de9531188 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-register-after-flags.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-register-after-flags.mir @@ -16,5 +16,5 @@ body: | bb.0.entry: ; CHECK: [[@LINE+1]]:33: expected a register after register flags $eax = MOV32r0 implicit-def 2 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir b/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir index 537f25e8b067f..ff9dfba5cd1cc 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir @@ -19,6 +19,6 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:53: expected an atomic scope, ordering or a size specification $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load from %ir.a) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir b/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir index b7533f266bae1..71edc242f1662 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation2.mir @@ -19,6 +19,6 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:53: expected memory LLT, the size integer literal or 'unknown-size' after memory operation $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load . from %ir.a) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-stack-object.mir b/llvm/test/CodeGen/MIR/X86/expected-stack-object.mir index 213f18611a4cc..c9f6c9902b492 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-stack-object.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-stack-object.mir @@ -63,5 +63,5 @@ body: | CMP64rm killed $rbx, $rsp, 1, _, 24, _, implicit-def $eflags $rsp = ADD64ri8 $rsp, 32, implicit-def dead $eflags $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir b/llvm/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir index c06e4601480ae..25a352a06d20d 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-subregister-after-colon.mir @@ -22,5 +22,5 @@ body: | %1 = COPY %0 . 42 %2 = AND8ri %1, 1, implicit-def $eflags $al = COPY %2 - RETQ $al + RET64 $al ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-target-flag-name.mir b/llvm/test/CodeGen/MIR/X86/expected-target-flag-name.mir index 02e18434d191d..b2fe4c009d5cd 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-target-flag-name.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-target-flag-name.mir @@ -20,5 +20,5 @@ body: | $rax = MOV64rm $rip, 1, _, target-flags( ) @G, _ $eax = MOV32rm killed $rax, 1, _, 0, _ $eax = INC32r killed $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir b/llvm/test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir index ff97bd9f0a1fa..f15db555f33f5 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir @@ -20,5 +20,5 @@ body: | ; CHECK: [[@LINE+1]]:70: expected tied-def or low-level type after '(' INLINEASM &"$foo", 1, 2818058, def $rdi, 2147483657, killed $rdi(3) $rax = COPY killed $rdi - RETQ killed $rax + RET64 killed $rax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir index 02dd5b2337c7c..5d1c750227930 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir @@ -19,6 +19,6 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:64: expected an IR value reference $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from a) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir b/llvm/test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir index 1f4f836017825..6b56f39c7e2d6 100644 --- a/llvm/test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir +++ b/llvm/test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir @@ -22,5 +22,5 @@ body: | %0 = COPY $edi $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/external-symbol-operands.mir b/llvm/test/CodeGen/MIR/X86/external-symbol-operands.mir index c5e314307c212..ff5c8d4dbc3fa 100644 --- a/llvm/test/CodeGen/MIR/X86/external-symbol-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/external-symbol-operands.mir @@ -46,7 +46,7 @@ body: | liveins: $eax $rsp = ADD64ri32 $rsp, 520, implicit-def $eflags - RETQ $eax + RET64 $eax bb.2.entry: ; CHECK: CALL64pcrel32 &__stack_chk_fail, diff --git a/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir b/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir index cb31d32d1cefd..fa99421460dc6 100644 --- a/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir @@ -35,5 +35,5 @@ body: | $eax = MOV32rm $esp, 1, _, 8, _ :: (load (s32) from %fixed-stack.0, align 16) MOV32mr $esp, 1, _, 0, _, $eax :: (store (s32) into %ir.b) $edx = POP32r implicit-def $esp, implicit $esp - RETL $eax + RET32 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir b/llvm/test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir index 7116bb809b28e..2ce41fdada600 100644 --- a/llvm/test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir +++ b/llvm/test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir @@ -24,5 +24,5 @@ body: | bb.0.entry: $eax = MOV32rm $esp, 1, _, 4, _ $eax = ADD32rm killed $eax, $esp, 1, _, 8, _, implicit-def dead $eflags - RETL $eax + RET32 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/fixed-stack-objects.mir b/llvm/test/CodeGen/MIR/X86/fixed-stack-objects.mir index 0882378e1a954..9eba3eaf4bade 100644 --- a/llvm/test/CodeGen/MIR/X86/fixed-stack-objects.mir +++ b/llvm/test/CodeGen/MIR/X86/fixed-stack-objects.mir @@ -30,5 +30,5 @@ body: | bb.0.entry: $eax = MOV32rm $esp, 1, _, 8, _ MOV32mr $esp, 1, _, 0, _, $eax - RETL $eax + RET32 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir b/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir index ba991fcc510eb..cb804b3cbfabb 100644 --- a/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir +++ b/llvm/test/CodeGen/MIR/X86/frame-info-save-restore-points.mir @@ -69,5 +69,5 @@ body: | bb.3.false: liveins: $eax - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir b/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir index 01e619a783347..8c4583ade6328 100644 --- a/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir +++ b/llvm/test/CodeGen/MIR/X86/frame-info-stack-references.mir @@ -72,7 +72,7 @@ body: | $rsp = ADD64ri8 $rsp, 32, implicit-def dead $eflags $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax bb.2.entry: CALL64pcrel32 &__stack_chk_fail, csr_64, implicit $rsp, implicit-def $rsp diff --git a/llvm/test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir b/llvm/test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir index 2e6d48c4647df..01341b3a6da6c 100644 --- a/llvm/test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir @@ -21,7 +21,7 @@ name: compute body: | bb.0.body: $eax = IMUL32rri8 $edi, 11, implicit-def $eflags - RETQ $eax + RET64 $eax ... --- name: foo @@ -36,5 +36,5 @@ body: | $rdx = frame-setup frame-destroy POP64r implicit-def $rsp, implicit $rsp ; CHECK: $rdx = frame-setup frame-destroy POP64r $rdx = frame-destroy frame-setup POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/function-liveins.mir b/llvm/test/CodeGen/MIR/X86/function-liveins.mir index 2f3cb6d11ee33..7e71d2e05f166 100644 --- a/llvm/test/CodeGen/MIR/X86/function-liveins.mir +++ b/llvm/test/CodeGen/MIR/X86/function-liveins.mir @@ -32,5 +32,5 @@ body: | %0 = COPY $edi %2 = ADD32rr %0, %1, implicit-def dead $eflags $eax = COPY %2 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/global-value-operands.mir b/llvm/test/CodeGen/MIR/X86/global-value-operands.mir index 7b55cb4f14ea4..526f939d0ce5b 100644 --- a/llvm/test/CodeGen/MIR/X86/global-value-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/global-value-operands.mir @@ -68,7 +68,7 @@ body: | $rax = MOV64rm $rip, 1, _, @G, _ $eax = MOV32rm $rax, 1, _, 0, _ $eax = INC32r $eax, implicit-def $eflags - RETQ $eax + RET64 $eax ... --- # CHECK: name: inc2 @@ -79,7 +79,7 @@ body: | $rax = MOV64rm $rip, 1, _, @0, _ $eax = MOV32rm $rax, 1, _, 0, _ $eax = INC32r $eax, implicit-def $eflags - RETQ $eax + RET64 $eax ... 
--- name: test @@ -97,7 +97,7 @@ body: | $eax = MOV32rm killed $rax, 1, _, 0, _ $rcx = MOV64rm $rip, 1, _, @$.-B, _ MOV32mr killed $rcx, 1, _, 0, _, $eax - RETQ $eax + RET64 $eax ... --- name: test2 @@ -106,7 +106,7 @@ body: | ; CHECK: , @"\01Hello@$%09 \\ World,", $rax = MOV64rm $rip, 1, _, @"\01Hello@$%09 \\ World,", _ $eax = MOV32rm killed $rax, 1, _, 0, _ - RETQ $eax + RET64 $eax ... --- # CHECK: name: test3 @@ -125,7 +125,7 @@ body: | $eax = MOV32rm killed $rax, 1, _, 0, _ $rcx = MOV64rm $rip, 1, _, @$.-B - 8, _ MOV32mr killed $rcx, 1, _, 0, _, $eax - RETQ $eax + RET64 $eax ... --- # CHECK: name: tf @@ -136,5 +136,5 @@ body: | $rax = MOV64rm $rip, 1, _, target-flags(x86-gotpcrel) @G, _ $eax = MOV32rm $rax, 1, _, 0, _ $eax = INC32r $eax, implicit-def $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/immediate-operands.mir b/llvm/test/CodeGen/MIR/X86/immediate-operands.mir index 05aa89cb8e3f6..2fef51ca2305f 100644 --- a/llvm/test/CodeGen/MIR/X86/immediate-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/immediate-operands.mir @@ -20,9 +20,9 @@ name: foo body: | bb.0.entry: ; CHECK: $eax = MOV32ri 42 - ; CHECK-NEXT: RETQ $eax + ; CHECK-NEXT: RET64 $eax $eax = MOV32ri 42 - RETQ $eax + RET64 $eax ... --- # CHECK: name: bar @@ -30,7 +30,7 @@ name: bar body: | bb.0.entry: ; CHECK: $eax = MOV32ri -11 - ; CHECK-NEXT: RETQ $eax + ; CHECK-NEXT: RET64 $eax $eax = MOV32ri -11 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/implicit-register-flag.mir b/llvm/test/CodeGen/MIR/X86/implicit-register-flag.mir index e5fc11531f530..61c4b6c98a73c 100644 --- a/llvm/test/CodeGen/MIR/X86/implicit-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/implicit-register-flag.mir @@ -40,11 +40,11 @@ body: | bb.1.less: ; CHECK: $eax = MOV32r0 implicit-def $eflags $eax = MOV32r0 implicit-def $eflags - RETQ $eax + RET64 $eax bb.2.exit: $eax = COPY $edi - RETQ $eax + RET64 $eax ... --- name: implicit_subregister1 @@ -55,7 +55,7 @@ body: | ; CHECK-LABEL: name: implicit_subregister1 ; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al - RETQ killed $al + RET64 killed $al ... --- name: implicit_subregister2 @@ -64,5 +64,5 @@ body: | ; CHECK-LABEL: name: implicit_subregister2 ; CHECK: dead $r15 = XOR64rr undef $r15, undef $r15, implicit-def dead $eflags, implicit-def $r15w dead $r15 = XOR64rr undef $r15, undef $r15, implicit-def dead $eflags, implicit-def $r15w - RETQ killed $r15w + RET64 killed $r15w ... diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir index d5bbb2dfbd539..44705cbcfac48 100644 --- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir +++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir @@ -31,7 +31,7 @@ body: | ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4390922 /* regdef:GR64 */, def $rsi, 4390922 /* regdef:GR64 */, def dead $rdi, INLINEASM &foo, 0, 4390922, def $rsi, 4390922, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi - RETQ killed $rax + RET64 killed $rax ... 
--- name: test2 @@ -48,5 +48,5 @@ body: | ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4390922 /* regdef:GR64 */, def $rsi, 4390922 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags INLINEASM &foo, 0, 4390922, def $rsi, 4390922, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi - RETQ killed $rax + RET64 killed $rax ... diff --git a/llvm/test/CodeGen/MIR/X86/instr-symbols-and-mcsymbol-operands.mir b/llvm/test/CodeGen/MIR/X86/instr-symbols-and-mcsymbol-operands.mir index 34a9236c16bca..aade832755449 100644 --- a/llvm/test/CodeGen/MIR/X86/instr-symbols-and-mcsymbol-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/instr-symbols-and-mcsymbol-operands.mir @@ -77,6 +77,6 @@ body: | %7:gr64 = ADD64rr killed %4, killed %5, implicit-def $eflags %8:gr64 = ADD64rr killed %6, killed %7, implicit-def $eflags $rax = COPY %8 - RETQ implicit $rax + RET64 implicit $rax ... diff --git a/llvm/test/CodeGen/MIR/X86/instructions-debug-location.mir b/llvm/test/CodeGen/MIR/X86/instructions-debug-location.mir index 9c6fe3aa708a6..9abe47991f322 100644 --- a/llvm/test/CodeGen/MIR/X86/instructions-debug-location.mir +++ b/llvm/test/CodeGen/MIR/X86/instructions-debug-location.mir @@ -75,7 +75,7 @@ body: | ; CHECK: DBG_VALUE $noreg, 0, !11, !DIExpression(), debug-location !12 ; CHECK: DBG_VALUE $noreg, 0, !11, !DIExpression(), debug-location !12 ; CHECK: $eax = COPY %0, debug-location !13 - ; CHECK: RETQ $eax, debug-location !13 + ; CHECK: RET64 $eax, debug-location !13 %0 = COPY $edi DBG_VALUE _, 0, !12, !DIExpression(), debug-location !13 ; Test whether debug-use is still recognized for compatibility with old @@ -83,7 +83,7 @@ body: | DBG_VALUE debug-use _, 0, !12, !DIExpression(), debug-location !13 MOV32mr %stack.0.x.addr, 1, _, 0, _, %0 $eax = COPY %0, debug-location !14 - RETQ $eax, debug-location !14 + RET64 $eax, debug-location !14 ... --- name: test_typed_immediates @@ -107,7 +107,7 @@ body: | DBG_VALUE _, i128 123492148938512984928424384934328985928, !12, !DIExpression(), debug-location !13 MOV32mr %stack.0.x.addr, 1, _, 0, _, %0 $eax = COPY %0 - RETQ $eax + RET64 $eax ... --- name: test_mir_created @@ -135,5 +135,5 @@ body: | MOV32mr %stack.0.x.addr, 1, _, 0, _, %0, debug-location !DILocation(line: 4, scope: !15, inlinedAt: !16) MOV32mr %stack.0.x.addr, 1, _, 0, _, %0, debug-location !DILocation(line: 5, scope: !15, inlinedAt: !DILocation(line: 4, scope: !15)) $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/invalid-constant-pool-item.mir b/llvm/test/CodeGen/MIR/X86/invalid-constant-pool-item.mir index 45c0b813acb5c..bcfdc6a316fa5 100644 --- a/llvm/test/CodeGen/MIR/X86/invalid-constant-pool-item.mir +++ b/llvm/test/CodeGen/MIR/X86/invalid-constant-pool-item.mir @@ -20,6 +20,6 @@ body: | bb.0.entry: ; CHECK: [[@LINE+1]]:47: use of undefined constant '%const.10' $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.10, _ - RETQ $xmm0 + RET64 $xmm0 ... 
diff --git a/llvm/test/CodeGen/MIR/X86/invalid-target-flag-name.mir b/llvm/test/CodeGen/MIR/X86/invalid-target-flag-name.mir index 9e65f2ae2bd1c..a3d4a3857434b 100644 --- a/llvm/test/CodeGen/MIR/X86/invalid-target-flag-name.mir +++ b/llvm/test/CodeGen/MIR/X86/invalid-target-flag-name.mir @@ -20,5 +20,5 @@ body: | $rax = MOV64rm $rip, 1, _, target-flags(x86-test) @G, _ $eax = MOV32rm killed $rax, 1, _, 0, _ $eax = INC32r killed $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir b/llvm/test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir index abfa5682ec824..65aab15442a56 100644 --- a/llvm/test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir +++ b/llvm/test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir @@ -20,5 +20,5 @@ body: | ; CHECK: [[@LINE+1]]:58: use of invalid tied-def operand index '300'; instruction has only 6 operands INLINEASM &"$foo", 1, 2818058, def $rdi, 2147483657, killed $rdi(tied-def 300) $rax = COPY killed $rdi - RETQ killed $rax + RET64 killed $rax ... diff --git a/llvm/test/CodeGen/MIR/X86/jump-table-info.mir b/llvm/test/CodeGen/MIR/X86/jump-table-info.mir index af7e520df89bd..8bbfb96a81906 100644 --- a/llvm/test/CodeGen/MIR/X86/jump-table-info.mir +++ b/llvm/test/CodeGen/MIR/X86/jump-table-info.mir @@ -86,23 +86,23 @@ body: | bb.2.def: $eax = MOV32r0 implicit-def $eflags - RETQ $eax + RET64 $eax bb.3.lbl1: $eax = MOV32ri 1 - RETQ $eax + RET64 $eax bb.4.lbl2: $eax = MOV32ri 2 - RETQ $eax + RET64 $eax bb.5.lbl3: $eax = MOV32ri 4 - RETQ $eax + RET64 $eax bb.6.lbl4: $eax = MOV32ri 8 - RETQ $eax + RET64 $eax ... --- name: test_jumptable2 @@ -130,21 +130,21 @@ body: | bb.2.def: $eax = MOV32r0 implicit-def $eflags - RETQ $eax + RET64 $eax bb.3.lbl1: $eax = MOV32ri 1 - RETQ $eax + RET64 $eax bb.4.lbl2: $eax = MOV32ri 2 - RETQ $eax + RET64 $eax bb.5.lbl3: $eax = MOV32ri 4 - RETQ $eax + RET64 $eax bb.6.lbl4: $eax = MOV32ri 8 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/jump-table-redefinition-error.mir b/llvm/test/CodeGen/MIR/X86/jump-table-redefinition-error.mir index fade2fd1230c5..a965f9938b2c1 100644 --- a/llvm/test/CodeGen/MIR/X86/jump-table-redefinition-error.mir +++ b/llvm/test/CodeGen/MIR/X86/jump-table-redefinition-error.mir @@ -56,21 +56,21 @@ body: | bb.2.def: $eax = MOV32r0 implicit-def $eflags - RETQ $eax + RET64 $eax bb.3.lbl1: $eax = MOV32ri 1 - RETQ $eax + RET64 $eax bb.4.lbl2: $eax = MOV32ri 2 - RETQ $eax + RET64 $eax bb.5.lbl3: $eax = MOV32ri 4 - RETQ $eax + RET64 $eax bb.6.lbl4: $eax = MOV32ri 8 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/killed-register-flag.mir b/llvm/test/CodeGen/MIR/X86/killed-register-flag.mir index 38b524782e0d1..6641848c75be6 100644 --- a/llvm/test/CodeGen/MIR/X86/killed-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/killed-register-flag.mir @@ -28,13 +28,13 @@ body: | bb.1.less: ; CHECK: $eax = MOV32r0 - ; CHECK-NEXT: RETQ killed $eax + ; CHECK-NEXT: RET64 killed $eax $eax = MOV32r0 implicit-def $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: ; CHECK: $eax = COPY killed $edi - ; CHECK-NEXT: RETQ killed $eax + ; CHECK-NEXT: RET64 killed $eax $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir b/llvm/test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir index c36a67670dcc5..1d53de61d4356 100644 --- a/llvm/test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir +++ b/llvm/test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir @@ -22,6 +22,6 @@ body: | ; CHECK: [[@LINE+1]]:36: expected a 32 bit integer (the cfi offset is too large) CFI_INSTRUCTION def_cfa_offset 123456789123456 $rsp = ADD64ri32 $rsp, 4040, implicit-def dead $eflags - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/large-immediate-operand-error.mir b/llvm/test/CodeGen/MIR/X86/large-immediate-operand-error.mir index a59527230f4df..84e1c4fa8c12c 100644 --- a/llvm/test/CodeGen/MIR/X86/large-immediate-operand-error.mir +++ b/llvm/test/CodeGen/MIR/X86/large-immediate-operand-error.mir @@ -14,5 +14,5 @@ body: | bb.0.entry: ; CHECK: [[@LINE+1]]:20: integer literal is too large to be an immediate operand $eax = MOV32ri 12346127502983478823754212949184914 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/large-index-number-error.mir b/llvm/test/CodeGen/MIR/X86/large-index-number-error.mir index d7bf528265c9d..ba68680089793 100644 --- a/llvm/test/CodeGen/MIR/X86/large-index-number-error.mir +++ b/llvm/test/CodeGen/MIR/X86/large-index-number-error.mir @@ -29,5 +29,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/large-offset-number-error.mir b/llvm/test/CodeGen/MIR/X86/large-offset-number-error.mir index 0c0f5d87b2e5a..2063f4ca98cb8 100644 --- a/llvm/test/CodeGen/MIR/X86/large-offset-number-error.mir +++ b/llvm/test/CodeGen/MIR/X86/large-offset-number-error.mir @@ -20,5 +20,5 @@ body: | $rax = MOV64rm $rip, 1, _, @G + 123456789123456789123456789, _ $eax = MOV32rm $rax, 1, _, 0, _ $eax = INC32r $eax implicit-def $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir b/llvm/test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir index 59bd717aeadd4..b0fae40f7fdf1 100644 --- a/llvm/test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir +++ b/llvm/test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir @@ -19,6 +19,6 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:53: expected 64-bit integer (too large) $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load 12345678912345678924218574857 from %ir.a) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/liveout-register-mask.mir b/llvm/test/CodeGen/MIR/X86/liveout-register-mask.mir index 6db52cdaee0dd..956a436a8557c 100644 --- a/llvm/test/CodeGen/MIR/X86/liveout-register-mask.mir +++ b/llvm/test/CodeGen/MIR/X86/liveout-register-mask.mir @@ -38,5 +38,5 @@ body: | ; CHECK: PATCHPOINT 5, 5, 0, 2, 0, $rdi, $rsi, csr_64, liveout($esp, $rsp, $sp, $spl), PATCHPOINT 5, 5, 0, 2, 0, $rdi, $rsi, csr_64, liveout($esp, $rsp, $sp, $spl), implicit-def dead early-clobber $r11, implicit-def $rsp, implicit-def dead $rax $rbp = POP64r implicit-def $rsp, implicit $rsp - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/load-with-max-alignment.mir b/llvm/test/CodeGen/MIR/X86/load-with-max-alignment.mir index adb4059eb7e3d..fc6a81fcef02d 100644 --- a/llvm/test/CodeGen/MIR/X86/load-with-max-alignment.mir +++ b/llvm/test/CodeGen/MIR/X86/load-with-max-alignment.mir @@ -21,7 +21,7 @@ body: | liveins: $rdi renamable $eax = MOV32rm killed renamable $rdi, 1, $noreg, 0, $noreg :: (load (s32) from unknown-address, align 4294967296) - RETQ $eax + RET64 $eax ... 
--- @@ -39,6 +39,6 @@ body: | liveins: $rdi renamable $eax = MOV32rm killed renamable $rdi, 1, $noreg, 0, $noreg :: (load (s32) from unknown-address + 4, basealign 4294967296) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/machine-basic-block-operands.mir b/llvm/test/CodeGen/MIR/X86/machine-basic-block-operands.mir index 027482debe18c..e4e3a52d23117 100644 --- a/llvm/test/CodeGen/MIR/X86/machine-basic-block-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/machine-basic-block-operands.mir @@ -49,7 +49,7 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2.exit: - RETQ $eax + RET64 $eax ... --- # CHECK: name: bar @@ -69,5 +69,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.3: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/machine-instructions.mir b/llvm/test/CodeGen/MIR/X86/machine-instructions.mir index 1989ee35fb507..38d80ebf304ab 100644 --- a/llvm/test/CodeGen/MIR/X86/machine-instructions.mir +++ b/llvm/test/CodeGen/MIR/X86/machine-instructions.mir @@ -17,7 +17,7 @@ name: inc body: | bb.0.entry: ; CHECK: MOV32rr - ; CHECK-NEXT: RETQ + ; CHECK-NEXT: RET64 $eax = MOV32rr $eax - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/machine-verifier.mir b/llvm/test/CodeGen/MIR/X86/machine-verifier.mir index 17aa16b4a4316..5cf5e8f0adc92 100644 --- a/llvm/test/CodeGen/MIR/X86/machine-verifier.mir +++ b/llvm/test/CodeGen/MIR/X86/machine-verifier.mir @@ -19,5 +19,5 @@ body: | ; CHECK: instruction: COPY ; CHECK: 2 operands expected, but 0 given. COPY - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/memory-operands.mir b/llvm/test/CodeGen/MIR/X86/memory-operands.mir index 483764a915e22..b136ef0a01370 100644 --- a/llvm/test/CodeGen/MIR/X86/memory-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/memory-operands.mir @@ -205,7 +205,7 @@ body: | ; CHECK-NEXT: MOV32mi killed $rdi, 1, $noreg, 0, $noreg, 42 :: (store (s32) into %ir.a) $eax = MOV32rm $rdi, 1, _, 0, _ :: (load (s32) from %ir.a) MOV32mi killed $rdi, 1, _, 0, _, 42 :: (store (s32) into %ir.a) - RETQ $eax + RET64 $eax ... --- name: test2 @@ -217,7 +217,7 @@ body: | liveins: $rdi ; CHECK: INC32m killed $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (store (s32) into %ir."a value"), (load (s32) from %ir."a value") INC32m killed $rdi, 1, _, 0, _, implicit-def dead $eflags :: (store (s32) into %ir."a value"), (load (s32) from %ir."a value") - RETQ + RET64 ... --- name: test3 @@ -238,7 +238,7 @@ body: | $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from %ir.0) $eax = INC32r killed $eax, implicit-def dead $eflags MOV32mr $rsp, 1, _, -4, _, killed $eax :: (store (s32) into %ir.1) - RETQ + RET64 ... --- name: volatile_inc @@ -254,7 +254,7 @@ body: | $eax = MOV32rm $rdi, 1, _, 0, _ :: (volatile load (s32) from %ir.x) $eax = INC32r killed $eax, implicit-def dead $eflags MOV32mr killed $rdi, 1, _, 0, _, $eax :: (volatile store (s32) into %ir.x) - RETQ $eax + RET64 $eax ... --- name: non_temporal_store @@ -268,7 +268,7 @@ body: | ; CHECK: name: non_temporal_store ; CHECK: MOVNTImr killed $rdi, 1, $noreg, 0, $noreg, killed $esi :: (non-temporal store (s32) into %ir.a) MOVNTImr killed $rdi, 1, _, 0, _, killed $esi :: (non-temporal store (s32) into %ir.a) - RETQ + RET64 ... --- name: invariant_load @@ -281,7 +281,7 @@ body: | ; CHECK: name: invariant_load ; CHECK: $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (invariant load (s32) from %ir.x) $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (invariant load (s32) from %ir.x) - RETQ $eax + RET64 $eax ... 
--- name: memory_offset @@ -302,7 +302,7 @@ body: | $xmm1 = MOVSSrr killed $xmm1, killed $xmm2 MOVAPSmr $rdi, 1, _, 0, _, killed $xmm0 :: (store (s128) into %ir.vec) MOVAPSmr killed $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16) - RETQ + RET64 ... --- name: memory_alignment @@ -331,7 +331,7 @@ body: | MOVAPSmr $rdi, 1, _, 16, _, killed $xmm1 :: (store (s128) into %ir.vec + 16, basealign 64) MOVAPSmr $rdi, 1, _, 32, _, killed $xmm2 :: (store (s128) into %ir.vec + 32, align 32, basealign 64) MOVAPSmr killed $rdi, 1, _, 48, _, killed $xmm3 :: (store (s128) into %ir.vec + 48, basealign 64) - RETQ + RET64 ... --- name: constant_pool_psv @@ -349,7 +349,7 @@ body: | ; CHECK-NEXT: $xmm0 = ADDSDrm killed $xmm0, $rip, 1, $noreg, %const.0, $noreg, implicit $mxcsr :: (load (s64) from constant-pool + 8) $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load (s64) from constant-pool) $xmm0 = ADDSDrm killed $xmm0, $rip, 1, _, %const.0, _, implicit $mxcsr :: (load (s64) from constant-pool + 8) - RETQ $xmm0 + RET64 $xmm0 ... --- name: stack_psv @@ -372,7 +372,7 @@ body: | ST_FP80m $rsp, 1, _, 0, _, implicit-def dead $fpsw, implicit $fpcw :: (store (s80) into stack, align 16) CALL64pcrel32 &cosl, csr_64, implicit $rsp, implicit-def $rsp, implicit-def $fp0 $rsp = ADD64ri8 $rsp, 24, implicit-def dead $eflags - RETQ + RET64 ... --- name: got_psv @@ -384,7 +384,7 @@ body: | $rax = MOV64rm $rip, 1, _, @G, _ :: (load (s64) from got) $eax = MOV32rm killed $rax, 1, _, 0, _ $eax = INC32r killed $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- name: global_value @@ -399,7 +399,7 @@ body: | $rcx = MOV64rm $rip, 1, _, @0, _ $ecx = MOV32rm killed $rcx, 1, _, 0, _, implicit-def $rcx :: (load (s32) from @0) $eax = LEA64_32r killed $rax, 1, killed $rcx, 1, _ - RETQ $eax + RET64 $eax ... --- name: jumptable_psv @@ -433,23 +433,23 @@ body: | bb.2.def: $eax = MOV32r0 implicit-def dead $eflags - RETQ $eax + RET64 $eax bb.3.lbl1: $eax = MOV32ri 1 - RETQ $eax + RET64 $eax bb.4.lbl2: $eax = MOV32ri 2 - RETQ $eax + RET64 $eax bb.5.lbl3: $eax = MOV32ri 4 - RETQ $eax + RET64 $eax bb.6.lbl4: $eax = MOV32ri 8 - RETQ $eax + RET64 $eax ... --- name: tbaa_metadata @@ -462,7 +462,7 @@ body: | ; CHECK-NEXT: $eax = MOV32rm killed $rax, 1, $noreg, 0, $noreg :: (load (s32) from %ir.total_len2, !tbaa !6) $eax = MOV32rm killed $rax, 1, _, 0, _, implicit-def $rax :: (load (s32) from @a, !tbaa !2) $eax = MOV32rm killed $rax, 1, _, 0, _ :: (load (s32) from %ir.total_len2, !tbaa !6) - RETQ $eax + RET64 $eax ... --- name: aa_scope @@ -480,7 +480,7 @@ body: | MOVSSmr $rdi, 1, _, 20, _, killed $xmm0 :: (store (s32) into %ir.arrayidx.i, !noalias !9) $xmm0 = MOVSSrm_alt killed $rsi, 1, _, 0, _ :: (load (s32) from %ir.c) MOVSSmr killed $rdi, 1, _, 28, _, killed $xmm0 :: (store (s32) into %ir.arrayidx) - RETQ + RET64 ... --- name: range_metadata @@ -493,7 +493,7 @@ body: | ; CHECK-LABEL: name: range_metadata ; CHECK: $al = MOV8rm killed $rdi, 1, $noreg, 0, $noreg :: (load (s8) from %ir.x, !range !11) $al = MOV8rm killed $rdi, 1, _, 0, _ :: (load (s8) from %ir.x, !range !11) - RETQ $al + RET64 $al ... 
--- name: gep_value @@ -508,7 +508,7 @@ body: | ; CHECK-LABEL: gep_value ; CHECK: MOV32mr killed $rax, 1, $noreg, 0, $noreg, $edi, implicit killed $rdi :: (store (s32) into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16) MOV32mr killed $rax, 1, _, 0, _, $edi, implicit killed $rdi :: (store (s32) into `i32* getelementptr inbounds ([50 x %st], [50 x %st]* @values, i64 0, i64 0, i32 0)`, align 16) - RETQ + RET64 ... --- name: undef_value @@ -518,7 +518,7 @@ body: | ; CHECK-LABEL: name: undef_value ; CHECK: $rax = MOV64rm undef $rax, 1, $noreg, 0, $noreg :: (load (s64) from `i8** undef`) $rax = MOV64rm undef $rax, 1, _, 0, _ :: (load (s64) from `i8** undef`) - RETQ $rax + RET64 $rax ... --- # Test memory operand without associated value. @@ -529,7 +529,7 @@ tracksRegLiveness: true body: | bb.0: $rax = MOV64rm undef $rax, 1, _, 0, _ :: (load (s64)) - RETQ $rax + RET64 $rax ... --- # Test parsing of stack references in machine memory operands. @@ -542,7 +542,7 @@ stack: body: | bb.0: $rax = MOV64rm $rsp, 1, _, 0, _ :: (load (s64) from %stack.0) - RETQ $rax + RET64 $rax ... --- # Test parsing of unknown size in machine memory operands without alignment. @@ -555,7 +555,7 @@ stack: body: | bb.0: $rax = MOV64rm $rsp, 1, _, 0, _ :: (load unknown-size from %stack.0) - RETQ $rax + RET64 $rax ... --- # Test parsing of unknown size in machine memory operands with alignment. @@ -568,5 +568,5 @@ stack: body: | bb.0: $rax = MOV64rm $rsp, 1, _, 0, _ :: (load unknown-size from %stack.0, align 4) - RETQ $rax + RET64 $rax ... diff --git a/llvm/test/CodeGen/MIR/X86/metadata-operands.mir b/llvm/test/CodeGen/MIR/X86/metadata-operands.mir index 5375485d320ce..fe1f21efacd7c 100644 --- a/llvm/test/CodeGen/MIR/X86/metadata-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/metadata-operands.mir @@ -56,5 +56,5 @@ body: | DBG_VALUE _, 0, !12, !DIExpression(), debug-location !13 MOV32mr %stack.0.x.addr, 1, _, 0, _, %0 $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/missing-closing-quote.mir b/llvm/test/CodeGen/MIR/X86/missing-closing-quote.mir index 22f8ede90ba2f..ffbbf4f6591e4 100644 --- a/llvm/test/CodeGen/MIR/X86/missing-closing-quote.mir +++ b/llvm/test/CodeGen/MIR/X86/missing-closing-quote.mir @@ -18,5 +18,5 @@ body: | ; CHECK: [[@LINE+1]]:48: end of machine instruction reached before the closing '"' $rax = MOV64rm $rip, 1, _, @"quoted name, _ $eax = MOV32rm killed $rax, 1, _, 0, _ - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/missing-comma.mir b/llvm/test/CodeGen/MIR/X86/missing-comma.mir index c87f5e476df7b..3500f05966fc5 100644 --- a/llvm/test/CodeGen/MIR/X86/missing-comma.mir +++ b/llvm/test/CodeGen/MIR/X86/missing-comma.mir @@ -14,6 +14,6 @@ body: | bb.0.entry: ; CHECK: [[@LINE+1]]:25: expected ',' before the next machine operand $eax = XOR32rr $eax $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/missing-implicit-operand.mir b/llvm/test/CodeGen/MIR/X86/missing-implicit-operand.mir index 4bb1c25574333..4bc09b6a4a3f9 100644 --- a/llvm/test/CodeGen/MIR/X86/missing-implicit-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/missing-implicit-operand.mir @@ -34,5 +34,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2.exit: - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/named-registers.mir b/llvm/test/CodeGen/MIR/X86/named-registers.mir index 1bb67225d38b7..9d4dd8b8427a7 100644 --- a/llvm/test/CodeGen/MIR/X86/named-registers.mir +++ b/llvm/test/CodeGen/MIR/X86/named-registers.mir @@ -15,7 +15,7 @@ name: foo body: | bb.0.entry: ; CHECK: $eax = MOV32r0 - ; CHECK-NEXT: RETQ $eax + ; CHECK-NEXT: RET64 $eax $eax = MOV32r0 implicit-def $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/newline-handling.mir b/llvm/test/CodeGen/MIR/X86/newline-handling.mir index f8acb61bf2757..8b425a2f98cce 100644 --- a/llvm/test/CodeGen/MIR/X86/newline-handling.mir +++ b/llvm/test/CodeGen/MIR/X86/newline-handling.mir @@ -42,12 +42,12 @@ liveins: # CHECK: bb.1.less: # CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags -# CHECK-NEXT: RETQ killed $eax +# CHECK-NEXT: RET64 killed $eax # CHECK: bb.2.exit: # CHECK-NEXT: liveins: $edi # CHECK: $eax = COPY killed $edi -# CHECK-NEXT: RETQ killed $eax +# CHECK-NEXT: RET64 killed $eax body: | bb.0.entry: successors: %bb.1, %bb.2 @@ -62,13 +62,13 @@ body: | $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... --- @@ -86,12 +86,12 @@ liveins: # CHECK: bb.1.less: # CHECK-NEXT: $eax = MOV32r0 implicit-def dead $eflags -# CHECK-NEXT: RETQ killed $eax +# CHECK-NEXT: RET64 killed $eax # CHECK: bb.2.exit: # CHECK-NEXT: liveins: $edi # CHECK: $eax = COPY killed $edi -# CHECK-NEXT: RETQ killed $eax +# CHECK-NEXT: RET64 killed $eax body: | bb.0.entry: @@ -100,10 +100,10 @@ body: | CMP32ri8 $edi, 10, implicit-def $eflags JCC_1 %bb.2, 15, implicit killed $eflags bb.1.less: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/null-register-operands.mir b/llvm/test/CodeGen/MIR/X86/null-register-operands.mir index 35b02cf3464d5..f64ba1b78721a 100644 --- a/llvm/test/CodeGen/MIR/X86/null-register-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/null-register-operands.mir @@ -16,7 +16,7 @@ name: deref body: | bb.0.entry: ; CHECK: $eax = MOV32rm $rdi, 1, $noreg, 0, $noreg - ; CHECK-NEXT: RETQ $eax + ; CHECK-NEXT: RET64 $eax $eax = MOV32rm $rdi, 1, _, 0, $noreg - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/register-mask-operands.mir b/llvm/test/CodeGen/MIR/X86/register-mask-operands.mir index 7e88b28d07898..761d44d84ffb4 100644 --- a/llvm/test/CodeGen/MIR/X86/register-mask-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/register-mask-operands.mir @@ -23,7 +23,7 @@ name: compute body: | bb.0.body: $eax = IMUL32rri8 $edi, 11, implicit-def $eflags - RETQ $eax + RET64 $eax ... --- # CHECK: name: foo @@ -35,5 +35,5 @@ body: | PUSH64r $rax, implicit-def $rsp, implicit $rsp CALL64pcrel32 @compute, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp, implicit-def $eax $rdx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/register-operands-target-flag-error.mir b/llvm/test/CodeGen/MIR/X86/register-operands-target-flag-error.mir index 530a3bff97a59..b6bcb778bb96b 100644 --- a/llvm/test/CodeGen/MIR/X86/register-operands-target-flag-error.mir +++ b/llvm/test/CodeGen/MIR/X86/register-operands-target-flag-error.mir @@ -20,5 +20,5 @@ body: | $rax = MOV64rm target-flags(x86-got) $rip, 1, _, @G, _ $eax = MOV32rm killed $rax, 1, _, 0, _ $eax = INC32r killed $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/renamable-register-flag.mir b/llvm/test/CodeGen/MIR/X86/renamable-register-flag.mir index 5b2f482449ce8..3a773b1320862 100644 --- a/llvm/test/CodeGen/MIR/X86/renamable-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/renamable-register-flag.mir @@ -12,5 +12,5 @@ body: | bb.0: ; CHECK: renamable $eax = IMUL32rri8 $edi, 11, implicit-def dead $eflags renamable $eax = IMUL32rri8 $edi, 11, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/roundtrip.mir b/llvm/test/CodeGen/MIR/X86/roundtrip.mir index b703be0a827ee..46f08ad1a214d 100644 --- a/llvm/test/CodeGen/MIR/X86/roundtrip.mir +++ b/llvm/test/CodeGen/MIR/X86/roundtrip.mir @@ -9,12 +9,12 @@ # CHECK: %0:gr32 = MOV32r0 implicit-def $eflags # CHECK: dead %1:gr32 = COPY %0 # CHECK: MOV32mr undef $rcx, 1, $noreg, 0, $noreg, killed %0 :: (volatile store (s32)) -# CHECK: RETQ undef $eax +# CHECK: RET64 undef $eax name: func0 body: | bb.0: %0 : gr32 = MOV32r0 implicit-def $eflags dead %1 : gr32 = COPY %0 MOV32mr undef $rcx, 1, _, 0, _, killed %0 :: (volatile store (s32)) - RETQ undef $eax + RET64 undef $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir b/llvm/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir index 5cae2a20c3a5b..84d298dbd4070 100644 --- a/llvm/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir +++ b/llvm/test/CodeGen/MIR/X86/simple-register-allocation-hints.mir @@ -30,5 +30,5 @@ body: | %2 = COPY $edi %2 = IMUL32rr %2, %1, implicit-def dead $eflags $eax = COPY %2 - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir b/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir index 13c252316777c..9063d296f93a2 100644 --- a/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir +++ b/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-aliased.mir @@ -26,5 +26,5 @@ body: | bb.0.entry: MOV32mr %rsp, 1, _, -4, _, %edi %eax = COPY %edi - RETQ %eax + RET64 %eax ... diff --git a/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir b/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir index dba4a0b42a680..430f0143c78b1 100644 --- a/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir +++ b/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-object-immutable.mir @@ -26,5 +26,5 @@ body: | bb.0.entry: MOV32mr %rsp, 1, _, -4, _, %edi %eax = COPY %edi - RETQ %eax + RET64 %eax ... diff --git a/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir b/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir index f17a42359ab11..1f7c200848a89 100644 --- a/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir +++ b/llvm/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir @@ -30,5 +30,5 @@ body: | bb.0.entry: MOV32mr $rsp, 1, _, -4, _, $edi $eax = COPY $edi - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/stack-object-invalid-name.mir b/llvm/test/CodeGen/MIR/X86/stack-object-invalid-name.mir index 3a72dc4521e0b..54d01563db451 100644 --- a/llvm/test/CodeGen/MIR/X86/stack-object-invalid-name.mir +++ b/llvm/test/CodeGen/MIR/X86/stack-object-invalid-name.mir @@ -24,5 +24,5 @@ body: | bb.0.entry: MOV32mr $rsp, 1, _, -4, _, $edi $eax = MOV32rm $rsp, 1, _, -4, _ - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir b/llvm/test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir index 91b7951bf3680..66d84017e4975 100644 --- a/llvm/test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir +++ b/llvm/test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir @@ -28,5 +28,5 @@ body: | ; CHECK: [[@LINE+1]]:13: the name of the stack object '%stack.0' isn't 'x' MOV32mr %stack.0.x, 1, _, 0, _, %0 $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/stack-object-operands.mir b/llvm/test/CodeGen/MIR/X86/stack-object-operands.mir index 76be7e074cdd8..d6dac85f21cb4 100644 --- a/llvm/test/CodeGen/MIR/X86/stack-object-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/stack-object-operands.mir @@ -37,11 +37,11 @@ body: | ; CHECK: MOV32mi %stack.1, 1, $noreg, 0, $noreg, 2 ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.0.b, 1, $noreg, 0, $noreg ; CHECK: $eax = COPY [[MOV32rm1]] - ; CHECK: RETL $eax + ; CHECK: RET32 $eax %0 = MOV32rm %fixed-stack.0, 1, _, 0, _ MOV32mr %stack.0.b, 1, _, 0, _, %0 MOV32mi %stack.1, 1, _, 0, _, 2 %1 = MOV32rm %stack.0, 1, _, 0, _ $eax = COPY %1 - RETL $eax + RET32 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/stack-object-redefinition-error.mir b/llvm/test/CodeGen/MIR/X86/stack-object-redefinition-error.mir index 4e734d7416351..ad6b9da529843 100644 --- a/llvm/test/CodeGen/MIR/X86/stack-object-redefinition-error.mir +++ b/llvm/test/CodeGen/MIR/X86/stack-object-redefinition-error.mir @@ -33,5 +33,5 @@ body: | MOV32mr $rsp, 1, _, -4, _, killed $edi MOV64mi32 $rsp, 1, _, -16, _, 2 $eax = MOV32rm $rsp, 1, _, -4, _ - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/stack-objects.mir b/llvm/test/CodeGen/MIR/X86/stack-objects.mir index ec3c099646ccc..282c6102ffb7d 100644 --- a/llvm/test/CodeGen/MIR/X86/stack-objects.mir +++ b/llvm/test/CodeGen/MIR/X86/stack-objects.mir @@ -39,5 +39,5 @@ body: | MOV32mr $rsp, 1, _, -4, _, $edi MOV64mi32 $rsp, 1, _, -16, _, 2 $eax = MOV32rm $rsp, 1, _, -4, _ - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/standalone-register-error.mir b/llvm/test/CodeGen/MIR/X86/standalone-register-error.mir index 009e514e770e1..30df44da56c65 100644 --- a/llvm/test/CodeGen/MIR/X86/standalone-register-error.mir +++ b/llvm/test/CodeGen/MIR/X86/standalone-register-error.mir @@ -19,5 +19,5 @@ body: | %0 = COPY $edi $eax = COPY %0 - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/subregister-index-operands.mir b/llvm/test/CodeGen/MIR/X86/subregister-index-operands.mir index b1e96779c3f71..383499f3650f9 100644 --- a/llvm/test/CodeGen/MIR/X86/subregister-index-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/subregister-index-operands.mir @@ -25,10 +25,10 @@ body: | ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gr32 = INSERT_SUBREG $edi, $al, %subreg.sub_8bit ; CHECK: [[EXTRACT_SUBREG:%[0-9]+]]:gr8 = EXTRACT_SUBREG $eax, %subreg.sub_8bit_hi ; CHECK: $ax = REG_SEQUENCE [[EXTRACT_SUBREG]], %subreg.sub_8bit, [[EXTRACT_SUBREG]], %subreg.sub_8bit_hi - ; CHECK: RETQ $ax + ; CHECK: RET64 $ax %0 = INSERT_SUBREG $edi, $al, %subreg.sub_8bit %1 = EXTRACT_SUBREG $eax, %subreg.sub_8bit_hi $ax = REG_SEQUENCE %1, %subreg.sub_8bit, %1, %subreg.sub_8bit_hi - RETQ $ax + RET64 $ax ... diff --git a/llvm/test/CodeGen/MIR/X86/subregister-operands.mir b/llvm/test/CodeGen/MIR/X86/subregister-operands.mir index 3361deb437c02..a7d854fde1ed5 100644 --- a/llvm/test/CodeGen/MIR/X86/subregister-operands.mir +++ b/llvm/test/CodeGen/MIR/X86/subregister-operands.mir @@ -27,11 +27,11 @@ body: | ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit ; CHECK: [[AND8ri:%[0-9]+]]:gr8 = AND8ri [[COPY1]], 1, implicit-def $eflags ; CHECK: $al = COPY [[AND8ri]] - ; CHECK: RETQ $al + ; CHECK: RET64 $al %0 = COPY $edi %1 = COPY %0.sub_8bit %2 = AND8ri %1, 1, implicit-def $eflags $al = COPY %2 - RETQ $al + RET64 $al ... diff --git a/llvm/test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir b/llvm/test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir index dd6dcef5ef1e5..54fbd01eae7a5 100644 --- a/llvm/test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir +++ b/llvm/test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir @@ -32,11 +32,11 @@ body: | bb.1.less: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/successor-basic-blocks.mir b/llvm/test/CodeGen/MIR/X86/successor-basic-blocks.mir index 0f541aa03b2fa..d08919692a49d 100644 --- a/llvm/test/CodeGen/MIR/X86/successor-basic-blocks.mir +++ b/llvm/test/CodeGen/MIR/X86/successor-basic-blocks.mir @@ -42,13 +42,13 @@ body: | bb.1.less: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2.exit: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... --- name: bar @@ -72,11 +72,11 @@ body: | bb.1: successors: $eax = MOV32r0 implicit-def dead $eflags - RETQ killed $eax + RET64 killed $eax bb.2: liveins: $edi $eax = COPY killed $edi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/tied-def-operand-invalid.mir b/llvm/test/CodeGen/MIR/X86/tied-def-operand-invalid.mir index 412db1dfd2215..7509b31964e5a 100644 --- a/llvm/test/CodeGen/MIR/X86/tied-def-operand-invalid.mir +++ b/llvm/test/CodeGen/MIR/X86/tied-def-operand-invalid.mir @@ -20,5 +20,5 @@ body: | ; CHECK: [[@LINE+1]]:58: use of invalid tied-def operand index '0'; the operand #0 isn't a defined register INLINEASM &"$foo", 1, 2818058, def $rdi, 2147483657, killed $rdi(tied-def 0) $rax = COPY killed $rdi - RETQ killed $rax + RET64 killed $rax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/tied-physical-regs-match.mir b/llvm/test/CodeGen/MIR/X86/tied-physical-regs-match.mir index 3d842f66f0aa7..1e4104e8ebda0 100644 --- a/llvm/test/CodeGen/MIR/X86/tied-physical-regs-match.mir +++ b/llvm/test/CodeGen/MIR/X86/tied-physical-regs-match.mir @@ -18,5 +18,5 @@ body: | ; CHECK: Tied physical registers must match. $rbx = AND64rm killed $rdx, killed $rdi, 1, _, 0, _, implicit-def dead $eflags - RETQ $rbx + RET64 $rbx ... diff --git a/llvm/test/CodeGen/MIR/X86/undef-register-flag.mir b/llvm/test/CodeGen/MIR/X86/undef-register-flag.mir index 2464959db6255..e0220cfdb706e 100644 --- a/llvm/test/CodeGen/MIR/X86/undef-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/undef-register-flag.mir @@ -24,7 +24,7 @@ name: compute body: | bb.0.body: $eax = IMUL32rri8 $edi, 11, implicit-def $eflags - RETQ $eax + RET64 $eax ... --- name: foo @@ -34,5 +34,5 @@ body: | PUSH64r undef $rax, implicit-def $rsp, implicit $rsp CALL64pcrel32 @compute, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp, implicit-def $eax $rdx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir b/llvm/test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir index 7462290472ae9..d7344cb02f6e5 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir @@ -33,5 +33,5 @@ body: | MOV32mi %stack.1, 1, _, 0, _, 2 %1 = MOV32rm %stack.0, 1, _, 0, _ $eax = COPY %1 - RETL $eax + RET32 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-global-value.mir b/llvm/test/CodeGen/MIR/X86/undefined-global-value.mir index 881e114b25ab9..ec27a408933a2 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-global-value.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-global-value.mir @@ -22,5 +22,5 @@ body: | $rax = MOV64rm $rip, 1, _, @2, _ $eax = MOV32rm $rax, 1, _, 0, _ $eax = INC32r $eax - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir index e5fde201fb91e..8ee9cb282d0fd 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir @@ -26,5 +26,5 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1.block (address-taken): - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir index be0690b02a428..2ee485eb90751 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir @@ -25,5 +25,5 @@ body: | JMP64m $rip, 1, _, @addr, _ bb.1 (address-taken): - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-jump-table-id.mir b/llvm/test/CodeGen/MIR/X86/undefined-jump-table-id.mir index 9672bc76a0d52..246b0c9964614 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-jump-table-id.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-jump-table-id.mir @@ -53,21 +53,21 @@ body: | bb.2.def: $eax = MOV32r0 implicit-def $eflags - RETQ $eax + RET64 $eax bb.3.lbl1: $eax = MOV32ri 1 - RETQ $eax + RET64 $eax bb.4.lbl2: $eax = MOV32ri 2 - RETQ $eax + RET64 $eax bb.5.lbl3: $eax = MOV32ri 4 - RETQ $eax + RET64 $eax bb.6.lbl4: $eax = MOV32ri 8 - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/undefined-named-global-value.mir b/llvm/test/CodeGen/MIR/X86/undefined-named-global-value.mir index 43c8d1414324c..8d0e129515407 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-named-global-value.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-named-global-value.mir @@ -22,5 +22,5 @@ body: | $rax = MOV64rm $rip, 1, _, @GG, _ $eax = MOV32rm $rax, 1, _, 0, _ $eax = INC32r $eax - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-register-class.mir b/llvm/test/CodeGen/MIR/X86/undefined-register-class.mir index f17fc7e8ef9c0..8fb9dbc2c5680 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-register-class.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-register-class.mir @@ -18,6 +18,6 @@ registers: - {id: 0, class: 'gr3200'} body: | bb.0.entry: - RETQ %eax + RET64 %eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-stack-object.mir b/llvm/test/CodeGen/MIR/X86/undefined-stack-object.mir index aeb6b2abfe853..bf3f5fa2eae5f 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-stack-object.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-stack-object.mir @@ -25,5 +25,5 @@ body: | ; CHECK: [[@LINE+1]]:13: use of undefined stack object '%stack.2' MOV32mr %stack.2, 1, _, 0, _, %0 $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir b/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir index e56873d52d607..6069e48def4b1 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir @@ -19,6 +19,6 @@ body: | liveins: $rdi ; CHECK: [[@LINE+1]]:64: use of undefined IR value '%ir.c' $eax = MOV32rm killed $rdi, 1, _, 0, _ :: (load (s32) from %ir.c) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/undefined-virtual-register.mir b/llvm/test/CodeGen/MIR/X86/undefined-virtual-register.mir index 665e37fc15c2c..c31735a52eb68 100644 --- a/llvm/test/CodeGen/MIR/X86/undefined-virtual-register.mir +++ b/llvm/test/CodeGen/MIR/X86/undefined-virtual-register.mir @@ -20,6 +20,6 @@ body: | %0 = COPY $edi ; CHECK: Cannot determine class/bank of virtual register 1 in function 'test' $eax = COPY %1 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir b/llvm/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir index 65c0b8d7f1b8e..0c5a58e467cee 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-machine-basic-block.mir @@ -32,5 +32,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir b/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir index 85abb2fcfdffe..45d39c399660c 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-metadata-keyword.mir @@ -21,5 +21,5 @@ body: | $eax = MOV32rm $rdi, 1, _, 0, _ :: (load (s32) from %ir.x, !tba !0) $eax = INC32r killed $eax, implicit-def dead $eflags MOV32mr killed $rdi, 1, _, 0, _, $eax :: (store (s32) into %ir.x) - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/unknown-metadata-node.mir b/llvm/test/CodeGen/MIR/X86/unknown-metadata-node.mir index a6152701c8f57..260b0a0a15d54 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-metadata-node.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-metadata-node.mir @@ -53,5 +53,5 @@ body: | DBG_VALUE _, 0, !42, !13 MOV32mr %stack.0.x.addr, 1, _, 0, _, %0 $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir b/llvm/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir index 17bbce438a89a..11d894dbeff59 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-named-machine-basic-block.mir @@ -31,5 +31,5 @@ body: | $eax = MOV32r0 implicit-def $eflags bb.2.exit: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/MIR/X86/unknown-register.mir b/llvm/test/CodeGen/MIR/X86/unknown-register.mir index 37c30f6cb3167..cdd85f0b1b369 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-register.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-register.mir @@ -16,5 +16,5 @@ body: | bb.0.entry: ; CHECK: [[@LINE+1]]:5: unknown register name 'xax' $xax = MOV32r0 - RETQ $xax + RET64 $xax ... diff --git a/llvm/test/CodeGen/MIR/X86/unknown-subregister-index-op.mir b/llvm/test/CodeGen/MIR/X86/unknown-subregister-index-op.mir index 18eb3e7356626..30ec530fad3db 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-subregister-index-op.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-subregister-index-op.mir @@ -21,5 +21,5 @@ body: | bb.0.entry: ; CHECK: [[@LINE+1]]:35: unknown subregister index 'bit8' %0 = INSERT_SUBREG $edi, $al, %subreg.bit8 - RETQ %0 + RET64 %0 ... diff --git a/llvm/test/CodeGen/MIR/X86/unknown-subregister-index.mir b/llvm/test/CodeGen/MIR/X86/unknown-subregister-index.mir index d5cbb06aac5bc..6db338dc9fcae 100644 --- a/llvm/test/CodeGen/MIR/X86/unknown-subregister-index.mir +++ b/llvm/test/CodeGen/MIR/X86/unknown-subregister-index.mir @@ -24,5 +24,5 @@ body: | %1 = COPY %0.bit8 %2 = AND8ri %1, 1, implicit-def $eflags $al = COPY %2 - RETQ $al + RET64 $al ... diff --git a/llvm/test/CodeGen/MIR/X86/unrecognized-character.mir b/llvm/test/CodeGen/MIR/X86/unrecognized-character.mir index 4b6631099716a..15817d06a28a9 100644 --- a/llvm/test/CodeGen/MIR/X86/unrecognized-character.mir +++ b/llvm/test/CodeGen/MIR/X86/unrecognized-character.mir @@ -13,5 +13,5 @@ name: foo body: | bb.0.entry: ; CHECK: [[@LINE+1]]:5: unexpected character '\' - \ RETQ + \ RET64 ... diff --git a/llvm/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir b/llvm/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir index ce3e846c03c06..77562620f1eb0 100644 --- a/llvm/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir +++ b/llvm/test/CodeGen/MIR/X86/variable-sized-stack-object-size-error.mir @@ -30,5 +30,5 @@ body: | MOV32mr %rsp, 1, _, -4, _, %edi MOV64mi32 %rsp, 1, _, -16, _, 2 %eax = MOV32rm %rsp, 1, _, -4, _ - RETQ %eax + RET64 %eax ... diff --git a/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir b/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir index e251ddac5f3b8..81c2b99fc0684 100644 --- a/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir +++ b/llvm/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir @@ -40,5 +40,5 @@ body: | MOV32mr $rsp, 1, _, -4, _, $edi MOV64mi32 $rsp, 1, _, -16, _, 2 $eax = MOV32rm $rsp, 1, _, -4, _ - RETQ $eax + RET64 $eax ... 
diff --git a/llvm/test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir b/llvm/test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir index 6ecfabc1b30f7..423e892da4a49 100644 --- a/llvm/test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir +++ b/llvm/test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir @@ -21,6 +21,6 @@ body: | %0 = COPY %edi %eax = COPY %0 - RETQ %eax + RET64 %eax ... diff --git a/llvm/test/CodeGen/MIR/X86/virtual-registers.mir b/llvm/test/CodeGen/MIR/X86/virtual-registers.mir index 56a475de783a1..e317746e08a18 100644 --- a/llvm/test/CodeGen/MIR/X86/virtual-registers.mir +++ b/llvm/test/CodeGen/MIR/X86/virtual-registers.mir @@ -56,11 +56,11 @@ body: | ; CHECK-NEXT: $eax = COPY %2 %2 = MOV32r0 implicit-def $eflags $eax = COPY %2 - RETQ $eax + RET64 $eax bb.2.exit: $eax = COPY %0 - RETQ $eax + RET64 $eax ... --- name: foo @@ -90,10 +90,10 @@ body: | ; CHECK-NEXT: $eax = COPY %2 %10 = MOV32r0 implicit-def $eflags $eax = COPY %10 - RETQ $eax + RET64 $eax bb.2.exit: ; CHECK: $eax = COPY %0 $eax = COPY %2 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll index 49532f246838a..e17980e98e9b5 100644 --- a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll @@ -76,59 +76,43 @@ define i32 @rotl_i32(i32 %x, i32 %z) { define i64 @rotl_i64(i64 %x, i64 %z) { ; CHECK-BE-LABEL: rotl_i64: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: negu $1, $7 -; CHECK-BE-NEXT: andi $3, $1, 63 -; CHECK-BE-NEXT: srlv $6, $4, $3 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: andi $2, $7, 63 -; CHECK-BE-NEXT: move $8, $6 -; CHECK-BE-NEXT: movn $8, $zero, $1 -; CHECK-BE-NEXT: sllv $9, $4, $2 -; CHECK-BE-NEXT: srl $10, $5, 1 -; CHECK-BE-NEXT: not $11, $2 -; CHECK-BE-NEXT: srlv $10, $10, $11 -; CHECK-BE-NEXT: or $9, $9, $10 -; CHECK-BE-NEXT: sllv $10, $5, $2 -; CHECK-BE-NEXT: andi $7, $7, 32 -; CHECK-BE-NEXT: movn $9, $10, $7 -; CHECK-BE-NEXT: or $2, $9, $8 -; CHECK-BE-NEXT: srlv $5, $5, $3 -; CHECK-BE-NEXT: not $3, $3 -; CHECK-BE-NEXT: sll $4, $4, 1 -; CHECK-BE-NEXT: sllv $3, $4, $3 -; CHECK-BE-NEXT: or $3, $3, $5 -; CHECK-BE-NEXT: movn $3, $6, $1 -; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: srl $1, $7, 5 +; CHECK-BE-NEXT: andi $1, $1, 1 +; CHECK-BE-NEXT: move $3, $4 +; CHECK-BE-NEXT: movn $3, $5, $1 +; CHECK-BE-NEXT: andi $6, $7, 31 +; CHECK-BE-NEXT: sllv $2, $3, $6 +; CHECK-BE-NEXT: movn $5, $4, $1 +; CHECK-BE-NEXT: srl $1, $5, 1 +; CHECK-BE-NEXT: not $4, $7 +; CHECK-BE-NEXT: andi $4, $4, 31 +; CHECK-BE-NEXT: srlv $1, $1, $4 +; CHECK-BE-NEXT: or $2, $2, $1 +; CHECK-BE-NEXT: sllv $1, $5, $6 +; CHECK-BE-NEXT: srl $3, $3, 1 +; CHECK-BE-NEXT: srlv $3, $3, $4 ; CHECK-BE-NEXT: jr $ra -; CHECK-BE-NEXT: or $3, $10, $3 +; CHECK-BE-NEXT: or $3, $1, $3 ; ; CHECK-LE-LABEL: rotl_i64: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: negu $1, $6 -; CHECK-LE-NEXT: andi $2, $1, 63 -; CHECK-LE-NEXT: srlv $7, $5, $2 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: andi $3, $6, 63 -; CHECK-LE-NEXT: move $8, $7 -; CHECK-LE-NEXT: movn $8, $zero, $1 -; CHECK-LE-NEXT: sllv $9, $5, $3 -; CHECK-LE-NEXT: srl $10, $4, 1 -; CHECK-LE-NEXT: not $11, $3 -; CHECK-LE-NEXT: srlv $10, $10, $11 -; CHECK-LE-NEXT: or $9, $9, $10 -; CHECK-LE-NEXT: sllv $10, $4, $3 -; CHECK-LE-NEXT: andi $6, $6, 32 -; CHECK-LE-NEXT: movn $9, $10, $6 -; CHECK-LE-NEXT: or $3, $9, $8 -; CHECK-LE-NEXT: srlv $4, $4, $2 -; CHECK-LE-NEXT: not $2, $2 -; CHECK-LE-NEXT: sll $5, $5, 1 -; CHECK-LE-NEXT: sllv $2, $5, $2 -; 
CHECK-LE-NEXT: or $2, $2, $4 -; CHECK-LE-NEXT: movn $2, $7, $1 -; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: srl $1, $6, 5 +; CHECK-LE-NEXT: andi $1, $1, 1 +; CHECK-LE-NEXT: move $3, $4 +; CHECK-LE-NEXT: movn $3, $5, $1 +; CHECK-LE-NEXT: andi $7, $6, 31 +; CHECK-LE-NEXT: sllv $2, $3, $7 +; CHECK-LE-NEXT: movn $5, $4, $1 +; CHECK-LE-NEXT: srl $1, $5, 1 +; CHECK-LE-NEXT: not $4, $6 +; CHECK-LE-NEXT: andi $4, $4, 31 +; CHECK-LE-NEXT: srlv $1, $1, $4 +; CHECK-LE-NEXT: or $2, $2, $1 +; CHECK-LE-NEXT: sllv $1, $5, $7 +; CHECK-LE-NEXT: srl $3, $3, 1 +; CHECK-LE-NEXT: srlv $3, $3, $4 ; CHECK-LE-NEXT: jr $ra -; CHECK-LE-NEXT: or $2, $10, $2 +; CHECK-LE-NEXT: or $3, $1, $3 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } @@ -254,59 +238,41 @@ define i32 @rotr_i32(i32 %x, i32 %z) { define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK-BE-LABEL: rotr_i64: ; CHECK-BE: # %bb.0: -; CHECK-BE-NEXT: negu $1, $7 -; CHECK-BE-NEXT: andi $2, $1, 63 -; CHECK-BE-NEXT: sllv $6, $5, $2 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: andi $3, $7, 63 -; CHECK-BE-NEXT: move $8, $6 -; CHECK-BE-NEXT: movn $8, $zero, $1 -; CHECK-BE-NEXT: srlv $9, $5, $3 -; CHECK-BE-NEXT: sll $10, $4, 1 -; CHECK-BE-NEXT: not $11, $3 -; CHECK-BE-NEXT: sllv $10, $10, $11 -; CHECK-BE-NEXT: or $9, $10, $9 -; CHECK-BE-NEXT: srlv $10, $4, $3 -; CHECK-BE-NEXT: andi $7, $7, 32 -; CHECK-BE-NEXT: movn $9, $10, $7 -; CHECK-BE-NEXT: or $3, $9, $8 -; CHECK-BE-NEXT: sllv $4, $4, $2 -; CHECK-BE-NEXT: not $2, $2 -; CHECK-BE-NEXT: srl $5, $5, 1 -; CHECK-BE-NEXT: srlv $2, $5, $2 -; CHECK-BE-NEXT: or $2, $4, $2 -; CHECK-BE-NEXT: movn $2, $6, $1 -; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: andi $1, $7, 32 +; CHECK-BE-NEXT: move $3, $5 +; CHECK-BE-NEXT: movz $3, $4, $1 +; CHECK-BE-NEXT: andi $6, $7, 31 +; CHECK-BE-NEXT: srlv $2, $3, $6 +; CHECK-BE-NEXT: movz $4, $5, $1 +; CHECK-BE-NEXT: sll $1, $4, 1 +; CHECK-BE-NEXT: not $5, $7 +; CHECK-BE-NEXT: andi $5, $5, 31 +; CHECK-BE-NEXT: sllv $1, $1, $5 +; CHECK-BE-NEXT: or $2, $1, $2 +; CHECK-BE-NEXT: srlv $1, $4, $6 +; CHECK-BE-NEXT: sll $3, $3, 1 +; CHECK-BE-NEXT: sllv $3, $3, $5 ; CHECK-BE-NEXT: jr $ra -; CHECK-BE-NEXT: or $2, $10, $2 +; CHECK-BE-NEXT: or $3, $3, $1 ; ; CHECK-LE-LABEL: rotr_i64: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: negu $1, $6 -; CHECK-LE-NEXT: andi $3, $1, 63 -; CHECK-LE-NEXT: sllv $7, $4, $3 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: andi $2, $6, 63 -; CHECK-LE-NEXT: move $8, $7 -; CHECK-LE-NEXT: movn $8, $zero, $1 -; CHECK-LE-NEXT: srlv $9, $4, $2 -; CHECK-LE-NEXT: sll $10, $5, 1 -; CHECK-LE-NEXT: not $11, $2 -; CHECK-LE-NEXT: sllv $10, $10, $11 -; CHECK-LE-NEXT: or $9, $10, $9 -; CHECK-LE-NEXT: srlv $10, $5, $2 -; CHECK-LE-NEXT: andi $6, $6, 32 -; CHECK-LE-NEXT: movn $9, $10, $6 -; CHECK-LE-NEXT: or $2, $9, $8 -; CHECK-LE-NEXT: sllv $5, $5, $3 -; CHECK-LE-NEXT: not $3, $3 -; CHECK-LE-NEXT: srl $4, $4, 1 -; CHECK-LE-NEXT: srlv $3, $4, $3 -; CHECK-LE-NEXT: or $3, $5, $3 -; CHECK-LE-NEXT: movn $3, $7, $1 -; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: andi $1, $6, 32 +; CHECK-LE-NEXT: move $3, $5 +; CHECK-LE-NEXT: movz $3, $4, $1 +; CHECK-LE-NEXT: andi $7, $6, 31 +; CHECK-LE-NEXT: srlv $2, $3, $7 +; CHECK-LE-NEXT: movz $4, $5, $1 +; CHECK-LE-NEXT: sll $1, $4, 1 +; CHECK-LE-NEXT: not $5, $6 +; CHECK-LE-NEXT: andi $5, $5, 31 +; CHECK-LE-NEXT: sllv $1, $1, $5 +; CHECK-LE-NEXT: or $2, $1, $2 +; CHECK-LE-NEXT: srlv $1, $4, $7 +; CHECK-LE-NEXT: sll $3, $3, 1 +; CHECK-LE-NEXT: sllv $3, $3, $5 ; CHECK-LE-NEXT: jr $ra -; CHECK-LE-NEXT: or $3, $10, $3 
+; CHECK-LE-NEXT: or $3, $3, $1 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) ret i64 %f } diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll index 99029b7b9410c..d4f47318ebb18 100644 --- a/llvm/test/CodeGen/Mips/funnel-shift.ll +++ b/llvm/test/CodeGen/Mips/funnel-shift.ll @@ -72,37 +72,25 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-BE-NEXT: jal __umoddi3 ; CHECK-BE-NEXT: addiu $7, $zero, 37 ; CHECK-BE-NEXT: not $1, $3 -; CHECK-BE-NEXT: andi $2, $3, 63 -; CHECK-BE-NEXT: not $4, $2 -; CHECK-BE-NEXT: srl $5, $18, 1 -; CHECK-BE-NEXT: sllv $6, $19, $2 -; CHECK-BE-NEXT: srlv $4, $5, $4 -; CHECK-BE-NEXT: andi $5, $1, 63 -; CHECK-BE-NEXT: srl $7, $16, 5 -; CHECK-BE-NEXT: sll $8, $17, 27 -; CHECK-BE-NEXT: or $7, $8, $7 -; CHECK-BE-NEXT: srl $8, $7, 1 -; CHECK-BE-NEXT: srlv $9, $8, $5 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: move $10, $9 -; CHECK-BE-NEXT: movn $10, $zero, $1 -; CHECK-BE-NEXT: or $4, $6, $4 -; CHECK-BE-NEXT: sllv $6, $18, $2 -; CHECK-BE-NEXT: andi $3, $3, 32 -; CHECK-BE-NEXT: movn $4, $6, $3 -; CHECK-BE-NEXT: sll $7, $7, 31 -; CHECK-BE-NEXT: sll $2, $16, 27 -; CHECK-BE-NEXT: srl $11, $2, 1 -; CHECK-BE-NEXT: or $2, $4, $10 -; CHECK-BE-NEXT: movn $6, $zero, $3 -; CHECK-BE-NEXT: or $3, $11, $7 -; CHECK-BE-NEXT: srlv $3, $3, $5 -; CHECK-BE-NEXT: not $4, $5 -; CHECK-BE-NEXT: sll $5, $8, 1 -; CHECK-BE-NEXT: sllv $4, $5, $4 -; CHECK-BE-NEXT: or $3, $4, $3 -; CHECK-BE-NEXT: movn $3, $9, $1 -; CHECK-BE-NEXT: or $3, $6, $3 +; CHECK-BE-NEXT: srl $2, $3, 5 +; CHECK-BE-NEXT: andi $4, $2, 1 +; CHECK-BE-NEXT: movn $19, $18, $4 +; CHECK-BE-NEXT: andi $3, $3, 31 +; CHECK-BE-NEXT: sllv $2, $19, $3 +; CHECK-BE-NEXT: andi $1, $1, 31 +; CHECK-BE-NEXT: srl $5, $16, 5 +; CHECK-BE-NEXT: sll $6, $17, 27 +; CHECK-BE-NEXT: or $5, $6, $5 +; CHECK-BE-NEXT: movn $18, $5, $4 +; CHECK-BE-NEXT: srl $6, $18, 1 +; CHECK-BE-NEXT: srlv $6, $6, $1 +; CHECK-BE-NEXT: or $2, $2, $6 +; CHECK-BE-NEXT: sllv $3, $18, $3 +; CHECK-BE-NEXT: sll $6, $16, 27 +; CHECK-BE-NEXT: movn $5, $6, $4 +; CHECK-BE-NEXT: srl $4, $5, 1 +; CHECK-BE-NEXT: srlv $1, $4, $1 +; CHECK-BE-NEXT: or $3, $3, $1 ; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload @@ -134,38 +122,27 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LE-NEXT: addiu $6, $zero, 37 ; CHECK-LE-NEXT: jal __umoddi3 ; CHECK-LE-NEXT: addiu $7, $zero, 0 -; CHECK-LE-NEXT: not $1, $2 -; CHECK-LE-NEXT: andi $3, $2, 63 -; CHECK-LE-NEXT: not $4, $3 -; CHECK-LE-NEXT: srl $5, $19, 1 -; CHECK-LE-NEXT: sllv $6, $18, $3 -; CHECK-LE-NEXT: srlv $4, $5, $4 -; CHECK-LE-NEXT: andi $5, $1, 63 -; CHECK-LE-NEXT: srl $7, $17, 5 -; CHECK-LE-NEXT: sll $8, $16, 27 -; CHECK-LE-NEXT: or $7, $8, $7 -; CHECK-LE-NEXT: srl $8, $7, 1 -; CHECK-LE-NEXT: srlv $9, $8, $5 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: move $10, $9 -; CHECK-LE-NEXT: movn $10, $zero, $1 -; CHECK-LE-NEXT: or $4, $6, $4 -; CHECK-LE-NEXT: sllv $6, $19, $3 -; CHECK-LE-NEXT: andi $2, $2, 32 -; CHECK-LE-NEXT: movn $4, $6, $2 -; CHECK-LE-NEXT: sll $7, $7, 31 -; CHECK-LE-NEXT: sll $3, $17, 27 -; CHECK-LE-NEXT: srl $11, $3, 1 -; CHECK-LE-NEXT: or $3, $4, $10 -; CHECK-LE-NEXT: movn $6, $zero, $2 -; CHECK-LE-NEXT: or $2, $11, $7 -; CHECK-LE-NEXT: srlv $2, $2, $5 -; CHECK-LE-NEXT: not $4, $5 -; CHECK-LE-NEXT: sll $5, $8, 1 -; CHECK-LE-NEXT: sllv $4, $5, $4 -; CHECK-LE-NEXT: or $2, $4, $2 -; CHECK-LE-NEXT: movn $2, $9, $1 +; CHECK-LE-NEXT: srl $1, 
$2, 5 +; CHECK-LE-NEXT: andi $1, $1, 1 +; CHECK-LE-NEXT: srl $3, $17, 5 +; CHECK-LE-NEXT: sll $4, $16, 27 +; CHECK-LE-NEXT: or $3, $4, $3 +; CHECK-LE-NEXT: move $4, $19 +; CHECK-LE-NEXT: movn $4, $3, $1 +; CHECK-LE-NEXT: andi $5, $2, 31 +; CHECK-LE-NEXT: sllv $6, $4, $5 +; CHECK-LE-NEXT: not $2, $2 +; CHECK-LE-NEXT: andi $7, $2, 31 +; CHECK-LE-NEXT: sll $2, $17, 27 +; CHECK-LE-NEXT: movn $3, $2, $1 +; CHECK-LE-NEXT: srl $2, $3, 1 +; CHECK-LE-NEXT: srlv $2, $2, $7 ; CHECK-LE-NEXT: or $2, $6, $2 +; CHECK-LE-NEXT: movn $18, $19, $1 +; CHECK-LE-NEXT: sllv $1, $18, $5 +; CHECK-LE-NEXT: srl $3, $4, 1 +; CHECK-LE-NEXT: srlv $3, $3, $7 +; CHECK-LE-NEXT: or $3, $1, $3 ; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload ; CHECK-LE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload @@ -250,15 +227,15 @@ define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { ; CHECK-BE-NEXT: srl $1, $6, 23 ; CHECK-BE-NEXT: sll $2, $5, 9 ; CHECK-BE-NEXT: or $2, $2, $1 -; CHECK-BE-NEXT: sll $1, $6, 9 -; CHECK-BE-NEXT: srl $3, $7, 23 +; CHECK-BE-NEXT: srl $1, $7, 23 +; CHECK-BE-NEXT: sll $3, $6, 9 ; CHECK-BE-NEXT: jr $ra ; CHECK-BE-NEXT: or $3, $3, $1 ; ; CHECK-LE-LABEL: fshl_i64_const_overshift: ; CHECK-LE: # %bb.0: -; CHECK-LE-NEXT: sll $1, $7, 9 -; CHECK-LE-NEXT: srl $2, $6, 23 +; CHECK-LE-NEXT: srl $1, $6, 23 +; CHECK-LE-NEXT: sll $2, $7, 9 ; CHECK-LE-NEXT: or $2, $2, $1 ; CHECK-LE-NEXT: srl $1, $7, 23 ; CHECK-LE-NEXT: sll $3, $4, 9 @@ -338,40 +315,25 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-BE-NEXT: jal __umoddi3 ; CHECK-BE-NEXT: addiu $7, $zero, 37 ; CHECK-BE-NEXT: addiu $1, $3, 27 -; CHECK-BE-NEXT: andi $2, $1, 63 -; CHECK-BE-NEXT: not $3, $2 -; CHECK-BE-NEXT: srl $4, $16, 5 -; CHECK-BE-NEXT: sll $5, $17, 27 -; CHECK-BE-NEXT: or $4, $5, $4 -; CHECK-BE-NEXT: sll $5, $4, 1 -; CHECK-BE-NEXT: sll $6, $16, 27 -; CHECK-BE-NEXT: srlv $6, $6, $2 -; CHECK-BE-NEXT: sllv $3, $5, $3 -; CHECK-BE-NEXT: not $5, $1 -; CHECK-BE-NEXT: andi $7, $5, 63 -; CHECK-BE-NEXT: sll $8, $18, 1 -; CHECK-BE-NEXT: sllv $8, $8, $7 -; CHECK-BE-NEXT: andi $5, $5, 32 -; CHECK-BE-NEXT: move $9, $8 -; CHECK-BE-NEXT: movn $9, $zero, $5 -; CHECK-BE-NEXT: or $3, $3, $6 -; CHECK-BE-NEXT: srlv $2, $4, $2 -; CHECK-BE-NEXT: andi $1, $1, 32 -; CHECK-BE-NEXT: movn $3, $2, $1 -; CHECK-BE-NEXT: srl $4, $18, 31 +; CHECK-BE-NEXT: andi $3, $1, 32 +; CHECK-BE-NEXT: srl $2, $16, 5 +; CHECK-BE-NEXT: sll $4, $17, 27 +; CHECK-BE-NEXT: or $4, $4, $2 +; CHECK-BE-NEXT: movz $19, $18, $3 +; CHECK-BE-NEXT: movz $18, $4, $3 +; CHECK-BE-NEXT: andi $5, $1, 31 +; CHECK-BE-NEXT: srlv $2, $18, $5 +; CHECK-BE-NEXT: not $1, $1 +; CHECK-BE-NEXT: andi $1, $1, 31 ; CHECK-BE-NEXT: sll $6, $19, 1 -; CHECK-BE-NEXT: or $4, $6, $4 -; CHECK-BE-NEXT: or $3, $9, $3 -; CHECK-BE-NEXT: movn $2, $zero, $1 -; CHECK-BE-NEXT: sllv $1, $4, $7 -; CHECK-BE-NEXT: not $4, $7 -; CHECK-BE-NEXT: lui $6, 32767 -; CHECK-BE-NEXT: ori $6, $6, 65535 -; CHECK-BE-NEXT: and $6, $18, $6 -; CHECK-BE-NEXT: srlv $4, $6, $4 -; CHECK-BE-NEXT: or $1, $1, $4 -; CHECK-BE-NEXT: movn $1, $8, $5 -; CHECK-BE-NEXT: or $2, $1, $2 +; CHECK-BE-NEXT: sllv $6, $6, $1 +; CHECK-BE-NEXT: or $2, $6, $2 +; CHECK-BE-NEXT: sll $6, $16, 27 +; CHECK-BE-NEXT: movz $4, $6, $3 +; CHECK-BE-NEXT: srlv $3, $4, $5 +; CHECK-BE-NEXT: sll $4, $18, 1 +; CHECK-BE-NEXT: sllv $1, $4, $1 +; CHECK-BE-NEXT: or $3, $1, $3 ; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload ; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload @@ 
-404,39 +366,25 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-LE-NEXT: jal __umoddi3 ; CHECK-LE-NEXT: addiu $7, $zero, 0 ; CHECK-LE-NEXT: addiu $1, $2, 27 -; CHECK-LE-NEXT: andi $2, $1, 63 -; CHECK-LE-NEXT: not $3, $2 -; CHECK-LE-NEXT: srl $4, $17, 5 -; CHECK-LE-NEXT: sll $5, $16, 27 -; CHECK-LE-NEXT: or $4, $5, $4 -; CHECK-LE-NEXT: sll $5, $4, 1 -; CHECK-LE-NEXT: sll $6, $17, 27 -; CHECK-LE-NEXT: srlv $6, $6, $2 -; CHECK-LE-NEXT: sllv $3, $5, $3 -; CHECK-LE-NEXT: not $5, $1 -; CHECK-LE-NEXT: andi $7, $5, 63 -; CHECK-LE-NEXT: sll $8, $19, 1 -; CHECK-LE-NEXT: sllv $8, $8, $7 -; CHECK-LE-NEXT: andi $5, $5, 32 -; CHECK-LE-NEXT: move $9, $8 -; CHECK-LE-NEXT: movn $9, $zero, $5 -; CHECK-LE-NEXT: or $3, $3, $6 -; CHECK-LE-NEXT: srlv $4, $4, $2 -; CHECK-LE-NEXT: andi $1, $1, 32 -; CHECK-LE-NEXT: movn $3, $4, $1 -; CHECK-LE-NEXT: srl $2, $19, 31 -; CHECK-LE-NEXT: sll $6, $18, 1 -; CHECK-LE-NEXT: or $6, $6, $2 -; CHECK-LE-NEXT: or $2, $9, $3 -; CHECK-LE-NEXT: movn $4, $zero, $1 -; CHECK-LE-NEXT: sllv $1, $6, $7 -; CHECK-LE-NEXT: not $3, $7 -; CHECK-LE-NEXT: lui $6, 32767 -; CHECK-LE-NEXT: ori $6, $6, 65535 -; CHECK-LE-NEXT: and $6, $19, $6 -; CHECK-LE-NEXT: srlv $3, $6, $3 -; CHECK-LE-NEXT: or $1, $1, $3 -; CHECK-LE-NEXT: movn $1, $8, $5 +; CHECK-LE-NEXT: andi $3, $1, 32 +; CHECK-LE-NEXT: srl $2, $17, 5 +; CHECK-LE-NEXT: sll $4, $16, 27 +; CHECK-LE-NEXT: or $2, $4, $2 +; CHECK-LE-NEXT: sll $4, $17, 27 +; CHECK-LE-NEXT: move $5, $19 +; CHECK-LE-NEXT: movz $5, $2, $3 +; CHECK-LE-NEXT: movz $2, $4, $3 +; CHECK-LE-NEXT: andi $4, $1, 31 +; CHECK-LE-NEXT: srlv $2, $2, $4 +; CHECK-LE-NEXT: not $1, $1 +; CHECK-LE-NEXT: andi $1, $1, 31 +; CHECK-LE-NEXT: sll $6, $5, 1 +; CHECK-LE-NEXT: sllv $6, $6, $1 +; CHECK-LE-NEXT: or $2, $6, $2 +; CHECK-LE-NEXT: srlv $4, $5, $4 +; CHECK-LE-NEXT: movz $18, $19, $3 +; CHECK-LE-NEXT: sll $3, $18, 1 +; CHECK-LE-NEXT: sllv $1, $3, $1 ; CHECK-LE-NEXT: or $3, $1, $4 ; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload ; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.ll b/llvm/test/CodeGen/NVPTX/branch-fold.ll index 2b9cd0a35d929..336147f1f9925 100644 --- a/llvm/test/CodeGen/NVPTX/branch-fold.ll +++ b/llvm/test/CodeGen/NVPTX/branch-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -disable-cgp -verify-machineinstrs | FileCheck %s ; Disable CGP which also folds branches, so that only BranchFolding is under ; the spotlight. 
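The Mips diffs above all exercise the funnel-shift intrinsics, so it is worth keeping their definition in hand while reading the new CHECK lines: fshl concatenates its first two operands into a double-width value, shifts that value left by the third operand taken modulo the bit width, and keeps the high half; fshr is the mirror image, and a rotate is simply a funnel shift whose two data operands are the same register. A minimal C++ sketch of that definition (my paraphrase of the semantics for illustration, not code from this patch):

#include <cstdint>

// fshl(x, y, z) on i64: the top 64 bits of the 128-bit value (x:y)
// shifted left by z % 64.
uint64_t fshl64(uint64_t x, uint64_t y, uint64_t z) {
  unsigned amt = z % 64;
  if (amt == 0)
    return x; // sidesteps the out-of-range shift y >> 64
  return (x << amt) | (y >> (64 - amt));
}

// rotl(x, z) == fshl(x, x, z), which is exactly the pattern the
// rotl_i64 and rotr_i64 tests above hand to the backend.
uint64_t rotl64(uint64_t x, uint64_t z) { return fshl64(x, x, z); }

Note that the i37 variants still have to reduce the shift amount with a call to __umoddi3 first, because 37 is not a power of two; the updated CHECK lines above keep that call and only simplify the shifting that follows it.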
diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir new file mode 100644 index 0000000000000..8bdac44c4f235 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir @@ -0,0 +1,86 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -o - %s -march=nvptx64 -mcpu=sm_35 -run-pass=branch-folder | FileCheck %s + +--- | + ; ModuleID = '/mnt/nas/asavonic/work/llvm/llvm/test/CodeGen/NVPTX/branch-fold.ll' + source_filename = "/mnt/nas/asavonic/work/llvm/llvm/test/CodeGen/NVPTX/branch-fold.ll" + target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" + target triple = "nvptx64-nvidia-cuda" + + define ptx_kernel void @hoge() { + bb: + br i1 undef, label %bb1.preheader, label %bb4.preheader + + bb1.preheader: ; preds = %bb + br label %bb1 + + bb1: ; preds = %bb1.preheader, %bb1 + %lsr.iv = phi i64 [ undef, %bb1.preheader ], [ %lsr.iv.next, %bb1 ] + %lsr.iv.next = add i64 %lsr.iv, 1 + %tmp3 = icmp sle i64 %lsr.iv.next, 0 + br i1 %tmp3, label %bb1, label %bb4.preheader + + bb4.preheader: ; preds = %bb1, %bb + br label %bb4 + + bb4: ; preds = %bb4.preheader, %bb4 + br label %bb4 + } + +... +--- +name: hoge +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: int64regs } + - { id: 1, class: int64regs } + - { id: 2, class: int1regs } + - { id: 3, class: int64regs } + - { id: 4, class: int1regs } + - { id: 5, class: int64regs } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: hoge + ; CHECK: bb.0.bb: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBranch undef %2:int1regs, %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.bb1.preheader: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:int64regs = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.bb1: + ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:int64regs = ADDi64ri [[ADDi64ri]], 1 + ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:int1regs = SETP_s64ri [[ADDi64ri]], 1, 2 + ; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.bb4: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: GOTO %bb.3 + bb.0.bb: + successors: %bb.1, %bb.3 + + CBranch undef %2:int1regs, %bb.3 + + bb.1.bb1.preheader: + %5:int64regs = IMPLICIT_DEF + + bb.2.bb1: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + + %5:int64regs = ADDi64ri %5, 1 + %4:int1regs = SETP_s64ri %5, 1, 2 + CBranch %4, %bb.2 + + bb.3.bb4: + GOTO %bb.3 + +... 
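The shape of the improvement is the same in the Mips output above and in the PowerPC funnel-shift diffs further down: once the amount has been reduced modulo a power-of-two width, bit 5 of it (amount & 32) says whether a double-word shift crosses a word boundary, and the low five bits (amount & 31) are always a legal single-word shift amount, so one word-swapping select plus two shift/or pairs replace the old negate/mask/overshift sequences. A C++ sketch of that expansion, reconstructed from the new CHECK lines for illustration rather than copied from the compiler:

#include <cstdint>

struct U64 { uint32_t lo, hi; }; // an i64 split into 32-bit halves

U64 fshl64_on32(U64 x, U64 y, uint32_t z) {
  bool cross = z & 32;                // crosses a 32-bit word boundary?
  uint32_t hi  = cross ? x.lo : x.hi; // pre-swap whole words with selects
  uint32_t mid = cross ? y.hi : x.lo; // (movn/movz on Mips, bc/iseleq on PPC)
  uint32_t lo  = cross ? y.lo : y.hi;
  uint32_t amt = z & 31;              // remaining amount is always < 32
  auto funnel = [&](uint32_t a, uint32_t b) {
    // Branch-free (a << amt) | (b >> (32 - amt)): shifting b right by 1
    // and then by (~z & 31) == 31 - amt keeps every shift in range even
    // when amt == 0. This is the srl-by-1/not/andi-31 idiom in the Mips
    // output; the PowerPC output can use subfic 32-amt directly because
    // its shift instructions tolerate an amount of 32.
    return (a << amt) | ((b >> 1) >> (~z & 31));
  };
  return {funnel(mid, lo), funnel(hi, mid)};
}

The i128 PowerPC cases apply the same recipe twice, selecting on (amount & 64) and then on (amount & 32) before doing per-word shifts, which is why the long spill-heavy sequences in fshl_i128 disappear.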
diff --git a/llvm/test/CodeGen/NVPTX/envreg.ll b/llvm/test/CodeGen/NVPTX/envreg.ll index a341b49ecdf3d..8ab5816e68ed2 100644 --- a/llvm/test/CodeGen/NVPTX/envreg.ll +++ b/llvm/test/CodeGen/NVPTX/envreg.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s declare i32 @llvm.nvvm.read.ptx.sreg.envreg0() diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index 4d74b44bc100e..d7936c5021b8b 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -1,16 +1,21 @@ -; RUN: llc < %s -mcpu=sm_20 | FileCheck %s - -target triple = "nvptx64-nvidia-cuda" +; RUN: llc < %s -mtriple nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK64 +; RUN: llc < %s -mtriple nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK32 %struct.ham = type { [4 x i32] } ; // Verify that load with static offset into parameter is done directly. ; CHECK-LABEL: .visible .entry static_offset ; CHECK-NOT: .local -; CHECK: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] -; CHECK: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1 -; CHECK: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]] -; CHECK: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] +; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] +; CHECK64: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1 +; CHECK64: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]] +; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] +; +; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0] +; CHECK32: mov.b32 %[[param_addr:r[0-9]+]], {{.*}}_param_1 +; CHECK32: mov.u32 %[[param_addr1:r[0-9]+]], %[[param_addr]] +; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]] +; ; CHECK: ld.param.u32 [[value:%r[0-9]+]], [%[[param_addr1]]+12]; ; CHECK: st.global.u32 [[[result_addr_g]]], [[value]]; ; Function Attrs: nofree norecurse nounwind willreturn mustprogress @@ -32,11 +37,18 @@ bb6: ; preds = %bb3, %bb ; // Verify that load with dynamic offset into parameter is also done directly.
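; An illustrative aside, not part of the original test: the C++-level
; pattern these byval tests model. When the parameter's address never
; escapes, the backend may index the kernel parameter space directly
; instead of materializing a stack copy:
;   struct ham { int arr[4]; };
;   void dynamic_offset(int *out, ham in, int n) { *out = in.arr[n]; }
; Contrast with pointer_escapes further down, where the address does
; escape and the .local __local_depot copy becomes necessary.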
; CHECK-LABEL: .visible .entry dynamic_offset ; CHECK-NOT: .local -; CHECK: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] -; CHECK: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1 -; CHECK: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]] -; CHECK: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] -; CHECK: add.s64 %[[param_w_offset:rd[0-9]+]], %[[param_addr1]], +; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] +; CHECK64: mov.b64 %[[param_addr:rd[0-9]+]], {{.*}}_param_1 +; CHECK64: mov.u64 %[[param_addr1:rd[0-9]+]], %[[param_addr]] +; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] +; CHECK64: add.s64 %[[param_w_offset:rd[0-9]+]], %[[param_addr1]], +; +; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0] +; CHECK32: mov.b32 %[[param_addr:r[0-9]+]], {{.*}}_param_1 +; CHECK32: mov.u32 %[[param_addr1:r[0-9]+]], %[[param_addr]] +; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]] +; CHECK32: add.s32 %[[param_w_offset:r[0-9]+]], %[[param_addr1]], +; ; CHECK: ld.param.u32 [[value:%r[0-9]+]], [%[[param_w_offset]]]; ; CHECK: st.global.u32 [[[result_addr_g]]], [[value]]; @@ -53,11 +65,17 @@ bb: ; Same as above, but with a bitcast present in the chain ; CHECK-LABEL:.visible .entry gep_bitcast ; CHECK-NOT: .local -; CHECK-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_param_0] -; CHECK-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_param_1 +; CHECK64-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_param_0] +; CHECK64-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_param_1 +; +; CHECK32-DAG: ld.param.u32 [[out:%r[0-9]+]], [gep_bitcast_param_0] +; CHECK32-DAG: mov.b32 {{%r[0-9]+}}, gep_bitcast_param_1 +; ; CHECK-DAG: ld.param.u32 {{%r[0-9]+}}, [gep_bitcast_param_2] -; CHECK: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}] -; CHECK: st.global.u8 [{{%rd[0-9]+}}], [[value]]; +; CHECK64: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK64: st.global.u8 [{{%rd[0-9]+}}], [[value]]; +; CHECK32: ld.param.u8 [[value:%rs[0-9]+]], [{{%r[0-9]+}}] +; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]]; ; ; Function Attrs: nofree norecurse nounwind willreturn mustprogress define dso_local void @gep_bitcast(i8* nocapture %out, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 { @@ -73,11 +91,17 @@ bb: ; Same as above, but with an ASC(101) present in the chain ; CHECK-LABEL:.visible .entry gep_bitcast_asc ; CHECK-NOT: .local -; CHECK-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_asc_param_0] -; CHECK-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_asc_param_1 +; CHECK64-DAG: ld.param.u64 [[out:%rd[0-9]+]], [gep_bitcast_asc_param_0] +; CHECK64-DAG: mov.b64 {{%rd[0-9]+}}, gep_bitcast_asc_param_1 +; +; CHECK32-DAG: ld.param.u32 [[out:%r[0-9]+]], [gep_bitcast_asc_param_0] +; CHECK32-DAG: mov.b32 {{%r[0-9]+}}, gep_bitcast_asc_param_1 +; ; CHECK-DAG: ld.param.u32 {{%r[0-9]+}}, [gep_bitcast_asc_param_2] -; CHECK: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}] -; CHECK: st.global.u8 [{{%rd[0-9]+}}], [[value]]; +; CHECK64: ld.param.u8 [[value:%rs[0-9]+]], [{{%rd[0-9]+}}] +; CHECK64: st.global.u8 [{{%rd[0-9]+}}], [[value]]; +; CHECK32: ld.param.u8 [[value:%rs[0-9]+]], [{{%r[0-9]+}}] +; CHECK32: st.global.u8 [{{%r[0-9]+}}], [[value]]; ; ; Function Attrs: nofree norecurse nounwind willreturn mustprogress define dso_local void @gep_bitcast_asc(i8* nocapture %out, %struct.ham* nocapture readonly byval(%struct.ham) align 4 %in, i32 %n) local_unnamed_addr #0 { @@ -95,8 +119,10 @@ bb: ;
Verify that if the pointer escapes, we fall back to using a temp copy. ; CHECK-LABEL: .visible .entry pointer_escapes ; CHECK: .local .align 8 .b8 __local_depot{{.*}} -; CHECK: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] -; CHECK: add.u64 %[[copy_addr:rd[0-9]+]], %SPL, 0; +; CHECK64: ld.param.u64 [[result_addr:%rd[0-9]+]], [{{.*}}_param_0] +; CHECK64: add.u64 %[[copy_addr:rd[0-9]+]], %SPL, 0; +; CHECK32: ld.param.u32 [[result_addr:%r[0-9]+]], [{{.*}}_param_0] +; CHECK32: add.u32 %[[copy_addr:r[0-9]+]], %SPL, 0; ; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+12]; ; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+8]; ; CHECK-DAG: ld.param.u32 %{{.*}}, [pointer_escapes_param_1+4]; @@ -105,8 +131,10 @@ bb: ; CHECK-DAG: st.local.u32 [%[[copy_addr]]+8], ; CHECK-DAG: st.local.u32 [%[[copy_addr]]+4], ; CHECK-DAG: st.local.u32 [%[[copy_addr]]], -; CHECK: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] -; CHECK: add.s64 %[[copy_w_offset:rd[0-9]+]], %[[copy_addr]], +; CHECK64: cvta.to.global.u64 [[result_addr_g:%rd[0-9]+]], [[result_addr]] +; CHECK64: add.s64 %[[copy_w_offset:rd[0-9]+]], %[[copy_addr]], +; CHECK32: cvta.to.global.u32 [[result_addr_g:%r[0-9]+]], [[result_addr]] +; CHECK32: add.s32 %[[copy_w_offset:r[0-9]+]], %[[copy_addr]], ; CHECK: ld.local.u32 [[value:%r[0-9]+]], [%[[copy_w_offset]]]; ; CHECK: st.global.u32 [[[result_addr_g]]], [[value]]; diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index 0d5bb96e57fde..1c55f4e43d1a7 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -640,24 +640,20 @@ entry: define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture readonly %ptr, i32 signext %offset) local_unnamed_addr #0 { ; CHECK-P8-LABEL: no_RAUW_in_combine_during_legalize: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: addis r5, r2, .LCPI16_0@toc@ha ; CHECK-P8-NEXT: sldi r4, r4, 2 -; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: addi r5, r5, .LCPI16_0@toc@l -; CHECK-P8-NEXT: lxsiwzx v2, r3, r4 -; CHECK-P8-NEXT: lvx v3, 0, r5 -; CHECK-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-P8-NEXT: lfiwzx f0, r3, r4 +; CHECK-P8-NEXT: xxspltd v2, f0, 0 +; CHECK-P8-NEXT: vmrglb v2, v3, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: no_RAUW_in_combine_during_legalize: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: sldi r4, r4, 2 -; CHECK-P9-NEXT: xxlxor v4, v4, v4 -; CHECK-P9-NEXT: lxsiwzx v2, r3, r4 -; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha -; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l -; CHECK-P9-NEXT: lxv v3, 0(r3) -; CHECK-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: lfiwzx f0, r3, r4 +; CHECK-P9-NEXT: xxspltd v2, f0, 0 +; CHECK-P9-NEXT: vmrglb v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-P9-BE-LABEL: no_RAUW_in_combine_during_legalize: @@ -682,12 +678,9 @@ define dso_local <16 x i8> @no_RAUW_in_combine_during_legalize(i32* nocapture re ; CHECK-P7-LABEL: no_RAUW_in_combine_during_legalize: ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: sldi r4, r4, 2 -; CHECK-P7-NEXT: addi r5, r1, -16 ; CHECK-P7-NEXT: xxlxor v3, v3, v3 -; CHECK-P7-NEXT: lwzx r3, r3, r4 -; CHECK-P7-NEXT: std r3, -16(r1) -; CHECK-P7-NEXT: lxvd2x vs0, 0, r5 -; CHECK-P7-NEXT: xxswapd v2, vs0 +; CHECK-P7-NEXT: lfiwzx f0, r3, r4 +; CHECK-P7-NEXT: xxspltd v2, f0, 0 ; CHECK-P7-NEXT: vmrglb v2, v3, v2 ; CHECK-P7-NEXT: blr entry: @@ -831,8
+824,8 @@ entry: define dso_local void @testByteSplat() #0 { ; CHECK-P8-LABEL: testByteSplat: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lbz r3, 0(r3) -; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: lbzx r3, 0, r3 +; CHECK-P8-NEXT: mtvsrwz v2, r3 ; CHECK-P8-NEXT: vspltb v2, v2, 7 ; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: blr @@ -863,10 +856,9 @@ define dso_local void @testByteSplat() #0 { ; ; CHECK-P7-LABEL: testByteSplat: ; CHECK-P7: # %bb.0: # %entry -; CHECK-P7-NEXT: lbz r3, 0(r3) -; CHECK-P7-NEXT: stb r3, -16(r1) -; CHECK-P7-NEXT: addi r3, r1, -16 -; CHECK-P7-NEXT: lvx v2, 0, r3 +; CHECK-P7-NEXT: lvsr v2, 0, r3 +; CHECK-P7-NEXT: lvx v3, 0, r3 +; CHECK-P7-NEXT: vperm v2, v3, v3, v2 ; CHECK-P7-NEXT: vspltb v2, v2, 15 ; CHECK-P7-NEXT: stvx v2, 0, r3 ; CHECK-P7-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll index 9067799709048..71061fef3e390 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -ppc-formprep-chain-commoning \ ; RUN: -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 < %s | FileCheck %s ; Test that on 32 bit AIX, the chain commoning still works without crash. diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index 231ea4306e683..595b4836f4367 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs -ppc-formprep-chain-commoning \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s ; addresses: diff --git a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll index 0fcf5c81c4bf0..61c41dcef982a 100644 --- a/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll +++ b/llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll @@ -1403,3 +1403,96 @@ entry: store i8 %conv, i8* %res, align 1 ret void } + +define void @qpConvppcf128(fp128 %src, ppc_fp128* %dst) { +; CHECK-LABEL: qpConvppcf128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r5 +; CHECK-NEXT: bl __extendkftf2 +; CHECK-NEXT: nop +; CHECK-NEXT: stfd f2, 8(r30) +; CHECK-NEXT: stfd f1, 0(r30) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: qpConvppcf128: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: .cfi_offset r30, -16 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: bl __extendkftf2 +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: stfd f2, 8(r30) +; CHECK-P8-NEXT: stfd f1, 0(r30) +; CHECK-P8-NEXT: addi r1, r1, 48 +; CHECK-P8-NEXT: 
ld r0, 16(r1) +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr +entry: + %res = call ppc_fp128 @llvm.ppc.convert.f128.to.ppcf128(fp128 %src) + store ppc_fp128 %res, ppc_fp128* %dst, align 16 + ret void +} + +define void @ppcf128Convqp(ppc_fp128 %src, fp128* %dst) { +; CHECK-LABEL: ppcf128Convqp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: mr r30, r5 +; CHECK-NEXT: bl __trunctfkf2 +; CHECK-NEXT: nop +; CHECK-NEXT: stxv v2, 0(r30) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; CHECK-P8-LABEL: ppcf128Convqp: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: mflr r0 +; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 +; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: .cfi_offset r30, -16 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: std r0, 16(r1) +; CHECK-P8-NEXT: stdu r1, -48(r1) +; CHECK-P8-NEXT: mr r30, r5 +; CHECK-P8-NEXT: bl __trunctfkf2 +; CHECK-P8-NEXT: nop +; CHECK-P8-NEXT: stvx v2, 0, r30 +; CHECK-P8-NEXT: addi r1, r1, 48 +; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: mtlr r0 +; CHECK-P8-NEXT: blr +entry: + %res = call fp128 @llvm.ppc.convert.ppcf128.to.f128(ppc_fp128 %src) + store fp128 %res, fp128* %dst, align 16 + ret void +} + +declare ppc_fp128 @llvm.ppc.convert.f128.to.ppcf128(fp128) +declare fp128 @llvm.ppc.convert.ppcf128.to.f128(ppc_fp128) diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll index c44304d1c8ed5..0a622fd68d6b3 100644 --- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll @@ -87,52 +87,44 @@ define i32 @rotl_i32(i32 %x, i32 %z) { define i64 @rotl_i64(i64 %x, i64 %z) { ; CHECK32_32-LABEL: rotl_i64: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: clrlwi 5, 6, 26 -; CHECK32_32-NEXT: subfic 8, 5, 32 -; CHECK32_32-NEXT: neg 6, 6 -; CHECK32_32-NEXT: slw 7, 3, 5 -; CHECK32_32-NEXT: addi 9, 5, -32 -; CHECK32_32-NEXT: srw 8, 4, 8 -; CHECK32_32-NEXT: clrlwi 6, 6, 26 -; CHECK32_32-NEXT: slw 9, 4, 9 -; CHECK32_32-NEXT: or 7, 7, 8 -; CHECK32_32-NEXT: subfic 8, 6, 32 -; CHECK32_32-NEXT: or 7, 7, 9 -; CHECK32_32-NEXT: addi 9, 6, -32 -; CHECK32_32-NEXT: slw 8, 3, 8 -; CHECK32_32-NEXT: srw 9, 3, 9 -; CHECK32_32-NEXT: srw 3, 3, 6 -; CHECK32_32-NEXT: srw 6, 4, 6 -; CHECK32_32-NEXT: or 6, 6, 8 -; CHECK32_32-NEXT: or 6, 6, 9 -; CHECK32_32-NEXT: slw 4, 4, 5 -; CHECK32_32-NEXT: or 3, 7, 3 -; CHECK32_32-NEXT: or 4, 4, 6 +; CHECK32_32-NEXT: andi. 
5, 6, 32 +; CHECK32_32-NEXT: clrlwi 5, 6, 27 +; CHECK32_32-NEXT: subfic 6, 5, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB4_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 7, 3, 0 +; CHECK32_32-NEXT: ori 3, 4, 0 +; CHECK32_32-NEXT: b .LBB4_3 +; CHECK32_32-NEXT: .LBB4_2: +; CHECK32_32-NEXT: addi 7, 4, 0 +; CHECK32_32-NEXT: .LBB4_3: +; CHECK32_32-NEXT: srw 4, 7, 6 +; CHECK32_32-NEXT: slw 8, 3, 5 +; CHECK32_32-NEXT: srw 6, 3, 6 +; CHECK32_32-NEXT: slw 5, 7, 5 +; CHECK32_32-NEXT: or 3, 8, 4 +; CHECK32_32-NEXT: or 4, 5, 6 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: rotl_i64: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: clrlwi 5, 6, 26 -; CHECK32_64-NEXT: neg 6, 6 -; CHECK32_64-NEXT: subfic 8, 5, 32 -; CHECK32_64-NEXT: slw 7, 3, 5 -; CHECK32_64-NEXT: clrlwi 6, 6, 26 -; CHECK32_64-NEXT: srw 8, 4, 8 -; CHECK32_64-NEXT: addi 9, 5, -32 -; CHECK32_64-NEXT: or 7, 7, 8 -; CHECK32_64-NEXT: subfic 8, 6, 32 -; CHECK32_64-NEXT: slw 5, 4, 5 -; CHECK32_64-NEXT: slw 9, 4, 9 -; CHECK32_64-NEXT: srw 10, 3, 6 -; CHECK32_64-NEXT: srw 4, 4, 6 -; CHECK32_64-NEXT: addi 6, 6, -32 -; CHECK32_64-NEXT: slw 8, 3, 8 -; CHECK32_64-NEXT: srw 3, 3, 6 -; CHECK32_64-NEXT: or 4, 4, 8 -; CHECK32_64-NEXT: or 6, 7, 9 -; CHECK32_64-NEXT: or 4, 4, 3 -; CHECK32_64-NEXT: or 3, 6, 10 -; CHECK32_64-NEXT: or 4, 5, 4 +; CHECK32_64-NEXT: andi. 5, 6, 32 +; CHECK32_64-NEXT: clrlwi 5, 6, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB4_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 7, 3, 0 +; CHECK32_64-NEXT: ori 3, 4, 0 +; CHECK32_64-NEXT: b .LBB4_3 +; CHECK32_64-NEXT: .LBB4_2: +; CHECK32_64-NEXT: addi 7, 4, 0 +; CHECK32_64-NEXT: .LBB4_3: +; CHECK32_64-NEXT: subfic 6, 5, 32 +; CHECK32_64-NEXT: srw 4, 7, 6 +; CHECK32_64-NEXT: slw 8, 3, 5 +; CHECK32_64-NEXT: srw 6, 3, 6 +; CHECK32_64-NEXT: slw 5, 7, 5 +; CHECK32_64-NEXT: or 3, 8, 4 +; CHECK32_64-NEXT: or 4, 5, 6 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: rotl_i64: @@ -256,52 +248,44 @@ define i32 @rotr_i32(i32 %x, i32 %z) { define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK32_32-LABEL: rotr_i64: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: clrlwi 5, 6, 26 -; CHECK32_32-NEXT: subfic 8, 5, 32 -; CHECK32_32-NEXT: neg 6, 6 -; CHECK32_32-NEXT: srw 7, 4, 5 -; CHECK32_32-NEXT: addi 9, 5, -32 -; CHECK32_32-NEXT: slw 8, 3, 8 -; CHECK32_32-NEXT: clrlwi 6, 6, 26 -; CHECK32_32-NEXT: srw 9, 3, 9 -; CHECK32_32-NEXT: or 7, 7, 8 -; CHECK32_32-NEXT: subfic 8, 6, 32 -; CHECK32_32-NEXT: or 7, 7, 9 -; CHECK32_32-NEXT: addi 9, 6, -32 -; CHECK32_32-NEXT: srw 8, 4, 8 -; CHECK32_32-NEXT: slw 9, 4, 9 -; CHECK32_32-NEXT: slw 4, 4, 6 -; CHECK32_32-NEXT: slw 6, 3, 6 -; CHECK32_32-NEXT: or 6, 6, 8 -; CHECK32_32-NEXT: or 6, 6, 9 -; CHECK32_32-NEXT: srw 3, 3, 5 -; CHECK32_32-NEXT: or 4, 7, 4 -; CHECK32_32-NEXT: or 3, 3, 6 +; CHECK32_32-NEXT: andi. 
5, 6, 32 +; CHECK32_32-NEXT: clrlwi 5, 6, 27 +; CHECK32_32-NEXT: subfic 6, 5, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB11_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 7, 4, 0 +; CHECK32_32-NEXT: b .LBB11_3 +; CHECK32_32-NEXT: .LBB11_2: +; CHECK32_32-NEXT: addi 7, 3, 0 +; CHECK32_32-NEXT: addi 3, 4, 0 +; CHECK32_32-NEXT: .LBB11_3: +; CHECK32_32-NEXT: srw 4, 7, 5 +; CHECK32_32-NEXT: slw 8, 3, 6 +; CHECK32_32-NEXT: srw 5, 3, 5 +; CHECK32_32-NEXT: slw 6, 7, 6 +; CHECK32_32-NEXT: or 3, 8, 4 +; CHECK32_32-NEXT: or 4, 6, 5 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: rotr_i64: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: clrlwi 5, 6, 26 -; CHECK32_64-NEXT: neg 6, 6 -; CHECK32_64-NEXT: subfic 8, 5, 32 -; CHECK32_64-NEXT: srw 7, 4, 5 -; CHECK32_64-NEXT: clrlwi 6, 6, 26 -; CHECK32_64-NEXT: slw 8, 3, 8 -; CHECK32_64-NEXT: addi 9, 5, -32 -; CHECK32_64-NEXT: or 7, 7, 8 -; CHECK32_64-NEXT: subfic 8, 6, 32 +; CHECK32_64-NEXT: andi. 5, 6, 32 +; CHECK32_64-NEXT: clrlwi 5, 6, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB11_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 7, 4, 0 +; CHECK32_64-NEXT: b .LBB11_3 +; CHECK32_64-NEXT: .LBB11_2: +; CHECK32_64-NEXT: addi 7, 3, 0 +; CHECK32_64-NEXT: addi 3, 4, 0 +; CHECK32_64-NEXT: .LBB11_3: +; CHECK32_64-NEXT: subfic 6, 5, 32 +; CHECK32_64-NEXT: srw 4, 7, 5 +; CHECK32_64-NEXT: slw 8, 3, 6 ; CHECK32_64-NEXT: srw 5, 3, 5 -; CHECK32_64-NEXT: srw 9, 3, 9 -; CHECK32_64-NEXT: slw 10, 4, 6 -; CHECK32_64-NEXT: slw 3, 3, 6 -; CHECK32_64-NEXT: addi 6, 6, -32 -; CHECK32_64-NEXT: srw 8, 4, 8 -; CHECK32_64-NEXT: slw 4, 4, 6 -; CHECK32_64-NEXT: or 3, 3, 8 -; CHECK32_64-NEXT: or 6, 7, 9 -; CHECK32_64-NEXT: or 3, 3, 4 -; CHECK32_64-NEXT: or 4, 6, 10 -; CHECK32_64-NEXT: or 3, 5, 3 +; CHECK32_64-NEXT: slw 6, 7, 6 +; CHECK32_64-NEXT: or 3, 8, 4 +; CHECK32_64-NEXT: or 4, 6, 5 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: rotr_i64: diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll index c33904082f238..62b68e0b2cadd 100644 --- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll @@ -43,58 +43,47 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { ; CHECK32_32-LABEL: fshl_i64: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: clrlwi 7, 8, 26 -; CHECK32_32-NEXT: not 8, 8 -; CHECK32_32-NEXT: rotlwi 6, 6, 31 -; CHECK32_32-NEXT: subfic 10, 7, 32 -; CHECK32_32-NEXT: srwi 9, 5, 1 +; CHECK32_32-NEXT: andi. 
7, 8, 32 +; CHECK32_32-NEXT: clrlwi 7, 8, 27 +; CHECK32_32-NEXT: subfic 8, 7, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB1_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 9, 5, 0 +; CHECK32_32-NEXT: ori 3, 4, 0 +; CHECK32_32-NEXT: ori 4, 6, 0 +; CHECK32_32-NEXT: b .LBB1_3 +; CHECK32_32-NEXT: .LBB1_2: +; CHECK32_32-NEXT: addi 9, 4, 0 +; CHECK32_32-NEXT: addi 4, 5, 0 +; CHECK32_32-NEXT: .LBB1_3: +; CHECK32_32-NEXT: srw 5, 9, 8 ; CHECK32_32-NEXT: slw 3, 3, 7 -; CHECK32_32-NEXT: clrlwi 8, 8, 26 -; CHECK32_32-NEXT: rlwimi 6, 5, 31, 0, 0 -; CHECK32_32-NEXT: srw 5, 4, 10 -; CHECK32_32-NEXT: srw 10, 9, 8 -; CHECK32_32-NEXT: srw 6, 6, 8 +; CHECK32_32-NEXT: srw 4, 4, 8 +; CHECK32_32-NEXT: slw 6, 9, 7 ; CHECK32_32-NEXT: or 3, 3, 5 -; CHECK32_32-NEXT: subfic 5, 8, 32 -; CHECK32_32-NEXT: addi 8, 8, -32 -; CHECK32_32-NEXT: slw 5, 9, 5 -; CHECK32_32-NEXT: srw 8, 9, 8 -; CHECK32_32-NEXT: addi 9, 7, -32 -; CHECK32_32-NEXT: slw 9, 4, 9 -; CHECK32_32-NEXT: or 5, 6, 5 -; CHECK32_32-NEXT: or 3, 3, 9 -; CHECK32_32-NEXT: or 5, 5, 8 -; CHECK32_32-NEXT: slw 4, 4, 7 -; CHECK32_32-NEXT: or 3, 3, 10 -; CHECK32_32-NEXT: or 4, 4, 5 +; CHECK32_32-NEXT: or 4, 6, 4 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: fshl_i64: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: clrlwi 7, 8, 26 -; CHECK32_64-NEXT: not 8, 8 -; CHECK32_64-NEXT: subfic 9, 7, 32 -; CHECK32_64-NEXT: rotlwi 6, 6, 31 +; CHECK32_64-NEXT: andi. 7, 8, 32 +; CHECK32_64-NEXT: clrlwi 7, 8, 27 +; CHECK32_64-NEXT: bc 12, 2, .LBB1_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 9, 5, 0 +; CHECK32_64-NEXT: ori 3, 4, 0 +; CHECK32_64-NEXT: ori 5, 6, 0 +; CHECK32_64-NEXT: b .LBB1_3 +; CHECK32_64-NEXT: .LBB1_2: +; CHECK32_64-NEXT: addi 9, 4, 0 +; CHECK32_64-NEXT: .LBB1_3: +; CHECK32_64-NEXT: subfic 8, 7, 32 +; CHECK32_64-NEXT: srw 4, 9, 8 ; CHECK32_64-NEXT: slw 3, 3, 7 -; CHECK32_64-NEXT: clrlwi 8, 8, 26 -; CHECK32_64-NEXT: srw 9, 4, 9 -; CHECK32_64-NEXT: rlwimi 6, 5, 31, 0, 0 -; CHECK32_64-NEXT: srwi 5, 5, 1 -; CHECK32_64-NEXT: addi 10, 7, -32 -; CHECK32_64-NEXT: or 3, 3, 9 -; CHECK32_64-NEXT: subfic 9, 8, 32 -; CHECK32_64-NEXT: slw 7, 4, 7 -; CHECK32_64-NEXT: slw 4, 4, 10 -; CHECK32_64-NEXT: srw 10, 5, 8 -; CHECK32_64-NEXT: srw 6, 6, 8 -; CHECK32_64-NEXT: addi 8, 8, -32 -; CHECK32_64-NEXT: slw 9, 5, 9 ; CHECK32_64-NEXT: srw 5, 5, 8 -; CHECK32_64-NEXT: or 6, 6, 9 +; CHECK32_64-NEXT: slw 6, 9, 7 ; CHECK32_64-NEXT: or 3, 3, 4 ; CHECK32_64-NEXT: or 4, 6, 5 -; CHECK32_64-NEXT: or 3, 3, 10 -; CHECK32_64-NEXT: or 4, 7, 4 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: fshl_i64: @@ -112,387 +101,128 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) { define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind { ; CHECK32_32-LABEL: fshl_i128: ; CHECK32_32: # %bb.0: -; CHECK32_32-NEXT: stwu 1, -64(1) -; CHECK32_32-NEXT: lwz 0, 84(1) -; CHECK32_32-NEXT: rotlwi 12, 8, 31 -; CHECK32_32-NEXT: srwi 11, 7, 1 -; CHECK32_32-NEXT: rlwimi 12, 7, 31, 0, 0 -; CHECK32_32-NEXT: andi. 
7, 0, 127 -; CHECK32_32-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: rotlwi 10, 10, 31 -; CHECK32_32-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: rotlwi 30, 9, 31 -; CHECK32_32-NEXT: subfic 27, 7, 32 -; CHECK32_32-NEXT: stw 22, 24(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: rlwimi 10, 9, 31, 0, 0 -; CHECK32_32-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: rlwimi 30, 8, 31, 0, 0 -; CHECK32_32-NEXT: stw 28, 48(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: not 8, 0 -; CHECK32_32-NEXT: subfic 9, 7, 96 -; CHECK32_32-NEXT: addi 0, 7, -64 -; CHECK32_32-NEXT: slw 28, 3, 7 -; CHECK32_32-NEXT: subfic 25, 7, 64 -; CHECK32_32-NEXT: srw 22, 4, 27 -; CHECK32_32-NEXT: stw 20, 16(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: srw 9, 6, 9 -; CHECK32_32-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: slw 23, 5, 0 -; CHECK32_32-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: addi 29, 7, -96 -; CHECK32_32-NEXT: srw 20, 5, 25 -; CHECK32_32-NEXT: or 28, 28, 22 -; CHECK32_32-NEXT: srw 22, 6, 25 -; CHECK32_32-NEXT: subfic 25, 25, 32 -; CHECK32_32-NEXT: stw 24, 32(1) # 4-byte Folded Spill +; CHECK32_32-NEXT: lwz 11, 20(1) +; CHECK32_32-NEXT: andi. 12, 11, 64 ; CHECK32_32-NEXT: mcrf 1, 0 -; CHECK32_32-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: addi 26, 7, -32 -; CHECK32_32-NEXT: andi. 8, 8, 127 -; CHECK32_32-NEXT: slw 24, 5, 7 -; CHECK32_32-NEXT: slw 29, 6, 29 -; CHECK32_32-NEXT: or 9, 23, 9 -; CHECK32_32-NEXT: slw 25, 5, 25 -; CHECK32_32-NEXT: srw 5, 5, 27 -; CHECK32_32-NEXT: srw 27, 6, 27 -; CHECK32_32-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: slw 21, 4, 26 -; CHECK32_32-NEXT: subfic 23, 8, 32 -; CHECK32_32-NEXT: or 27, 24, 27 -; CHECK32_32-NEXT: subfic 24, 8, 96 -; CHECK32_32-NEXT: or 9, 9, 29 -; CHECK32_32-NEXT: addi 29, 8, -64 -; CHECK32_32-NEXT: or 25, 22, 25 -; CHECK32_32-NEXT: stw 19, 12(1) # 4-byte Folded Spill -; CHECK32_32-NEXT: srw 19, 12, 8 -; CHECK32_32-NEXT: or 28, 28, 21 -; CHECK32_32-NEXT: slw 21, 11, 23 -; CHECK32_32-NEXT: slw 24, 11, 24 -; CHECK32_32-NEXT: srw 22, 12, 29 -; CHECK32_32-NEXT: slw 26, 6, 26 -; CHECK32_32-NEXT: or 5, 25, 5 -; CHECK32_32-NEXT: addi 25, 8, -96 -; CHECK32_32-NEXT: or 21, 19, 21 -; CHECK32_32-NEXT: srw 19, 10, 8 -; CHECK32_32-NEXT: or 24, 22, 24 -; CHECK32_32-NEXT: slw 22, 30, 23 -; CHECK32_32-NEXT: or 27, 27, 26 -; CHECK32_32-NEXT: addi 26, 8, -32 -; CHECK32_32-NEXT: srw 25, 11, 25 -; CHECK32_32-NEXT: or 22, 19, 22 -; CHECK32_32-NEXT: or 28, 28, 20 -; CHECK32_32-NEXT: srw 20, 11, 26 -; CHECK32_32-NEXT: or 25, 24, 25 -; CHECK32_32-NEXT: subfic 24, 8, 64 -; CHECK32_32-NEXT: srw 26, 30, 26 -; CHECK32_32-NEXT: or 26, 22, 26 -; CHECK32_32-NEXT: subfic 22, 24, 32 -; CHECK32_32-NEXT: slw 23, 12, 23 -; CHECK32_32-NEXT: srw 22, 12, 22 -; CHECK32_32-NEXT: slw 12, 12, 24 -; CHECK32_32-NEXT: slw 24, 11, 24 -; CHECK32_32-NEXT: cmplwi 5, 7, 64 -; CHECK32_32-NEXT: or 24, 24, 22 -; CHECK32_32-NEXT: slw 22, 6, 0 -; CHECK32_32-NEXT: slw 6, 6, 7 -; CHECK32_32-NEXT: slw 7, 4, 7 -; CHECK32_32-NEXT: srw 29, 11, 29 -; CHECK32_32-NEXT: srw 11, 11, 8 -; CHECK32_32-NEXT: cmplwi 6, 8, 64 -; CHECK32_32-NEXT: srw 8, 30, 8 -; CHECK32_32-NEXT: or 5, 7, 5 -; CHECK32_32-NEXT: or 7, 26, 12 -; CHECK32_32-NEXT: or 12, 24, 23 -; CHECK32_32-NEXT: bc 12, 20, .LBB2_1 -; CHECK32_32-NEXT: b .LBB2_2 -; CHECK32_32-NEXT: .LBB2_1: -; CHECK32_32-NEXT: addi 9, 28, 0 +; CHECK32_32-NEXT: andi. 
12, 11, 32 +; CHECK32_32-NEXT: clrlwi 11, 11, 27 +; CHECK32_32-NEXT: bc 12, 6, .LBB2_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 4, 6, 0 +; CHECK32_32-NEXT: ori 12, 7, 0 +; CHECK32_32-NEXT: ori 3, 5, 0 +; CHECK32_32-NEXT: ori 5, 8, 0 +; CHECK32_32-NEXT: ori 6, 9, 0 +; CHECK32_32-NEXT: ori 7, 10, 0 +; CHECK32_32-NEXT: b .LBB2_3 ; CHECK32_32-NEXT: .LBB2_2: -; CHECK32_32-NEXT: li 28, 0 -; CHECK32_32-NEXT: bc 12, 20, .LBB2_4 -; CHECK32_32-NEXT: # %bb.3: -; CHECK32_32-NEXT: ori 5, 22, 0 -; CHECK32_32-NEXT: b .LBB2_4 -; CHECK32_32-NEXT: .LBB2_4: -; CHECK32_32-NEXT: bc 12, 24, .LBB2_6 -; CHECK32_32-NEXT: # %bb.5: -; CHECK32_32-NEXT: ori 7, 25, 0 +; CHECK32_32-NEXT: addi 12, 5, 0 +; CHECK32_32-NEXT: addi 5, 6, 0 +; CHECK32_32-NEXT: addi 6, 7, 0 +; CHECK32_32-NEXT: addi 7, 8, 0 +; CHECK32_32-NEXT: .LBB2_3: +; CHECK32_32-NEXT: subfic 8, 11, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB2_5 +; CHECK32_32-NEXT: # %bb.4: +; CHECK32_32-NEXT: ori 9, 12, 0 +; CHECK32_32-NEXT: ori 3, 4, 0 +; CHECK32_32-NEXT: ori 4, 5, 0 +; CHECK32_32-NEXT: ori 5, 6, 0 +; CHECK32_32-NEXT: ori 6, 7, 0 ; CHECK32_32-NEXT: b .LBB2_6 +; CHECK32_32-NEXT: .LBB2_5: +; CHECK32_32-NEXT: addi 9, 4, 0 +; CHECK32_32-NEXT: addi 4, 12, 0 ; CHECK32_32-NEXT: .LBB2_6: -; CHECK32_32-NEXT: or 8, 8, 12 -; CHECK32_32-NEXT: or 21, 21, 20 -; CHECK32_32-NEXT: bc 12, 20, .LBB2_8 -; CHECK32_32-NEXT: # %bb.7: -; CHECK32_32-NEXT: ori 6, 28, 0 -; CHECK32_32-NEXT: b .LBB2_8 -; CHECK32_32-NEXT: .LBB2_8: -; CHECK32_32-NEXT: bc 12, 6, .LBB2_10 -; CHECK32_32-NEXT: # %bb.9: -; CHECK32_32-NEXT: ori 4, 5, 0 -; CHECK32_32-NEXT: b .LBB2_10 -; CHECK32_32-NEXT: .LBB2_10: -; CHECK32_32-NEXT: bc 12, 2, .LBB2_12 -; CHECK32_32-NEXT: # %bb.11: -; CHECK32_32-NEXT: ori 5, 7, 0 -; CHECK32_32-NEXT: b .LBB2_13 -; CHECK32_32-NEXT: .LBB2_12: -; CHECK32_32-NEXT: addi 5, 10, 0 -; CHECK32_32-NEXT: .LBB2_13: -; CHECK32_32-NEXT: bc 12, 24, .LBB2_15 -; CHECK32_32-NEXT: # %bb.14: -; CHECK32_32-NEXT: ori 7, 29, 0 -; CHECK32_32-NEXT: ori 11, 28, 0 -; CHECK32_32-NEXT: ori 0, 28, 0 -; CHECK32_32-NEXT: b .LBB2_16 -; CHECK32_32-NEXT: .LBB2_15: -; CHECK32_32-NEXT: addi 7, 8, 0 -; CHECK32_32-NEXT: addi 0, 21, 0 -; CHECK32_32-NEXT: .LBB2_16: -; CHECK32_32-NEXT: bc 12, 6, .LBB2_18 -; CHECK32_32-NEXT: # %bb.17: -; CHECK32_32-NEXT: ori 3, 9, 0 -; CHECK32_32-NEXT: b .LBB2_18 -; CHECK32_32-NEXT: .LBB2_18: -; CHECK32_32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: or 6, 6, 5 -; CHECK32_32-NEXT: bc 12, 20, .LBB2_20 -; CHECK32_32-NEXT: # %bb.19: -; CHECK32_32-NEXT: ori 5, 28, 0 -; CHECK32_32-NEXT: b .LBB2_21 -; CHECK32_32-NEXT: .LBB2_20: -; CHECK32_32-NEXT: addi 5, 27, 0 -; CHECK32_32-NEXT: .LBB2_21: -; CHECK32_32-NEXT: bc 12, 2, .LBB2_22 -; CHECK32_32-NEXT: b .LBB2_23 -; CHECK32_32-NEXT: .LBB2_22: -; CHECK32_32-NEXT: addi 7, 30, 0 -; CHECK32_32-NEXT: .LBB2_23: -; CHECK32_32-NEXT: or 3, 3, 11 -; CHECK32_32-NEXT: or 4, 4, 0 -; CHECK32_32-NEXT: or 5, 5, 7 -; CHECK32_32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 22, 24(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 21, 20(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 20, 16(1) # 4-byte Folded Reload -; CHECK32_32-NEXT: lwz 19, 12(1) # 4-byte Folded Reload -; 
CHECK32_32-NEXT: addi 1, 1, 64 +; CHECK32_32-NEXT: srw 7, 9, 8 +; CHECK32_32-NEXT: slw 3, 3, 11 +; CHECK32_32-NEXT: srw 10, 4, 8 +; CHECK32_32-NEXT: slw 9, 9, 11 +; CHECK32_32-NEXT: srw 12, 5, 8 +; CHECK32_32-NEXT: slw 0, 4, 11 +; CHECK32_32-NEXT: srw 6, 6, 8 +; CHECK32_32-NEXT: slw 8, 5, 11 +; CHECK32_32-NEXT: or 3, 3, 7 +; CHECK32_32-NEXT: or 4, 9, 10 +; CHECK32_32-NEXT: or 5, 0, 12 +; CHECK32_32-NEXT: or 6, 8, 6 ; CHECK32_32-NEXT: blr ; ; CHECK32_64-LABEL: fshl_i128: ; CHECK32_64: # %bb.0: -; CHECK32_64-NEXT: stwu 1, -64(1) -; CHECK32_64-NEXT: lwz 12, 84(1) -; CHECK32_64-NEXT: rotlwi 11, 8, 31 -; CHECK32_64-NEXT: rotlwi 10, 10, 31 -; CHECK32_64-NEXT: rlwimi 10, 9, 31, 0, 0 -; CHECK32_64-NEXT: rlwimi 11, 7, 31, 0, 0 -; CHECK32_64-NEXT: stw 30, 56(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: rotlwi 30, 9, 31 -; CHECK32_64-NEXT: stw 27, 44(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: not 9, 12 -; CHECK32_64-NEXT: rlwimi 30, 8, 31, 0, 0 -; CHECK32_64-NEXT: andi. 8, 12, 127 -; CHECK32_64-NEXT: stw 22, 24(1) # 4-byte Folded Spill +; CHECK32_64-NEXT: stwu 1, -16(1) +; CHECK32_64-NEXT: lwz 11, 36(1) +; CHECK32_64-NEXT: andi. 12, 11, 64 +; CHECK32_64-NEXT: stw 30, 8(1) # 4-byte Folded Spill ; CHECK32_64-NEXT: mcrf 1, 0 -; CHECK32_64-NEXT: subfic 12, 8, 96 -; CHECK32_64-NEXT: addi 0, 8, -64 -; CHECK32_64-NEXT: subfic 27, 8, 32 -; CHECK32_64-NEXT: stw 23, 28(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: andi. 9, 9, 127 -; CHECK32_64-NEXT: srw 12, 6, 12 -; CHECK32_64-NEXT: stw 25, 36(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: subfic 25, 8, 64 -; CHECK32_64-NEXT: slw 23, 5, 0 -; CHECK32_64-NEXT: stw 26, 40(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: addi 26, 8, -32 -; CHECK32_64-NEXT: srw 22, 4, 27 -; CHECK32_64-NEXT: srwi 7, 7, 1 -; CHECK32_64-NEXT: or 12, 23, 12 -; CHECK32_64-NEXT: stw 28, 48(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: slw 28, 3, 8 -; CHECK32_64-NEXT: srw 23, 6, 25 -; CHECK32_64-NEXT: stw 18, 8(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: subfic 18, 9, 32 -; CHECK32_64-NEXT: or 28, 28, 22 -; CHECK32_64-NEXT: srw 22, 5, 27 -; CHECK32_64-NEXT: srw 27, 6, 27 -; CHECK32_64-NEXT: stw 20, 16(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: srw 20, 5, 25 -; CHECK32_64-NEXT: subfic 25, 25, 32 -; CHECK32_64-NEXT: stw 21, 20(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: slw 21, 4, 26 -; CHECK32_64-NEXT: slw 26, 6, 26 -; CHECK32_64-NEXT: or 28, 28, 21 -; CHECK32_64-NEXT: slw 21, 7, 18 -; CHECK32_64-NEXT: stw 24, 32(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: slw 24, 5, 8 -; CHECK32_64-NEXT: slw 5, 5, 25 -; CHECK32_64-NEXT: stw 29, 52(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: addi 29, 8, -96 -; CHECK32_64-NEXT: subfic 25, 9, 96 -; CHECK32_64-NEXT: slw 29, 6, 29 -; CHECK32_64-NEXT: or 27, 24, 27 -; CHECK32_64-NEXT: stw 19, 12(1) # 4-byte Folded Spill -; CHECK32_64-NEXT: srw 19, 11, 9 -; CHECK32_64-NEXT: addi 24, 9, -64 -; CHECK32_64-NEXT: or 12, 12, 29 -; CHECK32_64-NEXT: srw 29, 10, 9 -; CHECK32_64-NEXT: slw 25, 7, 25 -; CHECK32_64-NEXT: or 21, 19, 21 -; CHECK32_64-NEXT: srw 19, 11, 24 -; CHECK32_64-NEXT: or 5, 23, 5 -; CHECK32_64-NEXT: slw 23, 30, 18 -; CHECK32_64-NEXT: or 27, 27, 26 -; CHECK32_64-NEXT: addi 26, 9, -96 -; CHECK32_64-NEXT: or 25, 19, 25 -; CHECK32_64-NEXT: lwz 19, 12(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: or 29, 29, 23 -; CHECK32_64-NEXT: addi 23, 9, -32 -; CHECK32_64-NEXT: srw 26, 7, 26 -; CHECK32_64-NEXT: or 28, 28, 20 -; CHECK32_64-NEXT: srw 20, 7, 23 -; CHECK32_64-NEXT: or 26, 25, 26 -; CHECK32_64-NEXT: subfic 25, 9, 64 -; CHECK32_64-NEXT: srw 23, 30, 23 -; 
CHECK32_64-NEXT: or 29, 29, 23 -; CHECK32_64-NEXT: subfic 23, 25, 32 -; CHECK32_64-NEXT: or 5, 5, 22 -; CHECK32_64-NEXT: slw 22, 11, 18 -; CHECK32_64-NEXT: lwz 18, 8(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: srw 23, 11, 23 -; CHECK32_64-NEXT: slw 11, 11, 25 -; CHECK32_64-NEXT: slw 25, 7, 25 -; CHECK32_64-NEXT: cmplwi 5, 8, 64 -; CHECK32_64-NEXT: bc 12, 20, .LBB2_1 -; CHECK32_64-NEXT: b .LBB2_2 -; CHECK32_64-NEXT: .LBB2_1: -; CHECK32_64-NEXT: addi 12, 28, 0 +; CHECK32_64-NEXT: clrlwi 12, 11, 27 +; CHECK32_64-NEXT: andi. 11, 11, 32 +; CHECK32_64-NEXT: bc 12, 6, .LBB2_2 +; CHECK32_64-NEXT: # %bb.1: +; CHECK32_64-NEXT: ori 4, 6, 0 +; CHECK32_64-NEXT: ori 30, 7, 0 +; CHECK32_64-NEXT: ori 3, 5, 0 +; CHECK32_64-NEXT: ori 7, 9, 0 +; CHECK32_64-NEXT: b .LBB2_3 ; CHECK32_64-NEXT: .LBB2_2: -; CHECK32_64-NEXT: lwz 28, 48(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: or 25, 25, 23 -; CHECK32_64-NEXT: bc 12, 6, .LBB2_4 -; CHECK32_64-NEXT: # %bb.3: -; CHECK32_64-NEXT: ori 3, 12, 0 -; CHECK32_64-NEXT: b .LBB2_4 -; CHECK32_64-NEXT: .LBB2_4: -; CHECK32_64-NEXT: slw 23, 6, 0 -; CHECK32_64-NEXT: slw 6, 6, 8 -; CHECK32_64-NEXT: slw 8, 4, 8 -; CHECK32_64-NEXT: cmplwi 6, 9, 64 -; CHECK32_64-NEXT: or 5, 8, 5 -; CHECK32_64-NEXT: bc 12, 20, .LBB2_6 -; CHECK32_64-NEXT: # %bb.5: -; CHECK32_64-NEXT: ori 5, 23, 0 +; CHECK32_64-NEXT: addi 30, 5, 0 +; CHECK32_64-NEXT: .LBB2_3: +; CHECK32_64-NEXT: bc 12, 2, .LBB2_5 +; CHECK32_64-NEXT: # %bb.4: +; CHECK32_64-NEXT: ori 5, 30, 0 +; CHECK32_64-NEXT: ori 3, 4, 0 ; CHECK32_64-NEXT: b .LBB2_6 +; CHECK32_64-NEXT: .LBB2_5: +; CHECK32_64-NEXT: addi 5, 4, 0 ; CHECK32_64-NEXT: .LBB2_6: -; CHECK32_64-NEXT: lwz 23, 28(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: li 8, 0 -; CHECK32_64-NEXT: srw 24, 7, 24 ; CHECK32_64-NEXT: bc 12, 6, .LBB2_8 ; CHECK32_64-NEXT: # %bb.7: -; CHECK32_64-NEXT: ori 4, 5, 0 -; CHECK32_64-NEXT: b .LBB2_8 +; CHECK32_64-NEXT: ori 4, 8, 0 +; CHECK32_64-NEXT: ori 8, 10, 0 +; CHECK32_64-NEXT: b .LBB2_9 ; CHECK32_64-NEXT: .LBB2_8: -; CHECK32_64-NEXT: bc 12, 20, .LBB2_10 -; CHECK32_64-NEXT: # %bb.9: -; CHECK32_64-NEXT: ori 6, 8, 0 -; CHECK32_64-NEXT: b .LBB2_10 -; CHECK32_64-NEXT: .LBB2_10: -; CHECK32_64-NEXT: srw 7, 7, 9 -; CHECK32_64-NEXT: srw 9, 30, 9 -; CHECK32_64-NEXT: bc 12, 24, .LBB2_12 -; CHECK32_64-NEXT: # %bb.11: +; CHECK32_64-NEXT: addi 4, 6, 0 +; CHECK32_64-NEXT: .LBB2_9: +; CHECK32_64-NEXT: subfic 11, 12, 32 +; CHECK32_64-NEXT: bc 12, 2, .LBB2_11 +; CHECK32_64-NEXT: # %bb.10: +; CHECK32_64-NEXT: ori 0, 4, 0 +; CHECK32_64-NEXT: ori 4, 7, 0 ; CHECK32_64-NEXT: ori 7, 8, 0 ; CHECK32_64-NEXT: b .LBB2_12 +; CHECK32_64-NEXT: .LBB2_11: +; CHECK32_64-NEXT: addi 0, 30, 0 ; CHECK32_64-NEXT: .LBB2_12: -; CHECK32_64-NEXT: or 0, 25, 22 -; CHECK32_64-NEXT: or 11, 29, 11 -; CHECK32_64-NEXT: lwz 29, 52(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: bc 12, 24, .LBB2_14 -; CHECK32_64-NEXT: # %bb.13: -; CHECK32_64-NEXT: ori 5, 26, 0 -; CHECK32_64-NEXT: b .LBB2_15 -; CHECK32_64-NEXT: .LBB2_14: -; CHECK32_64-NEXT: addi 5, 11, 0 -; CHECK32_64-NEXT: .LBB2_15: -; CHECK32_64-NEXT: or 9, 9, 0 -; CHECK32_64-NEXT: or 21, 21, 20 -; CHECK32_64-NEXT: bc 12, 2, .LBB2_16 -; CHECK32_64-NEXT: b .LBB2_17 -; CHECK32_64-NEXT: .LBB2_16: -; CHECK32_64-NEXT: addi 5, 10, 0 -; CHECK32_64-NEXT: .LBB2_17: -; CHECK32_64-NEXT: bc 12, 24, .LBB2_19 -; CHECK32_64-NEXT: # %bb.18: -; CHECK32_64-NEXT: ori 0, 8, 0 -; CHECK32_64-NEXT: b .LBB2_20 -; CHECK32_64-NEXT: .LBB2_19: -; CHECK32_64-NEXT: addi 0, 21, 0 -; CHECK32_64-NEXT: .LBB2_20: -; CHECK32_64-NEXT: bc 12, 20, .LBB2_21 -; CHECK32_64-NEXT: b .LBB2_22 
-; CHECK32_64-NEXT: .LBB2_21: -; CHECK32_64-NEXT: addi 8, 27, 0 -; CHECK32_64-NEXT: .LBB2_22: -; CHECK32_64-NEXT: lwz 27, 44(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: or 3, 3, 7 -; CHECK32_64-NEXT: bc 12, 24, .LBB2_24 -; CHECK32_64-NEXT: # %bb.23: -; CHECK32_64-NEXT: ori 7, 24, 0 -; CHECK32_64-NEXT: b .LBB2_25 -; CHECK32_64-NEXT: .LBB2_24: -; CHECK32_64-NEXT: addi 7, 9, 0 -; CHECK32_64-NEXT: .LBB2_25: -; CHECK32_64-NEXT: or 4, 4, 0 -; CHECK32_64-NEXT: bc 12, 2, .LBB2_26 -; CHECK32_64-NEXT: b .LBB2_27 -; CHECK32_64-NEXT: .LBB2_26: -; CHECK32_64-NEXT: addi 7, 30, 0 -; CHECK32_64-NEXT: .LBB2_27: -; CHECK32_64-NEXT: or 6, 6, 5 -; CHECK32_64-NEXT: or 5, 8, 7 -; CHECK32_64-NEXT: lwz 30, 56(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: lwz 26, 40(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: lwz 25, 36(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: lwz 24, 32(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: lwz 22, 24(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: lwz 21, 20(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: lwz 20, 16(1) # 4-byte Folded Reload -; CHECK32_64-NEXT: addi 1, 1, 64 +; CHECK32_64-NEXT: srw 6, 5, 11 +; CHECK32_64-NEXT: lwz 30, 8(1) # 4-byte Folded Reload +; CHECK32_64-NEXT: slw 3, 3, 12 +; CHECK32_64-NEXT: srw 9, 0, 11 +; CHECK32_64-NEXT: slw 5, 5, 12 +; CHECK32_64-NEXT: srw 10, 4, 11 +; CHECK32_64-NEXT: slw 0, 0, 12 +; CHECK32_64-NEXT: srw 7, 7, 11 +; CHECK32_64-NEXT: slw 8, 4, 12 +; CHECK32_64-NEXT: or 3, 3, 6 +; CHECK32_64-NEXT: or 4, 5, 9 +; CHECK32_64-NEXT: or 5, 0, 10 +; CHECK32_64-NEXT: or 6, 8, 7 +; CHECK32_64-NEXT: addi 1, 1, 16 ; CHECK32_64-NEXT: blr ; ; CHECK64-LABEL: fshl_i128: ; CHECK64: # %bb.0: -; CHECK64-NEXT: clrlwi 8, 7, 25 -; CHECK64-NEXT: rotldi 5, 5, 63 -; CHECK64-NEXT: not 7, 7 -; CHECK64-NEXT: rldicl 9, 6, 63, 1 -; CHECK64-NEXT: subfic 10, 8, 64 -; CHECK64-NEXT: addi 11, 8, -64 -; CHECK64-NEXT: rldimi 5, 6, 63, 0 -; CHECK64-NEXT: clrlwi 6, 7, 25 -; CHECK64-NEXT: srd 7, 3, 10 -; CHECK64-NEXT: sld 10, 3, 11 -; CHECK64-NEXT: subfic 11, 6, 64 -; CHECK64-NEXT: addi 12, 6, -64 -; CHECK64-NEXT: sld 4, 4, 8 -; CHECK64-NEXT: srd 5, 5, 6 -; CHECK64-NEXT: sld 11, 9, 11 -; CHECK64-NEXT: or 4, 4, 7 -; CHECK64-NEXT: or 5, 5, 11 -; CHECK64-NEXT: srd 7, 9, 12 -; CHECK64-NEXT: or 4, 4, 10 -; CHECK64-NEXT: srd 6, 9, 6 -; CHECK64-NEXT: or 5, 5, 7 -; CHECK64-NEXT: sld 3, 3, 8 -; CHECK64-NEXT: or 4, 4, 6 -; CHECK64-NEXT: or 3, 3, 5 +; CHECK64-NEXT: andi. 
8, 7, 64 +; CHECK64-NEXT: clrlwi 7, 7, 26 +; CHECK64-NEXT: iseleq 5, 6, 5 +; CHECK64-NEXT: subfic 8, 7, 64 +; CHECK64-NEXT: iseleq 6, 3, 6 +; CHECK64-NEXT: iseleq 3, 4, 3 +; CHECK64-NEXT: srd 4, 5, 8 +; CHECK64-NEXT: sld 5, 6, 7 +; CHECK64-NEXT: srd 6, 6, 8 +; CHECK64-NEXT: sld 7, 3, 7 +; CHECK64-NEXT: or 3, 5, 4 +; CHECK64-NEXT: or 4, 7, 6 ; CHECK64-NEXT: blr %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z) ret i128 %f @@ -525,31 +255,29 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK32_32-NEXT: li 5, 0 ; CHECK32_32-NEXT: li 6, 37 ; CHECK32_32-NEXT: bl __umoddi3 -; CHECK32_32-NEXT: clrlwi 6, 4, 26 -; CHECK32_32-NEXT: srwi 3, 30, 6 -; CHECK32_32-NEXT: not 4, 4 -; CHECK32_32-NEXT: subfic 8, 6, 32 -; CHECK32_32-NEXT: slwi 5, 30, 26 -; CHECK32_32-NEXT: rlwimi 3, 29, 26, 1, 5 -; CHECK32_32-NEXT: slw 7, 27, 6 -; CHECK32_32-NEXT: clrlwi 4, 4, 26 -; CHECK32_32-NEXT: srw 8, 28, 8 -; CHECK32_32-NEXT: srw 9, 3, 4 -; CHECK32_32-NEXT: srw 5, 5, 4 -; CHECK32_32-NEXT: or 7, 7, 8 -; CHECK32_32-NEXT: subfic 8, 4, 32 -; CHECK32_32-NEXT: addi 4, 4, -32 -; CHECK32_32-NEXT: slw 8, 3, 8 -; CHECK32_32-NEXT: srw 4, 3, 4 -; CHECK32_32-NEXT: addi 3, 6, -32 -; CHECK32_32-NEXT: slw 3, 28, 3 -; CHECK32_32-NEXT: or 5, 5, 8 -; CHECK32_32-NEXT: or 3, 7, 3 -; CHECK32_32-NEXT: or 4, 5, 4 -; CHECK32_32-NEXT: slw 5, 28, 6 -; CHECK32_32-NEXT: or 3, 3, 9 -; CHECK32_32-NEXT: or 4, 5, 4 +; CHECK32_32-NEXT: rotlwi 3, 30, 27 +; CHECK32_32-NEXT: slwi 5, 30, 27 +; CHECK32_32-NEXT: andi. 6, 4, 32 +; CHECK32_32-NEXT: rlwimi 3, 29, 27, 0, 4 +; CHECK32_32-NEXT: clrlwi 4, 4, 27 +; CHECK32_32-NEXT: subfic 6, 4, 32 +; CHECK32_32-NEXT: bc 12, 2, .LBB3_2 +; CHECK32_32-NEXT: # %bb.1: +; CHECK32_32-NEXT: ori 7, 3, 0 +; CHECK32_32-NEXT: ori 8, 28, 0 +; CHECK32_32-NEXT: ori 3, 5, 0 +; CHECK32_32-NEXT: b .LBB3_3 +; CHECK32_32-NEXT: .LBB3_2: +; CHECK32_32-NEXT: addi 7, 28, 0 +; CHECK32_32-NEXT: addi 8, 27, 0 +; CHECK32_32-NEXT: .LBB3_3: ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload +; CHECK32_32-NEXT: srw 5, 7, 6 +; CHECK32_32-NEXT: slw 8, 8, 4 +; CHECK32_32-NEXT: srw 6, 3, 6 +; CHECK32_32-NEXT: slw 4, 7, 4 +; CHECK32_32-NEXT: or 3, 8, 5 +; CHECK32_32-NEXT: or 4, 4, 6 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload @@ -582,35 +310,42 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK32_64-NEXT: mr 30, 6 ; CHECK32_64-NEXT: li 6, 37 ; CHECK32_64-NEXT: bl __umoddi3 -; CHECK32_64-NEXT: clrlwi 6, 4, 26 -; CHECK32_64-NEXT: not 4, 4 -; CHECK32_64-NEXT: subfic 8, 6, 32 -; CHECK32_64-NEXT: srwi 3, 30, 6 -; CHECK32_64-NEXT: slw 7, 27, 6 -; CHECK32_64-NEXT: clrlwi 4, 4, 26 +; CHECK32_64-NEXT: rotlwi 3, 30, 27 +; CHECK32_64-NEXT: andi. 
5, 4, 32
+; CHECK32_64-NEXT: bc 12, 2, .LBB3_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 8, 28, 0
+; CHECK32_64-NEXT: b .LBB3_3
+; CHECK32_64-NEXT: .LBB3_2:
+; CHECK32_64-NEXT: addi 8, 27, 0
+; CHECK32_64-NEXT: .LBB3_3:
 ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: srw 8, 28, 8
-; CHECK32_64-NEXT: rlwimi 3, 29, 26, 1, 5
-; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: slwi 5, 30, 26
-; CHECK32_64-NEXT: or 7, 7, 8
-; CHECK32_64-NEXT: subfic 8, 4, 32
+; CHECK32_64-NEXT: rlwimi 3, 29, 27, 0, 4
+; CHECK32_64-NEXT: clrlwi 4, 4, 27
+; CHECK32_64-NEXT: bc 12, 2, .LBB3_5
+; CHECK32_64-NEXT: # %bb.4:
+; CHECK32_64-NEXT: ori 7, 3, 0
+; CHECK32_64-NEXT: b .LBB3_6
+; CHECK32_64-NEXT: .LBB3_5:
+; CHECK32_64-NEXT: addi 7, 28, 0
+; CHECK32_64-NEXT: .LBB3_6:
+; CHECK32_64-NEXT: slwi 5, 30, 27
 ; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: addi 9, 6, -32
-; CHECK32_64-NEXT: srw 10, 3, 4
-; CHECK32_64-NEXT: srw 5, 5, 4
-; CHECK32_64-NEXT: addi 4, 4, -32
-; CHECK32_64-NEXT: slw 8, 3, 8
-; CHECK32_64-NEXT: slw 9, 28, 9
-; CHECK32_64-NEXT: srw 3, 3, 4
-; CHECK32_64-NEXT: or 4, 5, 8
-; CHECK32_64-NEXT: slw 6, 28, 6
-; CHECK32_64-NEXT: or 5, 7, 9
+; CHECK32_64-NEXT: bc 12, 2, .LBB3_8
+; CHECK32_64-NEXT: # %bb.7:
+; CHECK32_64-NEXT: ori 3, 5, 0
+; CHECK32_64-NEXT: b .LBB3_8
+; CHECK32_64-NEXT: .LBB3_8:
+; CHECK32_64-NEXT: subfic 6, 4, 32
+; CHECK32_64-NEXT: slw 8, 8, 4
+; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: srw 9, 7, 6
+; CHECK32_64-NEXT: srw 5, 3, 6
+; CHECK32_64-NEXT: slw 4, 7, 4
+; CHECK32_64-NEXT: or 3, 8, 9
 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: or 4, 4, 3
-; CHECK32_64-NEXT: or 3, 5, 10
+; CHECK32_64-NEXT: or 4, 4, 5
 ; CHECK32_64-NEXT: lwz 0, 36(1)
-; CHECK32_64-NEXT: or 4, 6, 4
 ; CHECK32_64-NEXT: addi 1, 1, 32
 ; CHECK32_64-NEXT: mtlr 0
 ; CHECK32_64-NEXT: blr
@@ -737,58 +472,47 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
 define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
 ; CHECK32_32-LABEL: fshr_i64:
 ; CHECK32_32: # %bb.0:
-; CHECK32_32-NEXT: clrlwi 7, 8, 26
-; CHECK32_32-NEXT: slwi 9, 4, 1
-; CHECK32_32-NEXT: not 8, 8
-; CHECK32_32-NEXT: rotlwi 4, 4, 1
-; CHECK32_32-NEXT: subfic 10, 7, 32
-; CHECK32_32-NEXT: srw 6, 6, 7
-; CHECK32_32-NEXT: clrlwi 8, 8, 26
-; CHECK32_32-NEXT: rlwimi 4, 3, 1, 0, 30
-; CHECK32_32-NEXT: slw 3, 5, 10
-; CHECK32_32-NEXT: slw 10, 9, 8
-; CHECK32_32-NEXT: slw 4, 4, 8
-; CHECK32_32-NEXT: or 3, 6, 3
-; CHECK32_32-NEXT: subfic 6, 8, 32
-; CHECK32_32-NEXT: addi 8, 8, -32
-; CHECK32_32-NEXT: srw 6, 9, 6
-; CHECK32_32-NEXT: slw 8, 9, 8
-; CHECK32_32-NEXT: addi 9, 7, -32
-; CHECK32_32-NEXT: srw 9, 5, 9
-; CHECK32_32-NEXT: or 3, 3, 9
-; CHECK32_32-NEXT: or 6, 4, 6
-; CHECK32_32-NEXT: or 4, 10, 3
-; CHECK32_32-NEXT: or 3, 6, 8
-; CHECK32_32-NEXT: srw 5, 5, 7
+; CHECK32_32-NEXT: andi. 7, 8, 32
+; CHECK32_32-NEXT: clrlwi 7, 8, 27
+; CHECK32_32-NEXT: subfic 8, 7, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB10_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 9, 4, 0
+; CHECK32_32-NEXT: ori 4, 5, 0
+; CHECK32_32-NEXT: b .LBB10_3
+; CHECK32_32-NEXT: .LBB10_2:
+; CHECK32_32-NEXT: addi 9, 5, 0
+; CHECK32_32-NEXT: addi 3, 4, 0
+; CHECK32_32-NEXT: addi 4, 6, 0
+; CHECK32_32-NEXT: .LBB10_3:
+; CHECK32_32-NEXT: srw 5, 9, 7
+; CHECK32_32-NEXT: slw 3, 3, 8
+; CHECK32_32-NEXT: srw 4, 4, 7
+; CHECK32_32-NEXT: slw 6, 9, 8
 ; CHECK32_32-NEXT: or 3, 3, 5
+; CHECK32_32-NEXT: or 4, 6, 4
 ; CHECK32_32-NEXT: blr
 ;
 ; CHECK32_64-LABEL: fshr_i64:
 ; CHECK32_64: # %bb.0:
-; CHECK32_64-NEXT: rotlwi 7, 4, 1
-; CHECK32_64-NEXT: slwi 4, 4, 1
-; CHECK32_64-NEXT: rlwimi 7, 3, 1, 0, 30
-; CHECK32_64-NEXT: clrlwi 3, 8, 26
-; CHECK32_64-NEXT: not 8, 8
-; CHECK32_64-NEXT: subfic 9, 3, 32
-; CHECK32_64-NEXT: srw 6, 6, 3
-; CHECK32_64-NEXT: clrlwi 8, 8, 26
-; CHECK32_64-NEXT: slw 9, 5, 9
-; CHECK32_64-NEXT: addi 10, 3, -32
-; CHECK32_64-NEXT: or 6, 6, 9
-; CHECK32_64-NEXT: subfic 9, 8, 32
-; CHECK32_64-NEXT: srw 3, 5, 3
-; CHECK32_64-NEXT: srw 5, 5, 10
-; CHECK32_64-NEXT: slw 10, 4, 8
-; CHECK32_64-NEXT: slw 7, 7, 8
-; CHECK32_64-NEXT: addi 8, 8, -32
-; CHECK32_64-NEXT: srw 9, 4, 9
-; CHECK32_64-NEXT: slw 4, 4, 8
-; CHECK32_64-NEXT: or 7, 7, 9
-; CHECK32_64-NEXT: or 5, 6, 5
-; CHECK32_64-NEXT: or 6, 7, 4
-; CHECK32_64-NEXT: or 4, 10, 5
-; CHECK32_64-NEXT: or 3, 6, 3
+; CHECK32_64-NEXT: andi. 7, 8, 32
+; CHECK32_64-NEXT: clrlwi 7, 8, 27
+; CHECK32_64-NEXT: bc 12, 2, .LBB10_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 9, 4, 0
+; CHECK32_64-NEXT: b .LBB10_3
+; CHECK32_64-NEXT: .LBB10_2:
+; CHECK32_64-NEXT: addi 9, 5, 0
+; CHECK32_64-NEXT: addi 3, 4, 0
+; CHECK32_64-NEXT: addi 5, 6, 0
+; CHECK32_64-NEXT: .LBB10_3:
+; CHECK32_64-NEXT: subfic 8, 7, 32
+; CHECK32_64-NEXT: srw 4, 9, 7
+; CHECK32_64-NEXT: slw 3, 3, 8
+; CHECK32_64-NEXT: srw 5, 5, 7
+; CHECK32_64-NEXT: slw 6, 9, 8
+; CHECK32_64-NEXT: or 3, 3, 4
+; CHECK32_64-NEXT: or 4, 6, 5
 ; CHECK32_64-NEXT: blr
 ;
 ; CHECK64-LABEL: fshr_i64:
@@ -830,35 +554,30 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK32_32-NEXT: li 5, 0
 ; CHECK32_32-NEXT: li 6, 37
 ; CHECK32_32-NEXT: bl __umoddi3
+; CHECK32_32-NEXT: rotlwi 3, 30, 27
 ; CHECK32_32-NEXT: addi 4, 4, 27
-; CHECK32_32-NEXT: rotlwi 5, 30, 27
-; CHECK32_32-NEXT: clrlwi 8, 4, 26
-; CHECK32_32-NEXT: slwi 3, 30, 27
-; CHECK32_32-NEXT: rotlwi 7, 28, 1
-; CHECK32_32-NEXT: rlwimi 5, 29, 27, 0, 4
-; CHECK32_32-NEXT: not 4, 4
-; CHECK32_32-NEXT: subfic 9, 8, 32
-; CHECK32_32-NEXT: slwi 6, 28, 1
-; CHECK32_32-NEXT: rlwimi 7, 27, 1, 0, 30
-; CHECK32_32-NEXT: srw 3, 3, 8
-; CHECK32_32-NEXT: clrlwi 4, 4, 26
-; CHECK32_32-NEXT: slw 9, 5, 9
-; CHECK32_32-NEXT: slw 10, 6, 4
-; CHECK32_32-NEXT: slw 7, 7, 4
-; CHECK32_32-NEXT: or 3, 3, 9
-; CHECK32_32-NEXT: subfic 9, 4, 32
-; CHECK32_32-NEXT: addi 4, 4, -32
-; CHECK32_32-NEXT: srw 9, 6, 9
-; CHECK32_32-NEXT: slw 6, 6, 4
-; CHECK32_32-NEXT: addi 4, 8, -32
-; CHECK32_32-NEXT: srw 4, 5, 4
-; CHECK32_32-NEXT: or 3, 3, 4
-; CHECK32_32-NEXT: or 7, 7, 9
-; CHECK32_32-NEXT: or 4, 10, 3
-; CHECK32_32-NEXT: or 3, 7, 6
-; CHECK32_32-NEXT: srw 5, 5, 8
-; CHECK32_32-NEXT: or 3, 3, 5
+; CHECK32_32-NEXT: slwi 5, 30, 27
+; CHECK32_32-NEXT: rlwimi 3, 29, 27, 0, 4
+; CHECK32_32-NEXT: andi. 6, 4, 32
+; CHECK32_32-NEXT: clrlwi 4, 4, 27
+; CHECK32_32-NEXT: subfic 6, 4, 32
+; CHECK32_32-NEXT: bc 12, 2, .LBB11_2
+; CHECK32_32-NEXT: # %bb.1:
+; CHECK32_32-NEXT: ori 7, 28, 0
+; CHECK32_32-NEXT: ori 8, 27, 0
+; CHECK32_32-NEXT: b .LBB11_3
+; CHECK32_32-NEXT: .LBB11_2:
+; CHECK32_32-NEXT: addi 7, 3, 0
+; CHECK32_32-NEXT: addi 8, 28, 0
+; CHECK32_32-NEXT: addi 3, 5, 0
+; CHECK32_32-NEXT: .LBB11_3:
 ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; CHECK32_32-NEXT: srw 5, 7, 4
+; CHECK32_32-NEXT: slw 8, 8, 6
+; CHECK32_32-NEXT: srw 4, 3, 4
+; CHECK32_32-NEXT: slw 6, 7, 6
+; CHECK32_32-NEXT: or 3, 8, 5
+; CHECK32_32-NEXT: or 4, 6, 4
 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
 ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
 ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
@@ -893,37 +612,36 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK32_64-NEXT: bl __umoddi3
 ; CHECK32_64-NEXT: addi 4, 4, 27
 ; CHECK32_64-NEXT: rotlwi 3, 30, 27
-; CHECK32_64-NEXT: clrlwi 8, 4, 26
+; CHECK32_64-NEXT: andi. 5, 4, 32
 ; CHECK32_64-NEXT: rlwimi 3, 29, 27, 0, 4
 ; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: slwi 6, 30, 27
-; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: not 4, 4
-; CHECK32_64-NEXT: subfic 9, 8, 32
-; CHECK32_64-NEXT: rotlwi 5, 28, 1
-; CHECK32_64-NEXT: srw 6, 6, 8
-; CHECK32_64-NEXT: clrlwi 4, 4, 26
-; CHECK32_64-NEXT: slw 9, 3, 9
-; CHECK32_64-NEXT: rlwimi 5, 27, 1, 0, 30
-; CHECK32_64-NEXT: slwi 7, 28, 1
+; CHECK32_64-NEXT: bc 12, 2, .LBB11_2
+; CHECK32_64-NEXT: # %bb.1:
+; CHECK32_64-NEXT: ori 7, 28, 0
+; CHECK32_64-NEXT: ori 8, 27, 0
+; CHECK32_64-NEXT: b .LBB11_3
+; CHECK32_64-NEXT: .LBB11_2:
+; CHECK32_64-NEXT: addi 7, 3, 0
+; CHECK32_64-NEXT: addi 8, 28, 0
+; CHECK32_64-NEXT: .LBB11_3:
+; CHECK32_64-NEXT: clrlwi 4, 4, 27
 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: addi 10, 8, -32
+; CHECK32_64-NEXT: slwi 5, 30, 27
+; CHECK32_64-NEXT: subfic 6, 4, 32
+; CHECK32_64-NEXT: bc 12, 2, .LBB11_4
+; CHECK32_64-NEXT: b .LBB11_5
+; CHECK32_64-NEXT: .LBB11_4:
+; CHECK32_64-NEXT: addi 3, 5, 0
+; CHECK32_64-NEXT: .LBB11_5:
+; CHECK32_64-NEXT: srw 9, 7, 4
+; CHECK32_64-NEXT: slw 8, 8, 6
+; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
+; CHECK32_64-NEXT: srw 4, 3, 4
+; CHECK32_64-NEXT: slw 5, 7, 6
 ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
-; CHECK32_64-NEXT: or 6, 6, 9
-; CHECK32_64-NEXT: subfic 9, 4, 32
-; CHECK32_64-NEXT: srw 8, 3, 8
-; CHECK32_64-NEXT: srw 3, 3, 10
+; CHECK32_64-NEXT: or 3, 8, 9
+; CHECK32_64-NEXT: or 4, 5, 4
 ; CHECK32_64-NEXT: lwz 0, 36(1)
-; CHECK32_64-NEXT: slw 10, 7, 4
-; CHECK32_64-NEXT: slw 5, 5, 4
-; CHECK32_64-NEXT: addi 4, 4, -32
-; CHECK32_64-NEXT: srw 9, 7, 9
-; CHECK32_64-NEXT: slw 4, 7, 4
-; CHECK32_64-NEXT: or 5, 5, 9
-; CHECK32_64-NEXT: or 3, 6, 3
-; CHECK32_64-NEXT: or 5, 5, 4
-; CHECK32_64-NEXT: or 4, 10, 3
-; CHECK32_64-NEXT: or 3, 5, 8
 ; CHECK32_64-NEXT: addi 1, 1, 32
 ; CHECK32_64-NEXT: mtlr 0
 ; CHECK32_64-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/ifunc.ll b/llvm/test/CodeGen/PowerPC/ifunc.ll
index a58601f8f32f6..23afc886a991f 100644
--- a/llvm/test/CodeGen/PowerPC/ifunc.ll
+++ b/llvm/test/CodeGen/PowerPC/ifunc.ll
@@ -7,10 +7,10 @@
 ; RUN: llc %s -o - -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 \
 ; RUN: -verify-machineinstrs | FileCheck --check-prefix=LEP10 %s
-@ifunc1 = dso_local ifunc void(), i8*()* @resolver
-@ifunc2 = ifunc void(), i8*()* @resolver
+@ifunc1 = dso_local ifunc void(), void()* ()* @resolver
+@ifunc2 = ifunc void(), void()* ()* @resolver
-define i8* @resolver() { ret i8* null }
+define void()* @resolver() { ret void()* null }
 define void @foo() #0 {
 ; REL-LABEL: foo
diff --git a/llvm/test/CodeGen/PowerPC/instruction-mix-remarks-BCTRL_LWZinto_toc.ll b/llvm/test/CodeGen/PowerPC/instruction-mix-remarks-BCTRL_LWZinto_toc.ll
new file mode 100644
index 0000000000000..e5fe7aabf82fa
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/instruction-mix-remarks-BCTRL_LWZinto_toc.ll
@@ -0,0 +1,16 @@
+
+; RUN: llc -verify-machineinstrs -mattr=-altivec -mtriple powerpc64-ibm-aix-xcoff \
+; RUN: -pass-remarks-output=%t -pass-remarks=asm-printer -mcpu=pwr4 -o - %s
+; RUN: FileCheck --input-file=%t %s
+
+; CHECK: - String: "\n"
+; CHECK: - String: "bctrl\n\tld 2, "
+; CHECK: - String: ': '
+; CHECK: - INST_bctrl: '1'
+; CHECK: - String: "\n"
+
+
+define void @callThroughPtrWithArgs(void (i32, i16, i64)* nocapture) {
+  tail call void %0(i32 signext 1, i16 zeroext 2, i64 3)
+  ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
index dca65cdb2612f..8196efa43e15b 100644
--- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll
+++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll
@@ -8,6 +8,15 @@
 ; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
 ; RUN: -check-prefix=P7
+; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN: -check-prefix=P9-AIX32
+; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN: -check-prefix=P8-AIX32
+; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
+; RUN: -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s \
+; RUN: -check-prefix=P7-AIX32
 ; v2f64
 define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
@@ -31,6 +40,27 @@ define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readon
 ; P7-NEXT: lxvdsx vs0, 0, r4
 ; P7-NEXT: stxvd2x vs0, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: addi r4, r4, 24
+; P9-AIX32-NEXT: lxvdsx vs0, 0, r4
+; P9-AIX32-NEXT: stxv vs0, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: addi r4, r4, 24
+; P8-AIX32-NEXT: lxvdsx vs0, 0, r4
+; P8-AIX32-NEXT: stxvd2x vs0, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: addi r4, r4, 24
+; P7-AIX32-NEXT: lxvdsx vs0, 0, r4
+; P7-AIX32-NEXT: stxvd2x vs0, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %arrayidx = getelementptr inbounds double, double* %a, i64 3
   %0 = load double, double* %arrayidx, align 8
@@ -59,13 +89,34 @@ define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonl
 ;
 ; P7-LABEL: test2:
 ; P7: # %bb.0: # %entry
-; P7-NEXT: lwz r4, 12(r4)
-; P7-NEXT: addi r5, r1, -16
-; P7-NEXT: stw r4, -16(r1)
-; P7-NEXT: lxvw4x vs0, 0, r5
-; P7-NEXT: xxspltw vs0, vs0, 0
+; P7-NEXT: addi r4, r4, 12
+; P7-NEXT: lfiwzx f0, 0, r4
+; P7-NEXT: xxspltw vs0, vs0, 1
 ; P7-NEXT: stxvw4x vs0, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test2:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: addi r4, r4, 12
+; P9-AIX32-NEXT: lxvwsx vs0, 0, r4
+; P9-AIX32-NEXT: stxv vs0, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test2:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: addi r4, r4, 12
+; P8-AIX32-NEXT: lfiwzx f0, 0, r4
+; P8-AIX32-NEXT: xxspltw vs0, vs0, 1
+; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test2:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: addi r4, r4, 12
+; P7-AIX32-NEXT: lfiwzx f0, 0, r4
+; P7-AIX32-NEXT: xxspltw vs0, vs0, 1
+; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %arrayidx = getelementptr inbounds float, float* %a, i64 3
   %0 = load float, float* %arrayidx, align 4
@@ -94,13 +145,34 @@ define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a
 ;
 ; P7-LABEL: test3:
 ; P7: # %bb.0: # %entry
-; P7-NEXT: lwz r4, 12(r4)
-; P7-NEXT: addi r5, r1, -16
-; P7-NEXT: stw r4, -16(r1)
-; P7-NEXT: lxvw4x vs0, 0, r5
-; P7-NEXT: xxspltw vs0, vs0, 0
+; P7-NEXT: addi r4, r4, 12
+; P7-NEXT: lfiwzx f0, 0, r4
+; P7-NEXT: xxspltw vs0, vs0, 1
 ; P7-NEXT: stxvw4x vs0, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test3:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: addi r4, r4, 12
+; P9-AIX32-NEXT: lxvwsx vs0, 0, r4
+; P9-AIX32-NEXT: stxv vs0, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test3:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: addi r4, r4, 12
+; P8-AIX32-NEXT: lfiwzx f0, 0, r4
+; P8-AIX32-NEXT: xxspltw vs0, vs0, 1
+; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test3:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: addi r4, r4, 12
+; P7-AIX32-NEXT: lfiwzx f0, 0, r4
+; P7-AIX32-NEXT: xxspltw vs0, vs0, 1
+; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 3
   %0 = load i32, i32* %arrayidx, align 4
@@ -110,6 +182,7 @@ entry:
   ret void
 }
+
 ; v2i64
 define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
 ; P9-LABEL: test4:
@@ -132,6 +205,52 @@ define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a
 ; P7-NEXT: lxvdsx vs0, 0, r4
 ; P7-NEXT: stxvd2x vs0, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test4:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lwz r5, 24(r4)
+; P9-AIX32-NEXT: lwz r4, 28(r4)
+; P9-AIX32-NEXT: stw r4, -16(r1)
+; P9-AIX32-NEXT: stw r5, -32(r1)
+; P9-AIX32-NEXT: lxv v2, -16(r1)
+; P9-AIX32-NEXT: lxv v3, -32(r1)
+; P9-AIX32-NEXT: vmrghw v2, v3, v2
+; P9-AIX32-NEXT: xxswapd vs0, v2
+; P9-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P9-AIX32-NEXT: stxv vs0, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test4:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lwz r5, 24(r4)
+; P8-AIX32-NEXT: lwz r4, 28(r4)
+; P8-AIX32-NEXT: stw r4, -16(r1)
+; P8-AIX32-NEXT: stw r5, -32(r1)
+; P8-AIX32-NEXT: addi r4, r1, -16
+; P8-AIX32-NEXT: addi r5, r1, -32
+; P8-AIX32-NEXT: lxvw4x v2, 0, r4
+; P8-AIX32-NEXT: lxvw4x v3, 0, r5
+; P8-AIX32-NEXT: vmrghw v2, v3, v2
+; P8-AIX32-NEXT: xxswapd vs0, v2
+; P8-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test4:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lwz r6, 28(r4)
+; P7-AIX32-NEXT: lwz r4, 24(r4)
+; P7-AIX32-NEXT: addi r5, r1, -16
+; P7-AIX32-NEXT: stw r6, -16(r1)
+; P7-AIX32-NEXT: stw r4, -32(r1)
+; P7-AIX32-NEXT: addi r4, r1, -32
+; P7-AIX32-NEXT: lxvw4x v2, 0, r5
+; P7-AIX32-NEXT: lxvw4x v3, 0, r4
+; P7-AIX32-NEXT: vmrghw v2, v3, v2
+; P7-AIX32-NEXT: xxswapd vs0, v2
+; P7-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %arrayidx = getelementptr inbounds i64, i64* %a, i64 3
   %0 = load i64, i64* %arrayidx, align 8
@@ -146,26 +265,69 @@ define void @test5(<2 x i64>* %a, i32* %in) {
 ; P9-LABEL: test5:
 ; P9: # %bb.0: # %entry
 ; P9-NEXT: lfiwax f0, 0, r4
-; P9-NEXT: xxspltd vs0, vs0, 0
+; P9-NEXT: xxspltd vs0, f0, 0
 ; P9-NEXT: stxv vs0, 0(r3)
 ; P9-NEXT: blr
 ;
 ; P8-LABEL: test5:
 ; P8: # %bb.0: # %entry
 ; P8-NEXT: lfiwax f0, 0, r4
-; P8-NEXT: xxspltd vs0, vs0, 0
+; P8-NEXT: xxspltd vs0, f0, 0
 ; P8-NEXT: stxvd2x vs0, 0, r3
 ; P8-NEXT: blr
 ;
 ; P7-LABEL: test5:
 ; P7: # %bb.0: # %entry
-; P7-NEXT: lwa r4, 0(r4)
-; P7-NEXT: addi r5, r1, -16
-; P7-NEXT: std r4, -8(r1)
-; P7-NEXT: std r4, -16(r1)
-; P7-NEXT: lxvd2x vs0, 0, r5
+; P7-NEXT: lfiwax f0, 0, r4
+; P7-NEXT: xxspltd vs0, f0, 0
 ; P7-NEXT: stxvd2x vs0, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test5:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lwz r4, 0(r4)
+; P9-AIX32-NEXT: srawi r5, r4, 31
+; P9-AIX32-NEXT: stw r4, -16(r1)
+; P9-AIX32-NEXT: lxv v2, -16(r1)
+; P9-AIX32-NEXT: stw r5, -32(r1)
+; P9-AIX32-NEXT: lxv v3, -32(r1)
+; P9-AIX32-NEXT: vmrghw v2, v3, v2
+; P9-AIX32-NEXT: xxswapd vs0, v2
+; P9-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P9-AIX32-NEXT: stxv vs0, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test5:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lwz r4, 0(r4)
+; P8-AIX32-NEXT: srawi r5, r4, 31
+; P8-AIX32-NEXT: stw r4, -16(r1)
+; P8-AIX32-NEXT: addi r4, r1, -16
+; P8-AIX32-NEXT: stw r5, -32(r1)
+; P8-AIX32-NEXT: addi r5, r1, -32
+; P8-AIX32-NEXT: lxvw4x v2, 0, r4
+; P8-AIX32-NEXT: lxvw4x v3, 0, r5
+; P8-AIX32-NEXT: vmrghw v2, v3, v2
+; P8-AIX32-NEXT: xxswapd vs0, v2
+; P8-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test5:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lwz r4, 0(r4)
+; P7-AIX32-NEXT: addi r5, r1, -16
+; P7-AIX32-NEXT: stw r4, -16(r1)
+; P7-AIX32-NEXT: srawi r4, r4, 31
+; P7-AIX32-NEXT: stw r4, -32(r1)
+; P7-AIX32-NEXT: addi r4, r1, -32
+; P7-AIX32-NEXT: lxvw4x v2, 0, r5
+; P7-AIX32-NEXT: lxvw4x v3, 0, r4
+; P7-AIX32-NEXT: vmrghw v2, v3, v2
+; P7-AIX32-NEXT: xxswapd vs0, v2
+; P7-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load i32, i32* %in, align 4
   %conv = sext i32 %0 to i64
@@ -180,26 +342,69 @@ define void @test6(<2 x i64>* %a, i32* %in) {
 ; P9-LABEL: test6:
 ; P9: # %bb.0: # %entry
 ; P9-NEXT: lfiwzx f0, 0, r4
-; P9-NEXT: xxspltd vs0, vs0, 0
+; P9-NEXT: xxspltd vs0, f0, 0
 ; P9-NEXT: stxv vs0, 0(r3)
 ; P9-NEXT: blr
 ;
 ; P8-LABEL: test6:
 ; P8: # %bb.0: # %entry
 ; P8-NEXT: lfiwzx f0, 0, r4
-; P8-NEXT: xxspltd vs0, vs0, 0
+; P8-NEXT: xxspltd vs0, f0, 0
 ; P8-NEXT: stxvd2x vs0, 0, r3
 ; P8-NEXT: blr
 ;
 ; P7-LABEL: test6:
 ; P7: # %bb.0: # %entry
-; P7-NEXT: lwz r4, 0(r4)
-; P7-NEXT: addi r5, r1, -16
-; P7-NEXT: std r4, -8(r1)
-; P7-NEXT: std r4, -16(r1)
-; P7-NEXT: lxvd2x vs0, 0, r5
+; P7-NEXT: lfiwzx f0, 0, r4
+; P7-NEXT: xxspltd vs0, f0, 0
 ; P7-NEXT: stxvd2x vs0, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test6:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lwz r4, 0(r4)
+; P9-AIX32-NEXT: li r5, 0
+; P9-AIX32-NEXT: stw r5, -32(r1)
+; P9-AIX32-NEXT: lxv v2, -32(r1)
+; P9-AIX32-NEXT: stw r4, -16(r1)
+; P9-AIX32-NEXT: lxv v3, -16(r1)
+; P9-AIX32-NEXT: vmrghw v2, v2, v3
+; P9-AIX32-NEXT: xxswapd vs0, v2
+; P9-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P9-AIX32-NEXT: stxv vs0, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test6:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lwz r4, 0(r4)
+; P8-AIX32-NEXT: li r5, 0
+; P8-AIX32-NEXT: stw r5, -32(r1)
+; P8-AIX32-NEXT: addi r5, r1, -16
+; P8-AIX32-NEXT: stw r4, -16(r1)
+; P8-AIX32-NEXT: addi r4, r1, -32
+; P8-AIX32-NEXT: lxvw4x v2, 0, r4
+; P8-AIX32-NEXT: lxvw4x v3, 0, r5
+; P8-AIX32-NEXT: vmrghw v2, v2, v3
+; P8-AIX32-NEXT: xxswapd vs0, v2
+; P8-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P8-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test6:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lwz r4, 0(r4)
+; P7-AIX32-NEXT: li r5, 0
+; P7-AIX32-NEXT: stw r5, -32(r1)
+; P7-AIX32-NEXT: addi r5, r1, -16
+; P7-AIX32-NEXT: stw r4, -16(r1)
+; P7-AIX32-NEXT: addi r4, r1, -32
+; P7-AIX32-NEXT: lxvw4x v2, 0, r4
+; P7-AIX32-NEXT: lxvw4x v3, 0, r5
+; P7-AIX32-NEXT: vmrghw v2, v2, v3
+; P7-AIX32-NEXT: xxswapd vs0, v2
+; P7-AIX32-NEXT: xxsldwi vs0, vs0, v2, 2
+; P7-AIX32-NEXT: stxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load i32, i32* %in, align 4
   %conv = zext i32 %0 to i64
@@ -220,21 +425,48 @@ define void @test7(<8 x i16>* %a, i16* %in) {
 ;
 ; P8-LABEL: test7:
 ; P8: # %bb.0: # %entry
-; P8-NEXT: lhz r4, 0(r4)
-; P8-NEXT: mtvsrd v2, r4
+; P8-NEXT: lhzx r4, 0, r4
+; P8-NEXT: mtvsrwz v2, r4
 ; P8-NEXT: vsplth v2, v2, 3
 ; P8-NEXT: stvx v2, 0, r3
 ; P8-NEXT: blr
 ;
 ; P7-LABEL: test7:
 ; P7: # %bb.0: # %entry
-; P7-NEXT: lhz r4, 0(r4)
-; P7-NEXT: addi r5, r1, -16
-; P7-NEXT: sth r4, -16(r1)
-; P7-NEXT: lxvw4x v2, 0, r5
+; P7-NEXT: li r5, 1
+; P7-NEXT: lvx v2, 0, r4
+; P7-NEXT: lvsl v4, 0, r4
+; P7-NEXT: lvx v3, r5, r4
+; P7-NEXT: vperm v2, v2, v3, v4
 ; P7-NEXT: vsplth v2, v2, 0
 ; P7-NEXT: stxvw4x v2, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test7:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lxsihzx v2, 0, r4
+; P9-AIX32-NEXT: vsplth v2, v2, 3
+; P9-AIX32-NEXT: stxv v2, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test7:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lhzx r4, 0, r4
+; P8-AIX32-NEXT: mtvsrwz v2, r4
+; P8-AIX32-NEXT: vsplth v2, v2, 3
+; P8-AIX32-NEXT: stxvw4x v2, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test7:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: li r5, 1
+; P7-AIX32-NEXT: lvx v2, 0, r4
+; P7-AIX32-NEXT: lvsl v4, 0, r4
+; P7-AIX32-NEXT: lvx v3, r5, r4
+; P7-AIX32-NEXT: vperm v2, v2, v3, v4
+; P7-AIX32-NEXT: vsplth v2, v2, 0
+; P7-AIX32-NEXT: stxvw4x v2, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load i16, i16* %in, align 2
   %splat.splatinsert.i = insertelement <8 x i16> poison, i16 %0, i32 0
@@ -254,21 +486,44 @@ define void @test8(<16 x i8>* %a, i8* %in) {
 ;
 ; P8-LABEL: test8:
 ; P8: # %bb.0: # %entry
-; P8-NEXT: lbz r4, 0(r4)
-; P8-NEXT: mtvsrd v2, r4
+; P8-NEXT: lbzx r4, 0, r4
+; P8-NEXT: mtvsrwz v2, r4
 ; P8-NEXT: vspltb v2, v2, 7
 ; P8-NEXT: stvx v2, 0, r3
 ; P8-NEXT: blr
 ;
 ; P7-LABEL: test8:
 ; P7: # %bb.0: # %entry
-; P7-NEXT: lbz r4, 0(r4)
-; P7-NEXT: addi r5, r1, -16
-; P7-NEXT: stb r4, -16(r1)
-; P7-NEXT: lxvw4x v2, 0, r5
+; P7-NEXT: lvsl v2, 0, r4
+; P7-NEXT: lvx v3, 0, r4
+; P7-NEXT: vperm v2, v3, v3, v2
 ; P7-NEXT: vspltb v2, v2, 0
 ; P7-NEXT: stxvw4x v2, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: test8:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lxsibzx v2, 0, r4
+; P9-AIX32-NEXT: vspltb v2, v2, 7
+; P9-AIX32-NEXT: stxv v2, 0(r3)
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: test8:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lbzx r4, 0, r4
+; P8-AIX32-NEXT: mtvsrwz v2, r4
+; P8-AIX32-NEXT: vspltb v2, v2, 7
+; P8-AIX32-NEXT: stxvw4x v2, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: test8:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lvsl v2, 0, r4
+; P7-AIX32-NEXT: lvx v3, 0, r4
+; P7-AIX32-NEXT: vperm v2, v3, v3, v2
+; P7-AIX32-NEXT: vspltb v2, v2, 0
+; P7-AIX32-NEXT: stxvw4x v2, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load i8, i8* %in, align 1
   %splat.splatinsert.i = insertelement <16 x i8> poison, i8 %0, i32 0
@@ -297,6 +552,27 @@ define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
 ; P7-NEXT: lxvw4x vs0, 0, r4
 ; P7-NEXT: xxspltw v2, vs0, 0
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: unadjusted_lxvwsx:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lxvwsx v2, 0, r3
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: unadjusted_lxvwsx:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lwz r3, 0(r3)
+; P8-AIX32-NEXT: mtfprwz f0, r3
+; P8-AIX32-NEXT: xxspltw v2, vs0, 1
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: unadjusted_lxvwsx:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lwz r3, 0(r3)
+; P7-AIX32-NEXT: addi r4, r1, -16
+; P7-AIX32-NEXT: stw r3, -16(r1)
+; P7-AIX32-NEXT: lxvw4x vs0, 0, r4
+; P7-AIX32-NEXT: xxspltw v2, vs0, 0
+; P7-AIX32-NEXT: blr
 entry:
   %0 = bitcast i32* %s to <4 x i8>*
   %1 = load <4 x i8>, <4 x i8>* %0, align 4
@@ -325,6 +601,35 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
 ; P7-NEXT: lxvw4x vs0, 0, r4
 ; P7-NEXT: xxspltw v2, vs0, 1
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: adjusted_lxvwsx:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lwz r3, 4(r3)
+; P9-AIX32-NEXT: stw r3, -16(r1)
+; P9-AIX32-NEXT: lxv v2, -16(r1)
+; P9-AIX32-NEXT: vmrghw v2, v2, v2
+; P9-AIX32-NEXT: xxspltw v2, v2, 1
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: adjusted_lxvwsx:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lwz r3, 4(r3)
+; P8-AIX32-NEXT: addi r4, r1, -16
+; P8-AIX32-NEXT: stw r3, -16(r1)
+; P8-AIX32-NEXT: lxvw4x v2, 0, r4
+; P8-AIX32-NEXT: vmrghw v2, v2, v2
+; P8-AIX32-NEXT: xxspltw v2, v2, 1
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: adjusted_lxvwsx:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lwz r3, 4(r3)
+; P7-AIX32-NEXT: addi r4, r1, -16
+; P7-AIX32-NEXT: stw r3, -16(r1)
+; P7-AIX32-NEXT: lxvw4x v2, 0, r4
+; P7-AIX32-NEXT: vmrghw v2, v2, v2
+; P7-AIX32-NEXT: xxspltw v2, v2, 1
+; P7-AIX32-NEXT: blr
 entry:
   %0 = bitcast i64* %s to <8 x i8>*
   %1 = load <8 x i8>, <8 x i8>* %0, align 8
@@ -349,6 +654,23 @@ define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
 ; P7-NEXT: lxvw4x vs0, 0, r3
 ; P7-NEXT: xxspltw v2, vs0, 0
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: unadjusted_lxvwsx_v16i8:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lxvwsx v2, 0, r3
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: unadjusted_lxvwsx_v16i8:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: xxspltw v2, vs0, 0
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: unadjusted_lxvwsx_v16i8:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: xxspltw v2, vs0, 0
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load <16 x i8>, <16 x i8>* %s, align 16
   %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32>
@@ -373,6 +695,24 @@ define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
 ; P7-NEXT: lxvw4x vs0, 0, r3
 ; P7-NEXT: xxspltw v2, vs0, 1
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: adjusted_lxvwsx_v16i8:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: addi r3, r3, 4
+; P9-AIX32-NEXT: lxvwsx v2, 0, r3
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: adjusted_lxvwsx_v16i8:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: xxspltw v2, vs0, 1
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: adjusted_lxvwsx_v16i8:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: xxspltw v2, vs0, 1
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load <16 x i8>, <16 x i8>* %s, align 16
   %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32>
@@ -397,6 +737,24 @@ define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
 ; P7-NEXT: lxvw4x vs0, 0, r3
 ; P7-NEXT: xxspltw v2, vs0, 2
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: adjusted_lxvwsx_v16i8_2:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: addi r3, r3, 8
+; P9-AIX32-NEXT: lxvwsx v2, 0, r3
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: adjusted_lxvwsx_v16i8_2:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: xxspltw v2, vs0, 2
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: adjusted_lxvwsx_v16i8_2:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: xxspltw v2, vs0, 2
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load <16 x i8>, <16 x i8>* %s, align 16
   %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32>
@@ -421,6 +779,24 @@ define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
 ; P7-NEXT: lxvw4x vs0, 0, r3
 ; P7-NEXT: xxspltw v2, vs0, 3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: adjusted_lxvwsx_v16i8_3:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: addi r3, r3, 12
+; P9-AIX32-NEXT: lxvwsx v2, 0, r3
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: adjusted_lxvwsx_v16i8_3:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P8-AIX32-NEXT: xxspltw v2, vs0, 3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: adjusted_lxvwsx_v16i8_3:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lxvw4x vs0, 0, r3
+; P7-AIX32-NEXT: xxspltw v2, vs0, 3
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load <16 x i8>, <16 x i8>* %s, align 16
   %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32>
@@ -442,6 +818,47 @@ define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
 ; P7: # %bb.0: # %entry
 ; P7-NEXT: lxvdsx v2, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: unadjusted_lxvdsx:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lwz r4, 0(r3)
+; P9-AIX32-NEXT: stw r4, -16(r1)
+; P9-AIX32-NEXT: lwz r3, 4(r3)
+; P9-AIX32-NEXT: lxv vs1, -16(r1)
+; P9-AIX32-NEXT: mtfprwz f0, r3
+; P9-AIX32-NEXT: xxinsertw vs1, vs0, 4
+; P9-AIX32-NEXT: xxmrghd v2, vs1, vs1
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: unadjusted_lxvdsx:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lwz r4, 4(r3)
+; P8-AIX32-NEXT: stw r4, -32(r1)
+; P8-AIX32-NEXT: addi r4, r1, -16
+; P8-AIX32-NEXT: lwz r3, 0(r3)
+; P8-AIX32-NEXT: stw r3, -16(r1)
+; P8-AIX32-NEXT: addi r3, r1, -32
+; P8-AIX32-NEXT: lxvw4x v2, 0, r3
+; P8-AIX32-NEXT: lxvw4x v3, 0, r4
+; P8-AIX32-NEXT: vmrghw v2, v3, v2
+; P8-AIX32-NEXT: xxsldwi vs0, v3, v2, 2
+; P8-AIX32-NEXT: xxmrgld v2, vs0, vs0
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: unadjusted_lxvdsx:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lwz r5, 4(r3)
+; P7-AIX32-NEXT: addi r4, r1, -32
+; P7-AIX32-NEXT: stw r5, -32(r1)
+; P7-AIX32-NEXT: lwz r3, 0(r3)
+; P7-AIX32-NEXT: stw r3, -16(r1)
+; P7-AIX32-NEXT: addi r3, r1, -16
+; P7-AIX32-NEXT: lxvw4x v2, 0, r4
+; P7-AIX32-NEXT: lxvw4x v3, 0, r3
+; P7-AIX32-NEXT: vmrghw v2, v3, v2
+; P7-AIX32-NEXT: xxsldwi vs0, v3, v2, 2
+; P7-AIX32-NEXT: xxmrgld v2, vs0, vs0
+; P7-AIX32-NEXT: blr
 entry:
   %0 = bitcast i64* %s to <8 x i8>*
   %1 = load <8 x i8>, <8 x i8>* %0, align 8
@@ -464,6 +881,21 @@ define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
 ; P7: # %bb.0: # %entry
 ; P7-NEXT: lxvdsx v2, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: unadjusted_lxvdsx_v16i8:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: lxvdsx v2, 0, r3
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: unadjusted_lxvdsx_v16i8:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: lxvdsx v2, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: unadjusted_lxvdsx_v16i8:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: lxvdsx v2, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load <16 x i8>, <16 x i8>* %s, align 16
   %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32>
@@ -488,6 +920,24 @@ define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
 ; P7-NEXT: addi r3, r3, 8
 ; P7-NEXT: lxvdsx v2, 0, r3
 ; P7-NEXT: blr
+;
+; P9-AIX32-LABEL: adjusted_lxvdsx_v16i8:
+; P9-AIX32: # %bb.0: # %entry
+; P9-AIX32-NEXT: addi r3, r3, 8
+; P9-AIX32-NEXT: lxvdsx v2, 0, r3
+; P9-AIX32-NEXT: blr
+;
+; P8-AIX32-LABEL: adjusted_lxvdsx_v16i8:
+; P8-AIX32: # %bb.0: # %entry
+; P8-AIX32-NEXT: addi r3, r3, 8
+; P8-AIX32-NEXT: lxvdsx v2, 0, r3
+; P8-AIX32-NEXT: blr
+;
+; P7-AIX32-LABEL: adjusted_lxvdsx_v16i8:
+; P7-AIX32: # %bb.0: # %entry
+; P7-AIX32-NEXT: addi r3, r3, 8
+; P7-AIX32-NEXT: lxvdsx v2, 0, r3
+; P7-AIX32-NEXT: blr
 entry:
   %0 = load <16 x i8>, <16 x i8>* %s, align 16
   %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32>
diff --git a/llvm/test/CodeGen/PowerPC/longdouble-pack.ll b/llvm/test/CodeGen/PowerPC/longdouble-pack.ll
new file mode 100644
index 0000000000000..ec34dbbd92945
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/longdouble-pack.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s
+
+define double @ldbl_1(ppc_fp128 %x) {
+; CHECK-LABEL: ldbl_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmr 1, 2
+; CHECK-NEXT: blr
+entry:
+  %0 = call double @llvm.ppc.unpack.longdouble(ppc_fp128 %x, i32 0)
+  ret double %0
+}
+
+define double @ldbl_2(ppc_fp128 %x) {
+; CHECK-LABEL: ldbl_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: blr
+entry:
+  %0 = call double @llvm.ppc.unpack.longdouble(ppc_fp128 %x, i32 1)
+  ret double %0
+}
+
+define ppc_fp128 @ldbl_pack(double %x, double %y) {
+; CHECK-LABEL: ldbl_pack:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: blr
+entry:
+  %0 = call ppc_fp128 @llvm.ppc.pack.longdouble(double %x, double %y)
+  ret ppc_fp128 %0
+}
+
+declare double @llvm.ppc.unpack.longdouble(ppc_fp128, i32)
+declare ppc_fp128 @llvm.ppc.pack.longdouble(double, double)
diff --git a/llvm/test/CodeGen/PowerPC/macro-fusion.mir b/llvm/test/CodeGen/PowerPC/macro-fusion.mir
new file mode 100644
index 0000000000000..16391a2ab8fa2
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/macro-fusion.mir
@@ -0,0 +1,95 @@
+# REQUIRES: asserts
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr10 -x=mir < %s \
+# RUN: -debug-only=machine-scheduler -start-before=postmisched 2>&1 \
+# RUN: | FileCheck %s
+
+# CHECK: add_mulld:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / MULLD - ADD8
+---
+name: add_mulld
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x4 = MULLD $x3, $x4
+    renamable $x3 = ADD8 killed renamable $x4, $x5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: add_and:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / ADD8 - AND8
+---
+name: add_and
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x4 = ADD8 $x3, $x4
+    renamable $x3 = AND8 killed renamable $x4, $x5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: xor_subf:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / XOR8 - SUBF8
+---
+name: xor_subf
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x4 = XOR8 $x3, $x4
+    renamable $x3 = SUBF8 killed renamable $x4, $x5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: or_nand:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / OR8 - NAND8
+---
+name: or_nand
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x4 = OR8 $x3, $x4
+    renamable $x3 = NAND8 killed renamable $x4, $x5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
+
+# CHECK: vand_vand:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / VAND - VAND
+---
+name: vand_vand
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v2, $v3, $v4
+    renamable $v2 = VAND $v3, $v2
+    renamable $v2 = VAND killed renamable $v2, $v4
+    BLR8 implicit $lr8, implicit $rm
+...
+
+# CHECK: vadd_vadd:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / VADDUDM - VADDUDM
+---
+name: vadd_vadd
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v2, $v3, $v4
+    renamable $v2 = VADDUDM $v3, $v2
+    renamable $v2 = VADDUDM killed renamable $v2, $v4
+    BLR8 implicit $lr8, implicit $rm
+...
+
+# CHECK: sldi_add:%bb.0
+# CHECK: Macro fuse: SU(0) - SU(1) / RLDICR - ADD8
+---
+name: sldi_add
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x3, $x4, $x5
+    renamable $x4 = RLDICR $x3, 3, 60
+    renamable $x3 = ADD8 killed renamable $x4, $x5
+    BLR8 implicit $lr8, implicit $rm, implicit $x3
+...
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
index ee97843beac2b..5c8c84be07021 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
@@ -130,37 +130,37 @@ entry:
 define dso_local void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
 ; LE-PAIRED-LABEL: testXLdSt:
 ; LE-PAIRED: # %bb.0: # %entry
-; LE-PAIRED-NEXT: sldi r3, r3, 6
 ; LE-PAIRED-NEXT: paddi r5, 0, f@PCREL, 1
+; LE-PAIRED-NEXT: sldi r3, r3, 6
 ; LE-PAIRED-NEXT: add r6, r5, r3
 ; LE-PAIRED-NEXT: lxv vs1, 32(r6)
 ; LE-PAIRED-NEXT: lxv vs0, 48(r6)
 ; LE-PAIRED-NEXT: lxvx vs3, r5, r3
 ; LE-PAIRED-NEXT: lxv vs2, 16(r6)
 ; LE-PAIRED-NEXT: sldi r3, r4, 6
+; LE-PAIRED-NEXT: add r4, r5, r3
 ; LE-PAIRED-NEXT: stxvx vs3, r5, r3
-; LE-PAIRED-NEXT: add r3, r5, r3
-; LE-PAIRED-NEXT: stxv vs0, 48(r3)
-; LE-PAIRED-NEXT: stxv vs1, 32(r3)
-; LE-PAIRED-NEXT: stxv vs2, 16(r3)
+; LE-PAIRED-NEXT: stxv vs0, 48(r4)
+; LE-PAIRED-NEXT: stxv vs1, 32(r4)
+; LE-PAIRED-NEXT: stxv vs2, 16(r4)
 ; LE-PAIRED-NEXT: blr
 ;
 ; BE-PAIRED-LABEL: testXLdSt:
 ; BE-PAIRED: # %bb.0: # %entry
 ; BE-PAIRED-NEXT: addis r5, r2, f@toc@ha
-; BE-PAIRED-NEXT: sldi r3, r3, 6
 ; BE-PAIRED-NEXT: addi r5, r5, f@toc@l
+; BE-PAIRED-NEXT: sldi r3, r3, 6
 ; BE-PAIRED-NEXT: add r6, r5, r3
 ; BE-PAIRED-NEXT: lxvx vs0, r5, r3
 ; BE-PAIRED-NEXT: sldi r3, r4, 6
+; BE-PAIRED-NEXT: add r4, r5, r3
 ; BE-PAIRED-NEXT: lxv vs1, 16(r6)
 ; BE-PAIRED-NEXT: lxv vs3, 48(r6)
 ; BE-PAIRED-NEXT: lxv vs2, 32(r6)
 ; BE-PAIRED-NEXT: stxvx vs0, r5, r3
-; BE-PAIRED-NEXT: add r3, r5, r3
-; BE-PAIRED-NEXT: stxv vs1, 16(r3)
-; BE-PAIRED-NEXT: stxv vs3, 48(r3)
-; BE-PAIRED-NEXT: stxv vs2, 32(r3)
+; BE-PAIRED-NEXT: stxv vs1, 16(r4)
+; BE-PAIRED-NEXT: stxv vs3, 48(r4)
+; BE-PAIRED-NEXT: stxv vs2, 32(r4)
 ; BE-PAIRED-NEXT: blr
 ;
 ; LE-PWR9-LABEL: testXLdSt:
diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index 0d9662dc12427..d5ef4e64d0a85 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -10,8 +10,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
 define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) {
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stdu 1, -592(1)
-; CHECK-NEXT: .cfi_def_cfa_offset 592
+; CHECK-NEXT: stdu 1, -608(1)
+; CHECK-NEXT: .cfi_def_cfa_offset 608
 ; CHECK-NEXT: .cfi_offset r14, -192
 ; CHECK-NEXT: .cfi_offset r15, -184
 ; CHECK-NEXT: .cfi_offset r16, -176
@@ -48,193 +48,194 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT: .cfi_offset v29, -240
 ; CHECK-NEXT: .cfi_offset v30, -224
 ; CHECK-NEXT: .cfi_offset v31, -208
-; CHECK-NEXT: lwz 4, 0(4)
-; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill
-; CHECK-NEXT: cmpwi 4, 1
-; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill
-; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill
-; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill
+; CHECK-NEXT: lwz 0, 0(4)
+; CHECK-NEXT: std 14, 416(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 15, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT: cmpwi 0, 1
+; CHECK-NEXT: std 16, 432(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 17, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 18, 448(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 19, 456(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 20, 464(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 21, 472(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 22, 480(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 23, 488(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 24, 496(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, 504(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 26, 512(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 27, 520(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 28, 528(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 536(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 30, 544(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 31, 552(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 26, 560(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 27, 568(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 28, 576(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 29, 584(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 30, 592(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 31, 600(1) # 8-byte Folded Spill
+; CHECK-NEXT: stxv 52, 224(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 53, 240(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 54, 256(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 55, 272(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 56, 288(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 57, 304(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 58, 320(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 59, 336(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 60, 352(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 61, 368(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 62, 384(1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv 63, 400(1) # 16-byte Folded Spill
 ; CHECK-NEXT: blt 0, .LBB0_7
 ; CHECK-NEXT: # %bb.1: # %_loop_1_do_.lr.ph
-; CHECK-NEXT: mr 22, 5
-; CHECK-NEXT: lwz 5, 0(3)
-; CHECK-NEXT: cmpwi 5, 1
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: cmpwi 3, 1
 ; CHECK-NEXT: blt 0, .LBB0_7
 ; CHECK-NEXT: # %bb.2: # %_loop_1_do_.preheader
-; CHECK-NEXT: mr 14, 6
-; CHECK-NEXT: ld 6, 712(1)
-; CHECK-NEXT: lwa 3, 0(7)
-; CHECK-NEXT: addi 5, 5, 1
+; CHECK-NEXT: mr 23, 5
+; CHECK-NEXT: ld 5, 704(1)
+; CHECK-NEXT: addi 3, 3, 1
+; CHECK-NEXT: ld 4, 728(1)
+; CHECK-NEXT: mr 11, 10
+; CHECK-NEXT: mr 10, 6
 ; CHECK-NEXT: std 8, 40(1) # 8-byte Folded Spill
 ; CHECK-NEXT: std 9, 48(1) # 8-byte Folded Spill
-; CHECK-NEXT: mr 11, 10
-; CHECK-NEXT: cmpldi 5, 9
+; CHECK-NEXT: lwa 7, 0(7)
+; CHECK-NEXT: ld 29, 840(1)
+; CHECK-NEXT: cmpldi 3, 9
+; CHECK-NEXT: ld 27, 832(1)
+; CHECK-NEXT: ld 28, 856(1)
+; CHECK-NEXT: std 5, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 4, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 1, 0(5)
+; CHECK-NEXT: li 5, 9
+; CHECK-NEXT: ld 30, 848(1)
+; CHECK-NEXT: lxv 0, 0(4)
+; CHECK-NEXT: sldi 4, 7, 3
+; CHECK-NEXT: add 4, 4, 23
+; CHECK-NEXT: sldi 16, 7, 2
+; CHECK-NEXT: sldi 15, 7, 1
+; CHECK-NEXT: ld 17, 760(1)
+; CHECK-NEXT: std 27, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 6, 0(29)
+; CHECK-NEXT: ld 26, 824(1)
+; CHECK-NEXT: ld 25, 816(1)
+; CHECK-NEXT: ld 24, 808(1)
+; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 28, 216(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, 176(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 26, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 24, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT: iselgt 3, 3, 5
+; CHECK-NEXT: ld 5, 752(1)
+; CHECK-NEXT: addi 14, 4, 32
+; CHECK-NEXT: sldi 4, 7, 4
+; CHECK-NEXT: add 29, 7, 15
+; CHECK-NEXT: ld 22, 800(1)
+; CHECK-NEXT: ld 21, 792(1)
+; CHECK-NEXT: ld 20, 784(1)
+; CHECK-NEXT: std 22, 160(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 20, 144(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 21, 152(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 6, 3, -2
+; CHECK-NEXT: add 3, 7, 16
+; CHECK-NEXT: add 4, 4, 23
+; CHECK-NEXT: ld 19, 776(1)
+; CHECK-NEXT: ld 18, 768(1)
 ; CHECK-NEXT: lxv 4, 0(8)
-; CHECK-NEXT: ld 8, 696(1)
-; CHECK-NEXT: ld 10, 736(1)
-; CHECK-NEXT: ld 28, 824(1)
-; CHECK-NEXT: std 6, 88(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 10, 96(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 0, 0(6)
-; CHECK-NEXT: li 6, 9
-; CHECK-NEXT: ld 7, 688(1)
-; CHECK-NEXT: ld 27, 840(1)
-; CHECK-NEXT: ld 29, 832(1)
-; CHECK-NEXT: ld 26, 816(1)
-; CHECK-NEXT: ld 25, 808(1)
-; CHECK-NEXT: ld 24, 800(1)
-; CHECK-NEXT: ld 23, 792(1)
-; CHECK-NEXT: std 8, 32(1) # 8-byte Folded Spill
-; CHECK-NEXT: sldi 0, 3, 1
-; CHECK-NEXT: sldi 31, 3, 2
-; CHECK-NEXT: std 28, 184(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 25, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 26, 176(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 23, 152(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 24, 160(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 27, 200(1) # 8-byte Folded Spill
-; CHECK-NEXT: iselgt 5, 5, 6
+; CHECK-NEXT: lxv 2, 0(11)
+; CHECK-NEXT: std 18, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 19, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 12, 4, 32
+; CHECK-NEXT: rldicl 2, 6, 61, 3
 ; CHECK-NEXT: sldi 6, 3, 3
-; CHECK-NEXT: ld 21, 784(1)
-; CHECK-NEXT: ld 20, 776(1)
-; CHECK-NEXT: ld 19, 768(1)
-; CHECK-NEXT: ld 18, 760(1)
-; CHECK-NEXT: std 18, 120(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 19, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 20, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 21, 144(1) # 8-byte Folded Spill
-; CHECK-NEXT: add 2, 6, 22
-; CHECK-NEXT: ld 17, 752(1)
-; CHECK-NEXT: ld 16, 744(1)
+; CHECK-NEXT: ld 4, 736(1)
+; CHECK-NEXT: ld 31, 720(1)
+; CHECK-NEXT: std 11, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 31, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT: add 11, 23, 6
+; CHECK-NEXT: ld 6, 744(1)
+; CHECK-NEXT: ld 8, 712(1)
+; CHECK-NEXT: std 5, 96(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 39, 0(5)
+; CHECK-NEXT: sldi 5, 7, 5
+; CHECK-NEXT: lxv 5, 0(30)
+; CHECK-NEXT: lxv 7, 0(28)
 ; CHECK-NEXT: lxv 3, 0(9)
-; CHECK-NEXT: ld 6, 728(1)
-; CHECK-NEXT: addi 5, 5, -2
-; CHECK-NEXT: std 7, 80(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 6, 72(1) # 8-byte Folded Spill
-; CHECK-NEXT: ld 15, 720(1)
-; CHECK-NEXT: ld 9, 704(1)
+; CHECK-NEXT: addi 2, 2, 1
+; CHECK-NEXT: add 30, 23, 5
+; CHECK-NEXT: sldi 5, 29, 3
+; CHECK-NEXT: add 28, 23, 5
+; CHECK-NEXT: ld 5, 864(1)
 ; CHECK-NEXT: lxv 43, 0(8)
-; CHECK-NEXT: ld 8, 848(1)
-; CHECK-NEXT: std 11, 56(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 15, 64(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 2, 0(11)
-; CHECK-NEXT: sldi 11, 3, 4
-; CHECK-NEXT: rldicl 5, 5, 61, 3
-; CHECK-NEXT: lxv 1, 0(7)
-; CHECK-NEXT: add 7, 3, 31
-; CHECK-NEXT: add 12, 11, 22
-; CHECK-NEXT: addi 11, 2, 32
-; CHECK-NEXT: addi 2, 5, 1
-; CHECK-NEXT: lxv 6, 0(28)
-; CHECK-NEXT: sldi 5, 3, 5
-; CHECK-NEXT: add 28, 3, 0
-; CHECK-NEXT: lxv 42, 0(9)
-; CHECK-NEXT: lxv 41, 0(15)
+; CHECK-NEXT: lxv 42, 0(31)
+; CHECK-NEXT: lxv 38, 0(17)
+; CHECK-NEXT: std 4, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 6, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 41, 0(4)
 ; CHECK-NEXT: lxv 40, 0(6)
-; CHECK-NEXT: lxv 39, 0(10)
-; CHECK-NEXT: lxv 38, 0(16)
-; CHECK-NEXT: sldi 30, 7, 3
-; CHECK-NEXT: addi 12, 12, 32
-; CHECK-NEXT: add 30, 22, 30
-; CHECK-NEXT: std 16, 104(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 17, 112(1) # 8-byte Folded Spill
-; CHECK-NEXT: lxv 33, 0(17)
-; CHECK-NEXT: lxv 32, 0(18)
-; CHECK-NEXT: lxv 37, 0(19)
-; CHECK-NEXT: lxv 36, 0(20)
-; CHECK-NEXT: lxv 13, 0(21)
-; CHECK-NEXT: lxv 12, 0(23)
-; CHECK-NEXT: li 23, 0
-; CHECK-NEXT: lxv 11, 0(24)
-; CHECK-NEXT: li 24, 1
-; CHECK-NEXT: lxv 9, 0(25)
-; CHECK-NEXT: mulli 25, 3, 6
-; CHECK-NEXT: lxv 8, 0(26)
-; CHECK-NEXT: mulli 26, 3, 48
-; CHECK-NEXT: lxv 5, 0(29)
-; CHECK-NEXT: add 29, 22, 5
-; CHECK-NEXT: sldi 5, 28, 3
-; CHECK-NEXT: lxv 7, 0(27)
-; CHECK-NEXT: add 27, 22, 5
-; CHECK-NEXT: mr 5, 22
-; CHECK-NEXT: lxv 10, 0(8)
+; CHECK-NEXT: lxv 33, 0(18)
+; CHECK-NEXT: lxv 32, 0(19)
+; CHECK-NEXT: std 5, 88(1) # 8-byte Folded Spill
+; CHECK-NEXT: lxv 37, 0(20)
+; CHECK-NEXT: lxv 36, 0(21)
+; CHECK-NEXT: lxv 13, 0(22)
+; CHECK-NEXT: lxv 12, 0(24)
+; CHECK-NEXT: lxv 11, 0(25)
+; CHECK-NEXT: lxv 9, 0(26)
+; CHECK-NEXT: lxv 8, 0(27)
+; CHECK-NEXT: lxv 10, 0(5)
+; CHECK-NEXT: mulli 27, 7, 48
+; CHECK-NEXT: mulli 26, 7, 6
+; CHECK-NEXT: li 25, 1
+; CHECK-NEXT: li 24, 0
+; CHECK-NEXT: mr 5, 23
 ; CHECK-NEXT: .p2align 4
 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph
 ; CHECK-NEXT: # =>This Loop Header: Depth=1
 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2
-; CHECK-NEXT: maddld 6, 25, 23, 7
+; CHECK-NEXT: maddld 6, 26, 24, 3
 ; CHECK-NEXT: mtctr 2
 ; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 21, 22, 6
-; CHECK-NEXT: maddld 6, 25, 23, 31
+; CHECK-NEXT: add 22, 23, 6
+; CHECK-NEXT: maddld 6, 26, 24, 16
 ; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 20, 22, 6
-; CHECK-NEXT: maddld 6, 25, 23, 28
+; CHECK-NEXT: add 21, 23, 6
+; CHECK-NEXT: maddld 6, 26, 24, 29
 ; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 19, 22, 6
-; CHECK-NEXT: maddld 6, 25, 23, 0
+; CHECK-NEXT: add 20, 23, 6
+; CHECK-NEXT: maddld 6, 26, 24, 15
 ; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 18, 22, 6
-; CHECK-NEXT: maddld 6, 25, 23, 3
+; CHECK-NEXT: add 19, 23, 6
+; CHECK-NEXT: maddld 6, 26, 24, 7
 ; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 17, 22, 6
-; CHECK-NEXT: mulld 6, 25, 23
+; CHECK-NEXT: add 18, 23, 6
+; CHECK-NEXT: mulld 6, 26, 24
 ; CHECK-NEXT: sldi 6, 6, 3
-; CHECK-NEXT: add 16, 22, 6
-; CHECK-NEXT: mr 6, 14
+; CHECK-NEXT: add 17, 23, 6
+; CHECK-NEXT: mr 6, 10
 ; CHECK-NEXT: .p2align 5
 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_
 ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT: lxvp 34, 0(6)
-; CHECK-NEXT: lxvp 44, 0(16)
+; CHECK-NEXT: lxvp 44, 0(17)
 ; CHECK-NEXT: xvmaddadp 4, 45, 35
-; CHECK-NEXT: lxvp 46, 0(17)
+; CHECK-NEXT: lxvp 46, 0(18)
 ; CHECK-NEXT: xvmaddadp 3, 47, 35
-; CHECK-NEXT: lxvp 48, 0(18)
-; CHECK-NEXT: lxvp 50, 0(19)
-; CHECK-NEXT: lxvp 62, 0(20)
-; CHECK-NEXT: lxvp 60, 0(21)
+; CHECK-NEXT: lxvp 48, 0(19)
+; CHECK-NEXT: lxvp 50, 0(20)
+; CHECK-NEXT: lxvp 62, 0(21)
+; CHECK-NEXT: lxvp 60, 0(22)
 ; CHECK-NEXT: lxvp 58, 32(6)
-; CHECK-NEXT: lxvp 56, 32(16)
-; CHECK-NEXT: lxvp 54, 32(17)
-; CHECK-NEXT: lxvp 52, 32(18)
-; CHECK-NEXT: lxvp 30, 32(19)
-; CHECK-NEXT: lxvp 28, 32(20)
-; CHECK-NEXT: lxvp 26, 32(21)
+; CHECK-NEXT: lxvp 56, 32(17)
+; CHECK-NEXT: lxvp 54, 32(18)
+; CHECK-NEXT: lxvp 52, 32(19)
+; CHECK-NEXT: lxvp 30, 32(20)
+; CHECK-NEXT: lxvp 28, 32(21)
+; CHECK-NEXT: lxvp 26, 32(22)
 ; CHECK-NEXT: xvmaddadp 2, 49, 35
 ; CHECK-NEXT: xvmaddadp 1, 51, 35
 ; CHECK-NEXT: xvmaddadp 43, 63, 35
@@ -258,24 +259,24 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT: xvmaddadp 7, 28, 58
 ; CHECK-NEXT: xvmaddadp 10, 26, 58
 ; CHECK-NEXT: addi 6, 6, 64
-; CHECK-NEXT: addi 16, 16, 64
 ; CHECK-NEXT: addi 17, 17, 64
 ; CHECK-NEXT: addi 18, 18, 64
 ; CHECK-NEXT: addi 19, 19, 64
 ; CHECK-NEXT: addi 20, 20, 64
 ; CHECK-NEXT: addi 21, 21, 64
+; CHECK-NEXT: addi 22, 22, 64
 ; CHECK-NEXT: bdnz .LBB0_4
 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_
 ; CHECK-NEXT: #
-; CHECK-NEXT: addi 24, 24, 6
-; CHECK-NEXT: add 5, 5, 26
-; CHECK-NEXT: add 11, 11, 26
-; CHECK-NEXT: add 30, 30, 26
-; CHECK-NEXT: add 12, 12, 26
-; CHECK-NEXT: add 29, 29, 26
-; CHECK-NEXT: add 27, 27, 26
-; CHECK-NEXT: addi 23, 23, 1
-; CHECK-NEXT: cmpld 24, 4
+; CHECK-NEXT: addi 25, 25, 6
+; CHECK-NEXT: add 5, 5, 27
+; CHECK-NEXT: add 14, 14, 27
+; CHECK-NEXT: add 11, 11, 27
+; CHECK-NEXT: add 12, 12, 27
+; CHECK-NEXT: add 30, 30, 27
+; CHECK-NEXT: add 28, 28, 27
+; CHECK-NEXT: addi 24, 24, 1
+; CHECK-NEXT: cmpld 25, 0
 ; CHECK-NEXT: ble 0, .LBB0_3
 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
 ; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload
@@ -284,84 +285,85 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT: stxv 3, 0(3)
 ; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 2, 0(3)
-; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 1, 0(3)
-; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 43, 0(3)
-; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 42, 0(9)
-; CHECK-NEXT: stxv 0, 0(3)
 ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload
-; CHECK-NEXT: stxv 41, 0(3)
+; CHECK-NEXT: stxv 43, 0(8)
+; CHECK-NEXT: stxv 42, 0(3)
+; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 0, 0(3)
 ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 41, 0(3)
+; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 40, 0(3)
 ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 39, 0(3)
 ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 38, 0(3)
-; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 33, 0(3)
-; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 32, 0(3)
-; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 37, 0(3)
-; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 36, 0(3)
-; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 13, 0(3)
-; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 12, 0(3)
-; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 11, 0(3)
-; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 9, 0(3)
-; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 8, 0(3)
-; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 6, 0(3)
-; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 5, 0(3)
-; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload
 ; CHECK-NEXT: stxv 7, 0(3)
-; CHECK-NEXT: stxv 10, 0(8)
+; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 10, 0(3)
 ; CHECK-NEXT: .LBB0_7: # %_return_bb
-; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 55, 256(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload
-; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload
-; CHECK-NEXT: addi 1, 1, 592
+; CHECK-NEXT: lxv 63, 400(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 62, 384(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 61, 368(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 60, 352(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 59, 336(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 58, 320(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 57, 304(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 56, 288(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 55, 272(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 54, 256(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 53, 240(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 52, 224(1) # 16-byte Folded Reload
+; CHECK-NEXT: lfd 31, 600(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 30, 592(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 29, 584(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 28, 576(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 27, 568(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 26, 560(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 31, 552(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 30, 544(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 29, 536(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, 528(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, 520(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 26, 512(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 25, 504(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 24, 496(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 23, 488(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 22, 480(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 21, 472(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 20, 464(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 19, 456(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 18, 448(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 17, 440(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 16, 432(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 15, 424(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 14, 416(1) # 8-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 608
 ; CHECK-NEXT: blr
 entry:
   %_val_l_ = load i32, i32* %.l, align 4
diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
index 27b7d2d47ebe3..e3d54cc38dd80 100644
--- a/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-spill-creq.ll
@@ -158,15 +158,15 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
 ; CHECK-NEXT: mfocrf r8, 4
 ; CHECK-NEXT: rlwimi r8, r9, 9, 23, 23
 ; CHECK-NEXT: lwz r9, -4(r1)
-; CHECK-NEXT: add r5, r7, r5
 ; CHECK-NEXT: mtocrf 4, r8
 ; CHECK-NEXT: isel r3, 0, r3, 4*cr5+lt
 ; CHECK-NEXT: setbc r8, 4*cr5+un
 ; CHECK-NEXT: isel r6, 0, r6, 4*cr5+gt
-; CHECK-NEXT: isel r4, 0, r4, 4*cr5+eq
+; CHECK-NEXT: add r5, r7, r5
+; CHECK-NEXT: add r5, r8, r5
 ; CHECK-NEXT: mtocrf 128, r9
 ; CHECK-NEXT: lwz r9, -8(r1)
-; CHECK-NEXT: add r5, r8, r5
+; CHECK-NEXT: isel r4, 0, r4, 4*cr5+eq
 ; CHECK-NEXT: iseleq r3, 0, r3
 ; CHECK-NEXT: mtfprd f0, r5
 ; CHECK-NEXT: xscvsxddp f0, f0
@@ -174,13 +174,13 @@ define dso_local double @P10_Spill_CR_EQ(%2* %arg) local_unnamed_addr #0 {
 ; CHECK-NEXT: lwz r9, -12(r1)
 ; CHECK-NEXT: lwz r12, 8(r1)
 ; CHECK-NEXT: iseleq r6, 0, r6
-; CHECK-NEXT: add r3, r6, r3
 ; CHECK-NEXT: xsmuldp f0, f0, f2
 ; CHECK-NEXT: mtocrf 128, r9
 ; CHECK-NEXT: mtocrf 32, r12
 ; CHECK-NEXT: mtocrf 16, r12
 ; CHECK-NEXT: mtocrf 8, r12
 ; CHECK-NEXT: iseleq r4, 0, r4
+; CHECK-NEXT: add r3, r6, r3
 ; CHECK-NEXT: add r3, r4, r3
 ; CHECK-NEXT: mtfprd f1, r3
 ; CHECK-NEXT: xscvsxddp f1, f1
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
index 37cf078f53bff..c65a202e21087 100644
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -45,12 +45,12 @@ define dso_local signext i32 @AsmClobberX2WithTOC(i32 signext %a, i32 signext %b
 ; CHECK-LARGE: ld r2, .Lfunc_toc2-.Lfunc_gep2(r12)
 ; CHECK-LARGE: add r2, r2, r12
 ; CHECK-S: .localentry AsmClobberX2WithTOC
-; CHECK-S: add r3, r4, r3
-; CHECK-S-NEXT: #APP
+; CHECK-S: #APP
 ; CHECK-S-NEXT: li r2, 0
 ; CHECK-S-NEXT: #NO_APP
-; CHECK-S-NEXT: plwz r4, global_int@PCREL(0), 1
-; CHECK-S-NEXT: add r3, r3, r4
+; CHECK-S-NEXT: plwz r5, global_int@PCREL(0), 1
+; CHECK-S-NEXT: add r3, r4, r3
+; CHECK-S-NEXT: add r3, r3, r5
 ; CHECK-S-NEXT: extsw r3, r3
 ; CHECK-S-NEXT: blr
 entry:
diff --git a/llvm/test/CodeGen/PowerPC/read-set-flm.ll b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
index aa651e14f203a..1f4a905cd69e2 100644
--- a/llvm/test/CodeGen/PowerPC/read-set-flm.ll
+++ b/llvm/test/CodeGen/PowerPC/read-set-flm.ll
@@ -11,6 +11,7 @@ define double @in_nostrict(double %a, double %b, double %c, double %d) {
 ; CHECK-NEXT: xsdivdp 1, 1, 2
 ; CHECK-NEXT: xsadddp 1, 1, 3
 ; CHECK-NEXT: xsadddp 0, 1, 0
+; CHECK-NEXT: mffs 1
 ; CHECK-NEXT: mtfsf 255, 4
 ; CHECK-NEXT: xsdivdp 1, 3, 4
 ; CHECK-NEXT: xsadddp 1, 1, 2
@@ -46,6 +47,7 @@ define double @in_strict(double %a, double %b, double %c, double %d) #0 {
 ; CHECK-NEXT: xsdivdp 1, 1, 2
 ; CHECK-NEXT: xsadddp 1, 1, 3
 ; CHECK-NEXT: xsadddp 0, 1, 0
+; CHECK-NEXT: mffs 1
 ; CHECK-NEXT: mtfsf 255, 4
 ; CHECK-NEXT: xsdivdp 1, 3, 4
 ; CHECK-NEXT: xsadddp 1, 1, 2
@@ -74,9 +76,88 @@ entry:
   ret double %7
 }
+define void @cse_nomerge(double* %f1, double* %f2, double %f3) #0 {
+; CHECK-LABEL: cse_nomerge:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r30, -24
+; CHECK-NEXT: .cfi_offset f31, -8
+; CHECK-NEXT: std 30, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 0, 16(1)
+; CHECK-NEXT: stdu 1, -64(1)
+; CHECK-NEXT: fmr 31, 1
+; CHECK-NEXT: mr 30, 4
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: stfd 0, 0(3)
+; CHECK-NEXT: bl effect_func
+; CHECK-NEXT: nop
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: stfd 0, 0(30)
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: mtfsf 255, 31
+; CHECK-NEXT: addi 1, 1, 64
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 30, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+entry:
+  %0 = call double @llvm.ppc.readflm()
+  store double %0, double* %f1, align 8
+  call void @effect_func()
+  %1 = call double @llvm.ppc.readflm()
+  store double %1, double* %f2, align 8
+  %2 = call contract double @llvm.ppc.setflm(double %f3)
+  ret void
+}
+
+define void @cse_nomerge_readonly(double* %f1, double* %f2, double %f3) #0 {
+; CHECK-LABEL: cse_nomerge_readonly:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r30, -24
+; CHECK-NEXT: .cfi_offset f31, -8
+; CHECK-NEXT: std 30, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 0, 16(1)
+; CHECK-NEXT: stdu 1, -64(1)
+; CHECK-NEXT: fmr 31, 1
+; CHECK-NEXT: mr 30, 4
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: stfd 0, 0(3)
+; CHECK-NEXT: bl readonly_func
+; CHECK-NEXT: nop
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: stfd 0, 0(30)
+; CHECK-NEXT: mffs 0
+; CHECK-NEXT: mtfsf 255, 31
+; CHECK-NEXT: addi 1, 1, 64
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 30, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+entry:
+  %0 = call double @llvm.ppc.readflm()
+  store double %0, double* %f1, align 8
+  call void @readonly_func()
+  %1 = call double @llvm.ppc.readflm()
+  store double %1, double* %f2, align 8
+  %2 = call contract double @llvm.ppc.setflm(double %f3)
+  ret void
+}
+
+declare void @effect_func()
+declare void @readonly_func() #1
 declare double @llvm.ppc.readflm()
 declare double @llvm.ppc.setflm(double)
 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 attributes #0 = { strictfp }
+attributes #1 = { readonly }
diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
index f4572c3599421..e2291ea2a9fb3 100644
--- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
+++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
@@ -14,24 +15,27 @@ define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) {
 ; P9LE-NEXT: lfiwax f0, 0, r3
 ; P9LE-NEXT: xxmrghd v2, v2, vs0
 ; P9LE-NEXT: blr
-
+;
 ; P9BE-LABEL: s2v_test1:
 ; P9BE: # %bb.0: # %entry
 ; P9BE-NEXT: lfiwax f0, 0, r3
 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P9BE-NEXT: blr
-
+;
 ; P8LE-LABEL: s2v_test1:
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT: lfiwax f0, 0, r3
 ; P8LE-NEXT: xxmrghd v2, v2, vs0
 ; P8LE-NEXT: blr
-
+;
 ; P8BE-LABEL: s2v_test1:
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT: lfiwax f0, 0, r3
 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P8BE-NEXT: blr
+
+
+
 entry:
   %0 = load i32, i32* %int32, align 4
   %conv = sext i32 %0 to i64
@@ -47,27 +51,30 @@ define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) {
 ; P9LE-NEXT: lfiwax f0, 0, r3
 ; P9LE-NEXT: xxmrghd v2, v2, vs0
 ; P9LE-NEXT: blr
-
+;
 ; P9BE-LABEL: s2v_test2:
 ; P9BE: # %bb.0: # %entry
 ; P9BE-NEXT: addi r3, r3, 4
 ; P9BE-NEXT: lfiwax f0, 0, r3
 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P9BE-NEXT: blr
-
+;
 ; P8LE-LABEL: s2v_test2:
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT: addi r3, r3, 4
 ; P8LE-NEXT: lfiwax f0, 0, r3
 ; P8LE-NEXT: xxmrghd v2, v2, vs0
 ; P8LE-NEXT: blr
-
+;
 ; P8BE-LABEL: s2v_test2:
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT: addi r3, r3, 4
 ; P8BE-NEXT: lfiwax f0, 0, r3
 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P8BE-NEXT: blr
+
+
+
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
   %0 = load i32, i32* %arrayidx, align 4
@@ -84,27 +91,30 @@ define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32
 ; P9LE-NEXT: lfiwax f0, r3, r4
 ; P9LE-NEXT: xxmrghd v2, v2, vs0
 ; P9LE-NEXT: blr
-
+;
 ; P9BE-LABEL: s2v_test3:
 ; P9BE: # %bb.0: # %entry
 ; P9BE-NEXT: sldi r4, r7, 2
 ; P9BE-NEXT: lfiwax f0, r3, r4
 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P9BE-NEXT: blr
-
+;
 ; P8LE-LABEL: s2v_test3:
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT: sldi r4, r7, 2
 ; P8LE-NEXT: lfiwax f0, r3, r4
 ; P8LE-NEXT: xxmrghd v2, v2, vs0
 ; P8LE-NEXT: blr
-
+;
 ; P8BE-LABEL: s2v_test3:
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT: sldi r4, r7, 2
 ; P8BE-NEXT: lfiwax f0, r3, r4
 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P8BE-NEXT: blr
+
+
+
 entry:
   %idxprom = sext i32 %Idx to i64
   %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
@@ -122,27 +132,30 @@ define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) {
 ; P9LE-NEXT: lfiwax f0, 0, r3
 ; P9LE-NEXT: xxmrghd v2, v2, vs0
 ; P9LE-NEXT: blr
-
+;
 ; P9BE-LABEL: s2v_test4:
 ; P9BE: # %bb.0: # %entry
 ; P9BE-NEXT: addi r3, r3, 4
 ; P9BE-NEXT: lfiwax f0, 0, r3
 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P9BE-NEXT: blr
-
+;
 ; P8LE-LABEL: s2v_test4:
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT: addi r3, r3, 4
 ; P8LE-NEXT: lfiwax f0, 0, r3
 ; P8LE-NEXT: xxmrghd v2, v2, vs0
 ; P8LE-NEXT: blr
-
+;
 ; P8BE-LABEL: s2v_test4:
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT: addi r3, r3, 4
 ; P8BE-NEXT: lfiwax f0, 0, r3
 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P8BE-NEXT: blr
+
+
+
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
   %0 = load i32, i32* %arrayidx, align 4
@@ -158,24 +171,27 @@ define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) {
 ; P9LE-NEXT: lfiwax f0, 0, r5
 ; P9LE-NEXT: xxmrghd v2, v2, vs0
 ; P9LE-NEXT: blr
-
+;
 ; P9BE-LABEL: s2v_test5:
 ; P9BE: # %bb.0: # %entry
 ; P9BE-NEXT: lfiwax f0, 0, r5
 ; P9BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P9BE-NEXT: blr
-
+;
 ; P8LE-LABEL: s2v_test5:
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT: lfiwax f0, 0, r5
 ; P8LE-NEXT: xxmrghd v2, v2, vs0
 ; P8LE-NEXT: blr
-
+;
 ; P8BE-LABEL: s2v_test5:
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT: lfiwax f0, 0, r5
 ; P8BE-NEXT: xxpermdi v2, vs0, v2, 1
 ; P8BE-NEXT: blr
+
+
+
 entry:
   %0 = load i32, i32* %ptr1, align 4
   %conv = sext i32 %0 to i64
@@ -188,26 +204,29 @@ define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) {
 ; P9LE-LABEL: s2v_test6:
 ; P9LE: # %bb.0: # %entry
 ; P9LE-NEXT: lfiwax f0, 0, r3
-; P9LE-NEXT: xxspltd v2, vs0, 0
+; P9LE-NEXT: xxspltd v2, f0, 0
 ; P9LE-NEXT: blr
-
+;
 ; P9BE-LABEL: s2v_test6:
 ; P9BE: # %bb.0: # %entry
 ; P9BE-NEXT: lfiwax f0, 0, r3
-; P9BE-NEXT: xxspltd v2, vs0, 0
+; P9BE-NEXT: xxspltd v2, f0, 0
 ; P9BE-NEXT: blr
-
+;
 ; P8LE-LABEL: s2v_test6:
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT: lfiwax f0, 0, r3
-; P8LE-NEXT: xxspltd v2, vs0, 0
+; P8LE-NEXT: xxspltd v2, f0, 0
 ; P8LE-NEXT: blr
-
+;
 ; P8BE-LABEL: s2v_test6:
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT: lfiwax f0, 0, r3
-; P8BE-NEXT: xxspltd v2, vs0, 0
+; P8BE-NEXT: xxspltd v2, f0, 0
 ; P8BE-NEXT: blr
+
+
+
 entry:
   %0 = load i32, i32* %ptr, align 4
   %conv = sext i32 %0 to i64
@@ -221,26 +240,29 @@ define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) {
 ; P9LE-LABEL: s2v_test7:
 ; P9LE: # %bb.0: # %entry
 ; P9LE-NEXT: lfiwax f0, 0, r3
-; P9LE-NEXT: xxspltd v2, vs0, 0
+; P9LE-NEXT: xxspltd v2, f0, 0
 ; P9LE-NEXT: blr
-
+;
 ; P9BE-LABEL: s2v_test7:
 ; P9BE: # %bb.0: # %entry
 ; P9BE-NEXT: lfiwax f0, 0, r3
-; P9BE-NEXT: xxspltd v2, vs0, 0
+; P9BE-NEXT: xxspltd v2, f0, 0
 ; P9BE-NEXT: blr
-
+;
 ; P8LE-LABEL: s2v_test7:
 ; P8LE: # %bb.0: # %entry
 ; P8LE-NEXT: lfiwax f0, 0, r3
-; P8LE-NEXT: xxspltd v2, vs0, 0
+; P8LE-NEXT: xxspltd v2, f0, 0
 ; P8LE-NEXT: blr
-
+;
 ; P8BE-LABEL: s2v_test7:
 ; P8BE: # %bb.0: # %entry
 ; P8BE-NEXT: lfiwax f0, 0, r3
-; P8BE-NEXT: xxspltd v2, vs0, 0
+; P8BE-NEXT: xxspltd v2, f0, 0
 ; P8BE-NEXT: blr
+
+
+
 entry:
   %0 = load i32, i32* %ptr, align 4
   %conv = sext i32 %0 to i64
diff --git a/llvm/test/CodeGen/PowerPC/stack-protector.ll b/llvm/test/CodeGen/PowerPC/stack-protector.ll
index 1fcce55087326..0c59bb7e0a277 100644
--- a/llvm/test/CodeGen/PowerPC/stack-protector.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-protector.ll
@@ -3,16 +3,24 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux < %s | FileCheck -check-prefix=LINUX64 %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck -check-prefix=AIX32 %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck -check-prefix=AIX64 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-freebsd13 < %s | FileCheck -check-prefix=FREEBSD32 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpcle-unknown-freebsd13 < %s | FileCheck -check-prefix=FREEBSD32 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-freebsd13 < %s | FileCheck -check-prefix=FREEBSD64 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd13 < %s | FileCheck -check-prefix=FREEBSD64 %s
-; LINUX32: lwz {{[0-9]+}}, -28680(2)
-; LINUX64: ld {{[0-9]+}}, -28688(13)
+; LINUX32: lwz [[#]], -28680(2)
+; LINUX64: ld [[#]], -28688(13)
 ; AIX32: lwz {{.*}}__ssp_canary_word
 ; AIX64: ld {{.*}}__ssp_canary_word
+; FREEBSD32: lwz [[#]], __stack_chk_guard@l([[#]])
+; FREEBSD64: ld [[#]], .LC0@toc@l([[#]])
 ; LINUX32: __stack_chk_fail
 ; LINUX64: __stack_chk_fail
 ; AIX32: __stack_chk_fail
 ; AIX64: __stack_chk_fail
+; FREEBSD32: bl __stack_chk_fail
+; FREEBSD64: bl __stack_chk_fail
 @"\01LC" = internal constant [11 x i8] c"buf
== %s\0A\00" ; <[11 x i8]*> [#uses=1] diff --git a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll index 761a20e957a45..399361e7fb39e 100644 --- a/llvm/test/CodeGen/RISCV/addimm-mulimm.ll +++ b/llvm/test/CodeGen/RISCV/addimm-mulimm.ll @@ -559,9 +559,9 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) { ; ; RV64IMB-LABEL: add_mul_combine_infinite_loop: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addi a0, a0, 86 ; RV64IMB-NEXT: sh1add a0, a0, a0 -; RV64IMB-NEXT: lui a1, 1 -; RV64IMB-NEXT: addiw a1, a1, -2048 +; RV64IMB-NEXT: addi a1, zero, -16 ; RV64IMB-NEXT: sh3add a0, a0, a1 ; RV64IMB-NEXT: ret %tmp0 = mul i64 %x, 24 @@ -572,22 +572,20 @@ define i64 @add_mul_combine_infinite_loop(i64 %x) { define i32 @mul3000_add8990_a(i32 %x) { ; RV32IMB-LABEL: mul3000_add8990_a: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 2 -; RV32IMB-NEXT: addi a1, a1, 798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, -10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mul3000_add8990_a: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 ; RV64IMB-NEXT: addiw a1, a1, -1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 2 -; RV64IMB-NEXT: addiw a1, a1, 798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret %tmp0 = mul i32 %x, 3000 %tmp1 = add i32 %tmp0, 8990 @@ -597,22 +595,20 @@ define i32 @mul3000_add8990_a(i32 %x) { define signext i32 @mul3000_add8990_b(i32 signext %x) { ; RV32IMB-LABEL: mul3000_add8990_b: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 2 -; RV32IMB-NEXT: addi a1, a1, 798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, -10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mul3000_add8990_b: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 ; RV64IMB-NEXT: addiw a1, a1, -1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 2 -; RV64IMB-NEXT: addiw a1, a1, 798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret %tmp0 = mul i32 %x, 3000 %tmp1 = add i32 %tmp0, 8990 @@ -637,12 +633,11 @@ define i64 @mul3000_add8990_c(i64 %x) { ; ; RV64IMB-LABEL: mul3000_add8990_c: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1 ; RV64IMB-NEXT: addiw a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 -; RV64IMB-NEXT: lui a1, 2 -; RV64IMB-NEXT: addiw a1, a1, 798 -; RV64IMB-NEXT: add a0, a0, a1 +; RV64IMB-NEXT: addi a0, a0, -10 ; RV64IMB-NEXT: ret %tmp0 = mul i64 %x, 3000 %tmp1 = add i64 %tmp0, 8990 @@ -652,22 +647,20 @@ define i64 @mul3000_add8990_c(i64 %x) { define i32 @mul3000_sub8990_a(i32 %x) { ; RV32IMB-LABEL: mul3000_sub8990_a: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 1048574 -; RV32IMB-NEXT: addi a1, a1, -798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, 10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mul3000_sub8990_a: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 ; RV64IMB-NEXT: addiw a1, a1, -1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 1048574 -; RV64IMB-NEXT: addiw a1, a1, -798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret %tmp0 = mul 
i32 %x, 3000 %tmp1 = add i32 %tmp0, -8990 @@ -677,22 +670,20 @@ define i32 @mul3000_sub8990_a(i32 %x) { define signext i32 @mul3000_sub8990_b(i32 signext %x) { ; RV32IMB-LABEL: mul3000_sub8990_b: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1 ; RV32IMB-NEXT: addi a1, a1, -1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 1048574 -; RV32IMB-NEXT: addi a1, a1, -798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, 10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mul3000_sub8990_b: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 ; RV64IMB-NEXT: addiw a1, a1, -1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 1048574 -; RV64IMB-NEXT: addiw a1, a1, -798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret %tmp0 = mul i32 %x, 3000 %tmp1 = add i32 %tmp0, -8990 @@ -718,12 +709,11 @@ define i64 @mul3000_sub8990_c(i64 %x) { ; ; RV64IMB-LABEL: mul3000_sub8990_c: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1 ; RV64IMB-NEXT: addiw a1, a1, -1096 ; RV64IMB-NEXT: mul a0, a0, a1 -; RV64IMB-NEXT: lui a1, 1048574 -; RV64IMB-NEXT: addiw a1, a1, -798 -; RV64IMB-NEXT: add a0, a0, a1 +; RV64IMB-NEXT: addi a0, a0, 10 ; RV64IMB-NEXT: ret %tmp0 = mul i64 %x, 3000 %tmp1 = add i64 %tmp0, -8990 @@ -733,22 +723,20 @@ define i64 @mul3000_sub8990_c(i64 %x) { define i32 @mulneg3000_add8990_a(i32 %x) { ; RV32IMB-LABEL: mulneg3000_add8990_a: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1048575 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 2 -; RV32IMB-NEXT: addi a1, a1, 798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, -10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mulneg3000_add8990_a: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 ; RV64IMB-NEXT: addiw a1, a1, 1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 2 -; RV64IMB-NEXT: addiw a1, a1, 798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret %tmp0 = mul i32 %x, -3000 %tmp1 = add i32 %tmp0, 8990 @@ -758,22 +746,20 @@ define i32 @mulneg3000_add8990_a(i32 %x) { define signext i32 @mulneg3000_add8990_b(i32 signext %x) { ; RV32IMB-LABEL: mulneg3000_add8990_b: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, -3 ; RV32IMB-NEXT: lui a1, 1048575 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 2 -; RV32IMB-NEXT: addi a1, a1, 798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, -10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mulneg3000_add8990_b: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 ; RV64IMB-NEXT: addiw a1, a1, 1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 2 -; RV64IMB-NEXT: addiw a1, a1, 798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, -10 ; RV64IMB-NEXT: ret %tmp0 = mul i32 %x, -3000 %tmp1 = add i32 %tmp0, 8990 @@ -799,12 +785,11 @@ define i64 @mulneg3000_add8990_c(i64 %x) { ; ; RV64IMB-LABEL: mulneg3000_add8990_c: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addi a0, a0, -3 ; RV64IMB-NEXT: lui a1, 1048575 ; RV64IMB-NEXT: addiw a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 -; RV64IMB-NEXT: lui a1, 2 -; RV64IMB-NEXT: addiw a1, a1, 798 -; RV64IMB-NEXT: add a0, a0, a1 +; RV64IMB-NEXT: addi a0, a0, -10 ; RV64IMB-NEXT: ret %tmp0 = mul i64 %x, -3000 %tmp1 = add i64 %tmp0, 8990 @@ -814,22 +799,20 @@ define i64 
@mulneg3000_add8990_c(i64 %x) { define i32 @mulneg3000_sub8990_a(i32 %x) { ; RV32IMB-LABEL: mulneg3000_sub8990_a: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1048575 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 1048574 -; RV32IMB-NEXT: addi a1, a1, -798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, 10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mulneg3000_sub8990_a: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 ; RV64IMB-NEXT: addiw a1, a1, 1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 1048574 -; RV64IMB-NEXT: addiw a1, a1, -798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret %tmp0 = mul i32 %x, -3000 %tmp1 = add i32 %tmp0, -8990 @@ -839,22 +822,20 @@ define i32 @mulneg3000_sub8990_a(i32 %x) { define signext i32 @mulneg3000_sub8990_b(i32 signext %x) { ; RV32IMB-LABEL: mulneg3000_sub8990_b: ; RV32IMB: # %bb.0: +; RV32IMB-NEXT: addi a0, a0, 3 ; RV32IMB-NEXT: lui a1, 1048575 ; RV32IMB-NEXT: addi a1, a1, 1096 ; RV32IMB-NEXT: mul a0, a0, a1 -; RV32IMB-NEXT: lui a1, 1048574 -; RV32IMB-NEXT: addi a1, a1, -798 -; RV32IMB-NEXT: add a0, a0, a1 +; RV32IMB-NEXT: addi a0, a0, 10 ; RV32IMB-NEXT: ret ; ; RV64IMB-LABEL: mulneg3000_sub8990_b: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addiw a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 ; RV64IMB-NEXT: addiw a1, a1, 1096 ; RV64IMB-NEXT: mulw a0, a0, a1 -; RV64IMB-NEXT: lui a1, 1048574 -; RV64IMB-NEXT: addiw a1, a1, -798 -; RV64IMB-NEXT: addw a0, a0, a1 +; RV64IMB-NEXT: addiw a0, a0, 10 ; RV64IMB-NEXT: ret %tmp0 = mul i32 %x, -3000 %tmp1 = add i32 %tmp0, -8990 @@ -881,12 +862,11 @@ define i64 @mulneg3000_sub8990_c(i64 %x) { ; ; RV64IMB-LABEL: mulneg3000_sub8990_c: ; RV64IMB: # %bb.0: +; RV64IMB-NEXT: addi a0, a0, 3 ; RV64IMB-NEXT: lui a1, 1048575 ; RV64IMB-NEXT: addiw a1, a1, 1096 ; RV64IMB-NEXT: mul a0, a0, a1 -; RV64IMB-NEXT: lui a1, 1048574 -; RV64IMB-NEXT: addiw a1, a1, -798 -; RV64IMB-NEXT: add a0, a0, a1 +; RV64IMB-NEXT: addi a0, a0, 10 ; RV64IMB-NEXT: ret %tmp0 = mul i64 %x, -3000 %tmp1 = add i64 %tmp0, -8990 diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 2be0535f678d5..c0944a533762a 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -6,6 +6,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+d %s -o - | FileCheck --check-prefix=RV32D %s ; RUN: llc -mtriple=riscv32 -mattr=+c %s -o - | FileCheck --check-prefix=RV32C %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+experimental-zvamo,+experimental-zvlsseg %s -o - | FileCheck --check-prefix=RV32V %s +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfhmin %s -o - | FileCheck --check-prefix=RV32ZFHMIN %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh %s -o - | FileCheck --check-prefix=RV32ZFH %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zba %s -o - | FileCheck --check-prefix=RV32ZBA %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbb %s -o - | FileCheck --check-prefix=RV32ZBB %s @@ -24,6 +25,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d %s -o - | FileCheck --check-prefix=RV64D %s ; RUN: llc -mtriple=riscv64 -mattr=+c %s -o - | FileCheck --check-prefix=RV64C %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+experimental-zvamo,+experimental-zvlsseg %s -o - | FileCheck --check-prefix=RV64V %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfhmin %s -o - | FileCheck --check-prefix=RV64ZFHMIN %s ; RUN: llc -mtriple=riscv64 
-mattr=+experimental-zfh %s -o - | FileCheck --check-prefix=RV64ZFH %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zba %s -o - | FileCheck --check-prefix=RV64ZBA %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbb %s -o - | FileCheck --check-prefix=RV64ZBB %s @@ -43,7 +45,8 @@ ; RV32D: .attribute 5, "rv32i2p0_f2p0_d2p0" ; RV32C: .attribute 5, "rv32i2p0_c2p0" ; RV32V: .attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" -; RV32ZFH: .attribute 5, "rv32i2p0_f2p0_zfh0p1" +; RV32ZFHMIN: .attribute 5, "rv32i2p0_f2p0_zfhmin0p1" +; RV32ZFH: .attribute 5, "rv32i2p0_f2p0_zfh0p1_zfhmin0p1" ; RV32ZBA: .attribute 5, "rv32i2p0_zba1p0" ; RV32ZBB: .attribute 5, "rv32i2p0_zbb1p0" ; RV32ZBC: .attribute 5, "rv32i2p0_zbc1p0" @@ -54,14 +57,15 @@ ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93" ; RV32ZBS: .attribute 5, "rv32i2p0_zbs1p0" ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93" -; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v0p10_zfh0p1_zbb1p0_zvamo0p10_zvlsseg0p10" +; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_v0p10_zfh0p1_zfhmin0p1_zbb1p0_zvamo0p10_zvlsseg0p10" ; RV64M: .attribute 5, "rv64i2p0_m2p0" ; RV64A: .attribute 5, "rv64i2p0_a2p0" ; RV64F: .attribute 5, "rv64i2p0_f2p0" ; RV64D: .attribute 5, "rv64i2p0_f2p0_d2p0" ; RV64C: .attribute 5, "rv64i2p0_c2p0" -; RV64ZFH: .attribute 5, "rv64i2p0_f2p0_zfh0p1" +; RV64ZFHMIN: .attribute 5, "rv64i2p0_f2p0_zfhmin0p1" +; RV64ZFH: .attribute 5, "rv64i2p0_f2p0_zfh0p1_zfhmin0p1" ; RV64ZBA: .attribute 5, "rv64i2p0_zba1p0" ; RV64ZBB: .attribute 5, "rv64i2p0_zbb1p0" ; RV64ZBC: .attribute 5, "rv64i2p0_zbc1p0" @@ -73,7 +77,7 @@ ; RV64ZBS: .attribute 5, "rv64i2p0_zbs1p0" ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93" ; RV64V: .attribute 5, "rv64i2p0_v0p10_zvamo0p10_zvlsseg0p10" -; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v0p10_zfh0p1_zbb1p0_zvamo0p10_zvlsseg0p10" +; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_v0p10_zfh0p1_zfhmin0p1_zbb1p0_zvamo0p10_zvlsseg0p10" define i32 @addi(i32 %a) { diff --git a/llvm/test/CodeGen/RISCV/live-sp.mir b/llvm/test/CodeGen/RISCV/live-sp.mir new file mode 100644 index 0000000000000..2210fd1a8a927 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/live-sp.mir @@ -0,0 +1,94 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=riscv64 -run-pass=prologepilog -simplify-mir -o - %s | FileCheck %s +# verify live-on-entry registers are not marked killed by spills +--- | + + declare void @vararg(i32 signext, ...) + + define void @test1() { + entry: + %a = alloca i32, align 4 + %0 = call i8* @llvm.returnaddress(i32 0) + %1 = ptrtoint i8* %0 to i64 + %conv = trunc i64 %1 to i32 + store i32 %conv, i32* %a, align 4 + %2 = load i32, i32* %a, align 4 + call void (i32, ...) @vararg(i32 signext 0, i32 signext %2) + ret void + } + + ; Function Attrs: nofree nosync nounwind readnone willreturn + declare i8* @llvm.returnaddress(i32 immarg) #0 + + attributes #0 = { nofree nosync nounwind readnone willreturn } + +... 
+--- +name: test1 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$x1', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: true + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: true + stackProtector: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: a, type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x1 + + ; CHECK-LABEL: name: test1 + ; CHECK: liveins: $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: SD $x1, $x2, 8 :: (store (s64) into %stack.1) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $x1, -8 + ; CHECK-NEXT: SW renamable $x1, $x2, 4 :: (store (s32) into %ir.a) + ; CHECK-NEXT: renamable $x11 = ADDIW killed renamable $x1, 0 + ; CHECK-NEXT: $x10 = COPY $x0 + ; CHECK-NEXT: PseudoCALL target-flags(riscv-plt) @vararg, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit $x11, implicit-def $x2 + ; CHECK-NEXT: $x1 = LD $x2, 8 :: (load (s64) from %stack.1) + ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 16 + ; CHECK-NEXT: PseudoRET + SW renamable $x1, %stack.0.a, 0 :: (store (s32) into %ir.a) + renamable $x11 = ADDIW killed renamable $x1, 0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $x2, implicit $x2 + $x10 = COPY $x0 + PseudoCALL target-flags(riscv-plt) @vararg, csr_ilp32_lp64, implicit-def dead $x1, implicit killed $x10, implicit $x11, implicit-def $x2 + ADJCALLSTACKUP 0, 0, implicit-def dead $x2, implicit $x2 + PseudoRET + +... 
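
The new live-sp.mir test above pins down an easy-to-miss invariant: $x1 is live into the function (the return-address value is read again by the SW/ADDIW pair in the body), so the prologue spill `SD $x1, $x2, 8` must not mark the register as killed, or later passes and the machine verifier would treat the body's reads as reads of a dead register. The C++ below is a minimal sketch of the kind of guard that preserves this, assuming the spill is emitted through TargetInstrInfo::storeRegToStackSlot as in LLVM's generic PrologEpilogInserter; the helper name spillCalleeSavedReg and its exact call site are illustrative, not the change under review.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

using namespace llvm;

// Hypothetical helper: spill one callee-saved register at the top of the
// save block without losing liveness information.
static void spillCalleeSavedReg(MachineBasicBlock &SaveBlock,
                                MachineBasicBlock::iterator I, Register Reg,
                                int FrameIdx) {
  MachineFunction &MF = *SaveBlock.getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

  // A register that is live into the function may still be read after the
  // spill (in live-sp.mir, $x1 feeds the later SW and ADDIW), so the store
  // must not claim to kill it. Registers that are not live-in are dead
  // after the save and may keep the kill flag.
  bool IsLiveIn = MRI.isLiveIn(Reg);
  SaveBlock.addLiveIn(Reg);
  TII.storeRegToStackSlot(SaveBlock, I, Reg, /*isKill=*/!IsLiveIn, FrameIdx,
                          RC, TRI);
}

With a guard like this, the CHECK lines above hold: `SD $x1, $x2, 8` leaves renamable $x1 readable for the following SW and ADDIW, which is exactly what the prologepilog RUN line verifies.
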
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll index 4307ea01be5a8..f0cb94814613e 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll @@ -176,124 +176,76 @@ declare i64 @llvm.fshl.i64(i64, i64, i64) define i64 @rol_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: rol_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv a7, a1 -; RV32I-NEXT: andi a1, a2, 63 -; RV32I-NEXT: addi t0, a1, -32 -; RV32I-NEXT: addi a6, zero, 31 -; RV32I-NEXT: bltz t0, .LBB7_2 +; RV32I-NEXT: srli a3, a2, 5 +; RV32I-NEXT: andi a3, a3, 1 +; RV32I-NEXT: mv a4, a1 +; RV32I-NEXT: bnez a3, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a1, a0, t0 -; RV32I-NEXT: j .LBB7_3 +; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a3, a7, a2 -; RV32I-NEXT: sub a1, a6, a1 -; RV32I-NEXT: srli a4, a0, 1 -; RV32I-NEXT: srl a1, a4, a1 -; RV32I-NEXT: or a1, a3, a1 -; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: andi a3, a5, 63 -; RV32I-NEXT: addi a4, a3, -32 -; RV32I-NEXT: bltz a4, .LBB7_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a3, a7, a4 -; RV32I-NEXT: bltz t0, .LBB7_6 -; RV32I-NEXT: j .LBB7_7 -; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: srl a4, a7, a5 -; RV32I-NEXT: or a1, a1, a4 -; RV32I-NEXT: srl a4, a0, a5 -; RV32I-NEXT: sub a3, a6, a3 -; RV32I-NEXT: slli a5, a7, 1 -; RV32I-NEXT: sll a3, a5, a3 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: bgez t0, .LBB7_7 -; RV32I-NEXT: .LBB7_6: +; RV32I-NEXT: sll a6, a4, a2 +; RV32I-NEXT: bnez a3, .LBB7_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB7_4: +; RV32I-NEXT: srli a1, a0, 1 +; RV32I-NEXT: not a5, a2 +; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: or a3, a6, a1 ; RV32I-NEXT: sll a0, a0, a2 -; RV32I-NEXT: or a3, a3, a0 -; RV32I-NEXT: .LBB7_7: +; RV32I-NEXT: srli a1, a4, 1 +; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: rol_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv a7, a1 -; RV32ZBB-NEXT: andi a1, a2, 63 -; RV32ZBB-NEXT: addi t0, a1, -32 -; RV32ZBB-NEXT: addi a6, zero, 31 -; RV32ZBB-NEXT: bltz t0, .LBB7_2 +; RV32ZBB-NEXT: srli a3, a2, 5 +; RV32ZBB-NEXT: andi a3, a3, 1 +; RV32ZBB-NEXT: mv a4, a1 +; RV32ZBB-NEXT: bnez a3, .LBB7_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sll a1, a0, t0 -; RV32ZBB-NEXT: j .LBB7_3 +; RV32ZBB-NEXT: mv a4, a0 ; RV32ZBB-NEXT: .LBB7_2: -; RV32ZBB-NEXT: sll a3, a7, a2 -; RV32ZBB-NEXT: sub a1, a6, a1 -; RV32ZBB-NEXT: srli a4, a0, 1 -; RV32ZBB-NEXT: srl a1, a4, a1 -; RV32ZBB-NEXT: or a1, a3, a1 -; RV32ZBB-NEXT: .LBB7_3: -; RV32ZBB-NEXT: neg a5, a2 -; RV32ZBB-NEXT: andi a3, a5, 63 -; RV32ZBB-NEXT: addi a4, a3, -32 -; RV32ZBB-NEXT: bltz a4, .LBB7_5 -; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: srl a3, a7, a4 -; RV32ZBB-NEXT: bltz t0, .LBB7_6 -; RV32ZBB-NEXT: j .LBB7_7 -; RV32ZBB-NEXT: .LBB7_5: -; RV32ZBB-NEXT: srl a4, a7, a5 -; RV32ZBB-NEXT: or a1, a1, a4 -; RV32ZBB-NEXT: srl a4, a0, a5 -; RV32ZBB-NEXT: sub a3, a6, a3 -; RV32ZBB-NEXT: slli a5, a7, 1 -; RV32ZBB-NEXT: sll a3, a5, a3 -; RV32ZBB-NEXT: or a3, a4, a3 -; RV32ZBB-NEXT: bgez t0, .LBB7_7 -; RV32ZBB-NEXT: .LBB7_6: +; RV32ZBB-NEXT: sll a6, a4, a2 +; RV32ZBB-NEXT: bnez a3, .LBB7_4 +; RV32ZBB-NEXT: # %bb.3: +; RV32ZBB-NEXT: mv a0, a1 +; RV32ZBB-NEXT: .LBB7_4: +; RV32ZBB-NEXT: srli a1, a0, 1 +; RV32ZBB-NEXT: not a5, a2 +; RV32ZBB-NEXT: srl a1, a1, a5 +; RV32ZBB-NEXT: or a3, a6, a1 ; RV32ZBB-NEXT: sll a0, a0, a2 -; RV32ZBB-NEXT: or a3, a3, a0 -; RV32ZBB-NEXT: .LBB7_7: +; RV32ZBB-NEXT: srli a1, a4, 1 +; RV32ZBB-NEXT: srl 
a1, a1, a5 +; RV32ZBB-NEXT: or a1, a0, a1 ; RV32ZBB-NEXT: mv a0, a3 ; RV32ZBB-NEXT: ret ; ; RV32ZBP-LABEL: rol_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: mv a7, a1 -; RV32ZBP-NEXT: andi a1, a2, 63 -; RV32ZBP-NEXT: addi t0, a1, -32 -; RV32ZBP-NEXT: addi a6, zero, 31 -; RV32ZBP-NEXT: bltz t0, .LBB7_2 +; RV32ZBP-NEXT: srli a3, a2, 5 +; RV32ZBP-NEXT: andi a3, a3, 1 +; RV32ZBP-NEXT: mv a4, a1 +; RV32ZBP-NEXT: bnez a3, .LBB7_2 ; RV32ZBP-NEXT: # %bb.1: -; RV32ZBP-NEXT: sll a1, a0, t0 -; RV32ZBP-NEXT: j .LBB7_3 +; RV32ZBP-NEXT: mv a4, a0 ; RV32ZBP-NEXT: .LBB7_2: -; RV32ZBP-NEXT: sll a3, a7, a2 -; RV32ZBP-NEXT: sub a1, a6, a1 -; RV32ZBP-NEXT: srli a4, a0, 1 -; RV32ZBP-NEXT: srl a1, a4, a1 -; RV32ZBP-NEXT: or a1, a3, a1 -; RV32ZBP-NEXT: .LBB7_3: -; RV32ZBP-NEXT: neg a5, a2 -; RV32ZBP-NEXT: andi a3, a5, 63 -; RV32ZBP-NEXT: addi a4, a3, -32 -; RV32ZBP-NEXT: bltz a4, .LBB7_5 -; RV32ZBP-NEXT: # %bb.4: -; RV32ZBP-NEXT: srl a3, a7, a4 -; RV32ZBP-NEXT: bltz t0, .LBB7_6 -; RV32ZBP-NEXT: j .LBB7_7 -; RV32ZBP-NEXT: .LBB7_5: -; RV32ZBP-NEXT: srl a4, a7, a5 -; RV32ZBP-NEXT: or a1, a1, a4 -; RV32ZBP-NEXT: srl a4, a0, a5 -; RV32ZBP-NEXT: sub a3, a6, a3 -; RV32ZBP-NEXT: slli a5, a7, 1 -; RV32ZBP-NEXT: sll a3, a5, a3 -; RV32ZBP-NEXT: or a3, a4, a3 -; RV32ZBP-NEXT: bgez t0, .LBB7_7 -; RV32ZBP-NEXT: .LBB7_6: +; RV32ZBP-NEXT: sll a6, a4, a2 +; RV32ZBP-NEXT: bnez a3, .LBB7_4 +; RV32ZBP-NEXT: # %bb.3: +; RV32ZBP-NEXT: mv a0, a1 +; RV32ZBP-NEXT: .LBB7_4: +; RV32ZBP-NEXT: srli a1, a0, 1 +; RV32ZBP-NEXT: not a5, a2 +; RV32ZBP-NEXT: srl a1, a1, a5 +; RV32ZBP-NEXT: or a3, a6, a1 ; RV32ZBP-NEXT: sll a0, a0, a2 -; RV32ZBP-NEXT: or a3, a3, a0 -; RV32ZBP-NEXT: .LBB7_7: +; RV32ZBP-NEXT: srli a1, a4, 1 +; RV32ZBP-NEXT: srl a1, a1, a5 +; RV32ZBP-NEXT: or a1, a0, a1 ; RV32ZBP-NEXT: mv a0, a3 ; RV32ZBP-NEXT: ret %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b) @@ -332,125 +284,71 @@ declare i64 @llvm.fshr.i64(i64, i64, i64) define i64 @ror_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: ror_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv t0, a0 -; RV32I-NEXT: andi a0, a2, 63 -; RV32I-NEXT: addi a7, a0, -32 -; RV32I-NEXT: addi a6, zero, 31 -; RV32I-NEXT: bltz a7, .LBB9_2 +; RV32I-NEXT: andi a4, a2, 32 +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: beqz a4, .LBB9_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a0, a1, a7 -; RV32I-NEXT: j .LBB9_3 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: srl a3, t0, a2 -; RV32I-NEXT: sub a0, a6, a0 -; RV32I-NEXT: slli a4, a1, 1 -; RV32I-NEXT: sll a0, a4, a0 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: .LBB9_3: -; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: andi a4, a5, 63 -; RV32I-NEXT: addi a3, a4, -32 -; RV32I-NEXT: bltz a3, .LBB9_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a3, t0, a3 -; RV32I-NEXT: bltz a7, .LBB9_6 -; RV32I-NEXT: j .LBB9_7 -; RV32I-NEXT: .LBB9_5: -; RV32I-NEXT: sll a3, t0, a5 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: sll a3, a1, a5 -; RV32I-NEXT: sub a4, a6, a4 -; RV32I-NEXT: srli a5, t0, 1 -; RV32I-NEXT: srl a4, a5, a4 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: bgez a7, .LBB9_7 -; RV32I-NEXT: .LBB9_6: +; RV32I-NEXT: srl a5, a3, a2 +; RV32I-NEXT: beqz a4, .LBB9_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB9_4: +; RV32I-NEXT: slli a0, a1, 1 +; RV32I-NEXT: not a4, a2 +; RV32I-NEXT: sll a0, a0, a4 +; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: srl a1, a1, a2 -; RV32I-NEXT: or a3, a3, a1 -; RV32I-NEXT: .LBB9_7: -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: slli a2, a3, 1 +; RV32I-NEXT: sll a2, a2, a4 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: 
ror_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: mv t0, a0 -; RV32ZBB-NEXT: andi a0, a2, 63 -; RV32ZBB-NEXT: addi a7, a0, -32 -; RV32ZBB-NEXT: addi a6, zero, 31 -; RV32ZBB-NEXT: bltz a7, .LBB9_2 +; RV32ZBB-NEXT: andi a4, a2, 32 +; RV32ZBB-NEXT: mv a3, a0 +; RV32ZBB-NEXT: beqz a4, .LBB9_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: srl a0, a1, a7 -; RV32ZBB-NEXT: j .LBB9_3 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB9_2: -; RV32ZBB-NEXT: srl a3, t0, a2 -; RV32ZBB-NEXT: sub a0, a6, a0 -; RV32ZBB-NEXT: slli a4, a1, 1 -; RV32ZBB-NEXT: sll a0, a4, a0 -; RV32ZBB-NEXT: or a0, a3, a0 -; RV32ZBB-NEXT: .LBB9_3: -; RV32ZBB-NEXT: neg a5, a2 -; RV32ZBB-NEXT: andi a4, a5, 63 -; RV32ZBB-NEXT: addi a3, a4, -32 -; RV32ZBB-NEXT: bltz a3, .LBB9_5 -; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sll a3, t0, a3 -; RV32ZBB-NEXT: bltz a7, .LBB9_6 -; RV32ZBB-NEXT: j .LBB9_7 -; RV32ZBB-NEXT: .LBB9_5: -; RV32ZBB-NEXT: sll a3, t0, a5 -; RV32ZBB-NEXT: or a0, a0, a3 -; RV32ZBB-NEXT: sll a3, a1, a5 -; RV32ZBB-NEXT: sub a4, a6, a4 -; RV32ZBB-NEXT: srli a5, t0, 1 -; RV32ZBB-NEXT: srl a4, a5, a4 -; RV32ZBB-NEXT: or a3, a3, a4 -; RV32ZBB-NEXT: bgez a7, .LBB9_7 -; RV32ZBB-NEXT: .LBB9_6: +; RV32ZBB-NEXT: srl a5, a3, a2 +; RV32ZBB-NEXT: beqz a4, .LBB9_4 +; RV32ZBB-NEXT: # %bb.3: +; RV32ZBB-NEXT: mv a1, a0 +; RV32ZBB-NEXT: .LBB9_4: +; RV32ZBB-NEXT: slli a0, a1, 1 +; RV32ZBB-NEXT: not a4, a2 +; RV32ZBB-NEXT: sll a0, a0, a4 +; RV32ZBB-NEXT: or a0, a0, a5 ; RV32ZBB-NEXT: srl a1, a1, a2 -; RV32ZBB-NEXT: or a3, a3, a1 -; RV32ZBB-NEXT: .LBB9_7: -; RV32ZBB-NEXT: mv a1, a3 +; RV32ZBB-NEXT: slli a2, a3, 1 +; RV32ZBB-NEXT: sll a2, a2, a4 +; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret ; ; RV32ZBP-LABEL: ror_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: mv t0, a0 -; RV32ZBP-NEXT: andi a0, a2, 63 -; RV32ZBP-NEXT: addi a7, a0, -32 -; RV32ZBP-NEXT: addi a6, zero, 31 -; RV32ZBP-NEXT: bltz a7, .LBB9_2 +; RV32ZBP-NEXT: andi a4, a2, 32 +; RV32ZBP-NEXT: mv a3, a0 +; RV32ZBP-NEXT: beqz a4, .LBB9_2 ; RV32ZBP-NEXT: # %bb.1: -; RV32ZBP-NEXT: srl a0, a1, a7 -; RV32ZBP-NEXT: j .LBB9_3 +; RV32ZBP-NEXT: mv a3, a1 ; RV32ZBP-NEXT: .LBB9_2: -; RV32ZBP-NEXT: srl a3, t0, a2 -; RV32ZBP-NEXT: sub a0, a6, a0 -; RV32ZBP-NEXT: slli a4, a1, 1 -; RV32ZBP-NEXT: sll a0, a4, a0 -; RV32ZBP-NEXT: or a0, a3, a0 -; RV32ZBP-NEXT: .LBB9_3: -; RV32ZBP-NEXT: neg a5, a2 -; RV32ZBP-NEXT: andi a4, a5, 63 -; RV32ZBP-NEXT: addi a3, a4, -32 -; RV32ZBP-NEXT: bltz a3, .LBB9_5 -; RV32ZBP-NEXT: # %bb.4: -; RV32ZBP-NEXT: sll a3, t0, a3 -; RV32ZBP-NEXT: bltz a7, .LBB9_6 -; RV32ZBP-NEXT: j .LBB9_7 -; RV32ZBP-NEXT: .LBB9_5: -; RV32ZBP-NEXT: sll a3, t0, a5 -; RV32ZBP-NEXT: or a0, a0, a3 -; RV32ZBP-NEXT: sll a3, a1, a5 -; RV32ZBP-NEXT: sub a4, a6, a4 -; RV32ZBP-NEXT: srli a5, t0, 1 -; RV32ZBP-NEXT: srl a4, a5, a4 -; RV32ZBP-NEXT: or a3, a3, a4 -; RV32ZBP-NEXT: bgez a7, .LBB9_7 -; RV32ZBP-NEXT: .LBB9_6: +; RV32ZBP-NEXT: srl a5, a3, a2 +; RV32ZBP-NEXT: beqz a4, .LBB9_4 +; RV32ZBP-NEXT: # %bb.3: +; RV32ZBP-NEXT: mv a1, a0 +; RV32ZBP-NEXT: .LBB9_4: +; RV32ZBP-NEXT: slli a0, a1, 1 +; RV32ZBP-NEXT: not a4, a2 +; RV32ZBP-NEXT: sll a0, a0, a4 +; RV32ZBP-NEXT: or a0, a0, a5 ; RV32ZBP-NEXT: srl a1, a1, a2 -; RV32ZBP-NEXT: or a3, a3, a1 -; RV32ZBP-NEXT: .LBB9_7: -; RV32ZBP-NEXT: mv a1, a3 +; RV32ZBP-NEXT: slli a2, a3, 1 +; RV32ZBP-NEXT: sll a2, a2, a4 +; RV32ZBP-NEXT: or a1, a2, a1 ; RV32ZBP-NEXT: ret %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b) ret i64 %or @@ -501,8 +399,8 @@ define i32 @rori_i32_fshr(i32 %a) nounwind { define i64 @rori_i64(i64 %a) nounwind { ; RV32I-LABEL: rori_i64: ; RV32I: # 
%bb.0: -; RV32I-NEXT: slli a2, a1, 31 -; RV32I-NEXT: srli a3, a0, 1 +; RV32I-NEXT: srli a2, a0, 1 +; RV32I-NEXT: slli a3, a1, 31 ; RV32I-NEXT: or a2, a3, a2 ; RV32I-NEXT: srli a1, a1, 1 ; RV32I-NEXT: slli a0, a0, 31 @@ -512,8 +410,8 @@ define i64 @rori_i64(i64 %a) nounwind { ; ; RV32ZBB-LABEL: rori_i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: slli a2, a1, 31 -; RV32ZBB-NEXT: srli a3, a0, 1 +; RV32ZBB-NEXT: srli a2, a0, 1 +; RV32ZBB-NEXT: slli a3, a1, 31 ; RV32ZBB-NEXT: or a2, a3, a2 ; RV32ZBB-NEXT: srli a1, a1, 1 ; RV32ZBB-NEXT: slli a0, a0, 31 @@ -523,8 +421,8 @@ define i64 @rori_i64(i64 %a) nounwind { ; ; RV32ZBP-LABEL: rori_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: slli a2, a1, 31 -; RV32ZBP-NEXT: srli a3, a0, 1 +; RV32ZBP-NEXT: srli a2, a0, 1 +; RV32ZBP-NEXT: slli a3, a1, 31 ; RV32ZBP-NEXT: or a2, a3, a2 ; RV32ZBP-NEXT: srli a1, a1, 1 ; RV32ZBP-NEXT: slli a0, a0, 31 @@ -538,8 +436,8 @@ define i64 @rori_i64(i64 %a) nounwind { define i64 @rori_i64_fshr(i64 %a) nounwind { ; RV32I-LABEL: rori_i64_fshr: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a0, 1 -; RV32I-NEXT: srli a3, a1, 31 +; RV32I-NEXT: srli a2, a1, 31 +; RV32I-NEXT: slli a3, a0, 1 ; RV32I-NEXT: or a2, a3, a2 ; RV32I-NEXT: srli a0, a0, 31 ; RV32I-NEXT: slli a1, a1, 1 @@ -549,8 +447,8 @@ define i64 @rori_i64_fshr(i64 %a) nounwind { ; ; RV32ZBB-LABEL: rori_i64_fshr: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: slli a2, a0, 1 -; RV32ZBB-NEXT: srli a3, a1, 31 +; RV32ZBB-NEXT: srli a2, a1, 31 +; RV32ZBB-NEXT: slli a3, a0, 1 ; RV32ZBB-NEXT: or a2, a3, a2 ; RV32ZBB-NEXT: srli a0, a0, 31 ; RV32ZBB-NEXT: slli a1, a1, 1 @@ -560,8 +458,8 @@ define i64 @rori_i64_fshr(i64 %a) nounwind { ; ; RV32ZBP-LABEL: rori_i64_fshr: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: slli a2, a0, 1 -; RV32ZBP-NEXT: srli a3, a1, 31 +; RV32ZBP-NEXT: srli a2, a1, 31 +; RV32ZBP-NEXT: slli a3, a0, 1 ; RV32ZBP-NEXT: or a2, a3, a2 ; RV32ZBP-NEXT: srli a0, a0, 31 ; RV32ZBP-NEXT: slli a1, a1, 1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbt.ll b/llvm/test/CodeGen/RISCV/rv32zbt.ll index 6a298c423ad69..9cb081c1c70a7 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbt.ll @@ -340,82 +340,44 @@ declare i64 @llvm.fshl.i64(i64, i64, i64) define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: fshl_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a5, a4, 63 -; RV32I-NEXT: addi a7, a5, -32 -; RV32I-NEXT: addi a6, zero, 31 -; RV32I-NEXT: bltz a7, .LBB13_2 +; RV32I-NEXT: srli a5, a4, 5 +; RV32I-NEXT: andi a5, a5, 1 +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: bnez a5, .LBB13_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sll a1, a0, a7 -; RV32I-NEXT: j .LBB13_3 +; RV32I-NEXT: mv a6, a0 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: sll t0, a1, a4 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: srli a1, a0, 1 -; RV32I-NEXT: srl a1, a1, a5 -; RV32I-NEXT: or a1, t0, a1 -; RV32I-NEXT: .LBB13_3: -; RV32I-NEXT: not t2, a4 -; RV32I-NEXT: andi t1, t2, 63 -; RV32I-NEXT: addi a5, t1, -32 -; RV32I-NEXT: srli t0, a3, 1 -; RV32I-NEXT: bltz a5, .LBB13_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a2, t0, a5 -; RV32I-NEXT: bltz a7, .LBB13_6 -; RV32I-NEXT: j .LBB13_7 -; RV32I-NEXT: .LBB13_5: -; RV32I-NEXT: srl a5, t0, t2 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: slli a3, a3, 31 +; RV32I-NEXT: sll a7, a6, a4 +; RV32I-NEXT: bnez a5, .LBB13_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: .LBB13_4: ; RV32I-NEXT: srli a2, a2, 1 -; RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: srl a2, a2, t2 -; RV32I-NEXT: sub a3, a6, t1 -; RV32I-NEXT: slli a5, t0, 1 -; RV32I-NEXT: sll a3, a5, a3 -; 
RV32I-NEXT: or a2, a2, a3 -; RV32I-NEXT: bgez a7, .LBB13_7 +; RV32I-NEXT: not a3, a4 +; RV32I-NEXT: srl a2, a2, a3 +; RV32I-NEXT: or a2, a7, a2 +; RV32I-NEXT: bnez a5, .LBB13_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB13_6: ; RV32I-NEXT: sll a0, a0, a4 -; RV32I-NEXT: or a2, a2, a0 -; RV32I-NEXT: .LBB13_7: +; RV32I-NEXT: srli a1, a6, 1 +; RV32I-NEXT: srl a1, a1, a3 +; RV32I-NEXT: or a1, a0, a1 ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV32ZBT-LABEL: fshl_i64: ; RV32ZBT: # %bb.0: -; RV32ZBT-NEXT: sll a7, a1, a4 -; RV32ZBT-NEXT: andi a5, a4, 63 -; RV32ZBT-NEXT: addi a6, zero, 31 -; RV32ZBT-NEXT: sub t0, a6, a5 -; RV32ZBT-NEXT: srli a1, a0, 1 -; RV32ZBT-NEXT: srl a1, a1, t0 -; RV32ZBT-NEXT: or a7, a7, a1 -; RV32ZBT-NEXT: addi t1, a5, -32 -; RV32ZBT-NEXT: sll t0, a0, t1 -; RV32ZBT-NEXT: slti a1, t1, 0 -; RV32ZBT-NEXT: cmov t0, a1, a7, t0 -; RV32ZBT-NEXT: not a5, a4 -; RV32ZBT-NEXT: srli a7, a3, 1 -; RV32ZBT-NEXT: srl t4, a7, a5 -; RV32ZBT-NEXT: andi t2, a5, 63 -; RV32ZBT-NEXT: addi t3, t2, -32 -; RV32ZBT-NEXT: srai a1, t3, 31 -; RV32ZBT-NEXT: and a1, a1, t4 -; RV32ZBT-NEXT: or a1, t0, a1 -; RV32ZBT-NEXT: fsri a2, a2, a3, 1 -; RV32ZBT-NEXT: srl a2, a2, a5 -; RV32ZBT-NEXT: sub a3, a6, t2 -; RV32ZBT-NEXT: slli a5, a7, 1 -; RV32ZBT-NEXT: sll a3, a5, a3 -; RV32ZBT-NEXT: or a2, a2, a3 -; RV32ZBT-NEXT: srl a3, a7, t3 -; RV32ZBT-NEXT: slti a5, t3, 0 +; RV32ZBT-NEXT: srli a5, a4, 5 +; RV32ZBT-NEXT: andi a5, a5, 1 ; RV32ZBT-NEXT: cmov a2, a5, a2, a3 -; RV32ZBT-NEXT: sll a0, a0, a4 -; RV32ZBT-NEXT: srai a3, t1, 31 -; RV32ZBT-NEXT: and a0, a3, a0 -; RV32ZBT-NEXT: or a0, a0, a2 +; RV32ZBT-NEXT: cmov a3, a5, a3, a0 +; RV32ZBT-NEXT: andi a4, a4, 31 +; RV32ZBT-NEXT: fsl a2, a3, a2, a4 +; RV32ZBT-NEXT: cmov a0, a5, a0, a1 +; RV32ZBT-NEXT: fsl a1, a0, a3, a4 +; RV32ZBT-NEXT: mv a0, a2 ; RV32ZBT-NEXT: ret %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) ret i64 %1 @@ -453,87 +415,41 @@ declare i64 @llvm.fshr.i64(i64, i64, i64) define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: fshr_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: mv t0, a0 -; RV32I-NEXT: andi a0, a4, 63 -; RV32I-NEXT: addi a6, a0, -32 -; RV32I-NEXT: addi a7, zero, 31 -; RV32I-NEXT: bltz a6, .LBB15_2 +; RV32I-NEXT: andi a5, a4, 32 +; RV32I-NEXT: beqz a5, .LBB15_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a0, a3, a6 -; RV32I-NEXT: j .LBB15_3 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: .LBB15_2: -; RV32I-NEXT: srl a2, a2, a4 -; RV32I-NEXT: sub a0, a7, a0 -; RV32I-NEXT: slli a5, a3, 1 -; RV32I-NEXT: sll a0, a5, a0 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: .LBB15_3: -; RV32I-NEXT: not t2, a4 -; RV32I-NEXT: andi a5, t2, 63 -; RV32I-NEXT: addi a2, a5, -32 -; RV32I-NEXT: slli t1, t0, 1 -; RV32I-NEXT: bltz a2, .LBB15_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a1, t1, a2 -; RV32I-NEXT: bltz a6, .LBB15_6 -; RV32I-NEXT: j .LBB15_7 -; RV32I-NEXT: .LBB15_5: -; RV32I-NEXT: sll a2, t1, t2 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: lui a2, 524288 -; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a2, t0, a2 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: srl a2, a2, a5 -; RV32I-NEXT: srli a5, t0, 31 -; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: sll a1, a1, t2 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: bgez a6, .LBB15_7 +; RV32I-NEXT: srl a6, a2, a4 +; RV32I-NEXT: beqz a5, .LBB15_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: .LBB15_4: +; RV32I-NEXT: slli a7, a3, 1 +; RV32I-NEXT: not t0, a4 +; RV32I-NEXT: sll a2, a7, t0 +; RV32I-NEXT: or a6, a2, a6 +; RV32I-NEXT: srl a3, 
a3, a4 +; RV32I-NEXT: beqz a5, .LBB15_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: .LBB15_6: -; RV32I-NEXT: srl a2, a3, a4 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: .LBB15_7: +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: sll a0, a0, t0 +; RV32I-NEXT: or a1, a0, a3 +; RV32I-NEXT: mv a0, a6 ; RV32I-NEXT: ret ; ; RV32ZBT-LABEL: fshr_i64: ; RV32ZBT: # %bb.0: -; RV32ZBT-NEXT: srl a7, a2, a4 -; RV32ZBT-NEXT: andi a5, a4, 63 -; RV32ZBT-NEXT: addi a6, zero, 31 -; RV32ZBT-NEXT: sub t0, a6, a5 -; RV32ZBT-NEXT: slli a2, a3, 1 -; RV32ZBT-NEXT: sll a2, a2, t0 -; RV32ZBT-NEXT: or a7, a7, a2 -; RV32ZBT-NEXT: addi t2, a5, -32 -; RV32ZBT-NEXT: srl t0, a3, t2 -; RV32ZBT-NEXT: slti a2, t2, 0 -; RV32ZBT-NEXT: cmov a7, a2, a7, t0 -; RV32ZBT-NEXT: not t4, a4 -; RV32ZBT-NEXT: slli t0, a0, 1 -; RV32ZBT-NEXT: sll t1, t0, t4 -; RV32ZBT-NEXT: andi t3, t4, 63 -; RV32ZBT-NEXT: addi a5, t3, -32 -; RV32ZBT-NEXT: srai a2, a5, 31 -; RV32ZBT-NEXT: and a2, a2, t1 -; RV32ZBT-NEXT: or a7, a2, a7 -; RV32ZBT-NEXT: lui a2, 524288 -; RV32ZBT-NEXT: addi a2, a2, -1 -; RV32ZBT-NEXT: and t1, a0, a2 -; RV32ZBT-NEXT: sub a2, a6, t3 -; RV32ZBT-NEXT: srl a2, t1, a2 -; RV32ZBT-NEXT: fsri a0, a0, a1, 31 -; RV32ZBT-NEXT: sll a0, a0, t4 -; RV32ZBT-NEXT: or a0, a0, a2 -; RV32ZBT-NEXT: sll a1, t0, a5 -; RV32ZBT-NEXT: slti a2, a5, 0 -; RV32ZBT-NEXT: cmov a0, a2, a0, a1 -; RV32ZBT-NEXT: srl a1, a3, a4 -; RV32ZBT-NEXT: srai a2, t2, 31 -; RV32ZBT-NEXT: and a1, a2, a1 -; RV32ZBT-NEXT: or a1, a0, a1 -; RV32ZBT-NEXT: mv a0, a7 +; RV32ZBT-NEXT: andi a5, a4, 32 +; RV32ZBT-NEXT: cmov a6, a5, a0, a3 +; RV32ZBT-NEXT: cmov a2, a5, a3, a2 +; RV32ZBT-NEXT: andi a3, a4, 31 +; RV32ZBT-NEXT: fsr a2, a2, a6, a3 +; RV32ZBT-NEXT: cmov a0, a5, a1, a0 +; RV32ZBT-NEXT: fsr a1, a6, a0, a3 +; RV32ZBT-NEXT: mv a0, a2 ; RV32ZBT-NEXT: ret %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) ret i64 %1 @@ -558,8 +474,8 @@ define i32 @fshri_i32(i32 %a, i32 %b) nounwind { define i64 @fshri_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: fshri_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a1, a3, 27 -; RV32I-NEXT: srli a2, a2, 5 +; RV32I-NEXT: srli a1, a2, 5 +; RV32I-NEXT: slli a2, a3, 27 ; RV32I-NEXT: or a2, a2, a1 ; RV32I-NEXT: srli a1, a3, 5 ; RV32I-NEXT: slli a0, a0, 27 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll index 6d24d0b440574..622df640b1a58 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -riscv-v-vector-bits-min=128 -target-abi=ilp32d < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -riscv-v-vector-bits-min=128 -target-abi=lp64d < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 define <32 x i1> @bitcast_v4i8_v32i1(<4 x i8> %a, <32 x i1> %b) { ; CHECK-LABEL: bitcast_v4i8_v32i1: @@ -154,7 +158,7 @@ define half @bitcast_v2i8_f16(<2 x i8> %a) { ; CHECK-LABEL: bitcast_v2i8_f16: ; CHECK: 
# %bb.0: ; CHECK-NEXT: vsetivli zero, 0, e16, mf4, ta, mu -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %b = bitcast <2 x i8> %a to half ret half %b @@ -164,7 +168,7 @@ define half @bitcast_v1i16_f16(<1 x i16> %a) { ; CHECK-LABEL: bitcast_v1i16_f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 0, e16, mf4, ta, mu -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %b = bitcast <1 x i16> %a to half ret half %b @@ -174,7 +178,7 @@ define float @bitcast_v4i8_f32(<4 x i8> %a) { ; CHECK-LABEL: bitcast_v4i8_f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, mu -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %b = bitcast <4 x i8> %a to float ret float %b @@ -184,7 +188,7 @@ define float @bitcast_v2i16_f32(<2 x i16> %a) { ; CHECK-LABEL: bitcast_v2i16_f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, mu -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %b = bitcast <2 x i16> %a to float ret float %b @@ -194,84 +198,48 @@ define float @bitcast_v1i32_f32(<1 x i32> %a) { ; CHECK-LABEL: bitcast_v1i32_f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, mu -; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %b = bitcast <1 x i32> %a to float ret float %b } define double @bitcast_v8i8_f64(<8 x i8> %a) { -; RV32-LABEL: bitcast_v8i8_f64: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, zero, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitcast_v8i8_f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitcast_v8i8_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <8 x i8> %a to double ret double %b } define double @bitcast_v4i16_f64(<4 x i16> %a) { -; RV32-LABEL: bitcast_v4i16_f64: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, zero, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitcast_v4i16_f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitcast_v4i16_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <4 x i16> %a to double ret double %b } define double @bitcast_v2i32_f64(<2 x i32> %a) { -; RV32-LABEL: bitcast_v2i32_f64: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, zero, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitcast_v2i32_f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitcast_v2i32_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <2 x i32> %a to double ret double %b } define double @bitcast_v1i64_f64(<1 x i64> %a) { -; RV32-LABEL: bitcast_v1i64_f64: -; RV32: # %bb.0: -; RV32-NEXT: addi a0, zero, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, mu -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 -; 
RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: bitcast_v1i64_f64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 0, e64, m1, ta, mu -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: bitcast_v1i64_f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 0, e64, m1, ta, mu +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %b = bitcast <1 x i64> %a to double ret double %b } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll index fc19cee141e64..69c02c28ec202 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll @@ -75,7 +75,7 @@ define void @andnot_v8i1(<8 x i1>* %x, <8 x i1>* %y) { ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vlm.v v8, (a0) ; CHECK-NEXT: vlm.v v9, (a1) -; CHECK-NEXT: vmandnot.mm v8, v9, v8 +; CHECK-NEXT: vmandn.mm v8, v9, v8 ; CHECK-NEXT: vsm.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i1>, <8 x i1>* %x @@ -92,7 +92,7 @@ define void @ornot_v16i1(<16 x i1>* %x, <16 x i1>* %y) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vlm.v v8, (a0) ; CHECK-NEXT: vlm.v v9, (a1) -; CHECK-NEXT: vmornot.mm v8, v9, v8 +; CHECK-NEXT: vmorn.mm v8, v9, v8 ; CHECK-NEXT: vsm.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i1>, <16 x i1>* %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll index 3bfc7e0112eff..771e770bafd82 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll @@ -12,7 +12,7 @@ define signext i1 @vpreduce_and_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i3 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmnand.mm v9, v0, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: neg a0, a0 @@ -29,7 +29,7 @@ define signext i1 @vpreduce_or_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i32 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 @@ -47,7 +47,7 @@ define signext i1 @vpreduce_xor_v1i1(i1 signext %s, <1 x i1> %v, <1 x i1> %m, i3 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: xor a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 @@ -64,7 +64,7 @@ define signext i1 @vpreduce_and_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i3 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmnand.mm v9, v0, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: neg a0, a0 @@ -81,7 +81,7 @@ define signext i1 @vpreduce_or_v2i1(i1 signext %s, <2 x i1> %v, <2 x i1> %m, i32 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 @@ -99,7 +99,7 @@ define signext i1 @vpreduce_xor_v2i1(i1 signext 
%s, <2 x i1> %v, <2 x i1> %m, i3 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: xor a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 @@ -116,7 +116,7 @@ define signext i1 @vpreduce_and_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i3 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmnand.mm v9, v0, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: neg a0, a0 @@ -133,7 +133,7 @@ define signext i1 @vpreduce_or_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i32 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 @@ -151,7 +151,7 @@ define signext i1 @vpreduce_xor_v4i1(i1 signext %s, <4 x i1> %v, <4 x i1> %m, i3 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: xor a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 @@ -168,7 +168,7 @@ define signext i1 @vpreduce_and_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i3 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmnand.mm v9, v0, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: neg a0, a0 @@ -185,7 +185,7 @@ define signext i1 @vpreduce_or_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i32 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 @@ -203,7 +203,7 @@ define signext i1 @vpreduce_xor_v8i1(i1 signext %s, <8 x i1> %v, <8 x i1> %m, i3 ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: xor a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 @@ -220,7 +220,7 @@ define signext i1 @vpreduce_and_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmnand.mm v9, v0, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: neg a0, a0 @@ -237,7 +237,7 @@ define signext i1 @vpreduce_or_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: snez a1, a1 ; CHECK-NEXT: or a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 @@ -255,7 +255,7 @@ define signext i1 @vpreduce_xor_v16i1(i1 signext %s, <16 x i1> %v, <16 x i1> %m, ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a1, v9, v0.t +; CHECK-NEXT: vcpop.m a1, v9, v0.t ; CHECK-NEXT: xor a0, a1, a0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, 
a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll index e3f8eb3b8ef99..d452e525c6788 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll @@ -10,7 +10,7 @@ define <1 x i1> @select_v1i1(i1 zeroext %c, <1 x i1> %a, <1 x i1> %b) { ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -26,7 +26,7 @@ define <1 x i1> @selectcc_v1i1(i1 signext %a, i1 signext %b, <1 x i1> %c, <1 x i ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -41,7 +41,7 @@ define <2 x i1> @select_v2i1(i1 zeroext %c, <2 x i1> %a, <2 x i1> %b) { ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -57,7 +57,7 @@ define <2 x i1> @selectcc_v2i1(i1 signext %a, i1 signext %b, <2 x i1> %c, <2 x i ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -72,7 +72,7 @@ define <4 x i1> @select_v4i1(i1 zeroext %c, <4 x i1> %a, <4 x i1> %b) { ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -88,7 +88,7 @@ define <4 x i1> @selectcc_v4i1(i1 signext %a, i1 signext %b, <4 x i1> %c, <4 x i ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -103,7 +103,7 @@ define <8 x i1> @select_v8i1(i1 zeroext %c, <8 x i1> %a, <8 x i1> %b) { ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -119,7 +119,7 @@ define <8 x i1> @selectcc_v8i1(i1 signext %a, i1 signext %b, <8 x i1> %c, <8 x i ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -134,7 +134,7 @@ define <16 x i1> @select_v16i1(i1 zeroext %c, <16 x i1> %a, <16 x i1> %b) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 
; CHECK-NEXT: ret @@ -150,7 +150,7 @@ define <16 x i1> @selectcc_v16i1(i1 signext %a, i1 signext %b, <16 x i1> %c, <16 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmv.v.x v9, a0 ; CHECK-NEXT: vmsne.vi v9, v9, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 562d75e526777..67a5900ba69ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -declare <2 x i8> @llvm.vp.load.v2i8(<2 x i8>*, <2 x i1>, i32) +declare <2 x i8> @llvm.vp.load.v2i8.p0v2i8(<2 x i8>*, <2 x i1>, i32) define <2 x i8> @vpload_v2i8(<2 x i8>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v2i8: @@ -12,11 +12,11 @@ define <2 x i8> @vpload_v2i8(<2 x i8>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vle8.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <2 x i8> @llvm.vp.load.v2i8(<2 x i8>* %ptr, <2 x i1> %m, i32 %evl) + %load = call <2 x i8> @llvm.vp.load.v2i8.p0v2i8(<2 x i8>* %ptr, <2 x i1> %m, i32 %evl) ret <2 x i8> %load } -declare <4 x i8> @llvm.vp.load.v4i8(<4 x i8>*, <4 x i1>, i32) +declare <4 x i8> @llvm.vp.load.v4i8.p0v4i8(<4 x i8>*, <4 x i1>, i32) define <4 x i8> @vpload_v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v4i8: @@ -24,11 +24,23 @@ define <4 x i8> @vpload_v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vle8.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <4 x i8> @llvm.vp.load.v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 %evl) + %load = call <4 x i8> @llvm.vp.load.v4i8.p0v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 %evl) ret <4 x i8> %load } -declare <8 x i8> @llvm.vp.load.v8i8(<8 x i8>*, <8 x i1>, i32) +define <4 x i8> @vpload_v4i8_allones_mask(<4 x i8>* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement <4 x i1> undef, i1 true, i32 0 + %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer + %load = call <4 x i8> @llvm.vp.load.v4i8.p0v4i8(<4 x i8>* %ptr, <4 x i1> %b, i32 %evl) + ret <4 x i8> %load +} + +declare <8 x i8> @llvm.vp.load.v8i8.p0v8i8(<8 x i8>*, <8 x i1>, i32) define <8 x i8> @vpload_v8i8(<8 x i8>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v8i8: @@ -36,11 +48,11 @@ define <8 x i8> @vpload_v8i8(<8 x i8>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vle8.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <8 x i8> @llvm.vp.load.v8i8(<8 x i8>* %ptr, <8 x i1> %m, i32 %evl) + %load = call <8 x i8> @llvm.vp.load.v8i8.p0v8i8(<8 x i8>* %ptr, <8 x i1> %m, i32 %evl) ret <8 x i8> %load } -declare <2 x i16> @llvm.vp.load.v2i16(<2 x i16>*, <2 x i1>, i32) +declare <2 x i16> @llvm.vp.load.v2i16.p0v2i16(<2 x i16>*, <2 x i1>, i32) define <2 x i16> @vpload_v2i16(<2 x i16>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v2i16: @@ -48,11 +60,11 @@ define <2 x i16> @vpload_v2i16(<2 x i16>* %ptr, <2 x i1> %m, i32 zeroext 
%evl) { ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <2 x i16> @llvm.vp.load.v2i16(<2 x i16>* %ptr, <2 x i1> %m, i32 %evl) + %load = call <2 x i16> @llvm.vp.load.v2i16.p0v2i16(<2 x i16>* %ptr, <2 x i1> %m, i32 %evl) ret <2 x i16> %load } -declare <4 x i16> @llvm.vp.load.v4i16(<4 x i16>*, <4 x i1>, i32) +declare <4 x i16> @llvm.vp.load.v4i16.p0v4i16(<4 x i16>*, <4 x i1>, i32) define <4 x i16> @vpload_v4i16(<4 x i16>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v4i16: @@ -60,11 +72,11 @@ define <4 x i16> @vpload_v4i16(<4 x i16>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <4 x i16> @llvm.vp.load.v4i16(<4 x i16>* %ptr, <4 x i1> %m, i32 %evl) + %load = call <4 x i16> @llvm.vp.load.v4i16.p0v4i16(<4 x i16>* %ptr, <4 x i1> %m, i32 %evl) ret <4 x i16> %load } -declare <8 x i16> @llvm.vp.load.v8i16(<8 x i16>*, <8 x i1>, i32) +declare <8 x i16> @llvm.vp.load.v8i16.p0v8i16(<8 x i16>*, <8 x i1>, i32) define <8 x i16> @vpload_v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v8i16: @@ -72,11 +84,23 @@ define <8 x i16> @vpload_v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <8 x i16> @llvm.vp.load.v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 %evl) + %load = call <8 x i16> @llvm.vp.load.v8i16.p0v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 %evl) ret <8 x i16> %load } -declare <2 x i32> @llvm.vp.load.v2i32(<2 x i32>*, <2 x i1>, i32) +define <8 x i16> @vpload_v8i16_allones_mask(<8 x i16>* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement <8 x i1> undef, i1 true, i32 0 + %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer + %load = call <8 x i16> @llvm.vp.load.v8i16.p0v8i16(<8 x i16>* %ptr, <8 x i1> %b, i32 %evl) + ret <8 x i16> %load +} + +declare <2 x i32> @llvm.vp.load.v2i32.p0v2i32(<2 x i32>*, <2 x i1>, i32) define <2 x i32> @vpload_v2i32(<2 x i32>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v2i32: @@ -84,11 +108,11 @@ define <2 x i32> @vpload_v2i32(<2 x i32>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <2 x i32> @llvm.vp.load.v2i32(<2 x i32>* %ptr, <2 x i1> %m, i32 %evl) + %load = call <2 x i32> @llvm.vp.load.v2i32.p0v2i32(<2 x i32>* %ptr, <2 x i1> %m, i32 %evl) ret <2 x i32> %load } -declare <4 x i32> @llvm.vp.load.v4i32(<4 x i32>*, <4 x i1>, i32) +declare <4 x i32> @llvm.vp.load.v4i32.p0v4i32(<4 x i32>*, <4 x i1>, i32) define <4 x i32> @vpload_v4i32(<4 x i32>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v4i32: @@ -96,11 +120,11 @@ define <4 x i32> @vpload_v4i32(<4 x i32>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <4 x i32> @llvm.vp.load.v4i32(<4 x i32>* %ptr, <4 x i1> %m, i32 %evl) + %load = call <4 x i32> @llvm.vp.load.v4i32.p0v4i32(<4 x i32>* %ptr, <4 x i1> %m, i32 %evl) ret <4 x i32> %load } -declare <8 x i32> @llvm.vp.load.v8i32(<8 x i32>*, <8 x i1>, i32) +declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x 
i1>, i32) define <8 x i32> @vpload_v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v8i32: @@ -108,11 +132,23 @@ define <8 x i32> @vpload_v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <8 x i32> @llvm.vp.load.v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 %evl) + %load = call <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 %evl) + ret <8 x i32> %load +} + +define <8 x i32> @vpload_v8i32_allones_mask(<8 x i32>* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement <8 x i1> undef, i1 true, i32 0 + %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer + %load = call <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>* %ptr, <8 x i1> %b, i32 %evl) ret <8 x i32> %load } -declare <2 x i64> @llvm.vp.load.v2i64(<2 x i64>*, <2 x i1>, i32) +declare <2 x i64> @llvm.vp.load.v2i64.p0v2i64(<2 x i64>*, <2 x i1>, i32) define <2 x i64> @vpload_v2i64(<2 x i64>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v2i64: @@ -120,11 +156,11 @@ define <2 x i64> @vpload_v2i64(<2 x i64>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <2 x i64> @llvm.vp.load.v2i64(<2 x i64>* %ptr, <2 x i1> %m, i32 %evl) + %load = call <2 x i64> @llvm.vp.load.v2i64.p0v2i64(<2 x i64>* %ptr, <2 x i1> %m, i32 %evl) ret <2 x i64> %load } -declare <4 x i64> @llvm.vp.load.v4i64(<4 x i64>*, <4 x i1>, i32) +declare <4 x i64> @llvm.vp.load.v4i64.p0v4i64(<4 x i64>*, <4 x i1>, i32) define <4 x i64> @vpload_v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v4i64: @@ -132,11 +168,23 @@ define <4 x i64> @vpload_v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <4 x i64> @llvm.vp.load.v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 %evl) + %load = call <4 x i64> @llvm.vp.load.v4i64.p0v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x i64> %load +} + +define <4 x i64> @vpload_v4i64_allones_mask(<4 x i64>* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement <4 x i1> undef, i1 true, i32 0 + %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer + %load = call <4 x i64> @llvm.vp.load.v4i64.p0v4i64(<4 x i64>* %ptr, <4 x i1> %b, i32 %evl) ret <4 x i64> %load } -declare <8 x i64> @llvm.vp.load.v8i64(<8 x i64>*, <8 x i1>, i32) +declare <8 x i64> @llvm.vp.load.v8i64.p0v8i64(<8 x i64>*, <8 x i1>, i32) define <8 x i64> @vpload_v8i64(<8 x i64>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v8i64: @@ -144,11 +192,11 @@ define <8 x i64> @vpload_v8i64(<8 x i64>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <8 x i64> @llvm.vp.load.v8i64(<8 x i64>* %ptr, <8 x i1> %m, i32 %evl) + %load = call <8 x i64> @llvm.vp.load.v8i64.p0v8i64(<8 x i64>* %ptr, <8 x i1> %m, i32 %evl) ret <8 x i64> %load } -declare <2 x half> @llvm.vp.load.v2f16(<2 x half>*, <2 x i1>, i32) 
+declare <2 x half> @llvm.vp.load.v2f16.p0v2f16(<2 x half>*, <2 x i1>, i32) define <2 x half> @vpload_v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v2f16: @@ -156,11 +204,23 @@ define <2 x half> @vpload_v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <2 x half> @llvm.vp.load.v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 %evl) + %load = call <2 x half> @llvm.vp.load.v2f16.p0v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 %evl) ret <2 x half> %load } -declare <4 x half> @llvm.vp.load.v4f16(<4 x half>*, <4 x i1>, i32) +define <2 x half> @vpload_v2f16_allones_mask(<2 x half>* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v2f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement <2 x i1> undef, i1 true, i32 0 + %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer + %load = call <2 x half> @llvm.vp.load.v2f16.p0v2f16(<2 x half>* %ptr, <2 x i1> %b, i32 %evl) + ret <2 x half> %load +} + +declare <4 x half> @llvm.vp.load.v4f16.p0v4f16(<4 x half>*, <4 x i1>, i32) define <4 x half> @vpload_v4f16(<4 x half>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v4f16: @@ -168,11 +228,11 @@ define <4 x half> @vpload_v4f16(<4 x half>* %ptr, <4 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <4 x half> @llvm.vp.load.v4f16(<4 x half>* %ptr, <4 x i1> %m, i32 %evl) + %load = call <4 x half> @llvm.vp.load.v4f16.p0v4f16(<4 x half>* %ptr, <4 x i1> %m, i32 %evl) ret <4 x half> %load } -declare <8 x half> @llvm.vp.load.v8f16(<8 x half>*, <8 x i1>, i32) +declare <8 x half> @llvm.vp.load.v8f16.p0v8f16(<8 x half>*, <8 x i1>, i32) define <8 x half> @vpload_v8f16(<8 x half>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v8f16: @@ -180,11 +240,11 @@ define <8 x half> @vpload_v8f16(<8 x half>* %ptr, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <8 x half> @llvm.vp.load.v8f16(<8 x half>* %ptr, <8 x i1> %m, i32 %evl) + %load = call <8 x half> @llvm.vp.load.v8f16.p0v8f16(<8 x half>* %ptr, <8 x i1> %m, i32 %evl) ret <8 x half> %load } -declare <2 x float> @llvm.vp.load.v2f32(<2 x float>*, <2 x i1>, i32) +declare <2 x float> @llvm.vp.load.v2f32.p0v2f32(<2 x float>*, <2 x i1>, i32) define <2 x float> @vpload_v2f32(<2 x float>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v2f32: @@ -192,11 +252,11 @@ define <2 x float> @vpload_v2f32(<2 x float>* %ptr, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <2 x float> @llvm.vp.load.v2f32(<2 x float>* %ptr, <2 x i1> %m, i32 %evl) + %load = call <2 x float> @llvm.vp.load.v2f32.p0v2f32(<2 x float>* %ptr, <2 x i1> %m, i32 %evl) ret <2 x float> %load } -declare <4 x float> @llvm.vp.load.v4f32(<4 x float>*, <4 x i1>, i32) +declare <4 x float> @llvm.vp.load.v4f32.p0v4f32(<4 x float>*, <4 x i1>, i32) define <4 x float> @vpload_v4f32(<4 x float>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v4f32: @@ -204,11 +264,11 @@ define <4 x float> @vpload_v4f32(<4 x float>* %ptr, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, 
(a0), v0.t ; CHECK-NEXT: ret - %load = call <4 x float> @llvm.vp.load.v4f32(<4 x float>* %ptr, <4 x i1> %m, i32 %evl) + %load = call <4 x float> @llvm.vp.load.v4f32.p0v4f32(<4 x float>* %ptr, <4 x i1> %m, i32 %evl) ret <4 x float> %load } -declare <8 x float> @llvm.vp.load.v8f32(<8 x float>*, <8 x i1>, i32) +declare <8 x float> @llvm.vp.load.v8f32.p0v8f32(<8 x float>*, <8 x i1>, i32) define <8 x float> @vpload_v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v8f32: @@ -216,11 +276,23 @@ define <8 x float> @vpload_v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <8 x float> @llvm.vp.load.v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 %evl) + %load = call <8 x float> @llvm.vp.load.v8f32.p0v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 %evl) ret <8 x float> %load } -declare <2 x double> @llvm.vp.load.v2f64(<2 x double>*, <2 x i1>, i32) +define <8 x float> @vpload_v8f32_allones_mask(<8 x float>* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v8f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement <8 x i1> undef, i1 true, i32 0 + %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer + %load = call <8 x float> @llvm.vp.load.v8f32.p0v8f32(<8 x float>* %ptr, <8 x i1> %b, i32 %evl) + ret <8 x float> %load +} + +declare <2 x double> @llvm.vp.load.v2f64.p0v2f64(<2 x double>*, <2 x i1>, i32) define <2 x double> @vpload_v2f64(<2 x double>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v2f64: @@ -228,11 +300,11 @@ define <2 x double> @vpload_v2f64(<2 x double>* %ptr, <2 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <2 x double> @llvm.vp.load.v2f64(<2 x double>* %ptr, <2 x i1> %m, i32 %evl) + %load = call <2 x double> @llvm.vp.load.v2f64.p0v2f64(<2 x double>* %ptr, <2 x i1> %m, i32 %evl) ret <2 x double> %load } -declare <4 x double> @llvm.vp.load.v4f64(<4 x double>*, <4 x i1>, i32) +declare <4 x double> @llvm.vp.load.v4f64.p0v4f64(<4 x double>*, <4 x i1>, i32) define <4 x double> @vpload_v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v4f64: @@ -240,11 +312,23 @@ define <4 x double> @vpload_v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <4 x double> @llvm.vp.load.v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 %evl) + %load = call <4 x double> @llvm.vp.load.v4f64.p0v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 %evl) + ret <4 x double> %load +} + +define <4 x double> @vpload_v4f64_allones_mask(<4 x double>* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_v4f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement <4 x i1> undef, i1 true, i32 0 + %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer + %load = call <4 x double> @llvm.vp.load.v4f64.p0v4f64(<4 x double>* %ptr, <4 x i1> %b, i32 %evl) ret <4 x double> %load } -declare <8 x double> @llvm.vp.load.v8f64(<8 x double>*, <8 x i1>, i32) +declare <8 x double> @llvm.vp.load.v8f64.p0v8f64(<8 x double>*, <8 x i1>, i32) define <8 x double> @vpload_v8f64(<8 x double>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; 
CHECK-LABEL: vpload_v8f64: @@ -252,6 +336,6 @@ define <8 x double> @vpload_v8f64(<8 x double>* %ptr, <8 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: ret - %load = call <8 x double> @llvm.vp.load.v8f64(<8 x double>* %ptr, <8 x i1> %m, i32 %evl) + %load = call <8 x double> @llvm.vp.load.v8f64.p0v8f64(<8 x double>* %ptr, <8 x i1> %m, i32 %evl) ret <8 x double> %load } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll index 28e2dc89df115..8a3fb38c8206f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -declare void @llvm.vp.store.v2i8(<2 x i8>, <2 x i8>*, <2 x i1>, i32) +declare void @llvm.vp.store.v2i8.p0v2i8(<2 x i8>, <2 x i8>*, <2 x i1>, i32) define void @vpstore_v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v2i8: @@ -12,11 +12,11 @@ define void @vpstore_v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vse8.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %m, i32 %evl) + call void @llvm.vp.store.v2i8.p0v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v4i8(<4 x i8>, <4 x i8>*, <4 x i1>, i32) +declare void @llvm.vp.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, <4 x i1>, i32) define void @vpstore_v4i8(<4 x i8> %val, <4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v4i8: @@ -24,11 +24,11 @@ define void @vpstore_v4i8(<4 x i8> %val, <4 x i8>* %ptr, <4 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vse8.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v4i8(<4 x i8> %val, <4 x i8>* %ptr, <4 x i1> %m, i32 %evl) + call void @llvm.vp.store.v4i8.p0v4i8(<4 x i8> %val, <4 x i8>* %ptr, <4 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v8i8(<8 x i8>, <8 x i8>*, <8 x i1>, i32) +declare void @llvm.vp.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, <8 x i1>, i32) define void @vpstore_v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v8i8: @@ -36,11 +36,11 @@ define void @vpstore_v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vse8.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %m, i32 %evl) + call void @llvm.vp.store.v8i8.p0v8i8(<8 x i8> %val, <8 x i8>* %ptr, <8 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v2i16(<2 x i16>, <2 x i16>*, <2 x i1>, i32) +declare void @llvm.vp.store.v2i16.p0v2i16(<2 x i16>, <2 x i16>*, <2 x i1>, i32) define void @vpstore_v2i16(<2 x i16> %val, <2 x i16>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v2i16: @@ -48,11 +48,11 @@ define void @vpstore_v2i16(<2 x i16> %val, <2 x i16>* %ptr, <2 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v2i16(<2 x i16> %val, <2 x i16>* %ptr, <2 x i1> %m, i32 %evl) + call void @llvm.vp.store.v2i16.p0v2i16(<2 x i16> %val, <2 x i16>* %ptr, <2 x i1> %m, i32 %evl) ret void } 
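; --- Editorial sketch (not part of the diff) -------------------------------
; The declarations in these two files gain a pointer-type suffix because
; llvm.vp.load and llvm.vp.store are overloaded on the pointer type as well as
; on the vector type: .p0v2i8 mangles "pointer, address space 0, to <2 x i8>".
; A minimal, hypothetical use of the renamed intrinsic (the wrapper function
; name is invented for illustration; the declaration is copied from the diff):
declare <2 x i8> @llvm.vp.load.v2i8.p0v2i8(<2 x i8>*, <2 x i1>, i32)
define <2 x i8> @example_vpload(<2 x i8>* %p, <2 x i1> %m, i32 zeroext %evl) {
  ; Loads the first %evl lanes from %p under mask %m.
  %v = call <2 x i8> @llvm.vp.load.v2i8.p0v2i8(<2 x i8>* %p, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}
; ----------------------------------------------------------------------------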
-declare void @llvm.vp.store.v4i16(<4 x i16>, <4 x i16>*, <4 x i1>, i32) +declare void @llvm.vp.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, <4 x i1>, i32) define void @vpstore_v4i16(<4 x i16> %val, <4 x i16>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v4i16: @@ -60,11 +60,11 @@ define void @vpstore_v4i16(<4 x i16> %val, <4 x i16>* %ptr, <4 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v4i16(<4 x i16> %val, <4 x i16>* %ptr, <4 x i1> %m, i32 %evl) + call void @llvm.vp.store.v4i16.p0v4i16(<4 x i16> %val, <4 x i16>* %ptr, <4 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v8i16(<8 x i16>, <8 x i16>*, <8 x i1>, i32) +declare void @llvm.vp.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, <8 x i1>, i32) define void @vpstore_v8i16(<8 x i16> %val, <8 x i16>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v8i16: @@ -72,11 +72,11 @@ define void @vpstore_v8i16(<8 x i16> %val, <8 x i16>* %ptr, <8 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v8i16(<8 x i16> %val, <8 x i16>* %ptr, <8 x i1> %m, i32 %evl) + call void @llvm.vp.store.v8i16.p0v8i16(<8 x i16> %val, <8 x i16>* %ptr, <8 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v2i32(<2 x i32>, <2 x i32>*, <2 x i1>, i32) +declare void @llvm.vp.store.v2i32.p0v2i32(<2 x i32>, <2 x i32>*, <2 x i1>, i32) define void @vpstore_v2i32(<2 x i32> %val, <2 x i32>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v2i32: @@ -84,11 +84,11 @@ define void @vpstore_v2i32(<2 x i32> %val, <2 x i32>* %ptr, <2 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v2i32(<2 x i32> %val, <2 x i32>* %ptr, <2 x i1> %m, i32 %evl) + call void @llvm.vp.store.v2i32.p0v2i32(<2 x i32> %val, <2 x i32>* %ptr, <2 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v4i32(<4 x i32>, <4 x i32>*, <4 x i1>, i32) +declare void @llvm.vp.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, <4 x i1>, i32) define void @vpstore_v4i32(<4 x i32> %val, <4 x i32>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v4i32: @@ -96,11 +96,11 @@ define void @vpstore_v4i32(<4 x i32> %val, <4 x i32>* %ptr, <4 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v4i32(<4 x i32> %val, <4 x i32>* %ptr, <4 x i1> %m, i32 %evl) + call void @llvm.vp.store.v4i32.p0v4i32(<4 x i32> %val, <4 x i32>* %ptr, <4 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) +declare void @llvm.vp.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, <8 x i1>, i32) define void @vpstore_v8i32(<8 x i32> %val, <8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v8i32: @@ -108,11 +108,11 @@ define void @vpstore_v8i32(<8 x i32> %val, <8 x i32>* %ptr, <8 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v8i32(<8 x i32> %val, <8 x i32>* %ptr, <8 x i1> %m, i32 %evl) + call void @llvm.vp.store.v8i32.p0v8i32(<8 x i32> %val, <8 x i32>* %ptr, <8 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v2i64(<2 x i64>, <2 x i64>*, <2 x i1>, i32) +declare void @llvm.vp.store.v2i64.p0v2i64(<2 x i64>, <2 x i64>*, <2 x i1>, i32) define 
void @vpstore_v2i64(<2 x i64> %val, <2 x i64>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v2i64: @@ -120,11 +120,11 @@ define void @vpstore_v2i64(<2 x i64> %val, <2 x i64>* %ptr, <2 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v2i64(<2 x i64> %val, <2 x i64>* %ptr, <2 x i1> %m, i32 %evl) + call void @llvm.vp.store.v2i64.p0v2i64(<2 x i64> %val, <2 x i64>* %ptr, <2 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v4i64(<4 x i64>, <4 x i64>*, <4 x i1>, i32) +declare void @llvm.vp.store.v4i64.p0v4i64(<4 x i64>, <4 x i64>*, <4 x i1>, i32) define void @vpstore_v4i64(<4 x i64> %val, <4 x i64>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v4i64: @@ -132,11 +132,11 @@ define void @vpstore_v4i64(<4 x i64> %val, <4 x i64>* %ptr, <4 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v4i64(<4 x i64> %val, <4 x i64>* %ptr, <4 x i1> %m, i32 %evl) + call void @llvm.vp.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %ptr, <4 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v8i64(<8 x i64>, <8 x i64>*, <8 x i1>, i32) +declare void @llvm.vp.store.v8i64.p0v8i64(<8 x i64>, <8 x i64>*, <8 x i1>, i32) define void @vpstore_v8i64(<8 x i64> %val, <8 x i64>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v8i64: @@ -144,11 +144,11 @@ define void @vpstore_v8i64(<8 x i64> %val, <8 x i64>* %ptr, <8 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v8i64(<8 x i64> %val, <8 x i64>* %ptr, <8 x i1> %m, i32 %evl) + call void @llvm.vp.store.v8i64.p0v8i64(<8 x i64> %val, <8 x i64>* %ptr, <8 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v2f16(<2 x half>, <2 x half>*, <2 x i1>, i32) +declare void @llvm.vp.store.v2f16.p0v2f16(<2 x half>, <2 x half>*, <2 x i1>, i32) define void @vpstore_v2f16(<2 x half> %val, <2 x half>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v2f16: @@ -156,11 +156,11 @@ define void @vpstore_v2f16(<2 x half> %val, <2 x half>* %ptr, <2 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v2f16(<2 x half> %val, <2 x half>* %ptr, <2 x i1> %m, i32 %evl) + call void @llvm.vp.store.v2f16.p0v2f16(<2 x half> %val, <2 x half>* %ptr, <2 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v4f16(<4 x half>, <4 x half>*, <4 x i1>, i32) +declare void @llvm.vp.store.v4f16.p0v4f16(<4 x half>, <4 x half>*, <4 x i1>, i32) define void @vpstore_v4f16(<4 x half> %val, <4 x half>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v4f16: @@ -168,11 +168,11 @@ define void @vpstore_v4f16(<4 x half> %val, <4 x half>* %ptr, <4 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v4f16(<4 x half> %val, <4 x half>* %ptr, <4 x i1> %m, i32 %evl) + call void @llvm.vp.store.v4f16.p0v4f16(<4 x half> %val, <4 x half>* %ptr, <4 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v8f16(<8 x half>, <8 x half>*, <8 x i1>, i32) +declare void @llvm.vp.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, <8 x i1>, i32) define void @vpstore_v8f16(<8 x half> %val, <8 x half>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v8f16: @@ -180,11 
+180,11 @@ define void @vpstore_v8f16(<8 x half> %val, <8 x half>* %ptr, <8 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v8f16(<8 x half> %val, <8 x half>* %ptr, <8 x i1> %m, i32 %evl) + call void @llvm.vp.store.v8f16.p0v8f16(<8 x half> %val, <8 x half>* %ptr, <8 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v2f32(<2 x float>, <2 x float>*, <2 x i1>, i32) +declare void @llvm.vp.store.v2f32.p0v2f32(<2 x float>, <2 x float>*, <2 x i1>, i32) define void @vpstore_v2f32(<2 x float> %val, <2 x float>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v2f32: @@ -192,11 +192,11 @@ define void @vpstore_v2f32(<2 x float> %val, <2 x float>* %ptr, <2 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v2f32(<2 x float> %val, <2 x float>* %ptr, <2 x i1> %m, i32 %evl) + call void @llvm.vp.store.v2f32.p0v2f32(<2 x float> %val, <2 x float>* %ptr, <2 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v4f32(<4 x float>, <4 x float>*, <4 x i1>, i32) +declare void @llvm.vp.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, <4 x i1>, i32) define void @vpstore_v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v4f32: @@ -204,11 +204,11 @@ define void @vpstore_v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %m, i32 %evl) + call void @llvm.vp.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %ptr, <4 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v8f32(<8 x float>, <8 x float>*, <8 x i1>, i32) +declare void @llvm.vp.store.v8f32.p0v8f32(<8 x float>, <8 x float>*, <8 x i1>, i32) define void @vpstore_v8f32(<8 x float> %val, <8 x float>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v8f32: @@ -216,11 +216,11 @@ define void @vpstore_v8f32(<8 x float> %val, <8 x float>* %ptr, <8 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v8f32(<8 x float> %val, <8 x float>* %ptr, <8 x i1> %m, i32 %evl) + call void @llvm.vp.store.v8f32.p0v8f32(<8 x float> %val, <8 x float>* %ptr, <8 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v2f64(<2 x double>, <2 x double>*, <2 x i1>, i32) +declare void @llvm.vp.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, <2 x i1>, i32) define void @vpstore_v2f64(<2 x double> %val, <2 x double>* %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v2f64: @@ -228,11 +228,11 @@ define void @vpstore_v2f64(<2 x double> %val, <2 x double>* %ptr, <2 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v2f64(<2 x double> %val, <2 x double>* %ptr, <2 x i1> %m, i32 %evl) + call void @llvm.vp.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, <2 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v4f64(<4 x double>, <4 x double>*, <4 x i1>, i32) +declare void @llvm.vp.store.v4f64.p0v4f64(<4 x double>, <4 x double>*, <4 x i1>, i32) define void @vpstore_v4f64(<4 x double> %val, <4 x double>* %ptr, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v4f64: @@ -240,11 +240,11 @@ define void @vpstore_v4f64(<4 
x double> %val, <4 x double>* %ptr, <4 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v4f64(<4 x double> %val, <4 x double>* %ptr, <4 x i1> %m, i32 %evl) + call void @llvm.vp.store.v4f64.p0v4f64(<4 x double> %val, <4 x double>* %ptr, <4 x i1> %m, i32 %evl) ret void } -declare void @llvm.vp.store.v8f64(<8 x double>, <8 x double>*, <8 x i1>, i32) +declare void @llvm.vp.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, <8 x i1>, i32) define void @vpstore_v8f64(<8 x double> %val, <8 x double>* %ptr, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_v8f64: @@ -252,7 +252,7 @@ define void @vpstore_v8f64(<8 x double> %val, <8 x double>* %ptr, <8 x i1> %m, i ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.v8f64(<8 x double> %val, <8 x double>* %ptr, <8 x i1> %m, i32 %evl) + call void @llvm.vp.store.v8f64.p0v8f64(<8 x double> %val, <8 x double>* %ptr, <8 x i1> %m, i32 %evl) ret void } @@ -264,6 +264,6 @@ define void @vpstore_v2i8_allones_mask(<2 x i8> %val, <2 x i8>* %ptr, i32 zeroex ; CHECK-NEXT: ret %a = insertelement <2 x i1> undef, i1 true, i32 0 %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer - call void @llvm.vp.store.v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %b, i32 %evl) + call void @llvm.vp.store.v2i8.p0v2i8(<2 x i8> %val, <2 x i8>* %ptr, <2 x i1> %b, i32 %evl) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll index 37f63f474e415..7887f3c5dbb6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll @@ -58,7 +58,7 @@ define signext i1 @vreduce_or_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_or_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -72,7 +72,7 @@ define signext i1 @vreduce_xor_v2i1(<2 x i1> %v) { ; CHECK-LABEL: vreduce_xor_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -87,7 +87,7 @@ define signext i1 @vreduce_and_v2i1(<2 x i1> %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -101,7 +101,7 @@ define signext i1 @vreduce_or_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_or_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -115,7 +115,7 @@ define signext i1 @vreduce_xor_v4i1(<4 x i1> %v) { ; CHECK-LABEL: vreduce_xor_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -130,7 +130,7 @@ define signext i1 @vreduce_and_v4i1(<4 x i1> %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; 
CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -144,7 +144,7 @@ define signext i1 @vreduce_or_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_or_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -158,7 +158,7 @@ define signext i1 @vreduce_xor_v8i1(<8 x i1> %v) { ; CHECK-LABEL: vreduce_xor_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -173,7 +173,7 @@ define signext i1 @vreduce_and_v8i1(<8 x i1> %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -187,7 +187,7 @@ define signext i1 @vreduce_or_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_or_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -201,7 +201,7 @@ define signext i1 @vreduce_xor_v16i1(<16 x i1> %v) { ; CHECK-LABEL: vreduce_xor_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -216,7 +216,7 @@ define signext i1 @vreduce_and_v16i1(<16 x i1> %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -231,7 +231,7 @@ define signext i1 @vreduce_or_v32i1(<32 x i1> %v) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmor.mm v8, v0, v8 -; LMULMAX1-NEXT: vpopc.m a0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 ; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: neg a0, a0 ; LMULMAX1-NEXT: ret @@ -240,7 +240,7 @@ define signext i1 @vreduce_or_v32i1(<32 x i1> %v) { ; LMULMAX8: # %bb.0: ; LMULMAX8-NEXT: addi a0, zero, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; LMULMAX8-NEXT: vpopc.m a0, v0 +; LMULMAX8-NEXT: vcpop.m a0, v0 ; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: neg a0, a0 ; LMULMAX8-NEXT: ret @@ -255,7 +255,7 @@ define signext i1 @vreduce_xor_v32i1(<32 x i1> %v) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmxor.mm v8, v0, v8 -; LMULMAX1-NEXT: vpopc.m a0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 ; LMULMAX1-NEXT: andi a0, a0, 1 ; LMULMAX1-NEXT: neg a0, a0 ; LMULMAX1-NEXT: ret @@ -264,7 +264,7 @@ define signext i1 @vreduce_xor_v32i1(<32 x i1> %v) { ; LMULMAX8: # %bb.0: ; LMULMAX8-NEXT: addi a0, zero, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; LMULMAX8-NEXT: vpopc.m a0, v0 +; LMULMAX8-NEXT: vcpop.m a0, v0 ; LMULMAX8-NEXT: andi a0, a0, 1 ; LMULMAX8-NEXT: neg a0, a0 ; LMULMAX8-NEXT: ret @@ -279,7 +279,7 @@ define signext i1 @vreduce_and_v32i1(<32 x i1> %v) { ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: vmnand.mm v8, v0, v8 -; LMULMAX1-NEXT: vpopc.m a0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 ; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: neg a0, a0 ; LMULMAX1-NEXT: ret @@ -289,7 +289,7 @@ define signext i1 
@vreduce_and_v32i1(<32 x i1> %v) { ; LMULMAX8-NEXT: addi a0, zero, 32 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; LMULMAX8-NEXT: vmnand.mm v8, v0, v0 -; LMULMAX8-NEXT: vpopc.m a0, v8 +; LMULMAX8-NEXT: vcpop.m a0, v8 ; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: neg a0, a0 ; LMULMAX8-NEXT: ret @@ -306,7 +306,7 @@ define signext i1 @vreduce_or_v64i1(<64 x i1> %v) { ; LMULMAX1-NEXT: vmor.mm v8, v8, v10 ; LMULMAX1-NEXT: vmor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmor.mm v8, v9, v8 -; LMULMAX1-NEXT: vpopc.m a0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 ; LMULMAX1-NEXT: snez a0, a0 ; LMULMAX1-NEXT: neg a0, a0 ; LMULMAX1-NEXT: ret @@ -315,7 +315,7 @@ define signext i1 @vreduce_or_v64i1(<64 x i1> %v) { ; LMULMAX8: # %bb.0: ; LMULMAX8-NEXT: addi a0, zero, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; LMULMAX8-NEXT: vpopc.m a0, v0 +; LMULMAX8-NEXT: vcpop.m a0, v0 ; LMULMAX8-NEXT: snez a0, a0 ; LMULMAX8-NEXT: neg a0, a0 ; LMULMAX8-NEXT: ret @@ -332,7 +332,7 @@ define signext i1 @vreduce_xor_v64i1(<64 x i1> %v) { ; LMULMAX1-NEXT: vmxor.mm v8, v8, v10 ; LMULMAX1-NEXT: vmxor.mm v9, v0, v9 ; LMULMAX1-NEXT: vmxor.mm v8, v9, v8 -; LMULMAX1-NEXT: vpopc.m a0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 ; LMULMAX1-NEXT: andi a0, a0, 1 ; LMULMAX1-NEXT: neg a0, a0 ; LMULMAX1-NEXT: ret @@ -341,7 +341,7 @@ define signext i1 @vreduce_xor_v64i1(<64 x i1> %v) { ; LMULMAX8: # %bb.0: ; LMULMAX8-NEXT: addi a0, zero, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; LMULMAX8-NEXT: vpopc.m a0, v0 +; LMULMAX8-NEXT: vcpop.m a0, v0 ; LMULMAX8-NEXT: andi a0, a0, 1 ; LMULMAX8-NEXT: neg a0, a0 ; LMULMAX8-NEXT: ret @@ -358,7 +358,7 @@ define signext i1 @vreduce_and_v64i1(<64 x i1> %v) { ; LMULMAX1-NEXT: vmand.mm v8, v8, v10 ; LMULMAX1-NEXT: vmand.mm v9, v0, v9 ; LMULMAX1-NEXT: vmnand.mm v8, v9, v8 -; LMULMAX1-NEXT: vpopc.m a0, v8 +; LMULMAX1-NEXT: vcpop.m a0, v8 ; LMULMAX1-NEXT: seqz a0, a0 ; LMULMAX1-NEXT: neg a0, a0 ; LMULMAX1-NEXT: ret @@ -368,7 +368,7 @@ define signext i1 @vreduce_and_v64i1(<64 x i1> %v) { ; LMULMAX8-NEXT: addi a0, zero, 64 ; LMULMAX8-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; LMULMAX8-NEXT: vmnand.mm v8, v0, v0 -; LMULMAX8-NEXT: vpopc.m a0, v8 +; LMULMAX8-NEXT: vcpop.m a0, v8 ; LMULMAX8-NEXT: seqz a0, a0 ; LMULMAX8-NEXT: neg a0, a0 ; LMULMAX8-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 9845a485caa9d..fee5a617441d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -225,7 +225,7 @@ define <2 x i1> @vselect_v2i1(<2 x i1> %a, <2 x i1> %b, <2 x i1> %cc) { ; CHECK-LABEL: vselect_v2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -237,7 +237,7 @@ define <4 x i1> @vselect_v4i1(<4 x i1> %a, <4 x i1> %b, <4 x i1> %cc) { ; CHECK-LABEL: vselect_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -249,7 +249,7 @@ define <8 x i1> @vselect_v8i1(<8 x i1> %a, <8 x i1> %b, <8 x i1> %cc) { ; CHECK-LABEL: vselect_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; 
CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -261,7 +261,7 @@ define <16 x i1> @vselect_v16i1(<16 x i1> %a, <16 x i1> %b, <16 x i1> %cc) { ; CHECK-LABEL: vselect_v16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -274,7 +274,7 @@ define <32 x i1> @vselect_v32i1(<32 x i1> %a, <32 x i1> %b, <32 x i1> %cc) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi a0, zero, 32 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -287,7 +287,7 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi a0, zero, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll index 8c6ba3106b9b6..9ad7d41483987 100644 --- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll @@ -10,7 +10,7 @@ define <vscale x 1 x i1> @select_nxv1i1(i1 zeroext %c, <vscale x 1 x i1> %a, @selectcc_nxv1i1(i1 signext %a, i1 signext %b, @select_nxv2i1(i1 zeroext %c, <vscale x 2 x i1> %a, @selectcc_nxv2i1(i1 signext %a, i1 signext %b, @select_nxv4i1(i1 zeroext %c, <vscale x 4 x i1> %a, @selectcc_nxv4i1(i1 signext %a, i1 signext %b, @select_nxv8i1(i1 zeroext %c, <vscale x 8 x i1> %a, @selectcc_nxv8i1(i1 signext %a, i1 signext %b, @select_nxv16i1(i1 zeroext %c, <vscale x 16 x i1> %a, ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu ; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vmsne.vi v9, v10, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -150,7 +150,7 @@ define <vscale x 16 x i1> @selectcc_nxv16i1(i1 signext %a, i1 signext %b, @select_nxv32i1(i1 zeroext %c, <vscale x 32 x i1> %a, ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, mu ; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -181,7 +181,7 @@ define <vscale x 32 x i1> @selectcc_nxv32i1(i1 signext %a, i1 signext %b, @select_nxv64i1(i1 zeroext %c, <vscale x 64 x i1> %a, ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, mu ; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vmsne.vi v9, v16, 0 -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -212,7 +212,7 @@ define <vscale x 64 x i1> @selectcc_nxv64i1(i1 signext %a, i1 signext %b, , i32); -define i32 @intrinsic_vpopc_m_i32_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind { -; CHECK-LABEL:
intrinsic_vpopc_mask_m_i32_nxv1i1: +define i32 @intrinsic_vcpop_mask_m_i32_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.mask.i32.nxv1i1( + %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv1i1( <vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) @@ -41,39 +41,39 @@ entry: ret i32 %a } -declare i32 @llvm.riscv.vpopc.i32.nxv2i1( +declare i32 @llvm.riscv.vcpop.i32.nxv2i1( <vscale x 2 x i1>, i32); -define i32 @intrinsic_vpopc_m_i32_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i32_nxv2i1: +define i32 @intrinsic_vcpop_m_i32_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.i32.nxv2i1( + %a = call i32 @llvm.riscv.vcpop.i32.nxv2i1( <vscale x 2 x i1> %0, i32 %1) ret i32 %a } -declare i32 @llvm.riscv.vpopc.mask.i32.nxv2i1( +declare i32 @llvm.riscv.vcpop.mask.i32.nxv2i1( <vscale x 2 x i1>, <vscale x 2 x i1>, i32); -define i32 @intrinsic_vpopc_mask_m_i32_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i32_nxv2i1: +define i32 @intrinsic_vcpop_mask_m_i32_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.mask.i32.nxv2i1( + %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv2i1( <vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) @@ -81,39 +81,39 @@ entry: ret i32 %a } -declare i32 @llvm.riscv.vpopc.i32.nxv4i1( +declare i32 @llvm.riscv.vcpop.i32.nxv4i1( <vscale x 4 x i1>, i32); -define i32 @intrinsic_vpopc_m_i32_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i32_nxv4i1: +define i32 @intrinsic_vcpop_m_i32_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.i32.nxv4i1( + %a = call i32 @llvm.riscv.vcpop.i32.nxv4i1( <vscale x 4 x i1> %0, i32 %1) ret i32 %a } -declare i32 @llvm.riscv.vpopc.mask.i32.nxv4i1( +declare i32 @llvm.riscv.vcpop.mask.i32.nxv4i1( <vscale x 4 x i1>, <vscale x 4 x i1>, i32); -define i32 @intrinsic_vpopc_mask_m_i32_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i32_nxv4i1: +define i32 @intrinsic_vcpop_mask_m_i32_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.mask.i32.nxv4i1( + %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv4i1( <vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) @@ -121,39 +121,39 @@ entry: ret i32 %a } -declare i32 @llvm.riscv.vpopc.i32.nxv8i1( +declare i32 @llvm.riscv.vcpop.i32.nxv8i1( <vscale x 8 x i1>, i32); -define i32 @intrinsic_vpopc_m_i32_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i32_nxv8i1: +define i32 @intrinsic_vcpop_m_i32_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv8i1: ;
CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.i32.nxv8i1( + %a = call i32 @llvm.riscv.vcpop.i32.nxv8i1( <vscale x 8 x i1> %0, i32 %1) ret i32 %a } -declare i32 @llvm.riscv.vpopc.mask.i32.nxv8i1( +declare i32 @llvm.riscv.vcpop.mask.i32.nxv8i1( <vscale x 8 x i1>, <vscale x 8 x i1>, i32); -define i32 @intrinsic_vpopc_mask_m_i32_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i32_nxv8i1: +define i32 @intrinsic_vcpop_mask_m_i32_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.mask.i32.nxv8i1( + %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv8i1( <vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) @@ -161,39 +161,39 @@ entry: ret i32 %a } -declare i32 @llvm.riscv.vpopc.i32.nxv16i1( +declare i32 @llvm.riscv.vcpop.i32.nxv16i1( <vscale x 16 x i1>, i32); -define i32 @intrinsic_vpopc_m_i32_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i32_nxv16i1: +define i32 @intrinsic_vcpop_m_i32_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.i32.nxv16i1( + %a = call i32 @llvm.riscv.vcpop.i32.nxv16i1( <vscale x 16 x i1> %0, i32 %1) ret i32 %a } -declare i32 @llvm.riscv.vpopc.mask.i32.nxv16i1( +declare i32 @llvm.riscv.vcpop.mask.i32.nxv16i1( <vscale x 16 x i1>, <vscale x 16 x i1>, i32); -define i32 @intrinsic_vpopc_mask_m_i32_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i32_nxv16i1: +define i32 @intrinsic_vcpop_mask_m_i32_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.mask.i32.nxv16i1( + %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv16i1( <vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) @@ -201,39 +201,39 @@ entry: ret i32 %a } -declare i32 @llvm.riscv.vpopc.i32.nxv32i1( +declare i32 @llvm.riscv.vcpop.i32.nxv32i1( <vscale x 32 x i1>, i32); -define i32 @intrinsic_vpopc_m_i32_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i32_nxv32i1: +define i32 @intrinsic_vcpop_m_i32_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.i32.nxv32i1( + %a = call i32 @llvm.riscv.vcpop.i32.nxv32i1( <vscale x 32 x i1> %0, i32 %1) ret i32 %a } -declare i32 @llvm.riscv.vpopc.mask.i32.nxv32i1( +declare i32 @llvm.riscv.vcpop.mask.i32.nxv32i1( <vscale x 32 x i1>, <vscale x 32 x i1>, i32); -define i32 @intrinsic_vpopc_mask_m_i32_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i32_nxv32i1: +define i32 @intrinsic_vcpop_mask_m_i32_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ;
CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.mask.i32.nxv32i1( + %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv32i1( <vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) @@ -241,39 +241,39 @@ entry: ret i32 %a } -declare i32 @llvm.riscv.vpopc.i32.nxv64i1( +declare i32 @llvm.riscv.vcpop.i32.nxv64i1( <vscale x 64 x i1>, i32); -define i32 @intrinsic_vpopc_m_i32_nxv64i1(<vscale x 64 x i1> %0, i32 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i32_nxv64i1: +define i32 @intrinsic_vcpop_m_i32_nxv64i1(<vscale x 64 x i1> %0, i32 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.i32.nxv64i1( + %a = call i32 @llvm.riscv.vcpop.i32.nxv64i1( <vscale x 64 x i1> %0, i32 %1) ret i32 %a } -declare i32 @llvm.riscv.vpopc.mask.i32.nxv64i1( +declare i32 @llvm.riscv.vcpop.mask.i32.nxv64i1( <vscale x 64 x i1>, <vscale x 64 x i1>, i32); -define i32 @intrinsic_vpopc_mask_m_i32_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i32_nxv64i1: +define i32 @intrinsic_vcpop_mask_m_i32_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i32 @llvm.riscv.vpopc.mask.i32.nxv64i1( + %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv64i1( <vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpopc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll similarity index 55% rename from llvm/test/CodeGen/RISCV/rvv/vpopc-rv64.ll rename to llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll index 10c72e381fb94..b42d679b52b6e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpopc-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll @@ -1,39 +1,39 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ ; RUN: < %s | FileCheck %s -declare i64 @llvm.riscv.vpopc.i64.nxv1i1( +declare i64 @llvm.riscv.vcpop.i64.nxv1i1( <vscale x 1 x i1>, i64); -define i64 @intrinsic_vpopc_m_i64_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i64_nxv1i1: +define i64 @intrinsic_vcpop_m_i64_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.i64.nxv1i1( + %a = call i64 @llvm.riscv.vcpop.i64.nxv1i1( <vscale x 1 x i1> %0, i64 %1) ret i64 %a } -declare i64 @llvm.riscv.vpopc.mask.i64.nxv1i1( +declare i64 @llvm.riscv.vcpop.mask.i64.nxv1i1( <vscale x 1 x i1>, <vscale x 1 x i1>, i64); -define i64 @intrinsic_vpopc_mask_m_i64_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i64_nxv1i1: +define i64 @intrinsic_vcpop_mask_m_i64_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv1i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.mask.i64.nxv1i1( + %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv1i1( <vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) @@ -41,39 +41,39 @@ entry: ret i64 %a } -declare i64 @llvm.riscv.vpopc.i64.nxv2i1( +declare i64 @llvm.riscv.vcpop.i64.nxv2i1( <vscale x 2 x i1>, i64);
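; --- Editorial sketch (not part of the diff) -------------------------------
; RVV v1.0 renamed the mask population-count instruction vpopc.m to vcpop.m,
; and the target intrinsic follows suit: llvm.riscv.vpopc.* becomes
; llvm.riscv.vcpop.*. It counts the set bits of a mask register under the
; given vector length. A minimal, hypothetical call (assuming rv64 with
; +experimental-v; the wrapper function name is invented, the declaration is
; copied from the diff):
declare i64 @llvm.riscv.vcpop.i64.nxv8i1(<vscale x 8 x i1>, i64)
define i64 @example_vcpop(<vscale x 8 x i1> %mask, i64 %vl) {
  ; Returns the number of active bits in %mask among the first %vl elements.
  %n = call i64 @llvm.riscv.vcpop.i64.nxv8i1(<vscale x 8 x i1> %mask, i64 %vl)
  ret i64 %n
}
; ----------------------------------------------------------------------------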
-define i64 @intrinsic_vpopc_m_i64_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i64_nxv2i1: +define i64 @intrinsic_vcpop_m_i64_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.i64.nxv2i1( + %a = call i64 @llvm.riscv.vcpop.i64.nxv2i1( <vscale x 2 x i1> %0, i64 %1) ret i64 %a } -declare i64 @llvm.riscv.vpopc.mask.i64.nxv2i1( +declare i64 @llvm.riscv.vcpop.mask.i64.nxv2i1( <vscale x 2 x i1>, <vscale x 2 x i1>, i64); -define i64 @intrinsic_vpopc_mask_m_i64_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i64_nxv2i1: +define i64 @intrinsic_vcpop_mask_m_i64_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv2i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.mask.i64.nxv2i1( + %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv2i1( <vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) @@ -81,39 +81,39 @@ entry: ret i64 %a } -declare i64 @llvm.riscv.vpopc.i64.nxv4i1( +declare i64 @llvm.riscv.vcpop.i64.nxv4i1( <vscale x 4 x i1>, i64); -define i64 @intrinsic_vpopc_m_i64_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i64_nxv4i1: +define i64 @intrinsic_vcpop_m_i64_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.i64.nxv4i1( + %a = call i64 @llvm.riscv.vcpop.i64.nxv4i1( <vscale x 4 x i1> %0, i64 %1) ret i64 %a } -declare i64 @llvm.riscv.vpopc.mask.i64.nxv4i1( +declare i64 @llvm.riscv.vcpop.mask.i64.nxv4i1( <vscale x 4 x i1>, <vscale x 4 x i1>, i64); -define i64 @intrinsic_vpopc_mask_m_i64_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i64_nxv4i1: +define i64 @intrinsic_vcpop_mask_m_i64_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv4i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.mask.i64.nxv4i1( + %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv4i1( <vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) @@ -121,39 +121,39 @@ entry: ret i64 %a } -declare i64 @llvm.riscv.vpopc.i64.nxv8i1( +declare i64 @llvm.riscv.vcpop.i64.nxv8i1( <vscale x 8 x i1>, i64); -define i64 @intrinsic_vpopc_m_i64_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i64_nxv8i1: +define i64 @intrinsic_vcpop_m_i64_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.i64.nxv8i1( + %a = call i64 @llvm.riscv.vcpop.i64.nxv8i1( <vscale x 8 x i1> %0, i64 %1) ret i64 %a } -declare i64 @llvm.riscv.vpopc.mask.i64.nxv8i1( +declare i64 @llvm.riscv.vcpop.mask.i64.nxv8i1( <vscale x 8 x i1>, <vscale x 8 x i1>, i64); -define i64 @intrinsic_vpopc_mask_m_i64_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i64_nxv8i1: +define i64 @intrinsic_vcpop_mask_m_i64_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv8i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ;
CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.mask.i64.nxv8i1( + %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv8i1( %0, %1, i64 %2) @@ -161,39 +161,39 @@ entry: ret i64 %a } -declare i64 @llvm.riscv.vpopc.i64.nxv16i1( +declare i64 @llvm.riscv.vcpop.i64.nxv16i1( , i64); -define i64 @intrinsic_vpopc_m_i64_nxv16i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i64_nxv16i1: +define i64 @intrinsic_vcpop_m_i64_nxv16i1( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.i64.nxv16i1( + %a = call i64 @llvm.riscv.vcpop.i64.nxv16i1( %0, i64 %1) ret i64 %a } -declare i64 @llvm.riscv.vpopc.mask.i64.nxv16i1( +declare i64 @llvm.riscv.vcpop.mask.i64.nxv16i1( , , i64); -define i64 @intrinsic_vpopc_mask_m_i64_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i64_nxv16i1: +define i64 @intrinsic_vcpop_mask_m_i64_nxv16i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv16i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.mask.i64.nxv16i1( + %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv16i1( %0, %1, i64 %2) @@ -201,39 +201,39 @@ entry: ret i64 %a } -declare i64 @llvm.riscv.vpopc.i64.nxv32i1( +declare i64 @llvm.riscv.vcpop.i64.nxv32i1( , i64); -define i64 @intrinsic_vpopc_m_i64_nxv32i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i64_nxv32i1: +define i64 @intrinsic_vcpop_m_i64_nxv32i1( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.i64.nxv32i1( + %a = call i64 @llvm.riscv.vcpop.i64.nxv32i1( %0, i64 %1) ret i64 %a } -declare i64 @llvm.riscv.vpopc.mask.i64.nxv32i1( +declare i64 @llvm.riscv.vcpop.mask.i64.nxv32i1( , , i64); -define i64 @intrinsic_vpopc_mask_m_i64_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i64_nxv32i1: +define i64 @intrinsic_vcpop_mask_m_i64_nxv32i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv32i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.mask.i64.nxv32i1( + %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv32i1( %0, %1, i64 %2) @@ -241,39 +241,39 @@ entry: ret i64 %a } -declare i64 @llvm.riscv.vpopc.i64.nxv64i1( +declare i64 @llvm.riscv.vcpop.i64.nxv64i1( , i64); -define i64 @intrinsic_vpopc_m_i64_nxv64i1( %0, i64 %1) nounwind { -; CHECK-LABEL: intrinsic_vpopc_m_i64_nxv64i1: +define i64 @intrinsic_vcpop_m_i64_nxv64i1( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: ret entry: - %a = call i64 
@llvm.riscv.vpopc.i64.nxv64i1( + %a = call i64 @llvm.riscv.vcpop.i64.nxv64i1( %0, i64 %1) ret i64 %a } -declare i64 @llvm.riscv.vpopc.mask.i64.nxv64i1( +declare i64 @llvm.riscv.vcpop.mask.i64.nxv64i1( , , i64); -define i64 @intrinsic_vpopc_mask_m_i64_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vpopc_mask_m_i64_nxv64i1: +define i64 @intrinsic_vcpop_mask_m_i64_nxv64i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv64i1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vpopc.m a0, v9, v0.t +; CHECK-NEXT: vcpop.m a0, v9, v0.t ; CHECK-NEXT: ret entry: - %a = call i64 @llvm.riscv.vpopc.mask.i64.nxv64i1( + %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv64i1( %0, %1, i64 %2) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll new file mode 100644 index 0000000000000..ad4e946a01472 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll @@ -0,0 +1,692 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: < %s | FileCheck %s +declare @llvm.riscv.vfredusum.nxv4f16.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16.nxv1i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16.nxv1i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16.nxv2i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16.nxv2i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, 
v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16.nxv4i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16.nxv4i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16.nxv8i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16.nxv8i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16.nxv16i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16.nxv16i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv32f16( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv32f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16.nxv32i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16.nxv32i1( + %0, + %1, 
+ %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32.nxv1i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32.nxv1i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32.nxv2i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32.nxv2i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32.nxv4i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32.nxv4i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32.nxv8i1( + , + , + , + , + i32); + +define 
@intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32.nxv8i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv16f32( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv16f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32.nxv16i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32.nxv16i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv1f64( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv1f64.nxv1f64( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64.nxv1i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64.nxv1i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv2f64( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv1f64.nxv2f64( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64.nxv2i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64.nxv2i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv4f64( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: 
intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv1f64.nxv4f64( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64.nxv4i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64.nxv4i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv8f64( + , + , + , + i32); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv1f64.nxv8f64( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64.nxv8i1( + , + , + , + , + i32); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64.nxv8i1( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll new file mode 100644 index 0000000000000..2f0f81533edb6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll @@ -0,0 +1,692 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: < %s | FileCheck %s +declare @llvm.riscv.vfredusum.nxv4f16.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret 
+entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv4f16.nxv32f16( + , + 
, + , + i64); + +define @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv4f16.nxv32f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv2f32( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv2f32.nxv16f32( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv2f32.nxv16f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv1f64( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv1f64.nxv1f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv2f64( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vfredusum.nxv1f64.nxv2f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv4f64( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv1f64.nxv4f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfredusum.nxv1f64.nxv8f64( + , + , + , + i64); + +define @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.nxv1f64.nxv8f64( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64( + , + , + , + , + i64); + +define @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, mu +; CHECK-NEXT: vfredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll new file mode 100644 index 0000000000000..25064a75cc795 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll @@ -0,0 +1,508 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: < %s | FileCheck %s +declare @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32( + , + , + , + , + i32); + +define 
@intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv2f16( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv2f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv4f16( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv4f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv8f16( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv8f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv16f16( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { 
+; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv16f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv32f16( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv32f16( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv1f32( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv1f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv2f32( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv2f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64: +; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv4f32( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv4f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv8f32( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv8f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv16f32( + , + , + , + i32); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, i32 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv16f32( + %0, + %1, + %2, + i32 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32.nxv1f64( + , + , + , + , + i32); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, i32 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32.nxv1f64( + %0, + %1, + %2, + %3, + i32 %4) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll new file mode 100644 index 0000000000000..d8fabd4906b28 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll @@ -0,0 +1,508 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: < %s | FileCheck %s +declare @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv1f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv2f16( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv2f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv4f16( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv4f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv8f16( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv8f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + 
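+; For reference, a sketch of the unmasked test above with its scalable vector
+; types spelled out in full. The element types are inferred from the mangled
+; suffix .nxv2f32.nxv8f16, and @example is a hypothetical name rather than
+; part of the autogenerated tests. The operands are, in order: the
+; destination/merge vector, the vs2 source vector, the vs1 vector whose
+; element 0 carries the scalar accumulator, and the VL; vfwredusum.vs writes
+; the unordered widened sum of the vs2 elements plus vs1[0] into element 0
+; of the result.
+;
+;   declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv8f16(
+;     <vscale x 2 x float>,
+;     <vscale x 8 x half>,
+;     <vscale x 2 x float>,
+;     i64);
+;
+;   define <vscale x 2 x float> @example(<vscale x 2 x float> %dest,
+;                                        <vscale x 8 x half> %vec,
+;                                        <vscale x 2 x float> %scalar,
+;                                        i64 %vl) nounwind {
+;   entry:
+;     %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv8f16(
+;       <vscale x 2 x float> %dest,
+;       <vscale x 8 x half> %vec,
+;       <vscale x 2 x float> %scalar,
+;       i64 %vl)
+;     ret <vscale x 2 x float> %a
+;   }
+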
+declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv16f16( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv16f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv2f32.nxv32f16( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv2f32.nxv32f16( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v16, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv1f32( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv1f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv2f32( + , + , + , + 
i64); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv2f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv4f32( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv4f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v10, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv8f32( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv8f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v12, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64( + %0, + %1, + %2, + %3, + i64 %4) + + ret %a +} + +declare @llvm.riscv.vfwredusum.nxv1f64.nxv16f32( + , + , + , + i64); + +define @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, i64 %3) nounwind { +; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, mu +; CHECK-NEXT: vfwredusum.vs v8, v16, v9 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vfwredusum.nxv1f64.nxv16f32( + %0, + %1, + %2, + i64 %3) + + ret %a +} + +declare @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32( + , + , + , + , + i64); + +define @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64( %0, %1, %2, %3, i64 %4) nounwind { +; 
CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
+; CHECK-NEXT:    vfwredusum.vs v8, v16, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32(
+    <vscale x 1 x double> %0,
+    <vscale x 16 x float> %1,
+    <vscale x 1 x double> %2,
+    <vscale x 16 x i1> %3,
+    i64 %4)
+
+  ret <vscale x 1 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll
new file mode 100644
index 0000000000000..8743551c89cd6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN:   < %s | FileCheck %s
+declare <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
+  <vscale x 1 x i1>,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i1> @intrinsic_vmandn_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmandn_mm_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
+    <vscale x 1 x i1> %0,
+    <vscale x 1 x i1> %1,
+    i32 %2)
+
+  ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmandn.nxv2i1(
+  <vscale x 2 x i1>,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i1> @intrinsic_vmandn_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmandn_mm_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x i1> @llvm.riscv.vmandn.nxv2i1(
+    <vscale x 2 x i1> %0,
+    <vscale x 2 x i1> %1,
+    i32 %2)
+
+  ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 4 x i1> @llvm.riscv.vmandn.nxv4i1(
+  <vscale x 4 x i1>,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i1> @intrinsic_vmandn_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmandn_mm_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x i1> @llvm.riscv.vmandn.nxv4i1(
+    <vscale x 4 x i1> %0,
+    <vscale x 4 x i1> %1,
+    i32 %2)
+
+  ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmandn.nxv8i1(
+  <vscale x 8 x i1>,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x i1> @intrinsic_vmandn_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmandn_mm_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x i1> @llvm.riscv.vmandn.nxv8i1(
+    <vscale x 8 x i1> %0,
+    <vscale x 8 x i1> %1,
+    i32 %2)
+
+  ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmandn.nxv16i1(
+  <vscale x 16 x i1>,
+  <vscale x 16 x i1>,
+  i32);
+
+define <vscale x 16 x i1> @intrinsic_vmandn_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmandn_mm_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x i1> @llvm.riscv.vmandn.nxv16i1(
+    <vscale x 16 x i1> %0,
+    <vscale x 16 x i1> %1,
+    i32 %2)
+
+  ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 32 x i1> @llvm.riscv.vmandn.nxv32i1(
+  <vscale x 32 x i1>,
+  <vscale x 32 x i1>,
+  i32);
+
+define <vscale x 32 x i1> @intrinsic_vmandn_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmandn_mm_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 32 x i1> @llvm.riscv.vmandn.nxv32i1(
+    <vscale x 32 x i1> %0,
+    <vscale x 32 x i1> %1,
+    i32 %2)
+
+  ret <vscale x 32 x i1> %a
+}
+
+declare <vscale x 64 x i1> @llvm.riscv.vmandn.nxv64i1(
+  <vscale x 64 x i1>,
+  <vscale x 64 x i1>,
+  i32);
+
+define <vscale x 64 x i1> @intrinsic_vmandn_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vmandn_mm_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmandn.mm v0, v0, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 64 x i1> @llvm.riscv.vmandn.nxv64i1(
+    <vscale x 64 x i1> %0,
+    <vscale x 64 x i1> %1,
+    i32 %2)
+
+  ret <vscale x 64 x i1> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll
new file mode 100644
index
0000000000000..3cbd68f5e39ba --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: < %s | FileCheck %s +declare @llvm.riscv.vmandn.nxv1i1( + , + , + i64); + +define @intrinsic_vmandn_mm_nxv1i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmandn_mm_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmandn.nxv1i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmandn.nxv2i1( + , + , + i64); + +define @intrinsic_vmandn_mm_nxv2i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmandn_mm_nxv2i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmandn.nxv2i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmandn.nxv4i1( + , + , + i64); + +define @intrinsic_vmandn_mm_nxv4i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmandn_mm_nxv4i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmandn.nxv4i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmandn.nxv8i1( + , + , + i64); + +define @intrinsic_vmandn_mm_nxv8i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmandn_mm_nxv8i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmandn.nxv8i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmandn.nxv16i1( + , + , + i64); + +define @intrinsic_vmandn_mm_nxv16i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmandn_mm_nxv16i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmandn.nxv16i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmandn.nxv32i1( + , + , + i64); + +define @intrinsic_vmandn_mm_nxv32i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmandn_mm_nxv32i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmandn.nxv32i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmandn.nxv64i1( + , + , + i64); + +define @intrinsic_vmandn_mm_nxv64i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmandn_mm_nxv64i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmandn.nxv64i1( + %0, + %1, + i64 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandnot-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmandnot-rv32.ll deleted file mode 100644 index d917b5b0ec9af..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmandnot-rv32.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmandnot.nxv1i1( - , - , - i32); - -define @intrinsic_vmandnot_mm_nxv1i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vmandnot_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv1i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv2i1( - , - , - i32); - -define @intrinsic_vmandnot_mm_nxv2i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv2i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv4i1( - , - , - i32); - -define @intrinsic_vmandnot_mm_nxv4i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv4i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv8i1( - , - , - i32); - -define @intrinsic_vmandnot_mm_nxv8i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv8i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv16i1( - , - , - i32); - -define @intrinsic_vmandnot_mm_nxv16i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv16i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv32i1( - , - , - i32); - -define @intrinsic_vmandnot_mm_nxv32i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv32i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv64i1( - , - , - i32); - -define @intrinsic_vmandnot_mm_nxv64i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv64i1( - %0, - %1, - i32 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandnot-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmandnot-rv64.ll deleted file mode 100644 index 93dcf6aed8e57..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmandnot-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmandnot.nxv1i1( - , - , - i64); - -define @intrinsic_vmandnot_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv2i1( - , - , - i64); - -define @intrinsic_vmandnot_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: 
intrinsic_vmandnot_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv4i1( - , - , - i64); - -define @intrinsic_vmandnot_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv8i1( - , - , - i64); - -define @intrinsic_vmandnot_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv16i1( - , - , - i64); - -define @intrinsic_vmandnot_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv32i1( - , - , - i64); - -define @intrinsic_vmandnot_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmandnot.nxv64i1( - , - , - i64); - -define @intrinsic_vmandnot_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmandnot_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmandnot.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll index 3bf5961c08429..bc7699e5a902c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll @@ -347,11 +347,11 @@ define @vmxnor_vv_nxv16i1( %va, %not } -define @vmandnot_vv_nxv1i1( %va, %vb) { -; CHECK-LABEL: vmandnot_vv_nxv1i1: +define @vmandn_vv_nxv1i1( %va, %vb) { +; CHECK-LABEL: vmandn_vv_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 +; CHECK-NEXT: vmandn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -360,11 +360,11 @@ define @vmandnot_vv_nxv1i1( %va, %vc } -define @vmandnot_vv_nxv2i1( %va, %vb) { -; CHECK-LABEL: vmandnot_vv_nxv2i1: +define @vmandn_vv_nxv2i1( %va, %vb) { +; CHECK-LABEL: vmandn_vv_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 +; CHECK-NEXT: vmandn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -373,11 +373,11 @@ define @vmandnot_vv_nxv2i1( %va, %vc } -define @vmandnot_vv_nxv4i1( %va, %vb) { -; CHECK-LABEL: vmandnot_vv_nxv4i1: +define @vmandn_vv_nxv4i1( %va, 
%vb) { +; CHECK-LABEL: vmandn_vv_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 +; CHECK-NEXT: vmandn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -386,11 +386,11 @@ define @vmandnot_vv_nxv4i1( %va, %vc } -define @vmandnot_vv_nxv8i1( %va, %vb) { -; CHECK-LABEL: vmandnot_vv_nxv8i1: +define @vmandn_vv_nxv8i1( %va, %vb) { +; CHECK-LABEL: vmandn_vv_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 +; CHECK-NEXT: vmandn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -399,11 +399,11 @@ define @vmandnot_vv_nxv8i1( %va, %vc } -define @vmandnot_vv_nxv16i1( %va, %vb) { -; CHECK-LABEL: vmandnot_vv_nxv16i1: +define @vmandn_vv_nxv16i1( %va, %vb) { +; CHECK-LABEL: vmandn_vv_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu -; CHECK-NEXT: vmandnot.mm v0, v0, v8 +; CHECK-NEXT: vmandn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -412,11 +412,11 @@ define @vmandnot_vv_nxv16i1( %va, %vc } -define @vmornot_vv_nxv1i1( %va, %vb) { -; CHECK-LABEL: vmornot_vv_nxv1i1: +define @vmorn_vv_nxv1i1( %va, %vb) { +; CHECK-LABEL: vmorn_vv_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 +; CHECK-NEXT: vmorn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -425,11 +425,11 @@ define @vmornot_vv_nxv1i1( %va, %vc } -define @vmornot_vv_nxv2i1( %va, %vb) { -; CHECK-LABEL: vmornot_vv_nxv2i1: +define @vmorn_vv_nxv2i1( %va, %vb) { +; CHECK-LABEL: vmorn_vv_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 +; CHECK-NEXT: vmorn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -438,11 +438,11 @@ define @vmornot_vv_nxv2i1( %va, %vc } -define @vmornot_vv_nxv4i1( %va, %vb) { -; CHECK-LABEL: vmornot_vv_nxv4i1: +define @vmorn_vv_nxv4i1( %va, %vb) { +; CHECK-LABEL: vmorn_vv_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 +; CHECK-NEXT: vmorn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -451,11 +451,11 @@ define @vmornot_vv_nxv4i1( %va, %vc } -define @vmornot_vv_nxv8i1( %va, %vb) { -; CHECK-LABEL: vmornot_vv_nxv8i1: +define @vmorn_vv_nxv8i1( %va, %vb) { +; CHECK-LABEL: vmorn_vv_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 +; CHECK-NEXT: vmorn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer @@ -464,11 +464,11 @@ define @vmornot_vv_nxv8i1( %va, %vc } -define @vmornot_vv_nxv16i1( %va, %vb) { -; CHECK-LABEL: vmornot_vv_nxv16i1: +define @vmorn_vv_nxv16i1( %va, %vb) { +; CHECK-LABEL: vmorn_vv_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 +; CHECK-NEXT: vmorn.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement undef, i1 1, i32 0 %splat = shufflevector %head, undef, zeroinitializer diff --git 
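; The companion rename is vmornot.mm to vmorn.mm, which computes
; vd = vs2 | ~vs1; the renamed vmorn_vv_* tests above and the new
; vmorn-rv32.ll and vmorn-rv64.ll files below cover it. A hedged generic-IR
; sketch of the or-not idiom (illustrative only):

define <vscale x 8 x i1> @vmorn_sketch(<vscale x 8 x i1> %va, <vscale x 8 x i1> %vb) {
  %head = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
  %splat = shufflevector <vscale x 8 x i1> %head, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
  %notb = xor <vscale x 8 x i1> %vb, %splat      ; NOT %vb
  %vc = or <vscale x 8 x i1> %va, %notb          ; %va | ~%vb
  ret <vscale x 8 x i1> %vc
}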
a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll new file mode 100644 index 0000000000000..15fc0c3c33706 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ +; RUN: < %s | FileCheck %s +declare @llvm.riscv.vmorn.nxv1i1( + , + , + i32); + +define @intrinsic_vmorn_mm_nxv1i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv1i1( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv2i1( + , + , + i32); + +define @intrinsic_vmorn_mm_nxv2i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv2i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv2i1( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv4i1( + , + , + i32); + +define @intrinsic_vmorn_mm_nxv4i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv4i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv4i1( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv8i1( + , + , + i32); + +define @intrinsic_vmorn_mm_nxv8i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv8i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv8i1( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv16i1( + , + , + i32); + +define @intrinsic_vmorn_mm_nxv16i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv16i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv16i1( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv32i1( + , + , + i32); + +define @intrinsic_vmorn_mm_nxv32i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv32i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv32i1( + %0, + %1, + i32 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv64i1( + , + , + i32); + +define @intrinsic_vmorn_mm_nxv64i1( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv64i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv64i1( + %0, + %1, + i32 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll new file mode 100644 index 0000000000000..d0358ead0012f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ +; RUN: < %s | FileCheck %s +declare @llvm.riscv.vmorn.nxv1i1( + , + , + i64); + +define @intrinsic_vmorn_mm_nxv1i1( %0, %1, 
i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv1i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv1i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv2i1( + , + , + i64); + +define @intrinsic_vmorn_mm_nxv2i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv2i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv2i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv4i1( + , + , + i64); + +define @intrinsic_vmorn_mm_nxv4i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv4i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv4i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv8i1( + , + , + i64); + +define @intrinsic_vmorn_mm_nxv8i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv8i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv8i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv16i1( + , + , + i64); + +define @intrinsic_vmorn_mm_nxv16i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv16i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv16i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv32i1( + , + , + i64); + +define @intrinsic_vmorn_mm_nxv32i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv32i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv32i1( + %0, + %1, + i64 %2) + + ret %a +} + +declare @llvm.riscv.vmorn.nxv64i1( + , + , + i64); + +define @intrinsic_vmorn_mm_nxv64i1( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmorn_mm_nxv64i1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmorn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmorn.nxv64i1( + %0, + %1, + i64 %2) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmornot-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmornot-rv32.ll deleted file mode 100644 index 484f1fadf7a76..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmornot-rv32.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmornot.nxv1i1( - , - , - i32); - -define @intrinsic_vmornot_mm_nxv1i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv1i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv2i1( - , - , - i32); - -define @intrinsic_vmornot_mm_nxv2i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv2i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv4i1( - , - , - i32); - -define @intrinsic_vmornot_mm_nxv4i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv4i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv8i1( - , - , - i32); - -define @intrinsic_vmornot_mm_nxv8i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv8i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv16i1( - , - , - i32); - -define @intrinsic_vmornot_mm_nxv16i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv16i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv32i1( - , - , - i32); - -define @intrinsic_vmornot_mm_nxv32i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv32i1( - %0, - %1, - i32 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv64i1( - , - , - i32); - -define @intrinsic_vmornot_mm_nxv64i1( %0, %1, i32 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv64i1( - %0, - %1, - i32 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmornot-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmornot-rv64.ll deleted file mode 100644 index f4cf403c0730a..0000000000000 --- a/llvm/test/CodeGen/RISCV/rvv/vmornot-rv64.ll +++ /dev/null @@ -1,142 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \ -; RUN: < %s | FileCheck %s -declare @llvm.riscv.vmornot.nxv1i1( - , - , - i64); - -define @intrinsic_vmornot_mm_nxv1i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv1i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv1i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv2i1( - , - , - i64); - -define @intrinsic_vmornot_mm_nxv2i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv2i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv2i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv4i1( - , - , - i64); - -define @intrinsic_vmornot_mm_nxv4i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv4i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; 
CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv4i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv8i1( - , - , - i64); - -define @intrinsic_vmornot_mm_nxv8i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv8i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv8i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv16i1( - , - , - i64); - -define @intrinsic_vmornot_mm_nxv16i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv16i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv16i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv32i1( - , - , - i64); - -define @intrinsic_vmornot_mm_nxv32i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv32i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv32i1( - %0, - %1, - i64 %2) - - ret %a -} - -declare @llvm.riscv.vmornot.nxv64i1( - , - , - i64); - -define @intrinsic_vmornot_mm_nxv64i1( %0, %1, i64 %2) nounwind { -; CHECK-LABEL: intrinsic_vmornot_mm_nxv64i1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vmornot.mm v0, v0, v8 -; CHECK-NEXT: ret -entry: - %a = call @llvm.riscv.vmornot.nxv64i1( - %0, - %1, - i64 %2) - - ret %a -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll index 79f6f417d46d7..29a2aaf6f192d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll @@ -2470,7 +2470,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i8_i8( @llvm.riscv.vmsge.mask.nxv1i8.i8( @@ -2488,7 +2488,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i8_i8( @llvm.riscv.vmsge.mask.nxv2i8.i8( @@ -2506,7 +2506,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv4i8_i8( @llvm.riscv.vmsge.mask.nxv4i8.i8( @@ -2524,7 +2524,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv8i8_i8( @llvm.riscv.vmsge.mask.nxv8i8.i8( @@ -2542,7 +2542,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8( @llvm.riscv.vmsge.mask.nxv16i8.i8( @@ -2560,7 +2560,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8( @llvm.riscv.vmsge.mask.nxv32i8.i8( @@ -2578,7 +2578,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i16_i16( @llvm.riscv.vmsge.mask.nxv1i16.i16( @@ -2596,7 +2596,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i16_i16( @llvm.riscv.vmsge.mask.nxv2i16.i16( @@ -2614,7 +2614,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv4i16_i16( @llvm.riscv.vmsge.mask.nxv4i16.i16( @@ -2632,7 +2632,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16( @llvm.riscv.vmsge.mask.nxv8i16.i16( @@ -2650,7 +2650,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16( @llvm.riscv.vmsge.mask.nxv16i16.i16( @@ -2668,7 +2668,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i32_i32( @llvm.riscv.vmsge.mask.nxv1i32.i32( @@ -2686,7 +2686,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i32_i32( @llvm.riscv.vmsge.mask.nxv2i32.i32( @@ -2704,7 +2704,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32( @llvm.riscv.vmsge.mask.nxv4i32.i32( @@ -2722,7 +2722,7 @@ define 
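; RVV has no vmsge.vx or vmsgeu.vx encoding, so the backend synthesizes
; "greater or equal" as the complement of "set less than", and the masked and
; maskedoff variants fold that complement into mask-register arithmetic; the
; vmsge and vmsgeu hunks here appear to update those expected mask sequences
; for the instruction renames above. A hedged sketch of the scalar-operand
; identity (signed shown; vmsgeu uses icmp ult instead):

define <vscale x 8 x i1> @sge_via_slt_sketch(<vscale x 8 x i8> %va, i8 %b) {
  %head = insertelement <vscale x 8 x i8> undef, i8 %b, i32 0
  %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
  %lt = isntr ; placeholder removed
  ret <vscale x 8 x i1> undef
}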
@intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32( @llvm.riscv.vmsge.mask.nxv8i32.i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll index c690b9f7f275f..5062948a6c839 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll @@ -2437,7 +2437,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i8_i8( @llvm.riscv.vmsge.mask.nxv1i8.i8( @@ -2455,7 +2455,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i8_i8( @llvm.riscv.vmsge.mask.nxv2i8.i8( @@ -2473,7 +2473,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv4i8_i8( @llvm.riscv.vmsge.mask.nxv4i8.i8( @@ -2491,7 +2491,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv8i8_i8( @llvm.riscv.vmsge.mask.nxv8i8.i8( @@ -2509,7 +2509,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8( @llvm.riscv.vmsge.mask.nxv16i8.i8( @@ -2527,7 +2527,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8( @llvm.riscv.vmsge.mask.nxv32i8.i8( @@ -2545,7 +2545,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i16_i16( @llvm.riscv.vmsge.mask.nxv1i16.i16( @@ -2563,7 +2563,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i16_i16( @llvm.riscv.vmsge.mask.nxv2i16.i16( @@ -2581,7 +2581,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv4i16_i16( @llvm.riscv.vmsge.mask.nxv4i16.i16( @@ -2599,7 +2599,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16( @llvm.riscv.vmsge.mask.nxv8i16.i16( @@ -2617,7 +2617,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16( @llvm.riscv.vmsge.mask.nxv16i16.i16( @@ -2635,7 +2635,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i32_i32( @llvm.riscv.vmsge.mask.nxv1i32.i32( @@ -2653,7 +2653,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i32_i32( @llvm.riscv.vmsge.mask.nxv2i32.i32( @@ -2671,7 +2671,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32( @llvm.riscv.vmsge.mask.nxv4i32.i32( @@ -2689,7 +2689,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32( @llvm.riscv.vmsge.mask.nxv8i32.i32( @@ -2707,7 +2707,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64( @llvm.riscv.vmsge.mask.nxv1i64.i64( @@ -2725,7 +2725,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64( @llvm.riscv.vmsge.mask.nxv2i64.i64( @@ -2743,7 +2743,7 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64( @llvm.riscv.vmsge.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll index 2773db93d51e1..86dfdf58fa596 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll @@ -2470,7 +2470,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i8_i8( @llvm.riscv.vmsgeu.mask.nxv1i8.i8( @@ -2488,7 +2488,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i8_i8( @llvm.riscv.vmsgeu.mask.nxv2i8.i8( @@ -2506,7 +2506,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i8_i8( @llvm.riscv.vmsgeu.mask.nxv4i8.i8( @@ -2524,7 +2524,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i8_i8( @llvm.riscv.vmsgeu.mask.nxv8i8.i8( @@ -2542,7 +2542,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8( @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -2560,7 +2560,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8( @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -2578,7 +2578,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i16_i16( @llvm.riscv.vmsgeu.mask.nxv1i16.i16( @@ -2596,7 +2596,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i16_i16( @llvm.riscv.vmsgeu.mask.nxv2i16.i16( @@ -2614,7 +2614,7 @@ define 
@intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i16_i16( @llvm.riscv.vmsgeu.mask.nxv4i16.i16( @@ -2632,7 +2632,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16( @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -2650,7 +2650,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16( @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -2668,7 +2668,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i32_i32( @llvm.riscv.vmsgeu.mask.nxv1i32.i32( @@ -2686,7 +2686,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i32_i32( @llvm.riscv.vmsgeu.mask.nxv2i32.i32( @@ -2704,7 +2704,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32( @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -2722,7 +2722,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32( @llvm.riscv.vmsgeu.mask.nxv8i32.i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll index e7f6af07d94df..ce56f7a9eb393 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll @@ -2437,7 +2437,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i8_i8( @llvm.riscv.vmsgeu.mask.nxv1i8.i8( @@ -2455,7 +2455,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i8_i8( @llvm.riscv.vmsgeu.mask.nxv2i8.i8( @@ -2473,7 +2473,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i8_i8( @llvm.riscv.vmsgeu.mask.nxv4i8.i8( @@ -2491,7 +2491,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i8_i8( @llvm.riscv.vmsgeu.mask.nxv8i8.i8( @@ -2509,7 +2509,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8( @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -2527,7 +2527,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8( @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -2545,7 +2545,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i16_i16( @llvm.riscv.vmsgeu.mask.nxv1i16.i16( @@ -2563,7 +2563,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i16_i16( @llvm.riscv.vmsgeu.mask.nxv2i16.i16( @@ -2581,7 +2581,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i16_i16( @llvm.riscv.vmsgeu.mask.nxv4i16.i16( @@ -2599,7 +2599,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16( @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -2617,7 +2617,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16( @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -2635,7 +2635,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i32_i32( @llvm.riscv.vmsgeu.mask.nxv1i32.i32( @@ -2653,7 +2653,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i32_i32( @llvm.riscv.vmsgeu.mask.nxv2i32.i32( @@ -2671,7 +2671,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32( @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -2689,7 +2689,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32( @llvm.riscv.vmsgeu.mask.nxv8i32.i32( @@ -2707,7 +2707,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv1i64_i64( @llvm.riscv.vmsgeu.mask.nxv1i64.i64( @@ -2725,7 +2725,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64( @llvm.riscv.vmsgeu.mask.nxv2i64.i64( @@ -2743,7 +2743,7 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64( @llvm.riscv.vmsgeu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll index 340694828c416..19d5141d78105 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc 
-mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 ; Test that the prepareSREMEqFold optimization doesn't crash on scalable ; vector types. @@ -24,3 +24,327 @@ define @srem_eq_fold_nxv4i8( %va) { %cc = icmp eq %rem, zeroinitializer ret %cc } + +define @vmulh_vv_nxv1i32( %va, %vb) { +; CHECK-LABEL: vmulh_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmulh.vv v8, v9, v8 +; CHECK-NEXT: ret + %vc = sext %vb to + %vd = sext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulh_vx_nxv1i32( %va, i32 %x) { +; CHECK-LABEL: vmulh_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv1i32_0( %va) { +; RV32-LABEL: vmulh_vi_nxv1i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; RV32-NEXT: vmulh.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulh_vi_nxv1i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; RV64-NEXT: vmulh.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv1i32_1( %va) { +; CHECK-LABEL: vmulh_vi_nxv1i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 16 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vv_nxv2i32( %va, %vb) { +; CHECK-LABEL: vmulh_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmulh.vv v8, v9, v8 +; CHECK-NEXT: ret + %vc = sext %vb to + %vd = sext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulh_vx_nxv2i32( %va, i32 %x) { +; CHECK-LABEL: vmulh_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, 
undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv2i32_0( %va) { +; RV32-LABEL: vmulh_vi_nxv2i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; RV32-NEXT: vmulh.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulh_vi_nxv2i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; RV64-NEXT: vmulh.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv2i32_1( %va) { +; CHECK-LABEL: vmulh_vi_nxv2i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 16 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vv_nxv4i32( %va, %vb) { +; CHECK-LABEL: vmulh_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmulh.vv v8, v10, v8 +; CHECK-NEXT: ret + %vc = sext %vb to + %vd = sext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulh_vx_nxv4i32( %va, i32 %x) { +; CHECK-LABEL: vmulh_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv4i32_0( %va) { +; RV32-LABEL: vmulh_vi_nxv4i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV32-NEXT: vmulh.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulh_vi_nxv4i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV64-NEXT: vmulh.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv4i32_1( %va) { +; CHECK-LABEL: vmulh_vi_nxv4i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 16 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: 
vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vv_nxv8i32( %va, %vb) { +; CHECK-LABEL: vmulh_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmulh.vv v8, v12, v8 +; CHECK-NEXT: ret + %vc = sext %vb to + %vd = sext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulh_vx_nxv8i32( %va, i32 %x) { +; CHECK-LABEL: vmulh_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv8i32_0( %va) { +; RV32-LABEL: vmulh_vi_nxv8i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vmulh.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulh_vi_nxv8i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV64-NEXT: vmulh.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulh_vi_nxv8i32_1( %va) { +; CHECK-LABEL: vmulh_vi_nxv8i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, zero, 16 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vmulh.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = sext %splat1 to + %vc = sext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll new file mode 100644 index 0000000000000..ec06dbffbcf93 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll @@ -0,0 +1,351 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +define @vmulhu_vv_nxv1i32( %va, %vb) { +; CHECK-LABEL: vmulhu_vv_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmulhu.vv v8, v9, v8 +; CHECK-NEXT: ret + %vc = zext %vb to + %vd = zext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 
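; Every vmulh test above follows one shape: sign-extend both i32 operands to
; i64, multiply, shift right by 32, and truncate back to i32; instruction
; selection folds that chain into a single vmulh.vv or vmulh.vx returning the
; high half of the signed product, and the new vmulhu-sdnode.ll file that
; starts here repeats the pattern with zext. With the scalable types written
; out (one width shown, hedged sketch):

define <vscale x 1 x i32> @mulh_sketch(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b) {
  %ae = sext <vscale x 1 x i32> %a to <vscale x 1 x i64>
  %be = sext <vscale x 1 x i32> %b to <vscale x 1 x i64>
  %p = mul <vscale x 1 x i64> %ae, %be
  %head = insertelement <vscale x 1 x i64> undef, i64 32, i32 0
  %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
  %hi = lshr <vscale x 1 x i64> %p, %splat       ; high 32 bits of the product
  %r = trunc <vscale x 1 x i64> %hi to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %r
}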
+ %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulhu_vx_nxv1i32( %va, i32 %x) { +; CHECK-LABEL: vmulhu_vx_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv1i32_0( %va) { +; RV32-LABEL: vmulhu_vi_nxv1i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; RV32-NEXT: vmulhu.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv1i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv1i32_1( %va) { +; RV32-LABEL: vmulhu_vi_nxv1i32_1: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; RV32-NEXT: vsrl.vi v8, v8, 28 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv1i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 16 +; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, mu +; RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vv_nxv2i32( %va, %vb) { +; CHECK-LABEL: vmulhu_vv_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vmulhu.vv v8, v9, v8 +; CHECK-NEXT: ret + %vc = zext %vb to + %vd = zext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulhu_vx_nxv2i32( %va, i32 %x) { +; CHECK-LABEL: vmulhu_vx_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv2i32_0( %va) { +; RV32-LABEL: vmulhu_vi_nxv2i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; RV32-NEXT: vmulhu.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv2i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; 
RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv2i32_1( %va) { +; RV32-LABEL: vmulhu_vi_nxv2i32_1: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; RV32-NEXT: vsrl.vi v8, v8, 28 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv2i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 16 +; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vv_nxv4i32( %va, %vb) { +; CHECK-LABEL: vmulhu_vv_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; CHECK-NEXT: vmulhu.vv v8, v10, v8 +; CHECK-NEXT: ret + %vc = zext %vb to + %vd = zext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulhu_vx_nxv4i32( %va, i32 %x) { +; CHECK-LABEL: vmulhu_vx_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv4i32_0( %va) { +; RV32-LABEL: vmulhu_vi_nxv4i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV32-NEXT: vmulhu.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv4i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv4i32_1( %va) { +; RV32-LABEL: vmulhu_vi_nxv4i32_1: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, mu +; RV32-NEXT: vsrl.vi v8, v8, 28 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv4i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 16 +; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, mu +; RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to 
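; The *_1 cases multiply by 16, a power of two, so the unsigned high half
; degenerates to a shift: ((zext i32 %x to i64) * 16) >> 32 keeps bits 28..31
; of %x, i.e. it equals %x >> 28. RV32 codegen performs that fold (the
; vsrl.vi v8, v8, 28 lines), while RV64 still emits vmulhu.vx here, which is
; why these tests need separate RV32/RV64 check prefixes. A scalar sketch of
; the arithmetic (illustrative only):

define i32 @mulhu16_sketch(i32 %x) {
  %xe = zext i32 %x to i64
  %p = mul i64 %xe, 16                ; at most 36 significant bits
  %hi = lshr i64 %p, 32               ; bits 32..35 of %p = bits 28..31 of %x
  %r = trunc i64 %hi to i32           ; same result as lshr i32 %x, 28
  ret i32 %r
}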
+ ret %vf +} + +define @vmulhu_vv_nxv8i32( %va, %vb) { +; CHECK-LABEL: vmulhu_vv_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vmulhu.vv v8, v12, v8 +; CHECK-NEXT: ret + %vc = zext %vb to + %vd = zext %va to + %ve = mul %vc, %vd + %head = insertelement undef, i64 32, i32 0 + %splat = shufflevector %head, undef, zeroinitializer + %vf = lshr %ve, %splat + %vg = trunc %vf to + ret %vg +} + +define @vmulhu_vx_nxv8i32( %va, i32 %x) { +; CHECK-LABEL: vmulhu_vx_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; CHECK-NEXT: vmulhu.vx v8, v8, a0 +; CHECK-NEXT: ret + %head1 = insertelement undef, i32 %x, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv8i32_0( %va) { +; RV32-LABEL: vmulhu_vi_nxv8i32_0: +; RV32: # %bb.0: +; RV32-NEXT: addi a0, zero, -7 +; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV32-NEXT: vmulhu.vx v8, v8, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv8i32_0: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 1 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 -7, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} + +define @vmulhu_vi_nxv8i32_1( %va) { +; RV32-LABEL: vmulhu_vi_nxv8i32_1: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; RV32-NEXT: vsrl.vi v8, v8, 28 +; RV32-NEXT: ret +; +; RV64-LABEL: vmulhu_vi_nxv8i32_1: +; RV64: # %bb.0: +; RV64-NEXT: addi a0, zero, 16 +; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, mu +; RV64-NEXT: vmulhu.vx v8, v8, a0 +; RV64-NEXT: ret + %head1 = insertelement undef, i32 16, i32 0 + %splat1 = shufflevector %head1, undef, zeroinitializer + %vb = zext %splat1 to + %vc = zext %va to + %vd = mul %vb, %vc + %head2 = insertelement undef, i64 32, i32 0 + %splat2 = shufflevector %head2, undef, zeroinitializer + %ve = lshr %vd, %splat2 + %vf = trunc %ve to + ret %vf +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index 24c31ccae1227..b652654c1bbfb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -declare @llvm.vp.load.nxv1i8(*, , i32) +declare @llvm.vp.load.nxv1i8.p0nxv1i8(*, , i32) define @vpload_nxv1i8(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv1i8: @@ -12,11 +12,23 @@ define @vpload_nxv1i8(* %ptr, @llvm.vp.load.nxv1i8(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv1i8.p0nxv1i8(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv2i8(*, , i32) +define @vpload_nxv1i8_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + 
%load = call @llvm.vp.load.nxv1i8.p0nxv1i8(* %ptr, %b, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2i8.p0nxv2i8(*, , i32) define @vpload_nxv2i8(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv2i8: @@ -24,11 +36,11 @@ define @vpload_nxv2i8(* %ptr, @llvm.vp.load.nxv2i8(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv2i8.p0nxv2i8(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv4i8(*, , i32) +declare @llvm.vp.load.nxv4i8.p0nxv4i8(*, , i32) define @vpload_nxv4i8(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv4i8: @@ -36,11 +48,11 @@ define @vpload_nxv4i8(* %ptr, @llvm.vp.load.nxv4i8(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv4i8.p0nxv4i8(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv8i8(*, , i32) +declare @llvm.vp.load.nxv8i8.p0nxv8i8(*, , i32) define @vpload_nxv8i8(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv8i8: @@ -48,11 +60,23 @@ define @vpload_nxv8i8(* %ptr, @llvm.vp.load.nxv8i8(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv8i8.p0nxv8i8(* %ptr, %m, i32 %evl) + ret %load +} + +define @vpload_nxv8i8_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv8i8.p0nxv8i8(* %ptr, %b, i32 %evl) ret %load } -declare @llvm.vp.load.nxv1i16(*, , i32) +declare @llvm.vp.load.nxv1i16.p0nxv1i16(*, , i32) define @vpload_nxv1i16(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv1i16: @@ -60,11 +84,11 @@ define @vpload_nxv1i16(* %ptr, @llvm.vp.load.nxv1i16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv1i16.p0nxv1i16(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv2i16(*, , i32) +declare @llvm.vp.load.nxv2i16.p0nxv2i16(*, , i32) define @vpload_nxv2i16(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv2i16: @@ -72,11 +96,23 @@ define @vpload_nxv2i16(* %ptr, @llvm.vp.load.nxv2i16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv2i16.p0nxv2i16(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv4i16(*, , i32) +define @vpload_nxv2i16_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv2i16.p0nxv2i16(* %ptr, %b, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv4i16.p0nxv4i16(*, , i32) define @vpload_nxv4i16(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv4i16: @@ -84,11 +120,11 @@ define @vpload_nxv4i16(* %ptr, @llvm.vp.load.nxv4i16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv4i16.p0nxv4i16(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv8i16(*, , i32) +declare @llvm.vp.load.nxv8i16.p0nxv8i16(*, , i32) define @vpload_nxv8i16(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv8i16: @@ -96,11 +132,11 @@ define @vpload_nxv8i16(* %ptr, @llvm.vp.load.nxv8i16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv8i16.p0nxv8i16(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv1i32(*, , i32) +declare @llvm.vp.load.nxv1i32.p0nxv1i32(*, , i32) define @vpload_nxv1i32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv1i32: @@ -108,11 +144,11 @@ define @vpload_nxv1i32(* %ptr, 
@llvm.vp.load.nxv1i32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv1i32.p0nxv1i32(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv2i32(*, , i32) +declare @llvm.vp.load.nxv2i32.p0nxv2i32(*, , i32) define @vpload_nxv2i32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv2i32: @@ -120,11 +156,11 @@ define @vpload_nxv2i32(* %ptr, @llvm.vp.load.nxv2i32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv2i32.p0nxv2i32(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv4i32(*, , i32) +declare @llvm.vp.load.nxv4i32.p0nxv4i32(*, , i32) define @vpload_nxv4i32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv4i32: @@ -132,11 +168,23 @@ define @vpload_nxv4i32(* %ptr, @llvm.vp.load.nxv4i32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv4i32.p0nxv4i32(* %ptr, %m, i32 %evl) + ret %load +} + +define @vpload_nxv4i32_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv4i32.p0nxv4i32(* %ptr, %b, i32 %evl) ret %load } -declare @llvm.vp.load.nxv8i32(*, , i32) +declare @llvm.vp.load.nxv8i32.p0nxv8i32(*, , i32) define @vpload_nxv8i32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv8i32: @@ -144,11 +192,11 @@ define @vpload_nxv8i32(* %ptr, @llvm.vp.load.nxv8i32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv8i32.p0nxv8i32(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv1i64(*, , i32) +declare @llvm.vp.load.nxv1i64.p0nxv1i64(*, , i32) define @vpload_nxv1i64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv1i64: @@ -156,11 +204,23 @@ define @vpload_nxv1i64(* %ptr, @llvm.vp.load.nxv1i64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv1i64.p0nxv1i64(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv2i64(*, , i32) +define @vpload_nxv1i64_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv1i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv1i64.p0nxv1i64(* %ptr, %b, i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv2i64.p0nxv2i64(*, , i32) define @vpload_nxv2i64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv2i64: @@ -168,11 +228,11 @@ define @vpload_nxv2i64(* %ptr, @llvm.vp.load.nxv2i64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv2i64.p0nxv2i64(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv4i64(*, , i32) +declare @llvm.vp.load.nxv4i64.p0nxv4i64(*, , i32) define @vpload_nxv4i64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv4i64: @@ -180,11 +240,11 @@ define @vpload_nxv4i64(* %ptr, @llvm.vp.load.nxv4i64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv4i64.p0nxv4i64(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv8i64(*, , i32) +declare @llvm.vp.load.nxv8i64.p0nxv8i64(*, , i32) define @vpload_nxv8i64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv8i64: @@ -192,11 +252,11 @@ define @vpload_nxv8i64(* %ptr, @llvm.vp.load.nxv8i64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv8i64.p0nxv8i64(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv1f16(*, , i32) +declare @llvm.vp.load.nxv1f16.p0nxv1f16(*, , i32) define @vpload_nxv1f16(* %ptr, 
%m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv1f16: @@ -204,11 +264,11 @@ define @vpload_nxv1f16(* %ptr, @llvm.vp.load.nxv1f16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv1f16.p0nxv1f16(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv2f16(*, , i32) +declare @llvm.vp.load.nxv2f16.p0nxv2f16(*, , i32) define @vpload_nxv2f16(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv2f16: @@ -216,11 +276,23 @@ define @vpload_nxv2f16(* %ptr, @llvm.vp.load.nxv2f16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv2f16.p0nxv2f16(* %ptr, %m, i32 %evl) + ret %load +} + +define @vpload_nxv2f16_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv2f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv2f16.p0nxv2f16(* %ptr, %b, i32 %evl) ret %load } -declare @llvm.vp.load.nxv4f16(*, , i32) +declare @llvm.vp.load.nxv4f16.p0nxv4f16(*, , i32) define @vpload_nxv4f16(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv4f16: @@ -228,11 +300,11 @@ define @vpload_nxv4f16(* %ptr, @llvm.vp.load.nxv4f16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv4f16.p0nxv4f16(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv8f16(*, , i32) +declare @llvm.vp.load.nxv8f16.p0nxv8f16(*, , i32) define @vpload_nxv8f16(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv8f16: @@ -240,11 +312,11 @@ define @vpload_nxv8f16(* %ptr, @llvm.vp.load.nxv8f16(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv8f16.p0nxv8f16(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv1f32(*, , i32) +declare @llvm.vp.load.nxv1f32.p0nxv1f32(*, , i32) define @vpload_nxv1f32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv1f32: @@ -252,11 +324,11 @@ define @vpload_nxv1f32(* %ptr, @llvm.vp.load.nxv1f32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv1f32.p0nxv1f32(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv2f32(*, , i32) +declare @llvm.vp.load.nxv2f32.p0nxv2f32(*, , i32) define @vpload_nxv2f32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv2f32: @@ -264,11 +336,11 @@ define @vpload_nxv2f32(* %ptr, @llvm.vp.load.nxv2f32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv4f32(*, , i32) +declare @llvm.vp.load.nxv4f32.p0nxv4f32(*, , i32) define @vpload_nxv4f32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv4f32: @@ -276,11 +348,11 @@ define @vpload_nxv4f32(* %ptr, @llvm.vp.load.nxv4f32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv4f32.p0nxv4f32(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv8f32(*, , i32) +declare @llvm.vp.load.nxv8f32.p0nxv8f32(*, , i32) define @vpload_nxv8f32(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv8f32: @@ -288,11 +360,23 @@ define @vpload_nxv8f32(* %ptr, @llvm.vp.load.nxv8f32(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv8f32.p0nxv8f32(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv1f64(*, , i32) +define @vpload_nxv8f32_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv8f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv8f32.p0nxv8f32(* %ptr, %b, 
i32 %evl) + ret %load +} + +declare @llvm.vp.load.nxv1f64.p0nxv1f64(*, , i32) define @vpload_nxv1f64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv1f64: @@ -300,11 +384,11 @@ define @vpload_nxv1f64(* %ptr, @llvm.vp.load.nxv1f64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv1f64.p0nxv1f64(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv2f64(*, , i32) +declare @llvm.vp.load.nxv2f64.p0nxv2f64(*, , i32) define @vpload_nxv2f64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv2f64: @@ -312,11 +396,11 @@ define @vpload_nxv2f64(* %ptr, @llvm.vp.load.nxv2f64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv2f64.p0nxv2f64(* %ptr, %m, i32 %evl) ret %load } -declare @llvm.vp.load.nxv4f64(*, , i32) +declare @llvm.vp.load.nxv4f64.p0nxv4f64(*, , i32) define @vpload_nxv4f64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv4f64: @@ -324,11 +408,23 @@ define @vpload_nxv4f64(* %ptr, @llvm.vp.load.nxv4f64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv4f64.p0nxv4f64(* %ptr, %m, i32 %evl) + ret %load +} + +define @vpload_nxv4f64_allones_mask(* %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vpload_nxv4f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: ret + %a = insertelement undef, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call @llvm.vp.load.nxv4f64.p0nxv4f64(* %ptr, %b, i32 %evl) ret %load } -declare @llvm.vp.load.nxv8f64(*, , i32) +declare @llvm.vp.load.nxv8f64.p0nxv8f64(*, , i32) define @vpload_nxv8f64(* %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv8f64: @@ -336,6 +432,6 @@ define @vpload_nxv8f64(* %ptr, @llvm.vp.load.nxv8f64(* %ptr, %m, i32 %evl) + %load = call @llvm.vp.load.nxv8f64.p0nxv8f64(* %ptr, %m, i32 %evl) ret %load } diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index e9fdb943fc9e1..3eba76f51c247 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -4,7 +4,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh,+experimental-v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -declare void @llvm.vp.store.nxv1i8(, *, , i32) +declare void @llvm.vp.store.nxv1i8.p0nxv1i8(, *, , i32) define void @vpstore_nxv1i8( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv1i8: @@ -12,11 +12,11 @@ define void @vpstore_nxv1i8( %val, * %ptr, %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv1i8.p0nxv1i8( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv2i8(, *, , i32) +declare void @llvm.vp.store.nxv2i8.p0nxv2i8(, *, , i32) define void @vpstore_nxv2i8( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv2i8: @@ -24,11 +24,11 @@ define void @vpstore_nxv2i8( %val, * %ptr, %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv2i8.p0nxv2i8( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv4i8(, *, , i32) +declare void @llvm.vp.store.nxv4i8.p0nxv4i8(, *, , i32) define void @vpstore_nxv4i8( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv4i8: @@ -36,11 +36,11 @@ define void @vpstore_nxv4i8( %val, * %ptr, %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv4i8.p0nxv4i8( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv8i8(, *, , i32) +declare void @llvm.vp.store.nxv8i8.p0nxv8i8(, *, , i32) define void @vpstore_nxv8i8( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv8i8: @@ -48,11 +48,11 @@ define void 
@vpstore_nxv8i8( %val, * %ptr, %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv8i8.p0nxv8i8( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv1i16(, *, , i32) +declare void @llvm.vp.store.nxv1i16.p0nxv1i16(, *, , i32) define void @vpstore_nxv1i16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv1i16: @@ -60,11 +60,11 @@ define void @vpstore_nxv1i16( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv1i16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv1i16.p0nxv1i16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv2i16(, *, , i32) +declare void @llvm.vp.store.nxv2i16.p0nxv2i16(, *, , i32) define void @vpstore_nxv2i16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv2i16: @@ -72,11 +72,11 @@ define void @vpstore_nxv2i16( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv2i16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv2i16.p0nxv2i16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv4i16(, *, , i32) +declare void @llvm.vp.store.nxv4i16.p0nxv4i16(, *, , i32) define void @vpstore_nxv4i16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv4i16: @@ -84,11 +84,11 @@ define void @vpstore_nxv4i16( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv4i16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv4i16.p0nxv4i16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv8i16(, *, , i32) +declare void @llvm.vp.store.nxv8i16.p0nxv8i16(, *, , i32) define void @vpstore_nxv8i16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv8i16: @@ -96,11 +96,11 @@ define void @vpstore_nxv8i16( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv8i16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv8i16.p0nxv8i16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv1i32(, *, , i32) +declare void @llvm.vp.store.nxv1i32.p0nxv1i32(, *, , i32) define void @vpstore_nxv1i32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv1i32: @@ -108,11 +108,11 @@ define void @vpstore_nxv1i32( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv1i32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv1i32.p0nxv1i32( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv2i32(, *, , i32) +declare void @llvm.vp.store.nxv2i32.p0nxv2i32(, *, , i32) define void @vpstore_nxv2i32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv2i32: @@ -120,11 +120,11 @@ define void @vpstore_nxv2i32( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv2i32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv2i32.p0nxv2i32( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv4i32(, *, , i32) +declare void @llvm.vp.store.nxv4i32.p0nxv4i32(, *, , i32) define void @vpstore_nxv4i32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv4i32: @@ -132,11 +132,11 @@ define void @vpstore_nxv4i32( 
%val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv4i32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv4i32.p0nxv4i32( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv8i32(, *, , i32) +declare void @llvm.vp.store.nxv8i32.p0nxv8i32(, *, , i32) define void @vpstore_nxv8i32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv8i32: @@ -144,11 +144,11 @@ define void @vpstore_nxv8i32( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv8i32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv8i32.p0nxv8i32( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv1i64(, *, , i32) +declare void @llvm.vp.store.nxv1i64.p0nxv1i64(, *, , i32) define void @vpstore_nxv1i64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv1i64: @@ -156,11 +156,11 @@ define void @vpstore_nxv1i64( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv1i64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv1i64.p0nxv1i64( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv2i64(, *, , i32) +declare void @llvm.vp.store.nxv2i64.p0nxv2i64(, *, , i32) define void @vpstore_nxv2i64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv2i64: @@ -168,11 +168,11 @@ define void @vpstore_nxv2i64( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv2i64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv2i64.p0nxv2i64( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv4i64(, *, , i32) +declare void @llvm.vp.store.nxv4i64.p0nxv4i64(, *, , i32) define void @vpstore_nxv4i64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv4i64: @@ -180,11 +180,11 @@ define void @vpstore_nxv4i64( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv4i64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv4i64.p0nxv4i64( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv8i64(, *, , i32) +declare void @llvm.vp.store.nxv8i64.p0nxv8i64(, *, , i32) define void @vpstore_nxv8i64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv8i64: @@ -192,11 +192,11 @@ define void @vpstore_nxv8i64( %val, * %ptr, ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv8i64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv8i64.p0nxv8i64( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv1f16(, *, , i32) +declare void @llvm.vp.store.nxv1f16.p0nxv1f16(, *, , i32) define void @vpstore_nxv1f16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv1f16: @@ -204,11 +204,11 @@ define void @vpstore_nxv1f16( %val, * %ptr ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv1f16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv1f16.p0nxv1f16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv2f16(, *, , i32) +declare void @llvm.vp.store.nxv2f16.p0nxv2f16(, *, , i32) define void 
@vpstore_nxv2f16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv2f16: @@ -216,11 +216,11 @@ define void @vpstore_nxv2f16( %val, * %ptr ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv2f16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv2f16.p0nxv2f16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv4f16(, *, , i32) +declare void @llvm.vp.store.nxv4f16.p0nxv4f16(, *, , i32) define void @vpstore_nxv4f16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv4f16: @@ -228,11 +228,11 @@ define void @vpstore_nxv4f16( %val, * %ptr ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv4f16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv4f16.p0nxv4f16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv8f16(, *, , i32) +declare void @llvm.vp.store.nxv8f16.p0nxv8f16(, *, , i32) define void @vpstore_nxv8f16( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv8f16: @@ -240,11 +240,11 @@ define void @vpstore_nxv8f16( %val, * %ptr ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv8f16( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv8f16.p0nxv8f16( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv1f32(, *, , i32) +declare void @llvm.vp.store.nxv1f32.p0nxv1f32(, *, , i32) define void @vpstore_nxv1f32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv1f32: @@ -252,11 +252,11 @@ define void @vpstore_nxv1f32( %val, * %p ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv1f32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv1f32.p0nxv1f32( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv2f32(, *, , i32) +declare void @llvm.vp.store.nxv2f32.p0nxv2f32(, *, , i32) define void @vpstore_nxv2f32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv2f32: @@ -264,11 +264,11 @@ define void @vpstore_nxv2f32( %val, * %p ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv2f32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv2f32.p0nxv2f32( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv4f32(, *, , i32) +declare void @llvm.vp.store.nxv4f32.p0nxv4f32(, *, , i32) define void @vpstore_nxv4f32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv4f32: @@ -276,11 +276,11 @@ define void @vpstore_nxv4f32( %val, * %p ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv4f32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv4f32.p0nxv4f32( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv8f32(, *, , i32) +declare void @llvm.vp.store.nxv8f32.p0nxv8f32(, *, , i32) define void @vpstore_nxv8f32( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv8f32: @@ -288,11 +288,11 @@ define void @vpstore_nxv8f32( %val, * %p ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv8f32( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv8f32.p0nxv8f32( %val, * %ptr, %m, i32 %evl) ret void } 
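+
+; Note on the renaming throughout this file: llvm.vp.load / llvm.vp.store
+; are overloaded intrinsics, so every overloaded type must be mangled into
+; the callee name. A sketch of the scheme (assuming the typed-pointer
+; mangling this patch uses, where "p0<ty>" encodes an address-space-0
+; pointer to <ty>):
+;
+;   ; value type "nxv8f32"  = <vscale x 8 x float>
+;   ; pointer type "p0nxv8f32" = <vscale x 8 x float>*
+;   call void @llvm.vp.store.nxv8f32.p0nxv8f32(
+;       <vscale x 8 x float> %val, <vscale x 8 x float>* %ptr,
+;       <vscale x 8 x i1> %m, i32 %evl)
+;
+; A declaration without the pointer suffix no longer matches the mangled
+; name, which is why each declare/call pair is updated together.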
-declare void @llvm.vp.store.nxv1f64(, *, , i32) +declare void @llvm.vp.store.nxv1f64.p0nxv1f64(, *, , i32) define void @vpstore_nxv1f64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv1f64: @@ -300,11 +300,11 @@ define void @vpstore_nxv1f64( %val, * ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv1f64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv1f64.p0nxv1f64( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv2f64(, *, , i32) +declare void @llvm.vp.store.nxv2f64.p0nxv2f64(, *, , i32) define void @vpstore_nxv2f64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv2f64: @@ -312,11 +312,11 @@ define void @vpstore_nxv2f64( %val, * ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv2f64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv2f64.p0nxv2f64( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv4f64(, *, , i32) +declare void @llvm.vp.store.nxv4f64.p0nxv4f64(, *, , i32) define void @vpstore_nxv4f64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv4f64: @@ -324,11 +324,11 @@ define void @vpstore_nxv4f64( %val, * ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv4f64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv4f64.p0nxv4f64( %val, * %ptr, %m, i32 %evl) ret void } -declare void @llvm.vp.store.nxv8f64(, *, , i32) +declare void @llvm.vp.store.nxv8f64.p0nxv8f64(, *, , i32) define void @vpstore_nxv8f64( %val, * %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpstore_nxv8f64: @@ -336,7 +336,7 @@ define void @vpstore_nxv8f64( %val, * ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: ret - call void @llvm.vp.store.nxv8f64( %val, * %ptr, %m, i32 %evl) + call void @llvm.vp.store.nxv8f64.p0nxv8f64( %val, * %ptr, %m, i32 %evl) ret void } @@ -348,6 +348,6 @@ define void @vpstore_nxv1i8_allones_mask( %val, undef, i1 true, i32 0 %b = shufflevector %a, poison, zeroinitializer - call void @llvm.vp.store.nxv1i8( %val, * %ptr, %b, i32 %evl) + call void @llvm.vp.store.nxv1i8.p0nxv1i8( %val, * %ptr, %b, i32 %evl) ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll index 1d8797daa6ebc..29aed36b82a10 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll @@ -10,7 +10,7 @@ define signext i1 @vpreduce_and_nxv1i1(i1 signext %s, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v, %v) { ; CHECK-LABEL: vreduce_or_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -22,7 +22,7 @@ define signext i1 @vreduce_xor_nxv1i1( %v) { ; CHECK-LABEL: vreduce_xor_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -37,7 +37,7 @@ define signext i1 @vreduce_and_nxv1i1( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; 
CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -51,7 +51,7 @@ define signext i1 @vreduce_or_nxv2i1( %v) { ; CHECK-LABEL: vreduce_or_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -65,7 +65,7 @@ define signext i1 @vreduce_xor_nxv2i1( %v) { ; CHECK-LABEL: vreduce_xor_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -80,7 +80,7 @@ define signext i1 @vreduce_and_nxv2i1( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -94,7 +94,7 @@ define signext i1 @vreduce_or_nxv4i1( %v) { ; CHECK-LABEL: vreduce_or_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -108,7 +108,7 @@ define signext i1 @vreduce_xor_nxv4i1( %v) { ; CHECK-LABEL: vreduce_xor_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -123,7 +123,7 @@ define signext i1 @vreduce_and_nxv4i1( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -137,7 +137,7 @@ define signext i1 @vreduce_or_nxv8i1( %v) { ; CHECK-LABEL: vreduce_or_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -151,7 +151,7 @@ define signext i1 @vreduce_xor_nxv8i1( %v) { ; CHECK-LABEL: vreduce_xor_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -166,7 +166,7 @@ define signext i1 @vreduce_and_nxv8i1( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -180,7 +180,7 @@ define signext i1 @vreduce_or_nxv16i1( %v) { ; CHECK-LABEL: vreduce_or_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -194,7 +194,7 @@ define signext i1 @vreduce_xor_nxv16i1( %v) { ; CHECK-LABEL: vreduce_xor_nxv16i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -209,7 +209,7 @@ define signext i1 @vreduce_and_nxv16i1( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz 
a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -223,7 +223,7 @@ define signext i1 @vreduce_or_nxv32i1( %v) { ; CHECK-LABEL: vreduce_or_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -237,7 +237,7 @@ define signext i1 @vreduce_xor_nxv32i1( %v) { ; CHECK-LABEL: vreduce_xor_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -252,7 +252,7 @@ define signext i1 @vreduce_and_nxv32i1( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -266,7 +266,7 @@ define signext i1 @vreduce_or_nxv64i1( %v) { ; CHECK-LABEL: vreduce_or_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -280,7 +280,7 @@ define signext i1 @vreduce_xor_nxv64i1( %v) { ; CHECK-LABEL: vreduce_xor_nxv64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu -; CHECK-NEXT: vpopc.m a0, v0 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret @@ -295,7 +295,7 @@ define signext i1 @vreduce_and_nxv64i1( %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, mu ; CHECK-NEXT: vmnand.mm v8, v0, v0 -; CHECK-NEXT: vpopc.m a0, v8 +; CHECK-NEXT: vcpop.m a0, v8 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: neg a0, a0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll index 0862603c3200d..bc8e96ec31258 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll @@ -6,7 +6,7 @@ define @vselect_nxv1i1( %a, ; CHECK-LABEL: vselect_nxv1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -18,7 +18,7 @@ define @vselect_nxv2i1( %a, ; CHECK-LABEL: vselect_nxv2i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -30,7 +30,7 @@ define @vselect_nxv4i1( %a, ; CHECK-LABEL: vselect_nxv4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -42,7 +42,7 @@ define @vselect_nxv8i1( %a, ; CHECK-LABEL: vselect_nxv8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, mu -; CHECK-NEXT: vmandnot.mm v8, v8, v9 +; CHECK-NEXT: vmandn.mm v8, v8, v9 ; CHECK-NEXT: vmand.mm v9, v0, v9 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret @@ -54,7 +54,7 @@ define @vselect_nxv16i1( %a, @vselect_nxv32i1( %a, @vselect_nxv64i1( %a, %d } - define void @vsetvli_vpopc() { + define void @vsetvli_vcpop() { ret void } @@ -442,7 +442,7 @@ body: | ... 
--- -name: vsetvli_vpopc +name: vsetvli_vcpop tracksRegLiveness: true registers: - { id: 0, class: gpr, preferred-register: '' } @@ -458,7 +458,7 @@ registers: - { id: 10, class: gpr, preferred-register: '' } - { id: 11, class: vr, preferred-register: '' } body: | - ; CHECK-LABEL: name: vsetvli_vpopc + ; CHECK-LABEL: name: vsetvli_vcpop ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $x10, $x11 @@ -479,9 +479,9 @@ body: | ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 23, implicit-def $vl, implicit-def $vtype, implicit $vl ; CHECK-NEXT: [[PseudoVLE32_V_MF2_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_MF2_MASK [[PseudoVMV_V_I_MF2_]], killed [[COPY]], $v0, -1, 5, 0, implicit $vl, implicit $vtype ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 69, implicit-def $vl, implicit-def $vtype, implicit $vl - ; CHECK-NEXT: [[PseudoVPOPC_M_B1_:%[0-9]+]]:gpr = PseudoVPOPC_M_B1 [[PseudoVMSEQ_VI_MF2_]], -1, 0, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVCPOP_M_B1_:%[0-9]+]]:gpr = PseudoVCPOP_M_B1 [[PseudoVMSEQ_VI_MF2_]], -1, 0, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0 - ; CHECK-NEXT: BEQ killed [[PseudoVPOPC_M_B1_]], [[COPY2]], %bb.3 + ; CHECK-NEXT: BEQ killed [[PseudoVCPOP_M_B1_]], [[COPY2]], %bb.3 ; CHECK-NEXT: PseudoBR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: @@ -511,7 +511,7 @@ body: | %5:vmv0 = PseudoVMSEQ_VI_MF2 killed %3, 0, -1, 5 $v0 = COPY %5 %6:vrnov0 = PseudoVLE32_V_MF2_MASK %4, killed %0, $v0, -1, 5, 0 - %7:gpr = PseudoVPOPC_M_B1 %5, -1, 0 + %7:gpr = PseudoVCPOP_M_B1 %5, -1, 0 %8:gpr = COPY $x0 BEQ killed %7, %8, %bb.3 PseudoBR %bb.2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 585a819ea04a4..420bb3473ed3f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -93,3 +93,60 @@ entry: } declare @llvm.riscv.vmseq.nxv1i64.i64(, , i64) declare @llvm.riscv.vmand.nxv1i1.i64(, , i64) + +; FIXME: There shouldn't be a vsetvli before the vmor. 
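+; Mask-register logical ops such as vmor.mm read only the first vl bits of
+; a single mask register, so their result depends on vl, not on the exact
+; SEW/LMUL pair; e8/mf4 and e32/m1 also share the same SEW/LMUL ratio, so
+; vl is unchanged across the toggle. Presumably the vsetvli insertion pass
+; could drop it once it models mask ops by ratio rather than exact values.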
+define void @test6(i32* nocapture readonly %A, i32* nocapture %B, i64 %n) { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a3, a2, e32, m1, ta, mu +; CHECK-NEXT: beqz a3, .LBB5_3 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: mv a4, zero +; CHECK-NEXT: .LBB5_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: slli a6, a4, 2 +; CHECK-NEXT: add a5, a0, a6 +; CHECK-NEXT: vle32.v v8, (a5) +; CHECK-NEXT: vmsle.vi v9, v8, -3 +; CHECK-NEXT: vmsgt.vi v10, v8, 2 +; CHECK-NEXT: vsetvli zero, a3, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v9, v10 +; CHECK-NEXT: add a5, a1, a6 +; CHECK-NEXT: vse32.v v8, (a5), v0.t +; CHECK-NEXT: add a4, a4, a3 +; CHECK-NEXT: vsetvli a3, a2, e32, m1, ta, mu +; CHECK-NEXT: bnez a3, .LBB5_2 +; CHECK-NEXT: .LBB5_3: # %for.cond.cleanup +; CHECK-NEXT: ret +entry: + %0 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 0) + %cmp.not11 = icmp eq i64 %0, 0 + br i1 %cmp.not11, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %1 = phi i64 [ %8, %for.body ], [ %0, %entry ] + %i.012 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %add.ptr = getelementptr inbounds i32, i32* %A, i64 %i.012 + %2 = bitcast i32* %add.ptr to * + %3 = tail call @llvm.riscv.vle.nxv2i32.i64(* %2, i64 %1) + %4 = tail call @llvm.riscv.vmslt.nxv2i32.i32.i64( %3, i32 -2, i64 %1) + %5 = tail call @llvm.riscv.vmsgt.nxv2i32.i32.i64( %3, i32 2, i64 %1) + %6 = tail call @llvm.riscv.vmor.nxv2i1.i64( %4, %5, i64 %1) + %add.ptr1 = getelementptr inbounds i32, i32* %B, i64 %i.012 + %7 = bitcast i32* %add.ptr1 to * + tail call void @llvm.riscv.vse.mask.nxv2i32.i64( %3, * %7, %6, i64 %1) + %add = add i64 %1, %i.012 + %8 = tail call i64 @llvm.riscv.vsetvli.i64(i64 %n, i64 2, i64 0) + %cmp.not = icmp eq i64 %8, 0 + br i1 %cmp.not, label %for.cond.cleanup, label %for.body +} + +declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) +declare @llvm.riscv.vle.nxv2i32.i64(* nocapture, i64) +declare @llvm.riscv.vmslt.nxv2i32.i32.i64(, i32, i64) +declare @llvm.riscv.vmsgt.nxv2i32.i32.i64(, i32, i64) +declare @llvm.riscv.vmor.nxv2i1.i64(, , i64) +declare void @llvm.riscv.vse.mask.nxv2i32.i64(, * nocapture, , i64) diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll index 62644de177e4d..5b67ca0f56164 100644 --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -571,34 +571,25 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind { ; RV32I-LABEL: fshr64_minsize: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: andi a2, a2, 63 -; RV32I-NEXT: call __lshrdi3@plt -; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv s4, a1 -; RV32I-NEXT: neg a0, s0 -; RV32I-NEXT: andi a2, a0, 63 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: mv a1, s2 -; RV32I-NEXT: call __ashldi3@plt -; RV32I-NEXT: or a0, s3, a0 -; RV32I-NEXT: or a1, s4, a1 -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded 
Reload -; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: andi a4, a2, 32 +; RV32I-NEXT: mv a3, a0 +; RV32I-NEXT: beqz a4, .LBB9_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: .LBB9_2: +; RV32I-NEXT: srl a5, a3, a2 +; RV32I-NEXT: beqz a4, .LBB9_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: .LBB9_4: +; RV32I-NEXT: slli a0, a1, 1 +; RV32I-NEXT: not a4, a2 +; RV32I-NEXT: sll a0, a0, a4 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: slli a2, a3, 1 +; RV32I-NEXT: sll a2, a2, a4 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: fshr64_minsize: @@ -615,182 +606,92 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind { define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-LABEL: fshr128_minsize: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -64 -; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 52(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 48(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s3, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s5, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s7, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s8, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s9, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s10, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s11, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw s5, 0(a1) -; RV32I-NEXT: lw s6, 4(a1) -; RV32I-NEXT: lw s4, 8(a1) -; RV32I-NEXT: lw s3, 12(a1) -; RV32I-NEXT: lw s11, 0(a2) -; RV32I-NEXT: mv s2, a0 -; RV32I-NEXT: andi s0, s11, 127 -; RV32I-NEXT: addi a2, s0, -64 -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __lshrdi3@plt -; RV32I-NEXT: mv s8, a0 -; RV32I-NEXT: sw a1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s6 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: call __lshrdi3@plt -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: sw a1, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi s9, zero, 64 -; RV32I-NEXT: sub a2, s9, s0 -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __ashldi3@plt -; RV32I-NEXT: mv s10, a1 -; RV32I-NEXT: bgeu s0, s9, .LBB10_2 +; RV32I-NEXT: lw t2, 8(a1) +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: lw a7, 4(a1) +; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: andi a1, a2, 64 +; RV32I-NEXT: mv a5, a7 +; RV32I-NEXT: mv a6, a3 +; RV32I-NEXT: beqz a1, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: or s8, s1, a0 +; RV32I-NEXT: mv a5, t1 +; RV32I-NEXT: mv a6, t2 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: mv s7, s5 -; RV32I-NEXT: beqz s0, .LBB10_4 +; RV32I-NEXT: andi a4, a2, 32 +; RV32I-NEXT: mv t0, a6 +; RV32I-NEXT: bnez a4, .LBB10_13 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv s7, s8 +; RV32I-NEXT: bnez a1, .LBB10_14 ; RV32I-NEXT: .LBB10_4: -; RV32I-NEXT: neg a0, s11 -; RV32I-NEXT: andi s1, a0, 127 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s6 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: call __ashldi3@plt -; RV32I-NEXT: sw a1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: bgeu s1, s9, .LBB10_6 -; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: or s7, s7, a0 +; RV32I-NEXT: beqz a4, .LBB10_6 +; RV32I-NEXT: .LBB10_5: +; RV32I-NEXT: mv a5, t2 ; RV32I-NEXT: .LBB10_6: -; RV32I-NEXT: bltu s0, s9, .LBB10_8 +; 
RV32I-NEXT: slli t3, a5, 1 +; RV32I-NEXT: not a3, a2 +; RV32I-NEXT: beqz a1, .LBB10_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: j .LBB10_9 +; RV32I-NEXT: mv t1, a7 ; RV32I-NEXT: .LBB10_8: -; RV32I-NEXT: lw a0, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a0, a0, s10 -; RV32I-NEXT: .LBB10_9: -; RV32I-NEXT: mv s8, s6 -; RV32I-NEXT: beqz s0, .LBB10_11 -; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv s8, a0 -; RV32I-NEXT: .LBB10_11: -; RV32I-NEXT: sub a2, s9, s1 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s6 -; RV32I-NEXT: call __lshrdi3@plt -; RV32I-NEXT: mv s10, a0 -; RV32I-NEXT: sw a1, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: call __ashldi3@plt -; RV32I-NEXT: mv s11, a0 -; RV32I-NEXT: sw a1, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a2, s1, -64 -; RV32I-NEXT: mv a0, s5 -; RV32I-NEXT: mv a1, s6 -; RV32I-NEXT: call __ashldi3@plt -; RV32I-NEXT: mv s5, a1 -; RV32I-NEXT: bgeu s1, s9, .LBB10_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: lw a0, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s8, s8, a0 -; RV32I-NEXT: or a0, s11, s10 -; RV32I-NEXT: .LBB10_13: -; RV32I-NEXT: mv s6, s4 -; RV32I-NEXT: beqz s1, .LBB10_15 -; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: mv s6, a0 -; RV32I-NEXT: .LBB10_15: -; RV32I-NEXT: mv a0, s4 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: call __lshrdi3@plt -; RV32I-NEXT: bltu s0, s9, .LBB10_21 -; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: bltu s1, s9, .LBB10_22 -; RV32I-NEXT: .LBB10_17: -; RV32I-NEXT: bnez s1, .LBB10_23 -; RV32I-NEXT: .LBB10_18: -; RV32I-NEXT: bgeu s0, s9, .LBB10_20 -; RV32I-NEXT: .LBB10_19: -; RV32I-NEXT: or s3, s3, a1 -; RV32I-NEXT: .LBB10_20: -; RV32I-NEXT: sw s8, 4(s2) -; RV32I-NEXT: sw s7, 0(s2) -; RV32I-NEXT: sw s3, 12(s2) -; RV32I-NEXT: sw s6, 8(s2) -; RV32I-NEXT: lw s11, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s10, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s9, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s8, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s7, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 32(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s5, 36(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 40(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s3, 44(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 48(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 52(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: srl a7, t0, a2 +; RV32I-NEXT: sll a1, t3, a3 +; RV32I-NEXT: srl a5, a5, a2 +; RV32I-NEXT: beqz a4, .LBB10_10 +; RV32I-NEXT: # %bb.9: +; RV32I-NEXT: mv t2, t1 +; RV32I-NEXT: .LBB10_10: +; RV32I-NEXT: or a7, a1, a7 +; RV32I-NEXT: slli a1, t2, 1 +; RV32I-NEXT: sll a1, a1, a3 +; RV32I-NEXT: or a5, a1, a5 +; RV32I-NEXT: srl a1, t2, a2 +; RV32I-NEXT: beqz a4, .LBB10_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: .LBB10_12: +; RV32I-NEXT: slli a4, t1, 1 +; RV32I-NEXT: sll a4, a4, a3 +; RV32I-NEXT: or a1, a4, a1 +; RV32I-NEXT: srl a2, t1, a2 +; RV32I-NEXT: slli a4, t0, 1 +; RV32I-NEXT: sll a3, a4, a3 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a7, 0(a0) ; RV32I-NEXT: ret -; RV32I-NEXT: .LBB10_21: -; RV32I-NEXT: or s6, s6, a0 -; RV32I-NEXT: bgeu s1, s9, .LBB10_17 -; RV32I-NEXT: .LBB10_22: -; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw a2, 
0(sp) # 4-byte Folded Reload -; RV32I-NEXT: or s5, a2, a0 -; RV32I-NEXT: beqz s1, .LBB10_18 -; RV32I-NEXT: .LBB10_23: -; RV32I-NEXT: mv s3, s5 -; RV32I-NEXT: bltu s0, s9, .LBB10_19 -; RV32I-NEXT: j .LBB10_20 +; RV32I-NEXT: .LBB10_13: +; RV32I-NEXT: mv t0, a5 +; RV32I-NEXT: beqz a1, .LBB10_4 +; RV32I-NEXT: .LBB10_14: +; RV32I-NEXT: mv t2, a3 +; RV32I-NEXT: bnez a4, .LBB10_5 +; RV32I-NEXT: j .LBB10_6 ; ; RV64I-LABEL: fshr128_minsize: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -48 -; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a2 -; RV64I-NEXT: mv s2, a1 -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: andi a2, a2, 127 -; RV64I-NEXT: call __lshrti3@plt -; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv s4, a1 -; RV64I-NEXT: neg a0, s0 -; RV64I-NEXT: andi a2, a0, 127 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: mv a1, s2 -; RV64I-NEXT: call __ashlti3@plt -; RV64I-NEXT: or a0, s3, a0 -; RV64I-NEXT: or a1, s4, a1 -; RV64I-NEXT: ld s4, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s3, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 48 +; RV64I-NEXT: andi a4, a2, 64 +; RV64I-NEXT: mv a3, a0 +; RV64I-NEXT: beqz a4, .LBB10_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a3, a1 +; RV64I-NEXT: .LBB10_2: +; RV64I-NEXT: srl a5, a3, a2 +; RV64I-NEXT: beqz a4, .LBB10_4 +; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: .LBB10_4: +; RV64I-NEXT: slli a0, a1, 1 +; RV64I-NEXT: not a4, a2 +; RV64I-NEXT: sll a0, a0, a4 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: srl a1, a1, a2 +; RV64I-NEXT: slli a2, a3, 1 +; RV64I-NEXT: sll a2, a2, a4 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: ret %res = tail call i128 @llvm.fshr.i128(i128 %a, i128 %a, i128 %b) ret i128 %res diff --git a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll index f61390c9f81fa..5661c82326bb4 100644 --- a/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll +++ b/llvm/test/CodeGen/Thumb/emergency-spill-slot.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s | FileCheck %s +; RUN: llc -early-live-intervals < %s | FileCheck %s target triple = "thumbv6m-unknown-unknown-eabi" define void @vla_emergency_spill(i32 %n) { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll index 08bdd6a7d5411..ab95baeedf610 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll @@ -44,8 +44,8 @@ define void @arm_cmplx_dot_prod_f32(float* %pSrcA, float* %pSrcB, i32 %numSample ; CHECK-NEXT: vcmla.f32 q0, q2, q1, #90 ; CHECK-NEXT: cbz r2, .LBB0_8 ; CHECK-NEXT: @ %bb.4: @ %while.body9 -; CHECK-NEXT: cmp r2, #4 ; CHECK-NEXT: vctp.32 r2 +; CHECK-NEXT: cmp r2, #4 ; CHECK-NEXT: vpstttt ; CHECK-NEXT: vldrwt.u32 q1, [r1] ; CHECK-NEXT: vldrwt.u32 q2, [r0] diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll index 3de54247ad8e7..cb773718f7f5a 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll @@ -20,20 +20,20 @@ define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: and r4, r12, #15 -; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vctp.32 r3 +; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q1, [r2], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 ; CHECK-NEXT: vdup.32 q3, r4 +; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vpt.i32 eq, q3, zr ; CHECK-NEXT: vmovt q1, q2 ; CHECK-NEXT: vctp.32 r3 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 -; CHECK-NEXT: vmul.i32 q1, q1, q2 -; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: subs r3, #4 +; CHECK-NEXT: vmul.i32 q1, q1, q2 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -101,22 +101,22 @@ define dso_local i32 @vpsel_mul_reduce_add_2(i32* noalias nocapture readonly %a, ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: and r5, r4, #15 ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpsttt ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r3], #16 ; CHECK-NEXT: vldrwt.u32 q3, [r2], #16 ; CHECK-NEXT: vdup.32 q4, r5 +; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: vpt.i32 eq, q4, zr ; CHECK-NEXT: vsubt.i32 q1, q3, q2 ; CHECK-NEXT: vctp.32 r12 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 -; CHECK-NEXT: vmul.i32 q1, q1, q2 -; CHECK-NEXT: adds r4, #4 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vmul.i32 q1, q1, q2 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -200,8 +200,8 @@ define dso_local i32 @and_mul_reduce_add(i32* noalias nocapture readonly %a, i32 ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill -; CHECK-NEXT: vsub.i32 q1, q2, q1 ; CHECK-NEXT: sub.w r12, r12, #4 +; CHECK-NEXT: vsub.i32 q1, q2, q1 ; CHECK-NEXT: vpsttt ; CHECK-NEXT: vcmpt.i32 eq, q1, zr ; CHECK-NEXT: vldrwt.u32 q1, [r3], #16 @@ -288,13 +288,13 @@ define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill -; CHECK-NEXT: vsub.i32 q1, q2, q1 ; CHECK-NEXT: vpnot +; CHECK-NEXT: vsub.i32 q1, q2, q1 +; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpstee ; CHECK-NEXT: vcmpt.i32 ne, q1, zr ; CHECK-NEXT: vldrwe.u32 q1, [r3], #16 ; CHECK-NEXT: vldrwe.u32 q2, [r2], #16 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vmul.i32 q1, q2, q1 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB3_2 @@ -415,8 +415,9 @@ define dso_local arm_aapcs_vfpcc void @range_test(i32* noalias nocapture %arg, i ; CHECK-NEXT: .LBB5_2: @ %bb12 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: vptt.i32 ne, q0, zr +; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vcmpt.s32 le, q0, r2 +; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: vpst diff --git 
a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index 603f667d0c615..22e2f290f2da3 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -214,10 +214,10 @@ define arm_aapcs_vfpcc float @fast_float_mac(float* nocapture readonly %b, float ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vldrwt.u32 q3, [r1], #16 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vfma.f32 q0, q3, q2 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll index c69321daca317..0553d948d3429 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -21,9 +21,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture re ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u32 q2, [r1], #4 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -86,9 +86,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_short(i16 signext %a, i16* nocapture ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.s32 q2, [r1], #8 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -151,9 +151,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_uchar(i8 zeroext %a, i8* nocapture r ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u32 q2, [r1], #4 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB2_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -216,9 +216,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_ushort(i16 signext %a, i16* nocaptur ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u32 q2, [r1], #8 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB3_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -281,9 +281,9 @@ define arm_aapcs_vfpcc i32 @test_acc_scalar_int(i32 %a, i32* nocapture readonly ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 ; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmla.u32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB4_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index d58ad4c697d68..eb98b85eafc90 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -78,12 +78,11 @@ define dso_local arm_aapcs_vfpcc signext i16 @one_loop_add_add_v8i16(i8* nocaptu ; 
CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.16 r2 ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrbt.u16 q1, [r0], #8 +; CHECK-NEXT: vldrbt.u16 q2, [r1], #8 ; CHECK-NEXT: subs r2, #8 ; CHECK-NEXT: vadd.i16 q1, q0, q1 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrbt.u16 q2, [r1], #8 ; CHECK-NEXT: vadd.i16 q1, q1, q2 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block @@ -559,9 +558,9 @@ define dso_local arm_aapcs_vfpcc void @two_reductions_mul_add_v8i16(i8* nocaptur ; CHECK-NEXT: vldrbt.u16 q1, [r3], #8 ; CHECK-NEXT: vldrbt.u16 q4, [r4], #8 ; CHECK-NEXT: vmov q2, q3 +; CHECK-NEXT: subs r2, #8 ; CHECK-NEXT: vsub.i16 q3, q4, q1 ; CHECK-NEXT: vmul.i16 q1, q4, q1 -; CHECK-NEXT: subs r2, #8 ; CHECK-NEXT: vadd.i16 q3, q3, q2 ; CHECK-NEXT: vadd.i16 q1, q1, q0 ; CHECK-NEXT: le lr, .LBB7_2 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll index 6d8ba975ac919..6c75cd51de606 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll @@ -57,10 +57,10 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(i16* noalias nocaptur ; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vctp.16 r6 +; CHECK-NEXT: subs r6, #8 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u16 q0, [r5] ; CHECK-NEXT: vshr.u16 q1, q0, #3 -; CHECK-NEXT: subs r6, #8 ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmov q2, q4 ; CHECK-NEXT: vmla.u16 q2, q1, r2 @@ -237,10 +237,10 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no ; CHECK-NEXT: @ Parent Loop BB1_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: vldrh.u16 q0, [r5] -; CHECK-NEXT: vshl.i16 q1, q0, #3 -; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmov.f64 d6, d4 ; CHECK-NEXT: vmov.f64 d7, d5 +; CHECK-NEXT: vshl.i16 q1, q0, #3 +; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmov q2, q4 ; CHECK-NEXT: vmla.u16 q2, q1, r3 ; CHECK-NEXT: vshr.u16 q1, q0, #3 @@ -265,10 +265,10 @@ define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(i16* noalias no ; CHECK-NEXT: vldrw.u32 q3, [sp] @ 16-byte Reload ; CHECK-NEXT: vmov.f64 d8, d10 ; CHECK-NEXT: vmov.f64 d9, d11 -; CHECK-NEXT: vmov.f64 d10, d14 -; CHECK-NEXT: vmov.f64 d11, d15 ; CHECK-NEXT: vand q1, q1, q3 ; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vmov.f64 d10, d14 +; CHECK-NEXT: vmov.f64 d11, d15 ; CHECK-NEXT: vstrh.16 q0, [r5], #16 ; CHECK-NEXT: letp lr, .LBB1_4 ; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll index 73865945cdc35..8dc18e5593a2b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-disabled-in-loloops.ll @@ -57,13 +57,13 @@ define dso_local void @check_option(i32* noalias nocapture %A, i32* noalias noca ; DISABLED-NEXT: .LBB0_3: @ %vector.body ; DISABLED-NEXT: @ Parent Loop BB0_2 Depth=1 ; DISABLED-NEXT: @ => This Inner Loop Header: Depth=2 -; DISABLED-NEXT: mov lr, r7 ; DISABLED-NEXT: vctp.32 r6 -; DISABLED-NEXT: subs r7, #1 -; DISABLED-NEXT: subs r6, #4 +; DISABLED-NEXT: mov lr, r7 ; DISABLED-NEXT: vpstt ; DISABLED-NEXT: vldrwt.u32 q0, [r5], #16 ; DISABLED-NEXT: vldrwt.u32 q1, [r4], #16 +; DISABLED-NEXT: subs 
r7, #1 +; DISABLED-NEXT: subs r6, #4 ; DISABLED-NEXT: vadd.i32 q0, q1, q0 ; DISABLED-NEXT: vpst ; DISABLED-NEXT: vstrwt.32 q0, [r12], #16 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll index 29174b44cd45a..888fbcc0ef106 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/unpredload.ll @@ -63,9 +63,9 @@ define i32 @bad(i32* readonly %x, i32* nocapture readonly %y, i32 %n) { ; CHECK-NEXT: .LBB1_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r2 +; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [r0], #16 -; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmlava.s32 r12, q0, q1 ; CHECK-NEXT: le lr, .LBB1_1 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll index af5c76fd44770..df0afcfd14473 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll @@ -71,10 +71,10 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; ENABLED-NEXT: vldrht.s32 q1, [r0], #8 ; ENABLED-NEXT: vldrht.s32 q2, [r7], #8 ; ENABLED-NEXT: mov lr, r6 -; ENABLED-NEXT: vmul.i32 q1, q2, q1 ; ENABLED-NEXT: subs r6, #1 -; ENABLED-NEXT: vshl.s32 q1, r5 +; ENABLED-NEXT: vmul.i32 q1, q2, q1 ; ENABLED-NEXT: subs r4, #4 +; ENABLED-NEXT: vshl.s32 q1, r5 ; ENABLED-NEXT: vadd.i32 q1, q1, q0 ; ENABLED-NEXT: le lr, .LBB0_6 ; ENABLED-NEXT: @ %bb.7: @ %middle.block @@ -142,10 +142,10 @@ define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input ; NOREDUCTIONS-NEXT: vldrht.s32 q1, [r0], #8 ; NOREDUCTIONS-NEXT: vldrht.s32 q2, [r7], #8 ; NOREDUCTIONS-NEXT: mov lr, r6 -; NOREDUCTIONS-NEXT: vmul.i32 q1, q2, q1 ; NOREDUCTIONS-NEXT: subs r6, #1 -; NOREDUCTIONS-NEXT: vshl.s32 q1, r5 +; NOREDUCTIONS-NEXT: vmul.i32 q1, q2, q1 ; NOREDUCTIONS-NEXT: subs r4, #4 +; NOREDUCTIONS-NEXT: vshl.s32 q1, r5 ; NOREDUCTIONS-NEXT: vadd.i32 q1, q1, q0 ; NOREDUCTIONS-NEXT: le lr, .LBB0_6 ; NOREDUCTIONS-NEXT: @ %bb.7: @ %middle.block diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll index 607a55b52370b..c39d9226bb9e4 100644 --- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll +++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll @@ -342,11 +342,11 @@ define <16 x i8> @v16i8(i32 %index, i32 %TC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: add r0, sp, #88 ; CHECK-NEXT: vcmp.i8 ne, q3, zr ; CHECK-NEXT: vldr d1, [sp, #80] -; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpnot -; CHECK-NEXT: vmov d0, r2, r3 +; CHECK-NEXT: vldrw.u32 q1, [r0] ; CHECK-NEXT: vpst ; CHECK-NEXT: vcmpt.i8 ne, q2, zr +; CHECK-NEXT: vmov d0, r2, r3 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov r2, r3, d1 diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index d56d7599652c8..611b02be5b3b0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -453,8 +453,8 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: @ Parent Loop BB1_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vshl.i32 q2, q1, #2 -; CHECK-NEXT: vadd.i32 q1, q1, q6 ; CHECK-NEXT: vadd.i32 q2, 
q2, r10 +; CHECK-NEXT: vadd.i32 q1, q1, q6 ; CHECK-NEXT: vstrw.32 q0, [q2] ; CHECK-NEXT: letp lr, .LBB1_10 ; CHECK-NEXT: b .LBB1_13 @@ -467,8 +467,8 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: @ Parent Loop BB1_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vshl.i32 q2, q1, #2 -; CHECK-NEXT: vadd.i32 q1, q1, q5 ; CHECK-NEXT: vadd.i32 q2, q2, r10 +; CHECK-NEXT: vadd.i32 q1, q1, q5 ; CHECK-NEXT: vstrw.32 q0, [q2] ; CHECK-NEXT: letp lr, .LBB1_12 ; CHECK-NEXT: .LBB1_13: @ %for.cond9.for.cond15.preheader_crit_edge.us diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index 45ff2396fb17a..c089034f93265 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -835,10 +835,10 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: and r5, r3, #3 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vctp.16 r5 +; CHECK-NEXT: add.w r1, r10, #2 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r4] ; CHECK-NEXT: vldrw.u32 q0, [r10] -; CHECK-NEXT: add.w r1, r10, #2 ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: add.w r1, r10, #6 ; CHECK-NEXT: vmul.f16 q0, q0, r7 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll index b6a1c7b94f5ef..31e0b048e74e0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -1399,6 +1399,160 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret void } +define void @shl(i32* nocapture %x, i32* noalias nocapture readonly %y, i32 %n) { +; CHECK-LABEL: shl: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB15_1: @ %vector.ph +; CHECK-NEXT: adr r3, .LCPI15_0 +; CHECK-NEXT: vldrw.u32 q0, [r3] +; CHECK-NEXT: vadd.i32 q0, q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB15_2: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [q0, #64]! 
+; CHECK-NEXT: vstrw.32 q1, [r0], #16
+; CHECK-NEXT: letp lr, .LBB15_2
+; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.4:
+; CHECK-NEXT: .LCPI15_0:
+; CHECK-NEXT: .long 4294967232 @ 0xffffffc0
+; CHECK-NEXT: .long 4294967248 @ 0xffffffd0
+; CHECK-NEXT: .long 4294967264 @ 0xffffffe0
+; CHECK-NEXT: .long 4294967280 @ 0xfffffff0
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %vector.ph, label %for.cond.cleanup
+
+vector.ph: ; preds = %entry
+ %n.rnd.up = add i32 %n, 3
+ %n.vec = and i32 %n.rnd.up, -4
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
+ %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
+ %0 = shl nsw <4 x i32> %vec.ind, <i32 2, i32 2, i32 2, i32 2>
+ %1 = getelementptr inbounds i32, i32* %y, <4 x i32> %0
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %2 = getelementptr inbounds i32, i32* %x, i32 %index
+ %3 = bitcast i32* %2 to <4 x i32>*
+ call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %wide.masked.gather, <4 x i32>* %3, i32 4, <4 x i1> %active.lane.mask)
+ %index.next = add i32 %index, 4
+ %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
+ %4 = icmp eq i32 %index.next, %n.vec
+ br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body, %entry
+ ret void
+}
+
+define void @shlor(i32* nocapture %x, i32* noalias nocapture readonly %y, i32 %n) {
+; CHECK-LABEL: shlor:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: cmp r2, #1
+; CHECK-NEXT: blt .LBB16_3
+; CHECK-NEXT: @ %bb.1: @ %vector.ph
+; CHECK-NEXT: adr.w lr, .LCPI16_0
+; CHECK-NEXT: adr r4, .LCPI16_1
+; CHECK-NEXT: adr r5, .LCPI16_2
+; CHECK-NEXT: adr r6, .LCPI16_3
+; CHECK-NEXT: vldrw.u32 q0, [r6]
+; CHECK-NEXT: vldrw.u32 q1, [r5]
+; CHECK-NEXT: vldrw.u32 q2, [r4]
+; CHECK-NEXT: vldrw.u32 q3, [lr]
+; CHECK-NEXT: vadd.i32 q0, q0, r1
+; CHECK-NEXT: vadd.i32 q1, q1, r1
+; CHECK-NEXT: vadd.i32 q2, q2, r1
+; CHECK-NEXT: vadd.i32 q3, q3, r1
+; CHECK-NEXT: dlstp.32 lr, r2
+; CHECK-NEXT: .LBB16_2: @ %vector.body
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vldrw.u32 q4, [q3, #128]!
+; CHECK-NEXT: vldrw.u32 q5, [q2, #128]!
+; CHECK-NEXT: vldrw.u32 q6, [q0, #128]!
+; CHECK-NEXT: vadd.i32 q4, q5, q4
+; CHECK-NEXT: vldrw.u32 q5, [q1, #128]!
+; CHECK-NEXT: vadd.i32 q4, q4, q5
+; CHECK-NEXT: vadd.i32 q4, q4, q6
+; CHECK-NEXT: vstrw.32 q4, [r0], #16
+; CHECK-NEXT: letp lr, .LBB16_2
+; CHECK-NEXT: .LBB16_3: @ %for.cond.cleanup
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: @ %bb.4:
+; CHECK-NEXT: .LCPI16_0:
+; CHECK-NEXT: .long 4294967168 @ 0xffffff80
+; CHECK-NEXT: .long 4294967200 @ 0xffffffa0
+; CHECK-NEXT: .long 4294967232 @ 0xffffffc0
+; CHECK-NEXT: .long 4294967264 @ 0xffffffe0
+; CHECK-NEXT: .LCPI16_1:
+; CHECK-NEXT: .long 4294967176 @ 0xffffff88
+; CHECK-NEXT: .long 4294967208 @ 0xffffffa8
+; CHECK-NEXT: .long 4294967240 @ 0xffffffc8
+; CHECK-NEXT: .long 4294967272 @ 0xffffffe8
+; CHECK-NEXT: .LCPI16_2:
+; CHECK-NEXT: .long 4294967184 @ 0xffffff90
+; CHECK-NEXT: .long 4294967216 @ 0xffffffb0
+; CHECK-NEXT: .long 4294967248 @ 0xffffffd0
+; CHECK-NEXT: .long 4294967280 @ 0xfffffff0
+; CHECK-NEXT: .LCPI16_3:
+; CHECK-NEXT: .long 4294967192 @ 0xffffff98
+; CHECK-NEXT: .long 4294967224 @ 0xffffffb8
+; CHECK-NEXT: .long 4294967256 @ 0xffffffd8
+; CHECK-NEXT: .long 4294967288 @ 0xfffffff8
+entry:
+ %cmp23 = icmp sgt i32 %n, 0
+ br i1 %cmp23, label %vector.ph, label %for.cond.cleanup
+
+vector.ph: ; preds = %entry
+ %n.rnd.up = add i32 %n, 3
+ %n.vec = and i32 %n.rnd.up, -4
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
+ %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
+ %0 = shl nsw <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
+ %1 = getelementptr inbounds i32, i32* %y, <4 x i32> %0
+ %wide.masked.gather = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %2 = or <4 x i32> %0, <i32 2, i32 2, i32 2, i32 2>
+ %3 = getelementptr inbounds i32, i32* %y, <4 x i32> %2
+ %wide.masked.gather25 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %4 = add nsw <4 x i32> %wide.masked.gather25, %wide.masked.gather
+ %5 = or <4 x i32> %0, <i32 4, i32 4, i32 4, i32 4>
+ %6 = getelementptr inbounds i32, i32* %y, <4 x i32> %5
+ %wide.masked.gather26 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %6, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %7 = add nsw <4 x i32> %4, %wide.masked.gather26
+ %8 = or <4 x i32> %0, <i32 6, i32 6, i32 6, i32 6>
+ %9 = getelementptr inbounds i32, i32* %y, <4 x i32> %8
+ %wide.masked.gather27 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %9, i32 4, <4 x i1> %active.lane.mask, <4 x i32> undef)
+ %10 = add nsw <4 x i32> %7, %wide.masked.gather27
+ %11 = getelementptr inbounds i32, i32* %x, i32 %index
+ %12 = bitcast i32* %11 to <4 x i32>*
+ call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %10, <4 x i32>* %12, i32 4, <4 x i1> %active.lane.mask)
+ %index.next = add i32 %index, 4
+ %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
+ %13 = icmp eq i32 %index.next, %n.vec
+ br i1 %13, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body, %entry
+ ret void
+}
+
 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
@@ -1419,3 +1573,4 @@ declare <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*>, i32, <8 x i1>, <8 x i8>)
 declare <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*>, i32, <16 x i1>, <16 x i8>)
 declare <32 x i8>
@llvm.masked.gather.v32i8.v32p0i8(<32 x i8*>, i32, <32 x i1>, <32 x i8>) declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>) +declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll b/llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll index ac1c0d03c85b5..9bdfdd332ad7d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-ind16-scaled.ll @@ -231,8 +231,8 @@ entry: define arm_aapcs_vfpcc <8 x i16> @scaled_v8i16_i16_passthru_icmp1(i16* %base, <8 x i16>* %offptr) { ; CHECK-LABEL: scaled_v8i16_i16_passthru_icmp1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i16 q0, #0x1 ; CHECK-NEXT: vldrh.u16 q1, [r1] +; CHECK-NEXT: vmov.i16 q0, #0x1 ; CHECK-NEXT: vpt.s16 gt, q1, zr ; CHECK-NEXT: vldrht.u16 q2, [r0, q1, uxtw #1] ; CHECK-NEXT: vpsel q0, q2, q0 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll index c3c94a598de22..debf276bde7dd 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -152,13 +152,13 @@ define dso_local void @mve_scatter_qi(i32* noalias nocapture readonly %A, i32* n ; CHECK-NEXT: .LBB2_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vctp.32 r3 +; CHECK-NEXT: subs r3, #4 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q3, [r4], #16 ; CHECK-NEXT: vmul.i32 q3, q3, q2 -; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: vadd.i32 q0, q0, q3 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q3, [q1, #80]! +; CHECK-NEXT: vadd.i32 q0, q0, q3 ; CHECK-NEXT: le lr, .LBB2_1 ; CHECK-NEXT: @ %bb.2: @ %middle.block ; CHECK-NEXT: vaddv.u32 r0, q0 @@ -244,13 +244,13 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: .LBB3_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vldrb.u32 q7, [r0, q1] ; CHECK-NEXT: vldrb.u32 q5, [r0, q2] +; CHECK-NEXT: vldrb.u32 q6, [r0, q3] +; CHECK-NEXT: vldrb.u32 q7, [r0, q1] +; CHECK-NEXT: adds r0, #12 ; CHECK-NEXT: vmul.i32 q4, q5, r8 ; CHECK-NEXT: vmla.u32 q4, q7, r9 -; CHECK-NEXT: vldrb.u32 q6, [r0, q3] ; CHECK-NEXT: vmla.u32 q4, q6, r12 -; CHECK-NEXT: adds r0, #12 ; CHECK-NEXT: vadd.i32 q4, q4, q0 ; CHECK-NEXT: vshr.u32 q4, q4, #16 ; CHECK-NEXT: vstrb.32 q4, [r1, q1] @@ -263,8 +263,8 @@ define void @justoffsets(i8* noalias nocapture readonly %r, i8* noalias nocaptur ; CHECK-NEXT: vmla.u32 q4, q6, r4 ; CHECK-NEXT: vadd.i32 q1, q1, q0 ; CHECK-NEXT: vadd.i32 q4, q4, q0 -; CHECK-NEXT: vshr.u32 q1, q1, #16 ; CHECK-NEXT: vshr.u32 q4, q4, #16 +; CHECK-NEXT: vshr.u32 q1, q1, #16 ; CHECK-NEXT: vstrb.32 q4, [r1, q2] ; CHECK-NEXT: vstrb.32 q1, [r1, q3] ; CHECK-NEXT: adds r1, #12 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll index 9957cae69f750..5405d4b7427c6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-offset.ll @@ -1543,8 +1543,8 @@ entry: define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ -1562,9 +1562,9 @@ entry: define i8* @strw32_3(i8* 
%y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -1582,9 +1582,9 @@ entry: define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_2: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #2 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -1602,8 +1602,8 @@ entry: define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #508] ; CHECK-NEXT: bx lr @@ -1621,9 +1621,9 @@ entry: define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #512 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -1641,8 +1641,8 @@ entry: define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] ; CHECK-NEXT: bx lr @@ -1660,9 +1660,9 @@ entry: define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_m512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #512 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -1680,8 +1680,8 @@ entry: define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ -1699,9 +1699,9 @@ entry: define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -1719,8 +1719,8 @@ entry: define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #2] ; CHECK-NEXT: bx lr @@ -1738,8 +1738,8 @@ entry: define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #254] ; CHECK-NEXT: bx lr @@ -1757,9 +1757,9 @@ entry: define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; 
CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #256 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -1777,8 +1777,8 @@ entry: define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #-254] ; CHECK-NEXT: bx lr @@ -1796,9 +1796,9 @@ entry: define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_m256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #256 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -1816,8 +1816,8 @@ entry: define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ -1835,9 +1835,9 @@ entry: define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r1] ; CHECK-NEXT: bx lr @@ -1855,8 +1855,8 @@ entry: define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #2] ; CHECK-NEXT: bx lr @@ -1874,8 +1874,8 @@ entry: define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #254] ; CHECK-NEXT: bx lr @@ -1893,9 +1893,9 @@ entry: define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #256 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r1] ; CHECK-NEXT: bx lr @@ -1913,8 +1913,8 @@ entry: define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #-254] ; CHECK-NEXT: bx lr @@ -1932,9 +1932,9 @@ entry: define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_m256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #256 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r1] ; CHECK-NEXT: bx lr @@ -1952,8 +1952,8 @@ entry: define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: 
vstrbt.32 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ -1971,8 +1971,8 @@ entry: define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #3] ; CHECK-NEXT: bx lr @@ -1990,8 +1990,8 @@ entry: define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #2] ; CHECK-NEXT: bx lr @@ -2009,8 +2009,8 @@ entry: define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #127] ; CHECK-NEXT: bx lr @@ -2028,9 +2028,9 @@ entry: define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #128 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -2048,8 +2048,8 @@ entry: define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #-127] ; CHECK-NEXT: bx lr @@ -2067,9 +2067,9 @@ entry: define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_m128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #128 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -2087,8 +2087,8 @@ entry: define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ -2106,8 +2106,8 @@ entry: define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #3] ; CHECK-NEXT: bx lr @@ -2125,8 +2125,8 @@ entry: define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #2] ; CHECK-NEXT: bx lr @@ -2144,8 +2144,8 @@ entry: define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #127] ; CHECK-NEXT: bx lr @@ -2163,9 +2163,9 @@ entry: define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: 
vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #128 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r1] ; CHECK-NEXT: bx lr @@ -2183,8 +2183,8 @@ entry: define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #-127] ; CHECK-NEXT: bx lr @@ -2202,9 +2202,9 @@ entry: define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_m128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #128 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r1] ; CHECK-NEXT: bx lr @@ -2222,8 +2222,8 @@ entry: define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ -2241,8 +2241,8 @@ entry: define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #3] ; CHECK-NEXT: bx lr @@ -2260,8 +2260,8 @@ entry: define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #2] ; CHECK-NEXT: bx lr @@ -2279,8 +2279,8 @@ entry: define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #127] ; CHECK-NEXT: bx lr @@ -2298,9 +2298,9 @@ entry: define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #128 -; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r1] ; CHECK-NEXT: bx lr @@ -2318,8 +2318,8 @@ entry: define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #-127] ; CHECK-NEXT: bx lr @@ -2337,9 +2337,9 @@ entry: define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_m128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #128 -; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r1] ; CHECK-NEXT: bx lr @@ -2357,8 +2357,8 @@ entry: define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ 
-2376,9 +2376,9 @@ entry: define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -2396,9 +2396,9 @@ entry: define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_2: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: adds r1, r0, #2 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -2416,8 +2416,8 @@ entry: define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #508] ; CHECK-NEXT: bx lr @@ -2435,9 +2435,9 @@ entry: define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #512 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -2455,8 +2455,8 @@ entry: define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #-508] ; CHECK-NEXT: bx lr @@ -2474,9 +2474,9 @@ entry: define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_m512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #512 -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r1] ; CHECK-NEXT: bx lr @@ -2494,8 +2494,8 @@ entry: define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #4] ; CHECK-NEXT: bx lr @@ -2513,9 +2513,9 @@ entry: define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: adds r1, r0, #3 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r1] ; CHECK-NEXT: bx lr @@ -2533,8 +2533,8 @@ entry: define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #2] ; CHECK-NEXT: bx lr @@ -2552,8 +2552,8 @@ entry: define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #254] ; CHECK-NEXT: bx lr @@ -2571,9 +2571,9 @@ entry: define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: 
strhf16_256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: add.w r1, r0, #256 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r1] ; CHECK-NEXT: bx lr @@ -2591,8 +2591,8 @@ entry: define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #-254] ; CHECK-NEXT: bx lr @@ -2610,9 +2610,9 @@ entry: define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_m256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: sub.w r1, r0, #256 -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r1] ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll index 2b3e81fecc268..e505930117ec4 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-postinc.ll @@ -1543,8 +1543,8 @@ entry: define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0], #4 ; CHECK-NEXT: bx lr @@ -1562,8 +1562,8 @@ entry: define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -1582,8 +1582,8 @@ entry: define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: adds r0, #2 @@ -1602,8 +1602,8 @@ entry: define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0], #508 ; CHECK-NEXT: bx lr @@ -1621,8 +1621,8 @@ entry: define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #512 @@ -1641,8 +1641,8 @@ entry: define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0], #-508 ; CHECK-NEXT: bx lr @@ -1660,8 +1660,8 @@ entry: define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_m512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: sub.w 
r0, r0, #512 @@ -1680,8 +1680,8 @@ entry: define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0], #4 ; CHECK-NEXT: bx lr @@ -1699,8 +1699,8 @@ entry: define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -1719,8 +1719,8 @@ entry: define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0], #2 ; CHECK-NEXT: bx lr @@ -1738,8 +1738,8 @@ entry: define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0], #254 ; CHECK-NEXT: bx lr @@ -1757,8 +1757,8 @@ entry: define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #256 @@ -1777,8 +1777,8 @@ entry: define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0], #-254 ; CHECK-NEXT: bx lr @@ -1796,8 +1796,8 @@ entry: define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_m256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0] ; CHECK-NEXT: sub.w r0, r0, #256 @@ -1816,8 +1816,8 @@ entry: define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #4 ; CHECK-NEXT: bx lr @@ -1835,8 +1835,8 @@ entry: define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -1855,8 +1855,8 @@ entry: define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #2 ; CHECK-NEXT: bx lr @@ -1874,8 +1874,8 @@ entry: define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, 
[r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #254 ; CHECK-NEXT: bx lr @@ -1893,8 +1893,8 @@ entry: define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #256 @@ -1913,8 +1913,8 @@ entry: define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #-254 ; CHECK-NEXT: bx lr @@ -1932,8 +1932,8 @@ entry: define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_m256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: sub.w r0, r0, #256 @@ -1952,8 +1952,8 @@ entry: define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0], #4 ; CHECK-NEXT: bx lr @@ -1971,8 +1971,8 @@ entry: define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0], #3 ; CHECK-NEXT: bx lr @@ -1990,8 +1990,8 @@ entry: define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0], #2 ; CHECK-NEXT: bx lr @@ -2009,8 +2009,8 @@ entry: define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0], #127 ; CHECK-NEXT: bx lr @@ -2028,8 +2028,8 @@ entry: define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0] ; CHECK-NEXT: adds r0, #128 @@ -2048,8 +2048,8 @@ entry: define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0], #-127 ; CHECK-NEXT: bx lr @@ -2067,8 +2067,8 @@ entry: define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0] ; CHECK-NEXT: subs r0, #128 @@ -2087,8 +2087,8 @@ entry: define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ 
%entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0], #4 ; CHECK-NEXT: bx lr @@ -2106,8 +2106,8 @@ entry: define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0], #3 ; CHECK-NEXT: bx lr @@ -2125,8 +2125,8 @@ entry: define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0], #2 ; CHECK-NEXT: bx lr @@ -2144,8 +2144,8 @@ entry: define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0], #127 ; CHECK-NEXT: bx lr @@ -2163,8 +2163,8 @@ entry: define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0] ; CHECK-NEXT: adds r0, #128 @@ -2183,8 +2183,8 @@ entry: define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0], #-127 ; CHECK-NEXT: bx lr @@ -2202,8 +2202,8 @@ entry: define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0] ; CHECK-NEXT: subs r0, #128 @@ -2222,8 +2222,8 @@ entry: define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0], #4 ; CHECK-NEXT: bx lr @@ -2241,8 +2241,8 @@ entry: define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0], #3 ; CHECK-NEXT: bx lr @@ -2260,8 +2260,8 @@ entry: define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0], #2 ; CHECK-NEXT: bx lr @@ -2279,8 +2279,8 @@ entry: define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0], #127 ; CHECK-NEXT: bx lr @@ -2298,8 +2298,8 @@ entry: define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { ; 
CHECK-LABEL: strb8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0] ; CHECK-NEXT: adds r0, #128 @@ -2318,8 +2318,8 @@ entry: define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0], #-127 ; CHECK-NEXT: bx lr @@ -2337,8 +2337,8 @@ entry: define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0] ; CHECK-NEXT: subs r0, #128 @@ -2357,8 +2357,8 @@ entry: define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0], #4 ; CHECK-NEXT: bx lr @@ -2376,8 +2376,8 @@ entry: define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -2396,8 +2396,8 @@ entry: define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: adds r0, #2 @@ -2416,8 +2416,8 @@ entry: define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0], #508 ; CHECK-NEXT: bx lr @@ -2435,8 +2435,8 @@ entry: define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #512 @@ -2455,8 +2455,8 @@ entry: define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0], #-508 ; CHECK-NEXT: bx lr @@ -2474,8 +2474,8 @@ entry: define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_m512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: sub.w r0, r0, #512 @@ -2494,8 +2494,8 @@ entry: define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #4 ; 
CHECK-NEXT: bx lr @@ -2513,8 +2513,8 @@ entry: define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: adds r0, #3 @@ -2533,8 +2533,8 @@ entry: define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #2 ; CHECK-NEXT: bx lr @@ -2552,8 +2552,8 @@ entry: define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #254 ; CHECK-NEXT: bx lr @@ -2571,8 +2571,8 @@ entry: define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: add.w r0, r0, #256 @@ -2591,8 +2591,8 @@ entry: define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0], #-254 ; CHECK-NEXT: bx lr @@ -2610,8 +2610,8 @@ entry: define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_m256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: sub.w r0, r0, #256 diff --git a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll index 26cebcf6fae41..6e74b7a78c0e1 100644 --- a/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll +++ b/llvm/test/CodeGen/Thumb2/mve-masked-ldst-preinc.ll @@ -24,8 +24,8 @@ entry: define i8* @ldrwu32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrwu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -44,8 +44,8 @@ entry: define i8* @ldrwu32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrwu32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -83,8 +83,8 @@ entry: define i8* @ldrwu32_512(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrwu32_512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -122,8 +122,8 @@ entry: define i8* @ldrwu32_m512(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrwu32_m512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vpt.i32 ne, q0, zr 
; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -162,8 +162,8 @@ entry: define i8* @ldrhu32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrhu32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrht.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -223,8 +223,8 @@ entry: define i8* @ldrhu32_256(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrhu32_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrht.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -264,8 +264,8 @@ entry: define i8* @ldrhu32_m256(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrhu32_m256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrht.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -305,8 +305,8 @@ entry: define i8* @ldrhs32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrhs32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrht.s32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -366,8 +366,8 @@ entry: define i8* @ldrhs32_256(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrhs32_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrht.s32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -407,8 +407,8 @@ entry: define i8* @ldrhs32_m256(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrhs32_m256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrht.s32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -447,8 +447,8 @@ entry: define i8* @ldrhu16_3(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrhu16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrht.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -505,8 +505,8 @@ entry: define i8* @ldrhu16_256(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrhu16_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrht.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -544,8 +544,8 @@ entry: define i8* @ldrhu16_m256(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrhu16_m256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrht.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -644,8 +644,8 @@ entry: define i8* @ldrbu32_128(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrbu32_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrbt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -685,8 +685,8 @@ entry: define i8* @ldrbu32_m128(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrbu32_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: 
subs r0, #128 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrbt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -786,8 +786,8 @@ entry: define i8* @ldrbs32_128(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrbs32_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrbt.s32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -827,8 +827,8 @@ entry: define i8* @ldrbs32_m128(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrbs32_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrbt.s32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -928,8 +928,8 @@ entry: define i8* @ldrbu16_128(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrbu16_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrbt.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -969,8 +969,8 @@ entry: define i8* @ldrbu16_m128(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrbu16_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrbt.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -1070,8 +1070,8 @@ entry: define i8* @ldrbs16_128(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrbs16_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrbt.s16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -1111,8 +1111,8 @@ entry: define i8* @ldrbs16_m128(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrbs16_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrbt.s16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -1208,8 +1208,8 @@ entry: define i8* @ldrbu8_128(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK-LABEL: ldrbu8_128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vpt.i8 ne, q0, zr ; CHECK-NEXT: vldrbt.u8 q0, [r0] ; CHECK-NEXT: vstrb.8 q0, [r1] @@ -1247,8 +1247,8 @@ entry: define i8* @ldrbu8_m128(i8* %x, i8* %y, <16 x i8> *%m) { ; CHECK-LABEL: ldrbu8_m128: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vldrb.u8 q0, [r2] +; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vpt.i8 ne, q0, zr ; CHECK-NEXT: vldrbt.u8 q0, [r0] ; CHECK-NEXT: vstrb.8 q0, [r1] @@ -1286,8 +1286,8 @@ entry: define i8* @ldrwf32_3(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrwf32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -1306,8 +1306,8 @@ entry: define i8* @ldrwf32_2(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrwf32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -1345,8 +1345,8 @@ entry: define i8* @ldrwf32_512(i8* %x, i8* %y, <4 x i32> *%m) { ; 
CHECK-LABEL: ldrwf32_512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -1384,8 +1384,8 @@ entry: define i8* @ldrwf32_m512(i8* %x, i8* %y, <4 x i32> *%m) { ; CHECK-LABEL: ldrwf32_m512: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r2] +; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vpt.i32 ne, q0, zr ; CHECK-NEXT: vldrwt.u32 q0, [r0] ; CHECK-NEXT: vstrw.32 q0, [r1] @@ -1423,8 +1423,8 @@ entry: define i8* @ldrhf16_3(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrhf16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrht.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -1481,8 +1481,8 @@ entry: define i8* @ldrhf16_256(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrhf16_256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrht.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -1520,8 +1520,8 @@ entry: define i8* @ldrhf16_m256(i8* %x, i8* %y, <8 x i16> *%m) { ; CHECK-LABEL: ldrhf16_m256: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r2] +; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vpt.i16 ne, q0, zr ; CHECK-NEXT: vldrht.u16 q0, [r0] ; CHECK-NEXT: vstrh.16 q0, [r1] @@ -1543,8 +1543,8 @@ entry: define i8* @strw32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr @@ -1562,9 +1562,9 @@ entry: define i8* @strw32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -1582,9 +1582,9 @@ entry: define i8* @strw32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_2: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -1602,8 +1602,8 @@ entry: define i8* @strw32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #508]! 
; CHECK-NEXT: bx lr @@ -1621,9 +1621,9 @@ entry: define i8* @strw32_512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -1641,8 +1641,8 @@ entry: define i8* @strw32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]! ; CHECK-NEXT: bx lr @@ -1660,9 +1660,9 @@ entry: define i8* @strw32_m512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strw32_m512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -1680,8 +1680,8 @@ entry: define i8* @strh32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr @@ -1699,9 +1699,9 @@ entry: define i8* @strh32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -1719,8 +1719,8 @@ entry: define i8* @strh32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #2]! ; CHECK-NEXT: bx lr @@ -1738,8 +1738,8 @@ entry: define i8* @strh32_254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #254]! ; CHECK-NEXT: bx lr @@ -1757,9 +1757,9 @@ entry: define i8* @strh32_256(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -1777,8 +1777,8 @@ entry: define i8* @strh32_m254(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrh.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0, #-254]! 
; CHECK-NEXT: bx lr @@ -1796,9 +1796,9 @@ entry: define i8* @strh32_m256(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strh32_m256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vldrh.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrht.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -1816,8 +1816,8 @@ entry: define i8* @strh16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr @@ -1835,9 +1835,9 @@ entry: define i8* @strh16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: bx lr @@ -1855,8 +1855,8 @@ entry: define i8* @strh16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #2]! ; CHECK-NEXT: bx lr @@ -1874,8 +1874,8 @@ entry: define i8* @strh16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #254]! ; CHECK-NEXT: bx lr @@ -1893,9 +1893,9 @@ entry: define i8* @strh16_256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: bx lr @@ -1913,8 +1913,8 @@ entry: define i8* @strh16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #-254]! ; CHECK-NEXT: bx lr @@ -1932,9 +1932,9 @@ entry: define i8* @strh16_m256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strh16_m256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: bx lr @@ -1952,8 +1952,8 @@ entry: define i8* @strb32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr @@ -1971,8 +1971,8 @@ entry: define i8* @strb32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #3]! 
; CHECK-NEXT: bx lr @@ -1990,8 +1990,8 @@ entry: define i8* @strb32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #2]! ; CHECK-NEXT: bx lr @@ -2009,8 +2009,8 @@ entry: define i8* @strb32_127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #127]! ; CHECK-NEXT: bx lr @@ -2028,9 +2028,9 @@ entry: define i8* @strb32_128(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -2048,8 +2048,8 @@ entry: define i8* @strb32_m127(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrb.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0, #-127]! ; CHECK-NEXT: bx lr @@ -2067,9 +2067,9 @@ entry: define i8* @strb32_m128(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strb32_m128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vldrb.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrbt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -2087,8 +2087,8 @@ entry: define i8* @strb16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #4]! ; CHECK-NEXT: bx lr @@ -2106,8 +2106,8 @@ entry: define i8* @strb16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #3]! ; CHECK-NEXT: bx lr @@ -2125,8 +2125,8 @@ entry: define i8* @strb16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #2]! ; CHECK-NEXT: bx lr @@ -2144,8 +2144,8 @@ entry: define i8* @strb16_127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #127]! 
; CHECK-NEXT: bx lr @@ -2163,9 +2163,9 @@ entry: define i8* @strb16_128(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0] ; CHECK-NEXT: bx lr @@ -2183,8 +2183,8 @@ entry: define i8* @strb16_m127(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrb.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0, #-127]! ; CHECK-NEXT: bx lr @@ -2202,9 +2202,9 @@ entry: define i8* @strb16_m128(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strb16_m128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vldrb.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrbt.16 q0, [r0] ; CHECK-NEXT: bx lr @@ -2222,8 +2222,8 @@ entry: define i8* @strb8_4(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #4]! ; CHECK-NEXT: bx lr @@ -2241,8 +2241,8 @@ entry: define i8* @strb8_3(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #3]! ; CHECK-NEXT: bx lr @@ -2260,8 +2260,8 @@ entry: define i8* @strb8_2(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #2]! ; CHECK-NEXT: bx lr @@ -2279,8 +2279,8 @@ entry: define i8* @strb8_127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #127]! ; CHECK-NEXT: bx lr @@ -2298,9 +2298,9 @@ entry: define i8* @strb8_128(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: adds r0, #128 ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0] ; CHECK-NEXT: bx lr @@ -2318,8 +2318,8 @@ entry: define i8* @strb8_m127(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_m127: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vldrb.u8 q1, [r2] +; CHECK-NEXT: vldrb.u8 q0, [r1] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0, #-127]! 
; CHECK-NEXT: bx lr @@ -2337,9 +2337,9 @@ entry: define i8* @strb8_m128(i8* %y, i8* %x, <16 x i8> *%m) { ; CHECK-LABEL: strb8_m128: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: subs r0, #128 ; CHECK-NEXT: vldrb.u8 q0, [r1] -; CHECK-NEXT: vldrb.u8 q1, [r2] ; CHECK-NEXT: vpt.i8 ne, q1, zr ; CHECK-NEXT: vstrbt.8 q0, [r0] ; CHECK-NEXT: bx lr @@ -2357,8 +2357,8 @@ entry: define i8* @strwf32_4(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #4]! ; CHECK-NEXT: bx lr @@ -2376,9 +2376,9 @@ entry: define i8* @strwf32_3(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -2396,9 +2396,9 @@ entry: define i8* @strwf32_2(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_2: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: adds r0, #2 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -2416,8 +2416,8 @@ entry: define i8* @strwf32_508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #508]! ; CHECK-NEXT: bx lr @@ -2435,9 +2435,9 @@ entry: define i8* @strwf32_512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: add.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -2455,8 +2455,8 @@ entry: define i8* @strwf32_m508(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_m508: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r2] +; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0, #-508]! ; CHECK-NEXT: bx lr @@ -2474,9 +2474,9 @@ entry: define i8* @strwf32_m512(i8* %y, i8* %x, <4 x i32> *%m) { ; CHECK-LABEL: strwf32_m512: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: sub.w r0, r0, #512 ; CHECK-NEXT: vldrw.u32 q0, [r1] -; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: vpt.i32 ne, q1, zr ; CHECK-NEXT: vstrwt.32 q0, [r0] ; CHECK-NEXT: bx lr @@ -2494,8 +2494,8 @@ entry: define i8* @strhf16_4(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_4: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #4]! 
; CHECK-NEXT: bx lr @@ -2513,9 +2513,9 @@ entry: define i8* @strhf16_3(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_3: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: adds r0, #3 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: bx lr @@ -2533,8 +2533,8 @@ entry: define i8* @strhf16_2(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #2]! ; CHECK-NEXT: bx lr @@ -2552,8 +2552,8 @@ entry: define i8* @strhf16_254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #254]! ; CHECK-NEXT: bx lr @@ -2571,9 +2571,9 @@ entry: define i8* @strhf16_256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: add.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: bx lr @@ -2591,8 +2591,8 @@ entry: define i8* @strhf16_m254(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_m254: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vldrh.u16 q1, [r2] +; CHECK-NEXT: vldrh.u16 q0, [r1] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0, #-254]! ; CHECK-NEXT: bx lr @@ -2610,9 +2610,9 @@ entry: define i8* @strhf16_m256(i8* %y, i8* %x, <8 x i16> *%m) { ; CHECK-LABEL: strhf16_m256: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: sub.w r0, r0, #256 ; CHECK-NEXT: vldrh.u16 q0, [r1] -; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: vpt.i16 ne, q1, zr ; CHECK-NEXT: vstrht.16 q0, [r0] ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll index d4ad24933070f..230b031d046eb 100644 --- a/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-dct.ll @@ -648,14 +648,14 @@ define void @DCT_mve5(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* ; CHECK-NEXT: add.w r9, r3, r5 ; CHECK-NEXT: vldrw.u32 q5, [r4], #16 ; CHECK-NEXT: vldrw.u32 q6, [r3], #16 -; CHECK-NEXT: vfma.f32 q3, q6, q5 ; CHECK-NEXT: add.w r12, r9, r5 +; CHECK-NEXT: vfma.f32 q3, q6, q5 ; CHECK-NEXT: vldrw.u32 q6, [r9] -; CHECK-NEXT: vfma.f32 q4, q6, q5 ; CHECK-NEXT: add.w r6, r12, r5 +; CHECK-NEXT: vfma.f32 q4, q6, q5 ; CHECK-NEXT: vldrw.u32 q6, [r12] -; CHECK-NEXT: vfma.f32 q2, q6, q5 ; CHECK-NEXT: adds r7, r6, r5 +; CHECK-NEXT: vfma.f32 q2, q6, q5 ; CHECK-NEXT: vldrw.u32 q6, [r6] ; CHECK-NEXT: vfma.f32 q0, q6, q5 ; CHECK-NEXT: vldrw.u32 q6, [r7] @@ -866,17 +866,17 @@ define void @DCT_mve6(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* ; CHECK-NEXT: add.w r12, r3, r5 ; CHECK-NEXT: vldrw.u32 q6, [r1], #16 ; CHECK-NEXT: vldrw.u32 q7, [r3], #16 -; CHECK-NEXT: vfma.f32 q4, q7, q6 ; CHECK-NEXT: add.w r10, r12, r5 +; CHECK-NEXT: vfma.f32 q4, q7, q6 ; CHECK-NEXT: vldrw.u32 q7, [r12] -; CHECK-NEXT: vfma.f32 q5, q7, q6 ; CHECK-NEXT: add.w r6, r10, r5 +; CHECK-NEXT: vfma.f32 q5, q7, q6 ; CHECK-NEXT: vldrw.u32 q7, [r10] -; CHECK-NEXT: vfma.f32 q2, q7, q6 ; CHECK-NEXT: 
adds r7, r6, r5 +; CHECK-NEXT: vfma.f32 q2, q7, q6 ; CHECK-NEXT: vldrw.u32 q7, [r6] -; CHECK-NEXT: vfma.f32 q0, q7, q6 ; CHECK-NEXT: adds r6, r7, r5 +; CHECK-NEXT: vfma.f32 q0, q7, q6 ; CHECK-NEXT: vldrw.u32 q7, [r7] ; CHECK-NEXT: vfma.f32 q3, q7, q6 ; CHECK-NEXT: vldrw.u32 q7, [r6] @@ -1107,47 +1107,49 @@ define void @DCT_mve7(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* ; CHECK-NEXT: .LBB6_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB6_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: add.w r10, r3, r5 ; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: vpsttt +; CHECK-NEXT: add.w r10, r3, r5 +; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q7, [r1], #16 ; CHECK-NEXT: vldrwt.u32 q0, [r3], #16 -; CHECK-NEXT: vfmat.f32 q5, q0, q7 ; CHECK-NEXT: add.w r11, r10, r5 +; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpstt +; CHECK-NEXT: vfmat.f32 q5, q0, q7 ; CHECK-NEXT: vldrwt.u32 q0, [r10] -; CHECK-NEXT: vfmat.f32 q6, q0, q7 -; CHECK-NEXT: vstrw.32 q6, [sp, #40] @ 16-byte Spill +; CHECK-NEXT: add.w r6, r11, r5 ; CHECK-NEXT: vpstt +; CHECK-NEXT: vfmat.f32 q6, q0, q7 ; CHECK-NEXT: vldrwt.u32 q0, [r11] -; CHECK-NEXT: vfmat.f32 q1, q0, q7 -; CHECK-NEXT: add.w r6, r11, r5 +; CHECK-NEXT: vstrw.32 q6, [sp, #40] @ 16-byte Spill ; CHECK-NEXT: vmov q6, q5 +; CHECK-NEXT: vpst +; CHECK-NEXT: vfmat.f32 q1, q0, q7 ; CHECK-NEXT: vmov q5, q4 ; CHECK-NEXT: vmov q4, q3 +; CHECK-NEXT: vmov q3, q1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q0, [r6] -; CHECK-NEXT: vmov q3, q1 ; CHECK-NEXT: vldrw.u32 q1, [sp, #56] @ 16-byte Reload -; CHECK-NEXT: vpst -; CHECK-NEXT: vfmat.f32 q1, q0, q7 ; CHECK-NEXT: adds r7, r6, r5 +; CHECK-NEXT: vpstt +; CHECK-NEXT: vfmat.f32 q1, q0, q7 +; CHECK-NEXT: vldrwt.u32 q0, [r7] +; CHECK-NEXT: adds r6, r7, r5 ; CHECK-NEXT: vstrw.32 q1, [sp, #56] @ 16-byte Spill ; CHECK-NEXT: vmov q1, q3 ; CHECK-NEXT: vmov q3, q4 -; CHECK-NEXT: vmov q4, q5 -; CHECK-NEXT: vmov q5, q6 -; CHECK-NEXT: vldrw.u32 q6, [sp, #40] @ 16-byte Reload -; CHECK-NEXT: sub.w r12, r12, #4 -; CHECK-NEXT: adds r6, r7, r5 ; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q0, [r7] ; CHECK-NEXT: vfmat.f32 q3, q0, q7 -; CHECK-NEXT: adds r7, r6, r5 -; CHECK-NEXT: vpstttt ; CHECK-NEXT: vldrwt.u32 q0, [r6] +; CHECK-NEXT: vmov q4, q5 +; CHECK-NEXT: adds r7, r6, r5 +; CHECK-NEXT: vpstt ; CHECK-NEXT: vfmat.f32 q4, q0, q7 ; CHECK-NEXT: vldrwt.u32 q0, [r7] +; CHECK-NEXT: vmov q5, q6 +; CHECK-NEXT: vldrw.u32 q6, [sp, #40] @ 16-byte Reload +; CHECK-NEXT: vpst ; CHECK-NEXT: vfmat.f32 q2, q0, q7 ; CHECK-NEXT: le lr, .LBB6_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block @@ -1396,54 +1398,55 @@ define void @DCT_mve8(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* ; CHECK-NEXT: .LBB7_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB7_2 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: add.w r11, r3, r5 ; CHECK-NEXT: vctp.32 r10 -; CHECK-NEXT: vpsttt +; CHECK-NEXT: add.w r11, r3, r5 +; CHECK-NEXT: vpstt ; CHECK-NEXT: vldrwt.u32 q0, [r9], #16 ; CHECK-NEXT: vldrwt.u32 q1, [r3], #16 -; CHECK-NEXT: vfmat.f32 q6, q1, q0 -; CHECK-NEXT: vstrw.32 q6, [sp, #40] @ 16-byte Spill +; CHECK-NEXT: add.w r6, r11, r5 +; CHECK-NEXT: sub.w r10, r10, #4 ; CHECK-NEXT: vpstt +; CHECK-NEXT: vfmat.f32 q6, q1, q0 ; CHECK-NEXT: vldrwt.u32 q1, [r11] -; CHECK-NEXT: vfmat.f32 q7, q1, q0 -; CHECK-NEXT: add.w r6, r11, r5 +; CHECK-NEXT: vstrw.32 q6, [sp, #40] @ 16-byte Spill ; CHECK-NEXT: vmov q6, q5 +; CHECK-NEXT: vpst +; CHECK-NEXT: vfmat.f32 q7, q1, q0 ; CHECK-NEXT: vmov q5, q3 ; CHECK-NEXT: vmov q3, q4 +; CHECK-NEXT: vmov 
q4, q2 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q1, [r6] -; CHECK-NEXT: vmov q4, q2 ; CHECK-NEXT: vldrw.u32 q2, [sp, #56] @ 16-byte Reload -; CHECK-NEXT: vpst -; CHECK-NEXT: vfmat.f32 q2, q1, q0 -; CHECK-NEXT: vstrw.32 q2, [sp, #56] @ 16-byte Spill ; CHECK-NEXT: adds r7, r6, r5 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt +; CHECK-NEXT: vfmat.f32 q2, q1, q0 ; CHECK-NEXT: vldrwt.u32 q1, [r7] +; CHECK-NEXT: vstrw.32 q2, [sp, #56] @ 16-byte Spill ; CHECK-NEXT: vldrw.u32 q2, [sp, #72] @ 16-byte Reload ; CHECK-NEXT: adds r6, r7, r5 -; CHECK-NEXT: vpst +; CHECK-NEXT: vpstt ; CHECK-NEXT: vfmat.f32 q2, q1, q0 +; CHECK-NEXT: vldrwt.u32 q1, [r6] +; CHECK-NEXT: adds r7, r6, r5 ; CHECK-NEXT: vstrw.32 q2, [sp, #72] @ 16-byte Spill ; CHECK-NEXT: vmov q2, q4 ; CHECK-NEXT: vmov q4, q3 -; CHECK-NEXT: vmov q3, q5 -; CHECK-NEXT: vmov q5, q6 -; CHECK-NEXT: vldrw.u32 q6, [sp, #40] @ 16-byte Reload -; CHECK-NEXT: adds r7, r6, r5 ; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrwt.u32 q1, [r6] ; CHECK-NEXT: vfmat.f32 q2, q1, q0 -; CHECK-NEXT: sub.w r10, r10, #4 -; CHECK-NEXT: adds r6, r7, r5 -; CHECK-NEXT: vpstttt ; CHECK-NEXT: vldrwt.u32 q1, [r7] +; CHECK-NEXT: adds r6, r7, r5 +; CHECK-NEXT: vmov q3, q5 +; CHECK-NEXT: vpstt ; CHECK-NEXT: vfmat.f32 q4, q1, q0 ; CHECK-NEXT: vldrwt.u32 q1, [r6] -; CHECK-NEXT: vfmat.f32 q5, q1, q0 +; CHECK-NEXT: vmov q5, q6 ; CHECK-NEXT: add r6, r5 ; CHECK-NEXT: vpstt +; CHECK-NEXT: vfmat.f32 q5, q1, q0 ; CHECK-NEXT: vldrwt.u32 q1, [r6] +; CHECK-NEXT: vldrw.u32 q6, [sp, #40] @ 16-byte Reload +; CHECK-NEXT: vpst ; CHECK-NEXT: vfmat.f32 q3, q1, q0 ; CHECK-NEXT: le lr, .LBB7_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll b/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll index 86634db14b344..248edbf6c1558 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-build-const.ll @@ -158,13 +158,20 @@ define arm_aapcs_vfpcc <2 x i64> @build_upper_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adr r0, .LCPI14_0 ; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vbic q1, q1, q2 +; CHECK-NEXT: adr r0, .LCPI14_1 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vorr q0, q0, q1 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI14_0: +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .LCPI14_1: ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 0 @ 0x0 ; CHECK-NEXT: .long 4294967295 @ 0xffffffff @@ -179,13 +186,20 @@ define arm_aapcs_vfpcc <2 x i64> @build_lower_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adr r0, .LCPI15_0 ; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vbic q1, q1, q2 +; CHECK-NEXT: adr r0, .LCPI15_1 +; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vldrw.u32 q2, [r0] ; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vorr q0, q0, q1 ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI15_0: +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .LCPI15_1: ; CHECK-NEXT: .long 4294967295 @ 0xffffffff ; CHECK-NEXT: .long 4294967295 @ 0xffffffff ; CHECK-NEXT: .long 0 @ 0x0 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll b/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll index 
0259cd6770ad7..85760e1a5292e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-constfold.ll @@ -13,8 +13,8 @@ define arm_aapcs_vfpcc void @reg(<8 x i16> %acc0, <8 x i16> %acc1, i32* nocaptur ; CHECK-NEXT: vaddve.s16 r2, q1 ; CHECK-NEXT: vaddvt.s16 r4, q0 ; CHECK-NEXT: vaddve.s16 r6, q0 -; CHECK-NEXT: strd r6, r4, [r0] ; CHECK-NEXT: strd r2, r12, [r0, #8] +; CHECK-NEXT: strd r6, r4, [r0] ; CHECK-NEXT: pop {r4, r6, r7, pc} entry: %0 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 13107) @@ -164,10 +164,10 @@ define arm_aapcs_vfpcc i32 @const_mask_not1(<4 x i32> %0, <4 x i32> %1, i32 %2) ; CHECK: @ %bb.0: ; CHECK-NEXT: movs r1, #1 ; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: movw r1, #65533 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vaddvat.s32 r0, q0 ; CHECK-NEXT: vaddvat.s32 r0, q1 -; CHECK-NEXT: movw r1, #65533 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vaddvat.s32 r0, q0 @@ -242,8 +242,8 @@ define arm_aapcs_vfpcc i32 @const_mask_abbreakab(<4 x i32> %0, <4 x i32> %1, i32 ; CHECK-NEXT: vpste ; CHECK-NEXT: vaddvat.s32 r0, q0 ; CHECK-NEXT: vaddvae.s32 r0, q1 -; CHECK-NEXT: vadd.i32 q1, q0, r0 ; CHECK-NEXT: vpnot +; CHECK-NEXT: vadd.i32 q1, q0, r0 ; CHECK-NEXT: vpste ; CHECK-NEXT: vaddvat.s32 r0, q1 ; CHECK-NEXT: vaddvae.s32 r0, q0 @@ -272,8 +272,8 @@ define arm_aapcs_vfpcc i32 @const_mask_break(<4 x i32> %0, <4 x i32> %1, i32 %2) ; CHECK-NEXT: vpstt ; CHECK-NEXT: vaddvat.s32 r0, q0 ; CHECK-NEXT: vaddvat.s32 r0, q1 -; CHECK-NEXT: vadd.i32 q1, q0, r0 ; CHECK-NEXT: vpnot +; CHECK-NEXT: vadd.i32 q1, q0, r0 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vaddvat.s32 r0, q1 ; CHECK-NEXT: vaddvat.s32 r0, q0 @@ -299,14 +299,14 @@ define arm_aapcs_vfpcc i32 @const_mask_threepred(<4 x i32> %0, <4 x i32> %1, i32 ; CHECK: @ %bb.0: ; CHECK-NEXT: movw r1, #1234 ; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: movw r1, #64300 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vaddvat.s32 r0, q0 ; CHECK-NEXT: vaddvat.s32 r0, q1 -; CHECK-NEXT: movw r1, #64300 ; CHECK-NEXT: vmsr p0, r1 +; CHECK-NEXT: movw r1, #64301 ; CHECK-NEXT: vpst ; CHECK-NEXT: vaddvat.s32 r0, q1 -; CHECK-NEXT: movw r1, #64301 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vaddvat.s32 r0, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index 406d2d15a6b34..48a21237d9dea 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -20,8 +20,8 @@ define void @arm_min_helium_f32(float* %pSrc, i32 %blockSize, float* nocapture % ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q4, [r0], #16 ; CHECK-NEXT: vptt.f32 ge, q1, q4 -; CHECK-NEXT: vmovt q1, q4 ; CHECK-NEXT: vmovt q0, q2 +; CHECK-NEXT: vmovt q1, q4 ; CHECK-NEXT: vadd.i32 q2, q2, q3 ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %do.end diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 3f7b0e6a437b1..50d9c62c1fa6b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -520,9 +520,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(i32* nocapture readonly %pSrcA, i32* ; CHECK-NEXT: vdup.32 q4, r9 ; CHECK-NEXT: add.w r9, r9, #4 ; CHECK-NEXT: vorr q4, q4, q0 -; CHECK-NEXT: vpt.u32 cs, q1, q4 +; CHECK-NEXT: vptt.u32 cs, q1, q4 ; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 -; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q5, [r1], #16 ; CHECK-NEXT: vmov.f32 s24, s18 ; CHECK-NEXT: vmov.f32 s26, s19 diff --git 
a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll index 5bdf3b929bb3e..9443164ea8001 100644 --- a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll @@ -225,6 +225,175 @@ for.cond.cleanup: ; preds = %for.body, %middle.b ret void } +define void @shl(i32* nocapture readonly %x, i32* noalias nocapture %y, i32 %n) { +; CHECK-LABEL: shl: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: it lt +; CHECK-NEXT: poplt {r7, pc} +; CHECK-NEXT: .LBB4_1: @ %vector.ph +; CHECK-NEXT: adr r3, .LCPI4_0 +; CHECK-NEXT: vldrw.u32 q0, [r3] +; CHECK-NEXT: vadd.i32 q0, q0, r1 +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB4_2: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q1, [r0], #16 +; CHECK-NEXT: vstrw.32 q1, [q0, #64]! +; CHECK-NEXT: letp lr, .LBB4_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: .LCPI4_0: +; CHECK-NEXT: .long 4294967232 @ 0xffffffc0 +; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 +; CHECK-NEXT: .long 4294967264 @ 0xffffffe0 +; CHECK-NEXT: .long 4294967280 @ 0xfffffff0 +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %entry + %n.rnd.up = add i32 %n, 3 + %n.vec = and i32 %n.rnd.up, -4 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ] + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) + %0 = getelementptr inbounds i32, i32* %x, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison) + %2 = shl nsw <4 x i32> %vec.ind, <i32 2, i32 2, i32 2, i32 2> + %3 = getelementptr inbounds i32, i32* %y, <4 x i32> %2 + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %wide.masked.load, <4 x i32*> %3, i32 4, <4 x i1> %active.lane.mask) + %index.next = add i32 %index, 4 + %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4> + %4 = icmp eq i32 %index.next, %n.vec + br i1 %4, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + +define void @shlor(i32* nocapture readonly %x, i32* noalias nocapture %y, i32 %n) { +; CHECK-LABEL: shlor: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .pad #48 +; CHECK-NEXT: sub sp, #48 +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: blt .LBB5_3 +; CHECK-NEXT: @ %bb.1: @ %vector.ph +; CHECK-NEXT: adr.w lr, .LCPI5_0 +; CHECK-NEXT: adr r4, .LCPI5_1 +; CHECK-NEXT: adr r5, .LCPI5_2 +; CHECK-NEXT: adr r6, .LCPI5_3 +; CHECK-NEXT: vldrw.u32 q0, [r6] +; CHECK-NEXT: vldrw.u32 q1, [r5] +; CHECK-NEXT: vldrw.u32 q2, [r4] +; CHECK-NEXT: vldrw.u32 q3, [lr] +; CHECK-NEXT: vadd.i32 q0, q0, r1 +; CHECK-NEXT: vadd.i32 q1, q1, r1 +; CHECK-NEXT: vadd.i32 q2, q2, r1 +; CHECK-NEXT: vadd.i32 q3, q3, r1 +; CHECK-NEXT: vmov.i32 q4, #0x3 +; CHECK-NEXT: vstrw.32 q4, [sp, #32] @ 16-byte Spill +; CHECK-NEXT: vmov.i32 q4, #0x2 +; CHECK-NEXT: vstrw.32 q4, [sp, #16] @ 16-byte Spill +; 
CHECK-NEXT: vmov.i32 q4, #0x1 +; CHECK-NEXT: vmov.i32 q7, #0x4 +; CHECK-NEXT: vstrw.32 q4, [sp] @ 16-byte Spill +; CHECK-NEXT: dlstp.32 lr, r2 +; CHECK-NEXT: .LBB5_2: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q6, [sp] @ 16-byte Reload +; CHECK-NEXT: vldrw.u32 q4, [r0], #16 +; CHECK-NEXT: vadd.i32 q6, q4, q6 +; CHECK-NEXT: vadd.i32 q5, q4, q7 +; CHECK-NEXT: vstrw.32 q6, [q3, #128]! +; CHECK-NEXT: vldrw.u32 q6, [sp, #16] @ 16-byte Reload +; CHECK-NEXT: vadd.i32 q6, q4, q6 +; CHECK-NEXT: vstrw.32 q6, [q2, #128]! +; CHECK-NEXT: vldrw.u32 q6, [sp, #32] @ 16-byte Reload +; CHECK-NEXT: vadd.i32 q4, q4, q6 +; CHECK-NEXT: vstrw.32 q4, [q1, #128]! +; CHECK-NEXT: vstrw.32 q5, [q0, #128]! +; CHECK-NEXT: letp lr, .LBB5_2 +; CHECK-NEXT: .LBB5_3: @ %for.cond.cleanup +; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.4: +; CHECK-NEXT: .LCPI5_0: +; CHECK-NEXT: .long 4294967168 @ 0xffffff80 +; CHECK-NEXT: .long 4294967200 @ 0xffffffa0 +; CHECK-NEXT: .long 4294967232 @ 0xffffffc0 +; CHECK-NEXT: .long 4294967264 @ 0xffffffe0 +; CHECK-NEXT: .LCPI5_1: +; CHECK-NEXT: .long 4294967176 @ 0xffffff88 +; CHECK-NEXT: .long 4294967208 @ 0xffffffa8 +; CHECK-NEXT: .long 4294967240 @ 0xffffffc8 +; CHECK-NEXT: .long 4294967272 @ 0xffffffe8 +; CHECK-NEXT: .LCPI5_2: +; CHECK-NEXT: .long 4294967184 @ 0xffffff90 +; CHECK-NEXT: .long 4294967216 @ 0xffffffb0 +; CHECK-NEXT: .long 4294967248 @ 0xffffffd0 +; CHECK-NEXT: .long 4294967280 @ 0xfffffff0 +; CHECK-NEXT: .LCPI5_3: +; CHECK-NEXT: .long 4294967192 @ 0xffffff98 +; CHECK-NEXT: .long 4294967224 @ 0xffffffb8 +; CHECK-NEXT: .long 4294967256 @ 0xffffffd8 +; CHECK-NEXT: .long 4294967288 @ 0xfffffff8 +entry: + %cmp33 = icmp sgt i32 %n, 0 + br i1 %cmp33, label %vector.ph, label %for.cond.cleanup + +vector.ph: ; preds = %entry + %n.rnd.up = add i32 %n, 3 + %n.vec = and i32 %n.rnd.up, -4 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ] + %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n) + %0 = getelementptr inbounds i32, i32* %x, i32 %index + %1 = bitcast i32* %0 to <4 x i32>* + %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %1, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison) + %2 = add nsw <4 x i32> %wide.masked.load, <i32 1, i32 1, i32 1, i32 1> + %3 = shl nsw <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3> + %4 = getelementptr inbounds i32, i32* %y, <4 x i32> %3 + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %2, <4 x i32*> %4, i32 4, <4 x i1> %active.lane.mask) + %5 = add nsw <4 x i32> %wide.masked.load, <i32 2, i32 2, i32 2, i32 2> + %6 = or <4 x i32> %3, <i32 2, i32 2, i32 2, i32 2> + %7 = getelementptr inbounds i32, i32* %y, <4 x i32> %6 + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %5, <4 x i32*> %7, i32 4, <4 x i1> %active.lane.mask) + %8 = add nsw <4 x i32> %wide.masked.load, <i32 3, i32 3, i32 3, i32 3> + %9 = or <4 x i32> %3, <i32 4, i32 4, i32 4, i32 4> + %10 = getelementptr inbounds i32, i32* %y, <4 x i32> %9 + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %8, <4 x i32*> %10, i32 4, <4 x i1> %active.lane.mask) + %11 = add nsw <4 x i32> %wide.masked.load, <i32 4, i32 4, i32 4, i32 4> + %12 = or <4 x i32> %3, <i32 6, i32 6, i32 6, i32 6> + %13 = getelementptr inbounds i32, i32* %y, <4 x i32> %12 + call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %11, <4 x i32*> %13, i32 4, <4 x i1> %active.lane.mask) + %index.next = add i32 %index, 4 + %vec.ind.next = add <4 
x i32> %vec.ind, + %14 = icmp eq i32 %index.next, %n.vec + br i1 %14, label %for.cond.cleanup, label %vector.body + +for.cond.cleanup: ; preds = %vector.body, %entry + ret void +} + declare void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8>, <8 x i8*>, i32, <8 x i1>) declare void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16>, <8 x i16*>, i32, <8 x i1>) declare void @llvm.masked.scatter.v8f16.v8p0f16(<8 x half>, <8 x half*>, i32, <8 x i1>) @@ -234,3 +403,5 @@ declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32, <4 x declare void @llvm.masked.scatter.v4f16.v4p0f16(<4 x half>, <4 x half*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>) declare void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float>, <4 x float*>, i32, <4 x i1>) +declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) +declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll index ce79c46a32c29..359591219ce63 100644 --- a/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll +++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll @@ -21,8 +21,8 @@ define i32 @a(i32* readnone %b, i8* %c) { ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: adds r0, r1, r2 -; CHECK-NEXT: adds r2, #16 ; CHECK-NEXT: vidup.u8 q0, r0, #1 +; CHECK-NEXT: adds r2, #16 ; CHECK-NEXT: vstrb.8 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %while.end diff --git a/llvm/test/CodeGen/Thumb2/mve-vctp.ll b/llvm/test/CodeGen/Thumb2/mve-vctp.ll index 8cddbc79e2e13..24d77d1102051 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vctp.ll @@ -4,9 +4,9 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { ; CHECK-LABEL: vctp8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vctp.8 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.8 r0 +; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vmovt q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r2] @@ -21,9 +21,9 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { ; CHECK-LABEL: vctp16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vctp.16 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.16 r0 +; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vmovt q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r2] @@ -38,9 +38,9 @@ define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { define void @vctp32(i32 %arg, <4 x i32> *%in, <4 x i32>* %out) { ; CHECK-LABEL: vctp32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vctp.32 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.32 r0 +; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpst ; CHECK-NEXT: vmovt q0, q1 ; CHECK-NEXT: vstrw.32 q0, [r2] diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll index 0946b0103e345..9650cfd692493 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll @@ -139,8 +139,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %b) { ; CHECK-LABEL: add_v4i16_v4i32_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u16 q0, q0 ; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vmovlb.u16 q0, q0 ; 
CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddvt.u32 r0, q0 ; CHECK-NEXT: bx lr @@ -155,8 +155,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %b) { ; CHECK-LABEL: add_v4i16_v4i32_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddvt.u32 r0, q0 ; CHECK-NEXT: bx lr @@ -393,8 +393,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %b) { ; CHECK-LABEL: add_v4i16_v4i64_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u16 q0, q0 ; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vmovlb.u16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0 ; CHECK-NEXT: bx lr @@ -409,8 +409,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %b) { ; CHECK-LABEL: add_v4i16_v4i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddlvt.s32 r0, r1, q0 ; CHECK-NEXT: bx lr @@ -524,8 +524,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i32_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q1, q1 +; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q1, zr ; CHECK-NEXT: vaddvt.u16 r0, q0 ; CHECK-NEXT: bx lr @@ -540,8 +540,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i32_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q1, q1 +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q1, zr ; CHECK-NEXT: vaddvt.s16 r0, q0 ; CHECK-NEXT: bx lr @@ -557,8 +557,8 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8_v4i32_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q2, #0xff -; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddvt.u32 r0, q0 ; CHECK-NEXT: bx lr @@ -573,8 +573,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8_v4i32_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmov.i32 q2, #0xff +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr @@ -621,8 +621,8 @@ entry: define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i16_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q1, q1 +; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q1, zr ; CHECK-NEXT: vaddvt.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 @@ -638,8 +638,8 @@ entry: define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i16_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q1, q1 +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q1, zr ; CHECK-NEXT: vaddvt.u16 r0, q0 ; CHECK-NEXT: sxth r0, r0 @@ -1350,8 +1350,8 @@ define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8_v4i64_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q2, #0xff -; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vand q1, q1, q2 +; 
CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddlvt.u32 r0, r1, q0 ; CHECK-NEXT: bx lr @@ -1366,8 +1366,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8_v4i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmov.i32 q2, #0xff +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr @@ -1633,8 +1633,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, <4 x i16> %b, i32 %a) { ; CHECK-LABEL: add_v4i16_v4i32_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u16 q0, q0 ; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vmovlb.u16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddvat.u32 r0, q0 ; CHECK-NEXT: bx lr @@ -1650,8 +1650,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, <4 x i16> %b, i32 %a) { ; CHECK-LABEL: add_v4i16_v4i32_acc_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vmovlb.u16 q1, q1 +; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddvat.u32 r0, q0 ; CHECK-NEXT: bx lr @@ -2012,8 +2012,8 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %b, i3 ; CHECK-LABEL: add_v4i8_v4i32_acc_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q2, #0xff -; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vand q1, q1, q2 +; CHECK-NEXT: vand q0, q0, q2 ; CHECK-NEXT: vpt.i32 eq, q1, zr ; CHECK-NEXT: vaddvat.u32 r0, q0 ; CHECK-NEXT: bx lr @@ -2029,8 +2029,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, <4 x i8> %b, i32 %a) { ; CHECK-LABEL: add_v4i8_v4i32_acc_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmov.i32 q2, #0xff +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q1, zr @@ -2080,8 +2080,8 @@ entry: define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %b, i16 %a) { ; CHECK-LABEL: add_v8i8_v8i16_acc_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q1, q1 +; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q1, zr ; CHECK-NEXT: vaddvat.u16 r0, q0 ; CHECK-NEXT: uxth r0, r0 @@ -2098,8 +2098,8 @@ entry: define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %b, i16 %a) { ; CHECK-LABEL: add_v8i8_v8i16_acc_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q1, q1 +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q1, zr ; CHECK-NEXT: vaddvat.u16 r0, q0 ; CHECK-NEXT: sxth r0, r0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll index 0aeff64fffe8d..1b984fa3e3347 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll @@ -146,9 +146,9 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y, <4 x i16> %b) { ; CHECK-LABEL: add_v4i16_v4i32_zext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vmovlb.u16 q1, q1 ; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavt.u32 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -165,9 +165,9 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %y, <4 x i16> %b) { ; CHECK-LABEL: 
add_v4i16_v4i32_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavt.u32 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -315,9 +315,9 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %y, <4 x i16> %b) { ; CHECK-LABEL: add_v4i16_v4i64_zext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vmovlb.u16 q1, q1 ; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlalvt.u32 r0, r1, q0, q1 ; CHECK-NEXT: bx lr @@ -334,9 +334,9 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %y, <4 x i16> %b) { ; CHECK-LABEL: add_v4i16_v4i64_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlalvt.s32 r0, r1, q0, q1 ; CHECK-NEXT: bx lr @@ -523,9 +523,9 @@ entry: define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i32_zext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.u8 q1, q1 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavt.u16 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -542,9 +542,9 @@ entry: define arm_aapcs_vfpcc i32 @add_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i32_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavt.s16 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -561,8 +561,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v8i8i16_v8i32_zext(<8 x i8> %x, <8 x i16> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8i16_v8i32_zext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q2, q2 +; CHECK-NEXT: vmovlb.u8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavt.u16 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -579,8 +579,8 @@ entry: define arm_aapcs_vfpcc i32 @add_v8i8i16_v8i32_sext(<8 x i8> %x, <8 x i16> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8i16_v8i32_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmovlb.u8 q2, q2 +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavt.s16 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -598,9 +598,9 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y, <4 x i ; CHECK-LABEL: add_v4i8_v4i32_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q3, #0xff +; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vand q1, q1, q3 ; CHECK-NEXT: vand q0, q0, q3 -; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavt.u32 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -617,11 +617,11 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %y, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8_v4i32_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q3, #0xff ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmov.i32 q3, #0xff -; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vand q2, q2, q3 +; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavt.u32 r0, q0, q1 @@ -641,8 +641,8 @@ define 
arm_aapcs_vfpcc i32 @add_v4i8_v4i32_szext(<4 x i8> %x, <4 x i8> %y, <4 x ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q3, #0xff ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vand q1, q1, q3 ; CHECK-NEXT: vand q2, q2, q3 +; CHECK-NEXT: vand q1, q1, q3 ; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavt.u32 r0, q0, q1 @@ -762,9 +762,9 @@ entry: define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i16_zext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.u8 q1, q1 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavt.u16 r0, q0, q1 ; CHECK-NEXT: uxth r0, r0 @@ -782,9 +782,9 @@ entry: define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i16_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavt.u16 r0, q0, q1 ; CHECK-NEXT: sxth r0, r0 @@ -1406,9 +1406,9 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i64_zext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.u8 q1, q1 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlalvt.u16 r0, r1, q0, q1 ; CHECK-NEXT: bx lr @@ -1425,9 +1425,9 @@ entry: define arm_aapcs_vfpcc i64 @add_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b) { ; CHECK-LABEL: add_v8i8_v8i64_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlalvt.s16 r0, r1, q0, q1 ; CHECK-NEXT: bx lr @@ -1445,9 +1445,9 @@ define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %y, <4 x i ; CHECK-LABEL: add_v4i8_v4i64_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q3, #0xff +; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vand q1, q1, q3 ; CHECK-NEXT: vand q0, q0, q3 -; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlalvt.u32 r0, r1, q0, q1 ; CHECK-NEXT: bx lr @@ -1464,11 +1464,11 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8_v4i64_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q3, #0xff ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmov.i32 q3, #0xff -; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vand q2, q2, q3 +; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlalvt.s32 r0, r1, q0, q1 @@ -1488,8 +1488,8 @@ define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i64_zext(<4 x i8> %x, <4 x i16> %y, <4 ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q3, #0xff ; CHECK-NEXT: vmovlb.u16 q1, q1 -; CHECK-NEXT: vand q0, q0, q3 ; CHECK-NEXT: vand q2, q2, q3 +; CHECK-NEXT: vand q0, q0, q3 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlalvt.u32 r0, r1, q0, q1 ; CHECK-NEXT: bx lr @@ -1506,8 +1506,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i64_sext(<4 x i8> %x, <4 x i16> %y, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8i16_v4i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmov.i32 q3, #0xff +; CHECK-NEXT: 
vmovlb.s8 q0, q0 ; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 @@ -1529,8 +1529,8 @@ define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i32_v4i64_zext(<4 x i8> %x, <4 x i16> ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q3, #0xff ; CHECK-NEXT: vmovlb.u16 q1, q1 -; CHECK-NEXT: vand q0, q0, q3 ; CHECK-NEXT: vand q2, q2, q3 +; CHECK-NEXT: vand q0, q0, q3 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlalvt.u32 r0, r1, q0, q1 ; CHECK-NEXT: bx lr @@ -1548,8 +1548,8 @@ entry: define arm_aapcs_vfpcc i64 @add_v4i8i16_v4i32_v4i64_sext(<4 x i8> %x, <4 x i16> %y, <4 x i8> %b) { ; CHECK-LABEL: add_v4i8i16_v4i32_v4i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vmov.i32 q3, #0xff +; CHECK-NEXT: vmovlb.s8 q0, q0 ; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 @@ -1858,9 +1858,9 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, <4 x i16> %y, <4 x i16> %b, i32 %a) { ; CHECK-LABEL: add_v4i16_v4i32_acc_zext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vmovlb.u16 q1, q1 ; CHECK-NEXT: vmovlb.u16 q0, q0 -; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavat.u32 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -1878,9 +1878,9 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, <4 x i16> %y, <4 x i16> %b, i32 %a) { ; CHECK-LABEL: add_v4i16_v4i32_acc_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vmovlb.u16 q2, q2 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavat.u32 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -2187,9 +2187,9 @@ define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, <4 x i8> %y, <4 ; CHECK-LABEL: add_v4i8_v4i32_acc_zext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q3, #0xff +; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vand q1, q1, q3 ; CHECK-NEXT: vand q0, q0, q3 -; CHECK-NEXT: vand q2, q2, q3 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavat.u32 r0, q0, q1 ; CHECK-NEXT: bx lr @@ -2207,11 +2207,11 @@ entry: define arm_aapcs_vfpcc i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, <4 x i8> %y, <4 x i8> %b, i32 %a) { ; CHECK-LABEL: add_v4i8_v4i32_acc_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q3, #0xff ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmov.i32 q3, #0xff -; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vand q2, q2, q3 +; CHECK-NEXT: vmovlb.s16 q1, q1 ; CHECK-NEXT: vmovlb.s16 q0, q0 ; CHECK-NEXT: vpt.i32 eq, q2, zr ; CHECK-NEXT: vmlavat.u32 r0, q0, q1 @@ -2266,9 +2266,9 @@ entry: define arm_aapcs_vfpcc zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b, i16 %a) { ; CHECK-LABEL: add_v8i8_v8i16_acc_zext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.u8 q1, q1 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavat.u16 r0, q0, q1 ; CHECK-NEXT: uxth r0, r0 @@ -2287,9 +2287,9 @@ entry: define arm_aapcs_vfpcc signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, <8 x i8> %y, <8 x i8> %b, i16 %a) { ; CHECK-LABEL: add_v8i8_v8i16_acc_sext: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmovlb.s8 q1, q1 ; CHECK-NEXT: vmovlb.s8 q0, q0 -; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vpt.i16 eq, q2, zr ; CHECK-NEXT: vmlavat.u16 r0, q0, q1 ; CHECK-NEXT: sxth r0, r0 diff --git 
a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-tailpred.ll index 2473840169748..8882108a0564d 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-tailpred.ll @@ -117,8 +117,8 @@ define half @maxf16(half* noalias nocapture readonly %s1, half* noalias nocaptur ; CHECK-NEXT: .LBB2_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r0], #16 -; CHECK-NEXT: vabs.f16 q0, q0 ; CHECK-NEXT: vldrh.u16 q1, [r1], #16 +; CHECK-NEXT: vabs.f16 q0, q0 ; CHECK-NEXT: vmaxnm.f16 q0, q0, q1 ; CHECK-NEXT: vstrh.16 q0, [r2], #16 ; CHECK-NEXT: letp lr, .LBB2_2 diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll index 5ddbb3ba5dd00..311e648d8433e 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll @@ -166,10 +166,10 @@ define void @sunken_vmovl(i8* noalias %pTarget, i16 signext %iTargetStride, i8* ; CHECK-NEXT: vsub.i16 q3, q0, q1 ; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmul.i16 q3, q2, q3 +; CHECK-NEXT: vldrb.u16 q2, [r0], #8 ; CHECK-NEXT: vmla.u16 q3, q1, r3 -; CHECK-NEXT: vshr.u16 q3, q3, #8 ; CHECK-NEXT: vldrb.u16 q1, [r2], #8 -; CHECK-NEXT: vldrb.u16 q2, [r0], #8 +; CHECK-NEXT: vshr.u16 q3, q3, #8 ; CHECK-NEXT: vstrb.16 q3, [r0, #-16] ; CHECK-NEXT: letp lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %do.end diff --git a/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll new file mode 100644 index 0000000000000..b425d32efb6c2 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-vselect-constants.ll @@ -0,0 +1,287 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmp_sel_C1_or_C2_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: adr r0, .LCPI0_0 +; CHECK-NEXT: adr r1, .LCPI0_1 +; CHECK-NEXT: vcmp.i32 eq, q0, q1 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 3000 @ 0xbb8 +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long 42 @ 0x2a +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 4294967294 @ 0xfffffffe +; CHECK-NEXT: .long 4294967295 @ 0xffffffff + %cond = icmp eq <4 x i32> %x, %y + %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1> + ret <4 x i32> %add +} + +define arm_aapcs_vfpcc <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: adr r0, .LCPI1_0 +; CHECK-NEXT: adr r1, .LCPI1_1 +; CHECK-NEXT: vcmp.i32 eq, q0, q1 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 43 @ 0x2b +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .long 42 @ 0x2a +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 4294967294 @ 0xfffffffe +; CHECK-NEXT: .long 4294967295 @ 0xffffffff + %cond = icmp eq <4 x i32> %x, %y + %add
= select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1> + ret <4 x i32> %add +} + +define arm_aapcs_vfpcc <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: adr r0, .LCPI2_0 +; CHECK-NEXT: adr r1, .LCPI2_1 +; CHECK-NEXT: vcmp.i32 eq, q0, q1 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vpsel q0, q1, q0 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .long 43 @ 0x2b +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 4294967295 @ 0xffffffff +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .LCPI2_1: +; CHECK-NEXT: .long 44 @ 0x2c +; CHECK-NEXT: .long 2 @ 0x2 +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 1 @ 0x1 + %cond = icmp eq <4 x i32> %x, %y + %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1> + ret <4 x i32> %add +} + +define arm_aapcs_vfpcc <4 x i32> @cmp_sel_minus1_or_0_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmp_sel_minus1_or_0_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vmov.i8 q3, #0xff +; CHECK-NEXT: vcmp.i32 eq, q0, q1 +; CHECK-NEXT: vpsel q0, q3, q2 +; CHECK-NEXT: bx lr + %cond = icmp eq <4 x i32> %x, %y + %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0> + ret <4 x i32> %add +} + +define arm_aapcs_vfpcc <4 x i32> @cmp_sel_0_or_minus1_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmp_sel_0_or_minus1_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q2, #0xff +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.i32 eq, q0, q1 +; CHECK-NEXT: vpsel q0, q3, q2 +; CHECK-NEXT: bx lr + %cond = icmp eq <4 x i32> %x, %y + %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> + ret <4 x i32> %add +} + +define arm_aapcs_vfpcc <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmp_sel_1_or_0_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q2, #0x0 +; CHECK-NEXT: vmov.i32 q3, #0x1 +; CHECK-NEXT: vcmp.i32 eq, q0, q1 +; CHECK-NEXT: vpsel q0, q3, q2 +; CHECK-NEXT: bx lr + %cond = icmp eq <4 x i32> %x, %y + %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0> + ret <4 x i32> %add +} + +define arm_aapcs_vfpcc <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: cmp_sel_0_or_1_vec: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q2, #0x1 +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.i32 eq, q0, q1 +; CHECK-NEXT: vpsel q0, q3, q2 +; CHECK-NEXT: bx lr + %cond = icmp eq <4 x i32> %x, %y + %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %add +} + +define arm_aapcs_vfpcc <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: signbit_mask_v16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vshr.s8 q0, q0, #7 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr + %cond = icmp slt <16 x i8> %a, zeroinitializer + %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer + ret <16 x i8> %r +} + +define arm_aapcs_vfpcc <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: signbit_mask_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vshr.s16 q0, q0, #15 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr + %cond = icmp slt <8 x i16> %a, zeroinitializer + %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer + ret <8 x i16> %r +} + +define arm_aapcs_vfpcc <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: signbit_mask_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vshr.s32 q0, q0, #31 +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr + %cond = icmp slt <4 x
i32> %a, zeroinitializer + %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer + ret <4 x i32> %r +} + +define arm_aapcs_vfpcc <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: signbit_mask_v2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: asrs r1, r1, #31 +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr + %cond = icmp slt <2 x i64> %a, zeroinitializer + %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer + ret <2 x i64> %r +} + +define arm_aapcs_vfpcc <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: signbit_setmask_v16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vshr.s8 q0, q0, #7 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: bx lr + %cond = icmp slt <16 x i8> %a, zeroinitializer + %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b + ret <16 x i8> %r +} + +define arm_aapcs_vfpcc <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: signbit_setmask_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vshr.s16 q0, q0, #15 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: bx lr + %cond = icmp slt <8 x i16> %a, zeroinitializer + %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b + ret <8 x i16> %r +} + +define arm_aapcs_vfpcc <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: signbit_setmask_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vshr.s32 q0, q0, #31 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: bx lr + %cond = icmp slt <4 x i32> %a, zeroinitializer + %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b + ret <4 x i32> %r +} + +define arm_aapcs_vfpcc <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: signbit_setmask_v2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: asrs r0, r0, #31 +; CHECK-NEXT: asrs r1, r1, #31 +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vorr q0, q1, q0 +; CHECK-NEXT: bx lr + %cond = icmp slt <2 x i64> %a, zeroinitializer + %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b + ret <2 x i64> %r +} + +define arm_aapcs_vfpcc <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: not_signbit_mask_v16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q2, #0xff +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.s8 gt, q0, q2 +; CHECK-NEXT: vpsel q0, q1, q3 +; CHECK-NEXT: bx lr + %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> + %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer + ret <16 x i8> %r +} + +define arm_aapcs_vfpcc <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: not_signbit_mask_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q2, #0xff +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.s16 gt, q0, q2 +; CHECK-NEXT: vpsel q0, q1, q3 +; CHECK-NEXT: bx lr + %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> + %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer + ret <8 x i16> %r +} + +define arm_aapcs_vfpcc <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: not_signbit_mask_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i8 q2, #0xff +; CHECK-NEXT: vmov.i32 q3, #0x0 +; CHECK-NEXT: vcmp.s32 gt, q0, q2 +; CHECK-NEXT: vpsel q0, q1, q3 +; CHECK-NEXT: bx lr + %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> + %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer + ret <4 x i32> %r +} + +define
arm_aapcs_vfpcc <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: not_signbit_mask_v2i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: mvn.w r0, r0, asr #31 +; CHECK-NEXT: mvn.w r1, r1, asr #31 +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 +; CHECK-NEXT: vand q0, q1, q0 +; CHECK-NEXT: bx lr + %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1> + %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer + ret <2 x i64> %r +} diff --git a/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll b/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll index 5e3546585e94b..65214ca40d82f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll +++ b/llvm/test/CodeGen/Thumb2/mve-zext-masked-load.ll @@ -54,9 +54,9 @@ define arm_aapcs_vfpcc <4 x double> @foo_v4i32(<4 x i32>* nocapture readonly %pS ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-NEXT: vpt.s32 lt, q0, zr ; CHECK-NEXT: vldrwt.u32 q4, [r0] -; CHECK-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-NEXT: vmov.f32 s0, s16 ; CHECK-NEXT: vmov.f32 s2, s17 ; CHECK-NEXT: vand q6, q0, q5 diff --git a/llvm/test/CodeGen/X86/addrsig.ll b/llvm/test/CodeGen/X86/addrsig.ll index 957de7ec2a64c..f028e0a6b1905 100644 --- a/llvm/test/CodeGen/X86/addrsig.ll +++ b/llvm/test/CodeGen/X86/addrsig.ll @@ -6,9 +6,9 @@ ; CHECK: .addrsig ; CHECK: .addrsig_sym f1 -define void @f1() { - %f1 = bitcast void()* @f1 to i8* - %f2 = bitcast void()* @f2 to i8* +define void()* @f1() { + %f1 = bitcast void()* ()* @f1 to i8* + %f2 = bitcast void()* ()* @f2 to i8* %f3 = bitcast void()* @f3 to i8* %g1 = bitcast i32* @g1 to i8* %g2 = bitcast i32* @g2 to i8* @@ -34,7 +34,7 @@ declare void @metadata_f1() declare void @metadata_f2() ; CHECK-NOT: .addrsig_sym f2 -define internal void @f2() local_unnamed_addr { +define internal void()* @f2() local_unnamed_addr { unreachable } @@ -63,9 +63,9 @@ declare void @f3() unnamed_addr @a2 = internal local_unnamed_addr alias i32, i32* @g2 ; CHECK: .addrsig_sym i1 -@i1 = ifunc void(), void()* @f1 +@i1 = ifunc void(), void()* ()* @f1 ; CHECK-NOT: .addrsig_sym i2 -@i2 = internal local_unnamed_addr ifunc void(), void()* @f2 +@i2 = internal local_unnamed_addr ifunc void(), void()* ()* @f2 declare void @llvm.dbg.value(metadata, metadata, metadata) diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll index ac35c5639cc85..4617f3f48b673 100644 --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -907,20 +907,12 @@ define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) { } define <16 x i32> @ternlog_maskz_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x i32> %mask) { -; KNL-LABEL: ternlog_maskz_or_and_mask: -; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; KNL-NEXT: vpcmpgtd %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: ternlog_maskz_or_and_mask: -; SKX: ## %bb.0: -; SKX-NEXT: vpmovd2m %zmm2, %k1 -; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 {%k1} {z} -; SKX-NEXT: retq +; ALL-LABEL: ternlog_maskz_or_and_mask: +; ALL: ## %bb.0: +; ALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm3 +; ALL-NEXT: vpsrad $31, %zmm2, %zmm0 +; ALL-NEXT: vpternlogd $224, %zmm1, %zmm3,
%zmm0 +; ALL-NEXT: retq %m = icmp slt <16 x i32> %mask, zeroinitializer %a = and <16 x i32> %x, %b = or <16 x i32> %a, %y @@ -929,20 +921,12 @@ define <16 x i32> @ternlog_maskz_or_and_mask(<16 x i32> %x, <16 x i32> %y, <16 x } define <8 x i64> @ternlog_maskz_xor_and_mask(<8 x i64> %x, <8 x i64> %y, <8 x i64> %mask) { -; KNL-LABEL: ternlog_maskz_xor_and_mask: -; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; KNL-NEXT: vpcmpgtq %zmm2, %zmm3, %k1 -; KNL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: ternlog_maskz_xor_and_mask: -; SKX: ## %bb.0: -; SKX-NEXT: vpmovq2m %zmm2, %k1 -; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 -; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} -; SKX-NEXT: retq +; ALL-LABEL: ternlog_maskz_xor_and_mask: +; ALL: ## %bb.0: +; ALL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm3 +; ALL-NEXT: vpsraq $63, %zmm2, %zmm0 +; ALL-NEXT: vpternlogq $96, %zmm1, %zmm3, %zmm0 +; ALL-NEXT: retq %m = icmp slt <8 x i64> %mask, zeroinitializer %a = and <8 x i64> %x, %b = xor <8 x i64> %a, %y diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll index 26c30e950d0d2..0d32ddc147fc0 100644 --- a/llvm/test/CodeGen/X86/avx512vl-logic.ll +++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll @@ -1077,20 +1077,12 @@ define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) { } define <4 x i32> @ternlog_maskz_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %mask) { -; KNL-LABEL: ternlog_maskz_or_and_mask: -; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; KNL-NEXT: vpcmpgtd %xmm3, %xmm2, %k1 -; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; KNL-NEXT: vpord %xmm1, %xmm0, %xmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: ternlog_maskz_or_and_mask: -; SKX: ## %bb.0: -; SKX-NEXT: vpmovd2m %xmm3, %k1 -; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 {%k1} {z} -; SKX-NEXT: retq +; CHECK-LABEL: ternlog_maskz_or_and_mask: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; CHECK-NEXT: vpsrad $31, %xmm3, %xmm0 +; CHECK-NEXT: vpternlogd $224, %xmm1, %xmm2, %xmm0 +; CHECK-NEXT: retq %m = icmp slt <4 x i32> %mask, zeroinitializer %a = and <4 x i32> %x, %b = or <4 x i32> %a, %y @@ -1099,20 +1091,12 @@ define <4 x i32> @ternlog_maskz_or_and_mask(<4 x i32> %x, <4 x i32> %y, <4 x i32 } define <8 x i32> @ternlog_maskz_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y, <8 x i32> %mask) { -; KNL-LABEL: ternlog_maskz_or_and_mask_ymm: -; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; KNL-NEXT: vpcmpgtd %ymm2, %ymm3, %k1 -; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpord %ymm1, %ymm0, %ymm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: ternlog_maskz_or_and_mask_ymm: -; SKX: ## %bb.0: -; SKX-NEXT: vpmovd2m %ymm2, %k1 -; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 {%k1} {z} -; SKX-NEXT: retq +; CHECK-LABEL: ternlog_maskz_or_and_mask_ymm: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 +; CHECK-NEXT: vpsrad $31, %ymm2, %ymm0 +; CHECK-NEXT: vpternlogd $224, %ymm1, %ymm3, %ymm0 +; CHECK-NEXT: retq %m = icmp slt <8 x i32> %mask, zeroinitializer %a = and <8 x i32> %x, %b = or <8 x i32> %a, %y @@ -1121,20 +1105,12 @@ define <8 x i32> @ternlog_maskz_or_and_mask_ymm(<8 x i32> %x, <8 x i32> 
%y, <8 x } define <2 x i64> @ternlog_maskz_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) { -; KNL-LABEL: ternlog_maskz_xor_and_mask: -; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; KNL-NEXT: vpcmpgtq %xmm2, %xmm3, %k1 -; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: ternlog_maskz_xor_and_mask: -; SKX: ## %bb.0: -; SKX-NEXT: vpmovq2m %xmm2, %k1 -; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 {%k1} {z} -; SKX-NEXT: retq +; CHECK-LABEL: ternlog_maskz_xor_and_mask: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 +; CHECK-NEXT: vpsraq $63, %xmm2, %xmm0 +; CHECK-NEXT: vpternlogq $96, %xmm1, %xmm3, %xmm0 +; CHECK-NEXT: retq %m = icmp slt <2 x i64> %mask, zeroinitializer %a = and <2 x i64> %x, %b = xor <2 x i64> %a, %y @@ -1143,20 +1119,12 @@ define <2 x i64> @ternlog_maskz_xor_and_mask(<2 x i64> %x, <2 x i64> %y, <2 x i6 } define <4 x i64> @ternlog_maskz_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y, <4 x i64> %mask) { -; KNL-LABEL: ternlog_maskz_xor_and_mask_ymm: -; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; KNL-NEXT: vpcmpgtq %ymm2, %ymm3, %k1 -; KNL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: ternlog_maskz_xor_and_mask_ymm: -; SKX: ## %bb.0: -; SKX-NEXT: vpmovq2m %ymm2, %k1 -; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 -; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 {%k1} {z} -; SKX-NEXT: retq +; CHECK-LABEL: ternlog_maskz_xor_and_mask_ymm: +; CHECK: ## %bb.0: +; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 +; CHECK-NEXT: vpsraq $63, %ymm2, %ymm0 +; CHECK-NEXT: vpternlogq $96, %ymm1, %ymm3, %ymm0 +; CHECK-NEXT: retq %m = icmp slt <4 x i64> %mask, zeroinitializer %a = and <4 x i64> %x, %b = xor <4 x i64> %a, %y diff --git a/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir b/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir index 3006647f6bae5..ab284955e4955 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir +++ b/llvm/test/CodeGen/X86/basic-block-sections-mir-parse.mir @@ -118,7 +118,7 @@ body: | renamable $eax = MOV32rm $rbp, 1, $noreg, -8, $noreg :: (load (s32) from %ir.2) $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp CFI_INSTRUCTION def_cfa $rsp, 8 - RETQ implicit $eax + RET64 implicit $eax ... 
diff --git a/llvm/test/CodeGen/X86/block-placement.mir b/llvm/test/CodeGen/X86/block-placement.mir index 315669403c51f..81d58effbdc2c 100644 --- a/llvm/test/CodeGen/X86/block-placement.mir +++ b/llvm/test/CodeGen/X86/block-placement.mir @@ -73,7 +73,7 @@ body: | liveins: $rdi, $eax $rcx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax bb.2.null: liveins: $rdi diff --git a/llvm/test/CodeGen/X86/bug47278.mir b/llvm/test/CodeGen/X86/bug47278.mir index c7387c915972c..70390cbb04361 100644 --- a/llvm/test/CodeGen/X86/bug47278.mir +++ b/llvm/test/CodeGen/X86/bug47278.mir @@ -24,7 +24,7 @@ body: | ; CHECK: $cl = IMPLICIT_DEF ; CHECK: renamable $eax = COPY renamable $edx ; CHECK: dead renamable $eax = SHRD32rrCL renamable $eax, killed renamable $edx, implicit-def dead $eflags, implicit killed $cl - ; CHECK: RETL + ; CHECK: RET32 %0:gr32 = IMPLICIT_DEF %1:gr32 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg :: (load (s8) from `i168* undef` + 20, align 4, basealign 16) %2:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load (s32) from `i168* undef` + 12, basealign 16) @@ -40,6 +40,6 @@ body: | $cl = IMPLICIT_DEF %8:gr32 = COPY %1 %8:gr32 = SHRD32rrCL %8, %1, implicit-def dead $eflags, implicit $cl - RETL + RET32 ... diff --git a/llvm/test/CodeGen/X86/call-structfp.ll b/llvm/test/CodeGen/X86/call-structfp.ll new file mode 100644 index 0000000000000..7e6c2bbc2a076 --- /dev/null +++ b/llvm/test/CodeGen/X86/call-structfp.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnux32 | FileCheck %s +declare { i64, void ()* } @f() +define void @pr52357() { +; CHECK-LABEL: pr52357: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq f@PLT +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: jmpq *%rax # TAILCALL +entry: + %0 = tail call { i64, void ()* } @f() + %1 = extractvalue { i64, void ()* } %0, 1 + tail call void %1() + ret void +} diff --git a/llvm/test/CodeGen/X86/combine-concatvectors.ll b/llvm/test/CodeGen/X86/combine-concatvectors.ll index 3dffeaebfbcb4..c2f8827b9af7e 100644 --- a/llvm/test/CodeGen/X86/combine-concatvectors.ll +++ b/llvm/test/CodeGen/X86/combine-concatvectors.ll @@ -85,3 +85,38 @@ ifmerge.1298: ; preds = %loop.4942 store <2 x float> , <2 x float>* bitcast (i8* getelementptr inbounds ([49216 x i8], [49216 x i8]* @qa_, i64 0, i64 47372) to <2 x float>*), align 4 ret void } + +define <4 x float> @concat_of_broadcast_v4f32_v8f32(<8 x float>* %a0, <8 x float>* %a1, <8 x float>* %a2) { +; AVX1-LABEL: concat_of_broadcast_v4f32_v8f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovaps (%rsi), %ymm1 +; AVX1-NEXT: vmovaps (%rdx), %ymm2 +; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4],ymm2[5,6],ymm0[7] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm2[3,0],xmm0[0,0] +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[2,0] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: concat_of_broadcast_v4f32_v8f32: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovaps (%rdi), %ymm0 +; AVX2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0] +; AVX2-NEXT: vmovaps 
{{.*#+}} xmm1 = [6,7,4,3] +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4],mem[5,6],ymm0[7] +; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq + %ld0 = load volatile <8 x float>, <8 x float>* %a0 + %ld1 = load volatile <8 x float>, <8 x float>* %a1 + %ld2 = load volatile <8 x float>, <8 x float>* %a2 + %shuffle = shufflevector <8 x float> %ld0, <8 x float> %ld1, <8 x i32> + %shuffle1 = shufflevector <8 x float> %ld2, <8 x float> %shuffle, <4 x i32> + ret <4 x float> %shuffle1 +} diff --git a/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding2.mir b/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding2.mir index e0412817a04da..84f205ad77458 100644 --- a/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding2.mir +++ b/llvm/test/CodeGen/X86/dbg-changes-codegen-branch-folding2.mir @@ -109,7 +109,7 @@ body: | bb.3.for.cond.cleanup: liveins: $rdi, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, $xmm9, $xmm13, $xmm14 - RETQ + RET64 bb.4.for.body: successors: %bb.1, %bb.5 diff --git a/llvm/test/CodeGen/X86/dbg-distringtype-uint.ll b/llvm/test/CodeGen/X86/dbg-distringtype-uint.ll index 48aa3d575717f..0e79553842438 100644 --- a/llvm/test/CodeGen/X86/dbg-distringtype-uint.ll +++ b/llvm/test/CodeGen/X86/dbg-distringtype-uint.ll @@ -69,7 +69,7 @@ attributes #3 = { nofree nosync nounwind readnone speculatable willreturn } !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "elemnt", linkageName: "semiempirical_corrections_mp_esym_$ELEMNT", scope: !2, file: !3, line: 17, type: !16, isLocal: true, isDefinition: true) !2 = distinct !DISubprogram(name: "esym", linkageName: "semiempirical_corrections_mp_esym_", scope: !4, file: !3, line: 15, type: !5, scopeLine: 15, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !11) -!3 = !DIFile(filename: "se6.f90", directory: "/iusers/cchen15/examples/tests/jr30349/gamess-dga-master/object") +!3 = !DIFile(filename: "se6.f90", directory: "/iusers/cchen15/examples/tests/jr30349/gamess-dga-main/object") !4 = !DIModule(scope: null, name: "semiempirical_corrections", file: !3, line: 1) !5 = !DISubroutineType(types: !6) !6 = !{!7} diff --git a/llvm/test/CodeGen/X86/dbg-value-superreg-copy.mir b/llvm/test/CodeGen/X86/dbg-value-superreg-copy.mir index 0a8af06b9abd1..3b41f412a697e 100644 --- a/llvm/test/CodeGen/X86/dbg-value-superreg-copy.mir +++ b/llvm/test/CodeGen/X86/dbg-value-superreg-copy.mir @@ -44,7 +44,7 @@ body: | bb.2: $ax = COPY %1 $dx = COPY %2 - RETQ killed $ax, killed $dx + RET64 killed $ax, killed $dx ... 
# This test case was created as a reproducer for a bug when we got incorrect diff --git a/llvm/test/CodeGen/X86/dso_local_equivalent.ll b/llvm/test/CodeGen/X86/dso_local_equivalent.ll index e4a5fdfa9046f..9a8f163e0689d 100644 --- a/llvm/test/CodeGen/X86/dso_local_equivalent.ll +++ b/llvm/test/CodeGen/X86/dso_local_equivalent.ll @@ -74,12 +74,12 @@ define void @call_dso_local_alias_func() { ret void } -@ifunc_func = ifunc void (), i64 ()* @resolver -@dso_local_ifunc_func = dso_local ifunc void (), i64 ()* @resolver +@ifunc_func = ifunc void (), void ()* ()* @resolver +@dso_local_ifunc_func = dso_local ifunc void (), void ()* ()* @resolver -define internal i64 @resolver() { +define internal void ()* @resolver() { entry: - ret i64 0 + ret void ()* null } ; If an ifunc is not dso_local already, then we should still emit a stub for it diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index fbfb58d75b71e..e64fa2c21490e 100755 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -914,7 +914,7 @@ body: | ; CHECK: $ymm0 = VPERM2I128rr $ymm0, $ymm1, 32 $ymm0 = VSHUFI64X2Z256rri $ymm0, $ymm1, 228 - RETQ + RET64 ... --- # CHECK-LABEL: name: evex_z128_to_vex_test @@ -1800,7 +1800,7 @@ body: | ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rri $xmm0, 15, implicit $mxcsr - RETQ + RET64 ... --- # CHECK-LABEL: name: evex_scalar_to_vex_test @@ -2371,7 +2371,7 @@ body: | ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15, implicit $mxcsr - RETQ + RET64 ... --- # CHECK-LABEL: name: evex_z256_to_evex_test @@ -3285,7 +3285,7 @@ body: | ; CHECK: $ymm16 = VSHUFI64X2Z256rri $ymm16, $ymm1, 228 $ymm16 = VSHUFI64X2Z256rri $ymm16, $ymm1, 228 - RETQ + RET64 ... --- # CHECK-LABEL: name: evex_z128_to_evex_test @@ -4179,7 +4179,7 @@ body: | ; CHECK: $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rri $xmm0, 31, implicit $mxcsr - RETQ + RET64 ... --- # CHECK-LABEL: name: evex_scalar_to_evex_test @@ -4761,5 +4761,5 @@ body: | ; CHECK: $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31, implicit $mxcsr $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 31, implicit $mxcsr - RETQ + RET64 ... 
diff --git a/llvm/test/CodeGen/X86/expand-call-rvmarker.mir b/llvm/test/CodeGen/X86/expand-call-rvmarker.mir index e83090e22f7e4..7124d9934d9ca 100644 --- a/llvm/test/CodeGen/X86/expand-call-rvmarker.mir +++ b/llvm/test/CodeGen/X86/expand-call-rvmarker.mir @@ -34,7 +34,7 @@ # CHECK-NEXT: $rdi = MOV64rr $rax # CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax # CHECK-NEXT: $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp -# CHECK-NEXT: RETQ +# CHECK-NEXT: RET64 # name: test_objc_retainAutoreleaseReturnedValue @@ -66,7 +66,7 @@ body: | # CHECK-NEXT: $rdi = MOV64rr $rax # CHECK-NEXT: CALL64pcrel32 @objc_unsafeClaimAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def $rax # CHECK-NEXT: $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp -# CHECK-NEXT: RETQ +# CHECK-NEXT: RET64 # name: test_objc_unsafeClaimAutoreleasedReturnValue alignment: 16 @@ -99,7 +99,7 @@ body: | # CHECK-NEXT: $rdi = MOV64rr $rax # CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def dead $rax # CHECK-NEXT: $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp -# CHECK-NEXT: RETQ +# CHECK-NEXT: RET64 # name: test_objc_unsafeClaimAutoreleasedReturnValue_2_args alignment: 16 @@ -133,7 +133,7 @@ body: | # CHECK-NEXT: $rdi = MOV64rr $rax # CHECK-NEXT: CALL64pcrel32 @objc_retainAutoreleasedReturnValue, csr_64, implicit $rsp, implicit $ssp, implicit-def dead $rax # CHECK-NEXT: $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp -# CHECK-NEXT: RETQ +# CHECK-NEXT: RET64 # name: test_ret_void alignment: 16 diff --git a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir index 59e767d16416f..800af1ce5432e 100644 --- a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir +++ b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir @@ -32,5 +32,5 @@ body: | MMX_MOVQ64mr $rsp, 1, $noreg, -16, $noreg, killed $mm0 $xmm0 = MOVQI2PQIrm $rsp, 1, $noreg, -16, $noreg $xmm0 = PSHUFDri killed $xmm0, -44 - RETQ $xmm0 + RET64 $xmm0 ... 
diff --git a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll index 2ba7fbeffc40c..a155ad47b545c 100644 --- a/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ b/llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -38,7 +38,7 @@ bb7: ; preds = %bb !llvm.module.flags = !{!2, !3} !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 7.0.0 (trunk 330770) (llvm/trunk 330769)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly) -!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-master/llvm/lib/Demangle/ItaniumDemangle.cpp", directory: "/Users/vsk/src/builds/llvm.org-master-RA-stage2") +!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-main/llvm/lib/Demangle/ItaniumDemangle.cpp", directory: "/Users/vsk/src/builds/llvm.org-main-RA-stage2") !2 = !{i32 2, !"Debug Info Version", i32 3} !3 = !{i32 7, !"PIC Level", i32 2} !4 = distinct !DISubprogram(name: "printLeft", scope: !1, file: !1, line: 1306, type: !5, isLocal: true, isDefinition: true, scopeLine: 1306, flags: DIFlagPrototyped, isOptimized: true, unit: !0) diff --git a/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir index 2b2192a409e5e..6b26359b9b8f1 100644 --- a/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir +++ b/llvm/test/CodeGen/X86/fast-regalloc-live-out-debug-values.mir @@ -181,7 +181,7 @@ body: | ; CHECK: $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !45 ; CHECK: $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !45 ; CHECK: CFI_INSTRUCTION def_cfa $rsp, 8, debug-location !45 - ; CHECK: RETQ implicit killed $eax, debug-location !45 + ; CHECK: RET64 implicit killed $eax, debug-location !45 bb.0.entry: liveins: $edi @@ -217,6 +217,6 @@ body: | %7:gr64_nosp = IMPLICIT_DEF %8:gr32 = MOV32rm %3, 4, %7, 0, $noreg, debug-location !44 :: (load (s32) from %ir.arrayidx3) $eax = COPY %8, debug-location !45 - RETQ implicit $eax, debug-location !45 + RET64 implicit $eax, debug-location !45 ... diff --git a/llvm/test/CodeGen/X86/fixup-bw-copy.mir b/llvm/test/CodeGen/X86/fixup-bw-copy.mir index d9dc5f23b03d8..7d9dee748d415 100644 --- a/llvm/test/CodeGen/X86/fixup-bw-copy.mir +++ b/llvm/test/CodeGen/X86/fixup-bw-copy.mir @@ -47,7 +47,7 @@ body: | ; CHECK: $eax = MOV32rr undef $edi, implicit $dil $al = MOV8rr killed $dil - RETQ killed $al + RET64 killed $al ... @@ -62,7 +62,7 @@ body: | ; CHECK: $eax = MOV32rr undef $edi, implicit $dil $al = MOV8rr $dil, implicit $edi - RETQ killed $al + RET64 killed $al ... @@ -77,7 +77,7 @@ body: | ; CHECK: $eax = MOV32rr undef $edi, implicit $dil, implicit-def $rax $al = MOV8rr $dil, implicit-def $rax - RETQ killed $al + RET64 killed $al ... @@ -92,7 +92,7 @@ body: | ; CHECK: $eax = MOV32rr undef $edi, implicit $dil $al = MOV8rr $dil, implicit-def $eax - RETQ killed $al + RET64 killed $al ... @@ -107,7 +107,7 @@ body: | ; CHECK: $eax = MOV32rr undef $edi, implicit $dil $al = MOV8rr $dil, implicit-def $ax - RETQ killed $al + RET64 killed $al ... @@ -122,7 +122,7 @@ body: | ; CHECK: $eax = MOV32rr undef $edi, implicit $di $ax = MOV16rr $di, implicit-def $eax - RETQ killed $ax + RET64 killed $ax ... @@ -137,6 +137,6 @@ body: | ; CHECK: $eax = MOV32rr undef $edi, implicit $di, implicit-def $rax $ax = MOV16rr $di, implicit-def $rax - RETQ killed $ax + RET64 killed $ax ... 
diff --git a/llvm/test/CodeGen/X86/fixup-bw-inst.mir b/llvm/test/CodeGen/X86/fixup-bw-inst.mir index 83cd81ca7151c..4e997c15152f0 100644 --- a/llvm/test/CodeGen/X86/fixup-bw-inst.mir +++ b/llvm/test/CodeGen/X86/fixup-bw-inst.mir @@ -55,7 +55,7 @@ body: | $ax = MOV16rm killed $rax, 1, $noreg, 0, $noreg ; CHECK: $eax = MOVZX32rm16 killed $rax - RETQ $ax + RET64 $ax ... --- @@ -76,7 +76,7 @@ body: | bb.1: liveins: $rcx - RETQ $cl + RET64 $cl ... --- @@ -105,12 +105,12 @@ body: | $ax = MOV16rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax :: (load (s16) from %ir.p) ; CHECK: $eax = MOVZX32rm16 killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax :: (load (s16) from %ir.p) $ax = KILL $ax, implicit killed $eax - RETQ $ax + RET64 $ax bb.1: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags $ax = KILL $ax, implicit killed $eax - RETQ $ax + RET64 $ax ... --- @@ -130,7 +130,7 @@ body: | ; CHECK: $r9b = MOV8rr undef $r10b, implicit-def $r9d, implicit killed $r9d, implicit-def $eflags $ax = OR16rr undef $ax, $r9w, implicit-def $eflags - RETQ $ax + RET64 $ax ... --- @@ -147,6 +147,6 @@ body: | $cl = MOV8rr $bl, implicit-def $cx, implicit killed $ch, implicit-def $eflags ; CHECK: $cl = MOV8rr $bl, implicit-def $cx, implicit killed $ch, implicit-def $eflags - RETQ $cx + RET64 $cx ... diff --git a/llvm/test/CodeGen/X86/fold-sext-trunc.ll b/llvm/test/CodeGen/X86/fold-sext-trunc.ll index 0f1745e6e0a82..8ee2e113b6494 100644 --- a/llvm/test/CodeGen/X86/fold-sext-trunc.ll +++ b/llvm/test/CodeGen/X86/fold-sext-trunc.ll @@ -35,7 +35,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.debugify = !{!3, !4} !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-master/llvm/test/CodeGen/X86/fold-sext-trunc.ll", directory: "/") +!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-main/llvm/test/CodeGen/X86/fold-sext-trunc.ll", directory: "/") !2 = !{} !3 = !{i32 8} !4 = !{i32 6} diff --git a/llvm/test/CodeGen/X86/fold-zext-trunc.ll b/llvm/test/CodeGen/X86/fold-zext-trunc.ll index 064f9bad266d2..cbbfcaebacdf0 100644 --- a/llvm/test/CodeGen/X86/fold-zext-trunc.ll +++ b/llvm/test/CodeGen/X86/fold-zext-trunc.ll @@ -35,7 +35,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.debugify = !{!3, !4} !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-master/llvm/test/CodeGen/X86/fold-zext-trunc.ll", directory: "/") +!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-main/llvm/test/CodeGen/X86/fold-zext-trunc.ll", directory: "/") !2 = !{} !3 = !{i32 6} !4 = !{i32 4} diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll index bc3236343f530..26e5c04eb6c19 100644 --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -179,102 +179,62 @@ define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 { define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind { ; X86-FAST-LABEL: var_shift_i64: ; X86-FAST: # %bb.0: -; X86-FAST-NEXT: pushl %ebx ; X86-FAST-NEXT: pushl %edi ; X86-FAST-NEXT: pushl %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %ch -; 
X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: notb %cl -; X86-FAST-NEXT: shrdl $1, %edi, %esi -; X86-FAST-NEXT: shrl %edi -; X86-FAST-NEXT: shrdl %cl, %edi, %esi -; X86-FAST-NEXT: shrl %cl, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-FAST-NEXT: testb $32, %cl -; X86-FAST-NEXT: je .LBB5_2 -; X86-FAST-NEXT: # %bb.1: -; X86-FAST-NEXT: movl %edi, %esi -; X86-FAST-NEXT: xorl %edi, %edi -; X86-FAST-NEXT: .LBB5_2: -; X86-FAST-NEXT: movl %ebx, %eax -; X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: shll %cl, %eax -; X86-FAST-NEXT: shldl %cl, %ebx, %edx -; X86-FAST-NEXT: testb $32, %ch -; X86-FAST-NEXT: je .LBB5_4 -; X86-FAST-NEXT: # %bb.3: -; X86-FAST-NEXT: movl %eax, %edx -; X86-FAST-NEXT: xorl %eax, %eax -; X86-FAST-NEXT: .LBB5_4: -; X86-FAST-NEXT: orl %edi, %edx -; X86-FAST-NEXT: orl %esi, %eax +; X86-FAST-NEXT: jne .LBB5_1 +; X86-FAST-NEXT: # %bb.2: +; X86-FAST-NEXT: movl %edx, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-FAST-NEXT: jmp .LBB5_3 +; X86-FAST-NEXT: .LBB5_1: +; X86-FAST-NEXT: movl %esi, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-FAST-NEXT: .LBB5_3: +; X86-FAST-NEXT: movl %edi, %eax +; X86-FAST-NEXT: shldl %cl, %esi, %eax +; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-FAST-NEXT: shldl %cl, %edi, %edx ; X86-FAST-NEXT: popl %esi ; X86-FAST-NEXT: popl %edi -; X86-FAST-NEXT: popl %ebx ; X86-FAST-NEXT: retl ; ; X86-SLOW-LABEL: var_shift_i64: ; X86-SLOW: # %bb.0: -; X86-SLOW-NEXT: pushl %ebp ; X86-SLOW-NEXT: pushl %ebx ; X86-SLOW-NEXT: pushl %edi ; X86-SLOW-NEXT: pushl %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SLOW-NEXT: shrl %eax -; X86-SLOW-NEXT: movl %esi, %edi -; X86-SLOW-NEXT: shll $31, %edi -; X86-SLOW-NEXT: orl %eax, %edi -; X86-SLOW-NEXT: movl %ecx, %eax -; X86-SLOW-NEXT: movb %cl, %ch -; X86-SLOW-NEXT: notb %ch -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: shrl %esi -; X86-SLOW-NEXT: leal (%esi,%esi), %ebp -; X86-SLOW-NEXT: movb %al, %cl -; X86-SLOW-NEXT: shll %cl, %ebp -; X86-SLOW-NEXT: shll %cl, %ebx -; X86-SLOW-NEXT: movl %edx, %eax -; X86-SLOW-NEXT: shrl %eax -; X86-SLOW-NEXT: movb %ch, %cl -; X86-SLOW-NEXT: shrl %cl, %eax -; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-SLOW-NEXT: shll %cl, %edx -; X86-SLOW-NEXT: testb $32, {{[0-9]+}}(%esp) +; X86-SLOW-NEXT: testb $32, %bl ; X86-SLOW-NEXT: jne .LBB5_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: orl %eax, %ebx +; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: jmp .LBB5_3 ; X86-SLOW-NEXT: .LBB5_1: -; X86-SLOW-NEXT: movl %edx, %ebx -; X86-SLOW-NEXT: xorl %edx, %edx +; X86-SLOW-NEXT: movl %eax, %esi +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: .LBB5_3: -; X86-SLOW-NEXT: movb %ch, %cl +; X86-SLOW-NEXT: movl %esi, %edi +; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: shrl %eax +; X86-SLOW-NEXT: notb %cl +; X86-SLOW-NEXT: shrl %cl, %eax +; X86-SLOW-NEXT: orl %edi, %eax +; X86-SLOW-NEXT: shrl %esi ; X86-SLOW-NEXT: shrl %cl, %esi -; X86-SLOW-NEXT: testb $32, %ch -; X86-SLOW-NEXT: jne .LBB5_4 -; X86-SLOW-NEXT: # %bb.5: -; X86-SLOW-NEXT: orl %edi, %ebp -; X86-SLOW-NEXT: jmp .LBB5_6 -; X86-SLOW-NEXT: .LBB5_4: -; X86-SLOW-NEXT: movl %esi, %ebp -; 
X86-SLOW-NEXT: xorl %esi, %esi -; X86-SLOW-NEXT: .LBB5_6: -; X86-SLOW-NEXT: orl %ebp, %edx -; X86-SLOW-NEXT: orl %esi, %ebx -; X86-SLOW-NEXT: movl %edx, %eax -; X86-SLOW-NEXT: movl %ebx, %edx +; X86-SLOW-NEXT: movl %ebx, %ecx +; X86-SLOW-NEXT: shll %cl, %edx +; X86-SLOW-NEXT: orl %esi, %edx ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi ; X86-SLOW-NEXT: popl %ebx -; X86-SLOW-NEXT: popl %ebp ; X86-SLOW-NEXT: retl ; ; X64-FAST-LABEL: var_shift_i64: @@ -307,226 +267,50 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-FAST-NEXT: pushl %ebx ; X86-FAST-NEXT: pushl %edi ; X86-FAST-NEXT: pushl %esi -; X86-FAST-NEXT: subl $72, %esp -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-FAST-NEXT: movl %edx, %edi -; X86-FAST-NEXT: shldl $31, %eax, %edi -; X86-FAST-NEXT: movl %ebx, %eax -; X86-FAST-NEXT: notl %ebx -; X86-FAST-NEXT: andl $127, %ebx -; X86-FAST-NEXT: movb $64, %cl -; X86-FAST-NEXT: subb %bl, %cl -; X86-FAST-NEXT: shrl %edx -; X86-FAST-NEXT: movl %edx, %ebp -; X86-FAST-NEXT: shldl %cl, %edi, %edx -; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: movl %edi, %edx -; X86-FAST-NEXT: shll %cl, %edx -; X86-FAST-NEXT: testb $32, %cl ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-FAST-NEXT: testb $64, %cl ; X86-FAST-NEXT: jne .LBB6_1 ; X86-FAST-NEXT: # %bb.2: -; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: jmp .LBB6_3 -; X86-FAST-NEXT: .LBB6_1: -; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-FAST-NEXT: .LBB6_3: -; X86-FAST-NEXT: andl $127, %eax -; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: movb %al, %ch -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: shldl %cl, %esi, %eax -; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: movb %bl, %cl -; X86-FAST-NEXT: addb $-64, %cl ; X86-FAST-NEXT: movl %edi, %eax -; X86-FAST-NEXT: movl %ebp, %edx -; X86-FAST-NEXT: shrdl %cl, %ebp, %eax -; X86-FAST-NEXT: shrl %cl, %ebp +; X86-FAST-NEXT: movl %esi, %edi +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-FAST-NEXT: movl %ebx, %ebp +; X86-FAST-NEXT: movl %edx, %ebx +; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: testb $32, %cl -; X86-FAST-NEXT: jne .LBB6_4 -; X86-FAST-NEXT: # %bb.5: -; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: jmp .LBB6_6 +; X86-FAST-NEXT: je .LBB6_5 ; X86-FAST-NEXT: .LBB6_4: -; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X86-FAST-NEXT: .LBB6_6: +; X86-FAST-NEXT: movl %edx, %esi +; X86-FAST-NEXT: movl %edi, %edx +; X86-FAST-NEXT: movl %ebx, %edi +; X86-FAST-NEXT: movl %eax, %ebx +; X86-FAST-NEXT: jmp .LBB6_6 +; X86-FAST-NEXT: .LBB6_1: ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-FAST-NEXT: movb %ch, %cl -; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: shldl %cl, %eax, %ebp -; X86-FAST-NEXT: shll %cl, %eax -; X86-FAST-NEXT: 
shll %cl, %esi
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: jne .LBB6_7
-; X86-FAST-NEXT: # %bb.8:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jmp .LBB6_9
-; X86-FAST-NEXT: .LBB6_7:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %eax, %ebp
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_9:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: jb .LBB6_11
-; X86-FAST-NEXT: # %bb.10:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_11:
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shrdl %cl, %edx, %edi
-; X86-FAST-NEXT: shrl %cl, %edx
-; X86-FAST-NEXT: shldl $31, %eax, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: shrdl $1, %ebp, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: shrdl %cl, %esi, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, %eax
-; X86-FAST-NEXT: shrl %cl, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: je .LBB6_13
-; X86-FAST-NEXT: # %bb.12:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edx, %edi
-; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: xorl %edx, %edx
-; X86-FAST-NEXT: .LBB6_13:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jb .LBB6_15
-; X86-FAST-NEXT: # %bb.14:
-; X86-FAST-NEXT: xorl %ebp, %ebp
-; X86-FAST-NEXT: .LBB6_15:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb $64, %cl
-; X86-FAST-NEXT: subb %ch, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: shrl %cl, %ebp
-; X86-FAST-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: movl $0, %edx
-; X86-FAST-NEXT: jne .LBB6_17
-; X86-FAST-NEXT: # %bb.16:
-; X86-FAST-NEXT: movl %ebp, %edx
-; X86-FAST-NEXT: .LBB6_17:
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: addb $-64, %ch
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shll %cl, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: shldl %cl, %edi, %edx
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jne .LBB6_19
-; X86-FAST-NEXT: # %bb.18:
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_19:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_21
-; X86-FAST-NEXT: # %bb.20:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_21:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: jae .LBB6_23
-; X86-FAST-NEXT: # %bb.22:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_23:
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: jne .LBB6_25
-; X86-FAST-NEXT: # %bb.24:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_25:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_27
-; X86-FAST-NEXT: # %bb.26:
-; X86-FAST-NEXT: xorl %edx, %edx
-; X86-FAST-NEXT: .LBB6_27:
-; X86-FAST-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: shrdl %cl, %esi, %edi
 ; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: jne .LBB6_29
-; X86-FAST-NEXT: # %bb.28:
-; X86-FAST-NEXT: movl %edi, %ebp
-; X86-FAST-NEXT: .LBB6_29:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: jae .LBB6_31
-; X86-FAST-NEXT: # %bb.30:
-; X86-FAST-NEXT: orl %ebp, %esi
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_31:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_33
-; X86-FAST-NEXT: # %bb.32:
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-FAST-NEXT: jne .LBB6_4
+; X86-FAST-NEXT: .LBB6_5:
 ; X86-FAST-NEXT: movl %eax, %ebp
-; X86-FAST-NEXT: .LBB6_33:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_35
-; X86-FAST-NEXT: # %bb.34:
-; X86-FAST-NEXT: movl %edx, %ecx
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: orl %eax, %edx
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %ecx, %edx
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_35:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: je .LBB6_37
-; X86-FAST-NEXT: # %bb.36:
-; X86-FAST-NEXT: movl %ebp, %ecx
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-FAST-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_37:
-; X86-FAST-NEXT: orl %ecx, %edi
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-FAST-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: je .LBB6_39
-; X86-FAST-NEXT: # %bb.38:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_39:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-FAST-NEXT: orl %edx, %esi
-; X86-FAST-NEXT: movl %ecx, 12(%eax)
-; X86-FAST-NEXT: movl %esi, 8(%eax)
-; X86-FAST-NEXT: movl %edi, 4(%eax)
-; X86-FAST-NEXT: movl %ebx, (%eax)
-; X86-FAST-NEXT: addl $72, %esp
+; X86-FAST-NEXT: .LBB6_6:
+; X86-FAST-NEXT: movl %ebx, %eax
+; X86-FAST-NEXT: shldl %cl, %ebp, %eax
+; X86-FAST-NEXT: movl %edi, %ebp
+; X86-FAST-NEXT: shldl %cl, %ebx, %ebp
+; X86-FAST-NEXT: movl %edx, %ebx
+; X86-FAST-NEXT: shldl %cl, %edi, %ebx
+; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-FAST-NEXT: shldl %cl, %edx, %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl %esi, 12(%ecx)
+; X86-FAST-NEXT: movl %ebx, 8(%ecx)
+; X86-FAST-NEXT: movl %ebp, 4(%ecx)
+; X86-FAST-NEXT: movl %eax, (%ecx)
+; X86-FAST-NEXT: movl %ecx, %eax
 ; X86-FAST-NEXT: popl %esi
 ; X86-FAST-NEXT: popl %edi
 ; X86-FAST-NEXT: popl %ebx
@@ -539,289 +323,76 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %ebx
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $76, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: pushl %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: andl $127, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: # kill: def $al killed $al killed $eax
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: movb %al, %ah
-; X86-SLOW-NEXT: notb %ah
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-SLOW-NEXT: testb $32, %al
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: testb $64, %al
 ; X86-SLOW-NEXT: jne .LBB6_1
 ; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: orl (%esp), %edx # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: movl %ebp, %ecx
+; X86-SLOW-NEXT: movl %edi, %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl %esi, %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-SLOW-NEXT: jmp .LBB6_3
 ; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl %ebx, %ebp
-; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: xorl %ebx, %ebx
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
-; X86-SLOW-NEXT: jb .LBB6_5
-; X86-SLOW-NEXT: # %bb.4:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: notl %ebx
-; X86-SLOW-NEXT: andl $127, %ebx
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: shrl %ecx
-; X86-SLOW-NEXT: movl %eax, %esi
-; X86-SLOW-NEXT: shll $31, %esi
-; X86-SLOW-NEXT: orl %ecx, %esi
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: movl $0, %esi
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: jne .LBB6_7
-; X86-SLOW-NEXT: # %bb.6:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movl %ebp, %ecx
-; X86-SLOW-NEXT: .LBB6_7:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: shrl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: shll $31, %esi
-; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: addl %edi, %edi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_9
-; X86-SLOW-NEXT: # %bb.8:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: .LBB6_9:
-; X86-SLOW-NEXT: movb %bl, %dh
-; X86-SLOW-NEXT: addb $-64, %dh
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-SLOW-NEXT: jne .LBB6_11
-; X86-SLOW-NEXT: # %bb.10:
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: .LBB6_11:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: jb .LBB6_13
-; X86-SLOW-NEXT: # %bb.12:
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: .LBB6_13:
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movb $64, %ch
-; X86-SLOW-NEXT: movb $64, %ah
-; X86-SLOW-NEXT: subb %dl, %ah
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: leal (%ebp,%ebp), %edi
+; X86-SLOW-NEXT: testb $32, %al
+; X86-SLOW-NEXT: jne .LBB6_4
+; X86-SLOW-NEXT: # %bb.5:
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebp, %esi
+; X86-SLOW-NEXT: movl %edx, %ebp
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: .LBB6_6:
+; X86-SLOW-NEXT: movl %edx, %edi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %ecx
 ; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: testb $32, %ah
-; X86-SLOW-NEXT: jne .LBB6_14
-; X86-SLOW-NEXT: # %bb.15:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: jmp .LBB6_16
-; X86-SLOW-NEXT: .LBB6_14:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_16:
-; X86-SLOW-NEXT: addb $-64, %dl
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: shrl %ebx
+; X86-SLOW-NEXT: movb %al, %ch
+; X86-SLOW-NEXT: notb %ch
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: orl %edi, %ebx
+; X86-SLOW-NEXT: movl %ebp, %edi
+; X86-SLOW-NEXT: movb %al, %cl
 ; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dl
-; X86-SLOW-NEXT: jne .LBB6_17
-; X86-SLOW-NEXT: # %bb.18:
-; X86-SLOW-NEXT: orl %eax, %edi
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_20
-; X86-SLOW-NEXT: jmp .LBB6_21
-; X86-SLOW-NEXT: .LBB6_17:
+; X86-SLOW-NEXT: shrl %edx
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: orl %edi, %edx
 ; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: xorl %esi, %esi
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_21
-; X86-SLOW-NEXT: .LBB6_20:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_21:
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jae .LBB6_23
-; X86-SLOW-NEXT: # %bb.22:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: orl %ebp, %esi
-; X86-SLOW-NEXT: .LBB6_23:
-; X86-SLOW-NEXT: movl (%esp), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jae .LBB6_25
-; X86-SLOW-NEXT: # %bb.24:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_25:
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: shll $31, %esi
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: addl %edi, %edi
-; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; X86-SLOW-NEXT: movb %al, %cl
 ; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_27
-; X86-SLOW-NEXT: # %bb.26:
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_27:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: jne .LBB6_29
-; X86-SLOW-NEXT: # %bb.28:
-; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_29:
-; X86-SLOW-NEXT: subb %bl, %ch
-; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: shrl %ebp
 ; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: orl %edi, %ebp
+; X86-SLOW-NEXT: movb %al, %cl
+; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
 ; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: shrl %esi
 ; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: testb $32, %ch
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: jne .LBB6_30
-; X86-SLOW-NEXT: # %bb.31:
-; X86-SLOW-NEXT: orl %ecx, %edx
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_33
-; X86-SLOW-NEXT: jmp .LBB6_34
-; X86-SLOW-NEXT: .LBB6_30:
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_34
-; X86-SLOW-NEXT: .LBB6_33:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: orl %eax, %edx
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_34:
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_35
-; X86-SLOW-NEXT: # %bb.36:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-SLOW-NEXT: jmp .LBB6_37
-; X86-SLOW-NEXT: .LBB6_35:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: orl %ecx, %eax
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: .LBB6_37:
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB6_39
-; X86-SLOW-NEXT: # %bb.38:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_39:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-SLOW-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: je .LBB6_41
-; X86-SLOW-NEXT: # %bb.40:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: .LBB6_41:
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
 ; X86-SLOW-NEXT: movl %esi, 12(%eax)
-; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %ebp, 8(%eax)
+; X86-SLOW-NEXT: movl %edx, 4(%eax)
 ; X86-SLOW-NEXT: movl %ebx, (%eax)
-; X86-SLOW-NEXT: movl %ebp, 4(%eax)
-; X86-SLOW-NEXT: addl $76, %esp
+; X86-SLOW-NEXT: addl $4, %esp
 ; X86-SLOW-NEXT: popl %esi
 ; X86-SLOW-NEXT: popl %edi
 ; X86-SLOW-NEXT: popl %ebx
@@ -830,65 +401,39 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ;
 ; X64-FAST-LABEL: var_shift_i128:
 ; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movq %r8, %r9
-; X64-FAST-NEXT: movq %rcx, %r8
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shldq %cl, %rdi, %rsi
-; X64-FAST-NEXT: shrdq $1, %r8, %rdx
-; X64-FAST-NEXT: shrq %r8
-; X64-FAST-NEXT: notb %cl
-; X64-FAST-NEXT: shrdq %cl, %r8, %rdx
-; X64-FAST-NEXT: shrq %cl, %r8
-; X64-FAST-NEXT: xorl %eax, %eax
-; X64-FAST-NEXT: testb $64, %cl
-; X64-FAST-NEXT: cmovneq %r8, %rdx
-; X64-FAST-NEXT: cmovneq %rax, %r8
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shlq %cl, %rdi
-; X64-FAST-NEXT: testb $64, %r9b
+; X64-FAST-NEXT: testb $64, %r8b
 ; X64-FAST-NEXT: cmovneq %rdi, %rsi
-; X64-FAST-NEXT: cmoveq %rdi, %rax
-; X64-FAST-NEXT: orq %rdx, %rax
-; X64-FAST-NEXT: orq %rsi, %r8
-; X64-FAST-NEXT: movq %r8, %rdx
+; X64-FAST-NEXT: cmoveq %rcx, %rdx
+; X64-FAST-NEXT: cmovneq %rcx, %rdi
+; X64-FAST-NEXT: movq %rdi, %rax
+; X64-FAST-NEXT: movl %r8d, %ecx
+; X64-FAST-NEXT: shldq %cl, %rdx, %rax
+; X64-FAST-NEXT: shldq %cl, %rdi, %rsi
+; X64-FAST-NEXT: movq %rsi, %rdx
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i128:
 ; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movq %rcx, %r11
-; X64-SLOW-NEXT: movq %rdx, %r9
+; X64-SLOW-NEXT: testb $64, %r8b
+; X64-SLOW-NEXT: cmovneq %rdi, %rsi
+; X64-SLOW-NEXT: cmoveq %rcx, %rdx
+; X64-SLOW-NEXT: cmovneq %rcx, %rdi
+; X64-SLOW-NEXT: movq %rdi, %rax
 ; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rsi
-; X64-SLOW-NEXT: movq %rdi, %rdx
+; X64-SLOW-NEXT: shlq %cl, %rax
 ; X64-SLOW-NEXT: shrq %rdx
-; X64-SLOW-NEXT: movl %r8d, %r10d
-; X64-SLOW-NEXT: notb %r10b
-; X64-SLOW-NEXT: movl %r10d, %ecx
+; X64-SLOW-NEXT: movl %r8d, %r9d
+; X64-SLOW-NEXT: notb %r9b
+; X64-SLOW-NEXT: movl %r9d, %ecx
 ; X64-SLOW-NEXT: shrq %cl, %rdx
-; X64-SLOW-NEXT: orq %rsi, %rdx
-; X64-SLOW-NEXT: shrq %r9
-; X64-SLOW-NEXT: movq %r11, %rax
-; X64-SLOW-NEXT: shlq $63, %rax
-; X64-SLOW-NEXT: orq %r9, %rax
-; X64-SLOW-NEXT: shrq %cl, %rax
-; X64-SLOW-NEXT: shrq %r11
-; X64-SLOW-NEXT: leaq (%r11,%r11), %rsi
+; X64-SLOW-NEXT: orq %rdx, %rax
 ; X64-SLOW-NEXT: movl %r8d, %ecx
 ; X64-SLOW-NEXT: shlq %cl, %rsi
-; X64-SLOW-NEXT: orq %rax, %rsi
-; X64-SLOW-NEXT: movl %r10d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %r11
-; X64-SLOW-NEXT: xorl %eax, %eax
-; X64-SLOW-NEXT: testb $64, %r10b
-; X64-SLOW-NEXT: cmovneq %r11, %rsi
-; X64-SLOW-NEXT: cmovneq %rax, %r11
-; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rdi
-; X64-SLOW-NEXT: testb $64, %r8b
-; X64-SLOW-NEXT: cmovneq %rdi, %rdx
-; X64-SLOW-NEXT: cmoveq %rdi, %rax
-; X64-SLOW-NEXT: orq %rsi, %rax
-; X64-SLOW-NEXT: orq %r11, %rdx
+; X64-SLOW-NEXT: shrq %rdi
+; X64-SLOW-NEXT: movl %r9d, %ecx
+; X64-SLOW-NEXT: shrq %cl, %rdi
+; X64-SLOW-NEXT: orq %rsi, %rdi
+; X64-SLOW-NEXT: movq %rdi, %rdx
 ; X64-SLOW-NEXT: retq
 %tmp = tail call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
 ret i128 %tmp
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index ba6bf62e38bff..830dadba73730 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -176,106 +176,60 @@ define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
 define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-FAST-LABEL: var_shift_i64:
 ; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: pushl %ebp
-; X86-FAST-NEXT: pushl %ebx
-; X86-FAST-NEXT: pushl %edi
 ; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %bl
-; X86-FAST-NEXT: movb %bl, %ch
-; X86-FAST-NEXT: notb %ch
-; X86-FAST-NEXT: shldl $1, %eax, %edx
-; X86-FAST-NEXT: addl %eax, %eax
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shldl %cl, %eax, %edx
-; X86-FAST-NEXT: movl %ebp, %edi
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shrl %cl, %edi
-; X86-FAST-NEXT: shrdl %cl, %ebp, %esi
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: je .LBB5_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: xorl %edi, %edi
-; X86-FAST-NEXT: .LBB5_2:
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: je .LBB5_4
-; X86-FAST-NEXT: # %bb.3:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: testb $32, %cl
+; X86-FAST-NEXT: je .LBB5_1
+; X86-FAST-NEXT: # %bb.2:
+; X86-FAST-NEXT: movl %esi, %edx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: jmp .LBB5_3
+; X86-FAST-NEXT: .LBB5_1:
 ; X86-FAST-NEXT: movl %eax, %edx
-; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: .LBB5_4:
-; X86-FAST-NEXT: orl %edi, %edx
-; X86-FAST-NEXT: orl %esi, %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: .LBB5_3:
+; X86-FAST-NEXT: shrdl %cl, %edx, %eax
+; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-FAST-NEXT: shrdl %cl, %esi, %edx
 ; X86-FAST-NEXT: popl %esi
-; X86-FAST-NEXT: popl %edi
-; X86-FAST-NEXT: popl %ebx
-; X86-FAST-NEXT: popl %ebp
 ; X86-FAST-NEXT: retl
 ;
 ; X86-SLOW-LABEL: var_shift_i64:
 ; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %ebp
 ; X86-SLOW-NEXT: pushl %ebx
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: pushl %eax
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %bl
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLOW-NEXT: movl %eax, %edi
-; X86-SLOW-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shrl $31, %ecx
-; X86-SLOW-NEXT: leal (%ecx,%edx,2), %edx
-; X86-SLOW-NEXT: movb %bl, %ch
-; X86-SLOW-NEXT: notb %ch
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: movl %ebp, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
 ; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB5_1
+; X86-SLOW-NEXT: je .LBB5_1
 ; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: orl (%esp), %ebp # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-SLOW-NEXT: jmp .LBB5_3
 ; X86-SLOW-NEXT: .LBB5_1:
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: xorl %esi, %esi
+; X86-SLOW-NEXT: movl %eax, %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-SLOW-NEXT: .LBB5_3:
-; X86-SLOW-NEXT: addl %eax, %eax
+; X86-SLOW-NEXT: leal (%esi,%esi), %edi
+; X86-SLOW-NEXT: movb %bl, %ch
+; X86-SLOW-NEXT: notb %ch
 ; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: testb $32, %ch
-; X86-SLOW-NEXT: jne .LBB5_4
-; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: orl %edi, %edx
-; X86-SLOW-NEXT: jmp .LBB5_6
-; X86-SLOW-NEXT: .LBB5_4:
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: .LBB5_6:
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: movb %bl, %cl
+; X86-SLOW-NEXT: shrl %cl, %eax
+; X86-SLOW-NEXT: orl %edi, %eax
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: addl %edx, %edx
+; X86-SLOW-NEXT: movb %ch, %cl
+; X86-SLOW-NEXT: shll %cl, %edx
 ; X86-SLOW-NEXT: orl %esi, %edx
-; X86-SLOW-NEXT: orl %ebp, %eax
-; X86-SLOW-NEXT: addl $4, %esp
 ; X86-SLOW-NEXT: popl %esi
 ; X86-SLOW-NEXT: popl %edi
 ; X86-SLOW-NEXT: popl %ebx
-; X86-SLOW-NEXT: popl %ebp
 ; X86-SLOW-NEXT: retl
 ;
 ; X64-FAST-LABEL: var_shift_i64:
@@ -307,243 +261,48 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ; X86-FAST-NEXT: pushl %ebx
 ; X86-FAST-NEXT: pushl %edi
 ; X86-FAST-NEXT: pushl %esi
-; X86-FAST-NEXT: subl $76, %esp
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: pushl %eax
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-FAST-NEXT: movl %ebx, %ecx
-; X86-FAST-NEXT: andl $127, %ecx
-; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %cl, %ch
-; X86-FAST-NEXT: movb $64, %cl
-; X86-FAST-NEXT: subb %ch, %cl
-; X86-FAST-NEXT: shll %cl, %edi
-; X86-FAST-NEXT: movb %cl, (%esp) # 1-byte Spill
-; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: movl $0, %esi
-; X86-FAST-NEXT: jne .LBB6_2
-; X86-FAST-NEXT: # %bb.1:
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: .LBB6_2:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %eax, %edi
-; X86-FAST-NEXT: movl %ebp, %eax
-; X86-FAST-NEXT: shldl $1, %ebp, %edi
-; X86-FAST-NEXT: addl %ebp, %eax
-; X86-FAST-NEXT: notl %ebx
-; X86-FAST-NEXT: andl $127, %ebx
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shldl %cl, %eax, %edi
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: movl %eax, %esi
-; X86-FAST-NEXT: jne .LBB6_4
-; X86-FAST-NEXT: # %bb.3:
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: .LBB6_4:
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: shrdl %cl, %edi, %esi
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %edi, %esi
-; X86-FAST-NEXT: shrl %cl, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: shrl %cl, %edi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: testb $64, %cl
+; X86-FAST-NEXT: je .LBB6_1
+; X86-FAST-NEXT: # %bb.2:
+; X86-FAST-NEXT: movl %edi, %ebp
+; X86-FAST-NEXT: movl %ebx, %edi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-FAST-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-FAST-NEXT: movl %edx, %esi
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: shrdl %cl, %edx, %ebp
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: jne .LBB6_5
-; X86-FAST-NEXT: # %bb.6:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: jmp .LBB6_7
-; X86-FAST-NEXT: .LBB6_5:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: xorl %edi, %edi
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_7:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: movl $0, %esi
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: jne .LBB6_9
-; X86-FAST-NEXT: # %bb.8:
-; X86-FAST-NEXT: movl %eax, %esi
-; X86-FAST-NEXT: .LBB6_9:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jb .LBB6_11
-; X86-FAST-NEXT: # %bb.10:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_11:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shrdl $31, %edi, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movb %bl, %cl
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: movl $0, %edi
-; X86-FAST-NEXT: jne .LBB6_13
-; X86-FAST-NEXT: # %bb.12:
-; X86-FAST-NEXT: movl %eax, %edi
-; X86-FAST-NEXT: .LBB6_13:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb (%esp), %cl # 1-byte Reload
-; X86-FAST-NEXT: shldl %cl, %ebp, %eax
 ; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: jne .LBB6_15
-; X86-FAST-NEXT: # %bb.14:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_15:
-; X86-FAST-NEXT: movb %bl, %dh
-; X86-FAST-NEXT: addb $-64, %dh
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: movb %dh, %cl
-; X86-FAST-NEXT: shll %cl, %eax
-; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: testb $32, %dh
-; X86-FAST-NEXT: movl $0, %eax
-; X86-FAST-NEXT: jne .LBB6_17
-; X86-FAST-NEXT: # %bb.16:
-; X86-FAST-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_17:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jb .LBB6_19
-; X86-FAST-NEXT: # %bb.18:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_19:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_21
-; X86-FAST-NEXT: # %bb.20:
-; X86-FAST-NEXT: xorl %esi, %esi
-; X86-FAST-NEXT: .LBB6_21:
-; X86-FAST-NEXT: addb $-64, %ch
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: shrl %cl, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: movl $0, %eax
-; X86-FAST-NEXT: jne .LBB6_23
-; X86-FAST-NEXT: # %bb.22:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_23:
-; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: jae .LBB6_25
-; X86-FAST-NEXT: # %bb.24:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-FAST-NEXT: .LBB6_25:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: movb %ch, %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shrdl %cl, %eax, %ebp
-; X86-FAST-NEXT: testb $32, %ch
-; X86-FAST-NEXT: jne .LBB6_27
-; X86-FAST-NEXT: # %bb.26:
-; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_27:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jb .LBB6_29
-; X86-FAST-NEXT: # %bb.28:
-; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-FAST-NEXT: .LBB6_29:
-; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-FAST-NEXT: je .LBB6_4
+; X86-FAST-NEXT: jmp .LBB6_5
+; X86-FAST-NEXT: .LBB6_1:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-FAST-NEXT: movl %ebp, (%esp) # 4-byte Spill
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-FAST-NEXT: jae .LBB6_31
-; X86-FAST-NEXT: # %bb.30:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_31:
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shldl $1, %eax, %ebp
-; X86-FAST-NEXT: movl %ebp, %eax
-; X86-FAST-NEXT: movl %ebx, %ecx
-; X86-FAST-NEXT: shldl %cl, %edi, %eax
-; X86-FAST-NEXT: testb $32, %bl
-; X86-FAST-NEXT: jne .LBB6_33
-; X86-FAST-NEXT: # %bb.32:
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_33:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: movb %dh, %cl
-; X86-FAST-NEXT: shldl %cl, %esi, %eax
-; X86-FAST-NEXT: testb $32, %dh
-; X86-FAST-NEXT: jne .LBB6_35
-; X86-FAST-NEXT: # %bb.34:
-; X86-FAST-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_35:
-; X86-FAST-NEXT: movb $64, %cl
-; X86-FAST-NEXT: subb %bl, %cl
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: shrdl %cl, %eax, %esi
-; X86-FAST-NEXT: shrl %cl, %eax
 ; X86-FAST-NEXT: testb $32, %cl
-; X86-FAST-NEXT: je .LBB6_37
-; X86-FAST-NEXT: # %bb.36:
-; X86-FAST-NEXT: movl %eax, %esi
-; X86-FAST-NEXT: xorl %eax, %eax
-; X86-FAST-NEXT: .LBB6_37:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_39
-; X86-FAST-NEXT: # %bb.38:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: orl %eax, %ecx
-; X86-FAST-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_39:
-; X86-FAST-NEXT: cmpl $64, %ebx
-; X86-FAST-NEXT: jae .LBB6_41
-; X86-FAST-NEXT: # %bb.40:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-FAST-NEXT: orl %esi, %eax
-; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_41:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: testl %ebx, %ebx
-; X86-FAST-NEXT: je .LBB6_43
-; X86-FAST-NEXT: # %bb.42:
+; X86-FAST-NEXT: jne .LBB6_5
+; X86-FAST-NEXT: .LBB6_4:
+; X86-FAST-NEXT: movl %edx, %ebx
+; X86-FAST-NEXT: movl %edi, %edx
+; X86-FAST-NEXT: movl %esi, %edi
+; X86-FAST-NEXT: movl %ebp, %esi
 ; X86-FAST-NEXT: movl (%esp), %ebp # 4-byte Reload
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-FAST-NEXT: .LBB6_43:
-; X86-FAST-NEXT: orl %edx, %ebp
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-FAST-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-FAST-NEXT: je .LBB6_45
-; X86-FAST-NEXT: # %bb.44:
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-FAST-NEXT: .LBB6_45:
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-FAST-NEXT: movl %ecx, 4(%eax)
-; X86-FAST-NEXT: movl %esi, (%eax)
-; X86-FAST-NEXT: movl %ebp, 12(%eax)
+; X86-FAST-NEXT: .LBB6_5:
+; X86-FAST-NEXT: shrdl %cl, %esi, %ebp
+; X86-FAST-NEXT: shrdl %cl, %edi, %esi
+; X86-FAST-NEXT: shrdl %cl, %edx, %edi
+; X86-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-FAST-NEXT: shrdl %cl, %ebx, %edx
+; X86-FAST-NEXT: movl %edx, 12(%eax)
 ; X86-FAST-NEXT: movl %edi, 8(%eax)
-; X86-FAST-NEXT: addl $76, %esp
+; X86-FAST-NEXT: movl %esi, 4(%eax)
+; X86-FAST-NEXT: movl %ebp, (%eax)
+; X86-FAST-NEXT: addl $4, %esp
 ; X86-FAST-NEXT: popl %esi
 ; X86-FAST-NEXT: popl %edi
 ; X86-FAST-NEXT: popl %ebx
@@ -556,281 +315,76 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ; X86-SLOW-NEXT: pushl %ebx
 ; X86-SLOW-NEXT: pushl %edi
 ; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: subl $72, %esp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-SLOW-NEXT: subl $8, %esp
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: andl $127, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: leal (%edi,%edi), %ebp
-; X86-SLOW-NEXT: notb %al
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebp
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: leal (%esi,%esi), %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %edx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dl
-; X86-SLOW-NEXT: jne .LBB6_1
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: testb $64, %cl
+; X86-SLOW-NEXT: je .LBB6_1
 ; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; X86-SLOW-NEXT: orl %edi, %ebx
-; X86-SLOW-NEXT: movl %ebx, %esi
-; X86-SLOW-NEXT: jmp .LBB6_3
-; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl %eax, %ebp
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %edx
+; X86-SLOW-NEXT: movl %edi, %ebx
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl %ebp, %eax
+; X86-SLOW-NEXT: movl %esi, %ebp
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: jne .LBB6_5
+; X86-SLOW-NEXT: .LBB6_4:
+; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebp, %esi
+; X86-SLOW-NEXT: movl %edx, %ebp
+; X86-SLOW-NEXT: movl %eax, %edx
+; X86-SLOW-NEXT: jmp .LBB6_6
+; X86-SLOW-NEXT: .LBB6_1:
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: jb .LBB6_5
-; X86-SLOW-NEXT: # %bb.4:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: je .LBB6_4
 ; X86-SLOW-NEXT: .LBB6_5:
-; X86-SLOW-NEXT: leal (%ecx,%ecx), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-SLOW-NEXT: notl %ebx
-; X86-SLOW-NEXT: andl $127, %ebx
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %esi
+; X86-SLOW-NEXT: .LBB6_6:
+; X86-SLOW-NEXT: shrl %cl, %edx
+; X86-SLOW-NEXT: movl %ecx, %ebx
+; X86-SLOW-NEXT: notb %bl
+; X86-SLOW-NEXT: leal (%ebp,%ebp), %eax
 ; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shrl $31, %ecx
-; X86-SLOW-NEXT: leal (%ecx,%edi,2), %ecx
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %edi
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: orl %edx, %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %ebp
+; X86-SLOW-NEXT: leal (%esi,%esi), %edx
 ; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %edi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: movl $0, %edi
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: jne .LBB6_7
-; X86-SLOW-NEXT: # %bb.6:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: .LBB6_7:
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: shrl $31, %ecx
-; X86-SLOW-NEXT: leal (%ecx,%eax,2), %esi
+; X86-SLOW-NEXT: shll %cl, %edx
+; X86-SLOW-NEXT: orl %ebp, %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl (%esp), %esi # 4-byte Reload
+; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
 ; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_9
-; X86-SLOW-NEXT: # %bb.8:
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_9:
-; X86-SLOW-NEXT: movb %bl, %dh
-; X86-SLOW-NEXT: addb $-64, %dh
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: movl $0, %ecx
-; X86-SLOW-NEXT: jne .LBB6_11
-; X86-SLOW-NEXT: # %bb.10:
-; X86-SLOW-NEXT: movl %esi, %ecx
-; X86-SLOW-NEXT: .LBB6_11:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: jb .LBB6_13
-; X86-SLOW-NEXT: # %bb.12:
-; X86-SLOW-NEXT: xorl %ebp, %ebp
-; X86-SLOW-NEXT: .LBB6_13:
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb $64, %ch
-; X86-SLOW-NEXT: movb $64, %ah
-; X86-SLOW-NEXT: subb %dl, %ah
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl %esi, %edi
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movb %ah, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: testb $32, %ah
-; X86-SLOW-NEXT: jne .LBB6_14
-; X86-SLOW-NEXT: # %bb.15:
-; X86-SLOW-NEXT: orl %edi, %ebp
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: jmp .LBB6_16
-; X86-SLOW-NEXT: .LBB6_14:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_16:
-; X86-SLOW-NEXT: addb $-64, %dl
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movb %dl, %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb $32, %dl
-; X86-SLOW-NEXT: jne .LBB6_17
-; X86-SLOW-NEXT: # %bb.18:
-; X86-SLOW-NEXT: orl %eax, %ebp
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_20
-; X86-SLOW-NEXT: jmp .LBB6_21
-; X86-SLOW-NEXT: .LBB6_17:
-; X86-SLOW-NEXT: movl %edi, %ebp
-; X86-SLOW-NEXT: xorl %edi, %edi
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_21
-; X86-SLOW-NEXT: .LBB6_20:
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-SLOW-NEXT: .LBB6_21:
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: jae .LBB6_23
-; X86-SLOW-NEXT: # %bb.22:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: orl %esi, %edi
-; X86-SLOW-NEXT: .LBB6_23:
-; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jb .LBB6_24
-; X86-SLOW-NEXT: # %bb.25:
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: jmp .LBB6_26
-; X86-SLOW-NEXT: .LBB6_24:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_26:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: shrl $31, %eax
-; X86-SLOW-NEXT: leal (%eax,%esi,2), %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movb %bl, %cl
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: testb $32, %bl
-; X86-SLOW-NEXT: jne .LBB6_28
-; X86-SLOW-NEXT: # %bb.27:
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_28:
-; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movb %dh, %cl
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X86-SLOW-NEXT: shrl %cl, %esi
-; X86-SLOW-NEXT: testb $32, %dh
-; X86-SLOW-NEXT: jne .LBB6_30
-; X86-SLOW-NEXT: # %bb.29:
-; X86-SLOW-NEXT: orl %esi, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_30:
-; X86-SLOW-NEXT: subb %bl, %ch
-; X86-SLOW-NEXT: movl %ebp, %eax
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: addl %ebp, %ebp
-; X86-SLOW-NEXT: notb %cl
-; X86-SLOW-NEXT: shll %cl, %ebp
-; X86-SLOW-NEXT: movb %ch, %cl
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: shrl %cl, %ebp
-; X86-SLOW-NEXT: testb $32, %ch
+; X86-SLOW-NEXT: addl %edi, %edi
+; X86-SLOW-NEXT: movl %ebx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl %esi, %edi
 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: jne .LBB6_31
-; X86-SLOW-NEXT: # %bb.32:
-; X86-SLOW-NEXT: orl %ebp, %esi
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jb .LBB6_34
-; X86-SLOW-NEXT: jmp .LBB6_35
-; X86-SLOW-NEXT: .LBB6_31:
-; X86-SLOW-NEXT: movl %eax, %ebp
-; X86-SLOW-NEXT: xorl %eax, %eax
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_35
-; X86-SLOW-NEXT: .LBB6_34:
-; X86-SLOW-NEXT: movl %ebp, %esi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: orl %eax, %ebp
-; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, %ebp
-; X86-SLOW-NEXT: .LBB6_35:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: cmpl $64, %ebx
-; X86-SLOW-NEXT: jae .LBB6_37
-; X86-SLOW-NEXT: # %bb.36:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: orl %ebp, %eax
-; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: .LBB6_37:
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: testl %ebx, %ebx
-; X86-SLOW-NEXT: je .LBB6_39
-; X86-SLOW-NEXT: # %bb.38:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-SLOW-NEXT: .LBB6_39:
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-SLOW-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-SLOW-NEXT: je .LBB6_41
-; X86-SLOW-NEXT: # %bb.40:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; X86-SLOW-NEXT: .LBB6_41:
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: orl %ecx, %ebx
-; X86-SLOW-NEXT: orl %ebp, %edx
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-SLOW-NEXT: movl %ebx, (%eax)
-; X86-SLOW-NEXT: movl %esi, 12(%eax)
-; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %edi, 8(%eax)
-; X86-SLOW-NEXT: addl $72, %esp
+; X86-SLOW-NEXT: movl %edi, 12(%ecx)
+; X86-SLOW-NEXT: movl %ebp, 8(%ecx)
+; X86-SLOW-NEXT: movl %edx, 4(%ecx)
+; X86-SLOW-NEXT: movl %eax, (%ecx)
+; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: addl $8, %esp
 ; X86-SLOW-NEXT: popl %esi
 ; X86-SLOW-NEXT: popl %edi
 ; X86-SLOW-NEXT: popl %ebx
@@ -839,65 +393,37 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ;
 ; X64-FAST-LABEL: var_shift_i128:
 ; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movq %r8, %r9
-; X64-FAST-NEXT: movq %rcx, %r8
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shrdq %cl, %r8, %rdx
-; X64-FAST-NEXT: shrq %cl, %r8
-; X64-FAST-NEXT: xorl %eax, %eax
-; X64-FAST-NEXT: testb $64, %r9b
-; X64-FAST-NEXT: cmovneq %r8, %rdx
-; X64-FAST-NEXT: cmovneq %rax, %r8
-; X64-FAST-NEXT: shldq $1, %rdi, %rsi
-; X64-FAST-NEXT: addq %rdi, %rdi
-; X64-FAST-NEXT: notb %r9b
-; X64-FAST-NEXT: movl %r9d, %ecx
-; X64-FAST-NEXT: shldq %cl, %rdi, %rsi
-; X64-FAST-NEXT: shlq %cl, %rdi
-; X64-FAST-NEXT: testb $64, %r9b
-; X64-FAST-NEXT: cmovneq %rdi, %rsi
-; X64-FAST-NEXT: cmoveq %rdi, %rax
-; X64-FAST-NEXT: orq %rdx, %rax
-; X64-FAST-NEXT: orq %rsi, %r8
-; X64-FAST-NEXT: movq %r8, %rdx
+; X64-FAST-NEXT: movq %rdx, %rax
+; X64-FAST-NEXT: testb $64, %r8b
+; X64-FAST-NEXT: cmoveq %rdi, %rsi
+; X64-FAST-NEXT: cmoveq %rcx, %rdi
+; X64-FAST-NEXT: cmovneq %rcx, %rax
+; X64-FAST-NEXT: movl %r8d, %ecx
+; X64-FAST-NEXT: shrdq %cl, %rdi, %rax
+; X64-FAST-NEXT: shrdq %cl, %rsi, %rdi
+; X64-FAST-NEXT: movq %rdi, %rdx
 ; X64-FAST-NEXT: retq
 ;
 ; X64-SLOW-LABEL: var_shift_i128:
 ; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movq %rcx, %r9
-; X64-SLOW-NEXT: movq %rdx, %r10
-; X64-SLOW-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; X64-SLOW-NEXT: andq %rdi, %rax
-; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %rax
-; X64-SLOW-NEXT: movq %rdi, %rcx
-; X64-SLOW-NEXT: shrq $63, %rcx
-; X64-SLOW-NEXT: leaq (%rcx,%rsi,2), %rdx
-; X64-SLOW-NEXT: movl %r8d, %r11d
-; X64-SLOW-NEXT: notb %r11b
-; X64-SLOW-NEXT: movl %r11d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rdx
-; X64-SLOW-NEXT: orq %rax, %rdx
+; X64-SLOW-NEXT: testb $64, %r8b
+; X64-SLOW-NEXT: cmoveq %rdi, %rsi
+; X64-SLOW-NEXT: cmoveq %rcx, %rdi
+; X64-SLOW-NEXT: cmovneq %rcx, %rdx
 ; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %r10
-; X64-SLOW-NEXT: leaq (%r9,%r9), %rsi
-; X64-SLOW-NEXT: movl %r11d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rsi
-; X64-SLOW-NEXT: orq %r10, %rsi
+; X64-SLOW-NEXT: shrq %cl, %rdx
+; X64-SLOW-NEXT: leaq (%rdi,%rdi), %rax
+; X64-SLOW-NEXT: movl %r8d, %r9d
+; X64-SLOW-NEXT: notb %r9b
+; X64-SLOW-NEXT: movl %r9d, %ecx
+; X64-SLOW-NEXT: shlq %cl, %rax
+; X64-SLOW-NEXT: orq %rdx, %rax
 ; X64-SLOW-NEXT: movl %r8d, %ecx
-; X64-SLOW-NEXT: shrq %cl, %r9
-; X64-SLOW-NEXT: xorl %eax, %eax
-; X64-SLOW-NEXT: testb $64, %r8b
-; X64-SLOW-NEXT: cmovneq %r9, %rsi
-; X64-SLOW-NEXT: cmovneq %rax, %r9
-; X64-SLOW-NEXT: addq %rdi, %rdi
-; X64-SLOW-NEXT: movl %r11d, %ecx
-; X64-SLOW-NEXT: shlq %cl, %rdi
-; X64-SLOW-NEXT: testb $64, %r11b
-; X64-SLOW-NEXT: cmovneq %rdi, %rdx
-; X64-SLOW-NEXT: cmoveq %rdi, %rax
-; X64-SLOW-NEXT: orq %rsi, %rax
-; X64-SLOW-NEXT: orq %r9, %rdx
+; X64-SLOW-NEXT: shrq %cl, %rdi
+; X64-SLOW-NEXT: leaq (%rsi,%rsi), %rdx
+; X64-SLOW-NEXT: movl %r9d, %ecx
+; X64-SLOW-NEXT: shlq %cl, %rdx
+; X64-SLOW-NEXT: orq %rdi, %rdx
 ; X64-SLOW-NEXT: retq
 %tmp = tail call i128 @llvm.fshr.i128(i128 %x, i128 %y, i128 %z)
 ret i128 %tmp
@@ -1004,9 +530,9 @@ define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
 define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
 ; X86-FAST-LABEL: const_shift_i64:
 ; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X86-FAST-NEXT: shldl $25, %ecx, %edx
 ; X86-FAST-NEXT: shrdl $7, %ecx, %eax
 ; X86-FAST-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
index aaefb082cc8ca..a73ef92f9ff63 100644
--- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll
@@ -276,34 +276,19 @@ define i16 @rotr_i16(i16 %x, i16 %z) nounwind {
 define i64 @rotr_i64(i64 %x, i64 %z) nounwind {
 ; X32-SSE2-LABEL: rotr_i64:
 ; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %ebp
-; X32-SSE2-NEXT: pushl %ebx
-; X32-SSE2-NEXT: pushl %edi
 ; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl %edx, %esi
-; X32-SSE2-NEXT: shrl %cl, %esi
-; X32-SSE2-NEXT: movl %ebx, %edi
-; X32-SSE2-NEXT: shrdl %cl, %edx, %edi
-; X32-SSE2-NEXT: xorl %ebp, %ebp
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %esi, %edi
-; X32-SSE2-NEXT: cmovnel %ebp, %esi
-; X32-SSE2-NEXT: negb %cl
-; X32-SSE2-NEXT: movl %ebx, %eax
-; X32-SSE2-NEXT: shll %cl, %eax
-; X32-SSE2-NEXT: shldl %cl, %ebx, %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %eax, %edx
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: orl %edi, %eax
-; X32-SSE2-NEXT: orl %esi, %edx
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: cmovel %esi, %edx
+; X32-SSE2-NEXT: cmovel %eax, %esi
+; X32-SSE2-NEXT: movl %esi, %eax
+; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT: shrdl %cl, %esi, %edx
 ; X32-SSE2-NEXT: popl %esi
-; X32-SSE2-NEXT: popl %edi
-; X32-SSE2-NEXT: popl %ebx
-; X32-SSE2-NEXT: popl %ebp
 ; X32-SSE2-NEXT: retl
 ;
 ; X64-AVX2-LABEL: rotr_i64:
diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index dc4c929ee835e..ef1761d39f9e1 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -40,38 +40,22 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
 define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X32-SSE2-LABEL: fshl_i64:
 ; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: pushl %ebp
-; X32-SSE2-NEXT: pushl %ebx
 ; X32-SSE2-NEXT: pushl %edi
 ; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: notb %cl
-; X32-SSE2-NEXT: shrdl $1, %ebx, %esi
-; X32-SSE2-NEXT: shrl %ebx
-; X32-SSE2-NEXT: shrdl %cl, %ebx, %esi
-; X32-SSE2-NEXT: shrl %cl, %ebx
-; X32-SSE2-NEXT: xorl %ebp, %ebp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %ebx, %esi
-; X32-SSE2-NEXT: cmovnel %ebp, %ebx
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: cmovnel %esi, %edi
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %esi
 ; X32-SSE2-NEXT: movl %edi, %eax
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: shll %cl, %eax
+; X32-SSE2-NEXT: shldl %cl, %esi, %eax
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
 ; X32-SSE2-NEXT: shldl %cl, %edi, %edx
-; X32-SSE2-NEXT: testb $32, %ch
-; X32-SSE2-NEXT: cmovnel %eax, %edx
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: orl %esi, %eax
-; X32-SSE2-NEXT: orl %ebx, %edx
 ; X32-SSE2-NEXT: popl %esi
 ; X32-SSE2-NEXT: popl %edi
-; X32-SSE2-NEXT: popl %ebx
-; X32-SSE2-NEXT: popl %ebp
 ; X32-SSE2-NEXT: retl
 ;
 ; X64-AVX2-LABEL: fshl_i64:
@@ -92,169 +76,40 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ; X32-SSE2-NEXT: pushl %ebx
 ; X32-SSE2-NEXT: pushl %edi
 ; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: subl $64, %esp
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl %esi, %edi
-; X32-SSE2-NEXT: shldl $31, %ecx, %edi
-; X32-SSE2-NEXT: notl %ebx
-; X32-SSE2-NEXT: andl $127, %ebx
-; X32-SSE2-NEXT: movb $64, %cl
-; X32-SSE2-NEXT: subb %bl, %cl
-; X32-SSE2-NEXT: shrl %esi
-; X32-SSE2-NEXT: movl %esi, (%esp) # 4-byte Spill
-; X32-SSE2-NEXT: shldl %cl, %edi, %esi
-; X32-SSE2-NEXT: movl %edi, %ebp
-; X32-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %ebp, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: cmovnel %eax, %ebp
-; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: andl $127, %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: shldl %cl, %ebp, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl %ebx, %ecx
-; X32-SSE2-NEXT: addb $-64, %cl
-; X32-SSE2-NEXT: movl (%esp), %esi # 4-byte Reload
-; X32-SSE2-NEXT: shrdl %cl, %esi, %edi
-; X32-SSE2-NEXT: shrl %cl, %esi
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %esi, %edi
-; X32-SSE2-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl $0, %ecx
-; X32-SSE2-NEXT: cmovnel %ecx, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: shldl %cl, %edi, %esi
-; X32-SSE2-NEXT: movl %edi, %edx
-; X32-SSE2-NEXT: shll %cl, %edx
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: testb $32, %al
-; X32-SSE2-NEXT: movl %eax, %ecx
-; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: cmovnel %edx, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl $0, %eax
-; X32-SSE2-NEXT: cmovnel %eax, %ebp
-; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: cmovnel %eax, %edx
-; X32-SSE2-NEXT: xorl %eax, %eax
-; X32-SSE2-NEXT: cmpl $64, %ecx
-; X32-SSE2-NEXT: cmovael %eax, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
-; X32-SSE2-NEXT: shldl $31, %eax, %ebp
-; X32-SSE2-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-SSE2-NEXT: shrdl $1, %eax, %esi
-; X32-SSE2-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl %ebx, %ecx
-; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X32-SSE2-NEXT: movl (%esp), %edx # 4-byte Reload
-; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
-; X32-SSE2-NEXT: shrl %cl, %edx
-; X32-SSE2-NEXT: movl %esi, %ebx
-; X32-SSE2-NEXT: shrdl %cl, %ebp, %ebx
-; X32-SSE2-NEXT: movl %ebp, %esi
-; X32-SSE2-NEXT: shrl %cl, %esi
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %esi, %ebx
-; X32-SSE2-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl %edx, %ecx
-; X32-SSE2-NEXT: cmovnel %edx, %eax
-; X32-SSE2-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movl $0, %eax
-; X32-SSE2-NEXT: cmovnel %eax, %esi
-; X32-SSE2-NEXT: cmovnel %eax, %ecx
-; X32-SSE2-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-SSE2-NEXT: cmpl $64, %ebx
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-SSE2-NEXT: cmovael %eax, %ecx
-; X32-SSE2-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: xorl %ebp, %ebp
-; X32-SSE2-NEXT: movb $64, %ch
-; X32-SSE2-NEXT: subb %bl, %ch
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: shrl %cl, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: testb $32, %ch
-; X32-SSE2-NEXT: cmovnel %ebp, %edx
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X32-SSE2-NEXT: movb %bl, %cl
-; X32-SSE2-NEXT: addb $-64, %cl
-; X32-SSE2-NEXT: movl %edi, %ebp
-; X32-SSE2-NEXT: shll %cl, %ebp
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: shldl %cl, %edi, %eax
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: cmovnel %ebp, %eax
-; X32-SSE2-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl (%esp), %ebx # 4-byte Reload
-; X32-SSE2-NEXT: movl $0, %edi
-; X32-SSE2-NEXT: cmovael %edi, %ebx
-; X32-SSE2-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X32-SSE2-NEXT: cmpl $64, %ebx
-; X32-SSE2-NEXT: cmovbl %edx, %eax
-; X32-SSE2-NEXT: testb $32, %cl
-; X32-SSE2-NEXT: movl $0, %edi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-SSE2-NEXT: testb $64, %cl
+; X32-SSE2-NEXT: movl %esi, %eax
+; X32-SSE2-NEXT: cmovnel %ebx, %eax
+; X32-SSE2-NEXT: movl %edx, %ebp
 ; X32-SSE2-NEXT: cmovnel %edi, %ebp
-; X32-SSE2-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-SSE2-NEXT: cmovael %edi, %edx
-; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-SSE2-NEXT: movb %ch, %cl
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-SSE2-NEXT: shrdl %cl, %edx, %edi
-; X32-SSE2-NEXT: testb $32, %ch
-; X32-SSE2-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-SSE2-NEXT: cmpl $64, %ebx
-; X32-SSE2-NEXT: cmovael %ebp, %edi
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-SSE2-NEXT: cmpl $64, %edx
-; X32-SSE2-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: cmpl $64, %edx
-; X32-SSE2-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: testl %edx, %edx
-; X32-SSE2-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X32-SSE2-NEXT: movl %ecx, %edx
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X32-SSE2-NEXT: testl %ebx, %ebx
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edi
-; X32-SSE2-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X32-SSE2-NEXT: orl (%esp), %eax # 4-byte Folded Reload
+; X32-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %edi
+; X32-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %ebx
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
+; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
+; X32-SSE2-NEXT: testb $32, %cl
+; X32-SSE2-NEXT: cmovnel %esi, %edx
+; X32-SSE2-NEXT: cmovnel %ebp, %esi
+; X32-SSE2-NEXT: cmovnel %eax, %ebp
+; X32-SSE2-NEXT: cmovel %edi, %ebx
+; X32-SSE2-NEXT: cmovel %eax, %edi
+; X32-SSE2-NEXT: movl %edi, %eax
+; X32-SSE2-NEXT: shldl %cl, %ebx, %eax
+; X32-SSE2-NEXT: movl %ebp, %ebx
+; X32-SSE2-NEXT: shldl %cl, %edi, %ebx
+; X32-SSE2-NEXT: movl %esi, %edi
+; X32-SSE2-NEXT: shldl %cl, %ebp, %edi
+; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
+; X32-SSE2-NEXT: shldl %cl, %esi, %edx
 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-SSE2-NEXT: movl %eax, 12(%ecx)
+; X32-SSE2-NEXT: movl %edx,
12(%ecx) ; X32-SSE2-NEXT: movl %edi, 8(%ecx) -; X32-SSE2-NEXT: movl %esi, 4(%ecx) -; X32-SSE2-NEXT: movl %edx, (%ecx) +; X32-SSE2-NEXT: movl %ebx, 4(%ecx) +; X32-SSE2-NEXT: movl %eax, (%ecx) ; X32-SSE2-NEXT: movl %ecx, %eax -; X32-SSE2-NEXT: addl $64, %esp ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx @@ -263,27 +118,15 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind { ; ; X64-AVX2-LABEL: fshl_i128: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: movq %r8, %r9 -; X64-AVX2-NEXT: movq %rcx, %r8 -; X64-AVX2-NEXT: movl %r9d, %ecx -; X64-AVX2-NEXT: shldq %cl, %rdi, %rsi -; X64-AVX2-NEXT: shrdq $1, %r8, %rdx -; X64-AVX2-NEXT: shrq %r8 -; X64-AVX2-NEXT: notb %cl -; X64-AVX2-NEXT: shrdq %cl, %r8, %rdx -; X64-AVX2-NEXT: shrq %cl, %r8 -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: testb $64, %cl -; X64-AVX2-NEXT: cmovneq %r8, %rdx -; X64-AVX2-NEXT: cmovneq %rax, %r8 -; X64-AVX2-NEXT: movl %r9d, %ecx -; X64-AVX2-NEXT: shlq %cl, %rdi -; X64-AVX2-NEXT: testb $64, %r9b +; X64-AVX2-NEXT: testb $64, %r8b ; X64-AVX2-NEXT: cmovneq %rdi, %rsi -; X64-AVX2-NEXT: cmoveq %rdi, %rax -; X64-AVX2-NEXT: orq %rdx, %rax -; X64-AVX2-NEXT: orq %rsi, %r8 -; X64-AVX2-NEXT: movq %r8, %rdx +; X64-AVX2-NEXT: cmoveq %rcx, %rdx +; X64-AVX2-NEXT: cmovneq %rcx, %rdi +; X64-AVX2-NEXT: movq %rdi, %rax +; X64-AVX2-NEXT: movl %r8d, %ecx +; X64-AVX2-NEXT: shldq %cl, %rdx, %rax +; X64-AVX2-NEXT: shldq %cl, %rdi, %rsi +; X64-AVX2-NEXT: movq %rsi, %rdx ; X64-AVX2-NEXT: retq %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z) ret i128 %f @@ -294,7 +137,6 @@ declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X32-SSE2-LABEL: fshl_i37: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: pushl %ebp ; X32-SSE2-NEXT: pushl %ebx ; X32-SSE2-NEXT: pushl %edi ; X32-SSE2-NEXT: pushl %esi @@ -302,40 +144,31 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi ; X32-SSE2-NEXT: shldl $27, %ebx, %edi -; X32-SSE2-NEXT: shll $27, %ebx -; X32-SSE2-NEXT: shrdl $1, %edi, %ebx -; X32-SSE2-NEXT: shrl %edi ; X32-SSE2-NEXT: pushl $0 ; X32-SSE2-NEXT: pushl $37 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: calll __umoddi3 ; X32-SSE2-NEXT: addl $16, %esp -; X32-SSE2-NEXT: movl %eax, %edx -; X32-SSE2-NEXT: movl %edx, %ecx -; X32-SSE2-NEXT: notb %cl -; X32-SSE2-NEXT: shrdl %cl, %edi, %ebx -; X32-SSE2-NEXT: shrl %cl, %edi -; X32-SSE2-NEXT: xorl %eax, %eax +; X32-SSE2-NEXT: movl %eax, %ecx ; X32-SSE2-NEXT: testb $32, %cl -; X32-SSE2-NEXT: cmovnel %edi, %ebx -; X32-SSE2-NEXT: cmovnel %eax, %edi -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: movl %edx, %ecx -; X32-SSE2-NEXT: shll %cl, %eax -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-SSE2-NEXT: shldl %cl, %ebp, %esi -; X32-SSE2-NEXT: testb $32, %dl -; X32-SSE2-NEXT: cmovnel %eax, %esi -; X32-SSE2-NEXT: movl $0, %ecx -; X32-SSE2-NEXT: cmovnel %ecx, %eax -; X32-SSE2-NEXT: orl %ebx, %eax -; X32-SSE2-NEXT: orl %edi, %esi +; X32-SSE2-NEXT: jne .LBB3_1 +; X32-SSE2-NEXT: # %bb.2: +; X32-SSE2-NEXT: movl %edi, %ebx +; X32-SSE2-NEXT: movl %esi, %edi +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-SSE2-NEXT: jmp .LBB3_3 +; X32-SSE2-NEXT: .LBB3_1: +; X32-SSE2-NEXT: shll $27, %ebx +; X32-SSE2-NEXT: .LBB3_3: +; X32-SSE2-NEXT: movl %edi, %eax +; X32-SSE2-NEXT: shldl %cl, %ebx, %eax +; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-SSE2-NEXT: shldl %cl, %edi, 
%esi ; X32-SSE2-NEXT: movl %esi, %edx ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: popl %ebp ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i37: @@ -468,51 +301,39 @@ declare i37 @llvm.fshr.i37(i37, i37, i37) define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind { ; X32-SSE2-LABEL: fshr_i37: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: pushl %ebp ; X32-SSE2-NEXT: pushl %ebx ; X32-SSE2-NEXT: pushl %edi ; X32-SSE2-NEXT: pushl %esi -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-SSE2-NEXT: shldl $1, %edi, %esi -; X32-SSE2-NEXT: addl %edi, %edi +; X32-SSE2-NEXT: shldl $27, %ebx, %esi ; X32-SSE2-NEXT: pushl $0 ; X32-SSE2-NEXT: pushl $37 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp) ; X32-SSE2-NEXT: calll __umoddi3 ; X32-SSE2-NEXT: addl $16, %esp -; X32-SSE2-NEXT: movl %eax, %edx -; X32-SSE2-NEXT: addb $27, %dl -; X32-SSE2-NEXT: movl %edx, %eax -; X32-SSE2-NEXT: notb %al -; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shldl %cl, %edi, %esi -; X32-SSE2-NEXT: shldl $27, %ebp, %ebx -; X32-SSE2-NEXT: shll $27, %ebp -; X32-SSE2-NEXT: movl %edx, %ecx -; X32-SSE2-NEXT: shrdl %cl, %ebx, %ebp -; X32-SSE2-NEXT: shrl %cl, %ebx -; X32-SSE2-NEXT: xorl %ecx, %ecx -; X32-SSE2-NEXT: testb $32, %dl -; X32-SSE2-NEXT: cmovnel %ebx, %ebp -; X32-SSE2-NEXT: cmovnel %ecx, %ebx -; X32-SSE2-NEXT: xorl %edx, %edx ; X32-SSE2-NEXT: movl %eax, %ecx -; X32-SSE2-NEXT: shll %cl, %edi -; X32-SSE2-NEXT: testb $32, %al -; X32-SSE2-NEXT: cmovnel %edi, %esi -; X32-SSE2-NEXT: cmovnel %edx, %edi -; X32-SSE2-NEXT: orl %ebp, %edi -; X32-SSE2-NEXT: orl %ebx, %esi -; X32-SSE2-NEXT: movl %edi, %eax +; X32-SSE2-NEXT: addl $27, %ecx +; X32-SSE2-NEXT: testb $32, %cl +; X32-SSE2-NEXT: je .LBB10_1 +; X32-SSE2-NEXT: # %bb.2: +; X32-SSE2-NEXT: movl %edi, %edx +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi +; X32-SSE2-NEXT: jmp .LBB10_3 +; X32-SSE2-NEXT: .LBB10_1: +; X32-SSE2-NEXT: shll $27, %ebx ; X32-SSE2-NEXT: movl %esi, %edx +; X32-SSE2-NEXT: movl %ebx, %esi +; X32-SSE2-NEXT: .LBB10_3: +; X32-SSE2-NEXT: shrdl %cl, %edx, %esi +; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx +; X32-SSE2-NEXT: shrdl %cl, %edi, %edx +; X32-SSE2-NEXT: movl %esi, %eax ; X32-SSE2-NEXT: popl %esi ; X32-SSE2-NEXT: popl %edi ; X32-SSE2-NEXT: popl %ebx -; X32-SSE2-NEXT: popl %ebp ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i37: @@ -1070,9 +891,9 @@ define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind { define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind { ; X32-SSE2-LABEL: fshr_i64_const_overshift: ; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-SSE2-NEXT: shrdl $9, %ecx, %eax ; X32-SSE2-NEXT: shldl $23, %ecx, %edx ; X32-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/ifunc-asm.ll b/llvm/test/CodeGen/X86/ifunc-asm.ll index c1604882f3c2a..a37f4263fb504 100644 --- a/llvm/test/CodeGen/X86/ifunc-asm.ll +++ b/llvm/test/CodeGen/X86/ifunc-asm.ll @@ -2,13 +2,13 @@ target triple = "x86_64-unknown-linux-gnu" -define internal i64 @foo_ifunc() { +define internal i32 (i32)* @foo_ifunc() { entry: - ret i64 0 + ret i32 (i32)* null } ; CHECK: .type foo_ifunc,@function ; CHECK-NEXT: foo_ifunc: -@foo = ifunc i32 (i32), i64 ()* 
@foo_ifunc +@foo = ifunc i32 (i32), i32 (i32)* ()* @foo_ifunc ; CHECK: .type foo,@gnu_indirect_function ; CHECK-NEXT: .set foo, foo_ifunc diff --git a/llvm/test/CodeGen/X86/implicit-null-checks.mir b/llvm/test/CodeGen/X86/implicit-null-checks.mir index 5ba9caff35c63..d5a5b256a5067 100644 --- a/llvm/test/CodeGen/X86/implicit-null-checks.mir +++ b/llvm/test/CodeGen/X86/implicit-null-checks.mir @@ -427,15 +427,15 @@ body: | bb.2.ret_200: $eax = MOV32ri 200 - RETQ $eax + RET64 $eax bb.3.is_null: $eax = MOV32ri 42 - RETQ $eax + RET64 $eax bb.4.ret_100: $eax = MOV32ri 100 - RETQ $eax + RET64 $eax ... --- @@ -474,11 +474,11 @@ body: | bb.3.is_null: liveins: $eax, $ah, $al, $ax, $bh, $bl, $bp, $bpl, $bx, $eax, $ebp, $ebx, $rax, $rbp, $rbx, $r12, $r13, $r14, $r15, $r12b, $r13b, $r14b, $r15b, $r12d, $r13d, $r14d, $r15d, $r12w, $r13w, $r14w, $r15w - RETQ $eax + RET64 $eax bb.4.ret_100: $eax = MOV32ri 100 - RETQ $eax + RET64 $eax ... --- @@ -511,15 +511,15 @@ body: | bb.2.ret_200: $eax = MOV32ri 200 - RETQ $eax + RET64 $eax bb.3.is_null: $eax = MOV32ri 42 - RETQ $eax + RET64 $eax bb.4.ret_100: $eax = MOV32ri 100 - RETQ $eax + RET64 $eax ... --- @@ -551,15 +551,15 @@ body: | bb.2.ret_200: $eax = MOV32ri 200 - RETQ $eax + RET64 $eax bb.3.is_null: $eax = MOV32ri 42 - RETQ $eax + RET64 $eax bb.4.ret_100: $eax = MOV32ri 100 - RETQ $eax + RET64 $eax ... --- @@ -592,15 +592,15 @@ body: | bb.2.ret_200: $eax = MOV32ri 200 - RETQ $eax + RET64 $eax bb.3.is_null: $eax = MOV32ri 42 - RETQ $eax + RET64 $eax bb.4.ret_100: $eax = MOV32ri 100 - RETQ $eax + RET64 $eax ... --- @@ -635,12 +635,12 @@ body: | CALL64pcrel32 @f, csr_64, implicit $rsp, implicit-def $rsp $eax = MOV32rm killed $rbx, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax bb.2.leave: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... --- @@ -673,11 +673,11 @@ body: | $esi = MOV32ri 3076 $eax = BEXTR32rm killed $rdi, 1, $noreg, 0, $noreg, killed $esi, implicit-def dead $eflags :: (load (s32) from %ir.ptr) $eax = ADD32rm killed $eax, killed $rcx, 1, $noreg, 0, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.val) - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -706,11 +706,11 @@ body: | $rcx = MOV64rm killed $rsi, 1, $noreg, 0, $noreg $rcx = AND64rm killed $rcx, $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags $rax = MOV64rm killed $rdi, 1, $noreg, 0, $noreg - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -737,11 +737,11 @@ body: | $rsi = ADD64rr $rsi, $rdi, implicit-def dead $eflags $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $rax :: (load (s32) from %ir.x) $eax = LEA64_32r killed $rax, 1, killed $rsi, 4, $noreg - RETQ $eax + RET64 $eax bb.1.is_null: $eax = MOV32ri 42 - RETQ $eax + RET64 $eax ... --- @@ -769,11 +769,11 @@ body: | $rsi = ADD64rr $rsi, $rdi, implicit-def dead $eflags $esi = AND32rm killed $esi, $rdi, 1, $noreg, 0, $noreg, implicit-def dead $eflags $eax = MOV32rr $esi - RETQ $eax + RET64 $eax bb.1.is_null: $eax = MOV32ri 42 - RETQ $eax + RET64 $eax ... --- @@ -800,10 +800,10 @@ body: | liveins: $rdi, $rsi MOV64mr killed $rdi, 1, $noreg, 0, $noreg, killed $rsi - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... 
--- @@ -830,10 +830,10 @@ body: | liveins: $rdi, $rsi MOV64mr killed $rdi, 1, $noreg, 16, $noreg, killed $rsi - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... --- @@ -863,10 +863,10 @@ body: | $esi = ADD32rr killed $esi, killed $esi, implicit-def dead $eflags MOV32mr killed $rdi, 1, $noreg, 16, $noreg, killed $esi - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... --- @@ -895,13 +895,13 @@ body: | $esi = ADD32rr $esi, $esi, implicit-def dead $eflags MOV32mr killed $rdi, 1, $noreg, 0, $noreg, $esi $eax = MOV32rr killed $esi - RETQ $eax + RET64 $eax bb.2.is_null: liveins: $rsi $eax = MOV32rr killed $esi - RETQ $eax + RET64 $eax ... --- @@ -928,10 +928,10 @@ body: | liveins: $rdi, $rsi MOV32mr killed $rdi, 1, $noreg, 0, $noreg, killed $esi :: (volatile store (s32) into %ir.ptr) - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... --- @@ -960,10 +960,10 @@ body: | $esi = ADD32rr killed $esi, killed $esi, implicit-def dead $eflags $esi = ADD32ri killed $esi, 15, implicit-def dead $eflags MOV32mr killed $rdi, 1, $noreg, 16, $noreg, killed $esi - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... --- @@ -991,10 +991,10 @@ body: | $rdi = ADD64rr killed $rdi, killed $rdi, implicit-def dead $eflags MOV32mr killed $rdi, 1, $noreg, 16, $noreg, killed $esi - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... --- @@ -1022,11 +1022,11 @@ body: | $rax = MOV64rr $rdi MOV32mr killed $rdi, 1, $noreg, 16, $noreg, killed $esi - RETQ $eax + RET64 $eax bb.2.is_null: $rax = XOR64rr undef $rax, undef $rax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -1063,12 +1063,12 @@ body: | MOV32mi $rbx, 1, $noreg, 0, $noreg, 20 $rax = MOV64rr killed $rbx $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags $rbx = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax + RET64 $eax ... --- @@ -1097,11 +1097,11 @@ body: | $eax = MOV32rr $esi $esi = ADD32ri killed $esi, 15, implicit-def dead $eflags MOV32mr killed $rdi, 1, $noreg, 0, $noreg, killed $esi - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -1129,11 +1129,11 @@ body: | MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 2 $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -1161,11 +1161,11 @@ body: | $eax = MOV32rm killed $rsi, 1, $noreg, 0, $noreg MOV32mi killed $rdi, 1, $noreg, 0, $noreg, 2 - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -1193,10 +1193,10 @@ body: | MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 3 MOV32mi killed $rdi, 1, $noreg, 0, $noreg, 2 - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... --- @@ -1224,10 +1224,10 @@ body: | $esi = ADD32rr $esi, $esi, implicit-def dead $eflags ADD32mr killed $rdi, 1, $noreg, 0, $noreg, killed $esi, implicit-def dead $eflags - RETQ + RET64 bb.2.is_null: - RETQ + RET64 ... --- @@ -1255,11 +1255,11 @@ body: | MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 3 :: (store (s32) into %ir.ptr2) $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... 
--- @@ -1287,11 +1287,11 @@ body: | MOV32mi killed $rsi, 1, $noreg, 0, $noreg, 3 :: (store (s32) into %ir.ptr2) $eax = MOV32rm killed $rdi, 1, $noreg, 0, $noreg :: (load (s32) from %ir.ptr) - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -1325,11 +1325,11 @@ body: | MOV64mr $rsp, 1, $noreg, 0, $noreg, $rdi :: (store (s64) into %stack.0) $edi = MOV32rm $rdi, 1, $noreg, 8, $noreg :: (load (s32) from %ir.ptr) $eax = MOV32rr $edi - RETQ $eax + RET64 $eax bb.2.is_null: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -1354,10 +1354,10 @@ body: | $rcx = MOV64ri -9223372036854775808 $eax = MOV32rm killed $rdi, 2, $rcx, 0, $noreg, implicit-def $rax - RETQ $eax + RET64 $eax bb.1.is_null: $eax = MOV32ri 42 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/X86/implicit-null-chk-reg-rewrite.mir b/llvm/test/CodeGen/X86/implicit-null-chk-reg-rewrite.mir index 6e83b207c2c99..6330e8c2564d8 100644 --- a/llvm/test/CodeGen/X86/implicit-null-chk-reg-rewrite.mir +++ b/llvm/test/CodeGen/X86/implicit-null-chk-reg-rewrite.mir @@ -41,9 +41,9 @@ body: | liveins: $rdi, $rsi $rax = MOV64rm renamable $rdi, 1, $noreg, 4, $noreg - RETQ $eax + RET64 $eax bb.2.is_null: $eax = MOV32ri 200 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll index d8e44470fc62e..f07a13cf54ae8 100644 --- a/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll +++ b/llvm/test/CodeGen/X86/inline-asm-A-constraint.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-- -early-live-intervals < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--" diff --git a/llvm/test/CodeGen/X86/instr-symbols.mir b/llvm/test/CodeGen/X86/instr-symbols.mir index eb1f1f1d1bd07..a900288d70869 100644 --- a/llvm/test/CodeGen/X86/instr-symbols.mir +++ b/llvm/test/CodeGen/X86/instr-symbols.mir @@ -69,6 +69,6 @@ body: | %7:gr64 = ADD64rr killed %4, killed %5, implicit-def $eflags %8:gr64 = ADD64rr killed %6, killed %7, implicit-def $eflags $rax = COPY %8 - RETQ implicit $rax + RET64 implicit $rax ... diff --git a/llvm/test/CodeGen/X86/invalid-liveness.mir b/llvm/test/CodeGen/X86/invalid-liveness.mir index 416921ddcd013..039bb6bc79c75 100644 --- a/llvm/test/CodeGen/X86/invalid-liveness.mir +++ b/llvm/test/CodeGen/X86/invalid-liveness.mir @@ -25,5 +25,5 @@ body: | bb.3: $eax = COPY %0 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/X86/leaFixup32.mir b/llvm/test/CodeGen/X86/leaFixup32.mir index 9fe5b190c1542..c929df8acfc1f 100644 --- a/llvm/test/CodeGen/X86/leaFixup32.mir +++ b/llvm/test/CodeGen/X86/leaFixup32.mir @@ -110,9 +110,9 @@ body: | ; CHECK: liveins: $eax, $ebp ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax $eax = LEA32r killed $eax, 1, killed $ebp, -5, $noreg - RETQ $eax + RET64 $eax ... --- @@ -148,9 +148,9 @@ body: | ; CHECK: liveins: $eax, $ebp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $ebp = LEA32r killed $ebp, 1, killed $eax, -5, $noreg - RETQ $ebp + RET64 $ebp ... 
--- @@ -185,9 +185,9 @@ body: | ; CHECK-LABEL: name: test1add_ebp_32 ; CHECK: liveins: $eax, $ebp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $ebp = LEA32r killed $ebp, 1, killed $eax, 0, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -224,9 +224,9 @@ body: | ; CHECK: liveins: $eax, $ebp, $esi ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA32r killed $eax, 1, killed $ebp, -5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -263,9 +263,9 @@ body: | ; CHECK: liveins: $eax, $ebp ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA32r killed $ebp, 1, killed $eax, -5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -301,9 +301,9 @@ body: | ; CHECK-LABEL: name: test1lea_ebp_32 ; CHECK: liveins: $eax, $ebp ; CHECK: $ebx = LEA32r killed $eax, 1, killed $ebp, 0, $noreg - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA32r killed $ebp, 1, killed $eax, 0, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -339,9 +339,9 @@ body: | ; CHECK: liveins: $eax, $ebp ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags ; CHECK: $eax = ADD32ri $eax, 129, implicit-def $eflags - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax $eax = LEA32r killed $eax, 1, killed $ebp, 129, $noreg - RETQ $eax + RET64 $eax ... --- @@ -378,9 +378,9 @@ body: | ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebx = MOV32rr $ebp ; CHECK: $ebx = ADD32rr $ebx, $ebp, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA32r killed $ebp, 1, $ebp, 0, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -416,9 +416,9 @@ body: | ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebx = LEA32r $noreg, 1, $ebp, 5, $noreg ; CHECK: $ebx = ADD32rr $ebx, $ebp, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA32r $ebp, 1, $ebp, 5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -454,9 +454,9 @@ body: | ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebx = LEA32r $noreg, 4, $ebp, 5, $noreg ; CHECK: $ebx = ADD32rr $ebx, $ebp, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA32r $ebp, 4, $ebp, 5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -491,9 +491,9 @@ body: | ; CHECK-LABEL: name: test_skip_opt_32 ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebp = LEA32r killed $ebp, 4, killed $ebp, 0, $noreg - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $ebp = LEA32r killed $ebp, 4, killed $ebp, 0, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -529,23 +529,23 @@ body: | ; CHECK: CMP32rr $eax, killed $ebx, implicit-def $eflags ; CHECK: $ebx = LEA32r killed $eax, 4, killed $eax, 5, $noreg ; CHECK: JCC_1 %bb.1, 4, implicit $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx ; CHECK: bb.1: ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebp = LEA32r killed $ebx, 4, killed $ebx, 0, $noreg ; CHECK: $ebp = ADD32ri8 $ebp, 5, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp bb.0 (%ir-block.0): liveins: $eax, $ebp, $ebx CMP32rr $eax, killed $ebx, implicit-def $eflags $ebx = LEA32r killed $eax, 4, killed $eax, 5, $noreg JCC_1 %bb.1, 4, implicit $eflags - RETQ $ebx + RET64 $ebx bb.1: liveins: $eax, $ebp, $ebx $ebp = LEA32r killed $ebx, 4, killed $ebx, 5, $noreg - RETQ $ebp + RET64 $ebp ... 
diff --git a/llvm/test/CodeGen/X86/leaFixup64.mir b/llvm/test/CodeGen/X86/leaFixup64.mir index 7fdf582055c24..2bab3c10e78e3 100644 --- a/llvm/test/CodeGen/X86/leaFixup64.mir +++ b/llvm/test/CodeGen/X86/leaFixup64.mir @@ -187,9 +187,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp ; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax $eax = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg - RETQ $eax + RET64 $eax ... --- @@ -225,9 +225,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax ; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $ebp = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -262,9 +262,9 @@ body: | ; CHECK-LABEL: name: test1lea_rbp_64_32_1 ; CHECK: liveins: $rax, $rbp ; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $ebp = LEA64_32r killed $rbp, 1, killed $rax, 0, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -300,9 +300,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags ; CHECK: $rax = ADD64ri8 $rax, -5, implicit-def $eflags - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax $rax = LEA64r killed $rax, 1, killed $rbp, -5, $noreg - RETQ $eax + RET64 $eax ... --- @@ -338,9 +338,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $rbp = ADD64rr $rbp, $rax, implicit-def $eflags ; CHECK: $rbp = ADD64ri8 $rbp, -5, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $rbp = LEA64r killed $rbp, 1, killed $rax, -5, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -375,9 +375,9 @@ body: | ; CHECK-LABEL: name: test1add_rbp_64 ; CHECK: liveins: $rax, $rbp ; CHECK: $rbp = ADD64rr $rbp, $rax, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $rbp = LEA64r killed $rbp, 1, killed $rax, 0, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -414,9 +414,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -453,9 +453,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg ; CHECK: $ebx = ADD32ri8 $ebx, -5, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -491,9 +491,9 @@ body: | ; CHECK-LABEL: name: test1lea_rbp_64_32 ; CHECK: liveins: $rax, $rbp ; CHECK: $ebx = LEA64_32r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rax, 0, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -530,9 +530,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg ; CHECK: $rbx = ADD64ri8 $rbx, -5, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $rbx = LEA64r killed $rax, 1, killed $rbp, -5, $noreg - RETQ $ebx + RET64 $ebx ... 
--- @@ -569,9 +569,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg ; CHECK: $rbx = ADD64ri8 $rbx, -5, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $rbx = LEA64r killed $rbp, 1, killed $rax, -5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -607,9 +607,9 @@ body: | ; CHECK-LABEL: name: test1lea_rbp_64 ; CHECK: liveins: $rax, $rbp ; CHECK: $rbx = LEA64r killed $rax, 1, killed $rbp, 0, $noreg - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $rbx = LEA64r killed $rbp, 1, killed $rax, 0, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -646,11 +646,11 @@ body: | ; CHECK: $r13 = KILL $rdi, implicit-def $r13 ; CHECK: $r12 = LEA64r $noreg, 2, killed $r13, 5, $noreg ; CHECK: $r12 = ADD64rr $r12, killed $rbp, implicit-def $eflags - ; CHECK: RETQ $r12 + ; CHECK: RET64 $r12 $rbp = KILL $rbp, implicit-def $rbp $r13 = KILL $rdi, implicit-def $r13 $r12 = LEA64r killed $rbp, 2, killed $r13, 5, $noreg - RETQ $r12 + RET64 $r12 ... --- @@ -686,9 +686,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp ; CHECK: $eax = ADD32ri $eax, 129, implicit-def $eflags - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax $eax = LEA64_32r killed $rax, 1, killed $rbp, 129, $noreg - RETQ $eax + RET64 $eax ... --- @@ -723,9 +723,9 @@ body: | ; CHECK-LABEL: name: test1mov1add_rbp_64_32 ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 0, $noreg - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 0, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -760,9 +760,9 @@ body: | ; CHECK-LABEL: name: testleaadd_rbp_index_64_32 ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 5, $noreg - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA64_32r killed $rbp, 1, killed $rbp, 5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -797,9 +797,9 @@ body: | ; CHECK-LABEL: name: testleaadd_rbp_index2_64_32 ; CHECK: liveins: $eax, $ebp, $ebx ; CHECK: $ebx = LEA64_32r killed $rbp, 4, killed $rbp, 5, $noreg - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $ebx = LEA64_32r killed $rbp, 4, killed $rbp, 5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -835,9 +835,9 @@ body: | ; CHECK: liveins: $rax, $rbp ; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags ; CHECK: $rax = ADD64ri32 $rax, 129, implicit-def $eflags - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax $rax = LEA64r killed $rax, 1, killed $rbp, 129, $noreg - RETQ $eax + RET64 $eax ... --- @@ -873,9 +873,9 @@ body: | ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbx = MOV64rr $rbp ; CHECK: $rbx = ADD64rr $rbx, $rbp, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $rbx = LEA64r killed $rbp, 1, $rbp, 0, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -911,9 +911,9 @@ body: | ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbx = LEA64r $noreg, 1, $rbp, 5, $noreg ; CHECK: $rbx = ADD64rr $rbx, $rbp, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $rbx = LEA64r $rbp, 1, $rbp, 5, $noreg - RETQ $ebx + RET64 $ebx ... --- @@ -949,9 +949,9 @@ body: | ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbx = LEA64r $noreg, 4, $rbp, 5, $noreg ; CHECK: $rbx = ADD64rr $rbx, $rbp, implicit-def $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx $rbx = LEA64r $rbp, 4, $rbp, 5, $noreg - RETQ $ebx + RET64 $ebx ... 
--- @@ -986,9 +986,9 @@ body: | ; CHECK-LABEL: name: test_skip_opt_64 ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbp = LEA64r killed $rbp, 4, killed $rbp, 0, $noreg - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $rbp = LEA64r killed $rbp, 4, killed $rbp, 0, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -1024,23 +1024,23 @@ body: | ; CHECK: CMP64rr $rax, killed $rbx, implicit-def $eflags ; CHECK: $rbx = LEA64r killed $rax, 4, killed $rax, 5, $noreg ; CHECK: JCC_1 %bb.1, 4, implicit $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx ; CHECK: bb.1: ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $rbp = LEA64r killed $rbx, 4, killed $rbx, 0, $noreg ; CHECK: $rbp = ADD64ri8 $rbp, 5, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx CMP64rr $rax, killed $rbx, implicit-def $eflags $rbx = LEA64r killed $rax, 4, killed $rax, 5, $noreg JCC_1 %bb.1, 4, implicit $eflags - RETQ $ebx + RET64 $ebx bb.1: liveins: $rax, $rbp, $rbx $rbp = LEA64r killed $rbx, 4, killed $rbx, 5, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -1075,9 +1075,9 @@ body: | ; CHECK-LABEL: name: test_skip_opt_64_32 ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $ebp = LEA64_32r killed $rbp, 4, killed $rbp, 0, $noreg - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp $ebp = LEA64_32r killed $rbp, 4, killed $rbp, 0, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -1113,23 +1113,23 @@ body: | ; CHECK: CMP64rr $rax, killed $rbx, implicit-def $eflags ; CHECK: $ebx = LEA64_32r killed $rax, 4, killed $rax, 5, $noreg ; CHECK: JCC_1 %bb.1, 4, implicit $eflags - ; CHECK: RETQ $ebx + ; CHECK: RET64 $ebx ; CHECK: bb.1: ; CHECK: liveins: $rax, $rbp, $rbx ; CHECK: $ebp = LEA64_32r killed $rbx, 4, killed $rbx, 0, $noreg ; CHECK: $ebp = ADD32ri8 $ebp, 5, implicit-def $eflags - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp bb.0 (%ir-block.0): liveins: $rax, $rbp, $rbx CMP64rr $rax, killed $rbx, implicit-def $eflags $ebx = LEA64_32r killed $rax, 4, killed $rax, 5, $noreg JCC_1 %bb.1, 4, implicit $eflags - RETQ $ebx + RET64 $ebx bb.1: liveins: $rax, $rbp, $rbx $ebp = LEA64_32r killed $rbx, 4, killed $rbx, 5, $noreg - RETQ $ebp + RET64 $ebp ... --- @@ -1173,7 +1173,7 @@ body: | ; CHECK: NOOP ; CHECK: NOOP ; CHECK: NOOP - ; CHECK: RETQ $ebp + ; CHECK: RET64 $ebp DBG_VALUE 0, $noreg NOOP NOOP @@ -1184,7 +1184,7 @@ body: | NOOP NOOP NOOP - RETQ $ebp + RET64 $ebp ... ... diff --git a/llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir b/llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir index 914beab052dbc..95665c0ce1146 100644 --- a/llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir +++ b/llvm/test/CodeGen/X86/machine-copy-dbgvalue.mir @@ -8,7 +8,7 @@ # CHECK-NEXT: $rax = MOV64ri 31 # CHECK-NEXT: DBG_VALUE $rax # CHECK-NEXT: DBG_VALUE_LIST 0, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 4, DW_OP_mul, DW_OP_plus, DW_OP_stack_value), $rax, 0, 0 -# CHECK-NEXT: RETQ implicit killed $rax +# CHECK-NEXT: RET64 implicit killed $rax name: foo body: | bb.0: @@ -16,5 +16,5 @@ body: | DBG_VALUE $rcx, 0, 0, 0, 0 DBG_VALUE_LIST 0, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 4, DW_OP_mul, DW_OP_plus, DW_OP_stack_value), $rcx, 0, 0 $rax = COPY killed renamable $rcx - RETQ implicit killed $rax + RET64 implicit killed $rax ... 
diff --git a/llvm/test/CodeGen/X86/machine-outliner-cfi-tail-some.mir b/llvm/test/CodeGen/X86/machine-outliner-cfi-tail-some.mir index 328ee19c71921..2056d593205e4 100644 --- a/llvm/test/CodeGen/X86/machine-outliner-cfi-tail-some.mir +++ b/llvm/test/CodeGen/X86/machine-outliner-cfi-tail-some.mir @@ -39,7 +39,7 @@ body: | $edx = MOV32ri 2 $edi = MOV32ri 3 $eax = MOV32ri 4 - RETQ + RET64 ... --- name: bar @@ -63,7 +63,7 @@ body: | $edx = MOV32ri 2 $edi = MOV32ri 3 $eax = MOV32ri 4 - RETQ + RET64 ... --- name: baz @@ -87,4 +87,4 @@ body: | $edx = MOV32ri 2 $edi = MOV32ri 3 $eax = MOV32ri 4 - RETQ + RET64 diff --git a/llvm/test/CodeGen/X86/machine-outliner-cfi-tail.mir b/llvm/test/CodeGen/X86/machine-outliner-cfi-tail.mir index 117205698cea8..f18f62419a0d1 100644 --- a/llvm/test/CodeGen/X86/machine-outliner-cfi-tail.mir +++ b/llvm/test/CodeGen/X86/machine-outliner-cfi-tail.mir @@ -30,7 +30,7 @@ body: | $edx = MOV32ri 2 $edi = MOV32ri 3 $eax = MOV32ri 4 - RETQ + RET64 ... --- name: bar @@ -52,7 +52,7 @@ body: | $edx = MOV32ri 2 $edi = MOV32ri 3 $eax = MOV32ri 4 - RETQ + RET64 ... --- name: baz @@ -74,4 +74,4 @@ body: | $edx = MOV32ri 2 $edi = MOV32ri 3 $eax = MOV32ri 4 - RETQ + RET64 diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index a0a3346d7e9fc..23dec421a3c14 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -17,10 +17,10 @@ define i32 @_Z10test_shortPsS_i_128(i16* nocapture readonly, i16* nocapture read ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero +; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; SSE2-NEXT: pmaddwd %xmm2, %xmm3 -; SSE2-NEXT: paddd %xmm3, %xmm1 +; SSE2-NEXT: pmaddwd %xmm3, %xmm2 +; SSE2-NEXT: paddd %xmm2, %xmm1 ; SSE2-NEXT: addq $8, %rcx ; SSE2-NEXT: cmpq %rcx, %rax ; SSE2-NEXT: jne .LBB0_1 @@ -1859,6 +1859,7 @@ define <4 x i32> @pmaddwd_8_swapped(<8 x i16> %A, <8 x i16> %B) { ret <4 x i32> %ret } +; FIXME: SSE fails to match PMADDWD define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) { ; SSE2-LABEL: larger_mul: ; SSE2: # %bb.0: diff --git a/llvm/test/CodeGen/X86/masked_store_trunc.ll b/llvm/test/CodeGen/X86/masked_store_trunc.ll index 80ef17c5c9a44..e4b2ea1201713 100644 --- a/llvm/test/CodeGen/X86/masked_store_trunc.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc.ll @@ -1262,9 +1262,9 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX2-LABEL: truncstore_v4i64_v4i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm3[1,2,3],ymm0[4],ymm3[5,6,7],ymm0[8],ymm3[9,10,11],ymm0[12],ymm3[13,14,15] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; AVX2-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7] -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] ; AVX2-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/ms-inline-asm-array.ll b/llvm/test/CodeGen/X86/ms-inline-asm-array.ll new file mode 100644 index 0000000000000..3bbdbaac47e06 --- /dev/null +++ b/llvm/test/CodeGen/X86/ms-inline-asm-array.ll @@ -0,0 +1,22 @@ +; RUN: 
llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +@arr = internal global [10 x i32] zeroinitializer, align 16 + +; CHECK: movl %edx, arr(,%rdx,4) +define dso_local i32 @main() #0 { +entry: + call void asm sideeffect inteldialect "mov dword ptr $0[rdx * $$4],edx", "=*m,~{dirflag},~{fpsr},~{flags}"([10 x i32]* @arr) #1, !srcloc !4 + ret i32 0 +} + +attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"uwtable", i32 1} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{!"clang"} +!4 = !{i64 63} diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir index 7b3ef4dd0f8d4..968ee7804b833 100644 --- a/llvm/test/CodeGen/X86/optimize-compare.mir +++ b/llvm/test/CodeGen/X86/optimize-compare.mir @@ -595,3 +595,91 @@ body: | CMP16ri %0, 65535, implicit-def $eflags $bl = SETCCr 6, implicit $eflags ... +--- +name: opt_adjusted_imm_multiple_blocks +body: | + ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: CMP32ri [[COPY]], 20, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: JCC_1 %bb.2, 15, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET64 + bb.0: + %0:gr32 = COPY $eax + CMP32ri %0, 20, implicit-def $eflags + JCC_1 %bb.1, 4, implicit $eflags + JMP_1 %bb.3 + + bb.1: + ; CMP can be removed when adjusting the JCC. + CMP32ri %0, 21, implicit-def $eflags + JCC_1 %bb.2, 13, implicit $eflags + JMP_1 %bb.3 + + bb.2: + JMP_1 %bb.3 + + bb.3: + RET64 +... 
+--- +name: opt_adjusted_imm_multiple_blocks_noopt +body: | + ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks_noopt + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: CMP32ri [[COPY]], 20, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CMP32ri [[COPY]], 21, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 13, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET64 + bb.0: + %0:gr32 = COPY $eax + CMP32ri %0, 20, implicit-def $eflags + JCC_1 %bb.1, 4, implicit $eflags + JMP_1 %bb.3 + + bb.1: + ; The following CMP should not be optimized because $eflags is live-out + CMP32ri %0, 21, implicit-def $eflags + JCC_1 %bb.2, 13, implicit $eflags + JMP_1 %bb.3 + + bb.2: + liveins: $eflags + $al = SETCCr 4, implicit $eflags + + bb.3: + RET64 +... diff --git a/llvm/test/CodeGen/X86/partition.ll b/llvm/test/CodeGen/X86/partition.ll index cc8d44e399ef3..f83a4cf32f5bc 100644 --- a/llvm/test/CodeGen/X86/partition.ll +++ b/llvm/test/CodeGen/X86/partition.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: .zero 1 ; CHECK-NEXT: .quad i1 -define void @f1() partition "part1" { +define void ()* @f1() partition "part1" { unreachable } @@ -30,4 +30,4 @@ declare void @f3() partition "part3" @g1 = global i32 0, partition "part4" @a1 = alias i32, i32* @g1, partition "part5" -@i1 = ifunc void(), void()* @f1, partition "part6" +@i1 = ifunc void(), void()* ()* @f1, partition "part6" diff --git a/llvm/test/CodeGen/X86/patchpoint-verifiable.mir b/llvm/test/CodeGen/X86/patchpoint-verifiable.mir index 54f39e155b065..cd57c64b4b93a 100644 --- a/llvm/test/CodeGen/X86/patchpoint-verifiable.mir +++ b/llvm/test/CodeGen/X86/patchpoint-verifiable.mir @@ -38,5 +38,5 @@ body: | ; CHECK: PATCHPOINT 5, 5, 0, 2, 0, $rdi, $rsi, csr_64, implicit-def dead early-clobber $r11, implicit-def $rsp, implicit-def dead $rax PATCHPOINT 5, 5, 0, 2, 0, $rdi, $rsi, csr_64, implicit-def dead early-clobber $r11, implicit-def $rsp, implicit-def dead $rax $rbp = POP64r implicit-def $rsp, implicit $rsp - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/X86/phielim-undef.mir b/llvm/test/CodeGen/X86/phielim-undef.mir index 7970a08caaa24..005ee37398157 100644 --- a/llvm/test/CodeGen/X86/phielim-undef.mir +++ b/llvm/test/CodeGen/X86/phielim-undef.mir @@ -52,7 +52,7 @@ body: | ; CHECK-NEXT: JMP_1 %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: RETQ + ; CHECK-NEXT: RET64 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: bb.0: @@ -91,7 +91,7 @@ body: | JMP_1 %bb.2 bb.6: - RETQ + RET64 bb.7: ... diff --git a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir index 261d6b9ecb742..960c5c997600c 100644 --- a/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir +++ b/llvm/test/CodeGen/X86/post-ra-sched-with-debug.mir @@ -330,6 +330,6 @@ body: | $rbx = POP64r implicit-def $rsp, implicit $rsp $r14 = POP64r implicit-def $rsp, implicit $rsp $rbp = POP64r implicit-def $rsp, implicit $rsp - RETQ + RET64 ... 
diff --git a/llvm/test/CodeGen/X86/prologue-epilogue-remarks.mir b/llvm/test/CodeGen/X86/prologue-epilogue-remarks.mir
index efa674451b28a..f0c6121bd98e7 100644
--- a/llvm/test/CodeGen/X86/prologue-epilogue-remarks.mir
+++ b/llvm/test/CodeGen/X86/prologue-epilogue-remarks.mir
@@ -23,7 +23,7 @@ stack:
constants:
body: |
  bb.0:
-    RETQ
+    RET64
...
---
@@ -41,7 +41,7 @@ stack:
constants:
body: |
  bb.0:
-    RETQ
+    RET64
...
---
@@ -59,6 +59,6 @@ stack:
constants:
body: |
  bb.0:
-    RETQ
+    RET64
...
diff --git a/llvm/test/CodeGen/X86/scavenger.mir b/llvm/test/CodeGen/X86/scavenger.mir
index 7a13e872ec531..666fda2ddccfd 100644
--- a/llvm/test/CodeGen/X86/scavenger.mir
+++ b/llvm/test/CodeGen/X86/scavenger.mir
@@ -36,7 +36,7 @@ body: |
  ; CHECK: NOOP implicit killed [[REG1]]
  NOOP implicit %2
  NOOP implicit %1
-  RETQ $eax
+  RET64 $eax
...
---
# CHECK-LABEL: name: func3
diff --git a/llvm/test/CodeGen/X86/selectiondag-debug-loc.ll b/llvm/test/CodeGen/X86/selectiondag-debug-loc.ll
index e556236edbc19..165768c20da2f 100644
--- a/llvm/test/CodeGen/X86/selectiondag-debug-loc.ll
+++ b/llvm/test/CodeGen/X86/selectiondag-debug-loc.ll
@@ -34,7 +34,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!llvm.ident = !{!7}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 7.0.0 (trunk 330296) (llvm/trunk 330298)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
-!1 = !DIFile(filename: "-", directory: "/Users/vsk/src/builds/llvm.org-master-RA")
+!1 = !DIFile(filename: "-", directory: "/Users/vsk/src/builds/llvm.org-main-RA")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
@@ -42,7 +42,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata)
!6 = !{i32 7, !"PIC Level", i32 2}
!7 = !{!"clang version 7.0.0 (trunk 330296) (llvm/trunk 330298)"}
!8 = distinct !DISubprogram(name: "main", scope: !9, file: !9, line: 1, type: !10, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
-!9 = !DIFile(filename: "<stdin>", directory: "/Users/vsk/src/builds/llvm.org-master-RA")
+!9 = !DIFile(filename: "<stdin>", directory: "/Users/vsk/src/builds/llvm.org-main-RA")
!10 = !DISubroutineType(types: !11)
!11 = !{!12, !12, !13}
!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index d8e7f3358b1fc..7557b3fc28440 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -1079,10 +1079,10 @@ define void @mul_2xi16_sext(i8* nocapture readonly %a, i8* nocapture readonly %b
; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT: pxor %xmm2, %xmm2
-; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7]
-; X86-SSE-NEXT: pmaddwd %xmm0, %xmm1
-; X86-SSE-NEXT: movq %xmm1, (%esi,%ecx,4)
+; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
+; X86-SSE-NEXT: pmaddwd %xmm1, %xmm0
+; X86-SSE-NEXT: movq %xmm0, (%esi,%ecx,4)
; X86-SSE-NEXT: popl %esi
; X86-SSE-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
index 09d6944efa300..ab0f5d8d76e57 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
@@ -737,12 +737,12 @@ define <2 x i64> @trunc_v4i64_to_v4i16_return_v2i64(<4 x i64> %vec) nounwind {
;
; AVX2-LABEL: trunc_v4i64_to_v4i16_return_v2i64:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -940,12 +940,12 @@ define <8 x i16> @trunc_v4i64_to_v4i16_return_v8i16(<4 x i64> %vec) nounwind {
;
; AVX2-LABEL: trunc_v4i64_to_v4i16_return_v8i16:
; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7]
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index bd807c9e76c64..bbf413a91badc 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -1752,3 +1752,80 @@ define <8 x double> @broadcast_v8f64_v2f64_0uuu0101(<2 x double>* %vp) {
%res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 0, i32 1>
ret <8 x double> %res
}
+
+define void @PR51226() {
+; X86-AVX1-LABEL: PR51226:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X86-AVX1-NEXT: vpslld $16, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X86-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT: vminps %ymm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: vmovups %ymm0, (%eax)
+; X86-AVX1-NEXT: vzeroupper
+; X86-AVX1-NEXT: retl
+;
+; X86-AVX2-LABEL: PR51226:
+; X86-AVX2: # %bb.0:
+; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X86-AVX2-NEXT: vpslld $16, %ymm0, %ymm0
+; X86-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT: vminps %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT: vmovups %ymm0, (%eax)
+; X86-AVX2-NEXT: vzeroupper
+; X86-AVX2-NEXT: retl
+;
+; X86-AVX512-LABEL: PR51226:
+; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X86-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X86-AVX512-NEXT: vpslld $16, %ymm0, %ymm0
+; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X86-AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
+; X86-AVX512-NEXT: vmovups %ymm0, (%eax)
+; X86-AVX512-NEXT: vzeroupper
+; X86-AVX512-NEXT: retl
+;
+; X64-AVX1-LABEL: PR51226:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-AVX1-NEXT: vpslld $16, %xmm0, %xmm0
+; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT: vminps %ymm1, %ymm0, %ymm0
+; X64-AVX1-NEXT: vmovups %ymm0, (%rax)
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: PR51226:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX2-NEXT: vpslld $16, %ymm0, %ymm0
+; X64-AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT: vminps %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT: vmovups %ymm0, (%rax)
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+;
+; X64-AVX512-LABEL: PR51226:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vpslld $16, %ymm0, %ymm0
+; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
+; X64-AVX512-NEXT: vmovups %ymm0, (%rax)
+; X64-AVX512-NEXT: vzeroupper
+; X64-AVX512-NEXT: retq
+ %i = load <4 x i16>, <4 x i16>* undef, align 8
+ %i1 = zext <4 x i16> %i to <4 x i32>
+ %i2 = shl nuw <4 x i32> %i1, <i32 16, i32 16, i32 16, i32 16>
+ %i3 = bitcast <4 x i32> %i2 to <4 x float>
+ %shuffle99 = shufflevector <4 x float> %i3, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %i4 = fcmp reassoc nsz contract ogt <8 x float> zeroinitializer, %shuffle99
+ %i5 = select <8 x i1> %i4, <8 x float> %shuffle99, <8 x float> zeroinitializer
+ store <8 x float> %i5, <8 x float>* undef, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
new file mode 100644
index 0000000000000..25da377ec487b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -tail-dup-jmptable-loop-size=5 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+define i8* @large_loop_switch(i8* %p) {
+; CHECK-LABEL: large_loop_switch:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movl $6, %ebx
+; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
+; CHECK-NEXT: .LBB0_2: # %sw.bb1
+; CHECK-NEXT: movl $531, %edi # imm = 0x213
+; CHECK-NEXT: .LBB0_3: # %for.body
+; CHECK-NEXT: callq ccc@PLT
+; CHECK-NEXT: .LBB0_4: # %for.body
+; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: decl %ebx
+; CHECK-NEXT: movl %ebx, %eax
+; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
+; CHECK-NEXT: .LBB0_5: # %sw.bb3
+; CHECK-NEXT: movl $532, %edi # imm = 0x214
+; CHECK-NEXT: callq bbb@PLT
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_7: # %sw.bb5
+; CHECK-NEXT: movl $533, %edi # imm = 0x215
+; CHECK-NEXT: callq bbb@PLT
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_8: # %sw.bb7
+; CHECK-NEXT: movl $535, %edi # imm = 0x217
+; CHECK-NEXT: callq bbb@PLT
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_9: # %sw.bb9
+; CHECK-NEXT: movl $536, %edi # imm = 0x218
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_10: # %sw.bb11
+; CHECK-NEXT: movl $658, %edi # imm = 0x292
+; CHECK-NEXT: callq bbb@PLT
+; CHECK-NEXT: jmp .LBB0_4
+; CHECK-NEXT: .LBB0_11: # %for.cond.cleanup
+; CHECK-NEXT: movl $530, %edi # imm = 0x212
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+;
CHECK-NEXT: jmp ccc@PLT # TAILCALL +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + %call = tail call i8* @ccc(i32 signext 530, i8* %p.addr.03006) + ret i8* %call + +for.body: ; preds = %for.inc, %entry + %i.03007 = phi i32 [ 6, %entry ], [ %dec, %for.inc ] + %p.addr.03006 = phi i8* [ %p, %entry ], [ %p.addr.1, %for.inc ] + switch i32 %i.03007, label %for.body.unreachabledefault [ + i32 0, label %for.cond.cleanup + i32 1, label %sw.bb1 + i32 2, label %sw.bb3 + i32 3, label %sw.bb5 + i32 4, label %sw.bb7 + i32 5, label %sw.bb9 + i32 6, label %sw.bb11 + ] + +sw.bb1: ; preds = %for.body + %call2 = tail call i8* @ccc(i32 signext 531, i8* %p.addr.03006) + br label %for.inc + +sw.bb3: ; preds = %for.body + %call4 = tail call i8* @bbb(i32 signext 532, i8* %p.addr.03006) + br label %for.inc + +sw.bb5: ; preds = %for.body + %call6 = tail call i8* @bbb(i32 signext 533, i8* %p.addr.03006) + br label %for.inc + +sw.bb7: ; preds = %for.body + %call8 = tail call i8* @bbb(i32 signext 535, i8* %p.addr.03006) + br label %for.inc + +sw.bb9: ; preds = %for.body + %call10 = tail call i8* @ccc(i32 signext 536, i8* %p.addr.03006) + br label %for.inc + +sw.bb11: ; preds = %for.body + %call12 = tail call i8* @bbb(i32 signext 658, i8* %p.addr.03006) + br label %for.inc + +for.body.unreachabledefault: ; preds = %for.body + unreachable + +for.inc: ; preds = %sw.bb1, %sw.bb3, %sw.bb5, %sw.bb7, %sw.bb9, %sw.bb11 + %p.addr.1 = phi i8* [ %call12, %sw.bb11 ], [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ] + %dec = add nsw i32 %i.03007, -1 + br label %for.body +} + +declare i8* @bbb(i32 signext, i8*) +declare i8* @ccc(i32 signext, i8*) + + +define i32 @interp_switch(i8* nocapture readonly %0, i32 %1) { +; CHECK-LABEL: interp_switch: +; CHECK: # %bb.0: +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: jmp .LBB1_1 +; CHECK-NEXT: .LBB1_7: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: addl $7, %eax +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movzbl (%rdi), %ecx +; CHECK-NEXT: decb %cl +; CHECK-NEXT: cmpb $5, %cl +; CHECK-NEXT: ja .LBB1_9 +; CHECK-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: jmpq *.LJTI1_0(,%rcx,8) +; CHECK-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: incl %eax +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: jmp .LBB1_1 +; CHECK-NEXT: .LBB1_4: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: decl %eax +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: jmp .LBB1_1 +; CHECK-NEXT: .LBB1_5: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: addl %eax, %eax +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: jmp .LBB1_1 +; CHECK-NEXT: .LBB1_6: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: shrl $31, %ecx +; CHECK-NEXT: addl %eax, %ecx +; CHECK-NEXT: sarl %ecx +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: jmp .LBB1_1 +; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_1 Depth=1 +; CHECK-NEXT: negl %eax +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: jmp .LBB1_1 +; CHECK-NEXT: .LBB1_9: +; CHECK-NEXT: retq + br label %3 + +3: ; preds = %21, %2 + %4 = phi i64 [ 0, %2 ], [ %6, %21 ] + %5 = phi i32 [ %1, %2 ], [ %22, %21 ] + %6 = add nuw i64 %4, 1 + %7 = getelementptr inbounds i8, i8* %0, i64 %4 + %8 = load i8, i8* %7, align 1 + switch i8 %8, label %23 [ + i8 6, label %19 + i8 1, label %9 + i8 2, label %11 + i8 3, label %13 + i8 4, label %15 + i8 5, label %17 + ] + +9: ; preds = %3 + %10 
= add nsw i32 %5, 1 + br label %21 + +11: ; preds = %3 + %12 = add nsw i32 %5, -1 + br label %21 + +13: ; preds = %3 + %14 = shl nsw i32 %5, 1 + br label %21 + +15: ; preds = %3 + %16 = sdiv i32 %5, 2 + br label %21 + +17: ; preds = %3 + %18 = add nsw i32 %5, 7 + br label %21 + +19: ; preds = %3 + %20 = sub nsw i32 0, %5 + br label %21 + +21: ; preds = %19, %17, %15, %13, %11, %9 + %22 = phi i32 [ %20, %19 ], [ %18, %17 ], [ %16, %15 ], [ %14, %13 ], [ %12, %11 ], [ %10, %9 ] + br label %3 + +23: ; preds = %3 + ret i32 %5 +} diff --git a/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir b/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir index 97c2a2914f827..ac2e728ab5d07 100644 --- a/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir +++ b/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir @@ -24,7 +24,7 @@ body: | ; CHECK: bb.4: ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags ; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax ; CHECK: bb.6: ; CHECK: successors: %bb.1(0x30000000), %bb.7(0x50000000) ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load (s64)) @@ -41,7 +41,7 @@ body: | ; CHECK: JCC_1 %bb.7, 5, implicit $eflags ; CHECK: bb.1: ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags - ; CHECK: RETQ $eax + ; CHECK: RET64 $eax bb.0: successors: %bb.1(0x40000000), %bb.7(0x40000000) @@ -113,6 +113,6 @@ body: | bb.16: - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/CodeGen/X86/tied-depbreak.mir b/llvm/test/CodeGen/X86/tied-depbreak.mir index 2fef158720aff..4eca28795516f 100644 --- a/llvm/test/CodeGen/X86/tied-depbreak.mir +++ b/llvm/test/CodeGen/X86/tied-depbreak.mir @@ -59,6 +59,6 @@ body: | ; Verify that XOR is untouched by the dependency breaker ; CHECK: $esi = XOR32rr undef $esi, undef $esi, implicit-def dead $eflags, implicit-def $rsi $esi = XOR32rr undef $esi, undef $esi, implicit-def dead $eflags, implicit-def $rsi - RETQ killed $eax + RET64 killed $eax ... diff --git a/llvm/test/CodeGen/X86/twoaddr-dbg-value.mir b/llvm/test/CodeGen/X86/twoaddr-dbg-value.mir index f2d6c5df58224..b575852d4ba3c 100644 --- a/llvm/test/CodeGen/X86/twoaddr-dbg-value.mir +++ b/llvm/test/CodeGen/X86/twoaddr-dbg-value.mir @@ -13,7 +13,7 @@ body: | %5:gr32 = COPY %4 PUSH32r killed %1, implicit-def $esp, implicit $esp $eax = COPY killed %5 - RETQ implicit killed $eax + RET64 implicit killed $eax ... diff --git a/llvm/test/CodeGen/X86/update-terminator.mir b/llvm/test/CodeGen/X86/update-terminator.mir index bbb327cd2a5f6..d26f797507716 100644 --- a/llvm/test/CodeGen/X86/update-terminator.mir +++ b/llvm/test/CodeGen/X86/update-terminator.mir @@ -74,6 +74,6 @@ body: | JMP_1 %bb.2 bb.4: - RETQ + RET64 ... diff --git a/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll b/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll index 952a9e2d8b4e2..ccd8cdff891a1 100644 --- a/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll +++ b/llvm/test/CodeGen/X86/vaargs-prolog-insert.ll @@ -5,6 +5,7 @@ define void @reduce(i32, i32, i32, i32, i32, i32, ...) nounwind { ; CHECK-LABEL: reduce: ; CHECK: # %bb.0: +; CHECK-NEXT: subq $56, %rsp ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB0_4 ; CHECK-NEXT: # %bb.3: @@ -21,15 +22,14 @@ define void @reduce(i32, i32, i32, i32, i32, i32, ...) 
nounwind { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: subq $56, %rsp ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, 16 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, 8 ; CHECK-NEXT: movl $48, 4 ; CHECK-NEXT: movl $48, 0 -; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: retq br i1 undef, label %8, label %7 diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 85e39d4c24530..648b1323936e4 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -35,7 +35,7 @@ define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %amt) ; SSE2-NEXT: psrlq %xmm4, %xmm5 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[2,3,2,3] ; SSE2-NEXT: psrlq %xmm4, %xmm1 -; SSE2-NEXT: shufpd{{.*#+}} xmm5 = xmm5[0],xmm1[1] +; SSE2-NEXT: shufpd {{.*#+}} xmm5 = xmm5[0],xmm1[1] ; SSE2-NEXT: pand %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psllq %xmm2, %xmm1 @@ -2161,21 +2161,13 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; ; X86-SSE2-LABEL: constant_funnnel_v2i64: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0] -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <4,u,14,u> -; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 -; X86-SSE2-NEXT: pandn %xmm3, %xmm5 -; X86-SSE2-NEXT: psrlq $1, %xmm1 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 -; X86-SSE2-NEXT: psrlq %xmm5, %xmm2 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] -; X86-SSE2-NEXT: psrlq %xmm5, %xmm1 +; X86-SSE2-NEXT: psrlq $60, %xmm2 +; X86-SSE2-NEXT: psrlq $50, %xmm1 ; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] -; X86-SSE2-NEXT: pand %xmm3, %xmm4 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE2-NEXT: psllq %xmm4, %xmm1 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3] -; X86-SSE2-NEXT: psllq %xmm3, %xmm0 +; X86-SSE2-NEXT: psllq $4, %xmm1 +; X86-SSE2-NEXT: psllq $14, %xmm0 ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; X86-SSE2-NEXT: orpd %xmm2, %xmm0 ; X86-SSE2-NEXT: retl @@ -2695,10 +2687,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: psrlq $50, %xmm1 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1] ; X86-SSE2-NEXT: psllq $14, %xmm0 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1] -; X86-SSE2-NEXT: orpd %xmm1, %xmm0 +; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> <i64 14, i64 14>) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index 3b69e9cfcd375..d3e4260abf6a6 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1460,24 +1460,20 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind { ; ; X86-SSE2-LABEL: constant_funnnel_v2i64: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [63,0,63,0] -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = <4,u,14,u> -; X86-SSE2-NEXT: pxor %xmm3, %xmm3 -; X86-SSE2-NEXT: psubq %xmm2, %xmm3 -; X86-SSE2-NEXT: pand %xmm1, %xmm2 -; X86-SSE2-NEXT: movdqa %xmm0, %xmm4 -; X86-SSE2-NEXT: psllq %xmm2, %xmm4 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] -; X86-SSE2-NEXT: movdqa %xmm0, %xmm5 -; X86-SSE2-NEXT: psllq %xmm2, %xmm5 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm5 =
xmm4[0],xmm5[1] -; X86-SSE2-NEXT: pand %xmm1, %xmm3 +; X86-SSE2-NEXT: pxor %xmm1, %xmm1 +; X86-SSE2-NEXT: psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: psrlq %xmm1, %xmm2 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 +; X86-SSE2-NEXT: psrlq %xmm1, %xmm3 +; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1] ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE2-NEXT: psllq %xmm3, %xmm1 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] -; X86-SSE2-NEXT: psllq %xmm2, %xmm0 +; X86-SSE2-NEXT: psllq $4, %xmm1 +; X86-SSE2-NEXT: psllq $14, %xmm0 ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; X86-SSE2-NEXT: orpd %xmm5, %xmm0 +; X86-SSE2-NEXT: orpd %xmm3, %xmm0 ; X86-SSE2-NEXT: retl %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>) ret <2 x i64> %res @@ -1932,9 +1928,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind { ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE2-NEXT: psrlq $50, %xmm1 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1] -; X86-SSE2-NEXT: psllq $14, %xmm0 +; X86-SSE2-NEXT: psllq $14, %xmm1 +; X86-SSE2-NEXT: psrlq $50, %xmm0 ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1] ; X86-SSE2-NEXT: orpd %xmm1, %xmm0 ; X86-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index 488861ad53b39..a5f79092c25e8 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -37,62 +37,54 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: var_funnnel_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm3 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsllvd %zmm4, %zmm2, %zmm4 -; AVX512F-NEXT: vpmovdw %zmm4, %ymm4 +; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpsllvd %zmm3, %zmm2, %zmm3 +; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm3 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsllvd
%zmm3, %zmm0, %zmm3 -; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm5 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero -; AVX512F-NEXT: vpsrlvd %zmm5, %zmm2, %zmm2 -; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpand %ymm6, %ymm1, %ymm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512F-NEXT: vpsllvd %zmm5, %zmm0, %zmm5 +; AVX512F-NEXT: vpmovdw %zmm5, %ymm5 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsrlvd %zmm1, %zmm2, %zmm1 +; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 +; AVX512F-NEXT: vpsubw %ymm4, %ymm5, %ymm2 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero +; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: var_funnnel_v32i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm3 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsllvd %zmm4, %zmm2, %zmm4 -; AVX512VL-NEXT: vpmovdw %zmm4, %ymm4 +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpsllvd %zmm3, %zmm2, %zmm3 +; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} 
zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm3, %ymm3 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsllvd %zmm3, %zmm0, %zmm3 -; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm4, %ymm5 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm5 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm5, %zmm2, %zmm2 -; AVX512VL-NEXT: vpmovdw %zmm2, %ymm2 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512VL-NEXT: vpand %ymm6, %ymm1, %ymm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512VL-NEXT: vpsllvd %zmm5, %zmm0, %zmm5 +; AVX512VL-NEXT: vpmovdw %zmm5, %ymm5 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %ymm1, %ymm5, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm2, %zmm1 +; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1 +; AVX512VL-NEXT: vpsubw %ymm4, %ymm5, %ymm2 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero +; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; @@ -454,47 +446,35 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-NEXT: vpsllw %xmm3, %ymm4, %ymm5 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4 ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 -; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsubw %xmm1, %xmm5, %xmm1 -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsrld %xmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero -; AVX512F-NEXT: vpsrld %xmm1, %zmm2, %zmm1 -; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 +; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm4, %ymm5 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 -; AVX512VL-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm5, %xmm1 -; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero -; AVX512VL-NEXT: vpsrld %xmm1, %zmm2, %zmm1 -; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; 
AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; @@ -745,37 +725,27 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind { ; AVX512F-LABEL: constant_funnnel_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768> +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7] -; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm2[1,2,3,4,5,6,7] -; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] -; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v32i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768> +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3 -; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7] -; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] -; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm2[1,2,3,4,5,6,7] -; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] -; AVX512VL-NEXT: vpmullw %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vpmullw %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512VL-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll index 6c210b267fd4b..e6cd4514e02c0 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll @@ -444,7 +444,7 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pslld $4, %xmm2 ; SSE2-NEXT: por %xmm1, %xmm2 -; SSE2-NEXT: movsd %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatconstant_funnnel_v2i32: @@ -523,7 +523,7 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind { ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE2-NEXT: pslld $4, %xmm2 ; X86-SSE2-NEXT: por %xmm1, %xmm2 -; X86-SSE2-NEXT: movsd %xmm2, %xmm0 +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ;
X86-SSE2-NEXT: retl %res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>) ret <2 x i32> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index d279ad9c67c80..f1b0c70825cef 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -1867,21 +1867,13 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; ; X86-SSE2-LABEL: constant_funnnel_v2i64: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [63,0,63,0] -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <4,u,14,u> -; X86-SSE2-NEXT: movdqa %xmm4, %xmm5 -; X86-SSE2-NEXT: pand %xmm3, %xmm5 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 -; X86-SSE2-NEXT: psrlq %xmm5, %xmm2 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] -; X86-SSE2-NEXT: psrlq %xmm5, %xmm1 +; X86-SSE2-NEXT: psrlq $4, %xmm2 +; X86-SSE2-NEXT: psrlq $14, %xmm1 ; X86-SSE2-NEXT: shufpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] -; X86-SSE2-NEXT: pandn %xmm3, %xmm4 -; X86-SSE2-NEXT: psllq $1, %xmm0 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE2-NEXT: psllq %xmm4, %xmm1 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[2,3,2,3] -; X86-SSE2-NEXT: psllq %xmm3, %xmm0 +; X86-SSE2-NEXT: psllq $60, %xmm1 +; X86-SSE2-NEXT: psllq $50, %xmm0 ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; X86-SSE2-NEXT: orpd %xmm2, %xmm0 ; X86-SSE2-NEXT: retl @@ -2414,10 +2406,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x, <2 x i64> %y) nounwi ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: psrlq $14, %xmm1 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1] ; X86-SSE2-NEXT: psllq $50, %xmm0 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1] -; X86-SSE2-NEXT: orpd %xmm1, %xmm0 +; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> <i64 14, i64 14>) ret <2 x i64> %res diff --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll index 85bc3c6dd440e..5ed47f9fb069f 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll @@ -153,14 +153,14 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1 ; AVX512F-NEXT: vpandnq %zmm3, %zmm2, %zmm2 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm4 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm4 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero ; AVX512F-NEXT: vpsllvd %zmm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm2 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 =
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 @@ -185,14 +185,14 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i16> % ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm5, %zmm1 ; AVX512VL-NEXT: vpandnq %zmm3, %zmm2, %zmm2 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm4 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm4 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero ; AVX512VL-NEXT: vpsllvd %zmm3, %zmm4, %zmm3 ; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm2, %ymm2 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 @@ -674,9 +674,9 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; AVX512F-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsllw $1, %ymm3, %ymm3 +; AVX512F-NEXT: vpaddw %ymm3, %ymm3, %ymm3 ; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm3 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -694,9 +694,9 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y, <32 x i ; AVX512VL-NEXT: vpandn %xmm3, %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsllw $1, %ymm3, %ymm3 +; AVX512VL-NEXT: vpaddw %ymm3, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -994,10 +994,8 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7] ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpsllw $1, %ymm2, %ymm2 -; 
AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [0,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -1015,10 +1013,8 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x, <32 x i16> %y) nounwin ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7] ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpsllw $1, %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpmullw %ymm3, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsllw $1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmullw %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index fec48466aff33..f6d983dad2501 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1548,24 +1548,20 @@ define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind { ; ; X86-SSE2-LABEL: constant_funnnel_v2i64: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [63,0,63,0] -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = <4,u,14,u> -; X86-SSE2-NEXT: pxor %xmm3, %xmm3 -; X86-SSE2-NEXT: psubq %xmm2, %xmm3 -; X86-SSE2-NEXT: pand %xmm1, %xmm2 -; X86-SSE2-NEXT: movdqa %xmm0, %xmm4 -; X86-SSE2-NEXT: psrlq %xmm2, %xmm4 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] -; X86-SSE2-NEXT: movdqa %xmm0, %xmm5 -; X86-SSE2-NEXT: psrlq %xmm2, %xmm5 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1] -; X86-SSE2-NEXT: pand %xmm1, %xmm3 +; X86-SSE2-NEXT: pxor %xmm1, %xmm1 +; X86-SSE2-NEXT: psubq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: psllq %xmm1, %xmm2 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 +; X86-SSE2-NEXT: psllq %xmm1, %xmm3 +; X86-SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1] ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE2-NEXT: psllq %xmm3, %xmm1 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3] -; X86-SSE2-NEXT: psllq %xmm2, %xmm0 +; X86-SSE2-NEXT: psrlq $4, %xmm1 +; X86-SSE2-NEXT: psrlq $14, %xmm0 ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; X86-SSE2-NEXT: orpd %xmm5, %xmm0 +; X86-SSE2-NEXT: orpd %xmm3, %xmm0 ; X86-SSE2-NEXT: retl %res = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>) ret <2 x i64> %res @@ -2020,9 +2016,8 @@ define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind { ; X86-SSE2-LABEL: splatconstant_funnnel_v2i64: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE2-NEXT: psllq $50, %xmm1 -; X86-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1] -; X86-SSE2-NEXT: psrlq $14, %xmm0 +; X86-SSE2-NEXT: psrlq $14, %xmm1 +; X86-SSE2-NEXT: psllq $50, %xmm0 ; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm0[0,1] ; X86-SSE2-NEXT: orpd %xmm1, %xmm0 ; X86-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index 543a5cbab0d02..9443807e0c405 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -36,64 +36,66 @@ define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind { define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: var_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm3 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsrlvd %zmm4, %zmm2, %zmm4 -; AVX512F-NEXT: vpmovdw %zmm4, %ymm4 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpsllvd %zmm5, %zmm6, %zmm5 +; AVX512F-NEXT: vpmovdw %zmm5, %ymm5 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm3, %ymm3 +; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm5, %zmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsrlvd %zmm3, %zmm0, %zmm3 +; AVX512F-NEXT: vpsrlvd %zmm3, %zmm6, %zmm3 ; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm5 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; 
AVX512F-NEXT: vpand %ymm6, %ymm5, %ymm5 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero -; AVX512F-NEXT: vpsllvd %zmm5, %zmm2, %zmm2 -; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpand %ymm6, %ymm1, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: var_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm3 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm2, %zmm4 -; AVX512VL-NEXT: vpmovdw %zmm4, %ymm4 +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpsllvd %zmm5, %zmm6, %zmm5 +; AVX512VL-NEXT: vpmovdw %zmm5, %ymm5 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm1 +; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm3, %ymm3 +; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpmovdw %zmm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm5, 
%zmm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %ymm3, %ymm4, %ymm3 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm3, %zmm0, %zmm3 +; AVX512VL-NEXT: vpsrlvd %zmm3, %zmm6, %zmm3 ; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm4, %ymm5 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm6, %ymm5, %ymm5 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm5[0],zero,ymm5[1],zero,ymm5[2],zero,ymm5[3],zero,ymm5[4],zero,ymm5[5],zero,ymm5[6],zero,ymm5[7],zero,ymm5[8],zero,ymm5[9],zero,ymm5[10],zero,ymm5[11],zero,ymm5[12],zero,ymm5[13],zero,ymm5[14],zero,ymm5[15],zero -; AVX512VL-NEXT: vpsllvd %zmm5, %zmm2, %zmm2 -; AVX512VL-NEXT: vpmovdw %zmm2, %ymm2 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 ; AVX512VL-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512VL-NEXT: vpand %ymm6, %ymm1, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: var_funnnel_v32i16: @@ -454,48 +456,54 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5 -; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 -; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512F-NEXT: vpsubw %xmm1, %xmm5, %xmm1 -; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpslld %xmm2, %zmm3, %zmm4 +; AVX512F-NEXT: vpmovdw %zmm4, %ymm4 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpslld %xmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpslld %xmm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm4, %zmm2 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrld %xmm1, %zmm3, %zmm3 +; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 +; AVX512F-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero -; AVX512F-NEXT: vpslld %xmm1, %zmm2, %zmm1 -; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5 -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 -; AVX512VL-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm5, %xmm1 -; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpslld %xmm2, %zmm3, %zmm4 +; AVX512VL-NEXT: vpmovdw %zmm4, %ymm4 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpslld %xmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpslld %xmm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpmovdw %zmm2, %ymm2 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm4, %zmm2 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrld %xmm1, %zmm3, %zmm3 +; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 +; AVX512VL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 -;
AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero -; AVX512VL-NEXT: vpslld %xmm1, %zmm2, %zmm1 -; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: @@ -741,37 +749,27 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind { ; AVX512F-LABEL: constant_funnnel_v32i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2> +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7] -; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] -; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2 -; AVX512F-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm2[1,2,3,4,5,6,7] -; AVX512F-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] -; AVX512F-NEXT: vpmullw %ymm3, %ymm1, %ymm1 -; AVX512F-NEXT: vpmullw %ymm3, %ymm0, %ymm0 +; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4 +; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v32i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = <u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2> +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3 -; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm3[1,2,3,4,5,6,7] -; AVX512VL-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] -; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm2[1,2,3,4,5,6,7] -; AVX512VL-NEXT: vpblendd {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [1,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] -; AVX512VL-NEXT: vpmullw %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vpmullw %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 +; AVX512VL-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_funnnel_v32i16: @@ -1036,24 +1034,24 @@ define <16 x i32> @splatconstant_funnnel_v16i32(<16 x i32> %x) nounwind { define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind { ; AVX512F-LABEL: splatconstant_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm1 ; AVX512F-NEXT:
vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpsllw $9, %ymm2, %ymm3 +; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 -; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw $9, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $9, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm1 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpsllw $9, %ymm2, %ymm3 +; AVX512VL-NEXT: vpsrlw $7, %ymm2, %ymm3 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 -; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $7, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw $9, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsllw $9, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll index 3368ec38f8af3..4d584a8d35e35 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll @@ -471,7 +471,7 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: pslld $28, %xmm2 ; SSE2-NEXT: por %xmm1, %xmm2 -; SSE2-NEXT: movsd %xmm2, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatconstant_funnnel_v2i32: @@ -550,7 +550,7 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind { ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE2-NEXT: pslld $28, %xmm2 ; X86-SSE2-NEXT: por %xmm1, %xmm2 -; X86-SSE2-NEXT: movsd %xmm2, %xmm0 +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] ; X86-SSE2-NEXT: retl %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>) ret <2 x i32> %res diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll index 4b3e462a5cc9e..0bc686174fd1c 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-4.ll @@ -314,98 +314,100 @@ define void @vf8(<32 x i16>* %in.vec, <8 x i16>* %out.vec0, <8 x i16>* %out.vec1 ; ; AVX2-SLOW-LABEL: vf8: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa (%rdi), %xmm0 -; AVX2-SLOW-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX2-SLOW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm6 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm5, %xmm6, %xmm5 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm6 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm6, %xmm4, %xmm4 -; AVX2-SLOW-NEXT: vpackusdw %xmm5, %xmm4, %xmm8 -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[0,2,2,3] +; AVX2-SLOW-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw
%xmm2, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm0, %xmm8 +; AVX2-SLOW-NEXT: vmovdqa (%rdi), %xmm1 +; AVX2-SLOW-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %xmm3 +; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm4 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm4[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,1,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm2[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm0[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm4[0,1,2,0,4,5,6,7] ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm3[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm2[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm3[0,1,2,0,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm2[2,0,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm1[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm0[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,3,1,4,5,6,7] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; AVX2-SLOW-NEXT: vmovdqa %xmm8, (%rsi) ; AVX2-SLOW-NEXT: vmovdqa %xmm5, (%rdx) -; AVX2-SLOW-NEXT: vmovdqa %xmm4, (%rcx) -; AVX2-SLOW-NEXT: vmovdqa 
%xmm0, (%r8) +; AVX2-SLOW-NEXT: vmovdqa %xmm0, (%rcx) +; AVX2-SLOW-NEXT: vmovdqa %xmm1, (%r8) +; AVX2-SLOW-NEXT: vzeroupper ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-LABEL: vf8: ; AVX2-FAST: # %bb.0: -; AVX2-FAST-NEXT: vmovdqa (%rdi), %xmm0 -; AVX2-FAST-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX2-FAST-NEXT: vmovdqa 32(%rdi), %xmm2 -; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm3 -; AVX2-FAST-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm5 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm6 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] -; AVX2-FAST-NEXT: vpackusdw %xmm5, %xmm6, %xmm5 -; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm6 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] -; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] -; AVX2-FAST-NEXT: vpackusdw %xmm6, %xmm4, %xmm4 -; AVX2-FAST-NEXT: vpackusdw %xmm5, %xmm4, %xmm8 +; AVX2-FAST-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-FAST-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-FAST-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vpackusdw %xmm1, %xmm0, %xmm8 +; AVX2-FAST-NEXT: vmovdqa (%rdi), %xmm1 +; AVX2-FAST-NEXT: vmovdqa 16(%rdi), %xmm2 +; AVX2-FAST-NEXT: vmovdqa 32(%rdi), %xmm3 +; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm4 ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-NEXT: vpshufb %xmm5, %xmm3, %xmm6 -; AVX2-FAST-NEXT: vpshufb %xmm5, %xmm2, %xmm5 +; AVX2-FAST-NEXT: vpshufb %xmm5, %xmm4, %xmm6 +; AVX2-FAST-NEXT: vpshufb %xmm5, %xmm3, %xmm5 ; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] ; AVX2-FAST-NEXT: vmovdqa {{.*#+}} xmm6 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-NEXT: vpshufb %xmm6, %xmm1, %xmm7 -; AVX2-FAST-NEXT: vpshufb %xmm6, %xmm0, %xmm6 +; AVX2-FAST-NEXT: vpshufb %xmm6, %xmm2, %xmm7 +; AVX2-FAST-NEXT: vpshufb %xmm6, %xmm1, %xmm6 ; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1] ; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm5 = xmm6[0,1],xmm5[2,3] +; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[3,1,2,3] +; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm6 = xmm4[0,1,2,0,4,5,6,7] ; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] -; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm6 = xmm3[0,1,2,0,4,5,6,7] -; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm7 = xmm2[0,1,2,0,4,5,6,7] +; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm7 = xmm3[0,1,2,0,4,5,6,7] ; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3] +; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm7 = xmm2[2,0,2,3,4,5,6,7] ; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] -; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm7 = xmm1[2,0,2,3,4,5,6,7] -; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm4 = xmm0[2,0,2,3,4,5,6,7] -; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1] -; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,3] +; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[2,0,2,3,4,5,6,7] +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1] +; AVX2-FAST-NEXT: 
vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3] +; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,3,1,4,5,6,7] ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7] -; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7] -; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7] ; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7] -; AVX2-FAST-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] -; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; AVX2-FAST-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-FAST-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] ; AVX2-FAST-NEXT: vmovdqa %xmm8, (%rsi) ; AVX2-FAST-NEXT: vmovdqa %xmm5, (%rdx) -; AVX2-FAST-NEXT: vmovdqa %xmm4, (%rcx) -; AVX2-FAST-NEXT: vmovdqa %xmm0, (%r8) +; AVX2-FAST-NEXT: vmovdqa %xmm0, (%rcx) +; AVX2-FAST-NEXT: vmovdqa %xmm1, (%r8) +; AVX2-FAST-NEXT: vzeroupper ; AVX2-FAST-NEXT: retq ; ; AVX512-LABEL: vf8: @@ -543,278 +545,281 @@ define void @vf16(<64 x i16>* %in.vec, <16 x i16>* %out.vec0, <16 x i16>* %out.v ; ; AVX1-LABEL: vf16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa 112(%rdi), %xmm12 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm12[0],xmm1[1,2,3],xmm12[4],xmm1[5,6,7] -; AVX1-NEXT: vmovdqa 96(%rdi), %xmm4 -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0],xmm1[1,2,3],xmm4[4],xmm1[5,6,7] -; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vmovdqa 80(%rdi), %xmm5 -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0],xmm1[1,2,3],xmm5[4],xmm1[5,6,7] -; AVX1-NEXT: vmovdqa 64(%rdi), %xmm6 -; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm6[0],xmm1[1,2,3],xmm6[4],xmm1[5,6,7] -; AVX1-NEXT: vpackusdw %xmm2, %xmm7, %xmm2 -; AVX1-NEXT: vpackusdw %xmm0, %xmm2, %xmm8 -; AVX1-NEXT: vmovdqa (%rdi), %xmm10 -; AVX1-NEXT: vmovdqa 16(%rdi), %xmm11 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa 112(%rdi), %xmm5 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm5[0],xmm2[1,2,3],xmm5[4],xmm2[5,6,7] +; AVX1-NEXT: vmovdqa 96(%rdi), %xmm6 +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm6[0],xmm2[1,2,3],xmm6[4],xmm2[5,6,7] +; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqa 80(%rdi), %xmm7 +; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm7[0],xmm2[1,2,3],xmm7[4],xmm2[5,6,7] +; AVX1-NEXT: vmovdqa 64(%rdi), %xmm0 +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] +; AVX1-NEXT: vpackusdw %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpackusdw %xmm1, %xmm3, %xmm1 +; AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovdqa (%rdi), %xmm11 +; AVX1-NEXT: vmovdqa 16(%rdi), %xmm12 ; AVX1-NEXT: vmovdqa 32(%rdi), %xmm13 -; AVX1-NEXT: vmovdqa 48(%rdi), %xmm0 -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm13[0],xmm1[1,2,3],xmm13[4],xmm1[5,6,7] -; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm11[0],xmm1[1,2,3],xmm11[4],xmm1[5,6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm10[0],xmm1[1,2,3],xmm10[4],xmm1[5,6,7] -; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 -; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[0,1],ymm8[0,1] -; AVX1-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = 
xmm12[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm4[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; AVX1-NEXT: vmovdqa 48(%rdi), %xmm1 +; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm13[0],xmm2[1,2,3],xmm13[4],xmm2[5,6,7] +; AVX1-NEXT: vpackusdw %xmm3, %xmm4, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm12[0],xmm2[1,2,3],xmm12[4],xmm2[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm11[0],xmm2[1,2,3],xmm11[4],xmm2[5,6,7] +; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpackusdw %xmm3, %xmm2, %xmm9 ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm5[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm9 -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,2,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm13[0,2,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm6[0,2,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,1,3,4,5,6,7] ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm11[0,2,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm7[0,2,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm10[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] -; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm1[0,1,2,3],ymm9[4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm14 = xmm12[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm14[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm15 = xmm4[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm15[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm8 = xmm5[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm5 = xmm8[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm6[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm6[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm5[0,1,2,3],xmm2[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm12 +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm10 +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm13[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm12[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm11[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = 
xmm2[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] +; AVX1-NEXT: vblendps {{.*#+}} ymm10 = ymm2[0,1,2,3],ymm10[4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm15 = xmm5[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm15[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm8 = xmm6[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm5 = xmm8[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm7[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm5[2,0,2,3,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm5 = xmm0[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm0[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm6[0,1,2,3],xmm3[4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm14 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm1[0,1,2,0,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm13[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm7[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm11[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm5[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm10[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm7[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm13 = xmm3[0],xmm6[0],xmm3[1],xmm6[1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm12[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm6[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm11[3,1,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm3[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7] -; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm12[4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm14[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm15[0,1,3,1,4,5,6,7] ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2 -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm7[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm13[4,5,6,7] +; AVX1-NEXT: vblendps {{.*#+}} ymm11 = ymm2[0,1,2,3],ymm14[4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm15[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm8[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm5[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm7[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm6[3,1,2,3,4,5,6,7] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm2, (%rsi) -; AVX1-NEXT: vmovaps %ymm9, (%rdx) -; AVX1-NEXT: vmovaps %ymm1, (%rcx) +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX1-NEXT: vmovdqa %xmm9, (%rsi) +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVX1-NEXT: vmovaps %xmm1, 16(%rsi) +; AVX1-NEXT: vmovaps %ymm10, (%rdx) +; AVX1-NEXT: vmovaps %ymm11, (%rcx) ; AVX1-NEXT: vmovaps %ymm0, (%r8) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-SLOW-LABEL: vf16: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: vmovdqa (%rdi), %xmm10 -; AVX2-SLOW-NEXT: vmovdqa 16(%rdi), %xmm11 -; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %xmm12 -; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm13 ; AVX2-SLOW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm13[0],xmm0[1,2,3],xmm13[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm12[0],xmm0[1,2,3],xmm12[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm5, %xmm2 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm11[0],xmm0[1,2,3],xmm11[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm6 = xmm10[0],xmm0[1,2,3],xmm10[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm5, %xmm6, %xmm5 -; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm5, %xmm8 -; AVX2-SLOW-NEXT: vmovdqa 112(%rdi), %xmm6 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm6[0],xmm0[1,2,3],xmm6[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa 96(%rdi), %xmm7 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm5 = xmm7[0],xmm0[1,2,3],xmm7[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm5, %xmm2 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm2, %xmm2 -; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm9 -; AVX2-SLOW-NEXT: vmovdqa 80(%rdi), %xmm2 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa 64(%rdi), %xmm5 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm5[0],xmm0[1,2,3],xmm5[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX2-SLOW-NEXT: vpackusdw %xmm0, %xmm0, 
%xmm0 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm9[6,7] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm8[0,1,2,3],ymm0[4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm2[6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm6[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa (%rdi), %xmm9 +; AVX2-SLOW-NEXT: vmovdqa 16(%rdi), %xmm10 +; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %xmm11 +; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm12 +; AVX2-SLOW-NEXT: vmovdqa 112(%rdi), %xmm6 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm6[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] +; AVX2-SLOW-NEXT: vmovdqa 96(%rdi), %xmm7 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm7[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm7[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 80(%rdi), %xmm4 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm4[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm5[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 64(%rdi), %xmm2 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm13[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm12[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm12[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm11[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,1,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm11[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm10[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm10[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm9[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm3[0,1],xmm1[2,3] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm9 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm13 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm6[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm15[0,1,2,0,4,5,6,7] ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm7[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = 
xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm8[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm6[0],xmm3[0],xmm6[1],xmm3[1] +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm4[2,0,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm5[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1] -; AVX2-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm14 = ymm4[0,1,2,3,4,5],ymm1[6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm13[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm12[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm7[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm11[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm6[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm10[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm4[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm14[4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm8[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm2[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm14 = ymm6[0,1,2,3,4,5],ymm3[6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm12[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm11[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm0[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm10[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm7[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm6 = xmm9[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm6[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm14[4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm15[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm8[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; AVX2-SLOW-NEXT: vpunpckldq 
{{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5],ymm1[6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm13[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm7[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm6[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm12[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm7[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm6[3,1,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] ; AVX2-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload ; AVX2-SLOW-NEXT: vmovaps %ymm2, (%rsi) -; AVX2-SLOW-NEXT: vmovdqa %ymm9, (%rdx) -; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rcx) -; AVX2-SLOW-NEXT: vmovdqa %ymm1, (%r8) +; AVX2-SLOW-NEXT: vmovdqa %ymm13, (%rdx) +; AVX2-SLOW-NEXT: vmovdqa %ymm1, (%rcx) +; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%r8) ; AVX2-SLOW-NEXT: vzeroupper ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-ALL-LABEL: vf16: ; AVX2-FAST-ALL: # %bb.0: -; AVX2-FAST-ALL-NEXT: vmovdqa 64(%rdi), %ymm8 -; AVX2-FAST-ALL-NEXT: vmovdqa 96(%rdi), %ymm13 -; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %xmm14 -; AVX2-FAST-ALL-NEXT: vmovdqa 16(%rdi), %xmm1 -; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %xmm3 +; AVX2-FAST-ALL-NEXT: vmovdqa 64(%rdi), %ymm11 +; AVX2-FAST-ALL-NEXT: vmovdqa 96(%rdi), %ymm12 +; AVX2-FAST-ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm3 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-ALL-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 +; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-ALL-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,2,3,0,2,4,6] +; AVX2-FAST-ALL-NEXT: vpermd %ymm12, %ymm3, %ymm4 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm10 = +; AVX2-FAST-ALL-NEXT: vpshufb %ymm10, %ymm4, %ymm6 +; AVX2-FAST-ALL-NEXT: vpermd %ymm11, %ymm3, %ymm7 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm8 = <0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u> +; AVX2-FAST-ALL-NEXT: vpshufb %ymm8, %ymm7, %ymm3 +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3,4,5],ymm6[6,7] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm3[4,5,6,7] +; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %xmm13 +; AVX2-FAST-ALL-NEXT: vmovdqa 16(%rdi), %xmm6 +; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %xmm0 ; AVX2-FAST-ALL-NEXT: vmovdqa 48(%rdi), %xmm5 -; AVX2-FAST-ALL-NEXT: vpxor %xmm2, 
%xmm2, %xmm2 -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm7 = xmm5[0],xmm2[1,2,3],xmm5[4],xmm2[5,6,7] -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm4 = xmm3[0],xmm2[1,2,3],xmm3[4],xmm2[5,6,7] -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm7, %xmm4, %xmm4 -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm7 = xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm2 = xmm14[0],xmm2[1,2,3],xmm14[4],xmm2[5,6,7] -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm7, %xmm2, %xmm2 -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,2,3,0,2,4,6] -; AVX2-FAST-ALL-NEXT: vpermd %ymm13, %ymm4, %ymm7 -; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm10 = ymm7[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29] -; AVX2-FAST-ALL-NEXT: vpermd %ymm8, %ymm4, %ymm4 -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm11 = <0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u> -; AVX2-FAST-ALL-NEXT: vpshufb %ymm11, %ymm4, %ymm12 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm10 = ymm12[0,1,2,3,4,5],ymm10[6,7] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm10 = ymm2[0,1,2,3],ymm10[4,5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-ALL-NEXT: vpshufb %xmm2, %xmm5, %xmm6 -; AVX2-FAST-ALL-NEXT: vpshufb %xmm2, %xmm3, %xmm2 -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1] -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm6 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-ALL-NEXT: vpshufb %xmm6, %xmm1, %xmm0 -; AVX2-FAST-ALL-NEXT: vpshufb %xmm6, %xmm14, %xmm6 -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3] +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-ALL-NEXT: vpshufb %xmm1, %xmm5, %xmm2 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm1, %xmm0, %xmm1 +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm2 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-ALL-NEXT: vpshufb %xmm2, %xmm6, %xmm3 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm2, %xmm13, %xmm2 +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm15 = -; AVX2-FAST-ALL-NEXT: vpshufb %ymm15, %ymm7, %ymm6 -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm12 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u> -; AVX2-FAST-ALL-NEXT: vpshufb %ymm12, %ymm4, %ymm4 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm4 = ymm4[0,1,2,3,4,5],ymm6[6,7] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm9 = ymm0[0,1,2,3],ymm4[4,5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm0 = [1,3,2,3,1,3,5,7] -; AVX2-FAST-ALL-NEXT: vpermd %ymm13, %ymm0, %ymm4 -; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm6 = ymm4[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29] -; AVX2-FAST-ALL-NEXT: vpermd %ymm8, %ymm0, %ymm0 -; AVX2-FAST-ALL-NEXT: vpshufb %ymm11, %ymm0, %ymm8 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm8 = ymm8[0,1,2,3,4,5],ymm6[6,7] +; AVX2-FAST-ALL-NEXT: vpshufb %ymm15, %ymm4, %ymm3 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm14 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u> +; AVX2-FAST-ALL-NEXT: vpshufb %ymm14, %ymm7, %ymm4 +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm3 = 
ymm4[0,1,2,3,4,5],ymm3[6,7] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm9 = ymm1[0,1,2,3],ymm3[4,5,6,7] +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,2,3,1,3,5,7] +; AVX2-FAST-ALL-NEXT: vpermd %ymm12, %ymm1, %ymm3 +; AVX2-FAST-ALL-NEXT: vpshufb %ymm10, %ymm3, %ymm7 +; AVX2-FAST-ALL-NEXT: vpermd %ymm11, %ymm1, %ymm1 +; AVX2-FAST-ALL-NEXT: vpshufb %ymm8, %ymm1, %ymm8 +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm8 = ymm8[0,1,2,3,4,5],ymm7[6,7] ; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[3,1,2,3] ; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm5[0,1,2,0,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm7 = xmm3[0,1,2,0,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm11 = xmm7[0],xmm2[0],xmm7[1],xmm2[1] -; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm7 = xmm1[2,0,2,3,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm6 = xmm14[3,1,2,3] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm6[2,0,2,3,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm11[2,3] +; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] +; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm0[0,1,2,0,4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm10 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm4 = xmm6[3,1,2,3] +; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm6 = xmm4[2,0,2,3,4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm7 = xmm13[3,1,2,3] +; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm7[2,0,2,3,4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm10[2,3] ; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm8[4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpshufb %ymm15, %ymm4, %ymm4 -; AVX2-FAST-ALL-NEXT: vpshufb %ymm12, %ymm0, %ymm0 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm4[6,7] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm5[0,1,3,1,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,3,1,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm6[3,1,2,3,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqa %ymm10, (%rsi) +; AVX2-FAST-ALL-NEXT: vpshufb %ymm15, %ymm3, %ymm3 +; AVX2-FAST-ALL-NEXT: vpshufb %ymm14, %ymm1, %ymm1 +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm3[6,7] +; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[0,1,3,1,4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,3,1,4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm4[3,1,2,3,4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm7[3,1,2,3,4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; AVX2-FAST-ALL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 
# 32-byte Reload +; AVX2-FAST-ALL-NEXT: vmovaps %ymm1, (%rsi) ; AVX2-FAST-ALL-NEXT: vmovdqa %ymm9, (%rdx) ; AVX2-FAST-ALL-NEXT: vmovdqa %ymm2, (%rcx) ; AVX2-FAST-ALL-NEXT: vmovdqa %ymm0, (%r8) @@ -823,99 +828,99 @@ define void @vf16(<64 x i16>* %in.vec, <16 x i16>* %out.vec0, <16 x i16>* %out.v ; ; AVX2-FAST-PERLANE-LABEL: vf16: ; AVX2-FAST-PERLANE: # %bb.0: -; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm10 -; AVX2-FAST-PERLANE-NEXT: vmovdqa 16(%rdi), %xmm11 -; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %xmm12 -; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm13 ; AVX2-FAST-PERLANE-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm13[0],xmm0[1,2,3],xmm13[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm5 = xmm12[0],xmm0[1,2,3],xmm12[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm5, %xmm2 -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm5 = xmm11[0],xmm0[1,2,3],xmm11[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm6 = xmm10[0],xmm0[1,2,3],xmm10[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm5, %xmm6, %xmm5 -; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm5, %xmm8 -; AVX2-FAST-PERLANE-NEXT: vmovdqa 112(%rdi), %xmm6 -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm6[0],xmm0[1,2,3],xmm6[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vmovdqa 96(%rdi), %xmm7 -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm5 = xmm7[0],xmm0[1,2,3],xmm7[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm5, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 +; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm2, %xmm2 -; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm9 -; AVX2-FAST-PERLANE-NEXT: vmovdqa 80(%rdi), %xmm2 -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vmovdqa 64(%rdi), %xmm5 -; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm0 = xmm5[0],xmm0[1,2,3],xmm5[4],xmm0[5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm9[6,7] -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm8[0,1,2,3],ymm0[4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm2[6,7] +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-FAST-PERLANE-NEXT: 
vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm6, %xmm1 -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm7, %xmm3 -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm9 -; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm3, %xmm2, %xmm4 -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm3, %xmm5, %xmm1 +; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm9 +; AVX2-FAST-PERLANE-NEXT: vmovdqa 16(%rdi), %xmm10 +; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %xmm11 +; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm12 +; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-PERLANE-NEXT: vmovdqa 112(%rdi), %xmm6 +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm2, %xmm6, %xmm7 +; AVX2-FAST-PERLANE-NEXT: vmovdqa 96(%rdi), %xmm0 +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm2, %xmm0, %xmm1 +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1] +; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm13 +; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm7 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-PERLANE-NEXT: vmovdqa 80(%rdi), %xmm3 +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm7, %xmm3, %xmm4 +; AVX2-FAST-PERLANE-NEXT: vmovdqa 64(%rdi), %xmm5 +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm7, %xmm5, %xmm1 ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1] ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm9[6,7] -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm13, %xmm4 -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm12, %xmm0 -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm3, %xmm11, %xmm4 -; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm3, %xmm10, %xmm3 -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3] -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm9 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm13[6,7] +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm2, %xmm12, %xmm4 +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm2, %xmm11, %xmm2 +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm7, %xmm10, %xmm4 +; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm7, %xmm9, %xmm7 +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1] +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm2 = xmm4[0,1],xmm2[2,3] +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm13 = ymm2[0,1,2,3],ymm1[4,5,6,7] ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm15 = xmm6[3,1,2,3] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,2,0,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm8 = xmm7[3,1,2,3] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,2,0,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[2,0,2,3,4,5,6,7] -; 
AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[3,1,2,3] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm5[2,0,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm15[0,1,2,0,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm8 = xmm0[3,1,2,3] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm8[0,1,2,0,4,5,6,7] ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1] ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm14 = ymm4[0,1,2,3,4,5],ymm1[6,7] -; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm13[0,1,2,0,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm7 = xmm12[3,1,2,3] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm7[0,1,2,0,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1] -; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm6 = xmm11[3,1,2,3] +; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm3[2,0,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[3,1,2,3] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm7 = xmm5[2,0,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6 +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm14 = ymm6[0,1,2,3,4,5],ymm4[6,7] +; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[3,1,2,3] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm7 = xmm12[0,1,2,0,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm2 = xmm11[3,1,2,3] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,1,2,0,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1] +; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm7 = xmm10[3,1,2,3] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm7[2,0,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm6 = xmm9[3,1,2,3] ; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm6[2,0,2,3,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm4 = xmm10[3,1,2,3] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm4[2,0,2,3,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3] ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm14[4,5,6,7] ; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,3,1,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm8[0,1,3,1,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,3,1,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[3,1,2,3,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5],ymm1[6,7] -; 
AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm13[0,1,3,1,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm7[0,1,3,1,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm6[3,1,2,3,4,5,6,7] -; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm5[3,1,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5],ymm1[6,7] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm12[0,1,3,1,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm7[3,1,2,3,4,5,6,7] +; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm6[3,1,2,3,4,5,6,7] ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3] ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] ; AVX2-FAST-PERLANE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload ; AVX2-FAST-PERLANE-NEXT: vmovaps %ymm2, (%rsi) -; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm9, (%rdx) +; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm13, (%rdx) ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rcx) ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, (%r8) ; AVX2-FAST-PERLANE-NEXT: vzeroupper @@ -1205,184 +1210,179 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out. 
; ; AVX1-LABEL: vf32: ; AVX1: # %bb.0: -; AVX1-NEXT: subq $232, %rsp -; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa 112(%rdi), %xmm14 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm14[0],xmm0[1,2,3],xmm14[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa 96(%rdi), %xmm10 -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm10[0],xmm0[1,2,3],xmm10[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vmovdqa 80(%rdi), %xmm9 -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm9[0],xmm0[1,2,3],xmm9[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vmovdqa 64(%rdi), %xmm3 -; AVX1-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7] -; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm3 -; AVX1-NEXT: vmovdqa (%rdi), %xmm1 -; AVX1-NEXT: vmovdqa 16(%rdi), %xmm5 -; AVX1-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vmovdqa 32(%rdi), %xmm8 -; AVX1-NEXT: vmovdqa 48(%rdi), %xmm7 -; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm7[0],xmm0[1,2,3],xmm7[4],xmm0[5,6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm8[0],xmm0[1,2,3],xmm8[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpackusdw %xmm4, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm0[1,2,3],xmm5[4],xmm0[5,6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa %xmm1, %xmm11 -; AVX1-NEXT: vpackusdw %xmm4, %xmm5, %xmm4 -; AVX1-NEXT: vpackusdw %xmm2, %xmm4, %xmm2 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[0,1],ymm3[0,1] -; AVX1-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX1-NEXT: subq $200, %rsp +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX1-NEXT: vmovdqa 240(%rdi), %xmm1 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; AVX1-NEXT: vmovdqa %xmm1, %xmm12 ; AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa 224(%rdi), %xmm1 +; AVX1-NEXT: vmovdqa 224(%rdi), %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; AVX1-NEXT: vmovdqa %xmm2, %xmm14 +; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa 208(%rdi), %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; AVX1-NEXT: vmovdqa %xmm2, %xmm15 +; AVX1-NEXT: vmovdqa 192(%rdi), %xmm2 +; AVX1-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovdqa 176(%rdi), %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; AVX1-NEXT: vmovdqa 160(%rdi), %xmm1 ; AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7] -; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vmovdqa 208(%rdi), %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; AVX1-NEXT: 
vpackusdw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa 144(%rdi), %xmm1 ; AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vmovdqa 192(%rdi), %xmm13 -; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm13[0],xmm0[1,2,3],xmm13[4],xmm0[5,6,7] -; AVX1-NEXT: vpackusdw %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 -; AVX1-NEXT: vmovdqa 176(%rdi), %xmm5 -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0],xmm0[1,2,3],xmm5[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vmovdqa 160(%rdi), %xmm15 -; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm15[0],xmm0[1,2,3],xmm15[4],xmm0[5,6,7] -; AVX1-NEXT: vpackusdw %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vmovdqa 144(%rdi), %xmm12 -; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm12[0],xmm0[1,2,3],xmm12[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vmovdqa 128(%rdi), %xmm6 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm6[0],xmm0[1,2,3],xmm6[4],xmm0[5,6,7] -; AVX1-NEXT: vmovdqa %xmm6, (%rsp) # 16-byte Spill -; AVX1-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],ymm2[0,1] -; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm14[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm10[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm9[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm9[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm7[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm8[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm10[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm11[0,2,2,3] -; AVX1-NEXT: vmovdqa %xmm11, %xmm8 -; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] -; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX1-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload -; AVX1-NEXT: # xmm0 = mem[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; AVX1-NEXT: vmovdqa 128(%rdi), %xmm2 +; AVX1-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; AVX1-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 +; 
AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovdqa 112(%rdi), %xmm8 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm8[0],xmm4[1,2,3],xmm8[4],xmm4[5,6,7] +; AVX1-NEXT: vmovdqa 96(%rdi), %xmm5 +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm5[0],xmm4[1,2,3],xmm5[4],xmm4[5,6,7] +; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa 80(%rdi), %xmm9 +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm9[0],xmm4[1,2,3],xmm9[4],xmm4[5,6,7] +; AVX1-NEXT: vmovdqa 64(%rdi), %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm6 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; AVX1-NEXT: vpackusdw %xmm1, %xmm6, %xmm1 +; AVX1-NEXT: vpackusdw %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vmovdqa 32(%rdi), %xmm10 +; AVX1-NEXT: vmovdqa 48(%rdi), %xmm0 +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm10[0],xmm4[1,2,3],xmm10[4],xmm4[5,6,7] +; AVX1-NEXT: vpackusdw %xmm1, %xmm7, %xmm13 +; AVX1-NEXT: vmovdqa (%rdi), %xmm11 +; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm11[0],xmm4[1,2,3],xmm11[4],xmm4[5,6,7] +; AVX1-NEXT: vpackusdw %xmm7, %xmm4, %xmm4 +; AVX1-NEXT: vpackusdw %xmm13, %xmm4, %xmm2 +; AVX1-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm8[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm5[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm9[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm3[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm10[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm1[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm11[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,2,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm13[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm5[0,2,2,3] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm4[4,5,6,7] +; AVX1-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm12[0,2,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} 
xmm2 = xmm2[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm15[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,1,1,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm12[0,2,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm6[0,2,2,3] +; AVX1-NEXT: vmovdqa %xmm14, %xmm12 +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm14[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; AVX1-NEXT: vmovdqa %xmm15, %xmm14 +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm15[0,2,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] -; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm14[3,1,2,3] -; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload -; AVX1-NEXT: # xmm2 = mem[3,1,2,3] +; AVX1-NEXT: vmovdqa (%rsp), %xmm15 # 16-byte Reload +; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm15[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7] +; AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm13[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload +; AVX1-NEXT: # xmm6 = mem[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,1,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[1],xmm4[1] +; AVX1-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload +; AVX1-NEXT: # xmm6 = mem[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpshufd $232, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload +; AVX1-NEXT: # xmm7 = mem[0,2,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm4[0,1,2,3],ymm2[4,5,6,7] +; AVX1-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm8[3,1,2,3] ; AVX1-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[3,1,2,3] +; AVX1-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm5[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm9[3,1,2,3] +; AVX1-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm3[3,1,2,3] +; AVX1-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = 
xmm4[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm5[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm9 = xmm3[0,1,2,3],xmm2[4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm8 = xmm0[3,1,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm10[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm8[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm6[0,1,2,0,4,5,6,7] ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload -; AVX1-NEXT: # xmm1 = mem[3,1,2,3] -; AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm9[3,1,2,3] -; AVX1-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: vpshufd {{.*#+}} xmm7 = xmm7[3,1,2,3] -; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Folded Reload -; AVX1-NEXT: # xmm9 = mem[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm7[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm9[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; AVX1-NEXT: vpshufd {{.*#+}} xmm12 = xmm10[3,1,2,3] -; AVX1-NEXT: vpshufd {{.*#+}} xmm8 = xmm8[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm12[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm8[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm1[3,1,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm11[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm5[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm4[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm9, %ymm0, %ymm1 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Folded Reload ; AVX1-NEXT: # xmm10 = mem[3,1,2,3] -; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm11[3,1,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm11 = xmm12[3,1,2,3] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm10[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm6[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm11[0,1,2,0,4,5,6,7] ; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload -; AVX1-NEXT: # xmm5 = mem[3,1,2,3] -; AVX1-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm5[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm14 = xmm13[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm14[0],xmm1[0],xmm14[1],xmm1[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Folded Reload -; AVX1-NEXT: # xmm14 = mem[3,1,2,3] -; 
AVX1-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm14[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm15[0,1,2,0,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Folded Reload -; AVX1-NEXT: # xmm4 = mem[3,1,2,3] -; AVX1-NEXT: vpshufd $231, (%rsp), %xmm2 # 16-byte Folded Reload +; AVX1-NEXT: vpshufd {{.*#+}} xmm12 = xmm14[3,1,2,3] +; AVX1-NEXT: vpshufd {{.*#+}} xmm9 = xmm15[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm12[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm9[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm1[0,1,2,3],xmm0[4,5,6,7] +; AVX1-NEXT: vpshufd {{.*#+}} xmm14 = xmm13[3,1,2,3] +; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Folded Reload +; AVX1-NEXT: # xmm15 = mem[3,1,2,3] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm14[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,2,0,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload +; AVX1-NEXT: # xmm3 = mem[3,1,2,3] +; AVX1-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload ; AVX1-NEXT: # xmm2 = mem[3,1,2,3] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm4[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm11 = xmm2[2,0,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm11[0],xmm0[0],xmm11[1],xmm0[1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm3[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm13 = xmm2[2,0,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm13[0],xmm0[0],xmm13[1],xmm0[1] ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7] -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm1 -; AVX1-NEXT: vblendps {{.*#+}} ymm11 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm0, %ymm1 +; AVX1-NEXT: vblendps {{.*#+}} ymm13 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX1-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; AVX1-NEXT: # xmm0 = mem[0,1,3,1,4,5,6,7] ; AVX1-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload @@ -1390,241 +1390,237 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out. 
; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX1-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; AVX1-NEXT: # xmm1 = mem[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload -; AVX1-NEXT: # xmm3 = mem[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; AVX1-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload +; AVX1-NEXT: # xmm7 = mem[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1] ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm7[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm9[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm12[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm7 = xmm8[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm8[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm6[0],xmm1[0],xmm6[1],xmm1[1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm1[4,5,6,7] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm6[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm5[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm5 = xmm13[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm14[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm11[0,1,3,1,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm12[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm5 = xmm9[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm1[4,5,6,7] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm14[0,1,3,1,4,5,6,7] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[0,1,3,1,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] -; AVX1-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] ; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7] -; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] -; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7] +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 ; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; AVX1-NEXT: 
vmovaps %ymm2, 32(%rsi) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm2, (%rsi) +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; AVX1-NEXT: vmovaps %xmm2, (%rsi) +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; AVX1-NEXT: vmovaps %xmm2, 16(%rsi) +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; AVX1-NEXT: vmovaps %xmm2, 32(%rsi) +; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload +; AVX1-NEXT: vmovaps %xmm2, 48(%rsi) ; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload ; AVX1-NEXT: vmovaps %ymm2, 32(%rdx) ; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload ; AVX1-NEXT: vmovaps %ymm2, (%rdx) -; AVX1-NEXT: vmovaps %ymm11, 32(%rcx) +; AVX1-NEXT: vmovaps %ymm13, 32(%rcx) ; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload ; AVX1-NEXT: vmovaps %ymm2, (%rcx) ; AVX1-NEXT: vmovaps %ymm1, 32(%r8) ; AVX1-NEXT: vmovaps %ymm0, (%r8) -; AVX1-NEXT: addq $232, %rsp +; AVX1-NEXT: addq $200, %rsp ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; ; AVX2-SLOW-LABEL: vf32: ; AVX2-SLOW: # %bb.0: -; AVX2-SLOW-NEXT: subq $248, %rsp -; AVX2-SLOW-NEXT: vmovdqa (%rdi), %xmm5 -; AVX2-SLOW-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vmovdqa 16(%rdi), %xmm9 -; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %xmm4 -; AVX2-SLOW-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm3 +; AVX2-SLOW-NEXT: subq $200, %rsp ; AVX2-SLOW-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa %xmm3, %xmm14 -; AVX2-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm0[1,2,3],xmm4[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm9[0],xmm0[1,2,3],xmm9[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa %xmm9, (%rsp) # 16-byte Spill -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm0[1,2,3],xmm5[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm4, %xmm3 -; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 -; AVX2-SLOW-NEXT: vmovdqa 112(%rdi), %xmm13 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm13[0],xmm0[1,2,3],xmm13[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa 96(%rdi), %xmm12 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm4 = xmm12[0],xmm0[1,2,3],xmm12[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm4, %xmm3 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm2, %xmm2 +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm3 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: 
vextracti128 $1, %ymm3, %xmm4 +; AVX2-SLOW-NEXT: vpackusdw %xmm4, %xmm3, %xmm3 ; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm3, %xmm3 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX2-SLOW-NEXT: vmovdqa 80(%rdi), %xmm6 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm7 = xmm6[0],xmm0[1,2,3],xmm6[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa 64(%rdi), %xmm8 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm8[0],xmm0[1,2,3],xmm8[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm7, %xmm1, %xmm1 -; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm1, %xmm1 -; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm3[6,7] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3,4,5],ymm2[6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] ; AVX2-SLOW-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-SLOW-NEXT: vmovdqa 176(%rdi), %xmm1 -; AVX2-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa 160(%rdi), %xmm2 -; AVX2-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7] +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 ; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 -; AVX2-SLOW-NEXT: vmovdqa 144(%rdi), %xmm3 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa %xmm3, %xmm15 -; AVX2-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vmovdqa 128(%rdi), %xmm3 -; AVX2-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm3, %xmm2 -; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm2, %xmm4 -; AVX2-SLOW-NEXT: vmovdqa 240(%rdi), %xmm10 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm2 = xmm10[0],xmm0[1,2,3],xmm10[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa 224(%rdi), %xmm11 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm3 = xmm11[0],xmm0[1,2,3],xmm11[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm3, %xmm7 -; AVX2-SLOW-NEXT: vmovdqa 208(%rdi), %xmm3 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa 192(%rdi), %xmm2 -; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7] -; AVX2-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX2-SLOW-NEXT: vpackusdw %xmm7, %xmm7, %xmm1 -; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-SLOW-NEXT: vpackusdw %xmm2, %xmm2, %xmm2 +; 
AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm3 +; AVX2-SLOW-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 ; AVX2-SLOW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm2[6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm13[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa (%rdi), %xmm12 +; AVX2-SLOW-NEXT: vmovdqa 16(%rdi), %xmm7 +; AVX2-SLOW-NEXT: vmovdqa 32(%rdi), %xmm3 +; AVX2-SLOW-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm15 +; AVX2-SLOW-NEXT: vmovdqa 112(%rdi), %xmm11 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm11[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm12[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 96(%rdi), %xmm10 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm10[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm8[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vmovdqa 80(%rdi), %xmm9 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm9[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vmovdqa 64(%rdi), %xmm8 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm8[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm14[0,2,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm15[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Reload -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm5[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm9[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm9[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm4[0,1],xmm1[2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; 
AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm7[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm12[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX2-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm10[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 240(%rdi), %xmm0 +; AVX2-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm11[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 224(%rdi), %xmm1 +; AVX2-SLOW-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm3[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 208(%rdi), %xmm6 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; AVX2-SLOW-NEXT: vmovdqa 192(%rdi), %xmm3 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] -; AVX2-SLOW-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm11[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 176(%rdi), %xmm1 +; AVX2-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm14[0,2,2,3] +; AVX2-SLOW-NEXT: vmovdqa 160(%rdi), %xmm2 +; AVX2-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm2[0,2,2,3] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[0,1,1,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm15[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm15[0,2,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[1,3,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm4[0,1],xmm1[2,3] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; AVX2-SLOW-NEXT: vmovdqa 144(%rdi), %xmm1 +; AVX2-SLOW-NEXT: vmovdqa 
%xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm13 = xmm13[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vmovdqa 128(%rdi), %xmm1 +; AVX2-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm14 = xmm1[0,2,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm14[1,3,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm13[0],xmm5[1],xmm13[1] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm4 = xmm5[0,1],xmm4[2,3] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7] ; AVX2-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm13[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm11[3,1,2,3] ; AVX2-SLOW-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm12[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm10[3,1,2,3] ; AVX2-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm2[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm6[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm2[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm9[3,1,2,3] ; AVX2-SLOW-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm8[3,1,2,3] -; AVX2-SLOW-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm8[3,1,2,3] +; AVX2-SLOW-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm1[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm2[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3,4,5],ymm0[6,7] -; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Folded Reload -; AVX2-SLOW-NEXT: # xmm8 = mem[3,1,2,3] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm5[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm8[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm13[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] -; AVX2-SLOW-NEXT: vpshufd $231, (%rsp), %xmm6 # 16-byte Folded Reload -; AVX2-SLOW-NEXT: # xmm6 = mem[3,1,2,3] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm5 = xmm9[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm6[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm12 = xmm5[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3,4,5],ymm0[6,7] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Folded Reload +; AVX2-SLOW-NEXT: # xmm14 = mem[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 
= xmm15[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm14[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm13 = xmm7[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm7 = xmm12[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm13[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm12 = xmm7[2,0,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm12[0],xmm0[0],xmm12[1],xmm0[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX2-SLOW-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm10 = xmm10[3,1,2,3] -; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Folded Reload -; AVX2-SLOW-NEXT: # xmm9 = mem[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm10[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm9[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload +; AVX2-SLOW-NEXT: # xmm12 = mem[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd $231, (%rsp), %xmm10 # 16-byte Folded Reload +; AVX2-SLOW-NEXT: # xmm10 = mem[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm12[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,2,0,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[3,1,2,3] -; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload -; AVX2-SLOW-NEXT: # xmm3 = mem[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm4[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm3[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm9 = xmm6[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm8 = xmm3[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm9[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm8[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm0[6,7] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm11 = xmm11[3,1,2,3] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm12 = xmm14[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm11[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm12[0,1,2,0,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1] -; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Folded Reload -; AVX2-SLOW-NEXT: # xmm14 = mem[3,1,2,3] -; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm15[3,1,2,3] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm14[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm15 = xmm2[2,0,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm7[2,3] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm15 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm3 = ymm1[0,1,2,3,4,5],ymm0[6,7] +; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded 
Reload +; AVX2-SLOW-NEXT: # xmm6 = mem[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload +; AVX2-SLOW-NEXT: # xmm5 = mem[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm6[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm5[0,1,2,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Folded Reload +; AVX2-SLOW-NEXT: # xmm4 = mem[3,1,2,3] +; AVX2-SLOW-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload +; AVX2-SLOW-NEXT: # xmm2 = mem[3,1,2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm4[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm11 = xmm2[2,0,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm11[0],xmm0[0],xmm11[1],xmm0[1] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm11 = ymm0[0,1,2,3],ymm3[4,5,6,7] ; AVX2-SLOW-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload ; AVX2-SLOW-NEXT: # xmm0 = mem[0,1,3,1,4,5,6,7] ; AVX2-SLOW-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload @@ -1632,33 +1628,33 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out. ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; AVX2-SLOW-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; AVX2-SLOW-NEXT: # xmm1 = mem[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload -; AVX2-SLOW-NEXT: # xmm7 = mem[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1] +; AVX2-SLOW-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload +; AVX2-SLOW-NEXT: # xmm3 = mem[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm8[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm13[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm6 = xmm6[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] -; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm5[0,1],xmm1[2,3] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm14[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm13[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm7[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1] +; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm3[0,1],xmm1[2,3] ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm9[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7] -; 
AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm12[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm10[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm9[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm7 = xmm8[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1] ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 ; AVX2-SLOW-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5],ymm1[6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm11[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm12[0,1,3,1,4,5,6,7] -; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm14[3,1,2,3,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm3 = xmm6[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[0,1,3,1,4,5,6,7] +; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7] ; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3] @@ -1671,169 +1667,159 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out. ; AVX2-SLOW-NEXT: vmovaps %ymm2, 32(%rdx) ; AVX2-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload ; AVX2-SLOW-NEXT: vmovaps %ymm2, (%rdx) -; AVX2-SLOW-NEXT: vmovdqa %ymm15, 32(%rcx) +; AVX2-SLOW-NEXT: vmovdqa %ymm11, 32(%rcx) ; AVX2-SLOW-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload ; AVX2-SLOW-NEXT: vmovaps %ymm2, (%rcx) ; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%r8) ; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%r8) -; AVX2-SLOW-NEXT: addq $248, %rsp +; AVX2-SLOW-NEXT: addq $200, %rsp ; AVX2-SLOW-NEXT: vzeroupper ; AVX2-SLOW-NEXT: retq ; ; AVX2-FAST-ALL-LABEL: vf32: ; AVX2-FAST-ALL: # %bb.0: ; AVX2-FAST-ALL-NEXT: subq $200, %rsp +; AVX2-FAST-ALL-NEXT: vmovdqa 192(%rdi), %ymm6 +; AVX2-FAST-ALL-NEXT: vmovdqu %ymm6, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX2-FAST-ALL-NEXT: vmovdqa 224(%rdi), %ymm7 +; AVX2-FAST-ALL-NEXT: vmovdqu %ymm7, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX2-FAST-ALL-NEXT: vmovdqa 64(%rdi), %ymm5 -; AVX2-FAST-ALL-NEXT: vmovdqu %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa 96(%rdi), %ymm6 -; AVX2-FAST-ALL-NEXT: vmovdqu %ymm6, (%rsp) # 32-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %xmm2 -; AVX2-FAST-ALL-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa 16(%rdi), %xmm11 -; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %xmm12 -; AVX2-FAST-ALL-NEXT: vmovdqa 48(%rdi), %xmm9 -; AVX2-FAST-ALL-NEXT: vpxor %xmm8, %xmm8, %xmm8 -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm0 = xmm9[0],xmm8[1,2,3],xmm9[4],xmm8[5,6,7] -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm1 = xmm12[0],xmm8[1,2,3],xmm12[4],xmm8[5,6,7] -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm0, %xmm1, %xmm0 -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm1 = xmm11[0],xmm8[1,2,3],xmm11[4],xmm8[5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqa %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm4 = xmm2[0],xmm8[1,2,3],xmm2[4],xmm8[5,6,7] -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm1, %xmm4, %xmm1 -; 
AVX2-FAST-ALL-NEXT: vpackusdw %xmm0, %xmm1, %xmm1 -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm15 = [0,2,2,3,0,2,4,6] -; AVX2-FAST-ALL-NEXT: vpermd %ymm6, %ymm15, %ymm2 -; AVX2-FAST-ALL-NEXT: vmovdqu %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm0 = -; AVX2-FAST-ALL-NEXT: vpshufb %ymm0, %ymm2, %ymm4 -; AVX2-FAST-ALL-NEXT: vmovdqa %ymm0, %ymm3 -; AVX2-FAST-ALL-NEXT: vpermd %ymm5, %ymm15, %ymm2 -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm0 = <0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u> -; AVX2-FAST-ALL-NEXT: vpshufb %ymm0, %ymm2, %ymm5 -; AVX2-FAST-ALL-NEXT: vmovdqa %ymm0, %ymm14 +; AVX2-FAST-ALL-NEXT: vmovdqu %ymm5, (%rsp) # 32-byte Spill +; AVX2-FAST-ALL-NEXT: vmovdqa 96(%rdi), %ymm4 +; AVX2-FAST-ALL-NEXT: vmovdqu %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX2-FAST-ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-ALL-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-ALL-NEXT: vextracti128 $1, %ymm2, %xmm3 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm1, %xmm2, %xmm1 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,2,3,0,2,4,6] +; AVX2-FAST-ALL-NEXT: vpermd %ymm4, %ymm3, %ymm10 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = +; AVX2-FAST-ALL-NEXT: vpshufb %ymm2, %ymm10, %ymm4 +; AVX2-FAST-ALL-NEXT: vpermd %ymm5, %ymm3, %ymm8 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm9 = <0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u> +; AVX2-FAST-ALL-NEXT: vpshufb %ymm9, %ymm8, %ymm5 +; AVX2-FAST-ALL-NEXT: vmovdqa %ymm9, %ymm11 ; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm4 = ymm5[0,1,2,3,4,5],ymm4[6,7] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm4[4,5,6,7] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm4[4,5,6,7] +; AVX2-FAST-ALL-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-ALL-NEXT: vextracti128 $1, %ymm1, %xmm4 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm4, %xmm1, %xmm1 +; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15] +; AVX2-FAST-ALL-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm4, %xmm0, %xmm0 +; AVX2-FAST-ALL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-FAST-ALL-NEXT: vpermd %ymm7, %ymm3, %ymm15 +; AVX2-FAST-ALL-NEXT: vpshufb %ymm2, %ymm15, %ymm1 +; AVX2-FAST-ALL-NEXT: vpermd %ymm6, %ymm3, %ymm9 +; AVX2-FAST-ALL-NEXT: vpshufb %ymm11, %ymm9, %ymm4 +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm4[0,1,2,3,4,5],ymm1[6,7] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX2-FAST-ALL-NEXT: vmovdqa 32(%rdi), %xmm12 +; AVX2-FAST-ALL-NEXT: vmovdqa 48(%rdi), %xmm2 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm6 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-ALL-NEXT: vpshufb %xmm6, %xmm2, %xmm1 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm6, %xmm12, %xmm4 +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] +; 
AVX2-FAST-ALL-NEXT: vmovdqa (%rdi), %xmm11 +; AVX2-FAST-ALL-NEXT: vmovdqa 16(%rdi), %xmm3 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm1 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX2-FAST-ALL-NEXT: vpshufb %xmm1, %xmm3, %xmm4 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm1, %xmm11, %xmm7 +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm5[2,3] +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm14 = +; AVX2-FAST-ALL-NEXT: vpshufb %ymm14, %ymm10, %ymm5 +; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm13 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u> +; AVX2-FAST-ALL-NEXT: vpshufb %ymm13, %ymm8, %ymm7 +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm5[4,5,6,7] ; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa 176(%rdi), %xmm13 -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm5 = xmm13[0],xmm8[1,2,3],xmm13[4],xmm8[5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; AVX2-FAST-ALL-NEXT: vmovdqa 160(%rdi), %xmm0 ; AVX2-FAST-ALL-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm7 = xmm0[0],xmm8[1,2,3],xmm0[4],xmm8[5,6,7] -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm5, %xmm7, %xmm10 -; AVX2-FAST-ALL-NEXT: vmovdqa 144(%rdi), %xmm5 -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm0 = xmm5[0],xmm8[1,2,3],xmm5[4],xmm8[5,6,7] +; AVX2-FAST-ALL-NEXT: vmovdqa 176(%rdi), %xmm4 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm6, %xmm4, %xmm7 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm6, %xmm0, %xmm6 +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm6[0],xmm7[0],xmm6[1],xmm7[1] +; AVX2-FAST-ALL-NEXT: vmovdqa 128(%rdi), %xmm5 ; AVX2-FAST-ALL-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa 128(%rdi), %xmm4 -; AVX2-FAST-ALL-NEXT: vpblendw {{.*#+}} xmm6 = xmm4[0],xmm8[1,2,3],xmm4[4],xmm8[5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm0, %xmm6, %xmm0 -; AVX2-FAST-ALL-NEXT: vpackusdw %xmm10, %xmm0, %xmm0 -; AVX2-FAST-ALL-NEXT: vmovdqa 192(%rdi), %ymm1 -; AVX2-FAST-ALL-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa 224(%rdi), %ymm10 -; AVX2-FAST-ALL-NEXT: vpermd %ymm10, %ymm15, %ymm6 -; AVX2-FAST-ALL-NEXT: vpermd %ymm1, %ymm15, %ymm1 -; AVX2-FAST-ALL-NEXT: vpshufb %ymm3, %ymm6, %ymm15 -; AVX2-FAST-ALL-NEXT: vpshufb %ymm14, %ymm1, %ymm14 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm14 = ymm14[0,1,2,3,4,5],ymm15[6,7] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm14[4,5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm14 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-ALL-NEXT: vpshufb %xmm14, %xmm9, %xmm0 -; AVX2-FAST-ALL-NEXT: vmovdqa %xmm12, %xmm7 -; AVX2-FAST-ALL-NEXT: vpshufb %xmm14, %xmm12, %xmm3 -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm15 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15] -; AVX2-FAST-ALL-NEXT: vpshufb %xmm3, %xmm11, %xmm0 -; AVX2-FAST-ALL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload -; AVX2-FAST-ALL-NEXT: vpshufb %xmm3, %xmm8, %xmm12 -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm12[0],xmm0[0],xmm12[1],xmm0[1] 
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm15[2,3] -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm11 = -; AVX2-FAST-ALL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm12 # 32-byte Reload -; AVX2-FAST-ALL-NEXT: vpshufb %ymm11, %ymm12, %ymm12 -; AVX2-FAST-ALL-NEXT: vmovdqa %ymm11, %ymm15 -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm11 = <2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u> -; AVX2-FAST-ALL-NEXT: vpshufb %ymm11, %ymm2, %ymm2 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm12[6,7] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] -; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX2-FAST-ALL-NEXT: vpshufb %xmm14, %xmm13, %xmm0 -; AVX2-FAST-ALL-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload -; AVX2-FAST-ALL-NEXT: vpshufb %xmm14, %xmm13, %xmm2 -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; AVX2-FAST-ALL-NEXT: vpshufb %xmm3, %xmm5, %xmm2 -; AVX2-FAST-ALL-NEXT: vpshufb %xmm3, %xmm4, %xmm3 -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] -; AVX2-FAST-ALL-NEXT: vpshufb %ymm15, %ymm6, %ymm2 -; AVX2-FAST-ALL-NEXT: vpshufb %ymm11, %ymm1, %ymm1 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7] +; AVX2-FAST-ALL-NEXT: vmovdqa 144(%rdi), %xmm10 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm1, %xmm10, %xmm0 +; AVX2-FAST-ALL-NEXT: vpshufb %xmm1, %xmm5, %xmm1 +; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm8[2,3] +; AVX2-FAST-ALL-NEXT: vpshufb %ymm14, %ymm15, %ymm1 +; AVX2-FAST-ALL-NEXT: vpshufb %ymm13, %ymm9, %ymm8 +; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm8[0,1,2,3,4,5],ymm1[6,7] ; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,2,3,1,3,5,7] -; AVX2-FAST-ALL-NEXT: vpermd (%rsp), %ymm1, %ymm6 # 32-byte Folded Reload -; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm14 # 32-byte Folded Reload -; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm0 = ymm6[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29] -; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u> -; AVX2-FAST-ALL-NEXT: vpshufb %ymm2, %ymm14, %ymm2 -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm5 = ymm2[0,1,2,3,4,5],ymm0[6,7] -; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm11 = xmm9[3,1,2,3] -; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm9 = xmm7[3,1,2,3] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm11[0,1,2,0,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm9[0,1,2,0,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] -; AVX2-FAST-ALL-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload -; AVX2-FAST-ALL-NEXT: # xmm12 = mem[3,1,2,3] -; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm4 = xmm8[3,1,2,3] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm12[2,0,2,3,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm15 = xmm4[2,0,2,3,4,5,6,7] -; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3] -; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 
= ymm0[0,1,2,3],ymm5[4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, (%rsp) # 32-byte Spill
-; AVX2-FAST-ALL-NEXT: vpermd %ymm10, %ymm1, %ymm2
-; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm10 # 32-byte Folded Reload
-; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm5 = ymm2[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29]
-; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm8 = ymm10[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm8 = ymm8[0,1,2,3,4,5],ymm5[6,7]
-; AVX2-FAST-ALL-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
-; AVX2-FAST-ALL-NEXT: # xmm3 = mem[3,1,2,3]
-; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm3[0,1,2,0,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm7 = xmm13[0,1,2,0,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1]
+; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
+; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX2-FAST-ALL-NEXT: vpermd (%rsp), %ymm1, %ymm15 # 32-byte Folded Reload
+; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm7 =
+; AVX2-FAST-ALL-NEXT: vpshufb %ymm7, %ymm0, %ymm0
+; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm9 = ymm15[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm14 = ymm9[0,1,2,3,4,5],ymm0[6,7]
+; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm13 = xmm2[3,1,2,3]
+; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm12 = xmm12[3,1,2,3]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm13[0,1,2,0,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm6 = xmm12[0,1,2,0,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1]
+; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm8 = xmm11[3,1,2,3]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm3[2,0,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm5 = xmm8[2,0,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm14[4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm9 # 32-byte Folded Reload
+; AVX2-FAST-ALL-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm1 # 32-byte Folded Reload
+; AVX2-FAST-ALL-NEXT: vpshufb %ymm7, %ymm9, %ymm5
+; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm11 = ymm1[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm11 = ymm11[0,1,2,3,4,5],ymm5[6,7]
+; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX2-FAST-ALL-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; AVX2-FAST-ALL-NEXT: # xmm2 = mem[3,1,2,3]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm5 = xmm4[0,1,2,0,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm7 = xmm2[0,1,2,0,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm7[0],xmm5[0],xmm7[1],xmm5[1]
+; AVX2-FAST-ALL-NEXT: vpshufd {{.*#+}} xmm10 = xmm10[3,1,2,3]
 ; AVX2-FAST-ALL-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload
 ; AVX2-FAST-ALL-NEXT: # xmm7 = mem[3,1,2,3]
-; AVX2-FAST-ALL-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload
-; AVX2-FAST-ALL-NEXT: # xmm5 = mem[3,1,2,3]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm7[2,0,2,3,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm15 = xmm5[2,0,2,3,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1]
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm8[4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm10[2,0,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm14 = xmm7[2,0,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm14[0],xmm0[0],xmm14[1],xmm0[1]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm11[4,5,6,7]
 ; AVX2-FAST-ALL-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm15 =
-; AVX2-FAST-ALL-NEXT: vpshufb %ymm15, %ymm6, %ymm1
-; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm8 = ymm14[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u]
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm8[0,1,2,3,4,5],ymm1[6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm11[0,1,3,1,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm6 = xmm9[0,1,3,1,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vmovdqa {{.*#+}} ymm14 =
+; AVX2-FAST-ALL-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
+; AVX2-FAST-ALL-NEXT: vpshufb %ymm14, %ymm0, %ymm5
+; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm11 = ymm15[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm5 = ymm11[0,1,2,3,4,5],ymm5[6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm13[0,1,3,1,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm6 = xmm12[0,1,3,1,4,5,6,7]
 ; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm6 = xmm12[3,1,2,3,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm4[0,1],xmm0[2,3]
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpshufb %ymm15, %ymm2, %ymm1
-; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm2 = ymm10[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u]
-; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5],ymm1[6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm3[0,1,3,1,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm13[0,1,3,1,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm7[3,1,2,3,4,5,6,7]
-; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm5[3,1,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm6 = xmm3[3,1,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm8[3,1,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm3[0,1],xmm0[2,3]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm5[4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshufb %ymm14, %ymm9, %ymm3
+; AVX2-FAST-ALL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15,18,19,22,23,26,27,30,31,u,u,u,u,u,u,u,u]
+; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm3[6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm4[0,1,3,1,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm3 = xmm10[3,1,2,3,4,5,6,7]
+; AVX2-FAST-ALL-NEXT: vpshuflw {{.*#+}} xmm4 = xmm7[3,1,2,3,4,5,6,7]
 ; AVX2-FAST-ALL-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
 ; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3]
 ; AVX2-FAST-ALL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
@@ -1847,7 +1833,7 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
 ; AVX2-FAST-ALL-NEXT: vmovaps %ymm2, (%rdx)
 ; AVX2-FAST-ALL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
 ; AVX2-FAST-ALL-NEXT: vmovaps %ymm2, 32(%rcx)
-; AVX2-FAST-ALL-NEXT: vmovups (%rsp), %ymm2 # 32-byte Reload
+; AVX2-FAST-ALL-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
 ; AVX2-FAST-ALL-NEXT: vmovaps %ymm2, (%rcx)
 ; AVX2-FAST-ALL-NEXT: vmovdqa %ymm1, 32(%r8)
 ; AVX2-FAST-ALL-NEXT: vmovdqa %ymm0, (%r8)
@@ -1857,184 +1843,169 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
 ;
 ; AVX2-FAST-PERLANE-LABEL: vf32:
 ; AVX2-FAST-PERLANE: # %bb.0:
-; AVX2-FAST-PERLANE-NEXT: subq $248, %rsp
-; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm6
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 16(%rdi), %xmm1
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %xmm5
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm3
+; AVX2-FAST-PERLANE-NEXT: subq $184, %rsp
 ; AVX2-FAST-PERLANE-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm3, %xmm8
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm3 = xmm5[0],xmm0[1,2,3],xmm5[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm1, %xmm9
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0],xmm0[1,2,3],xmm6[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm4, %xmm3
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 112(%rdi), %xmm15
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm3 = xmm15[0],xmm0[1,2,3],xmm15[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 96(%rdi), %xmm1
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm4 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm1, %xmm6
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm4, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm2, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm3 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm4, %xmm3, %xmm3
 ; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm3, %xmm3
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 80(%rdi), %xmm12
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm7 = xmm12[0],xmm0[1,2,3],xmm12[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 64(%rdi), %xmm14
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm1 = xmm14[0],xmm0[1,2,3],xmm14[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm7, %xmm1, %xmm1
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm1, %xmm1, %xmm1
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm3[6,7]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3,4,5],ymm2[6,7]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 176(%rdi), %xmm1
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 160(%rdi), %xmm2
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
 ; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm1, %xmm2, %xmm1
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 144(%rdi), %xmm2
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 128(%rdi), %xmm10
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm3 = xmm10[0],xmm0[1,2,3],xmm10[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm3, %xmm2
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm1, %xmm2, %xmm4
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 240(%rdi), %xmm1
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3],xmm1[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 224(%rdi), %xmm13
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm3 = xmm13[0],xmm0[1,2,3],xmm13[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm3, %xmm7
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 208(%rdi), %xmm3
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0],xmm0[1,2,3],xmm3[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa 192(%rdi), %xmm2
-; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3],xmm2[4],xmm0[5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm7, %xmm7, %xmm1
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm2 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm2, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm2, %xmm2, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX2-FAST-PERLANE-NEXT: vpblendw {{.*#+}} ymm0 = mem[0],ymm0[1,2,3],mem[4],ymm0[5,6,7],mem[8],ymm0[9,10,11],mem[12],ymm0[13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vextracti128 $1, %ymm0, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm3, %xmm0, %xmm0
 ; AVX2-FAST-PERLANE-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm0 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15]
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm15, %xmm1
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm6, %xmm4
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm4 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15]
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm12, %xmm7
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm14, %xmm11
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm7 = xmm11[0],xmm7[0],xmm11[1],xmm7[1]
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm7, %ymm0, %ymm7
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm7[0,1,2,3,4,5],ymm1[6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm8, %xmm7
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm5, %xmm6
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm9, %xmm8
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm9, %xmm7
-; AVX2-FAST-PERLANE-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm9, %xmm5
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm5 = xmm5[0,1],xmm6[2,3]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3],ymm1[4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm7, %xmm1
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm13, %xmm5
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm5[0],xmm1[0],xmm5[1],xmm1[1]
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm3, %xmm5
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm2, %xmm6
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm5[0,1,2,3,4,5],ymm1[6,7]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm6, %xmm5
-; AVX2-FAST-PERLANE-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm0, %xmm13, %xmm0
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Reload
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm5, %xmm5
-; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm4, %xmm10, %xmm4
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm4[0,1],xmm0[2,3]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm2[6,7]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm0 = xmm15[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
-; AVX2-FAST-PERLANE-NEXT: # xmm2 = mem[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm2
 ; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm2[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 16(%rdi), %xmm13
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 32(%rdi), %xmm14
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm15
+; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm5 = [0,1,2,3,2,3,10,11,8,9,10,11,12,13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 112(%rdi), %xmm11
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm11, %xmm0
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 96(%rdi), %xmm12
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm12, %xmm1
 ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm1 = xmm12[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm4 = xmm14[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-FAST-PERLANE-NEXT: vmovdqa {{.*#+}} xmm6 = [2,3,10,11,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 80(%rdi), %xmm10
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm10, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 64(%rdi), %xmm1
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm1, %xmm4
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm15, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm14, %xmm4
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm13, %xmm4
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm2, %xmm7
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm3 = xmm4[0,1],xmm3[2,3]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 240(%rdi), %xmm0
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm0, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 224(%rdi), %xmm7
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm7, %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm8 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 208(%rdi), %xmm0
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm0, %xmm9
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 192(%rdi), %xmm3
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm3, %xmm2
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm2[0],xmm9[0],xmm2[1],xmm9[1]
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm8, %ymm0, %ymm8
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm8[6,7]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 160(%rdi), %xmm0
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 176(%rdi), %xmm4
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm4, %xmm9
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm5, %xmm0, %xmm5
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 128(%rdi), %xmm4
 ; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm4, (%rsp) # 16-byte Spill
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[2,0,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[2,0,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa 144(%rdi), %xmm9
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm9, %xmm0
+; AVX2-FAST-PERLANE-NEXT: vpshufb %xmm6, %xmm4, %xmm6
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm0 = xmm11[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm4 = xmm12[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm4[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm2 = xmm10[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm2[2,0,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm4[2,0,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm11 = ymm1[0,1,2,3,4,5],ymm0[6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Folded Reload
-; AVX2-FAST-PERLANE-NEXT: # xmm10 = mem[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Folded Reload
-; AVX2-FAST-PERLANE-NEXT: # xmm14 = mem[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm10[0,1,2,0,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm14[0,1,2,0,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm12 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm5 = xmm8[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm4 = xmm9[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm5[2,0,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm15 = xmm4[2,0,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm15[0],xmm0[0],xmm15[1],xmm0[1]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm12[2,3]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm11[4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm15 = xmm15[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm14 = xmm14[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm15[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm14[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm13 = xmm13[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Folded Reload
+; AVX2-FAST-PERLANE-NEXT: # xmm4 = mem[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm13[2,0,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm4[2,0,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Folded Reload
+; AVX2-FAST-PERLANE-NEXT: # xmm11 = mem[3,1,2,3]
 ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm8 = xmm7[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Folded Reload
-; AVX2-FAST-PERLANE-NEXT: # xmm9 = mem[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm8[0,1,2,0,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm9[0,1,2,0,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm11 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm2 = xmm3[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Folded Reload
-; AVX2-FAST-PERLANE-NEXT: # xmm3 = mem[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm2[2,0,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm11[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm8[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
+; AVX2-FAST-PERLANE-NEXT: # xmm2 = mem[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm2[2,0,2,3,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm7 = xmm3[2,0,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm11, %ymm0, %ymm7
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm7 = ymm0[0,1,2,3,4,5],ymm7[6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm11 = xmm6[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm15 = xmm13[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm11[0,1,2,0,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm15[0,1,2,0,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
-; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Folded Reload
-; AVX2-FAST-PERLANE-NEXT: # xmm12 = mem[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3,4,5],ymm0[6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Folded Reload
+; AVX2-FAST-PERLANE-NEXT: # xmm7 = mem[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshufd $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Folded Reload
+; AVX2-FAST-PERLANE-NEXT: # xmm10 = mem[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm7[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm5 = xmm10[0,1,2,0,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
+; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} xmm9 = xmm9[3,1,2,3]
+; AVX2-FAST-PERLANE-NEXT: vpshufd $231, (%rsp), %xmm1 # 16-byte Folded Reload
 ; AVX2-FAST-PERLANE-NEXT: # xmm1 = mem[3,1,2,3]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm12[2,0,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm13 = xmm1[2,0,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm13[0],xmm0[0],xmm13[1],xmm0[1]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm13 = ymm0[0,1,2,3],ymm7[4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm9[2,0,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm12 = xmm1[2,0,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm12[0],xmm0[0],xmm12[1],xmm0[1]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm12 = ymm0[0,1,2,3],ymm6[4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
 ; AVX2-FAST-PERLANE-NEXT: # xmm0 = mem[0,1,3,1,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw $116, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload
@@ -2042,22 +2013,22 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
 ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm6[0],xmm0[0],xmm6[1],xmm0[1]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Folded Reload
 ; AVX2-FAST-PERLANE-NEXT: # xmm6 = mem[3,1,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw $231, (%rsp), %xmm7 # 16-byte Folded Reload
-; AVX2-FAST-PERLANE-NEXT: # xmm7 = mem[3,1,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw $231, {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Folded Reload
+; AVX2-FAST-PERLANE-NEXT: # xmm5 = mem[3,1,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm6
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3,4,5],ymm0[6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm10[0,1,3,1,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm7 = xmm14[0,1,3,1,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm5 = xmm5[3,1,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm5[0,1,2,3,4,5],ymm0[6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm5 = xmm15[0,1,3,1,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm14[0,1,3,1,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm5 = xmm6[0],xmm5[0],xmm6[1],xmm5[1]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm6 = xmm13[3,1,2,3,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm4[3,1,2,3,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
-; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm6[2,3]
+; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm4 = xmm4[0,1],xmm5[2,3]
 ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm4[0,1,2,3],ymm0[4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm8[0,1,3,1,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm5 = xmm9[0,1,3,1,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm11[0,1,3,1,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm5 = xmm8[0,1,3,1,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[3,1,2,3,4,5,6,7]
@@ -2065,10 +2036,10 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm3
 ; AVX2-FAST-PERLANE-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
 ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm3[6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm11[0,1,3,1,4,5,6,7]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm15[0,1,3,1,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm3 = xmm7[0,1,3,1,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm10[0,1,3,1,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm12[3,1,2,3,4,5,6,7]
+; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm4 = xmm9[3,1,2,3,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,2,3,4,5,6,7]
 ; AVX2-FAST-PERLANE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
 ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
@@ -2081,12 +2052,12 @@ define void @vf32(<128 x i16>* %in.vec, <32 x i16>* %out.vec0, <32 x i16>* %out.
 ; AVX2-FAST-PERLANE-NEXT: vmovaps %ymm2, 32(%rdx)
 ; AVX2-FAST-PERLANE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
 ; AVX2-FAST-PERLANE-NEXT: vmovaps %ymm2, (%rdx)
-; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm13, 32(%rcx)
+; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm12, 32(%rcx)
 ; AVX2-FAST-PERLANE-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload
 ; AVX2-FAST-PERLANE-NEXT: vmovaps %ymm2, (%rcx)
 ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%r8)
 ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%r8)
-; AVX2-FAST-PERLANE-NEXT: addq $248, %rsp
+; AVX2-FAST-PERLANE-NEXT: addq $184, %rsp
 ; AVX2-FAST-PERLANE-NEXT: vzeroupper
 ; AVX2-FAST-PERLANE-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll
index 823b2d70a7031..5dcf66c08a1bd 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-2.ll
@@ -110,12 +110,11 @@ define void @load_i32_stride2_vf8(<16 x i32>* %in.vec, <8 x i32>* %out.vec0, <8
 ; AVX1-LABEL: load_i32_stride2_vf8:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
-; AVX1-NEXT: vmovaps %ymm1, (%rsi)
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],mem[2,3]
+; AVX1-NEXT: vinsertf128 $1, 32(%rdi), %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; AVX1-NEXT: vmovaps %ymm2, (%rsi)
 ; AVX1-NEXT: vmovaps %ymm0, (%rdx)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -190,20 +189,18 @@ define void @load_i32_stride2_vf16(<32 x i32>* %in.vec, <16 x i32>* %out.vec0, <
 ; AVX1-LABEL: load_i32_stride2_vf16:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
-; AVX1-NEXT: vmovaps 64(%rdi), %ymm2
-; AVX1-NEXT: vmovaps 96(%rdi), %ymm3
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm0[0,2],ymm4[0,2],ymm0[4,6],ymm4[4,6]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm2[2,3],ymm3[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[0,1],ymm3[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm2[0,2],ymm5[0,2],ymm2[4,6],ymm5[4,6]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm4[1,3],ymm0[5,7],ymm4[5,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[1,3],ymm5[1,3],ymm2[5,7],ymm5[5,7]
-; AVX1-NEXT: vmovaps %ymm3, 32(%rsi)
-; AVX1-NEXT: vmovaps %ymm1, (%rsi)
-; AVX1-NEXT: vmovaps %ymm2, 32(%rdx)
+; AVX1-NEXT: vmovaps 64(%rdi), %ymm1
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],mem[2,3]
+; AVX1-NEXT: vinsertf128 $1, 32(%rdi), %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],mem[2,3]
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm1[0,2],ymm4[0,2],ymm1[4,6],ymm4[4,6]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm2[1,3],ymm0[5,7],ymm2[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[1,3],ymm4[1,3],ymm1[5,7],ymm4[5,7]
+; AVX1-NEXT: vmovaps %ymm5, 32(%rsi)
+; AVX1-NEXT: vmovaps %ymm3, (%rsi)
+; AVX1-NEXT: vmovaps %ymm1, 32(%rdx)
 ; AVX1-NEXT: vmovaps %ymm0, (%rdx)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -318,37 +315,33 @@ define void @load_i32_stride2_vf32(<64 x i32>* %in.vec, <32 x i32>* %out.vec0, <
 ; AVX1-LABEL: load_i32_stride2_vf32:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm1
-; AVX1-NEXT: vmovaps 64(%rdi), %ymm2
-; AVX1-NEXT: vmovaps 96(%rdi), %ymm3
-; AVX1-NEXT: vmovaps 224(%rdi), %ymm4
-; AVX1-NEXT: vmovaps 192(%rdi), %ymm5
-; AVX1-NEXT: vmovaps 160(%rdi), %ymm6
-; AVX1-NEXT: vmovaps 128(%rdi), %ymm7
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm8 = ymm7[2,3],ymm6[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm7[0,1],ymm6[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm7 = ymm6[0,2],ymm8[0,2],ymm6[4,6],ymm8[4,6]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm5[2,3],ymm4[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm5[0,1],ymm4[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm4[0,2],ymm9[0,2],ymm4[4,6],ymm9[4,6]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm2[2,3],ymm3[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[0,1],ymm3[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm2[0,2],ymm10[0,2],ymm2[4,6],ymm10[4,6]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm11 = ymm0[2,3],ymm1[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm0[0,2],ymm11[0,2],ymm0[4,6],ymm11[4,6]
-; AVX1-NEXT: vshufps {{.*#+}} ymm4 = ymm4[1,3],ymm9[1,3],ymm4[5,7],ymm9[5,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm6 = ymm6[1,3],ymm8[1,3],ymm6[5,7],ymm8[5,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[1,3],ymm10[1,3],ymm2[5,7],ymm10[5,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm11[1,3],ymm0[5,7],ymm11[5,7]
-; AVX1-NEXT: vmovaps %ymm5, 96(%rsi)
-; AVX1-NEXT: vmovaps %ymm1, (%rsi)
-; AVX1-NEXT: vmovaps %ymm3, 32(%rsi)
-; AVX1-NEXT: vmovaps %ymm7, 64(%rsi)
-; AVX1-NEXT: vmovaps %ymm6, 64(%rdx)
-; AVX1-NEXT: vmovaps %ymm4, 96(%rdx)
+; AVX1-NEXT: vmovaps 64(%rdi), %ymm1
+; AVX1-NEXT: vmovaps 128(%rdi), %ymm2
+; AVX1-NEXT: vmovaps 192(%rdi), %ymm3
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],mem[2,3]
+; AVX1-NEXT: vinsertf128 $1, 160(%rdi), %ymm2, %ymm2
+; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm2[0,2],ymm4[0,2],ymm2[4,6],ymm4[4,6]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm1[2,3],mem[2,3]
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
+; AVX1-NEXT: vshufps {{.*#+}} ymm7 = ymm1[0,2],ymm6[0,2],ymm1[4,6],ymm6[4,6]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm8 = ymm0[2,3],mem[2,3]
+; AVX1-NEXT: vinsertf128 $1, 32(%rdi), %ymm0, %ymm0
+; AVX1-NEXT: vshufps {{.*#+}} ymm9 = ymm0[0,2],ymm8[0,2],ymm0[4,6],ymm8[4,6]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm3[2,3],mem[2,3]
+; AVX1-NEXT: vinsertf128 $1, 224(%rdi), %ymm3, %ymm3
+; AVX1-NEXT: vshufps {{.*#+}} ymm11 = ymm3[0,2],ymm10[0,2],ymm3[4,6],ymm10[4,6]
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[1,3],ymm6[1,3],ymm1[5,7],ymm6[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm8[1,3],ymm0[5,7],ymm8[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm3[1,3],ymm10[1,3],ymm3[5,7],ymm10[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[1,3],ymm4[1,3],ymm2[5,7],ymm4[5,7]
+; AVX1-NEXT: vmovaps %ymm11, 96(%rsi)
+; AVX1-NEXT: vmovaps %ymm9, (%rsi)
+; AVX1-NEXT: vmovaps %ymm7, 32(%rsi)
+; AVX1-NEXT: vmovaps %ymm5, 64(%rsi)
+; AVX1-NEXT: vmovaps %ymm2, 64(%rdx)
+; AVX1-NEXT: vmovaps %ymm3, 96(%rdx)
 ; AVX1-NEXT: vmovaps %ymm0, (%rdx)
-; AVX1-NEXT: vmovaps %ymm2, 32(%rdx)
+; AVX1-NEXT: vmovaps %ymm1, 32(%rdx)
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
index ea4a61fd5852a..2a48147bbb399 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-6.ll
@@ -483,52 +483,52 @@ define void @load_i32_stride6_vf8(<48 x i32>* %in.vec, <8 x i32>* %out.vec0, <8
 ; AVX1-LABEL: load_i32_stride6_vf8:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX1-NEXT: vmovaps 128(%rdi), %ymm11
+; AVX1-NEXT: vmovaps 160(%rdi), %ymm12
+; AVX1-NEXT: vmovaps 32(%rdi), %ymm9
+; AVX1-NEXT: vmovaps (%rdi), %ymm10
 ; AVX1-NEXT: vmovaps 96(%rdi), %ymm0
 ; AVX1-NEXT: vmovaps 64(%rdi), %ymm1
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm9
-; AVX1-NEXT: vmovaps (%rdi), %ymm11
-; AVX1-NEXT: vmovaps 128(%rdi), %ymm6
-; AVX1-NEXT: vmovaps 160(%rdi), %ymm7
-; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm7[0,1,2,3],ymm6[4,5],ymm7[6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} xmm4 = xmm2[0,1],xmm3[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[0,1,0,2]
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm8
-; AVX1-NEXT: vblendps {{.*#+}} ymm10 = ymm11[0,1,2,3],ymm9[4,5],ymm11[6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm10, %xmm5
-; AVX1-NEXT: vblendps {{.*#+}} xmm4 = xmm10[0,1],xmm5[2,3]
-; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm4[0,2],xmm5[0,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm12 = ymm1[0,1],ymm0[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm13 = ymm0[2,0],ymm12[0,0],ymm0[6,4],ymm12[4,4]
-; AVX1-NEXT: vshufps {{.*#+}} ymm13 = ymm13[2,0],ymm1[2,2],ymm13[6,4],ymm1[6,6]
-; AVX1-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1,2],ymm13[3,4,5],ymm4[6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm8 = ymm4[0,1,2,3,4,5],ymm8[6,7]
-; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = zero,zero,xmm2[1],xmm3[3]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
-; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm10[1,0],xmm5[3,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,2],xmm5[1,3]
-; AVX1-NEXT: vshufps {{.*#+}} ymm4 = ymm0[3,0],ymm12[1,0],ymm0[7,4],ymm12[5,4]
-; AVX1-NEXT: vshufps {{.*#+}} ymm4 = ymm4[2,0],ymm1[2,3],ymm4[6,4],ymm1[6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3,4,5],ymm3[6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm12 = ymm3[0,1,2,3,4,5],ymm2[6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm7[4,5],ymm6[6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm2[0,0],xmm5[2,0]
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm3
+; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm0[2,0],ymm3[0,0],ymm0[6,4],ymm3[4,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[2,0],ymm3[2,2],ymm2[6,4],ymm3[6,6]
+; AVX1-NEXT: vblendps {{.*#+}} ymm8 = ymm10[0,1,2,3],ymm9[4,5],ymm10[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm8, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} xmm5 = xmm8[0,1],xmm4[2,3]
+; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm5[0,2],xmm4[0,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm5[0,1,2],ymm2[3,4,5],ymm5[6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm12[0,1,2,3],ymm11[4,5],ymm12[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} xmm7 = xmm6[0,1],xmm5[2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm7 = xmm7[0,1,0,2]
+; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm0, %ymm7
+; AVX1-NEXT: vblendps {{.*#+}} ymm13 = ymm2[0,1,2,3,4,5],ymm7[6,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm7 = ymm0[3,0],ymm3[1,0],ymm0[7,4],ymm3[5,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm7[2,0],ymm3[2,3],ymm7[6,4],ymm3[6,7]
+; AVX1-NEXT: vshufps {{.*#+}} xmm7 = xmm8[1,0],xmm4[3,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm7[0,2],xmm4[1,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2],ymm3[3,4,5],ymm4[6,7]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = zero,zero,xmm6[1],xmm5[3]
 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4
-; AVX1-NEXT: vblendps {{.*#+}} ymm6 = ymm9[0,1],ymm11[2,3],ymm9[4,5,6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7
-; AVX1-NEXT: vshufps {{.*#+}} xmm9 = xmm6[2,0],xmm7[2,3]
-; AVX1-NEXT: vshufps {{.*#+}} ymm10 = ymm0[2,1],ymm1[2,0],ymm0[6,5],ymm1[6,4]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm10[2,3,0,1]
-; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm9[0,1,2],ymm10[3,4],ymm9[5,6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm9[0,1,2,3,4],ymm4[5,6,7]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,1],xmm5[3,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
-; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm6[3,1],xmm7[3,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm14 = ymm3[0,1,2,3,4,5],ymm4[6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm11[0,1,2,3],ymm12[4,5],ymm11[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm5[0,0],xmm6[2,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm7 = ymm9[0,1],ymm10[2,3],ymm9[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm2
+; AVX1-NEXT: vshufps {{.*#+}} xmm8 = xmm7[2,0],xmm2[2,3]
+; AVX1-NEXT: vshufps {{.*#+}} ymm9 = ymm0[2,1],ymm1[2,0],ymm0[6,5],ymm1[6,4]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm9[2,3,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} ymm8 = ymm8[0,1,2],ymm9[3,4],ymm8[5,6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm8 = ymm8[0,1,2,3,4],ymm4[5,6,7]
+; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm5[0,1],xmm6[3,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm7[3,1],xmm2[3,3]
 ; AVX1-NEXT: vshufps {{.*#+}} ymm6 = ymm0[3,1],ymm1[2,1],ymm0[7,5],ymm1[6,5]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm6[2,3,0,1]
-; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2],ymm6[3,4],ymm5[5,6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm10 = ymm5[0,1,2,3,4],ymm2[5,6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm6[3,4],ymm2[5,6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm2[0,1,2,3,4],ymm5[5,6,7]
 ; AVX1-NEXT: vmovdqa 160(%rdi), %xmm5
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],mem[4,5,6,7]
 ; AVX1-NEXT: vmovdqa 176(%rdi), %xmm6
@@ -539,24 +539,24 @@ define void @load_i32_stride6_vf8(<48 x i32>* %in.vec, <8 x i32>* %out.vec0, <8
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[2,2,3,3]
 ; AVX1-NEXT: vmovaps 16(%rdi), %xmm2
 ; AVX1-NEXT: vblendps {{.*#+}} xmm4 = xmm2[0],xmm4[1],xmm2[2,3]
-; AVX1-NEXT: vmovapd 80(%rdi), %xmm11
-; AVX1-NEXT: vshufpd {{.*#+}} ymm13 = ymm11[1],ymm1[0],ymm11[2],ymm1[2]
-; AVX1-NEXT: vshufps {{.*#+}} ymm13 = ymm0[0,1],ymm13[2,0],ymm0[4,5],ymm13[6,4]
-; AVX1-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm13[2,3,4,5,6,7]
+; AVX1-NEXT: vmovapd 80(%rdi), %xmm10
+; AVX1-NEXT: vshufpd {{.*#+}} ymm11 = ymm10[1],ymm1[0],ymm10[2],ymm1[2]
+; AVX1-NEXT: vshufps {{.*#+}} ymm11 = ymm0[0,1],ymm11[2,0],ymm0[4,5],ymm11[6,4]
+; AVX1-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm11[2,3,4,5,6,7]
 ; AVX1-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1,2,3,4],ymm7[5,6,7]
 ; AVX1-NEXT: vpalignr {{.*#+}} xmm6 = xmm6[12,13,14,15],xmm5[0,1,2,3,4,5,6,7,8,9,10,11]
 ; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm5[0,3],xmm6[2,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
 ; AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3]
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm11[3,1],ymm1[1,3],ymm11[7,5],ymm1[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm10[3,1],ymm1[1,3],ymm10[7,5],ymm1[5,7]
 ; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,1],ymm1[2,0],ymm0[5,5],ymm1[6,4]
 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5,6,7]
 ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm5[5,6,7]
-; AVX1-NEXT: vmovaps %ymm8, (%rsi)
-; AVX1-NEXT: vmovaps %ymm12, (%rdx)
-; AVX1-NEXT: vmovaps %ymm9, (%rcx)
-; AVX1-NEXT: vmovaps %ymm10, (%r8)
+; AVX1-NEXT: vmovaps %ymm13, (%rsi)
+; AVX1-NEXT: vmovaps %ymm14, (%rdx)
+; AVX1-NEXT: vmovaps %ymm8, (%rcx)
+; AVX1-NEXT: vmovaps %ymm9, (%r8)
 ; AVX1-NEXT: vmovaps %ymm4, (%r9)
 ; AVX1-NEXT: vmovaps %ymm0, (%rax)
 ; AVX1-NEXT: vzeroupper
@@ -1181,201 +1181,205 @@ define void @load_i32_stride6_vf16(<96 x i32>* %in.vec, <16 x i32>* %out.vec0, <
 ;
 ; AVX1-LABEL: load_i32_stride6_vf16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: subq $264, %rsp # imm = 0x108
-; AVX1-NEXT: vmovaps 32(%rdi), %ymm7
-; AVX1-NEXT: vmovups %ymm7, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vmovaps (%rdi), %ymm8
-; AVX1-NEXT: vmovups %ymm8, (%rsp) # 32-byte Spill
-; AVX1-NEXT: vmovaps 288(%rdi), %ymm2
-; AVX1-NEXT: vmovaps 256(%rdi), %ymm3
-; AVX1-NEXT: vmovaps 224(%rdi), %ymm1
-; AVX1-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vmovaps 192(%rdi), %ymm4
-; AVX1-NEXT: vmovups %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vmovaps 320(%rdi), %ymm0
-; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vmovaps 352(%rdi), %ymm5
+; AVX1-NEXT: subq $328, %rsp # imm = 0x148
+; AVX1-NEXT: vmovaps 96(%rdi), %ymm8
+; AVX1-NEXT: vmovaps 64(%rdi), %ymm10
+; AVX1-NEXT: vmovups %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vmovaps 320(%rdi), %ymm5
 ; AVX1-NEXT: vmovups %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vblendps {{.*#+}} ymm10 = ymm5[0,1,2,3],ymm0[4,5],ymm5[6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm10, %xmm0
-; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm10[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm5
-; AVX1-NEXT: vblendps {{.*#+}} ymm15 = ymm4[0,1,2,3],ymm1[4,5],ymm4[6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm15, %xmm4
-; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm15[0,1],xmm4[2,3]
-; AVX1-NEXT: vshufps {{.*#+}} xmm6 = xmm1[0,2],xmm4[0,3]
+; AVX1-NEXT: vmovaps 352(%rdi), %ymm6
+; AVX1-NEXT: vmovups %ymm6, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vmovaps 224(%rdi), %ymm2
 ; AVX1-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vmovaps 192(%rdi), %ymm3
 ; AVX1-NEXT: vmovups %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm13 = ymm3[0,1],ymm2[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm14 = ymm2[2,0],ymm13[0,0],ymm2[6,4],ymm13[4,4]
-; AVX1-NEXT: vshufps {{.*#+}} ymm14 = ymm14[2,0],ymm3[2,2],ymm14[6,4],ymm3[6,6]
-; AVX1-NEXT: vblendps {{.*#+}} ymm6 = ymm6[0,1,2],ymm14[3,4,5],ymm6[6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm6[0,1,2,3,4,5],ymm5[6,7]
+; AVX1-NEXT: vmovaps 288(%rdi), %ymm1
+; AVX1-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vmovaps 256(%rdi), %ymm0
 ; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vblendps {{.*#+}} ymm14 = ymm8[0,1,2,3],ymm7[4,5],ymm8[6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm14, %xmm1
-; AVX1-NEXT: vblendps {{.*#+}} xmm5 = xmm14[0,1],xmm1[2,3]
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm5[0,2],xmm1[0,3]
-; AVX1-NEXT: vmovaps 96(%rdi), %ymm9
-; AVX1-NEXT: vmovaps 64(%rdi), %ymm8
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm8[0,1],ymm9[0,1]
-; AVX1-NEXT: vshufps {{.*#+}} ymm12 = ymm9[2,0],ymm3[0,0],ymm9[6,4],ymm3[4,4]
-; AVX1-NEXT: vshufps {{.*#+}} ymm12 = ymm12[2,0],ymm8[2,2],ymm12[6,4],ymm8[6,6]
-; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm0[0,1,2],ymm12[3,4,5],ymm0[6,7]
+; AVX1-NEXT: vinsertf128 $1, 288(%rdi), %ymm0, %ymm7
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm1[2,0],ymm7[0,0],ymm1[6,4],ymm7[4,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0],ymm7[2,2],ymm0[6,4],ymm7[6,6]
+; AVX1-NEXT: vblendps {{.*#+}} ymm11 = ymm3[0,1,2,3],ymm2[4,5],ymm3[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm11, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm11[0,1],xmm4[2,3]
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[0,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2],ymm0[3,4,5],ymm1[6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm14 = ymm6[0,1,2,3],ymm5[4,5],ymm6[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm14, %xmm15
+; AVX1-NEXT: vblendps {{.*#+}} xmm6 = xmm15[0,1],xmm14[2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm6 = xmm6[0,1,0,2]
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm6[6,7]
+; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm10, %ymm6
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm8[2,0],ymm6[0,0],ymm8[6,4],ymm6[4,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,0],ymm6[2,2],ymm1[6,4],ymm6[6,6]
+; AVX1-NEXT: vmovaps 32(%rdi), %ymm0
+; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vmovaps (%rdi), %ymm2
+; AVX1-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
+; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm0
+; AVX1-NEXT: vblendps {{.*#+}} xmm12 = xmm2[0,1],xmm0[2,3]
+; AVX1-NEXT: vshufps {{.*#+}} xmm12 = xmm12[0,2],xmm0[0,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm12[0,1,2],ymm1[3,4,5],ymm12[6,7]
 ; AVX1-NEXT: vmovaps 128(%rdi), %ymm12
-; AVX1-NEXT: vmovaps 160(%rdi), %ymm6
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm12[4,5],ymm6[6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vblendps {{.*#+}} xmm11 = xmm2[0,1],xmm0[2,3]
-; AVX1-NEXT: vpermilps {{.*#+}} xmm7 = xmm11[0,1,0,2]
-; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm0, %ymm7
-; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3,4,5],ymm7[6,7]
+; AVX1-NEXT: vmovaps 160(%rdi), %ymm10
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm10[0,1,2,3],ymm12[4,5],ymm10[6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vblendps {{.*#+}} xmm13 = xmm3[0,1],xmm1[2,3]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm5 = xmm13[0,1,0,2]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm9[0,1,2,3,4,5],ymm5[6,7]
 ; AVX1-NEXT: vmovups %ymm5, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm2[1],xmm0[3]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm14[1,0],xmm1[3,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm2[0,2],xmm1[1,3]
-; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm9[3,0],ymm3[1,0],ymm9[7,4],ymm3[5,4]
-; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[2,0],ymm8[2,3],ymm2[6,4],ymm8[6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3,4,5],ymm1[6,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm8[3,0],ymm6[1,0],ymm8[7,4],ymm6[5,4]
+; AVX1-NEXT: vmovaps %ymm8, %ymm9
+; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm5[2,0],ymm6[2,3],ymm5[6,4],ymm6[6,7]
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[3,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm5[3,4,5],ymm0[6,7]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = zero,zero,xmm3[1],xmm1[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
 ; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vinsertps $35, {{[-0-9]+}}(%r{{[sb]}}p), %xmm10, %xmm0 # 16-byte Folded Reload
-; AVX1-NEXT: # xmm0 = zero,zero,mem[0],xmm10[3]
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm15[1,0],xmm4[3,0]
+; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm8 # 32-byte Reload
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm8[3,0],ymm7[1,0],ymm8[7,4],ymm7[5,4]
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,0],ymm7[2,3],ymm0[6,4],ymm7[6,7]
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm11[1,0],xmm4[3,0]
 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[1,3]
-; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm4 # 32-byte Reload
-; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm4[3,0],ymm13[1,0],ymm4[7,4],ymm13[5,4]
-; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm13 # 32-byte Reload
-; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm2[2,0],ymm13[2,3],ymm2[6,4],ymm13[6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3,4,5],ymm1[6,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5],ymm1[6,7]
+; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = zero,zero,xmm15[1],xmm14[3]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
 ; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm12[0,1,2,3],ymm6[4,5],ymm12[6,7]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm12[0,1,2,3],ymm10[4,5],ymm12[6,7]
 ; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; AVX1-NEXT: vblendps $12, (%rsp), %ymm0, %ymm3 # 32-byte Folded Reload
-; AVX1-NEXT: # ymm3 = ymm0[0,1],mem[2,3],ymm0[4,5,6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm15
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm9[2,1],ymm8[2,0],ymm9[6,5],ymm8[6,4]
+; AVX1-NEXT: vblendps $12, (%rsp), %ymm0, %ymm2 # 32-byte Folded Reload
+; AVX1-NEXT: # ymm2 = ymm0[0,1],mem[2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm12
+; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm13 # 32-byte Reload
+; AVX1-NEXT: vmovaps %ymm9, %ymm14
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm9[2,1],ymm13[2,0],ymm9[6,5],ymm13[6,4]
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm3[2,0],xmm15[2,3]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4],ymm1[5,6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm2[0,0],xmm6[2,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
+; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm2[2,0],xmm12[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2],ymm0[3,4],ymm4[5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm1[0,0],xmm4[2,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm5[5,6,7]
 ; AVX1-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
 ; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; AVX1-NEXT: vblendps $48, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm7 # 32-byte Folded Reload
-; AVX1-NEXT: # ymm7 = ymm0[0,1,2,3],mem[4,5],ymm0[6,7]
+; AVX1-NEXT: vblendps $48, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm5 # 32-byte Folded Reload
+; AVX1-NEXT: # ymm5 = ymm0[0,1,2,3],mem[4,5],ymm0[6,7]
 ; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
-; AVX1-NEXT: vblendps $12, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm10 # 32-byte Folded Reload
-; AVX1-NEXT: # ymm10 = ymm0[0,1],mem[2,3],ymm0[4,5,6,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm1 = ymm4[2,1],ymm13[2,0],ymm4[6,5],ymm13[6,4]
-; AVX1-NEXT: vmovaps %ymm4, %ymm11
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
-; AVX1-NEXT: vextractf128 $1, %ymm10, %xmm4
-; AVX1-NEXT: vshufps {{.*#+}} xmm12 = xmm10[2,0],xmm4[2,3]
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm12[0,1,2],ymm1[3,4],ymm12[5,6,7]
-; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm0
-; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm7[0,0],xmm0[2,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm5[5,6,7]
+; AVX1-NEXT: vblendps $12, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm9 # 32-byte Folded Reload
+; AVX1-NEXT: # ymm9 = ymm0[0,1],mem[2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
+; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm8[2,1],ymm11[2,0],ymm8[6,5],ymm11[6,4]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm6
+; AVX1-NEXT: vshufps {{.*#+}} xmm10 = xmm9[2,0],xmm6[2,3]
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm10[0,1,2],ymm0[3,4],ymm10[5,6,7]
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm7
+; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm5[0,0],xmm7[2,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm3
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm3[5,6,7]
+; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,1],xmm4[3,1]
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,1],xmm12[3,3]
+; AVX1-NEXT: vmovaps %ymm14, %ymm0
+; AVX1-NEXT: vmovups %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vshufps {{.*#+}} ymm3 = ymm14[3,1],ymm13[2,1],ymm14[7,5],ymm13[6,5]
+; AVX1-NEXT: vmovaps %ymm13, %ymm14
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1]
+; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2],ymm3[3,4],ymm2[5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4],ymm1[5,6,7]
 ; AVX1-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[3,1]
-; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,1],xmm15[3,3]
-; AVX1-NEXT: vshufps {{.*#+}} ymm5 = ymm9[3,1],ymm8[2,1],ymm9[7,5],ymm8[6,5]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm5[2,3,0,1]
-; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2],ymm5[3,4],ymm3[5,6,7]
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm5[0,1],xmm7[3,1]
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3,4],ymm2[5,6,7]
-; AVX1-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm7[0,1],xmm0[3,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm10[3,1],xmm4[3,3]
-; AVX1-NEXT: vshufps {{.*#+}} ymm4 = ymm11[3,1],ymm13[2,1],ymm11[7,5],ymm13[6,5]
-; AVX1-NEXT: vmovaps %ymm11, %ymm5
+; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm9[3,1],xmm6[3,3]
+; AVX1-NEXT: vshufps {{.*#+}} ymm4 = ymm8[3,1],ymm11[2,1],ymm8[7,5],ymm11[6,5]
+; AVX1-NEXT: vmovaps %ymm11, %ymm12
+; AVX1-NEXT: vmovaps %ymm8, %ymm10
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm4[2,3,0,1]
 ; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2],ymm4[3,4],ymm3[5,6,7]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm3[0,1,2,3,4],ymm0[5,6,7]
-; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; AVX1-NEXT: vmovaps 32(%rdi), %xmm12
-; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm12[2,2,3,3]
-; AVX1-NEXT: vmovaps 16(%rdi), %xmm10
-; AVX1-NEXT: vblendps {{.*#+}} xmm4 = xmm10[0],xmm4[1],xmm10[2,3]
-; AVX1-NEXT: vmovapd 80(%rdi), %xmm11
-; AVX1-NEXT: vshufpd {{.*#+}} ymm7 = ymm11[1],ymm8[0],ymm11[2],ymm8[2]
-; AVX1-NEXT: vshufps {{.*#+}} ymm7 = ymm9[0,1],ymm7[2,0],ymm9[4,5],ymm7[6,4]
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm3[0,1,2,3,4],ymm2[5,6,7]
+; AVX1-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vmovaps 32(%rdi), %xmm9
+; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm9[2,2,3,3]
+; AVX1-NEXT: vmovaps 16(%rdi), %xmm8
+; AVX1-NEXT: vblendps {{.*#+}} xmm4 = xmm8[0],xmm4[1],xmm8[2,3]
+; AVX1-NEXT: vmovapd 80(%rdi), %xmm6
+; AVX1-NEXT: vshufpd {{.*#+}} ymm7 = ymm6[1],ymm13[0],ymm6[2],ymm13[2]
+; AVX1-NEXT: vshufps {{.*#+}} ymm7 = ymm0[0,1],ymm7[2,0],ymm0[4,5],ymm7[6,4]
 ; AVX1-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm7[2,3,4,5,6,7]
 ; AVX1-NEXT: vmovdqa 160(%rdi), %xmm7
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm7 = xmm7[0,1,2,3],mem[4,5,6,7]
-; AVX1-NEXT: vmovdqa 176(%rdi), %xmm2
-; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm7[0,1,2,3,4,5,6,7]
+; AVX1-NEXT: vmovdqa 176(%rdi), %xmm0
+; AVX1-NEXT: vpalignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm7[0,1,2,3,4,5,6,7]
 ; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm7[0,2],xmm1[2,0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm4[0,1,2,3,4],ymm1[5,6,7]
-; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm4[0,1,2,3,4],ymm1[5,6,7]
+; AVX1-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
 ; AVX1-NEXT: vmovaps 224(%rdi), %xmm1
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm1[2,2,3,3]
-; AVX1-NEXT: vmovaps 208(%rdi), %xmm3
-; AVX1-NEXT: vblendps {{.*#+}} xmm4 = xmm3[0],xmm4[1],xmm3[2,3]
-; AVX1-NEXT: vmovapd 272(%rdi), %xmm0
-; AVX1-NEXT: vshufpd {{.*#+}} ymm14 = ymm0[1],ymm13[0],ymm0[2],ymm13[2]
-; AVX1-NEXT: vshufps {{.*#+}} ymm14 = ymm5[0,1],ymm14[2,0],ymm5[4,5],ymm14[6,4]
-; AVX1-NEXT: vmovaps %ymm5, %ymm13
-; AVX1-NEXT: vblendps {{.*#+}} ymm14 = ymm4[0,1],ymm14[2,3,4,5,6,7]
-; AVX1-NEXT: vmovdqa 352(%rdi), %xmm5
-; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3],mem[4,5,6,7]
+; AVX1-NEXT: vmovaps 208(%rdi), %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} xmm4 = xmm2[0],xmm4[1],xmm2[2,3]
+; AVX1-NEXT: vmovapd 272(%rdi), %xmm15
+; AVX1-NEXT: vshufpd {{.*#+}} ymm13 = ymm15[1],ymm11[0],ymm15[2],ymm11[2]
+; AVX1-NEXT: vshufps {{.*#+}} ymm13 = ymm10[0,1],ymm13[2,0],ymm10[4,5],ymm13[6,4]
+; AVX1-NEXT: vblendps {{.*#+}} ymm13 = ymm4[0,1],ymm13[2,3,4,5,6,7]
+; AVX1-NEXT: vmovdqa 352(%rdi), %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],mem[4,5,6,7]
 ; AVX1-NEXT: vmovdqa 368(%rdi), %xmm4
-; AVX1-NEXT: vpalignr {{.*#+}} xmm15 = xmm4[8,9,10,11,12,13,14,15],xmm5[0,1,2,3,4,5,6,7]
-; AVX1-NEXT: vshufps {{.*#+}} xmm6 = xmm5[0,2],xmm15[2,0]
-; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm6
-; AVX1-NEXT: vblendps {{.*#+}} ymm6 = ymm14[0,1,2,3,4],ymm6[5,6,7]
-; AVX1-NEXT: vpalignr {{.*#+}} xmm2 = xmm2[12,13,14,15],xmm7[0,1,2,3,4,5,6,7,8,9,10,11]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm7[0,3],xmm2[2,0]
-; AVX1-NEXT: vblendps {{.*#+}} xmm7 = xmm10[0,1],xmm12[2,3]
-; AVX1-NEXT: vshufps {{.*#+}} ymm8 = ymm11[3,1],ymm8[1,3],ymm11[7,5],ymm8[5,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm8 = ymm9[1,1],ymm8[2,0],ymm9[5,5],ymm8[6,4]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm11 = xmm4[8,9,10,11,12,13,14,15],xmm3[0,1,2,3,4,5,6,7]
+; AVX1-NEXT: vshufps {{.*#+}} xmm5 = xmm3[0,2],xmm11[2,0]
+; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm0, %ymm5
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm13[0,1,2,3,4],ymm5[5,6,7]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm7[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm7[0,3],xmm0[2,0]
+; AVX1-NEXT: vblendps {{.*#+}} xmm7 = xmm8[0,1],xmm9[2,3]
+; AVX1-NEXT: vshufps {{.*#+}} ymm6 = ymm6[3,1],ymm14[1,3],ymm6[7,5],ymm14[5,7]
+; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm8 # 32-byte Reload
+; AVX1-NEXT: vshufps {{.*#+}} ymm6 = ymm8[1,1],ymm6[2,0],ymm8[5,5],ymm6[6,4]
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm7 = xmm7[1,3,2,3]
-; AVX1-NEXT: vblendps {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3,4,5,6,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2
-; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm7[0,1,2,3,4],ymm2[5,6,7]
-; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm4[12,13,14,15],xmm5[0,1,2,3,4,5,6,7,8,9,10,11]
-; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm5[0,3],xmm4[2,0]
-; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2,3]
-; AVX1-NEXT: vshufps $215, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
-; AVX1-NEXT: # ymm0 = ymm0[3,1],mem[1,3],ymm0[7,5],mem[5,7]
-; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm13[1,1],ymm0[2,0],ymm13[5,5],ymm0[6,4]
+; AVX1-NEXT: vblendps {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3,4,5,6,7]
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm6[0,1,2,3,4],ymm0[5,6,7]
+; AVX1-NEXT: vpalignr {{.*#+}} xmm4 = xmm4[12,13,14,15],xmm3[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[2,0]
+; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
+; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm15[3,1],ymm12[1,3],ymm15[7,5],ymm12[5,7]
+; AVX1-NEXT: vshufps {{.*#+}} ymm2 = ymm10[1,1],ymm2[2,0],ymm10[5,5],ymm2[6,4]
 ; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm1
-; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5,6,7]
-; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1
# 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, (%rsi) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, 32(%rsi) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, 32(%rdx) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, (%rdx) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, 32(%rcx) -; AVX1-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, (%rcx) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, 32(%r8) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, (%r8) -; AVX1-NEXT: vmovaps %ymm6, 32(%r9) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, (%r9) +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3,4,5,6,7] +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5,6,7] +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, (%rsi) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, 32(%rsi) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, 32(%rdx) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, (%rdx) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, 32(%rcx) +; AVX1-NEXT: vmovups (%rsp), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, (%rcx) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, 32(%r8) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, (%r8) +; AVX1-NEXT: vmovaps %ymm5, 32(%r9) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm2, (%r9) ; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax -; AVX1-NEXT: vmovaps %ymm0, 32(%rax) -; AVX1-NEXT: vmovaps %ymm2, (%rax) -; AVX1-NEXT: addq $264, %rsp # imm = 0x108 +; AVX1-NEXT: vmovaps %ymm1, 32(%rax) +; AVX1-NEXT: vmovaps %ymm0, (%rax) +; AVX1-NEXT: addq $328, %rsp # imm = 0x148 ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-2.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-2.ll index e6fafb606c521..352d447876805 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-2.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-2.ll @@ -72,12 +72,11 @@ define void @load_i64_stride2_vf4(<8 x i64>* %in.vec, <4 x i64>* %out.vec0, <4 x ; AVX1-LABEL: load_i64_stride2_vf4: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] -; AVX1-NEXT: vmovaps %ymm1, (%rsi) +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 32(%rdi), %ymm0, %ymm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX1-NEXT: vmovaps 
%ymm2, (%rsi) ; AVX1-NEXT: vmovaps %ymm0, (%rdx) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -154,20 +153,18 @@ define void @load_i64_stride2_vf8(<16 x i64>* %in.vec, <8 x i64>* %out.vec0, <8 ; AVX1-LABEL: load_i64_stride2_vf8: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 -; AVX1-NEXT: vmovaps 64(%rdi), %ymm2 -; AVX1-NEXT: vmovaps 96(%rdi), %ymm3 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[2,3],ymm1[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm4[0],ymm0[2],ymm4[2] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm2[2,3],ymm3[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[0,1],ymm3[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm2[0],ymm5[0],ymm2[2],ymm5[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm4[1],ymm0[3],ymm4[3] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm5[1],ymm2[3],ymm5[3] -; AVX1-NEXT: vmovaps %ymm3, 32(%rsi) -; AVX1-NEXT: vmovaps %ymm1, (%rsi) -; AVX1-NEXT: vmovaps %ymm2, 32(%rdx) +; AVX1-NEXT: vmovaps 64(%rdi), %ymm1 +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 32(%rdi), %ymm0, %ymm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm2[0],ymm0[2],ymm2[2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm1[0],ymm4[0],ymm1[2],ymm4[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm4[1],ymm1[3],ymm4[3] +; AVX1-NEXT: vmovaps %ymm5, 32(%rsi) +; AVX1-NEXT: vmovaps %ymm3, (%rsi) +; AVX1-NEXT: vmovaps %ymm1, 32(%rdx) ; AVX1-NEXT: vmovaps %ymm0, (%rdx) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -282,37 +279,33 @@ define void @load_i64_stride2_vf16(<32 x i64>* %in.vec, <16 x i64>* %out.vec0, < ; AVX1-LABEL: load_i64_stride2_vf16: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 -; AVX1-NEXT: vmovaps 64(%rdi), %ymm2 -; AVX1-NEXT: vmovaps 96(%rdi), %ymm3 -; AVX1-NEXT: vmovaps 224(%rdi), %ymm4 -; AVX1-NEXT: vmovaps 192(%rdi), %ymm5 -; AVX1-NEXT: vmovaps 160(%rdi), %ymm6 -; AVX1-NEXT: vmovaps 128(%rdi), %ymm7 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm8 = ymm7[2,3],ymm6[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm7[0,1],ymm6[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm6[0],ymm8[0],ymm6[2],ymm8[2] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm5[2,3],ymm4[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm5[0,1],ymm4[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm4[0],ymm9[0],ymm4[2],ymm9[2] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm2[2,3],ymm3[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm2[0,1],ymm3[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm2[0],ymm10[0],ymm2[2],ymm10[2] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm11 = ymm0[2,3],ymm1[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm11[0],ymm0[2],ymm11[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm9[1],ymm4[3],ymm9[3] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm6[1],ymm8[1],ymm6[3],ymm8[3] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm10[1],ymm2[3],ymm10[3] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm11[1],ymm0[3],ymm11[3] -; AVX1-NEXT: vmovaps %ymm5, 96(%rsi) -; AVX1-NEXT: vmovaps %ymm1, (%rsi) -; AVX1-NEXT: vmovaps %ymm3, 32(%rsi) -; AVX1-NEXT: vmovaps %ymm7, 64(%rsi) -; AVX1-NEXT: vmovaps %ymm6, 64(%rdx) -; AVX1-NEXT: vmovaps %ymm4, 96(%rdx) +; 
AVX1-NEXT: vmovaps 64(%rdi), %ymm1 +; AVX1-NEXT: vmovaps 128(%rdi), %ymm2 +; AVX1-NEXT: vmovaps 192(%rdi), %ymm3 +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm2[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 160(%rdi), %ymm2, %ymm2 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm2[0],ymm4[0],ymm2[2],ymm4[2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm1[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm7 = ymm1[0],ymm6[0],ymm1[2],ymm6[2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm8 = ymm0[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 32(%rdi), %ymm0, %ymm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm9 = ymm0[0],ymm8[0],ymm0[2],ymm8[2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm3[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 224(%rdi), %ymm3, %ymm3 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm11 = ymm3[0],ymm10[0],ymm3[2],ymm10[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],ymm6[1],ymm1[3],ymm6[3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm8[1],ymm0[3],ymm8[3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm3 = ymm3[1],ymm10[1],ymm3[3],ymm10[3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm4[1],ymm2[3],ymm4[3] +; AVX1-NEXT: vmovaps %ymm11, 96(%rsi) +; AVX1-NEXT: vmovaps %ymm9, (%rsi) +; AVX1-NEXT: vmovaps %ymm7, 32(%rsi) +; AVX1-NEXT: vmovaps %ymm5, 64(%rsi) +; AVX1-NEXT: vmovaps %ymm2, 64(%rdx) +; AVX1-NEXT: vmovaps %ymm3, 96(%rdx) ; AVX1-NEXT: vmovaps %ymm0, (%rdx) -; AVX1-NEXT: vmovaps %ymm2, 32(%rdx) +; AVX1-NEXT: vmovaps %ymm1, 32(%rdx) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -538,76 +531,64 @@ define void @load_i64_stride2_vf32(<64 x i64>* %in.vec, <32 x i64>* %out.vec0, < ; ; AVX1-LABEL: load_i64_stride2_vf32: ; AVX1: # %bb.0: -; AVX1-NEXT: vmovaps (%rdi), %ymm0 -; AVX1-NEXT: vmovaps 64(%rdi), %ymm1 -; AVX1-NEXT: vmovaps 96(%rdi), %ymm2 -; AVX1-NEXT: vmovaps 480(%rdi), %ymm3 -; AVX1-NEXT: vmovaps 448(%rdi), %ymm4 -; AVX1-NEXT: vmovaps 288(%rdi), %ymm5 -; AVX1-NEXT: vmovaps 256(%rdi), %ymm6 -; AVX1-NEXT: vmovaps 352(%rdi), %ymm7 -; AVX1-NEXT: vmovaps 320(%rdi), %ymm8 -; AVX1-NEXT: vmovaps 160(%rdi), %ymm9 -; AVX1-NEXT: vmovaps 128(%rdi), %ymm10 -; AVX1-NEXT: vmovaps 224(%rdi), %ymm11 -; AVX1-NEXT: vmovaps 192(%rdi), %ymm12 -; AVX1-NEXT: vmovaps 416(%rdi), %ymm13 -; AVX1-NEXT: vmovaps 384(%rdi), %ymm14 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm15 = ymm14[2,3],ymm13[2,3] -; AVX1-NEXT: vmovups %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm15 = ymm14[0,1],ymm13[0,1] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm14 = ymm12[2,3],ymm11[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm13 = ymm12[0,1],ymm11[0,1] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm12 = ymm10[2,3],ymm9[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm11 = ymm10[0,1],ymm9[0,1] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm8[2,3],ymm7[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm8[0,1],ymm7[0,1] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm8 = ymm6[2,3],ymm5[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm7 = ymm6[0,1],ymm5[0,1] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm6 = ymm4[2,3],ymm3[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm4[0,1],ymm3[0,1] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[0,1],ymm2[0,1] -; AVX1-NEXT: vmovaps 32(%rdi), %ymm2 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm2[2,3] -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[0,1],ymm2[0,1] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm13[0],ymm14[0],ymm13[2],ymm14[2] -; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX1-NEXT: vunpckhpd 
{{.*#+}} ymm0 = ymm13[1],ymm14[1],ymm13[3],ymm14[3] -; AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm14 = ymm11[0],ymm12[0],ymm11[2],ymm12[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm11 = ymm11[1],ymm12[1],ymm11[3],ymm12[3] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm12 = ymm9[0],ymm10[0],ymm9[2],ymm10[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm9 = ymm9[1],ymm10[1],ymm9[3],ymm10[3] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm10 = ymm7[0],ymm8[0],ymm7[2],ymm8[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm7 = ymm7[1],ymm8[1],ymm7[3],ymm8[3] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm8 = ymm5[0],ymm6[0],ymm5[2],ymm6[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm5 = ymm5[1],ymm6[1],ymm5[3],ymm6[3] -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm6 = ymm15[0],ymm0[0],ymm15[2],ymm0[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm13 = ymm15[1],ymm0[1],ymm15[3],ymm0[3] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm15 = ymm3[0],ymm4[0],ymm3[2],ymm4[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm3 = ymm3[1],ymm4[1],ymm3[3],ymm4[3] -; AVX1-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm2[0],ymm1[0],ymm2[2],ymm1[2] -; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm2[1],ymm1[1],ymm2[3],ymm1[3] -; AVX1-NEXT: vmovaps %ymm8, 224(%rsi) -; AVX1-NEXT: vmovaps %ymm10, 128(%rsi) -; AVX1-NEXT: vmovaps %ymm12, 160(%rsi) -; AVX1-NEXT: vmovaps %ymm14, 64(%rsi) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, 96(%rsi) -; AVX1-NEXT: vmovaps %ymm4, (%rsi) -; AVX1-NEXT: vmovaps %ymm15, 32(%rsi) -; AVX1-NEXT: vmovaps %ymm6, 192(%rsi) -; AVX1-NEXT: vmovaps %ymm13, 192(%rdx) -; AVX1-NEXT: vmovaps %ymm5, 224(%rdx) -; AVX1-NEXT: vmovaps %ymm7, 128(%rdx) -; AVX1-NEXT: vmovaps %ymm9, 160(%rdx) -; AVX1-NEXT: vmovaps %ymm11, 64(%rdx) -; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; AVX1-NEXT: vmovaps %ymm1, 96(%rdx) -; AVX1-NEXT: vmovaps %ymm0, (%rdx) -; AVX1-NEXT: vmovaps %ymm3, 32(%rdx) +; AVX1-NEXT: vmovaps 448(%rdi), %ymm0 +; AVX1-NEXT: vmovaps 256(%rdi), %ymm1 +; AVX1-NEXT: vmovaps 320(%rdi), %ymm3 +; AVX1-NEXT: vmovaps (%rdi), %ymm4 +; AVX1-NEXT: vmovaps 64(%rdi), %ymm2 +; AVX1-NEXT: vmovaps 128(%rdi), %ymm5 +; AVX1-NEXT: vmovaps 192(%rdi), %ymm6 +; AVX1-NEXT: vmovaps 384(%rdi), %ymm7 +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm8 = ymm2[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm2, %ymm9 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm9[0],ymm8[0],ymm9[2],ymm8[2] +; AVX1-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm10 = ymm4[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 32(%rdi), %ymm4, %ymm4 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm11 = ymm4[0],ymm10[0],ymm4[2],ymm10[2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm12 = ymm6[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 224(%rdi), %ymm6, %ymm6 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm13 = ymm6[0],ymm12[0],ymm6[2],ymm12[2] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm14 = ymm5[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 160(%rdi), %ymm5, %ymm5 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm15 = ymm5[0],ymm14[0],ymm5[2],ymm14[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm8 = ymm9[1],ymm8[1],ymm9[3],ymm8[3] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm3[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 352(%rdi), %ymm3, %ymm3 +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm10[1],ymm4[3],ymm10[3] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm10 = ymm3[0],ymm9[0],ymm3[2],ymm9[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm6 = ymm6[1],ymm12[1],ymm6[3],ymm12[3] +; AVX1-NEXT: vperm2f128 
{{.*#+}} ymm12 = ymm1[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 288(%rdi), %ymm1, %ymm1 +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm5 = ymm5[1],ymm14[1],ymm5[3],ymm14[3] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm14 = ymm1[0],ymm12[0],ymm1[2],ymm12[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm3 = ymm3[1],ymm9[1],ymm3[3],ymm9[3] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm0[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 480(%rdi), %ymm0, %ymm0 +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm1[1],ymm12[1],ymm1[3],ymm12[3] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm12 = ymm0[0],ymm9[0],ymm0[2],ymm9[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm0[1],ymm9[1],ymm0[3],ymm9[3] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm9 = ymm7[2,3],mem[2,3] +; AVX1-NEXT: vinsertf128 $1, 416(%rdi), %ymm7, %ymm7 +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm7[0],ymm9[0],ymm7[2],ymm9[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm7 = ymm7[1],ymm9[1],ymm7[3],ymm9[3] +; AVX1-NEXT: vmovaps %ymm12, 224(%rsi) +; AVX1-NEXT: vmovaps %ymm14, 128(%rsi) +; AVX1-NEXT: vmovaps %ymm10, 160(%rsi) +; AVX1-NEXT: vmovaps %ymm15, 64(%rsi) +; AVX1-NEXT: vmovaps %ymm13, 96(%rsi) +; AVX1-NEXT: vmovaps %ymm11, (%rsi) +; AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm9 # 32-byte Reload +; AVX1-NEXT: vmovaps %ymm9, 32(%rsi) +; AVX1-NEXT: vmovaps %ymm0, 192(%rsi) +; AVX1-NEXT: vmovaps %ymm7, 192(%rdx) +; AVX1-NEXT: vmovaps %ymm1, 224(%rdx) +; AVX1-NEXT: vmovaps %ymm2, 128(%rdx) +; AVX1-NEXT: vmovaps %ymm3, 160(%rdx) +; AVX1-NEXT: vmovaps %ymm5, 64(%rdx) +; AVX1-NEXT: vmovaps %ymm6, 96(%rdx) +; AVX1-NEXT: vmovaps %ymm4, (%rdx) +; AVX1-NEXT: vmovaps %ymm8, 32(%rdx) ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-4.ll index 238f602986e87..02db24660c45b 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-4.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-4.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512 ; These patterns are produced by LoopVectorizer for interleaved loads. 
@@ -124,26 +124,45 @@ define void @load_i64_stride4_vf4(<16 x i64>* %in.vec, <4 x i64>* %out.vec0, <4 ; SSE-NEXT: movaps %xmm3, (%r8) ; SSE-NEXT: retq ; -; AVX-LABEL: load_i64_stride4_vf4: -; AVX: # %bb.0: -; AVX-NEXT: vmovaps (%rdi), %ymm0 -; AVX-NEXT: vmovaps 32(%rdi), %ymm1 -; AVX-NEXT: vmovaps 64(%rdi), %ymm2 -; AVX-NEXT: vmovaps 96(%rdi), %ymm3 -; AVX-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1] -; AVX-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1] -; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3] -; AVX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3] -; AVX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2] -; AVX-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; AVX-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3] -; AVX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; AVX-NEXT: vmovaps %ymm2, (%rsi) -; AVX-NEXT: vmovaps %ymm4, (%rdx) -; AVX-NEXT: vmovaps %ymm3, (%rcx) -; AVX-NEXT: vmovaps %ymm0, (%r8) -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: load_i64_stride4_vf4: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm2 +; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm3 +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],mem[2,3] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm2[0],ymm3[0],ymm2[2],ymm3[2] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX1-NEXT: vmovaps %ymm4, (%rsi) +; AVX1-NEXT: vmovaps %ymm2, (%rdx) +; AVX1-NEXT: vmovaps %ymm5, (%rcx) +; AVX1-NEXT: vmovaps %ymm0, (%r8) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: load_i64_stride4_vf4: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovaps (%rdi), %ymm0 +; AVX2-NEXT: vmovaps 32(%rdi), %ymm1 +; AVX2-NEXT: vmovaps 64(%rdi), %ymm2 +; AVX2-NEXT: vmovaps 96(%rdi), %ymm3 +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1] +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1] +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3] +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3] +; AVX2-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2] +; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3] +; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX2-NEXT: vmovaps %ymm2, (%rsi) +; AVX2-NEXT: vmovaps %ymm4, (%rdx) +; AVX2-NEXT: vmovaps %ymm3, (%rcx) +; AVX2-NEXT: vmovaps %ymm0, (%r8) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq ; ; AVX512-LABEL: load_i64_stride4_vf4: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll index 15eaad9b0527f..77c17fb20aa7e 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-4.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 -; 
RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 -; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX1 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512 ; These patterns are produced by LoopVectorizer for interleaved stores. @@ -115,26 +115,45 @@ define void @store_i64_stride4_vf4(<4 x i64>* %in.vecptr0, <4 x i64>* %in.vecptr ; SSE-NEXT: movaps %xmm3, 16(%r8) ; SSE-NEXT: retq ; -; AVX-LABEL: store_i64_stride4_vf4: -; AVX: # %bb.0: -; AVX-NEXT: vmovaps (%rdi), %ymm0 -; AVX-NEXT: vmovaps (%rsi), %ymm1 -; AVX-NEXT: vmovaps (%rdx), %ymm2 -; AVX-NEXT: vmovaps (%rcx), %ymm3 -; AVX-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1] -; AVX-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1] -; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3] -; AVX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3] -; AVX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2] -; AVX-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] -; AVX-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3] -; AVX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] -; AVX-NEXT: vmovaps %ymm0, 96(%r8) -; AVX-NEXT: vmovaps %ymm3, 64(%r8) -; AVX-NEXT: vmovaps %ymm4, 32(%r8) -; AVX-NEXT: vmovaps %ymm2, (%r8) -; AVX-NEXT: vzeroupper -; AVX-NEXT: retq +; AVX1-LABEL: store_i64_stride4_vf4: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovaps (%rdi), %ymm0 +; AVX1-NEXT: vmovaps (%rsi), %ymm1 +; AVX1-NEXT: vinsertf128 $1, (%rdx), %ymm0, %ymm2 +; AVX1-NEXT: vinsertf128 $1, (%rcx), %ymm1, %ymm3 +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],mem[2,3] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm2[0],ymm3[0],ymm2[2],ymm3[2] +; AVX1-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3] +; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX1-NEXT: vmovaps %ymm4, (%r8) +; AVX1-NEXT: vmovaps %ymm0, 96(%r8) +; AVX1-NEXT: vmovaps %ymm5, 64(%r8) +; AVX1-NEXT: vmovaps %ymm2, 32(%r8) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: store_i64_stride4_vf4: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovaps (%rdi), %ymm0 +; AVX2-NEXT: vmovaps (%rsi), %ymm1 +; AVX2-NEXT: vmovaps (%rdx), %ymm2 +; AVX2-NEXT: vmovaps (%rcx), %ymm3 +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1] +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1] +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3] +; AVX2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3] +; AVX2-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2] +; AVX2-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] +; AVX2-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3] +; AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; AVX2-NEXT: vmovaps %ymm0, 96(%r8) +; AVX2-NEXT: 
vmovaps %ymm3, 64(%r8) +; AVX2-NEXT: vmovaps %ymm4, 32(%r8) +; AVX2-NEXT: vmovaps %ymm2, (%r8) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq ; ; AVX512-LABEL: store_i64_stride4_vf4: ; AVX512: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll index 0b8208930796f..19f5457b96afa 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -37,54 +37,56 @@ define <16 x i32> @var_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind { define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512F-LABEL: var_rotate_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3 -; AVX512F-NEXT: vpsubw %ymm3, %ymm2, %ymm4 -; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm2 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm1, %zmm5, %zmm1 -; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpsllvd %zmm3, %zmm2, %zmm3 +; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm3, %zmm0, %zmm3 -; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512F-NEXT: vpsllvd %zmm5, %zmm0, %zmm5 +; AVX512F-NEXT: vpmovdw %zmm5, %ymm5 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %ymm1, %ymm5, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = 
ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpsrlvd %zmm1, %zmm2, %zmm1 +; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 +; AVX512F-NEXT: vpsubw %ymm4, %ymm5, %ymm2 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero -; AVX512F-NEXT: vpsrlvd %zmm2, %zmm5, %zmm2 -; AVX512F-NEXT: vpmovdw %zmm2, %ymm2 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero -; AVX512F-NEXT: vpsrlvd %zmm3, %zmm0, %zmm0 +; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: var_rotate_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3 -; AVX512VL-NEXT: vpsubw %ymm3, %ymm2, %ymm4 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm2 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm1, %zmm5, %zmm1 -; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpsllvd %zmm3, %zmm2, %zmm3 +; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm3, %zmm0, %zmm3 -; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 +; 
AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512VL-NEXT: vpsllvd %zmm5, %zmm0, %zmm5 +; AVX512VL-NEXT: vpmovdw %zmm5, %ymm5 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %ymm1, %ymm5, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm2, %zmm1 +; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1 +; AVX512VL-NEXT: vpsubw %ymm4, %ymm5, %ymm2 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm5, %zmm2 -; AVX512VL-NEXT: vpmovdw %zmm2, %ymm2 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm3, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: var_rotate_v32i16: @@ -429,34 +431,36 @@ define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; AVX512F-LABEL: splatvar_rotate_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm3, %ymm2, %ymm4 +; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 +; AVX512F-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_rotate_v32i16: ; AVX512VL: # %bb.0: -; 
AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 +; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm3, %ymm2, %ymm4 +; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_rotate_v32i16: @@ -661,26 +665,26 @@ define <32 x i16> @constant_rotate_v32i16(<32 x i16> %a) nounwind { ; AVX512F: # %bb.0: ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] -; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm3 -; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm4 +; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3 +; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1 -; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_rotate_v32i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] -; AVX512VL-NEXT: vpmullw %ymm2, %ymm1, %ymm3 -; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm4 +; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm3 +; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm4 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm4, %zmm3 -; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm1, %ymm1 -; AVX512VL-NEXT: vpmulhuw %ymm2, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw %ymm2, %ymm1, %ymm1 +; AVX512VL-NEXT: vpmullw %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: constant_rotate_v32i16: @@ -953,26 +957,26 @@ define <16 x i32> @splatconstant_rotate_v16i32(<16 x i32> %a) nounwind { define <32 x i16> @splatconstant_rotate_v32i16(<32 x i16> %a) nounwind { ; AVX512F-LABEL: splatconstant_rotate_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm1 +; AVX512F-NEXT: vpsrlw $9, %ymm0, %ymm1 ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm3 +; AVX512F-NEXT: vpsrlw $9, %ymm2, %ymm3 ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 -; AVX512F-NEXT: 
vpsrlw $9, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw $9, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm1 +; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm1 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 -; AVX512VL-NEXT: vpsllw $7, %ymm2, %ymm3 +; AVX512VL-NEXT: vpsrlw $9, %ymm2, %ymm3 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 -; AVX512VL-NEXT: vpsrlw $9, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $9, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsllw $7, %ymm2, %ymm2 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_v32i16: @@ -1103,9 +1107,8 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind { ; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw $11, %ymm2, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 -; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 -; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i16: @@ -1116,9 +1119,8 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind { ; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm1, %zmm1 ; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $11, %ymm2, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm2 -; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 -; AVX512VL-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 1a1153d0e8864..8012121c3c6a5 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -447,7 +447,7 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x ; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm5 ; X86-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3] -; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm5 ; X86-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1] ; X86-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3] ; X86-AVX1-NEXT: vmovapd %ymm3, (%edx) @@ -520,7 +520,7 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x ; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm1[2,3],ymm2[0,1] ; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm5 ; X64-AVX1-NEXT: vshufpd {{.*#+}} ymm4 = ymm5[1],ymm4[0],ymm5[2],ymm4[3] -; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm0[0,1],ymm2[0,1] +; 
X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm5 ; X64-AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,0,1] ; X64-AVX1-NEXT: vblendpd {{.*#+}} ymm3 = ymm5[0],ymm3[1],ymm5[2],ymm3[3] ; X64-AVX1-NEXT: vmovapd %ymm3, (%rdi) diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index b2c95135504dd..c035d7d2073a7 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -1492,14 +1492,12 @@ define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) { ; ; AVX2-LABEL: trunc2x4i64_8i16: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] -; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7] -; AVX2-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] -; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7] ; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vselect-zero.ll b/llvm/test/CodeGen/X86/vselect-zero.ll index e1070e3621443..d156bdb5a99d5 100644 --- a/llvm/test/CodeGen/X86/vselect-zero.ll +++ b/llvm/test/CodeGen/X86/vselect-zero.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQBW ; PR28925 @@ -20,6 +22,21 @@ define <4 x i32> @test1(<4 x i1> %cond, <4 x i32> %x) { ; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 ; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; AVX512F-LABEL: test1: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512F-NEXT: vptestnmd %xmm0, %xmm0, %k1 +; AVX512F-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512DQBW-LABEL: test1: +; AVX512DQBW: # %bb.0: +; AVX512DQBW-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX512DQBW-NEXT: vpmovd2m %xmm0, %k0 +; AVX512DQBW-NEXT: knotw %k0, %k1 +; AVX512DQBW-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z} +; AVX512DQBW-NEXT: retq %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x ret <4 x i32> %r } @@ -36,6 +53,12 @@ define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, 
<4 x i32> %x) { ; AVX-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; AVX512-LABEL: test2: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpneqps %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1} {z} +; AVX512-NEXT: retq %cond = fcmp oeq <4 x float> %a, %b %r = select <4 x i1> %cond, <4 x i32> zeroinitializer, <4 x i32> %x ret <4 x i32> %r @@ -53,6 +76,12 @@ define float @fsel_zero_false_val(float %a, float %b, float %x) { ; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; AVX512-LABEL: fsel_zero_false_val: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovss %xmm2, %xmm2, %xmm0 {%k1} {z} +; AVX512-NEXT: retq %cond = fcmp oeq float %a, %b %r = select i1 %cond, float %x, float 0.0 ret float %r @@ -70,6 +99,14 @@ define float @fsel_zero_true_val(float %a, float %b, float %x) { ; AVX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vandnps %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq +; +; AVX512-LABEL: fsel_zero_true_val: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpeqss %xmm1, %xmm0, %k1 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovss %xmm0, %xmm2, %xmm2 {%k1} +; AVX512-NEXT: vmovaps %xmm2, %xmm0 +; AVX512-NEXT: retq %cond = fcmp oeq float %a, %b %r = select i1 %cond, float 0.0, float %x ret float %r @@ -91,6 +128,13 @@ define double @fsel_nonzero_false_val(double %x, double %y, double %z) { ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vblendvpd %xmm0, %xmm2, %xmm1, %xmm0 ; AVX-NEXT: retq +; +; AVX512-LABEL: fsel_nonzero_false_val: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512-NEXT: vmovsd %xmm2, %xmm0, %xmm0 {%k1} +; AVX512-NEXT: retq %cond = fcmp oeq double %x, %y %r = select i1 %cond, double %z, double 42.0 ret double %r @@ -112,6 +156,13 @@ define double @fsel_nonzero_true_val(double %x, double %y, double %z) { ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq +; +; AVX512-LABEL: fsel_nonzero_true_val: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 {%k1} +; AVX512-NEXT: vmovapd %xmm2, %xmm0 +; AVX512-NEXT: retq %cond = fcmp oeq double %x, %y %r = select i1 %cond, double 42.0, double %z ret double %r @@ -133,6 +184,13 @@ define double @fsel_nonzero_constants(double %x, double %y) { ; AVX-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero ; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 ; AVX-NEXT: retq +; +; AVX512-LABEL: fsel_nonzero_constants: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmpeqsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512-NEXT: vmovsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} +; AVX512-NEXT: retq %cond = fcmp oeq double %x, %y %r = select i1 %cond, double 12.0, double 42.0 ret double %r @@ -164,8 +222,847 @@ define <2 x double> @vsel_nonzero_constants(<2 x double> %x, <2 x double> %y) { ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-NEXT: vblendvpd %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 ; AVX-NEXT: retq +; +; AVX512-LABEL: vsel_nonzero_constants: +; AVX512: # %bb.0: +; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512-NEXT: vmovapd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} +; AVX512-NEXT: retq %cond = fcmp oge <2 x double> %x, %y %r = select <2 x i1> %cond, <2 x double> , <2 x double> ret <2 x double> %r } 
+define <16 x i8> @signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) { +; SSE-LABEL: signbit_mask_v16i8: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pcmpgtb %xmm0, %xmm2 +; SSE-NEXT: pand %xmm1, %xmm2 +; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: signbit_mask_v16i8: +; AVX: # %bb.0: +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: signbit_mask_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0 +; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %cond = icmp slt <16 x i8> %a, zeroinitializer + %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer + ret <16 x i8> %r +} + +define <8 x i16> @signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) { +; SSE-LABEL: signbit_mask_v8i16: +; SSE: # %bb.0: +; SSE-NEXT: psraw $15, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: signbit_mask_v8i16: +; AVX: # %bb.0: +; AVX-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: signbit_mask_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %cond = icmp slt <8 x i16> %a, zeroinitializer + %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer + ret <8 x i16> %r +} + +define <4 x i32> @signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) { +; SSE-LABEL: signbit_mask_v4i32: +; SSE: # %bb.0: +; SSE-NEXT: psrad $31, %xmm0 +; SSE-NEXT: pand %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: signbit_mask_v4i32: +; AVX: # %bb.0: +; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: signbit_mask_v4i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %cond = icmp slt <4 x i32> %a, zeroinitializer + %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer + ret <4 x i32> %r +} + +define <2 x i64> @signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) { +; SSE2-LABEL: signbit_mask_v2i64: +; SSE2: # %bb.0: +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: signbit_mask_v2i64: +; SSE42: # %bb.0: +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pcmpgtq %xmm0, %xmm2 +; SSE42-NEXT: pand %xmm1, %xmm2 +; SSE42-NEXT: movdqa %xmm2, %xmm0 +; SSE42-NEXT: retq +; +; AVX-LABEL: signbit_mask_v2i64: +; AVX: # %bb.0: +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512-LABEL: signbit_mask_v2i64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0 +; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: retq + %cond = icmp slt <2 x i64> %a, zeroinitializer + %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer + ret <2 x i64> %r +} + +define <32 x i8> @signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) { +; SSE-LABEL: signbit_mask_v32i8: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm4, %xmm4 +; SSE-NEXT: pxor %xmm5, %xmm5 +; SSE-NEXT: pcmpgtb %xmm0, %xmm5 +; SSE-NEXT: pand %xmm2, %xmm5 +; SSE-NEXT: pcmpgtb %xmm1, %xmm4 +; SSE-NEXT: pand %xmm3, %xmm4 +; SSE-NEXT: movdqa %xmm5, %xmm0 +; SSE-NEXT: movdqa %xmm4, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: signbit_mask_v32i8: +; AVX1: # %bb.0: +; AVX1-NEXT: 
vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2 +; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: signbit_mask_v32i8: +; AVX2: # %bb.0: +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: signbit_mask_v32i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %cond = icmp slt <32 x i8> %a, zeroinitializer + %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer + ret <32 x i8> %r +} + +define <16 x i16> @signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) { +; SSE-LABEL: signbit_mask_v16i16: +; SSE: # %bb.0: +; SSE-NEXT: psraw $15, %xmm0 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: psraw $15, %xmm1 +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: signbit_mask_v16i16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: signbit_mask_v16i16: +; AVX2: # %bb.0: +; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: signbit_mask_v16i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0 +; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %cond = icmp slt <16 x i16> %a, zeroinitializer + %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer + ret <16 x i16> %r +} + +define <8 x i32> @signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) { +; SSE-LABEL: signbit_mask_v8i32: +; SSE: # %bb.0: +; SSE-NEXT: psrad $31, %xmm0 +; SSE-NEXT: pand %xmm2, %xmm0 +; SSE-NEXT: psrad $31, %xmm1 +; SSE-NEXT: pand %xmm3, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: signbit_mask_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: signbit_mask_v8i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512-LABEL: signbit_mask_v8i32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0 +; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: retq + %cond = icmp slt <8 x i32> %a, zeroinitializer + %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer + ret <8 x i32> %r +} + +define <4 x i64> @signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) { +; SSE2-LABEL: signbit_mask_v4i64: +; SSE2: # %bb.0: +; SSE2-NEXT: psrad $31, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: psrad $31, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; SSE2-NEXT: pand %xmm3, %xmm1 +; SSE2-NEXT: retq +; +; SSE42-LABEL: signbit_mask_v4i64: +; SSE42: # %bb.0: +; SSE42-NEXT: pxor %xmm4, %xmm4 +; SSE42-NEXT: pxor %xmm5, %xmm5 +; SSE42-NEXT: pcmpgtq %xmm0, %xmm5 +; SSE42-NEXT: pand %xmm2, %xmm5 +; SSE42-NEXT: pcmpgtq %xmm1, %xmm4 +; SSE42-NEXT: pand %xmm3, %xmm4 +; SSE42-NEXT: movdqa %xmm5, %xmm0 +; SSE42-NEXT: movdqa %xmm4, %xmm1 +; 
SSE42-NEXT: retq
+;
+; AVX1-LABEL: signbit_mask_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_mask_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: signbit_mask_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
+; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <4 x i64> %a, zeroinitializer
+  %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer
+  ret <4 x i64> %r
+}
+
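+; All of the signbit_mask tests above exercise the same fold:
+; "select (icmp slt %a, 0), %b, 0" keeps %b only in lanes where the sign bit
+; of %a is set, so it can lower to an arithmetic shift that splats the sign
+; bit, followed by an AND. A minimal hand-written IR equivalent of the v4i32
+; case (an illustrative sketch, not one of the autogenerated tests; the
+; function name is invented):
+
+define <4 x i32> @signbit_mask_v4i32_expanded(<4 x i32> %a, <4 x i32> %b) {
+  ; Splat the sign bit: all-ones per lane where %a is negative, else zero.
+  %splat = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
+  ; AND with %b yields %b in negative lanes and zero elsewhere.
+  %r = and <4 x i32> %splat, %b
+  ret <4 x i32> %r
+}
+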
+define <16 x i8> @signbit_setmask_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: signbit_setmask_v16i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm2, %xmm2
+; SSE-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE-NEXT: por %xmm1, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: signbit_setmask_v16i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v16i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <16 x i8> %a, zeroinitializer
+  %r = select <16 x i1> %cond, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %b
+  ret <16 x i8> %r
+}
+
+define <8 x i16> @signbit_setmask_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: signbit_setmask_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: psraw $15, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: signbit_setmask_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <8 x i16> %a, zeroinitializer
+  %r = select <8 x i1> %cond, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b
+  ret <8 x i16> %r
+}
+
+define <4 x i32> @signbit_setmask_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: signbit_setmask_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: psrad $31, %xmm0
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: signbit_setmask_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <4 x i32> %a, zeroinitializer
+  %r = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %b
+  ret <4 x i32> %r
+}
+
+define <2 x i64> @signbit_setmask_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: signbit_setmask_v2i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: signbit_setmask_v2i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pxor %xmm2, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm2
+; SSE42-NEXT: por %xmm1, %xmm2
+; SSE42-NEXT: movdqa %xmm2, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: signbit_setmask_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraq $63, %xmm0, %xmm0
+; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <2 x i64> %a, zeroinitializer
+  %r = select <2 x i1> %cond, <2 x i64> <i64 -1, i64 -1>, <2 x i64> %b
+  ret <2 x i64> %r
+}
+
+define <32 x i8> @signbit_setmask_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: signbit_setmask_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pxor %xmm4, %xmm4
+; SSE-NEXT: pxor %xmm5, %xmm5
+; SSE-NEXT: pcmpgtb %xmm0, %xmm5
+; SSE-NEXT: por %xmm2, %xmm5
+; SSE-NEXT: pcmpgtb %xmm1, %xmm4
+; SSE-NEXT: por %xmm3, %xmm4
+; SSE-NEXT: movdqa %xmm5, %xmm0
+; SSE-NEXT: movdqa %xmm4, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: signbit_setmask_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_setmask_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v32i8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <32 x i8> %a, zeroinitializer
+  %r = select <32 x i1> %cond, <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> %b
+  ret <32 x i8> %r
+}
+
+define <16 x i16> @signbit_setmask_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: signbit_setmask_v16i16:
+; SSE: # %bb.0:
+; SSE-NEXT: psraw $15, %xmm0
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: psraw $15, %xmm1
+; SSE-NEXT: por %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: signbit_setmask_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_setmask_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v16i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraw $15, %ymm0, %ymm0
+; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <16 x i16> %a, zeroinitializer
+  %r = select <16 x i1> %cond, <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <16 x i16> %b
+  ret <16 x i16> %r
+}
+
+define <8 x i32> @signbit_setmask_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE-LABEL: signbit_setmask_v8i32:
+; SSE: # %bb.0:
+; SSE-NEXT: psrad $31, %xmm0
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: por %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: signbit_setmask_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_setmask_v8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrad $31, %ymm0, %ymm0
+; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <8 x i32> %a, zeroinitializer
+  %r = select <8 x i1> %cond, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> %b
+  ret <8 x i32> %r
+}
+
+define <4 x i64> @signbit_setmask_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: signbit_setmask_v4i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: signbit_setmask_v4i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pxor %xmm4, %xmm4
+; SSE42-NEXT: pxor %xmm5, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm0, %xmm5
+; SSE42-NEXT: por %xmm2, %xmm5
+; SSE42-NEXT: pcmpgtq %xmm1, %xmm4
+; SSE42-NEXT: por %xmm3, %xmm4
+; SSE42-NEXT: movdqa %xmm5, %xmm0
+; SSE42-NEXT: movdqa %xmm4, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: signbit_setmask_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: signbit_setmask_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: signbit_setmask_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsraq $63, %ymm0, %ymm0
+; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
+  %cond = icmp slt <4 x i64> %a, zeroinitializer
+  %r = select <4 x i1> %cond, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> %b
+  ret <4 x i64> %r
+}
+
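+; The signbit_setmask variants use an all-ones vector as the "true" arm:
+; "select (icmp slt %a, 0), -1, %b" becomes the same sign splat followed by
+; an OR. A hand-written IR equivalent of the v4i32 case (illustrative sketch
+; only, with an invented function name):
+
+define <4 x i32> @signbit_setmask_v4i32_expanded(<4 x i32> %a, <4 x i32> %b) {
+  ; All-ones per lane where %a is negative, else zero.
+  %splat = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
+  ; OR forces negative lanes to all-ones and leaves the others as %b.
+  %r = or <4 x i32> %splat, %b
+  ret <4 x i32> %r
+}
+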
+define <16 x i8> @not_signbit_mask_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: not_signbit_mask_v16i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: pcmpgtb %xmm2, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: not_signbit_mask_v16i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: not_signbit_mask_v16i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpcmpgtb %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQBW-LABEL: not_signbit_mask_v16i8:
+; AVX512DQBW: # %bb.0:
+; AVX512DQBW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQBW-NEXT: vpcmpgtb %xmm2, %xmm0, %k1
+; AVX512DQBW-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} {z}
+; AVX512DQBW-NEXT: retq
+  %cond = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %r = select <16 x i1> %cond, <16 x i8> %b, <16 x i8> zeroinitializer
+  ret <16 x i8> %r
+}
+
+define <8 x i16> @not_signbit_mask_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: not_signbit_mask_v8i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: pcmpgtw %xmm2, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: not_signbit_mask_v8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: not_signbit_mask_v8i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpcmpgtw %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQBW-LABEL: not_signbit_mask_v8i16:
+; AVX512DQBW: # %bb.0:
+; AVX512DQBW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQBW-NEXT: vpcmpgtw %xmm2, %xmm0, %k1
+; AVX512DQBW-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} {z}
+; AVX512DQBW-NEXT: retq
+  %cond = icmp sgt <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %r = select <8 x i1> %cond, <8 x i16> %b, <8 x i16> zeroinitializer
+  ret <8 x i16> %r
+}
+
+define <4 x i32> @not_signbit_mask_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: not_signbit_mask_v4i32:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE-NEXT: pand %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: not_signbit_mask_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpcmpgtd %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: not_signbit_mask_v4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpcmpgtd %xmm2, %xmm0, %k1
+; AVX512-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+  %cond = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %r = select <4 x i1> %cond, <4 x i32> %b, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+}
+
+define <2 x i64> @not_signbit_mask_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; SSE2-LABEL: not_signbit_mask_v2i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: not_signbit_mask_v2i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE42-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE42-NEXT: pand %xmm1, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX-LABEL: not_signbit_mask_v2i64:
+; AVX: # %bb.0:
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512-LABEL: not_signbit_mask_v2i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512-NEXT: vpcmpgtq %xmm2, %xmm0, %k1
+; AVX512-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: retq
+  %cond = icmp sgt <2 x i64> %a, <i64 -1, i64 -1>
+  %r = select <2 x i1> %cond, <2 x i64> %b, <2 x i64> zeroinitializer
+  ret <2 x i64> %r
+}
+
+define <32 x i8> @not_signbit_mask_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; SSE-LABEL: not_signbit_mask_v32i8:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE-NEXT: pcmpgtb %xmm4, %xmm1
+; SSE-NEXT: pcmpgtb %xmm4, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: not_signbit_mask_v32i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: not_signbit_mask_v32i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: not_signbit_mask_v32i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQBW-LABEL: not_signbit_mask_v32i8:
+; AVX512DQBW: # %bb.0:
+; AVX512DQBW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512DQBW-NEXT: vpcmpgtb %ymm2, %ymm0, %k1
+; AVX512DQBW-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} {z}
+; AVX512DQBW-NEXT: retq
+  %cond = icmp sgt <32 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %r = select <32 x i1> %cond, <32 x i8> %b, <32 x i8> zeroinitializer
+  ret <32 x i8> %r
+}
+
+define <16 x i16> @not_signbit_mask_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; SSE-LABEL: not_signbit_mask_v16i16:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE-NEXT: pcmpgtw %xmm4, %xmm1
+; SSE-NEXT: pcmpgtw %xmm4, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: not_signbit_mask_v16i16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: not_signbit_mask_v16i16:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: not_signbit_mask_v16i16:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: retq
+;
+; AVX512DQBW-LABEL: not_signbit_mask_v16i16:
+; AVX512DQBW: # %bb.0:
+; AVX512DQBW-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512DQBW-NEXT: vpcmpgtw %ymm2, %ymm0, %k1
+; AVX512DQBW-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} {z}
+; AVX512DQBW-NEXT: retq
+  %cond = icmp sgt <16 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %r = select <16 x i1> %cond, <16 x i16> %b, <16 x i16> zeroinitializer
+  ret <16 x i16> %r
+}
+
+define <8 x i32> @not_signbit_mask_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; SSE-LABEL: not_signbit_mask_v8i32:
+; SSE: # %bb.0:
+; SSE-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE-NEXT: pcmpgtd %xmm4, %xmm1
+; SSE-NEXT: pcmpgtd %xmm4, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: not_signbit_mask_v8i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: not_signbit_mask_v8i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpgtd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: not_signbit_mask_v8i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpcmpgtd %ymm2, %ymm0, %k1
+; AVX512-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
+  %cond = icmp sgt <8 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %r = select <8 x i1> %cond, <8 x i32> %b, <8 x i32> zeroinitializer
+  ret <8 x i32> %r
+}
+
+define <4 x i64> @not_signbit_mask_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: not_signbit_mask_v4i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm3, %xmm1
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: not_signbit_mask_v4i64:
+; SSE42: # %bb.0:
+; SSE42-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE42-NEXT: pcmpgtq %xmm4, %xmm1
+; SSE42-NEXT: pcmpgtq %xmm4, %xmm0
+; SSE42-NEXT: pand %xmm2, %xmm0
+; SSE42-NEXT: pand %xmm3, %xmm1
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: not_signbit_mask_v4i64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: not_signbit_mask_v4i64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: not_signbit_mask_v4i64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX512-NEXT: vpcmpgtq %ymm2, %ymm0, %k1
+; AVX512-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; AVX512-NEXT: retq
+  %cond = icmp sgt <4 x i64> %a, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %r = select <4 x i1> %cond, <4 x i64> %b, <4 x i64> zeroinitializer
+  ret <4 x i64> %r
+}
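+
+; The not_signbit_mask tests invert the predicate: "icmp sgt %a, -1" is true
+; exactly where the sign bit is clear, so the mask is the complement of the
+; sign splat (AVX512 instead folds the compare into a zero-masked move). A
+; hand-written IR equivalent of the v4i32 case (illustrative sketch only,
+; with an invented function name):
+
+define <4 x i32> @not_signbit_mask_v4i32_expanded(<4 x i32> %a, <4 x i32> %b) {
+  ; All-ones per lane where %a is negative, else zero.
+  %splat = ashr <4 x i32> %a, <i32 31, i32 31, i32 31, i32 31>
+  ; Invert: all-ones where the sign bit is clear.
+  %notsplat = xor <4 x i32> %splat, <i32 -1, i32 -1, i32 -1, i32 -1>
+  ; Keep %b only in those non-negative lanes.
+  %r = and <4 x i32> %notsplat, %b
+  ret <4 x i32> %r
+}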
diff --git a/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir b/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir
index d7e75db5ebbce..c08dc76aef99a 100644
--- a/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir
+++ b/llvm/test/CodeGen/X86/win64-eh-empty-block-2.mir
@@ -173,7 +173,7 @@ body: |
     SEH_Epilogue
     $rsp = frame-destroy ADD64ri8 $rsp, 48, implicit-def dead $eflags
     $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp
-    RETQ $eax
+    RET64 $eax
 
   bb.1.if.then:
     successors: %bb.2(0x7ffff800), %bb.7(0x00000800)
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index 258b54203438b..127972d9ce710 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -4,22 +4,37 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX,AVX2OR512,AVX512
 
 define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
-; AVX-LABEL: load_factorf64_4:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovupd (%rdi), %ymm0
-; AVX-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX-NEXT: vmovupd 96(%rdi), %ymm3
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
-; AVX-NEXT: vhaddpd %ymm5, %ymm4, %ymm4
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
-; AVX-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX-NEXT: vaddpd %ymm2, %ymm4, %ymm2
-; AVX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX-NEXT: vaddpd %ymm0, %ymm2, %ymm0
-; AVX-NEXT: retq
+; AVX1-LABEL: load_factorf64_4:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovupd (%rdi), %ymm0
+; AVX1-NEXT: vmovupd 32(%rdi), %ymm1
+; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm2
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm3
+; AVX1-NEXT: vhaddpd %ymm3, %ymm2, %ymm2
+; AVX1-NEXT: vperm2f128 $49, 64(%rdi), %ymm0, %ymm0 # ymm0 = ymm0[2,3],mem[2,3]
+; AVX1-NEXT: vperm2f128 $49, 96(%rdi), %ymm1, %ymm1 # ymm1 = ymm1[2,3],mem[2,3]
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT: vaddpd %ymm3, %ymm2, %ymm2
+; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX1-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2OR512-LABEL: load_factorf64_4:
+; AVX2OR512: # %bb.0:
+; AVX2OR512-NEXT: vmovupd (%rdi), %ymm0
+; AVX2OR512-NEXT: vmovupd 32(%rdi), %ymm1
+; AVX2OR512-NEXT: vmovupd 64(%rdi), %ymm2
+; AVX2OR512-NEXT: vmovupd 96(%rdi), %ymm3
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
+; AVX2OR512-NEXT: vhaddpd %ymm5, %ymm4, %ymm4
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
+; AVX2OR512-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2OR512-NEXT: vaddpd %ymm2, %ymm4, %ymm2
+; AVX2OR512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX2OR512-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; AVX2OR512-NEXT: retq
   %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
   %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
   %strided.v1 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
@@ -32,20 +47,33 @@ define <4 x double> @load_factorf64_4(<16 x double>* %ptr) {
 }
 
 define <4 x double> @load_factorf64_2(<16 x double>* %ptr) {
-; AVX-LABEL: load_factorf64_2:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovupd (%rdi), %ymm0
-; AVX-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX-NEXT: vmovupd 96(%rdi), %ymm3
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
-; AVX-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
-; AVX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
-; AVX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX-NEXT: vmulpd %ymm0, %ymm4, %ymm0
-; AVX-NEXT: retq
+; AVX1-LABEL: load_factorf64_2:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovupd (%rdi), %ymm0
+; AVX1-NEXT: vmovupd 32(%rdi), %ymm1
+; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm2
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm3
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX1-NEXT: vperm2f128 $49, 64(%rdi), %ymm0, %ymm0 # ymm0 = ymm0[2,3],mem[2,3]
+; AVX1-NEXT: vperm2f128 $49, 96(%rdi), %ymm1, %ymm1 # ymm1 = ymm1[2,3],mem[2,3]
+; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX1-NEXT: vmulpd %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2OR512-LABEL: load_factorf64_2:
+; AVX2OR512: # %bb.0:
+; AVX2OR512-NEXT: vmovupd (%rdi), %ymm0
+; AVX2OR512-NEXT: vmovupd 32(%rdi), %ymm1
+; AVX2OR512-NEXT: vmovupd 64(%rdi), %ymm2
+; AVX2OR512-NEXT: vmovupd 96(%rdi), %ymm3
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
+; AVX2OR512-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
+; AVX2OR512-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
+; AVX2OR512-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX2OR512-NEXT: vmulpd %ymm0, %ymm4, %ymm0
+; AVX2OR512-NEXT: retq
   %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
   %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
   %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
@@ -54,15 +82,25 @@ define <4 x double> @load_factorf64_2(<16 x double>* %ptr) {
 }
 
 define <4 x double> @load_factorf64_1(<16 x double>* %ptr) {
-; AVX-LABEL: load_factorf64_1:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovupd (%rdi), %ymm0
-; AVX-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX-NEXT: vperm2f128 $32, 64(%rdi), %ymm0, %ymm0 # ymm0 = ymm0[0,1],mem[0,1]
-; AVX-NEXT: vperm2f128 $32, 96(%rdi), %ymm1, %ymm1 # ymm1 = ymm1[0,1],mem[0,1]
-; AVX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX-NEXT: vmulpd %ymm0, %ymm0, %ymm0
-; AVX-NEXT: retq
+; AVX1-LABEL: load_factorf64_1:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovups (%rdi), %ymm0
+; AVX1-NEXT: vmovups 32(%rdi), %ymm1
+; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT: vmulpd %ymm0, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2OR512-LABEL: load_factorf64_1:
+; AVX2OR512: # %bb.0:
+; AVX2OR512-NEXT: vmovupd (%rdi), %ymm0
+; AVX2OR512-NEXT: vmovupd 32(%rdi), %ymm1
+; AVX2OR512-NEXT: vperm2f128 $32, 64(%rdi), %ymm0, %ymm0 # ymm0 = ymm0[0,1],mem[0,1]
+; AVX2OR512-NEXT: vperm2f128 $32, 96(%rdi), %ymm1, %ymm1 # ymm1 = ymm1[0,1],mem[0,1]
+; AVX2OR512-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX2OR512-NEXT: vmulpd %ymm0, %ymm0, %ymm0
+; AVX2OR512-NEXT: retq
   %wide.vec = load <16 x double>, <16 x double>* %ptr, align 16
   %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
   %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
@@ -75,26 +113,24 @@ define <4 x i64> @load_factori64_4(<16 x i64>* %ptr) {
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: vmovups (%rdi), %ymm0
 ; AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; AVX1-NEXT: vmovups 96(%rdi), %ymm3
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm0[0,1],ymm2[0,1]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm5 = ymm1[0,1],ymm3[0,1]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3],ymm3[2,3]
-; AVX1-NEXT: vunpcklpd {{.*#+}} ymm2 = ymm4[0],ymm5[0],ymm4[2],ymm5[2]
-; AVX1-NEXT: vunpcklpd {{.*#+}} ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; AVX1-NEXT: vunpckhpd {{.*#+}} ymm4 = ymm4[1],ymm5[1],ymm4[3],ymm5[3]
+; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm2
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm3
+; AVX1-NEXT: vperm2f128 $49, 64(%rdi), %ymm0, %ymm0 # ymm0 = ymm0[2,3],mem[2,3]
+; AVX1-NEXT: vperm2f128 $49, 96(%rdi), %ymm1, %ymm1 # ymm1 = ymm1[2,3],mem[2,3]
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm4 = ymm2[0],ymm3[0],ymm2[2],ymm3[2]
+; AVX1-NEXT: vunpcklpd {{.*#+}} ymm5 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; AVX1-NEXT: vunpckhpd {{.*#+}} ymm2 = ymm2[1],ymm3[1],ymm2[3],ymm3[3]
 ; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpaddq %xmm3, %xmm4, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
-; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpaddq %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpaddq %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
+; AVX1-NEXT: vpaddq %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm5
+; AVX1-NEXT: vpaddq %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpaddq %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpaddq %xmm1, %xmm3, %xmm1
 ; AVX1-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpaddq %xmm0, %xmm4, %xmm0
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir b/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir
index d8f08104a9e7f..69e8c6bfda4be 100644
--- a/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir
+++ b/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir
@@ -21,8 +21,8 @@ body: |
   bb.0:
     liveins: $edi
     ; CHECK: PATCHABLE_FUNCTION_ENTER
-    RETQ
+    RET64
    ; CHECK-NEXT: PATCHABLE_RET
-    RETQ
+    RET64
    ; CHECK-NEXT: PATCHABLE_RET
 ...
diff --git a/llvm/test/DebugInfo/COFF/fortran-basic.ll b/llvm/test/DebugInfo/COFF/fortran-basic.ll new file mode 100644 index 0000000000000..96f3af3ebda96 --- /dev/null +++ b/llvm/test/DebugInfo/COFF/fortran-basic.ll @@ -0,0 +1,143 @@ +; RUN: llc < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s +; +; The IR in this test derives from the following Fortran program: +; program array +; integer array1, array2 +; dimension array1(10) +; dimension array2(3:10) +; double precision d +; logical l +; character*6 c +; +; common /com/ d, l, c +; +; array1(1) = 1 +; array2(3) = 2 +; d = 8.0 +; l = .TRUE. +; c = 'oooooo' +; end +; +; CHECK: Array ([[array2_t:.*]]) { +; CHECK-NEXT: TypeLeafKind: LF_ARRAY +; CHECK-NEXT: ElementType: int +; CHECK-NEXT: IndexType: unsigned __int64 +; CHECK-NEXT: SizeOf: 32 +; +; CHECK: Array ([[array1_t:.*]]) { +; CHECK-NEXT: TypeLeafKind: LF_ARRAY +; CHECK-NEXT: ElementType: int +; CHECK-NEXT: IndexType: unsigned __int64 +; CHECK-NEXT: SizeOf: 40 +; +; CHECK: Array ([[char_6_t:.*]]) { +; CHECK-NEXT: TypeLeafKind: LF_ARRAY +; CHECK-NEXT: ElementType: char +; CHECK-NEXT: IndexType: unsigned __int64 +; CHECK-NEXT: SizeOf: 6 +; CHECK-NEXT: CHARACTER_0 +; +; CHECK: DataOffset: ARRAY$ARRAY2+0x0 +; CHECK-NEXT: Type: [[array2_t]] +; CHECK-NEXT: DisplayName: ARRAY2 +; CHECK-NEXT: LinkageName: ARRAY$ARRAY2 +; +; CHECK: DataOffset: ARRAY$ARRAY1+0x0 +; CHECK-NEXT: Type: [[array1_t]] +; CHECK-NEXT: DisplayName: ARRAY1 +; CHECK-NEXT: LinkageName: ARRAY$ARRAY1 +; +; CHECK: DataOffset: COM+0x0 +; CHECK-NEXT: Type: double +; CHECK-NEXT: DisplayName: D +; CHECK-NEXT: LinkageName: COM +; +; CHECK: DataOffset: COM+0x8 +; CHECK-NEXT: Type: __bool32 +; CHECK-NEXT: DisplayName: L +; CHECK-NEXT: LinkageName: COM +; +; CHECK: DataOffset: COM+0xC +; CHECK-NEXT: Type: CHARACTER_0 ([[char_6_t]]) +; CHECK-NEXT: DisplayName: C +; CHECK-NEXT: LinkageName: COM + +; ModuleID = 'fortran-basic.f' +source_filename = "fortran-basic.f" +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +@strlit = internal unnamed_addr constant [6 x i8] c"oooooo" +@COM = common unnamed_addr global [18 x i8] zeroinitializer, align 32, !dbg !0, !dbg !9, !dbg !12 +@"ARRAY$ARRAY2" = internal global [8 x i32] zeroinitializer, align 16, !dbg !15 +@"ARRAY$ARRAY1" = internal global [10 x i32] zeroinitializer, align 16, !dbg !21 +@0 = internal unnamed_addr constant i32 2 + +; Function Attrs: noinline nounwind optnone uwtable +define void @MAIN__() #0 !dbg !3 { +alloca_0: + %"$io_ctx" = alloca [6 x i64], align 8 + %strlit_fetch.1 = load [6 x i8], [6 x i8]* @strlit, align 1, !dbg !39 + %func_result = call i32 @for_set_reentrancy(i32* @0), !dbg !39 + store i32 1, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @"ARRAY$ARRAY1", i32 0, i32 0), align 1, !dbg !40 + store i32 2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @"ARRAY$ARRAY2", i32 0, i32 0), align 1, !dbg !41 + store double 8.000000e+00, double* bitcast ([18 x i8]* @COM to double*), align 1, !dbg !42 + store i32 -1, i32* bitcast (i8* getelementptr inbounds ([18 x i8], [18 x i8]* @COM, i32 0, i64 8) to i32*), align 1, !dbg !43 + call void @llvm.for.cpystr.i64.i64.i64(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @COM, i32 0, i64 12), i64 6, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @strlit, i32 0, i32 0), i64 3, i64 0, i1 false), !dbg !44 + ret void, !dbg !45 +} + +declare i32 @for_set_reentrancy(i32* nocapture readonly) + +; Function Attrs: nounwind readnone 
speculatable +declare i32* @llvm.intel.subscript.p0i32.i64.i64.p0i32.i64(i8, i64, i64, i32*, i64) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.for.cpystr.i64.i64.i64(i8* noalias nocapture writeonly, i64, i8* noalias nocapture readonly, i64, i64, i1 immarg) #2 + +attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="none" "intel-lang"="fortran" "min-legal-vector-width"="0" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { argmemonly nofree nosync nounwind willreturn } + +!llvm.module.flags = !{!28, !29, !30} +!llvm.dbg.cu = !{!7} +!omp_offload.info = !{} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "D", linkageName: "COM", scope: !2, file: !4, line: 5, type: !27, isLocal: false, isDefinition: true) +!2 = !DICommonBlock(scope: !3, declaration: null, name: "COM", file: !4, line: 8) +!3 = distinct !DISubprogram(name: "ARRAY", linkageName: "MAIN__", scope: !4, file: !4, line: 1, type: !5, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !7, retainedNodes: !26) +!4 = !DIFile(filename: "fortran-basic.f", directory: "d:\\iusers\\cchen15\\examples\\tests\\vsdF-nightly\\vsdF\\opt_none_debug") +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = distinct !DICompileUnit(language: DW_LANG_Fortran95, file: !4, producer: "Intel(R) Fortran 22.0-1034", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !8, splitDebugInlining: false, nameTableKind: None) +!8 = !{!0, !9, !12, !15, !21} +!9 = !DIGlobalVariableExpression(var: !10, expr: !DIExpression(DW_OP_plus_uconst, 8)) +!10 = distinct !DIGlobalVariable(name: "L", linkageName: "COM", scope: !2, file: !4, line: 6, type: !11, isLocal: false, isDefinition: true) +!11 = !DIBasicType(name: "LOGICAL*4", size: 32, encoding: DW_ATE_boolean) +!12 = !DIGlobalVariableExpression(var: !13, expr: !DIExpression(DW_OP_plus_uconst, 12)) +!13 = distinct !DIGlobalVariable(name: "C", linkageName: "COM", scope: !2, file: !4, line: 7, type: !14, isLocal: false, isDefinition: true) +!14 = !DIStringType(name: "CHARACTER_0", size: 48) +!15 = !DIGlobalVariableExpression(var: !16, expr: !DIExpression()) +!16 = distinct !DIGlobalVariable(name: "ARRAY2", linkageName: "ARRAY$ARRAY2", scope: !3, file: !4, line: 2, type: !17, isLocal: true, isDefinition: true) +!17 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, elements: !19) +!18 = !DIBasicType(name: "INTEGER*4", size: 32, encoding: DW_ATE_signed) +!19 = !{!20} +!20 = !DISubrange(lowerBound: 3, upperBound: 10) +!21 = !DIGlobalVariableExpression(var: !22, expr: !DIExpression()) +!22 = distinct !DIGlobalVariable(name: "ARRAY1", linkageName: "ARRAY$ARRAY1", scope: !3, file: !4, line: 2, type: !23, isLocal: true, isDefinition: true) +!23 = !DICompositeType(tag: DW_TAG_array_type, baseType: !18, elements: !24) +!24 = !{!25} +!25 = !DISubrange(count: 10, lowerBound: 1) +!26 = !{} +!27 = !DIBasicType(name: "REAL*8", size: 64, encoding: DW_ATE_float) +!28 = !{i32 7, !"PIC Level", i32 2} +!29 = !{i32 2, !"Debug Info Version", i32 3} +!30 = !{i32 2, !"CodeView", i32 1} +!39 = !DILocation(line: 1, column: 10, scope: !3) +!40 = !DILocation(line: 9, column: 9, scope: !3) +!41 = !DILocation(line: 10, column: 9, scope: !3) +!42 = !DILocation(line: 11, column: 9, scope: !3) +!43 = !DILocation(line: 12, column: 9, scope: !3) +!44 = !DILocation(line: 13, column: 9, scope: !3) +!45 = 
!DILocation(line: 14, column: 2, scope: !3) diff --git a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phi-subregister-location.mir b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phi-subregister-location.mir index 1730bca2ac40d..0e105c132001a 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phi-subregister-location.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phi-subregister-location.mir @@ -66,6 +66,6 @@ body: | DBG_INSTR_REF 2, 0, !12, !DIExpression(), debug-location !13 renamable $rax = MOV64rm $rip, 1, $noreg, target-flags(x86-gotpcrel) @someglobal, $noreg, debug-location !13 :: (load (s64) from got) MOV8mr killed renamable $rax, 1, $noreg, 0, $noreg, renamable $dil, debug-location !13 :: (store (s8) into @someglobal) - RETQ debug-location !13 + RET64 debug-location !13 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir index 93a82ee8f1d06..0102f648c27a9 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-in-ldv.mir @@ -157,6 +157,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !26 $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !26 CFI_INSTRUCTION def_cfa_offset 8, debug-location !26 - RETQ implicit $eax, debug-location !26 + RET64 implicit $eax, debug-location !26 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir index b46c4284c31ac..586b4b1824331 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-merging-in-ldv.mir @@ -194,6 +194,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !31 $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !31 CFI_INSTRUCTION def_cfa_offset 8, debug-location !31 - RETQ implicit $eax, debug-location !31 + RET64 implicit $eax, debug-location !31 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir index cba605089aa93..1f9843b552517 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/dbg-phis-with-loops.mir @@ -200,6 +200,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !31 $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !31 CFI_INSTRUCTION def_cfa_offset 8, debug-location !31 - RETQ implicit $eax, debug-location !31 + RET64 implicit $eax, debug-location !31 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/follow-spill-of-live-value.mir b/llvm/test/DebugInfo/MIR/InstrRef/follow-spill-of-live-value.mir index 849395f7e203b..c66c9d05c0637 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/follow-spill-of-live-value.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/follow-spill-of-live-value.mir @@ -328,6 +328,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp CFI_INSTRUCTION def_cfa_offset 8 - RETQ + RET64 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/ignore-dbg-value-list.mir b/llvm/test/DebugInfo/MIR/InstrRef/ignore-dbg-value-list.mir index ee3a0d8e3ac27..f9728d073c6d0 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/ignore-dbg-value-list.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/ignore-dbg-value-list.mir @@ -69,6 +69,6 @@ body: | ; This clobber of $rax might cause LDV to re-issue a DBG_VALUE stating the ; variable location as $rbx. 
However, the preceding DBG_VALUE_LIST should
     ; terminate the earlier location.
-    RETQ implicit $rbx, debug-location !13
+    RET64 implicit $rbx, debug-location !13
 
 ...
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/instr-ref-roundtrip.mir b/llvm/test/DebugInfo/MIR/InstrRef/instr-ref-roundtrip.mir
index 0d30b2b8785e7..b045cbc784fcd 100644
--- a/llvm/test/DebugInfo/MIR/InstrRef/instr-ref-roundtrip.mir
+++ b/llvm/test/DebugInfo/MIR/InstrRef/instr-ref-roundtrip.mir
@@ -14,5 +14,5 @@ body: |
     $rbp = MOV64rr $rdi, debug-instr-number 1
     dead $rcx = MOV64ri 0
     CMP64ri8 renamable $rax, 1, implicit-def $eflags
-    RETQ $rax
+    RET64 $rax
 ...
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir
index 13d9295ad656a..19353dae26080 100644
--- a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir
+++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_instrref_tolocs.mir
@@ -154,5 +154,5 @@ body: |
     ; This is instruction 10 referred to in bb.10. However, as the variable
     ; location/value has been modified in the meantime, no DBG_VALUE should be
    ; generated here.
-    RETQ $eax, debug-location !17
+    RET64 $eax, debug-location !17
 ...
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir
index 3d45a548e26e9..c41610fdc2017 100644
--- a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir
+++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_recover_clobbers.mir
@@ -96,5 +96,5 @@ body: |
     ; CHECK-NEXT: CALL64pcrel32
     ; CHECK-NEXT: DBG_VALUE $ebx
 
-    RETQ $eax, debug-location !17
+    RET64 $eax, debug-location !17
 ...
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_stackslot_subregs.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_stackslot_subregs.mir
index 9cf1e4cc1b45d..327fdeed7cb10 100644
--- a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_stackslot_subregs.mir
+++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_stackslot_subregs.mir
@@ -52,5 +52,5 @@ body: |
     DBG_INSTR_REF 1, 0, !11, !DIExpression(), debug-location !12
     ; CHECK: DBG_INSTR_REF
     ; CHECK-NEXT: DBG_VALUE $esi
-    RETQ $rsi, debug-location !12
+    RET64 $rsi, debug-location !12
 ...
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir
index 26cdcf211a64e..6277b1b6dee23 100644
--- a/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir
+++ b/llvm/test/DebugInfo/MIR/InstrRef/livedebugvalues_subreg_substitutions.mir
@@ -105,5 +105,5 @@ body: |
     ; CHECK-NEXT: DBG_INSTR_REF 13, 0
     ; CHECK-NEXT: DBG_VALUE $noreg
     $rax = MOV64rm $rsp, 1, $noreg, 8, $noreg :: (load 8 from %stack.0)
-    RETQ $rax, debug-location !12
+    RET64 $rax, debug-location !12
 ...
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-tracking.mir b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-tracking.mir
index c7236f5e23c31..2f19ad527ab75 100644
--- a/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-tracking.mir
+++ b/llvm/test/DebugInfo/MIR/InstrRef/memory-operand-tracking.mir
@@ -81,5 +81,5 @@ body: |
     ; CHECK-NEXT: DBG_VALUE $noreg
     $rax = MOV64rm $rsp, 1, $noreg, 8, $noreg :: (load 8 from %stack.0)
 
-    RETQ $rax, debug-location !12
+    RET64 $rax, debug-location !12
 ...
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/no-duplicates.mir b/llvm/test/DebugInfo/MIR/InstrRef/no-duplicates.mir index 79d7960dd3bf8..5ade1830995b2 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/no-duplicates.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/no-duplicates.mir @@ -36,5 +36,5 @@ body: | $rbp = MOV64rr $rdi, debug-instr-number 1, debug-location !12 dead $rcx = MOV64ri 0, debug-instr-number 1, debug-location !12 CMP64ri8 renamable $rax, 1, implicit-def $eflags - RETQ $rax + RET64 $rax ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/no-metainstrs.mir b/llvm/test/DebugInfo/MIR/InstrRef/no-metainstrs.mir index d943a8eabd06f..b1d5c03d68b8e 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/no-metainstrs.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/no-metainstrs.mir @@ -37,5 +37,5 @@ body: | $ebp = KILL killed $rbp, debug-instr-number 2, debug-location !12 dead $rcx = MOV64ri 0 CMP64ri8 renamable $rax, 1, implicit-def $eflags - RETQ $rax + RET64 $rax ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir b/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir index b18f57f4d91ab..b6df94a0b8c2c 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/phi-coalesce-subreg.mir @@ -156,6 +156,6 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp, debug-location !13 %23:gr32 = MOVSX32rr16 %26, debug-location !13 $eax = COPY %23, debug-location !13 - RETQ implicit $eax, debug-location !13 + RET64 implicit $eax, debug-location !13 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/phi-coalescing.mir b/llvm/test/DebugInfo/MIR/InstrRef/phi-coalescing.mir index c37804f6036c1..91f63937df1c9 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/phi-coalescing.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/phi-coalescing.mir @@ -155,6 +155,6 @@ body: | ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp, debug-location !13 %13:gr32 = COPY %14.sub_32bit, debug-location !13 $eax = COPY %13, debug-location !13 - RETQ implicit $eax, debug-location !13 + RET64 implicit $eax, debug-location !13 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir b/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir index e1d00597057a1..d652da6088d5d 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/pick-vphi-in-shifting-loop.mir @@ -123,6 +123,6 @@ body: | JCC_1 %bb.5, 6, implicit $eflags, debug-location !22 bb.6: - RETQ debug-location !22 + RET64 debug-location !22 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/restore-to-rsp-crash.mir b/llvm/test/DebugInfo/MIR/InstrRef/restore-to-rsp-crash.mir index cfe6c1757a0c5..fa5fccfddcd7e 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/restore-to-rsp-crash.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/restore-to-rsp-crash.mir @@ -61,6 +61,6 @@ body: | DBG_VALUE $rax, $noreg, !12, !DIExpression(), debug-location !13 MOV64mr $rsp, 1, $noreg, -8, $noreg, renamable $rax :: (store 8 into %stack.0) $rsp = MOV64rm $rsp, 1, $noreg, 0, $noreg, debug-location !13 :: (load 8 from %stack.0) - RETQ implicit $rbx, debug-location !13 + RET64 implicit $rbx, debug-location !13 ... 
diff --git a/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir b/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir index 0a8fada169cf8..a5c5018b8434f 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/substitusions-roundtrip.mir @@ -22,5 +22,5 @@ body: | DBG_INSTR_REF 1, 0 dead $rcx = MOV64ri 0 CMP64ri8 renamable $rax, 1, implicit-def $eflags - RETQ $rax + RET64 $rax ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir b/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir index 80ae2e78fea07..53658d2fb53c6 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/survives-livedebugvars.mir @@ -41,7 +41,7 @@ # FASTREG-NEXT: JMP_1 # FASTREG-LABEL: bb.4: # FASTREG: DBG_INSTR_REF 5, 0 -# FASTREG-NEXT: RETQ +# FASTREG-NEXT: RET64 --- | ; ModuleID = 'tmp.ll' @@ -137,6 +137,6 @@ body: | bb.4: $eax = COPY %5, debug-location !18 DBG_INSTR_REF 5, 0, !9, !DIExpression(), debug-location !16 - RETQ implicit $eax, debug-location !18 + RET64 implicit $eax, debug-location !18 ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir b/llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir index 27b155639af5d..b20041d04b991 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/x86-fixup-bw-inst-subreb.mir @@ -28,7 +28,7 @@ body: | $ax = MOV16rm killed $rax, 1, $noreg, 0, $noreg, debug-instr-number 1 ; CHECK: $eax = MOVZX32rm16 killed $rax, {{.*}} debug-instr-number 2 - RETQ $ax + RET64 $ax ... --- @@ -55,11 +55,11 @@ body: | $ax = MOV16rm killed $rdi, 1, $noreg, 0, $noreg, implicit-def $eax, debug-instr-number 1 ; CHECK: $eax = MOVZX32rm16 killed $rdi, {{.*}} debug-instr-number 2 $ax = KILL $ax, implicit killed $eax - RETQ $ax + RET64 $ax bb.1: $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags $ax = KILL $ax, implicit killed $eax - RETQ $ax + RET64 $ax ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup-2.mir b/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup-2.mir index 2ffd6b5e49b52..867dd278de505 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup-2.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup-2.mir @@ -18,7 +18,7 @@ body: | ; CHECK: $eax = ADD32ri8 {{.*}} debug-instr-number 2 $eax = LEA32r killed $eax, 1, killed $ebp, -5, $noreg, debug-instr-number 1 - RETQ $eax + RET64 $eax ... --- @@ -38,7 +38,7 @@ body: | ; CHECK: $ebx = ADD32rr {{.*}} debug-instr-number 2 $ebx = LEA32r killed $ebp, 1, $ebp, 0, $noreg, debug-instr-number 1 - RETQ $ebx + RET64 $ebx ... --- @@ -57,6 +57,6 @@ body: | ; CHECK: $ebx = ADD32rr {{.*}} debug-instr-number 2 $ebx = LEA32r $ebp, 1, $ebp, 5, $noreg, debug-instr-number 1 - RETQ $ebx + RET64 $ebx ... diff --git a/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup.mir b/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup.mir index f8b7967e88b6b..2765ce2284f17 100644 --- a/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup.mir +++ b/llvm/test/DebugInfo/MIR/InstrRef/x86-lea-fixup.mir @@ -24,7 +24,7 @@ body: | $ebp = LEA64_32r killed $rbp, 1, killed $rax, 0, $noreg, debug-instr-number 1 ; COREI7: ADD32rr {{.*}} debug-instr-number 2 - RETQ $ebp + RET64 $ebp ... 
--- @@ -51,7 +51,7 @@ body: | renamable $eax = nsw LEA64_32r killed renamable $rdi, 4, renamable $rdi, 2, $noreg, debug-instr-number 2 ; HASWELL: ADD32ri8 {{.*}} debug-instr-number 4 renamable $eax = nsw IMUL32rr killed renamable $eax, killed renamable $ecx, implicit-def dead $eflags - RETQ $eax + RET64 $eax ... --- @@ -72,6 +72,6 @@ body: | renamable $ecx = nsw ADD32rr renamable $ecx, renamable $eax, implicit-def dead $eflags, implicit killed $rax, implicit killed $rcx, implicit-def $rcx, debug-instr-number 1 ; ATOM: LEA64_32r {{.*}} debug-instr-number 2 renamable $eax = MOV32rm killed renamable $rcx, 1, $noreg, 0, $noreg :: (load (s32) from `i32 *undef`) - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/DebugInfo/MIR/X86/backup-entry-values-usage.mir b/llvm/test/DebugInfo/MIR/X86/backup-entry-values-usage.mir index 278a3f6a3242c..1bfb8184db306 100644 --- a/llvm/test/DebugInfo/MIR/X86/backup-entry-values-usage.mir +++ b/llvm/test/DebugInfo/MIR/X86/backup-entry-values-usage.mir @@ -101,6 +101,6 @@ body: | $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, debug-location !22 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa_offset 8, debug-location !22 - RETQ killed $eax, debug-location !22 + RET64 killed $eax, debug-location !22 ... diff --git a/llvm/test/DebugInfo/MIR/X86/bit-piece-dh.mir b/llvm/test/DebugInfo/MIR/X86/bit-piece-dh.mir index 9530683506285..3e8a9db2cb047 100644 --- a/llvm/test/DebugInfo/MIR/X86/bit-piece-dh.mir +++ b/llvm/test/DebugInfo/MIR/X86/bit-piece-dh.mir @@ -92,6 +92,6 @@ body: | $edi = SHR32ri killed $edi, 8, implicit-def dead $eflags, debug-location !17 $eax = MOVSX32rr8 $dil, implicit killed $edi, debug-location !20 $rbp = POP64r implicit-def $rsp, implicit $rsp, debug-location !20 - RETQ $eax, debug-location !20 + RET64 $eax, debug-location !20 ... diff --git a/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir b/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir index bde717e3c9da5..789968397199c 100644 --- a/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir +++ b/llvm/test/DebugInfo/MIR/X86/call-site-gnu-vs-dwarf5-attrs.mir @@ -191,7 +191,7 @@ body: | $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !33 DBG_VALUE $rdi, $noreg, !23, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !25 CFI_INSTRUCTION def_cfa_offset 8, debug-location !33 - RETQ $eax, debug-location !33 + RET64 $eax, debug-location !33 bb.2.if.then: CFI_INSTRUCTION def_cfa_offset 16, debug-location !32 diff --git a/llvm/test/DebugInfo/MIR/X86/clobbered-fragments.mir b/llvm/test/DebugInfo/MIR/X86/clobbered-fragments.mir index 259ffa8fce6f4..c8ea384ce7273 100644 --- a/llvm/test/DebugInfo/MIR/X86/clobbered-fragments.mir +++ b/llvm/test/DebugInfo/MIR/X86/clobbered-fragments.mir @@ -105,7 +105,7 @@ body: | CALL64pcrel32 @ext2, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit killed $esi, implicit-def $rsp, implicit-def $ssp, debug-location !16 $eax = MOV32ri 123, debug-location !17 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !17 - RETQ killed $eax, debug-location !17 + RET64 killed $eax, debug-location !17 ... 
@@ -135,7 +135,7 @@ body: | CALL64pcrel32 @ext3, csr_64, implicit $rsp, implicit $ssp, implicit killed $edi, implicit $esi, implicit $edx, implicit-def $rsp, implicit-def $ssp, debug-location !20 $eax = MOV32rr killed $ebx, debug-location !20 $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !20 - RETQ killed $eax, debug-location !21 + RET64 killed $eax, debug-location !21 ... diff --git a/llvm/test/DebugInfo/MIR/X86/complex-entryvalue.mir b/llvm/test/DebugInfo/MIR/X86/complex-entryvalue.mir index cc7e6dbc08e31..a015855f190b2 100644 --- a/llvm/test/DebugInfo/MIR/X86/complex-entryvalue.mir +++ b/llvm/test/DebugInfo/MIR/X86/complex-entryvalue.mir @@ -48,6 +48,6 @@ body: | frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp $rbp = frame-setup MOV64rr $rsp $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !16 - RETQ debug-location !16 + RET64 debug-location !16 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg-multiple-defs.mir b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg-multiple-defs.mir index c13c22344d4e5..4d3c466f3eb36 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg-multiple-defs.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg-multiple-defs.mir @@ -132,6 +132,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !18 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !18 CFI_INSTRUCTION def_cfa_offset 8, debug-location !18 - RETQ debug-location !18 + RET64 debug-location !18 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir index 146b7f184c9af..ecfe5de75b458 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbg-call-site-spilled-arg.mir @@ -172,6 +172,6 @@ body: | $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !20 $r15 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !20 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !20 - RETQ debug-location !20 + RET64 debug-location !20 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbg-stack-value-range.mir b/llvm/test/DebugInfo/MIR/X86/dbg-stack-value-range.mir index 462bf3473a5d2..8254b81c22046 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbg-stack-value-range.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbg-stack-value-range.mir @@ -177,6 +177,6 @@ body: | $rsp = frame-destroy ADD64ri8 $rsp, 16, implicit-def dead $eflags, debug-location !15 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !15 CFI_INSTRUCTION def_cfa $rsp, 8, debug-location !15 - RETQ $eax, debug-location !15 + RET64 $eax, debug-location !15 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir index 347a0ec09bb24..821be45acc6ff 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-copy-super-sub.mir @@ -106,7 +106,7 @@ body: | $eax = MOV32rr killed $ebx, debug-location !32 $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !32 CFI_INSTRUCTION def_cfa_offset 8, debug-location !32 - RETQ killed $eax, debug-location !32 + RET64 killed $eax, debug-location !32 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir index 79601c802e26a..a6f64f6f3b24c 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-interpretation.mir @@ -199,6 +199,6 @@ body: | $r15 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !21 DBG_VALUE $esi, $noreg, !15, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !21 CFI_INSTRUCTION def_cfa_offset 8, debug-location !21 - RETQ $eax, debug-location !21 + RET64 $eax, debug-location !21 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir index b87c0ac26b0bb..f6f746e470052 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-lea-interpretation.mir @@ -135,6 +135,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !14 $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !14 CFI_INSTRUCTION def_cfa_offset 8, debug-location !14 - RETQ $eax, debug-location !14 + RET64 $eax, debug-location !14 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-partial-describe.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-partial-describe.mir index f0902bbe41d66..4053bba80a150 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-partial-describe.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-partial-describe.mir @@ -54,7 +54,7 @@ body: | CALL64pcrel32 @call, csr_64, implicit $rsp, implicit $ssp, implicit killed $edi, implicit undef $esi, implicit-def $rsp, implicit-def $ssp, debug-location !15 $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !16 CFI_INSTRUCTION def_cfa_offset 8, debug-location !16 - RETQ debug-location !16 + RET64 debug-location !16 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir index 2a2943ffaeaad..a915a4826ed74 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reference.mir @@ -112,6 +112,6 @@ body: | CALL64pcrel32 @_ZN1CC2E1B, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp, debug-location !36 $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !36 CFI_INSTRUCTION def_cfa_offset 8, debug-location !36 - RETQ debug-location !36 + RET64 debug-location !36 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reg-shuffle.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reg-shuffle.mir index 27a03193e8161..03c967d17569a 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reg-shuffle.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-reg-shuffle.mir @@ -74,7 +74,7 @@ body: | CALL64pcrel32 @call2, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, debug-location !15 $rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !16 CFI_INSTRUCTION def_cfa_offset 8, debug-location !16 - RETQ debug-location !16 + RET64 debug-location !16 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir index fb150f7b36246..01b61913fd65a 100644 --- a/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir +++ b/llvm/test/DebugInfo/MIR/X86/dbgcall-site-two-fwd-reg-defs.mir @@ -88,7 +88,7 @@ body: | renamable $eax = MOV32rm $rip, 1, $noreg, @a, $noreg, debug-location !16 :: (dereferenceable load (s32) from @a) $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !16 CFI_INSTRUCTION def_cfa $rsp, 8, debug-location !16 - RETQ $eax, debug-location !16 + RET64 $eax, debug-location !16 ... --- @@ -123,7 +123,7 @@ body: | $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa $rsp, 8, debug-location !22 - RETQ $eax, debug-location !22 + RET64 $eax, debug-location !22 ... diff --git a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir index ca577f7b6d1f1..ba82357649660 100644 --- a/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir +++ b/llvm/test/DebugInfo/MIR/X86/debug-call-site-param.mir @@ -190,6 +190,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !24 $r15 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !24 CFI_INSTRUCTION def_cfa_offset 8, debug-location !24 - RETQ debug-location !24 + RET64 debug-location !24 ... diff --git a/llvm/test/DebugInfo/MIR/X86/debug-entry-value-operation.mir b/llvm/test/DebugInfo/MIR/X86/debug-entry-value-operation.mir index 3ce6388b21a0c..a051aaa5f7ad1 100644 --- a/llvm/test/DebugInfo/MIR/X86/debug-entry-value-operation.mir +++ b/llvm/test/DebugInfo/MIR/X86/debug-entry-value-operation.mir @@ -88,6 +88,6 @@ body: | MOV32mr $rip, 1, $noreg, @global, $noreg, killed renamable $edi, debug-location !18 :: (store (s32) into @global) INLINEASM &"", 1, 12, implicit-def dead early-clobber $edi, 12, implicit-def dead early-clobber $esi, 12, implicit-def dead early-clobber $edx, 12, implicit-def dead early-clobber $df, 12, implicit-def dead early-clobber $fpsw, 12, implicit-def dead early-clobber $eflags, !19, debug-location !18 $eax = MOV32ri 123, debug-location !18 - RETQ killed $eax, debug-location !18 + RET64 killed $eax, debug-location !18 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-clobber.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-clobber.mir index a42748e5cedb0..124ddd0d78c92 100644 --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-clobber.mir @@ -100,7 +100,7 @@ body: | bb.4: liveins: $rbx, $rsi - RETQ $rbx, debug-location !17 + RET64 $rbx, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-join.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-join.mir index f8863eedb1764..ea747635bac95 100644 --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-join.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-join.mir @@ -154,7 +154,7 @@ body: | bb.10: liveins: $rdi, $rsi ; Should _not_ be a live-in loc here. - RETQ + RET64 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-movements.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-movements.mir index 4fd435461718a..3c8e34a466a64 100644 --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-movements.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-movements.mir @@ -84,7 +84,7 @@ body: | bb.2: liveins: $rbx, $rbp - RETQ $rbp, debug-location !17 + RET64 $rbp, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-spillrestore.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-spillrestore.mir index 32a68639ec5f2..8006945837d6c 100644 --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-spillrestore.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvalues-spillrestore.mir @@ -72,6 +72,6 @@ body: | $rax = COPY killed $rdi $rdi = MOV64ri 0 $rdi = MOV64rm $rsp, 1, $noreg, -16, $noreg, debug-location !15 :: (load (s64) from %stack.0) - RETQ + RET64 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir index 82cc5b00e5309..42f8bc2564285 100644 --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-movements.mir @@ -105,5 +105,5 @@ body: | CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit %3, implicit %5 - RETQ + RET64 ... diff --git a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir index 491bb27cafeff..aeadd5bd2b4f7 100644 --- a/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir +++ b/llvm/test/DebugInfo/MIR/X86/dvl-livedebugvars-stackptr.mir @@ -109,5 +109,5 @@ body: | CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit %3, implicit %5 - RETQ + RET64 ... diff --git a/llvm/test/DebugInfo/MIR/X86/empty-inline.mir b/llvm/test/DebugInfo/MIR/X86/empty-inline.mir index 742098ef0c45f..d2b057af17c3e 100644 --- a/llvm/test/DebugInfo/MIR/X86/empty-inline.mir +++ b/llvm/test/DebugInfo/MIR/X86/empty-inline.mir @@ -109,13 +109,13 @@ body: | $rax = MOV64rm $rdi, 1, _, 0, _ :: (load (s64) from %ir.6, align 4) $al = MOV8rm killed $rax, 1, _, 0, _ :: (load (s8) from %ir.8) MOV8mr killed $rdi, 1, _, 8, _, killed $al, debug-location !14 :: (store (s8) into %ir.12) - RETQ undef $eax + RET64 undef $eax bb.1: liveins: $rdi $al = IMPLICIT_DEF debug-location !10 MOV8mr killed $rdi, 1, _, 8, _, killed $al, debug-location !14 :: (store (s8) into %ir.12) - RETQ undef $eax + RET64 undef $eax ... diff --git a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir index 8de1020e15ea8..3ac9b0be6c407 100644 --- a/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir +++ b/llvm/test/DebugInfo/MIR/X86/entry-value-of-modified-param.mir @@ -114,6 +114,6 @@ body: | renamable $al = SETCCr 12, implicit killed $eflags, implicit killed $eax, implicit-def $eax, debug-location !23 $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !26 CFI_INSTRUCTION def_cfa_offset 8, debug-location !26 - RETQ $eax, debug-location !26 + RET64 $eax, debug-location !26 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir index 27f881d0f178b..72810369c0e1f 100644 --- a/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir +++ b/llvm/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir @@ -178,6 +178,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !34 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 CFI_INSTRUCTION def_cfa_offset 8, debug-location !34 - RETQ killed $eax, debug-location !34 + RET64 killed $eax, debug-location !34 ... diff --git a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir index f2bdab16e574c..29cc2220b8230 100644 --- a/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir +++ b/llvm/test/DebugInfo/MIR/X86/kill-after-spill.mir @@ -382,6 +382,6 @@ body: | $r14 = POP64r implicit-def $rsp, implicit $rsp $r15 = POP64r implicit-def $rsp, implicit $rsp $rbp = POP64r implicit-def $rsp, implicit $rsp - RETQ $eax, debug-location !57 + RET64 $eax, debug-location !57 ... diff --git a/llvm/test/DebugInfo/MIR/X86/kill-entry-value-after-diamond-bbs.mir b/llvm/test/DebugInfo/MIR/X86/kill-entry-value-after-diamond-bbs.mir index ec165f4c798d3..c33f5f6affbb3 100644 --- a/llvm/test/DebugInfo/MIR/X86/kill-entry-value-after-diamond-bbs.mir +++ b/llvm/test/DebugInfo/MIR/X86/kill-entry-value-after-diamond-bbs.mir @@ -180,7 +180,7 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !34 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 CFI_INSTRUCTION def_cfa_offset 8, debug-location !34 - RETQ killed $eax, debug-location !34 + RET64 killed $eax, debug-location !34 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir index bef0f4e4aa5ab..50cf59025bfb6 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-3preds.mir @@ -267,7 +267,7 @@ body: | $ecx = IMUL32rr killed $ecx, killed $edi, implicit-def dead $eflags, debug-location !36 DBG_VALUE 0, 0, !13, !17, debug-location !25 $eax = MOV32rr killed $ecx, debug-location !50 - RETQ $eax, debug-location !50 + RET64 $eax, debug-location !50 bb.6.if.then.4: liveins: $ecx, $esi @@ -278,7 +278,7 @@ body: | $ecx = IMUL32rr killed $ecx, killed $esi, implicit-def dead $eflags, debug-location !40 DBG_VALUE 0, 0, !13, !17, debug-location !25 $eax = MOV32rr killed $ecx, debug-location !50 - RETQ $eax, debug-location !50 + RET64 $eax, debug-location !50 bb.8.if.then.8: successors: %bb.9.for.end(0) @@ -294,6 +294,6 @@ body: | DBG_VALUE 0, 0, !13, !17, debug-location !25 $eax = MOV32rr killed $ecx, debug-location !50 - RETQ $eax, debug-location !50 + RET64 $eax, debug-location !50 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir index 97fad0755b80e..5cc1105e9ab22 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-bad-transfer.mir @@ -106,6 +106,6 @@ body: | $eax = MOV32rr killed $ebx, debug-location !10 $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp CFI_INSTRUCTION def_cfa_offset 8 - RETQ $eax, debug-location !10 + RET64 $eax, debug-location !10 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-cutoffs.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-cutoffs.mir index 17b6b9b3149c3..cedfccc574f17 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-cutoffs.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-cutoffs.mir @@ -114,6 +114,6 @@ body: | CALL64pcrel32 @use, csr_64, implicit $rsp, implicit $ssp, implicit killed $edi, implicit-def $eax, debug-location !14 DBG_VALUE renamable $eax, $noreg, !11, !DIExpression(), debug-location !14 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !15 - RETQ implicit killed $eax, debug-location !15 + RET64 implicit killed $eax, debug-location !15 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-entry-transfer.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-entry-transfer.mir index 53c948ad2678a..03b10929f7be0 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-entry-transfer.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-entry-transfer.mir @@ -118,6 +118,6 @@ body: | $eax = MOV32rr killed $ebx $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp CFI_INSTRUCTION def_cfa_offset 8 - RETQ $eax + RET64 $eax ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-fragments.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-fragments.mir index 970f59194aa50..d0ae6277a512c 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-fragments.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-fragments.mir @@ -20,7 +20,7 @@ # CHECK-NEXT: DBG_VALUE $eax, $noreg, !{{[0-9]+}}, # CHECK-SAME: !DIExpression(DW_OP_LLVM_fragment, 0, 32) # CHECK-NEXT: XOR32rr -# CHECK-NEXT: RETQ +# CHECK-NEXT: RET64 # # CHECK-LABEL: bar # CHECK-LABEL: bb.0.entry: @@ -51,7 +51,7 @@ # CHECK-NEXT: DBG_VALUE $ax, $noreg, !{{[0-9]+}}, # CHECK-SAME: !DIExpression(DW_OP_LLVM_fragment, 8, 16) # CHECK-NEXT: XOR32rr -# CHECK-NEXT: RETQ +# CHECK-NEXT: RET64 # CHECK-LABEL: baz # CHECK-LABEL: bb.0.entry: @@ -81,7 +81,7 @@ # CHECK: DBG_VALUE $ebx, $noreg, !{{[0-9]+}}, # CHECK-SAME: !DIExpression(DW_OP_LLVM_fragment, 32, 32) # CHECK-NEXT: XOR32rr -# CHECK-NEXT: RETQ +# CHECK-NEXT: RET64 --- | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -178,7 +178,7 @@ body: | bb.3.bb3: liveins: $eax, $ebx $eax = XOR32rr killed $eax, killed $ebx, implicit-def $eflags - RETQ $eax, debug-location !8 + RET64 $eax, debug-location !8 ... --- @@ -216,7 +216,7 @@ body: | bb.3.bb3: liveins: $eax, $ebx $eax = XOR32rr killed $eax, killed $ebx, implicit-def $eflags - RETQ $eax, debug-location !48 + RET64 $eax, debug-location !48 ... --- @@ -254,6 +254,6 @@ body: | bb.3.bb3: liveins: $eax, $ebx $eax = XOR32rr killed $eax, killed $ebx, implicit-def $eflags - RETQ $eax, debug-location !88 + RET64 $eax, debug-location !88 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore-collide.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore-collide.mir index 49c5aed0b9473..9033c86f62e57 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore-collide.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore-collide.mir @@ -81,5 +81,5 @@ body: | ; Return faff $eax = MOV32ri 0 $rsp = frame-destroy ADD64ri8 $rsp, 24, implicit-def dead $eflags - RETQ debug-location !10 + RET64 debug-location !10 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore.mir index ef0d4d4797b80..6963c8c75363c 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-restore.mir @@ -326,7 +326,7 @@ body: | liveins: $rdi, $rbx, $r12, $r13, $r14, $r15, $rbp renamable $eax = MOV32rm killed renamable $rdi, 1, $noreg, 4, $noreg, debug-location !23 :: (load (s32) from %ir.add.ptr, !tbaa !24) - RETQ $eax, debug-location !28 + RET64 $eax, debug-location !28 ... --- @@ -401,7 +401,7 @@ body: | renamable $eax = MOV32rm killed renamable $rsi, 1, $noreg, 4, $noreg, debug-location !123 :: (load (s32) from %ir.add.ptr, !tbaa !24) $rdi = MOV64ri 0 - RETQ $eax, debug-location !128 + RET64 $eax, debug-location !128 ... --- @@ -507,7 +507,7 @@ body: | liveins: $rdi, $rbx, $r12, $r13, $r14, $r15, $rbp renamable $eax = MOV32rm killed renamable $rdi, 1, $noreg, 4, $noreg, debug-location !223 :: (load (s32) from %ir.add.ptr, !tbaa !24) - RETQ $eax, debug-location !228 + RET64 $eax, debug-location !228 @@ -628,7 +628,7 @@ body: | renamable $rdi = MOV64rm $rsp, 1, $noreg, -8, $noreg :: (load (s64) from %stack.0) renamable $eax = MOV32rm killed renamable $rdi, 1, $noreg, 4, $noreg, debug-location !323 :: (load (s32) from %ir.add.ptr, !tbaa !24) - RETQ $eax, debug-location !328 + RET64 $eax, debug-location !328 ... --- @@ -731,6 +731,6 @@ body: | liveins: $rdi, $rbx, $r12, $r13, $r14, $r15, $rbp renamable $eax = MOV32rm killed renamable $rdi, 1, $noreg, 4, $noreg, debug-location !414 :: (load (s32) from %ir.add.ptr, !tbaa !24) - RETQ $eax, debug-location !415 + RET64 $eax, debug-location !415 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-spill.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-spill.mir index 61a5e204668c2..88ba9a5ac458b 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-spill.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-spill.mir @@ -467,6 +467,6 @@ body: | $r14 = POP64r implicit-def $rsp, implicit $rsp, debug-location !90 $r15 = POP64r implicit-def $rsp, implicit $rsp, debug-location !90 $rbp = POP64r implicit-def $rsp, implicit $rsp, debug-location !90 - RETQ debug-location !90 + RET64 debug-location !90 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values-stack-clobber.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values-stack-clobber.mir index 647f66c68cd06..2f5f8b829d66a 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values-stack-clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values-stack-clobber.mir @@ -195,6 +195,6 @@ body: | $r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !28 $r15 = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !28 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !28 - RETQ $rax, debug-location !28 + RET64 $rax, debug-location !28 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir index be6fe2f74d3d0..b91d558e7d96b 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-values.mir @@ -252,6 +252,6 @@ body: | CALL64pcrel32 @printf, csr_64, implicit $rsp, implicit $rdi, implicit $esi, implicit $al, implicit-def $rsp, implicit-def dead $eax, debug-location !46 $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, debug-location !47 $rbx = POP64r implicit-def $rsp, implicit $rsp, debug-location !47 - RETQ $eax, debug-location !47 + RET64 $eax, debug-location !47 ... diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir index f58b6d71bd110..64ca80f4c5fac 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg-debugonly.mir @@ -57,7 +57,7 @@ !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "bar", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 6.0.0 (trunk 313866) (llvm/trunk 313875)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) - !3 = !DIFile(filename: "live-debug-vars-unused-arg.c", directory: "/repo/uabbpet/master") + !3 = !DIFile(filename: "live-debug-vars-unused-arg.c", directory: "/") !4 = !{} !5 = !{!0} !6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 64, elements: !8) diff --git a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir index d57c224d57f71..c61f7b6fc49cb 100644 --- a/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir +++ b/llvm/test/DebugInfo/MIR/X86/live-debug-vars-unused-arg.mir @@ -55,7 +55,7 @@ !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "bar", scope: !2, file: !3, line: 3, type: !6, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 6.0.0 (trunk 313866) (llvm/trunk 313875)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) - !3 = !DIFile(filename: "live-debug-vars-unused-arg.c", directory: "/repo/uabbpet/master") + !3 = !DIFile(filename: "live-debug-vars-unused-arg.c", directory: "/") !4 = !{} !5 = !{!0} !6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 64, elements: !8) diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir index 89c7d55d95c6e..b887f9b44fd77 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues-ignores-metaInstructions.mir @@ -60,5 +60,5 @@ body: | successors: %bb.3 renamable $ebx = KILL $ebx bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir index 89b4ac63e08a1..b5b2fe4a463f0 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond.mir @@ -63,5 +63,5 @@ body: | successors: %bb.3 $eax = MOV32ri 0, debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir index bd6dacc2fed1a..32ab6efa98155 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_clobber.mir @@ -63,5 +63,5 @@ body: | successors: %bb.3 $ebx = MOV32ri 0, debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir index 05a1955532aaa..4c3747be64706 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_match_move.mir @@ -69,5 +69,5 @@ body: | $eax = MOV32ri 0, debug-location !17 DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir index ee843492c7b95..8f37c88fe9850 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_clobber.mir @@ -61,5 +61,5 @@ body: | successors: %bb.3 $eax = MOV32ri 0, debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir index fe3924bf846ae..33b918c2eaa7d 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_diamond_one_move.mir @@ -64,5 +64,5 @@ body: | successors: %bb.3 $eax = MOV32ri 0, debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir index d7eb4bd48ab3a..7f85b98c20134 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_basic_loop.mir @@ -62,5 +62,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir index f48940a24861b..808d3a41a05a2 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb.mir @@ -61,5 +61,5 @@ body: | successors: %bb.3 $eax = MOV32ri 0, debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir index f969179b76a7d..935dcc494fd86 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_clobbered.mir @@ -57,5 +57,5 @@ body: | successors: %bb.3 $eax = MOV32ri 0, debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir index 339d21380fa64..33246054dca82 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_bb_to_bb_move_to_clobber.mir @@ -64,5 +64,5 @@ body: | successors: %bb.3 $eax = MOV32ri 0, debug-location !17 bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir index f80bf367f3fed..dd0288ddadf0a 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_load_in_loop.mir @@ -109,5 +109,5 @@ body: | liveins: $rax, $rbp bb.6: liveins: $rax, $rbp - RETQ $rax, debug-location !17 + RET64 $rax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir index 0d9cc1905134a..c9f972646f9ba 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break.mir @@ -70,5 +70,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.4.bb4: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break_clobbered.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break_clobbered.mir index b4dea6ccbf700..92fe5a041eb47 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break_clobbered.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_break_clobbered.mir @@ -62,5 +62,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.4.bb4: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_clobbered.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_clobbered.mir index 47f114f7fe1b8..14d9196ee13a0 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_clobbered.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_clobbered.mir @@ -59,5 +59,5 @@ body: | JCC_1 %bb.1, 4, implicit killed $eflags bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir index 1e410054dc1cb..a0b1075ae30a4 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond.mir @@ -78,5 +78,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.5.bb5: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_clobber.mir index 7e18939870bc7..2e1f4292042d6 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_clobber.mir @@ -69,5 +69,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.5.bb5: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir index 7861e7dfa9c62..70a7b379e19b0 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_diamond_move.mir @@ -79,5 +79,5 @@ body: | $ebx = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.5.bb5: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_early_clobber.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_early_clobber.mir index ff66eba762cb6..01d2735d4d926 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_early_clobber.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_early_clobber.mir @@ -56,5 +56,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir index bbe1d4ceda607..0011593d4f504 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir @@ -63,5 +63,5 @@ body: | JCC_1 %bb.1, 4, implicit killed $eflags bb.3.bb3: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir index 83f7235558947..a018c66160f1f 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge.mir @@ -70,5 +70,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.4.bb4: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge_clobbered.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge_clobbered.mir index 0148af9155597..d40553322e5dc 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge_clobbered.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_two_backedge_clobbered.mir @@ -62,5 +62,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.4.bb4: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir index 7ff781a07fce6..32d85e882dbfb 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop.mir @@ -77,5 +77,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.5.bb5: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_clobbered.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_clobbered.mir index 78330d52c7cfe..4b143414a41c0 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_clobbered.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_clobbered.mir @@ -68,5 +68,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.5.bb5: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir index fca7f83a14be4..3b25b8313e40a 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_moved.mir @@ -71,5 +71,5 @@ body: | $eax = MOV32ri 0, debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.5.bb5: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir index baade395c6ede..013d9541516f0 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_within_loop_outer_moved.mir @@ -73,5 +73,5 @@ body: | DBG_VALUE $eax, $noreg, !16, !DIExpression(), debug-location !17 JCC_1 %bb.1, 4, implicit killed $eflags bb.5.bb5: - RETQ $eax, debug-location !17 + RET64 $eax, debug-location !17 ... diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir index f5332c29c837f..c204b5dcf82c4 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_many_loop_heads.mir @@ -191,6 +191,6 @@ body: | bb.18: liveins: $rsi, $rdi, $eflags - RETQ + RET64 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir index 14ff154289416..1ace13da0d22b 100644 --- a/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvars-crossbb-interval.mir @@ -133,6 +133,6 @@ body: | bb.4.exit: $eax = COPY %5, debug-location !18 - RETQ implicit $eax, debug-location !18 + RET64 implicit $eax, debug-location !18 ... diff --git a/llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir b/llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir index 3b053da5925ee..262eb06697760 100644 --- a/llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir +++ b/llvm/test/DebugInfo/MIR/X86/multiple-param-dbg-value-entry.mir @@ -80,6 +80,6 @@ body: | MOV32mr $rip, 1, $noreg, @global, $noreg, killed renamable $edi, debug-location !18 :: (store (s32) into @global) INLINEASM &"", 1, 12, implicit-def dead early-clobber $edi, 12, implicit-def dead early-clobber $esi, 12, implicit-def dead early-clobber $edx, 12, implicit-def dead early-clobber $df, 12, implicit-def dead early-clobber $fpsw, 12, implicit-def dead early-clobber $eflags, !19, debug-location !18 $eax = MOV32ri 123, debug-location !18 - RETQ killed $eax, debug-location !18 + RET64 killed $eax, debug-location !18 ... diff --git a/llvm/test/DebugInfo/MIR/X86/piece-entryval.mir b/llvm/test/DebugInfo/MIR/X86/piece-entryval.mir index 0f63faa37e9f9..e711f05d2ae73 100644 --- a/llvm/test/DebugInfo/MIR/X86/piece-entryval.mir +++ b/llvm/test/DebugInfo/MIR/X86/piece-entryval.mir @@ -49,6 +49,6 @@ body: | frame-setup PUSH64r killed $rbp, implicit-def $rsp, implicit $rsp $rbp = frame-setup MOV64rr $rsp $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !16 - RETQ debug-location !16 + RET64 debug-location !16 ... diff --git a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir index 0bd8d012940cf..d80e214843cd0 100644 --- a/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir +++ b/llvm/test/DebugInfo/MIR/X86/propagate-entry-value-cross-bbs.mir @@ -171,6 +171,6 @@ body: | CFI_INSTRUCTION def_cfa_offset 16, debug-location !34 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !34 CFI_INSTRUCTION def_cfa_offset 8, debug-location !34 - RETQ killed $eax, debug-location !34 + RET64 killed $eax, debug-location !34 ... diff --git a/llvm/test/DebugInfo/MIR/X86/remove-entry-value-from-loop.mir b/llvm/test/DebugInfo/MIR/X86/remove-entry-value-from-loop.mir index 981fb2da634fc..6f84074231d11 100644 --- a/llvm/test/DebugInfo/MIR/X86/remove-entry-value-from-loop.mir +++ b/llvm/test/DebugInfo/MIR/X86/remove-entry-value-from-loop.mir @@ -154,7 +154,7 @@ body: | liveins: $eax DBG_VALUE $eax, $noreg, !15, !DIExpression(), debug-location !17 - RETQ $eax, debug-location !32 + RET64 $eax, debug-location !32 bb.3.if.then: liveins: $eax, $edi @@ -162,6 +162,6 @@ body: | renamable $eax = nsw IMUL32rr killed renamable $eax, killed renamable $edi, implicit-def dead $eflags, debug-location !28 DBG_VALUE $eax, $noreg, !15, !DIExpression(), debug-location !17 DBG_VALUE $eax, $noreg, !15, !DIExpression(), debug-location !17 - RETQ $eax, debug-location !32 + RET64 $eax, debug-location !32 ... 
diff --git a/llvm/test/DebugInfo/MIR/X86/remove-redundant-dbg-vals.mir b/llvm/test/DebugInfo/MIR/X86/remove-redundant-dbg-vals.mir index fba3d9ee15040..9c51dac7431e5 100644 --- a/llvm/test/DebugInfo/MIR/X86/remove-redundant-dbg-vals.mir +++ b/llvm/test/DebugInfo/MIR/X86/remove-redundant-dbg-vals.mir @@ -169,7 +169,7 @@ body: | $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, debug-location !22 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa_offset 8, debug-location !22 - RETQ killed $eax, debug-location !22 + RET64 killed $eax, debug-location !22 ... --- @@ -192,7 +192,7 @@ body: | $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, debug-location !22 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa_offset 8, debug-location !22 - RETQ killed $eax, debug-location !22 + RET64 killed $eax, debug-location !22 ... --- @@ -214,7 +214,7 @@ body: | DBG_VALUE $edi, $noreg, !13, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !14 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa_offset 8, debug-location !22 - RETQ killed $eax, debug-location !22 + RET64 killed $eax, debug-location !22 ... --- @@ -236,7 +236,7 @@ body: | DBG_VALUE $edi, $noreg, !13, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !14 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa_offset 8, debug-location !22 - RETQ killed $eax, debug-location !22 + RET64 killed $eax, debug-location !22 ... --- @@ -258,7 +258,7 @@ body: | DBG_VALUE $edi, $noreg, !13, !DIExpression(DW_OP_LLVM_fragment, 0, 32), debug-location !14 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa_offset 8, debug-location !22 - RETQ killed $eax, debug-location !22 + RET64 killed $eax, debug-location !22 ... --- @@ -279,6 +279,6 @@ body: | $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, debug-location !22 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !22 CFI_INSTRUCTION def_cfa_offset 8, debug-location !22 - RETQ killed $eax, debug-location !22 + RET64 killed $eax, debug-location !22 ... diff --git a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir index bfc5c2be127e7..e77192db977d7 100644 --- a/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir +++ b/llvm/test/DebugInfo/MIR/X86/unreachable-block-call-site.mir @@ -63,6 +63,6 @@ body: | $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, debug-location !18 $rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !18 CFI_INSTRUCTION def_cfa_offset 8, debug-location !18 - RETQ killed $eax, debug-location !18 + RET64 killed $eax, debug-location !18 ... 
diff --git a/llvm/test/DebugInfo/X86/bbjoin.ll b/llvm/test/DebugInfo/X86/bbjoin.ll index c175108f38422..b21990469febd 100644 --- a/llvm/test/DebugInfo/X86/bbjoin.ll +++ b/llvm/test/DebugInfo/X86/bbjoin.ll @@ -17,7 +17,7 @@ ; CHECK: DBG_VALUE 43, $noreg, ![[X]], ; CHECK: bb.2.if.end: ; CHECK-NOT: DBG_VALUE 23, $noreg, ![[X]], -; CHECK: RETQ $eax +; CHECK: RET64 $eax target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_at_return.mir b/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_at_return.mir index bfc9df5695e88..dcdaed64e88a6 100644 --- a/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_at_return.mir +++ b/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_at_return.mir @@ -71,6 +71,6 @@ body: | ; CHECK: [0x0000000000000003, 0x0000000000000004): DW_OP_reg0 RAX) ; CHECK-NEXT: DW_AT_name ("result") renamable $eax = KILL killed $eax, implicit-def $rax - RETQ killed $eax, debug-location !24 + RET64 killed $eax, debug-location !24 ... diff --git a/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_still_clobbers.mir b/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_still_clobbers.mir index 0f01a51a482f5..2828d640c23d1 100644 --- a/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_still_clobbers.mir +++ b/llvm/test/DebugInfo/X86/dbg_entity_calc_ignores_KILL_instruction_still_clobbers.mir @@ -74,6 +74,6 @@ body: | $edi = MOV32rr $eax, debug-location !24 $eax = MOV32rr $eax, debug-location !24 $edi = MOV32rr $eax, debug-location !24 - RETQ killed $eax, debug-location !24 + RET64 killed $eax, debug-location !24 ... diff --git a/llvm/test/DebugInfo/X86/dbg_value_list_clobbers.mir b/llvm/test/DebugInfo/X86/dbg_value_list_clobbers.mir index 601067a80d013..4114efc3f8587 100644 --- a/llvm/test/DebugInfo/X86/dbg_value_list_clobbers.mir +++ b/llvm/test/DebugInfo/X86/dbg_value_list_clobbers.mir @@ -80,5 +80,5 @@ body: | DBG_VALUE_LIST !12, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value, DW_OP_LLVM_fragment, 16, 16), $eax, $ecx, debug-location !15 ; CHECK-NEXT: [{{.*}}): DW_OP_breg0 RAX+0, DW_OP_constu 0xffffffff, DW_OP_and, DW_OP_stack_value, DW_OP_piece 0x2, DW_OP_breg0 RAX+0, DW_OP_constu 0xffffffff, DW_OP_and, DW_OP_breg2 RCX+0, DW_OP_constu 0xffffffff, DW_OP_and, DW_OP_plus, DW_OP_stack_value, DW_OP_piece 0x2 - RETQ debug-location !15 + RET64 debug-location !15 ... diff --git a/llvm/test/DebugInfo/X86/dbg_value_list_emission.mir b/llvm/test/DebugInfo/X86/dbg_value_list_emission.mir index e3380cb42a4ba..e447fa82f95b3 100644 --- a/llvm/test/DebugInfo/X86/dbg_value_list_emission.mir +++ b/llvm/test/DebugInfo/X86/dbg_value_list_emission.mir @@ -97,5 +97,5 @@ body: | ; CHECK-NEXT: DW_AT_name ("localh") ; CHECK-NOT: DW_AT_location - RETQ debug-location !15 + RET64 debug-location !15 ... diff --git a/llvm/test/DebugInfo/X86/debug-loc-asan.mir b/llvm/test/DebugInfo/X86/debug-loc-asan.mir index 79926c3bcfc17..9dd5fc5fa6cd0 100644 --- a/llvm/test/DebugInfo/X86/debug-loc-asan.mir +++ b/llvm/test/DebugInfo/X86/debug-loc-asan.mir @@ -341,6 +341,6 @@ body: | $rsp = MOV64rr $rbp, debug-location !12 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !12 CFI_INSTRUCTION def_cfa $rsp, 8, debug-location !12 - RETQ implicit killed $eax, debug-location !12 + RET64 implicit killed $eax, debug-location !12 ... 
diff --git a/llvm/test/DebugInfo/X86/debug-loc-offset.mir b/llvm/test/DebugInfo/X86/debug-loc-offset.mir index e4a81ce68a7f2..95f524fc162cf 100644 --- a/llvm/test/DebugInfo/X86/debug-loc-offset.mir +++ b/llvm/test/DebugInfo/X86/debug-loc-offset.mir @@ -207,7 +207,7 @@ body: | $esp = frame-destroy ADD32ri8 $esp, 4, implicit-def dead $eflags, debug-location !16 $ebp = frame-destroy POP32r implicit-def $esp, implicit $esp, debug-location !16 CFI_INSTRUCTION def_cfa $esp, 4, debug-location !16 - RETL implicit killed $eax, debug-location !16 + RET32 implicit killed $eax, debug-location !16 ... --- @@ -271,6 +271,6 @@ body: | $esp = frame-destroy ADD32ri8 $esp, 24, implicit-def dead $eflags, debug-location !30 $ebp = frame-destroy POP32r implicit-def $esp, implicit $esp, debug-location !30 CFI_INSTRUCTION def_cfa $esp, 4, debug-location !30 - RETL debug-location !30 + RET32 debug-location !30 ... diff --git a/llvm/test/DebugInfo/X86/dw_op_constu.mir b/llvm/test/DebugInfo/X86/dw_op_constu.mir index 69e51b141b9e9..be404f0050a4f 100644 --- a/llvm/test/DebugInfo/X86/dw_op_constu.mir +++ b/llvm/test/DebugInfo/X86/dw_op_constu.mir @@ -73,7 +73,7 @@ body: | renamable $rcx = MOV64rm renamable $noreg, 1, $noreg, 0, $fs DBG_VALUE renamable $rcx, 0, !9, !DIExpression(DW_OP_constu, 18446744073709551614, DW_OP_minus), debug-location !14 - RETQ debug-location !14 + RET64 debug-location !14 bb.1: ;-------------------------- DW_OP_plus ------------------------------- @@ -106,7 +106,7 @@ body: | renamable $rcx = MOV64rm renamable $noreg, 1, $noreg, 0, $fs DBG_VALUE renamable $rcx, 0, !9, !DIExpression(DW_OP_constu, 18446744073709551614, DW_OP_plus), debug-location !14 - RETQ debug-location !14 + RET64 debug-location !14 bb.2: ;-------------------------- DW_OP_plus_uconst ------------------------------- @@ -139,7 +139,7 @@ body: | renamable $rcx = MOV64rm renamable $noreg, 1, $noreg, 0, $fs DBG_VALUE renamable $rcx, 0, !9, !DIExpression(DW_OP_plus_uconst, 18446744073709551614), debug-location !14 - RETQ debug-location !14 + RET64 debug-location !14 ... diff --git a/llvm/test/DebugInfo/X86/dw_op_minus.mir b/llvm/test/DebugInfo/X86/dw_op_minus.mir index 50491b1b13c1c..2a408c97bf3d2 100644 --- a/llvm/test/DebugInfo/X86/dw_op_minus.mir +++ b/llvm/test/DebugInfo/X86/dw_op_minus.mir @@ -114,6 +114,6 @@ body: | MOV64mr killed renamable $rax, 1, $noreg, 0, $fs, killed renamable $rcx, debug-location !18 :: (store (s64) into @__safestack_unsafe_stack_ptr) $rsp = frame-destroy ADD64ri8 $rsp, 24, implicit-def dead $eflags, debug-location !18 CFI_INSTRUCTION def_cfa_offset 8, debug-location !18 - RETQ debug-location !18 + RET64 debug-location !18 ... diff --git a/llvm/test/DebugInfo/X86/live-debug-values-constprop.mir b/llvm/test/DebugInfo/X86/live-debug-values-constprop.mir index 57405786ef262..2137b52936e32 100644 --- a/llvm/test/DebugInfo/X86/live-debug-values-constprop.mir +++ b/llvm/test/DebugInfo/X86/live-debug-values-constprop.mir @@ -153,7 +153,7 @@ body: | bb.3.bb3: liveins: $rax - RETQ $eax, debug-location !9 + RET64 $eax, debug-location !9 ... --- @@ -217,7 +217,7 @@ body: | bb.3.bb3: liveins: $rax - RETQ $eax, debug-location !42 + RET64 $eax, debug-location !42 ... --- @@ -281,7 +281,7 @@ body: | bb.3.bb3: liveins: $rax - RETQ $eax, debug-location !82 + RET64 $eax, debug-location !82 ... --- @@ -342,6 +342,6 @@ body: | bb.3.bb3: liveins: $rax - RETQ $eax, debug-location !122 + RET64 $eax, debug-location !122 ... 
diff --git a/llvm/test/DebugInfo/X86/location-range-inlined-xblock.mir b/llvm/test/DebugInfo/X86/location-range-inlined-xblock.mir index b66e17f99881b..4a8567363c103 100644 --- a/llvm/test/DebugInfo/X86/location-range-inlined-xblock.mir +++ b/llvm/test/DebugInfo/X86/location-range-inlined-xblock.mir @@ -167,6 +167,6 @@ body: | $rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !40 DBG_VALUE $edi, $noreg, !15, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !16 CFI_INSTRUCTION def_cfa_offset 8, debug-location !40 - RETQ $eax, debug-location !40 + RET64 $eax, debug-location !40 ... diff --git a/llvm/test/DebugInfo/X86/location-range.mir b/llvm/test/DebugInfo/X86/location-range.mir index 4a306f4c7b433..6b4a44b3f6edc 100644 --- a/llvm/test/DebugInfo/X86/location-range.mir +++ b/llvm/test/DebugInfo/X86/location-range.mir @@ -133,12 +133,12 @@ body: | bb.1.if.then: renamable $rax = MOV64rm $rip, 1, $noreg, @b, $noreg, debug-location !21 :: (dereferenceable load (s64) from @b) - RETQ $rax, debug-location !27 + RET64 $rax, debug-location !27 bb.2.if.end: DBG_VALUE 0, $noreg, !19, !DIExpression(), debug-location !27 renamable $rax = MOV64rm $rip, 1, $noreg, @a, $noreg, debug-location !27 :: (dereferenceable load (s64) from @a) - RETQ $rax, debug-location !27 + RET64 $rax, debug-location !27 ... --- @@ -147,6 +147,6 @@ body: | bb.0.entry: $eax = MOVZX32rm16 $noreg, 1, $noreg, 0, $noreg, debug-location !48 :: (load (s16) from `i16* null`, align 1) DBG_VALUE $ax, $noreg, !39, !DIExpression(), debug-location !51 - RETQ $ax, debug-location !51 + RET64 $ax, debug-location !51 ... diff --git a/llvm/test/DebugInfo/X86/pr19307.mir b/llvm/test/DebugInfo/X86/pr19307.mir index 160f524dba911..594afc043f7f4 100644 --- a/llvm/test/DebugInfo/X86/pr19307.mir +++ b/llvm/test/DebugInfo/X86/pr19307.mir @@ -219,6 +219,6 @@ body: | $rsp = frame-destroy ADD64ri8 $rsp, 32, implicit-def dead $eflags, debug-location !53 $rbp = frame-destroy POP64r implicit-def $rsp, implicit $rsp, debug-location !53 CFI_INSTRUCTION def_cfa $rsp, 8, debug-location !53 - RETQ debug-location !53 + RET64 debug-location !53 ... 
diff --git a/llvm/test/DebugInfo/X86/pr34545.ll b/llvm/test/DebugInfo/X86/pr34545.ll index fe5d2a285f55c..7b8b548553089 100644 --- a/llvm/test/DebugInfo/X86/pr34545.ll +++ b/llvm/test/DebugInfo/X86/pr34545.ll @@ -8,7 +8,7 @@ ; CHECK: DBG_VALUE $eax ; CHECK: $eax = SHL32rCL killed renamable $eax ; CHECK: DBG_VALUE $eax -; CHECK: RETQ $eax +; CHECK: RET64 $eax target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/DebugInfo/X86/pr45181.ll b/llvm/test/DebugInfo/X86/pr45181.ll index 9c168164d4f8c..c45799c4acc87 100644 --- a/llvm/test/DebugInfo/X86/pr45181.ll +++ b/llvm/test/DebugInfo/X86/pr45181.ll @@ -159,7 +159,7 @@ attributes #7 = { nounwind optsize } !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "o", scope: !2, file: !6, line: 11, type: !40, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 0fecdcd1628999a1900d9cf84cd33dacf1319fa6)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !10, globals: !41, nameTableKind: None, sysroot: "/") -!3 = !DIFile(filename: "/Users/vsk/tmp/x.cc", directory: "/Users/vsk/src/llvm-backup-master") +!3 = !DIFile(filename: "/Users/vsk/tmp/x.cc", directory: "/Users/vsk/src/llvm-backup-main") !4 = !{!5} !5 = !DICompositeType(tag: DW_TAG_enumeration_type, file: !6, line: 16, baseType: !7, size: 32, elements: !8) !6 = !DIFile(filename: "tmp/x.cc", directory: "/Users/vsk") diff --git a/llvm/test/DebugInfo/X86/sdag-dangling-dbgvalue.ll b/llvm/test/DebugInfo/X86/sdag-dangling-dbgvalue.ll index 95b9e5246656f..3aae5e0107f1e 100644 --- a/llvm/test/DebugInfo/X86/sdag-dangling-dbgvalue.ll +++ b/llvm/test/DebugInfo/X86/sdag-dangling-dbgvalue.ll @@ -141,7 +141,7 @@ attributes #1 = { nounwind readnone speculatable } !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) !1 = distinct !DIGlobalVariable(name: "S", scope: !2, file: !3, line: 4, type: !8, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 7.0.0 (trunk 327229) (llvm/trunk 327239)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !7) -!3 = !DIFile(filename: "sdag-dangling-dbgvalue.c", directory: "/repo/uabbpet/llvm-master") +!3 = !DIFile(filename: "sdag-dangling-dbgvalue.c", directory: "/") !4 = !{} !5 = !{!6} !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) diff --git a/llvm/test/DebugInfo/X86/single-location-2.mir b/llvm/test/DebugInfo/X86/single-location-2.mir index e3f0ec979e22e..d412f9f5e2f85 100644 --- a/llvm/test/DebugInfo/X86/single-location-2.mir +++ b/llvm/test/DebugInfo/X86/single-location-2.mir @@ -88,5 +88,5 @@ body: | ;; end scope, end location range $esi = MOV32ri 2, debug-location !26 - RETQ debug-location !15 + RET64 debug-location !15 ... 
diff --git a/llvm/test/DebugInfo/X86/single-location-inlined-param.mir b/llvm/test/DebugInfo/X86/single-location-inlined-param.mir index f766c4fbef10d..8b36f3ce44f6a 100644 --- a/llvm/test/DebugInfo/X86/single-location-inlined-param.mir +++ b/llvm/test/DebugInfo/X86/single-location-inlined-param.mir @@ -105,10 +105,10 @@ body: | bb.2.return: liveins: $eax - RETQ $eax, debug-location !36 + RET64 $eax, debug-location !36 bb.1.if.end: renamable $eax = MOV32ri 1 - RETQ $eax, debug-location !36 + RET64 $eax, debug-location !36 ... diff --git a/llvm/test/DebugInfo/X86/single-location-interrupted-scope.mir b/llvm/test/DebugInfo/X86/single-location-interrupted-scope.mir index 19f7caee575dc..c5feecaab40da 100644 --- a/llvm/test/DebugInfo/X86/single-location-interrupted-scope.mir +++ b/llvm/test/DebugInfo/X86/single-location-interrupted-scope.mir @@ -155,7 +155,7 @@ body: | TAILJMPd64 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit killed $al, debug-location !35 bb.1.if.end: - RETQ debug-location !36 + RET64 debug-location !36 ... --- @@ -177,6 +177,6 @@ body: | TAILJMPd64 @ext, csr_64, implicit $rsp, implicit $ssp, implicit $rsp, implicit $ssp, implicit killed $al, debug-location !49 bb.1.if.end: - RETQ debug-location !50 + RET64 debug-location !50 ... diff --git a/llvm/test/DebugInfo/X86/symbolize_function_start.s b/llvm/test/DebugInfo/X86/symbolize_function_start.s index fb08c68349880..15862432f24c9 100644 --- a/llvm/test/DebugInfo/X86/symbolize_function_start.s +++ b/llvm/test/DebugInfo/X86/symbolize_function_start.s @@ -1,12 +1,12 @@ # RUN: rm -rf %t # RUN: mkdir %t -# RUN: llvm-mc -filetype=obj -triple=x86_64 -dwarf-version=4 %s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 -dwarf-version=4 %s -o %t/test.o -split-dwarf-file %t/test.dwo # RUN: cd %t # RUN: llvm-symbolizer --verbose 0x0 --obj=test.o | FileCheck --check-prefix=SYM %s # RUN: llvm-dwarfdump -lookup=0x1 test.o | FileCheck --check-prefix=LOOKUP %s # SYM: Filename: .{{[/\\]}}.{{[/\\]}}./test.h -# SYM: Function start filename: .{{[/\\]}}test.cpp +# SYM: Function start filename: .{{[/\\]}}.{{[/\\]}}test.cpp # LOOKUP: Line info: line 0, column 0, start file 'test.cpp', start line 1 @@ -75,7 +75,7 @@ _Z2f1v: # @_Z2f1v .Lskel_string0: .asciz "." 
# string offset=0 .Lskel_string1: - .asciz "test.o" # string offset=2 + .asciz "test.dwo" # string offset=2 .section .debug_str.dwo,"eMS",@progbits,1 .Linfo_string0: .asciz "_Z2f1v" # string offset=0 @@ -88,7 +88,7 @@ _Z2f1v: # @_Z2f1v .Linfo_string4: .asciz "test.cpp" # string offset=115 .Linfo_string5: - .asciz "test.o" # string offset=124 + .asciz "test.dwo" # string offset=124 .section .debug_str_offsets.dwo,"e",@progbits .long 0 .long 7 diff --git a/llvm/test/DebugInfo/X86/symbolize_function_start_v5.s b/llvm/test/DebugInfo/X86/symbolize_function_start_v5.s new file mode 100644 index 0000000000000..f7c6a6c7ef348 --- /dev/null +++ b/llvm/test/DebugInfo/X86/symbolize_function_start_v5.s @@ -0,0 +1,202 @@ +# RUN: rm -rf %t +# RUN: mkdir %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 -dwarf-version=5 %s -o %t/test.o -split-dwarf-file %t/test.dwo +# RUN: cd %t +# RUN: llvm-symbolizer --verbose 0x0 --obj=test.o | FileCheck --check-prefix=SYM %s +# RUN: llvm-dwarfdump -lookup=0x1 test.o | FileCheck --check-prefix=LOOKUP %s + +# SYM: Filename: .{{[/\\]}}.{{[/\\]}}test.c +# SYM: Function start filename: .{{[/\\]}}.{{[/\\]}}test.c + +# LOOKUP: Line info: line 0, column 0, start file 'test.c', start line 1 + + .text + .file "test.c" + .globl f1 # -- Begin function f1 + .p2align 4, 0x90 + .type f1,@function +f1: # @f1 +.Lfunc_begin0: + .file 0 "./" "test.c" md5 0xde4c8c8bd673cd6151beb4570168727a + .loc 0 1 0 # test.c:1:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp0: + .loc 0 1 13 prologue_end # test.c:1:13 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size f1, .Lfunc_end0-f1 + .cfi_endproc + # -- End function + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 74 # DW_TAG_skeleton_unit + .byte 0 # DW_CHILDREN_no + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .ascii "\264B" # DW_AT_GNU_pubnames + .byte 25 # DW_FORM_flag_present + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 4 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .quad -2320906069171721424 + .byte 1 # Abbrev [1] 0x14:0x14 DW_TAG_skeleton_unit + .long .Lline_table_start0 # DW_AT_stmt_list + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .byte 0 # DW_AT_comp_dir + # DW_AT_GNU_pubnames + .byte 1 # DW_AT_dwo_name + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 12 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Lskel_string0: + .asciz "./" # string offset=0 +.Lskel_string1: + .asciz "./test.dwo" # string offset=3 + .section .debug_str_offsets,"",@progbits + .long .Lskel_string0 + .long .Lskel_string1 + .section .debug_str_offsets.dwo,"e",@progbits + .long 20 # Length of String Offsets Set + .short 5 + .short 0 + .section .debug_str.dwo,"eMS",@progbits,1 +.Linfo_string0: + .asciz "f1" # string offset=0 +.Linfo_string1: + .asciz "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e7fdff403e849b18d93cd4a5cb760cba66a92c0b)" # string offset=3 +.Linfo_string2: + .asciz "test.c" # string offset=104 +.Linfo_string3: + .asciz "./test.dwo" # string offset=111 + .section .debug_str_offsets.dwo,"e",@progbits + .long 0 + .long 3 + .long 104 + .long 111 + .section .debug_info.dwo,"e",@progbits + .long .Ldebug_info_dwo_end0-.Ldebug_info_dwo_start0 # Length of Unit +.Ldebug_info_dwo_start0: + .short 5 # DWARF version number + .byte 5 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Section + .quad -2320906069171721424 + .byte 1 # Abbrev [1] 0x14:0x12 DW_TAG_compile_unit + .byte 1 # DW_AT_producer + .short 12 # DW_AT_language + .byte 2 # DW_AT_name + .byte 3 # DW_AT_dwo_name + .byte 2 # Abbrev [2] 0x1a:0xb DW_TAG_subprogram + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .byte 0 # DW_AT_name + .byte 0 # DW_AT_decl_file + .byte 1 # DW_AT_decl_line + # DW_AT_external + .byte 0 # End Of Children Mark +.Ldebug_info_dwo_end0: + .section .debug_abbrev.dwo,"e",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 118 # DW_AT_dwo_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .section .debug_gnu_pubnames,"",@progbits + .long .LpubNames_end0-.LpubNames_start0 # Length of Public Names Info +.LpubNames_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + 
.long 40 # Compilation Unit Length + .long 26 # DIE offset + .byte 48 # Attributes: FUNCTION, EXTERNAL + .asciz "f1" # External Name + .long 0 # End Mark +.LpubNames_end0: + .section .debug_gnu_pubtypes,"",@progbits + .long .LpubTypes_end0-.LpubTypes_start0 # Length of Public Types Info +.LpubTypes_start0: + .short 2 # DWARF Version + .long .Lcu_begin0 # Offset of Compilation Unit Info + .long 40 # Compilation Unit Length + .long 0 # End Mark +.LpubTypes_end0: + .ident "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e7fdff403e849b18d93cd4a5cb760cba66a92c0b)" + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/llvm/test/DebugInfo/X86/trim-var-locs.mir b/llvm/test/DebugInfo/X86/trim-var-locs.mir index 9c1de2593fa5f..e1d3670fee1bd 100644 --- a/llvm/test/DebugInfo/X86/trim-var-locs.mir +++ b/llvm/test/DebugInfo/X86/trim-var-locs.mir @@ -116,5 +116,5 @@ body: | $edi = MOV32ri 9, debug-location !26 ; scope fun !7 - RETQ debug-location !15 + RET64 debug-location !15 ... diff --git a/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll b/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll index 095509fdaddd3..883bab142ec7e 100644 --- a/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll +++ b/llvm/test/DebugInfo/X86/tu-to-non-named-type.ll @@ -11,7 +11,7 @@ ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_member ; CHECK-NEXT: DW_AT_name ("x") -; CHECK-NEXT: DW_AT_type ({{.*}} "int [1]" +; CHECK-NEXT: DW_AT_type ({{.*}} "int[1]" ; But make sure we still use a type unit for an anonymous type that still has a ; name for linkage purposes (due to being defined in a typedef). diff --git a/llvm/test/DebugInfo/attr-btf_type_tag.ll b/llvm/test/DebugInfo/attr-btf_type_tag.ll new file mode 100644 index 0000000000000..4561d0512a991 --- /dev/null +++ b/llvm/test/DebugInfo/attr-btf_type_tag.ll @@ -0,0 +1,62 @@ +; REQUIRES: x86-registered-target +; RUN: llc -filetype=obj -o %t %s +; RUN: llvm-dwarfdump -debug-info %t | FileCheck %s +; Source: +; #define __tag1 __attribute__((btf_type_tag("tag1"))) +; #define __tag2 __attribute__((btf_type_tag("tag2"))) +; +; int * __tag1 * __tag2 *g; +; Compilation flag: +; clang -target x86_64 -g -S -emit-llvm t.c + +@g = dso_local global i32*** null, align 8, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!13, !14, !15, !16, !17} +!llvm.ident = !{!18} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g", scope: !2, file: !3, line: 4, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 2c240a5eefae1a945dfd36cdaa0c677eca90dd82)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "/home/yhs/work/tests/llvm/btf_tag_type") +!4 = !{!0} +!5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64, annotations: !11) +!6 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64, annotations: !9) +!7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = !{!10} +!10 = !{!"btf_type_tag", !"tag1"} +!11 = !{!12} +!12 = !{!"btf_type_tag", !"tag2"} + +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name ("g") +; CHECK-NEXT: DW_AT_type (0x[[T1:[0-9a-f]+]] "int ***") + +; CHECK: 
0x[[T1]]: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type (0x[[T2:[0-9a-f]+]] "int **") + +; CHECK: DW_TAG_LLVM_annotation +; CHECK-NEXT: DW_AT_name ("btf_type_tag") +; CHECK-NEXT: DW_AT_const_value ("tag2") + +; CHECK: NULL + +; CHECK: 0x[[T2]]: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type (0x[[T3:[0-9a-f]+]] "int *") + +; CHECK: DW_TAG_LLVM_annotation +; CHECK-NEXT: DW_AT_name ("btf_type_tag") +; CHECK-NEXT: DW_AT_const_value ("tag1") + +; CHECK: NULL + +; CHECK: 0x[[T3]]: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type (0x{{[0-9a-f]+}} "int") + +!13 = !{i32 7, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{i32 1, !"wchar_size", i32 4} +!16 = !{i32 7, !"uwtable", i32 1} +!17 = !{i32 7, !"frame-pointer", i32 2} +!18 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 2c240a5eefae1a945dfd36cdaa0c677eca90dd82)"} diff --git a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll index 71a6e4722992f..9a3ce244516a0 100644 --- a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll +++ b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll @@ -34,7 +34,7 @@ attributes #0 = { nounwind readnone speculatable } !llvm.module.flags = !{!5} !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-master/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll", directory: "/") +!1 = !DIFile(filename: "/Users/vsk/src/llvm.org-main/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll", directory: "/") !2 = !{} !3 = !{i32 4} !4 = !{i32 4} diff --git a/llvm/test/DebugInfo/dwarfdump-type-units.test b/llvm/test/DebugInfo/dwarfdump-type-units.test index 9f9efc5aaf0d4..5048ca097e8d0 100644 --- a/llvm/test/DebugInfo/dwarfdump-type-units.test +++ b/llvm/test/DebugInfo/dwarfdump-type-units.test @@ -1,16 +1,16 @@ -RUN: llvm-dwarfdump -v %p/Inputs/dwarfdump-type-units.elf-x86-64 | FileCheck -check-prefix=CHECK -check-prefix=LONG %s -RUN: llvm-dwarfdump -v %p/Inputs/dwarfdump-type-units.elf-x86-64 -summarize-types | FileCheck -check-prefix=CHECK -check-prefix=SHORT %s +RUN: llvm-dwarfdump -v %p/Inputs/dwarfdump-type-units.elf-x86-64 | FileCheck -check-prefix=INFO -check-prefix=CHECK -check-prefix=LONG %s +RUN: llvm-dwarfdump -debug-types %p/Inputs/dwarfdump-type-units.elf-x86-64 -summarize-types | FileCheck -implicit-check-not=DW_ -check-prefix=CHECK -check-prefix=SHORT %s RUN: llvm-dwarfdump -v -debug-types %p/Inputs/dwarfdump-type-units.elf-x86-64 | FileCheck -check-prefix=TYPES %s -CHECK: debug_info contents: +INFO: debug_info contents: -CHECK: DW_TAG_variable -CHECK-NEXT: DW_AT_name {{.*}}"f" -CHECK: DW_AT_type [DW_FORM_ref_sig8] ([[FOO_SIG:0x[0-9a-f]*]] +INFO: DW_TAG_variable +INFO-NEXT: DW_AT_name {{.*}}"f" +INFO: DW_AT_type [DW_FORM_ref_sig8] ([[FOO_SIG:0x[0-9a-f]*]] -CHECK: DW_TAG_variable -CHECK-NEXT: DW_AT_name {{.*}}"b" -CHECK: DW_AT_type [DW_FORM_ref_sig8] ([[BAR_SIG:0x[0-9a-f]*]] +INFO: DW_TAG_variable +INFO-NEXT: DW_AT_name {{.*}}"b" +INFO: DW_AT_type [DW_FORM_ref_sig8] ([[BAR_SIG:0x[0-9a-f]*]] CHECK: debug_types contents: @@ -18,7 +18,8 @@ CHECK: debug_types contents: LONG: 0x00000000: Type Unit: {{.*}} name = SHORT-NOT: Type Unit SHORT: name = -CHECK-SAME: 'bar', type_signature = [[BAR_SIG]] +CHECK-SAME: 'bar', type_signature = +LONG: [[BAR_SIG]] SHORT-SAME: length = LONG-SAME: type_offset = 0x[[BAR_OFF:[0-9a-f]*]] (next unit at LONG: DW_TAG_type_unit @@ -28,7 +29,8 @@ 
LONG-NEXT: DW_AT_name {{.*}}"bar" LONG: 0x00000000: Type Unit: {{.*}} name = SHORT: name = -CHECK-SAME: 'foo', type_signature = [[FOO_SIG]] +CHECK-SAME: 'foo', type_signature = +LONG: [[FOO_SIG]] LONG-SAME: type_offset = 0x[[FOO_OFF:[0-9a-f]*]] (next unit at LONG: DW_TAG_type_unit LONG-NOT: NULL diff --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s new file mode 100644 index 0000000000000..37384408c73b2 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/x86-64_self_relocation.s @@ -0,0 +1,35 @@ + .section __TEXT,__text,regular,pure_instructions + + .globl _form_func_ptr +_form_func_ptr: + leaq _form_func_ptr(%rip), %rax + leaq _other(%rip), %rax + leaq _form_func_ptr(%rip), %rax + nop + leaq _form_func_ptr(%rip), %rax + retq + + .globl _other +_other: + leaq _form_func_ptr(%rip), %rax + retq + +# Return 0 if the pointers formed inside and outside the function are the same. + .globl _main +_main: + pushq %rbp + movq %rsp, %rbp + subq $32, %rsp + movl $0, -4(%rbp) + callq _form_func_ptr + movq %rax, -16(%rbp) + callq _other + movq %rax, -24(%rbp) + movq -16(%rbp), %rax + cmpq -24(%rbp), %rax + setne %al + andb $1, %al + movzbl %al, %eax + addq $32, %rsp + popq %rbp + retq \ No newline at end of file diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test new file mode 100644 index 0000000000000..54f8a8a12df26 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation.test @@ -0,0 +1,34 @@ +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s +# RUN: llvm-jitlink -noexec -show-graph %t 2>&1 | \ +# RUN: FileCheck -check-prefix=WITHOUT %s +# RUN: llvm-jitlink -noexec -show-graph -add-self-relocations %t 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK %s + +# WITHOUT: block +# WITHOUT-NEXT: symbols +# WITHOUT-NEXT: _form_func_ptr +# WITHOUT-NEXT: edges +# WITHOUT-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other +# WITHOUT-NOT: kind = + +# CHECK: block +# CHECK-NEXT: symbols +# CHECK-NEXT: _form_func_ptr +# CHECK-NEXT: edges +# CHECK-NEXT: (block + 0x00000003), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NEXT: (block + 0x0000000a), addend = -0x00000004, kind = Delta32, target = _other +# CHECK-NEXT: (block + 0x00000011), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NEXT: (block + 0x00000019), addend = -0x00000004, kind = Delta32, target = _form_func_ptr +# CHECK-NOT: kind = + +# WITHOUT: block +# WITHOUT-NEXT: symbols +# WITHOUT-NEXT: _other +# WITHOUT-NEXT: edges +# WITHOUT-NEXT: kind = Delta32, target = _form_func_ptr + +# CHECK: block +# CHECK-NEXT: symbols +# CHECK-NEXT: _other +# CHECK-NEXT: edges +# CHECK-NEXT: kind = Delta32, target = _form_func_ptr diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test new file mode 100644 index 0000000000000..49a1c00dcefe9 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_self_relocation_exec.test @@ -0,0 +1,8 @@ +# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t %S/Inputs/x86-64_self_relocation.s +# RUN: llvm-jitlink -show-graph %t +# RUN: llvm-jitlink -show-graph -add-self-relocations %t + +# Ensure that the added relocation does 
not create an incorrect pointer. + +# Execution test +# REQUIRES: system-darwin && native && target-x86_64 diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll b/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll index be60ec730ff10..e25fd710b5c36 100644 --- a/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll +++ b/llvm/test/ExecutionEngine/MCJIT/remote/eh.ll @@ -3,6 +3,8 @@ ; XFAIL: arm, cygwin, windows-msvc, windows-gnu ; UNSUPPORTED: powerpc64-unknown-linux-gnu ; Remove UNSUPPORTED for powerpc64-unknown-linux-gnu if problem caused by r266663 is fixed +; FIXME: Something hangs here. +; UNSUPPORTED: use_msan_with_origins declare i8* @__cxa_allocate_exception(i64) declare void @__cxa_throw(i8*, i8*, i8*) declare i32 @__gxx_personality_v0(...) diff --git a/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll b/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll index d013e704c6066..de516650a7fe1 100644 --- a/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll +++ b/llvm/test/ExecutionEngine/OrcLazy/multiple-compile-threads-basic.ll @@ -1,6 +1,9 @@ ; RUN: lli -jit-kind=orc-lazy -compile-threads=2 -thread-entry hello %s | FileCheck %s ; REQUIRES: thread_support ; +; FIXME: Something hangs here. +; UNSUPPORTED: use_msan_with_origins +; ; CHECK: Hello @.str = private unnamed_addr constant [7 x i8] c"Hello\0A\00", align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-stack-safety.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-stack-safety.ll new file mode 100644 index 0000000000000..d05f37db27b14 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-stack-safety.ll @@ -0,0 +1,19 @@ +; REQUIRES: x86-registered-target + +; RUN: opt < %s -S -enable-new-pm=0 -asan-instrumentation-with-call-threshold=0 -asan \ +; RUN: -asan-use-stack-safety=0 -o - | FileCheck %s --check-prefixes=NOSAFETY +; RUN: opt < %s -S -enable-new-pm=0 -asan-instrumentation-with-call-threshold=0 -asan \ +; RUN: -asan-use-stack-safety=1 -o - | FileCheck %s --check-prefixes=SAFETY +; RUN: opt < %s -S -enable-new-pm=1 -asan-instrumentation-with-call-threshold=0 \ +; RUN: -passes='asan-pipeline' -asan-use-stack-safety=0 -o - | FileCheck %s --check-prefixes=NOSAFETY +; RUN: opt < %s -S -enable-new-pm=1 -asan-instrumentation-with-call-threshold=0 \ +; RUN: -passes='asan-pipeline' -asan-use-stack-safety=1 -o - | FileCheck %s --check-prefixes=SAFETY +; NOSAFETY: call void @__asan_load1 +; SAFETY-NOT: call void @__asan_load1 + +define i32 @stack-safety() sanitize_address { + %buf = alloca [10 x i8], align 1 + %arrayidx = getelementptr inbounds [10 x i8], [10 x i8]* %buf, i64 0, i64 0 + %1 = load i8, i8* %arrayidx, align 1 + ret i32 0 +} diff --git a/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll b/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll index 3f7003b136aef..4c3871842eb37 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/tracing-comdat.ll @@ -2,7 +2,7 @@ ; Make sure asan does not instrument __sancov_gen_ ; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s -; RUN: opt < %s -passes='module(require,sancov-module,asan-module),function(asan)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s +; RUN: opt < %s -passes='module(require,sancov-module,asan-module)' -sanitizer-coverage-level=3 -sanitizer-coverage-trace-pc-guard -S | FileCheck %s target 
datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" $Foo = comdat any diff --git a/llvm/test/LTO/Resolution/X86/Inputs/ifunc2.ll b/llvm/test/LTO/Resolution/X86/Inputs/ifunc2.ll index a70325bebd61a..09403e47de58a 100644 --- a/llvm/test/LTO/Resolution/X86/Inputs/ifunc2.ll +++ b/llvm/test/LTO/Resolution/X86/Inputs/ifunc2.ll @@ -1,6 +1,6 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -define i32 @foo_resolver() { - ret i32 2 +define i32 ()* @foo_resolver() { + ret i32 ()* inttoptr (i32 2 to i32 ()*) } diff --git a/llvm/test/LTO/Resolution/X86/ifunc.ll b/llvm/test/LTO/Resolution/X86/ifunc.ll index afe7c8cd1e7e7..d4a2d5bd608fe 100644 --- a/llvm/test/LTO/Resolution/X86/ifunc.ll +++ b/llvm/test/LTO/Resolution/X86/ifunc.ll @@ -1,23 +1,15 @@ ; RUN: opt -module-summary -o %t.bc %s -; RUN: llvm-lto2 run %t.bc -r %t.bc,foo,pl -r %t.bc,strlen,pl -o %t2 +; RUN: llvm-lto2 run %t.bc -r %t.bc,foo,pl -o %t2 ; RUN: llvm-nm %t2.1 | FileCheck %s ; CHECK: i foo ; CHECK: t foo_resolver -; CHECK: i strlen -; CHECK: t strlen_resolver target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -@foo = ifunc i32 (i32), i64 ()* @foo_resolver -@strlen = ifunc i64 (i8*), bitcast (i64 (i8*)* ()* @strlen_resolver to i64 (i8*)*) +@foo = ifunc i32 (i32), i32 (i32)* ()* @foo_resolver -define internal i64 @foo_resolver() { +define internal i32 (i32)* @foo_resolver() { entry: - ret i64 0 -} - -define internal i64 (i8*)* @strlen_resolver() { -entry: - ret i64 (i8*)* null + ret i32 (i32)* null } diff --git a/llvm/test/LTO/Resolution/X86/ifunc2.ll b/llvm/test/LTO/Resolution/X86/ifunc2.ll index 6dd5e59831836..0d824f6f3b27c 100644 --- a/llvm/test/LTO/Resolution/X86/ifunc2.ll +++ b/llvm/test/LTO/Resolution/X86/ifunc2.ll @@ -6,14 +6,14 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -; CHECK: @foo = ifunc i32 (), i32 ()* @foo_resolver.2 -@foo = ifunc i32 (), i32 ()* @foo_resolver +; CHECK: @foo = ifunc i32 (), i32 ()* ()* @foo_resolver.2 +@foo = ifunc i32 (), i32 ()* ()* @foo_resolver -; CHECK: define internal i32 @foo_resolver.2() { -; CHECK-NEXT: ret i32 1 -define weak i32 @foo_resolver() { - ret i32 1 +; CHECK: define internal i32 ()* @foo_resolver.2() { +; CHECK-NEXT: ret i32 ()* inttoptr (i32 1 to i32 ()*) +define weak i32 ()* @foo_resolver() { + ret i32 ()* inttoptr (i32 1 to i32 ()*) } -; CHECK: define i32 @foo_resolver() { -; CHECK-NEXT: ret i32 2 +; CHECK: define i32 ()* @foo_resolver() { +; CHECK-NEXT: ret i32 ()* inttoptr (i32 2 to i32 ()*) diff --git a/llvm/test/Linker/ifunc.ll b/llvm/test/Linker/ifunc.ll index 1e5396ed5fed6..aaf5836a137da 100644 --- a/llvm/test/Linker/ifunc.ll +++ b/llvm/test/Linker/ifunc.ll @@ -3,18 +3,18 @@ ;; Check that ifuncs are linked in properly. 
-; CHECK-DAG: @foo = ifunc void (), bitcast (void ()* ()* @foo_resolve to void ()*) +; CHECK-DAG: @foo = ifunc void (), void ()* ()* @foo_resolve ; CHECK-DAG: define internal void ()* @foo_resolve() { -; CHECK-DAG: @bar = ifunc void (), bitcast (void ()* ()* @bar_resolve to void ()*) +; CHECK-DAG: @bar = ifunc void (), void ()* ()* @bar_resolve ; CHECK-DAG: define internal void ()* @bar_resolve() { ;--- a.ll declare void @bar() ;--- b.ll -@foo = ifunc void (), bitcast (void ()* ()* @foo_resolve to void ()*) -@bar = ifunc void (), bitcast (void ()* ()* @bar_resolve to void ()*) +@foo = ifunc void (), void ()* ()* @foo_resolve +@bar = ifunc void (), void ()* ()* @bar_resolve define internal void ()* @foo_resolve() { ret void ()* null diff --git a/llvm/test/MC/AArch64/fixup-out-of-range.s b/llvm/test/MC/AArch64/fixup-out-of-range.s index 500c1d1d1f9af..05d07b654fa8b 100644 --- a/llvm/test/MC/AArch64/fixup-out-of-range.s +++ b/llvm/test/MC/AArch64/fixup-out-of-range.s @@ -1,5 +1,5 @@ // RUN: not llvm-mc -triple aarch64--none-eabi -filetype obj < %s -o /dev/null 2>&1 | FileCheck %s -// RUN: not llvm-mc -triple aarch64-windows -filetype obj < %s -o /dev/null 2>&1 | FileCheck %s +// RUN: not llvm-mc -triple aarch64-windows -filetype obj < %s -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-WIN // CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: fixup value out of range adr x0, distant @@ -70,6 +70,8 @@ // CHECK: :[[@LINE+1]]:{{[0-9]+}}: error: relocation for a thread-local variable points to an absolute symbol movz x0, #:tprel_g0:value1 +// CHECK-WIN: :[[@LINE+1]]:{{[0-9]+}}: error: fixup value out of range + adrp x0, external+0x1000000 .byte 0 unaligned: diff --git a/llvm/test/MC/ARM/deprecated-v8.s b/llvm/test/MC/ARM/deprecated-v8.s index aa72c2e7bd8e6..3962396884042 100644 --- a/llvm/test/MC/ARM/deprecated-v8.s +++ b/llvm/test/MC/ARM/deprecated-v8.s @@ -3,49 +3,58 @@ @ RUN: llvm-mc -triple armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV7 @ RUN: llvm-mc -triple thumbv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-THUMBV7 @ RUN: llvm-mc -triple armv6 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ARMV6 +@ RUN: llvm-mc -triple thumbv8 -show-encoding -no-deprecated-warn < %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-WARN setend be @ CHECK-ARMV8: warning: deprecated @ CHECK-THUMBV8: warning: deprecated @ CHECK-ARMV7-NOT: warning: deprecated @ CHECK-THUMBV7-NOT: warning: deprecated +@ CHECK-NO-WARN-NOT: warning: deprecated mcr p15, #0, r5, c7, c5, #4 @ CHECK-ARMV8: warning: deprecated since v7, use 'isb' @ CHECK-THUMBV8: warning: deprecated since v7, use 'isb' @ CHECK-ARMV7: warning: deprecated since v7, use 'isb' @ CHECK-THUMBV7: warning: deprecated since v7, use 'isb' @ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'isb' +@ CHECK-NO-WARN-NOT: warning: deprecated since v7, use 'isb' mcr p15, #0, r5, c7, c10, #4 @ CHECK-ARMV8: warning: deprecated since v7, use 'dsb' @ CHECK-THUMBV8: warning: deprecated since v7, use 'dsb' @ CHECK-ARMV7: warning: deprecated since v7, use 'dsb' @ CHECK-THUMBV7: warning: deprecated since v7, use 'dsb' @ CHECK-ARMV6-NOT: warning: deprecated since v7, use 'dsb' +@ CHECK-NO-WARN-NOT: warning: deprecated since v7, use 'dsb' mcr p15, #0, r5, c7, c10, #5 @ CHECK-ARMV8: warning: deprecated since v7, use 'dmb' @ CHECK-THUMBV8: warning: deprecated since v7, use 'dmb' @ CHECK-ARMV7: warning: deprecated since v7, use 'dmb' @ CHECK-THUMBV7: warning: deprecated since v7, use 'dmb' @ CHECK-ARMV6-NOT: warning: deprecated 
since v7, use 'dmb' +@ CHECK-NO-WARN-NOT: warning: deprecated since v7, use 'dmb' it ge movge r0, #4096 @ CHECK-THUMBV8: warning: deprecated instruction in IT block @ CHECK-THUMBV7-NOT: warning +@ CHECK-NO-WARN-NOT: warning ite ge addge r0, r1 addlt r0, r2 @ CHECK-ARMV8: warning: applying IT instruction to more than one subsequent instruction is deprecated @ CHECK-THUMBV8: warning: applying IT instruction to more than one subsequent instruction is deprecated @ CHECK-THUMBV7-NOT: warning +@ CHECK-NO-WARN-NOT: warning it ge movge r0, pc // invalid operand @ CHECK-THUMBV8: warning: deprecated instruction in IT block @ CHECK-THUMBV7-NOT: warning +@ CHECK-NO-WARN-NOT: warning it ge revge r0, r0 // invalid instruction @ CHECK-THUMBV8: warning: deprecated instruction in IT block @ CHECK-THUMBV7-NOT: warning +@ CHECK-NO-WARN-NOT: warning it ge clzge r0, r0 // only has 32-bit form @ CHECK-THUMBV8: warning: deprecated instruction in IT block @ CHECK-THUMBV7-NOT: warning - +@ CHECK-NO-WARN-NOT: warning diff --git a/llvm/test/MC/ARM/inline-asm-srcloc.ll b/llvm/test/MC/ARM/inline-asm-srcloc.ll index 75386d7e4c811..f1c95fef95aef 100644 --- a/llvm/test/MC/ARM/inline-asm-srcloc.ll +++ b/llvm/test/MC/ARM/inline-asm-srcloc.ll @@ -1,7 +1,5 @@ ; RUN: not llc -filetype=obj 2>&1 -o /dev/null < %s | FileCheck %s -; ModuleID = '/scratch/llvm/master/tools/clang/test/Misc/inline-asm-diags.c' -source_filename = "/scratch/llvm/master/tools/clang/test/Misc/inline-asm-diags.c" target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7-arm-none-eabi" diff --git a/llvm/test/MC/CSKY/basic-16bit.s b/llvm/test/MC/CSKY/basic-16bit.s new file mode 100644 index 0000000000000..9b484431f6e9b --- /dev/null +++ b/llvm/test/MC/CSKY/basic-16bit.s @@ -0,0 +1,270 @@ +# RUN: llvm-mc %s -triple=csky -show-encoding -csky-no-aliases -mattr=+e1 \ +# RUN: -mattr=+e2 -mattr=+btst16 | FileCheck -check-prefixes=CHECK-ASM %s + +# CHECK-ASM: addi16 a0, a0, 2 +# CHECK-ASM: encoding: [0x06,0x58] +addi16 a0, a0, 2 + +# CHECK-ASM: addi16 a0, sp, 4 +# CHECK-ASM: encoding: [0x01,0x18] +addi16 a0, sp, 4 + +# CHECK-ASM: addi16 a0, a1, 2 +# CHECK-ASM: encoding: [0x06,0x59] +addi16 a0, a1, 2 + +# CHECK-ASM: addi16 sp, sp, 8 +# CHECK-ASM: encoding: [0x02,0x14] +addi16 sp, sp, 8 + +# CHECK-ASM: subi16 a0, a0, 2 +# CHECK-ASM: encoding: [0x07,0x58] +subi16 a0, a0, 2 + +# CHECK-ASM: subi16 a0, a1, 2 +# CHECK-ASM: encoding: [0x07,0x59] +subi16 a0, a1, 2 + +# CHECK-ASM: subi16 sp, sp, 8 +# CHECK-ASM: encoding: [0x22,0x14] +subi16 sp, sp, 8 + +# CHECK-ASM: lsli16 a0, a1, 2 +# CHECK-ASM: encoding: [0x02,0x41] +lsli16 a0, a1, 2 + +# CHECK-ASM: lsri16 a0, a1, 2 +# CHECK-ASM: encoding: [0x02,0x49] +lsri16 a0, a1, 2 + +# CHECK-ASM: asri16 a0, a1, 2 +# CHECK-ASM: encoding: [0x02,0x51] +asri16 a0, a1, 2 + +# CHECK-ASM: btsti16 a0, 2 +# CHECK-ASM: encoding: [0xc2,0x38] +btsti16 a0, 2 + +# CHECK-ASM: bclri16 a0, 2 +# CHECK-ASM: encoding: [0x82,0x38] +bclri16 a0, 2 + +# CHECK-ASM: bseti16 a0, 2 +# CHECK-ASM: encoding: [0xa2,0x38] +bseti16 a0, 2 + +# CHECK-ASM: cmpnei16 a0, 2 +# CHECK-ASM: encoding: [0x42,0x38] +cmpnei16 a0, 2 + +# CHECK-ASM: cmphsi16 a0, 2 +# CHECK-ASM: encoding: [0x01,0x38] +cmphsi16 a0, 2 + +# CHECK-ASM: cmplti16 a0, 2 +# CHECK-ASM: encoding: [0x21,0x38] +cmplti16 a0, 2 + +# CHECK-ASM: movi16 a0, 2 +# CHECK-ASM: encoding: [0x02,0x30] +movi16 a0, 2 + +# CHECK-ASM: addu16 a3, l0, l1 +# CHECK-ASM: encoding: [0x74,0x5c] +addu16 a3, l0, l1 + +# CHECK-ASM: subu16 a3, l0, l1 +# CHECK-ASM: encoding: [0x75,0x5c] +subu16 a3, l0, 
l1 + +# CHECK-ASM: and16 a3, l0 +# CHECK-ASM: encoding: [0xd0,0x68] +and16 a3, l0 + +# CHECK-ASM: andn16 a3, l0 +# CHECK-ASM: encoding: [0xd1,0x68] +andn16 a3, l0 + +# CHECK-ASM: or16 a3, l0 +# CHECK-ASM: encoding: [0xd0,0x6c] +or16 a3, l0 + +# CHECK-ASM: xor16 a3, l0 +# CHECK-ASM: encoding: [0xd1,0x6c] +xor16 a3, l0 + +# CHECK-ASM: nor16 a3, l0 +# CHECK-ASM: encoding: [0xd2,0x6c] +nor16 a3, l0 + +# CHECK-ASM: lsl16 a3, l0 +# CHECK-ASM: encoding: [0xd0,0x70] +lsl16 a3, l0 + +# CHECK-ASM: rotl16 a3, l0 +# CHECK-ASM: encoding: [0xd3,0x70] +rotl16 a3, l0 + +# CHECK-ASM: lsr16 a3, l0 +# CHECK-ASM: encoding: [0xd1,0x70] +lsr16 a3, l0 + +# CHECK-ASM: asr16 a3, l0 +# CHECK-ASM: encoding: [0xd2,0x70] +asr16 a3, l0 + +# CHECK-ASM: mult16 a3, l0 +# CHECK-ASM: encoding: [0xd0,0x7c] +mult16 a3, l0 + +# CHECK-ASM: addc16 a3, l0 +# CHECK-ASM: encoding: [0xd1,0x60] +addc16 a3, l0 + +# CHECK-ASM: subc16 a3, l0 +# CHECK-ASM: encoding: [0xd3,0x60] +subc16 a3, l0 + +# CHECK-ASM: ld16.b a0, (a0, 2) +# CHECK-ASM: encoding: [0x02,0x80] +ld16.b a0, (a0, 2) + +# CHECK-ASM: ld16.h a0, (a0, 2) +# CHECK-ASM: encoding: [0x01,0x88] +ld16.h a0, (a0, 2) + +# CHECK-ASM: ld16.w a0, (a0, 4) +# CHECK-ASM: encoding: [0x01,0x90] +ld16.w a0, (a0, 4) + +# CHECK-ASM: ld16.w a0, (sp, 4) +# CHECK-ASM: encoding: [0x01,0x98] +ld16.w a0, (sp, 4) + +# CHECK-ASM: st16.b a0, (a0, 2) +# CHECK-ASM: encoding: [0x02,0xa0] +st16.b a0, (a0, 2) + +# CHECK-ASM: st16.h a0, (a0, 2) +# CHECK-ASM: encoding: [0x01,0xa8] +st16.h a0, (a0, 2) + +# CHECK-ASM: st16.w a0, (a0, 4) +# CHECK-ASM: encoding: [0x01,0xb0] +st16.w a0, (a0, 4) + +# CHECK-ASM: st16.w a0, (sp, 4) +# CHECK-ASM: encoding: [0x01,0xb8] +st16.w a0, (sp, 4) + +# CHECK-ASM: revb16 a3, l0 +# CHECK-ASM: encoding: [0xd2,0x78] +revb16 a3, l0 + +# CHECK-ASM: revh16 a3, l0 +# CHECK-ASM: encoding: [0xd3,0x78] +revh16 a3, l0 + +# CHECK-ASM: mvcv16 a3 +# CHECK-ASM: encoding: [0xc3,0x64] +mvcv16 a3 + +# CHECK-ASM: cmpne16 a3, l0 +# CHECK-ASM: encoding: [0x0e,0x65] +cmpne16 a3, l0 + +# CHECK-ASM: cmphs16 a3, l0 +# CHECK-ASM: encoding: [0x0c,0x65] +cmphs16 a3, l0 + +# CHECK-ASM: cmplt16 a3, l0 +# CHECK-ASM: encoding: [0x0d,0x65] +cmplt16 a3, l0 + +# CHECK-ASM: tst16 a3, l0 +# CHECK-ASM: encoding: [0x0e,0x69] +tst16 a3, l0 + +# CHECK-ASM: tstnbz16 a3 +# CHECK-ASM: encoding: [0x0f,0x68] +tstnbz16 a3 + +# CHECK-ASM: br16 .L.test +# CHECK-ASM: encoding: [A,0x04'A'] +# CHECK-ASM: fixup A - offset: 0, value: .L.test, kind: fixup_csky_pcrel_imm10_scale2 +.L.test: +br16 .L.test + +# CHECK-ASM: bt16 .L.test2 +# CHECK-ASM: encoding: [A,0x08'A'] +# CHECK-ASM: fixup A - offset: 0, value: .L.test2, kind: fixup_csky_pcrel_imm10_scale2 +.L.test2: +bt16 .L.test2 + +# CHECK-ASM: bf16 .L.test3 +# CHECK-ASM: encoding: [A,0x0c'A'] +# CHECK-ASM: fixup A - offset: 0, value: .L.test3, kind: fixup_csky_pcrel_imm10_scale2 +.L.test3: +bf16 .L.test3 + +# CHECK-ASM: jmp16 a3 +# CHECK-ASM: encoding: [0x0c,0x78] +jmp16 a3 + +# CHECK-ASM: jsr16 a3 +# CHECK-ASM: encoding: [0xcd,0x7b] +jsr16 a3 + +# CHECK-ASM: lrw16 a0, [.L.test14] +# CHECK-ASM: encoding: [A,A] +# CHECK-ASM: fixup A - offset: 0, value: .L.test14, kind: fixup_csky_pcrel_uimm7_scale4 +.L.test14: +lrw16 a0, [.L.test14] + +# RUN: not llvm-mc -triple csky -mattr=+e1 -mattr=+e2 -mattr=+btst16 --defsym=ERR=1 < %s 2>&1 | FileCheck %s + +.ifdef ERR + +# oimm8 +addi16 a0, 0 # CHECK: :[[#@LINE]]:12: error: operand must be sp register + +# oimm5 +cmphsi16 a0, 0 # CHECK: :[[#@LINE]]:14: error: immediate must be an integer in the range [1, 32] + +# uimm5 +lsli16 a0, a0, 32 # 
CHECK: :[[#@LINE]]:16: error: immediate must be an integer in the range [0, 31] + +# uimm12/uimm12_1/uimm12_2 +ld32.b a0, (a0, -1) # CHECK: :[[@LINE]]:17: error: immediate must be an integer in the range [0, 4095] +ld32.h a0, (a0, 4095) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 2 bytes in the range [0, 4094] +ld32.h a0, (a0, 4093) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 2 bytes in the range [0, 4094] +ld32.w a0, (a0, 4093) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 4 bytes in the range [0, 4092] +ld32.w a0, (a0, 2) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 4 bytes in the range [0, 4092] + +st32.b a0, (a0, -1) # CHECK: :[[@LINE]]:17: error: immediate must be an integer in the range [0, 4095] +st32.h a0, (a0, 4095) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 2 bytes in the range [0, 4094] +st32.h a0, (a0, 4093) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 2 bytes in the range [0, 4094] +st32.w a0, (a0, 4093) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 4 bytes in the range [0, 4092] +st32.w a0, (a0, 2) # CHECK: :[[@LINE]]:17: error: immediate must be a multiple of 4 bytes in the range [0, 4092] + +# Invalid mnemonics +subs t0, t2, t1 # CHECK: :[[#@LINE]]:1: error: unrecognized instruction mnemonic +nandi t0, t2, 0 # CHECK: :[[#@LINE]]:1: error: unrecognized instruction mnemonic + +# Invalid register names +addi16 foo, sp, 10 # CHECK: :[[#@LINE]]:8: error: unknown operand +lsli16 a10, a2, 0x20 # CHECK: :[[#@LINE]]:8: error: unknown operand +asri16 x16, s0, s0 # CHECK: :[[#@LINE]]:8: error: unknown operand + +# Invalid operand types +lsli16 a0, 22, 220 # CHECK: :[[#@LINE]]:12: error: invalid operand for instruction +subu16 a0, a1, 1 # CHECK: :[[#@LINE]]:16: error: invalid operand for instruction + +# Too many operands +lsli16 a0, a1, 0x11, 0x60 # CHECK: :[[@LINE]]:22: error: invalid operand for instruction + +# Too few operands +lsli16 a0 # CHECK: :[[#@LINE]]:1: error: too few operands for instruction +lsl16 a0 # CHECK: :[[#@LINE]]:1: error: too few operands for instruction + +.endif \ No newline at end of file diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index a6365a1f5d00f..9edc0aacda037 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -66,8 +66,11 @@ .attribute arch, "rv32izbt" # CHECK: attribute 5, "rv32i2p0_zbt0p93" +.attribute arch, "rv32ifzfhmin" +# CHECK: attribute 5, "rv32i2p0_f2p0_zfhmin0p1" + .attribute arch, "rv32ifzfh" -# CHECK: attribute 5, "rv32i2p0_f2p0_zfh0p1" +# CHECK: attribute 5, "rv32i2p0_f2p0_zfh0p1_zfhmin0p1" .attribute arch, "rv32ivzvamo_zvlsseg" # CHECK: attribute 5, "rv32i2p0_v0p10_zvamo0p10_zvlsseg0p10" diff --git a/llvm/test/MC/RISCV/rv32zfhmin-invalid.s b/llvm/test/MC/RISCV/rv32zfhmin-invalid.s new file mode 100644 index 0000000000000..e1aff3cef204b --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zfhmin-invalid.s @@ -0,0 +1,23 @@ +# RUN: not llvm-mc -triple riscv32 -mattr=+experimental-zfhmin < %s 2>&1 | \ +# RUN: FileCheck %s + +# Out of range immediates +## simm12 +flh ft1, -2049(a0) # CHECK: :[[@LINE]]:10: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an integer in the range [-2048, 2047] +fsh ft2, 2048(a1) # CHECK: :[[@LINE]]:10: error: operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an integer in the range [-2048, 2047] + +# Memory operand not formatted correctly +flh ft1, a0, -200 # CHECK:
:[[@LINE]]:14: error: invalid operand for instruction + +# Invalid register names +flh ft15, 100(a0) # CHECK: :[[@LINE]]:5: error: invalid operand for instruction +flh ft1, 100(a10) # CHECK: :[[@LINE]]:14: error: expected register + +# Integer registers where FP regs are expected +fmv.x.h fs7, a2 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction + +# FP registers where integer regs are expected +fmv.h.x a8, ft2 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction + +# Zfh instructions +fmadd.h f10, f11, f12, f13, dyn # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zfh' (Half-Precision Floating-Point) diff --git a/llvm/test/MC/RISCV/rv32zfhmin-valid.s b/llvm/test/MC/RISCV/rv32zfhmin-valid.s new file mode 100644 index 0000000000000..661c69b5a49cd --- /dev/null +++ b/llvm/test/MC/RISCV/rv32zfhmin-valid.s @@ -0,0 +1,62 @@ +# RUN: llvm-mc %s -triple=riscv32 -mattr=+experimental-zfhmin,+d -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -mattr=+experimental-zfhmin,+d -riscv-no-aliases -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+experimental-zfhmin,+d < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zfhmin,+d -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+experimental-zfhmin,+d < %s \ +# RUN: | llvm-objdump --mattr=+experimental-zfhmin,+d -M no-aliases -d -r - \ +# RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s + +# CHECK-ASM-AND-OBJ: flh ft0, 12(a0) +# CHECK-ASM: encoding: [0x07,0x10,0xc5,0x00] +flh f0, 12(a0) +# CHECK-ASM-AND-OBJ: flh ft1, 4(ra) +# CHECK-ASM: encoding: [0x87,0x90,0x40,0x00] +flh f1, +4(ra) +# CHECK-ASM-AND-OBJ: flh ft2, -2048(a3) +# CHECK-ASM: encoding: [0x07,0x91,0x06,0x80] +flh f2, -2048(x13) +# CHECK-ASM-AND-OBJ: flh ft3, -2048(s1) +# CHECK-ASM: encoding: [0x87,0x91,0x04,0x80] +flh f3, %lo(2048)(s1) +# CHECK-ASM-AND-OBJ: flh ft4, 2047(s2) +# CHECK-ASM: encoding: [0x07,0x12,0xf9,0x7f] +flh f4, 2047(s2) +# CHECK-ASM-AND-OBJ: flh ft5, 0(s3) +# CHECK-ASM: encoding: [0x87,0x92,0x09,0x00] +flh f5, 0(s3) + +# CHECK-ASM-AND-OBJ: fsh ft6, 2047(s4) +# CHECK-ASM: encoding: [0xa7,0x1f,0x6a,0x7e] +fsh f6, 2047(s4) +# CHECK-ASM-AND-OBJ: fsh ft7, -2048(s5) +# CHECK-ASM: encoding: [0x27,0x90,0x7a,0x80] +fsh f7, -2048(s5) +# CHECK-ASM-AND-OBJ: fsh fs0, -2048(s6) +# CHECK-ASM: encoding: [0x27,0x10,0x8b,0x80] +fsh f8, %lo(2048)(s6) +# CHECK-ASM-AND-OBJ: fsh fs1, 999(s7) +# CHECK-ASM: encoding: [0xa7,0x93,0x9b,0x3e] +fsh f9, 999(s7) + +# CHECK-ASM-AND-OBJ: fmv.x.h a2, fs7 +# CHECK-ASM: encoding: [0x53,0x86,0x0b,0xe4] +fmv.x.h a2, fs7 +# CHECK-ASM-AND-OBJ: fmv.h.x ft1, a6 +# CHECK-ASM: encoding: [0xd3,0x00,0x08,0xf4] +fmv.h.x ft1, a6 + +# CHECK-ASM-AND-OBJ: fcvt.s.h fa0, ft0 +# CHECK-ASM: encoding: [0x53,0x05,0x20,0x40] +fcvt.s.h fa0, ft0 +# CHECK-ASM-AND-OBJ: fcvt.h.s ft2, fa2 +# CHECK-ASM: encoding: [0x53,0x71,0x06,0x44] +fcvt.h.s ft2, fa2 +# CHECK-ASM-AND-OBJ: fcvt.d.h fa0, ft0 +# CHECK-ASM: encoding: [0x53,0x05,0x20,0x42] +fcvt.d.h fa0, ft0 +# CHECK-ASM-AND-OBJ: fcvt.h.d ft2, fa2 +# CHECK-ASM: encoding: [0x53,0x71,0x16,0x44] +fcvt.h.d ft2, fa2 diff --git a/llvm/test/MC/RISCV/rvv/aliases.s b/llvm/test/MC/RISCV/rvv/aliases.s index ee0de452ab6b7..3bf55fa405fe3 100644 --- a/llvm/test/MC/RISCV/rvv/aliases.s +++ b/llvm/test/MC/RISCV/rvv/aliases.s @@ -90,3 +90,12 @@ vfredsum.vs v8, v4, 
v20, v0.t # ALIAS: vfwredusum.vs v8, v4, v20, v0.t # encoding: [0x57,0x14,0x4a,0xc4] # NO-ALIAS: vfwredusum.vs v8, v4, v20, v0.t # encoding: [0x57,0x14,0x4a,0xc4] vfwredsum.vs v8, v4, v20, v0.t +# ALIAS: vcpop.m a2, v4, v0.t # encoding: [0x57,0x26,0x48,0x40] +# NO-ALIAS: vcpop.m a2, v4, v0.t # encoding: [0x57,0x26,0x48,0x40] +vpopc.m a2, v4, v0.t +# ALIAS: vmandn.mm v8, v4, v20 # encoding: [0x57,0x24,0x4a,0x62] +# NO-ALIAS: vmandn.mm v8, v4, v20 # encoding: [0x57,0x24,0x4a,0x62] +vmandnot.mm v8, v4, v20 +# ALIAS: vmorn.mm v8, v4, v20 # encoding: [0x57,0x24,0x4a,0x72] +# NO-ALIAS: vmorn.mm v8, v4, v20 # encoding: [0x57,0x24,0x4a,0x72] +vmornot.mm v8, v4, v20 diff --git a/llvm/test/MC/RISCV/rvv/compare.s b/llvm/test/MC/RISCV/rvv/compare.s index 28bc8b55369ae..e236a99e16d79 100644 --- a/llvm/test/MC/RISCV/rvv/compare.s +++ b/llvm/test/MC/RISCV/rvv/compare.s @@ -421,7 +421,7 @@ vmsge.vx v8, v4, a0, v0.t vmsgeu.vx v0, v4, a0, v0.t, v2 # CHECK-INST: vmsltu.vx v2, v4, a0, v0.t -# CHECK-INST: vmandnot.mm v0, v0, v2 +# CHECK-INST: vmandn.mm v0, v0, v2 # CHECK-ENCODING: [0x57,0x41,0x45,0x68] # CHECK-ENCODING: [0x57,0x20,0x01,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) @@ -430,7 +430,7 @@ vmsgeu.vx v0, v4, a0, v0.t, v2 vmsge.vx v0, v4, a0, v0.t, v2 # CHECK-INST: vmslt.vx v2, v4, a0, v0.t -# CHECK-INST: vmandnot.mm v0, v0, v2 +# CHECK-INST: vmandn.mm v0, v0, v2 # CHECK-ENCODING: [0x57,0x41,0x45,0x6c] # CHECK-ENCODING: [0x57,0x20,0x01,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) @@ -439,8 +439,8 @@ vmsge.vx v0, v4, a0, v0.t, v2 vmsgeu.vx v9, v4, a0, v0.t, v2 # CHECK-INST: vmsltu.vx v2, v4, a0 -# CHECK-INST: vmandnot.mm v2, v0, v2 -# CHECK-INST: vmandnot.mm v9, v9, v0 +# CHECK-INST: vmandn.mm v2, v0, v2 +# CHECK-INST: vmandn.mm v9, v9, v0 # CHECK-INST: vmor.mm v9, v2, v9 # CHECK-ENCODING: [0x57,0x41,0x45,0x6a] # CHECK-ENCODING: [0x57,0x21,0x01,0x62] @@ -454,8 +454,8 @@ vmsgeu.vx v9, v4, a0, v0.t, v2 vmsge.vx v8, v4, a0, v0.t, v2 # CHECK-INST: vmslt.vx v2, v4, a0 -# CHECK-INST: vmandnot.mm v2, v0, v2 -# CHECK-INST: vmandnot.mm v8, v8, v0 +# CHECK-INST: vmandn.mm v2, v0, v2 +# CHECK-INST: vmandn.mm v8, v8, v0 # CHECK-INST: vmor.mm v8, v2, v8 # CHECK-ENCODING: [0x57,0x41,0x45,0x6e] # CHECK-ENCODING: [0x57,0x21,0x01,0x62] diff --git a/llvm/test/MC/RISCV/rvv/mask.s b/llvm/test/MC/RISCV/rvv/mask.s index ef029388da9c3..e50f19dca2bb5 100644 --- a/llvm/test/MC/RISCV/rvv/mask.s +++ b/llvm/test/MC/RISCV/rvv/mask.s @@ -20,8 +20,8 @@ vmnand.mm v8, v4, v20 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 24 4a 76 -vmandnot.mm v8, v4, v20 -# CHECK-INST: vmandnot.mm v8, v4, v20 +vmandn.mm v8, v4, v20 +# CHECK-INST: vmandn.mm v8, v4, v20 # CHECK-ENCODING: [0x57,0x24,0x4a,0x62] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 24 4a 62 @@ -44,8 +44,8 @@ vmnor.mm v8, v4, v20 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 24 4a 7a -vmornot.mm v8, v4, v20 -# CHECK-INST: vmornot.mm v8, v4, v20 +vmorn.mm v8, v4, v20 +# CHECK-INST: vmorn.mm v8, v4, v20 # CHECK-ENCODING: [0x57,0x24,0x4a,0x72] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 24 4a 72 @@ -56,14 +56,14 @@ vmxnor.mm v8, v4, v20 # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 24 4a 7e -vpopc.m a2, v4, v0.t -# CHECK-INST: vpopc.m a2, v4, v0.t +vcpop.m 
a2, v4, v0.t +# CHECK-INST: vcpop.m a2, v4, v0.t # CHECK-ENCODING: [0x57,0x26,0x48,0x40] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 26 48 40 -vpopc.m a2, v4 -# CHECK-INST: vpopc.m a2, v4 +vcpop.m a2, v4 +# CHECK-INST: vcpop.m a2, v4 # CHECK-ENCODING: [0x57,0x26,0x48,0x42] # CHECK-ERROR: instruction requires the following: 'V' (Vector Instructions) # CHECK-UNKNOWN: 57 26 48 42 diff --git a/llvm/test/MC/WebAssembly/basic-assembly.s b/llvm/test/MC/WebAssembly/basic-assembly.s index 44640824504da..b86172ba9e24c 100644 --- a/llvm/test/MC/WebAssembly/basic-assembly.s +++ b/llvm/test/MC/WebAssembly/basic-assembly.s @@ -1,9 +1,10 @@ -# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+reference-types,atomics,+simd128,+nontrapping-fptoint,+exception-handling < %s | FileCheck %s +# RUN: llvm-mc -triple=wasm32-unknown-unknown -mattr=+tail-call,+reference-types,atomics,+simd128,+nontrapping-fptoint,+exception-handling < %s | FileCheck %s # Check that it converts to .o without errors, but don't check any output: -# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+reference-types,+atomics,+simd128,+nontrapping-fptoint,+exception-handling -o %t.o < %s +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj -mattr=+tail-call,+reference-types,+atomics,+simd128,+nontrapping-fptoint,+exception-handling -o %t.o < %s .functype something1 () -> () .functype something2 (i64) -> (i32, f64) +.functype something3 () -> (i32) .globaltype __stack_pointer, i32 empty_func: @@ -86,6 +87,17 @@ test0: else end_if drop + block void + i32.const 2 + return + end_block + block void + return_call something3 + end_block + block void + i32.const 3 + return_call_indirect () -> (i32) + end_block local.get 4 local.get 5 f32x4.add @@ -215,6 +227,17 @@ empty_fref_table: # CHECK-NEXT: else # CHECK-NEXT: end_if # CHECK-NEXT: drop +# CHECK-NEXT: block +# CHECK-NEXT: i32.const 2 +# CHECK-NEXT: return +# CHECK-NEXT: end_block +# CHECK-NEXT: block +# CHECK-NEXT: return_call something3 +# CHECK-NEXT: end_block +# CHECK-NEXT: block +# CHECK-NEXT: i32.const 3 +# CHECK-NEXT: return_call_indirect __indirect_function_table, () -> (i32) +# CHECK-NEXT: end_block # CHECK-NEXT: local.get 4 # CHECK-NEXT: local.get 5 # CHECK-NEXT: f32x4.add diff --git a/llvm/test/Object/X86/nm-ir.ll b/llvm/test/Object/X86/nm-ir.ll index c90f67b15160d..e57c6d9a11c6e 100644 --- a/llvm/test/Object/X86/nm-ir.ll +++ b/llvm/test/Object/X86/nm-ir.ll @@ -32,12 +32,12 @@ module asm ".long undef_asm_sym" @a1 = alias i32, i32* @g1 @a2 = internal alias i32, i32* @g1 -define void @f1() { +define void ()* @f1() { call void @f5() - ret void + ret void ()* null } -@ifunc_f1 = ifunc void (), void ()* @f1 +@ifunc_f1 = ifunc void (), void ()* ()* @f1 define internal void @f2() { ret void diff --git a/llvm/test/Other/ChangePrinters/DotCfg/lit.local.cfg b/llvm/test/Other/ChangePrinters/DotCfg/lit.local.cfg new file mode 100644 index 0000000000000..a739faf919daa --- /dev/null +++ b/llvm/test/Other/ChangePrinters/DotCfg/lit.local.cfg @@ -0,0 +1,4 @@ +import os + +if not os.path.exists('/usr/bin/dot'): + config.unsupported = True diff --git a/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.ll b/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.ll new file mode 100644 index 0000000000000..cbd9d3013d97c --- /dev/null +++ b/llvm/test/Other/ChangePrinters/DotCfg/print-changed-dot-cfg.ll @@ -0,0 +1,311 @@ +; Simple checks of -print-changed=dot-cfg +; +; Note that (mostly) only the banners are 
checked. +; +; Simple functionality check. +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes=instsimplify -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 4 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-SIMPLE +; +; Check that only the passes that change the IR are printed and that the +; others (including g) are filtered out. +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes=instsimplify -filter-print-funcs=f -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 3 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-FUNC-FILTER +; +; Check that the reporting of IRs is not affected by +; -print-module-scope +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes=instsimplify -print-module-scope -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 4 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-PRINT-MOD-SCOPE +; +; Check that reporting of multiple functions happens +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes=instsimplify -filter-print-funcs="f,g" -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 4 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-FILTER-MULT-FUNC +; +; Check that the reporting of IRs respects -filter-passes +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass" -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 2 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-FILTER-PASSES +; +; Check that the reporting of IRs respects -filter-passes with multiple passes +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 4 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-FILTER-MULT-PASSES +; +; Check that the reporting of IRs respects both -filter-passes and -filter-print-funcs +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 3 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-FILTER-FUNC-PASSES +; +; Check that repeated passes that change the IR are printed and that the +; others (including g) are filtered out. Note that only the first run of +; instsimplify on f will result in changes +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -disable-verify -S -print-changed=dot-cfg -passes="instsimplify,instsimplify" -filter-print-funcs=f -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 3 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC +; +; Simple checks of -print-changed=dot-cfg-quiet +; +; Note that (mostly) only the banners are checked. +; +; Simple functionality check.
+; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes=instsimplify -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 3 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-SIMPLE --allow-empty +; +; Check that only the passes that change the IR are printed and that the +; others (including g) are filtered out. +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes=instsimplify -filter-print-funcs=f -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 2 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-FUNC-FILTER +; +; Check that the reporting of IRs is not affected by +; -print-module-scope +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes=instsimplify -print-module-scope -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 3 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-PRINT-MOD-SCOPE +; +; Check that reporting of multiple functions happens +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes=instsimplify -filter-print-funcs="f,g" -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 3 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-FILTER-MULT-FUNC +; +; Check that the reporting of IRs respects -filter-passes +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass" -dot-cfg-dir=%t < %s -o /dev/null +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-FILTER-PASSES-NONE --allow-empty +; +; Check that the reporting of IRs respects -filter-passes with multiple passes +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 3 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-FILTER-MULT-PASSES +; +; Check that the reporting of IRs respects both -filter-passes and -filter-print-funcs +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes="instsimplify,no-op-function" -filter-passes="NoOpFunctionPass,InstSimplifyPass" -filter-print-funcs=f -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 2 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-FILTER-FUNC-PASSES +; +; Check that repeated passes that change the IR are printed and that the +; others (including g) are filtered out. Note that only the first run of +; instsimplify on f will result in changes +; RUN: rm -rf %t && mkdir -p %t +; RUN: opt -S -print-changed=dot-cfg-quiet -passes="instsimplify,instsimplify" -filter-print-funcs=f -dot-cfg-dir=%t < %s -o /dev/null +; RUN: ls %t/*.pdf %t/passes.html | count 2 +; RUN: FileCheck %s -input-file=%t/passes.html --check-prefix=CHECK-DOT-CFG-QUIET-MULT-PASSES-FILTER-FUNC + +define i32 @g() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +define i32 @f() { +entry: + %a = add i32 2, 3 + ret i32 %a +} + +; CHECK-DOT-CFG-SIMPLE-FILES: passes.html diff_0.pdf diff_1.pdf diff_3.pdf +; CHECK-DOT-CFG-SIMPLE: passes.html +; CHECK-DOT-CFG-SIMPLE-NEXT: +; CHECK-DOT-CFG-SIMPLE-NEXT:
+; CHECK-DOT-CFG-SIMPLE-NEXT:

+; CHECK-DOT-CFG-SIMPLE-NEXT: 0. Initial IR
+; CHECK-DOT-CFG-SIMPLE-NEXT:

+; CHECK-DOT-CFG-SIMPLE-NEXT:

+; CHECK-DOT-CFG-SIMPLE-NEXT: 1. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-SIMPLE-NEXT:

+; CHECK-DOT-CFG-SIMPLE-NEXT: 2. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-SIMPLE-NEXT: 3. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-SIMPLE-NEXT:

+; CHECK-DOT-CFG-SIMPLE-NEXT: 4. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-SIMPLE-NEXT: 5. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-SIMPLE-NEXT: 6. Pass PrintModulePass on [module] omitted because no change
+; CHECK-DOT-CFG-SIMPLE-NEXT: + +; CHECK-DOT-CFG-FUNC-FILTER: passes.html +; CHECK-DOT-CFG-FUNC-FILTER-NEXT: 0. Pass InstSimplifyPass on g filtered out
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: 1. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: +; CHECK-DOT-CFG-FUNC-FILTER-NEXT:
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT:

+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: 2. Initial IR
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT:

+; CHECK-DOT-CFG-FUNC-FILTER-NEXT:

+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: 3. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT:

+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: 4. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: 5. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: 6. Pass PrintModulePass on [module] omitted because no change
+; CHECK-DOT-CFG-FUNC-FILTER-NEXT: + +; CHECK-DOT-CFG-PRINT-MOD-SCOPE: passes.html +; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: +; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT:
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT:

+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: 0. Initial IR
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT:

+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT:

+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: 1. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT:

+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: 2. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: 3. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT:

+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: 4. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: 5. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: 6. Pass PrintModulePass on [module] omitted because no change
+; CHECK-DOT-CFG-PRINT-MOD-SCOPE-NEXT: + +; CHECK-DOT-CFG-FILTER-MULT-FUNC: passes.html +; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: +; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT:
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: 0. Initial IR
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: 1. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: 2. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: 3. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: 4. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: 5. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: 6. Pass PrintModulePass on [module] omitted because no change
+; CHECK-DOT-CFG-FILTER-MULT-FUNC-NEXT: + +; CHECK-DOT-CFG-FILTER-PASSES: passes.html +; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 0. Pass InstSimplifyPass on g filtered out
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: +; CHECK-DOT-CFG-FILTER-PASSES-NEXT:
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 1. Initial IR
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 2. Pass NoOpFunctionPass on g omitted because no change
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 3. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 4. Pass InstSimplifyPass on f filtered out
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 5. Pass NoOpFunctionPass on f omitted because no change
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 6. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 7. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: 8. Pass PrintModulePass on [module] filtered out
+; CHECK-DOT-CFG-FILTER-PASSES-NEXT: + + + +; CHECK-DOT-CFG-FILTER-MULT-PASSES: passes.html +; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: +; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT:
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 0. Initial IR
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 1. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 2. Pass NoOpFunctionPass on g omitted because no change
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 3. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 4. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 5. Pass NoOpFunctionPass on f omitted because no change
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 6. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 7. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: 8. Pass PrintModulePass on [module] filtered out
+; CHECK-DOT-CFG-FILTER-MULT-PASSES-NEXT: + +; CHECK-DOT-CFG-FILTER-FUNC-PASSES: passes.html +; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 0. Pass InstSimplifyPass on g filtered out
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 1. Pass NoOpFunctionPass on g filtered out
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 2. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: +; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT:
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 3. Initial IR
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 4. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT:

+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 5. Pass NoOpFunctionPass on f omitted because no change
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 6. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 7. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: 8. Pass PrintModulePass on [module] filtered out
+; CHECK-DOT-CFG-FILTER-FUNC-PASSES-NEXT: + + +; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC: passes.html +; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 0. Pass InstSimplifyPass on g filtered out
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 1. Pass InstSimplifyPass on g filtered out
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 2. PassManager<llvm::Function> on g ignored
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: +; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT:
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT:

+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 3. Initial IR
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT:

+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT:

+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 4. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT:

+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 5. Pass InstSimplifyPass on f omitted because no change
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 6. PassManager<llvm::Function> on f ignored
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 7. ModuleToFunctionPassAdaptor on [module] ignored
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT: 8. Pass PrintModulePass on [module] omitted because no change
+; CHECK-DOT-CFG-MULT-PASSES-FILTER-FUNC-NEXT:
+
+; CHECK-DOT-CFG-QUIET-SIMPLE: passes.html
+; CHECK-DOT-CFG-QUIET-SIMPLE-NEXT: 0. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-QUIET-SIMPLE-NEXT:
+; CHECK-DOT-CFG-QUIET-SIMPLE-NEXT: 1. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-QUIET-SIMPLE-NEXT:
+; CHECK-DOT-CFG-QUIET-SIMPLE-NEXT:
+
+; CHECK-DOT-CFG-QUIET-FUNC-FILTER: passes.html
+; CHECK-DOT-CFG-QUIET-FUNC-FILTER-NEXT: 0. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-QUIET-FUNC-FILTER-NEXT:
+; CHECK-DOT-CFG-QUIET-FUNC-FILTER-NEXT:
+
+; CHECK-DOT-CFG-QUIET-PRINT-MOD-SCOPE: passes.html
+; CHECK-DOT-CFG-QUIET-PRINT-MOD-SCOPE-NEXT: 0. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-QUIET-PRINT-MOD-SCOPE-NEXT:
+; CHECK-DOT-CFG-QUIET-PRINT-MOD-SCOPE-NEXT: 1. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-QUIET-PRINT-MOD-SCOPE-NEXT:
+; CHECK-DOT-CFG-QUIET-PRINT-MOD-SCOPE-NEXT:
+
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-FUNC: passes.html
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-FUNC-NEXT: 0. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-FUNC-NEXT:
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-FUNC-NEXT: 1. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-FUNC-NEXT:
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-FUNC-NEXT:
+
+; CHECK-DOT-CFG-QUIET-FILTER-PASSES-NONE: passes.html
+; CHECK-DOT-CFG-QUIET-FILTER-PASSES-NONE-NEXT:
+
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-PASSES: passes.html
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-PASSES-NEXT: 0. Pass InstSimplifyPass on g
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-PASSES-NEXT:
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-PASSES-NEXT: 1. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-PASSES-NEXT:
+; CHECK-DOT-CFG-QUIET-FILTER-MULT-PASSES-NEXT:
+
+; CHECK-DOT-CFG-QUIET-FILTER-FUNC-PASSES: passes.html
+; CHECK-DOT-CFG-QUIET-FILTER-FUNC-PASSES-NEXT: 0. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-QUIET-FILTER-FUNC-PASSES-NEXT:
+; CHECK-DOT-CFG-QUIET-FILTER-FUNC-PASSES-NEXT:
+
+; CHECK-DOT-CFG-QUIET-MULT-PASSES-FILTER-FUNC: passes.html
+; CHECK-DOT-CFG-QUIET-MULT-PASSES-FILTER-FUNC-NEXT: 0. Pass InstSimplifyPass on f
+; CHECK-DOT-CFG-QUIET-MULT-PASSES-FILTER-FUNC-NEXT:
+; CHECK-DOT-CFG-QUIET-MULT-PASSES-FILTER-FUNC-NEXT:
diff --git a/llvm/test/Other/loopnest-pass-ordering.ll b/llvm/test/Other/loopnest-pass-ordering.ll
new file mode 100644
index 0000000000000..4f176a8094c61
--- /dev/null
+++ b/llvm/test/Other/loopnest-pass-ordering.ll
@@ -0,0 +1,34 @@
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='no-op-loopnest' %s 2>&1 \
+; RUN:     | FileCheck %s
+
+;            @f()
+;           /    \
+;      loop.0    loop.1
+;      /    \         \
+; loop.0.0  loop.0.1  loop.1.0
+;
+; CHECK: Running pass: NoOpLoopNestPass on Loop at depth 1 containing: %loop.0,%loop.0.0,%loop.0.1,%loop.0.1.preheader,%loop.0.loopexit,%loop.0.0.preheader
+; CHECK: Running pass: NoOpLoopNestPass on Loop at depth 1 containing: %loop.1,%loop.1.bb1,%loop.1.bb2,%loop.1.0,%loop.1.0.preheader,%loop.1.loopexit,%loop.1.backedge
+; CHECK-NOT: Running pass: NoOpLoopNestPass on Loop at depth 2
+
+define void @f() {
+entry:
+  br label %loop.0
+loop.0:
+  br i1 undef, label %loop.0.0, label %loop.1
+loop.0.0:
+  br i1 undef, label %loop.0.0, label %loop.0.1
+loop.0.1:
+  br i1 undef, label %loop.0.1, label %loop.0
+loop.1:
+  br i1 undef, label %loop.1, label %loop.1.bb1
+loop.1.bb1:
+  br i1 undef, label %loop.1, label %loop.1.bb2
+loop.1.bb2:
+  br i1 undef, label %end, label %loop.1.0
+loop.1.0:
+  br i1 undef, label %loop.1.0, label %loop.1
+end:
+  ret void
+}
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 7cf7aecc76adc..b9f90dad82249 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -198,7 +198,7 @@
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
 ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass
-; CHECK-O-NEXT: Running pass:
+; CHECK-O-NEXT: Running pass: CoroSplitPass
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running pass: GlobalDCEPass
@@ -216,6 +216,7 @@
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running pass: LCSSAPass
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
+; CHECK-O-NEXT: Running pass: LoopDeletionPass
 ; CHECK-O-NEXT: Running pass: LoopDistributePass
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
diff --git a/llvm/test/Other/new-pm-eager-invalidate.ll b/llvm/test/Other/new-pm-eager-invalidate.ll
index 188ac9bcaf039..cabc9772da587 100644
--- a/llvm/test/Other/new-pm-eager-invalidate.ll
+++ b/llvm/test/Other/new-pm-eager-invalidate.ll
@@ -1,8 +1,27 @@
-; RUN: opt -disable-verify -debug-pass-manager -passes='function(require)' -disable-output -eagerly-invalidate-analyses %s 2>&1 | FileCheck %s
-; RUN: opt -disable-verify -debug-pass-manager -passes='cgscc(function(require))' -disable-output -eagerly-invalidate-analyses %s 2>&1 | FileCheck %s
+; RUN: opt -disable-verify -debug-pass-manager -passes='function(require)' -disable-output %s 2>&1 | FileCheck %s --check-prefix=NORMAL
+; RUN: opt -disable-verify -debug-pass-manager -passes='cgscc(function(require))' -disable-output %s 2>&1 | FileCheck %s --check-prefix=NORMAL
+; RUN: opt -disable-verify -debug-pass-manager -passes='function(require)' -disable-output %s 2>&1 | FileCheck %s --check-prefix=EAGER
+; RUN: opt -disable-verify -debug-pass-manager -passes='cgscc(function(require))' -disable-output %s 2>&1 | FileCheck %s --check-prefix=EAGER
-; CHECK: Invalidating analysis: NoOpFunctionAnalysis
+; RUN: opt -disable-verify -debug-pass-manager -passes='default' -disable-output %s 2>&1 | FileCheck %s --check-prefix=PIPELINE
+; RUN: opt -disable-verify -debug-pass-manager -passes='default' -eagerly-invalidate-analyses -disable-output %s 2>&1 | FileCheck %s --check-prefix=PIPELINE-EAGER
-define void @foo() {
-  unreachable
+; NORMAL-NOT: Invalidating analysis: NoOpFunctionAnalysis
+; EAGER: Invalidating analysis: NoOpFunctionAnalysis
+; PIPELINE-NOT: Invalidating analysis: DominatorTreeAnalysis
+; PIPELINE-EAGER: Invalidating analysis: DominatorTreeAnalysis
+
+declare void @bar() local_unnamed_addr
+
+define void @foo(i32 %n) local_unnamed_addr {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  tail call void @bar()
+  %cmp = icmp eq i32 %iv, %n
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret void
 }
diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll
index 11fa91684c4dc..da06e93080512 100644
--- a/llvm/test/Other/new-pm-print-pipeline.ll
+++ b/llvm/test/Other/new-pm-print-pipeline.ll
@@ -66,3 +66,6 @@
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='scc-oz-module-inliner' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-21
 ; CHECK-21: require,function(invalidate),require,cgscc(devirt<4>(inline,inline,{{.*}},instcombine{{.*}}))
+
+; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='cgscc(function(no-op-function)),function(no-op-function)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-22
+; CHECK-22: cgscc(function(no-op-function)),function(no-op-function)
diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll
index 1f52fe47ae73c..7836de5c6ccef 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -196,6 +196,7 @@
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LCSSAPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopRotatePass
+; CHECK-POSTLINK-O-NEXT: Running pass: LoopDeletionPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopDistributePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-POSTLINK-O-NEXT: Running pass: LoopVectorizePass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 3a80efba3c565..e66e8672358c1 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -167,6 +167,7 @@
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass on foo
 ; CHECK-O-NEXT: Running pass: LCSSAPass on foo
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
+; CHECK-O-NEXT: Running pass: LoopDeletionPass
 ; CHECK-O-NEXT: Running pass: LoopDistributePass
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index 2e822b21f8a11..410841124c8e7 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -179,6 +179,7 @@
 ; CHECK-O-NEXT: Running pass: LoopSimplifyPass
 ; CHECK-O-NEXT: Running pass: LCSSAPass
 ; CHECK-O-NEXT: Running pass: LoopRotatePass
+; CHECK-O-NEXT: Running pass: LoopDeletionPass
 ; CHECK-O-NEXT: Running pass: LoopDistributePass
 ; CHECK-O-NEXT: Running pass: InjectTLIMappings
 ; CHECK-O-NEXT: Running pass: LoopVectorizePass
diff --git a/llvm/test/Other/print-passes.ll b/llvm/test/Other/print-passes.ll
index 09db6e9678387..955a0cd225468 100644
--- a/llvm/test/Other/print-passes.ll
+++ b/llvm/test/Other/print-passes.ll
@@ -18,6 +18,8 @@
 ; CHECK: no-op-function
 ; CHECK: Function alias analyses:
 ; CHECK: basic-aa
+; CHECK: LoopNest passes:
+; CHECK: no-op-loopnest
 ; CHECK: Loop passes:
 ; CHECK: no-op-loop
 ; CHECK: Loop passes with params:
diff --git a/llvm/test/TableGen/2010-03-24-PrematureDefaults.td b/llvm/test/TableGen/2010-03-24-PrematureDefaults.td
index 24f6c93b3e17c..ace979ce8f5b7 100644
--- a/llvm/test/TableGen/2010-03-24-PrematureDefaults.td
+++ b/llvm/test/TableGen/2010-03-24-PrematureDefaults.td
@@ -1,4 +1,4 @@
-// RUN: llvm-tblgen %s |
FileCheck %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s | FileCheck %s // XFAIL: vg_leak class A x = 1> { diff --git a/llvm/test/TableGen/TemplateArgRename.td b/llvm/test/TableGen/TemplateArgRename.td index 654b86dc03e2f..c5c24cefbd878 100644 --- a/llvm/test/TableGen/TemplateArgRename.td +++ b/llvm/test/TableGen/TemplateArgRename.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s // XFAIL: vg_leak // Make sure there is no collision between XX and XX. diff --git a/llvm/test/TableGen/cond-subclass.td b/llvm/test/TableGen/cond-subclass.td index 9f6f6e2cb8cc6..5f31bf15afb13 100644 --- a/llvm/test/TableGen/cond-subclass.td +++ b/llvm/test/TableGen/cond-subclass.td @@ -1,6 +1,6 @@ // Check that !cond with operands of different subtypes can // initialize a supertype variable. -// RUN: llvm-tblgen %s | FileCheck %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s | FileCheck %s // XFAIL: vg_leak class E {} diff --git a/llvm/test/TableGen/defmclass.td b/llvm/test/TableGen/defmclass.td index 80f03b319426e..2a621847a338c 100644 --- a/llvm/test/TableGen/defmclass.td +++ b/llvm/test/TableGen/defmclass.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | FileCheck %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s | FileCheck %s // XFAIL: vg_leak class XD { bits<4> Prefix = 11; } diff --git a/llvm/test/TableGen/if.td b/llvm/test/TableGen/if.td index b2ba89c8dd087..cd8a8e728df66 100644 --- a/llvm/test/TableGen/if.td +++ b/llvm/test/TableGen/if.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | FileCheck %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s | FileCheck %s // XFAIL: vg_leak // Support for an `!if' operator as part of a `let' statement. diff --git a/llvm/test/TableGen/isa.td b/llvm/test/TableGen/isa.td index cfaacb03b71aa..e4095fb96752d 100644 --- a/llvm/test/TableGen/isa.td +++ b/llvm/test/TableGen/isa.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | FileCheck %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s | FileCheck %s // XFAIL: vg_leak // CHECK: --- Defs --- diff --git a/llvm/test/TableGen/pr8330.td b/llvm/test/TableGen/pr8330.td index 7779b635e33cc..ceabbc50f64c1 100644 --- a/llvm/test/TableGen/pr8330.td +++ b/llvm/test/TableGen/pr8330.td @@ -1,4 +1,4 @@ -// RUN: llvm-tblgen %s | FileCheck %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s | FileCheck %s // XFAIL: vg_leak class Or4 Val> { diff --git a/llvm/test/TableGen/warn-unused-template-arg.td b/llvm/test/TableGen/warn-unused-template-arg.td new file mode 100644 index 0000000000000..5f76e82fc0d9a --- /dev/null +++ b/llvm/test/TableGen/warn-unused-template-arg.td @@ -0,0 +1,25 @@ +// RUN: llvm-tblgen %s 2>&1 | FileCheck %s +// RUN: llvm-tblgen --no-warn-on-unused-template-args %s 2>&1 | FileCheck %s --check-prefix=CHECK-DISABLED + +class UnusedClassArg {} + +// CHECK: warning: unused template argument: UnusedClassArg:foo +// CHECK-NEXT: class UnusedClassArg {} +// CHECK-NEXT: ^ + +multiclass UnusedMultiClassArg { + def bar; +} + +defm : UnusedMultiClassArg<1>; + +// CHECK: warning: unused template argument: UnusedMultiClassArg::foo +// CHECK-NEXT: multiclass UnusedMultiClassArg { +// CHECK-NEXT: ^ + +class NoWarning { + int a = b; +} + +// CHECK-NOT: warning: unused template argument: NoWarning:b +// CHECK-DISABLED-NOT: warning diff --git a/llvm/test/ThinLTO/X86/empty-module.ll b/llvm/test/ThinLTO/X86/empty-module.ll index 3a63a65259da7..04bc0d5942e1a 100644 --- a/llvm/test/ThinLTO/X86/empty-module.ll +++ 
b/llvm/test/ThinLTO/X86/empty-module.ll @@ -10,9 +10,9 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -@foo = ifunc i32 (i32), i64 ()* @foo_ifunc +@foo = ifunc i32 (i32), i32 (i32)* ()* @foo_ifunc -define internal i64 @foo_ifunc() { +define internal i32 (i32)* @foo_ifunc() { entry: - ret i64 0 + ret i32 (i32)* null } diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll index 3ddf81dfc1ac7..c4ae94bdd9100 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll @@ -34,37 +34,23 @@ entry: } define internal void @bar(i32* nocapture %pc) nounwind readonly { -; IS__CGSCC_OPM: Function Attrs: nounwind readonly -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@bar -; IS__CGSCC_OPM-SAME: (i32* nocapture [[PC:%.*]]) #[[ATTR1:[0-9]+]] { -; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: br label [[INDIRECTGOTO:%.*]] -; IS__CGSCC_OPM: lab0: -; IS__CGSCC_OPM-NEXT: [[INDVAR_NEXT:%.*]] = add i32 [[INDVAR:%.*]], 1 -; IS__CGSCC_OPM-NEXT: br label [[INDIRECTGOTO]] -; IS__CGSCC_OPM: end: -; IS__CGSCC_OPM-NEXT: ret void -; IS__CGSCC_OPM: indirectgoto: -; IS__CGSCC_OPM-NEXT: [[INDVAR]] = phi i32 [ [[INDVAR_NEXT]], [[LAB0:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; IS__CGSCC_OPM-NEXT: [[PC_ADDR_0:%.*]] = getelementptr i32, i32* [[PC]], i32 [[INDVAR]] -; IS__CGSCC_OPM-NEXT: [[TMP1_PN:%.*]] = load i32, i32* [[PC_ADDR_0]], align 4 -; IS__CGSCC_OPM-NEXT: [[INDIRECT_GOTO_DEST_IN:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @bar.l, i32 0, i32 [[TMP1_PN]] -; IS__CGSCC_OPM-NEXT: [[INDIRECT_GOTO_DEST:%.*]] = load i8*, i8** [[INDIRECT_GOTO_DEST_IN]], align 8 -; IS__CGSCC_OPM-NEXT: indirectbr i8* [[INDIRECT_GOTO_DEST]], [label [[LAB0]], label %end] -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar -; IS__CGSCC_NPM-SAME: () #[[ATTR1:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: br label [[INDIRECTGOTO:%.*]] -; IS__CGSCC_NPM: lab0: -; IS__CGSCC_NPM-NEXT: [[INDVAR_NEXT:%.*]] = add i32 [[INDVAR:%.*]], 1 -; IS__CGSCC_NPM-NEXT: br label [[INDIRECTGOTO]] -; IS__CGSCC_NPM: end: -; IS__CGSCC_NPM-NEXT: ret void -; IS__CGSCC_NPM: indirectgoto: -; IS__CGSCC_NPM-NEXT: [[INDVAR]] = phi i32 [ [[INDVAR_NEXT]], [[LAB0:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; IS__CGSCC_NPM-NEXT: indirectbr i8* undef, [label [[LAB0]], label %end] +; IS__CGSCC____: Function Attrs: nounwind readonly +; IS__CGSCC____-LABEL: define {{[^@]+}}@bar +; IS__CGSCC____-SAME: (i32* nocapture [[PC:%.*]]) #[[ATTR1:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: br label [[INDIRECTGOTO:%.*]] +; IS__CGSCC____: lab0: +; IS__CGSCC____-NEXT: [[INDVAR_NEXT:%.*]] = add i32 [[INDVAR:%.*]], 1 +; IS__CGSCC____-NEXT: br label [[INDIRECTGOTO]] +; IS__CGSCC____: end: +; IS__CGSCC____-NEXT: ret void +; IS__CGSCC____: indirectgoto: +; IS__CGSCC____-NEXT: [[INDVAR]] = phi i32 [ [[INDVAR_NEXT]], [[LAB0:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; IS__CGSCC____-NEXT: [[PC_ADDR_0:%.*]] = getelementptr i32, i32* [[PC]], i32 [[INDVAR]] +; IS__CGSCC____-NEXT: [[TMP1_PN:%.*]] = load i32, i32* [[PC_ADDR_0]], align 4 +; IS__CGSCC____-NEXT: [[INDIRECT_GOTO_DEST_IN:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @bar.l, i32 0, i32 [[TMP1_PN]] +; IS__CGSCC____-NEXT: [[INDIRECT_GOTO_DEST:%.*]] = load i8*, i8** 
[[INDIRECT_GOTO_DEST_IN]], align 8 +; IS__CGSCC____-NEXT: indirectbr i8* [[INDIRECT_GOTO_DEST]], [label [[LAB0]], label %end] ; entry: br label %indirectgoto @@ -104,11 +90,7 @@ entry: ;. ; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn } ;. -; IS__CGSCC_OPM: attributes #[[ATTR0]] = { nounwind readnone } -; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nounwind readonly } -; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } -;. -; IS__CGSCC_NPM: attributes #[[ATTR0]] = { nounwind readnone } -; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone } -; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } +; IS__CGSCC____: attributes #[[ATTR0]] = { nounwind readnone } +; IS__CGSCC____: attributes #[[ATTR1]] = { nounwind readonly } +; IS__CGSCC____: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index fcb6530c63339..30e2fd69cef5a 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -2432,9 +2432,9 @@ define internal void @dead_with_blockaddress_users(i32* nocapture %pc) nounwind ; IS__CGSCC_OPM-NEXT: [[INDIRECT_GOTO_DEST:%.*]] = load i8*, i8** [[INDIRECT_GOTO_DEST_IN]] ; IS__CGSCC_OPM-NEXT: indirectbr i8* [[INDIRECT_GOTO_DEST]], [label [[LAB0]], label %end] ; -; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone +; IS__CGSCC____: Function Attrs: nounwind readonly ; IS__CGSCC____-LABEL: define {{[^@]+}}@dead_with_blockaddress_users -; IS__CGSCC____-SAME: () #[[ATTR14:[0-9]+]] { +; IS__CGSCC____-SAME: (i32* nocapture [[PC:%.*]]) #[[ATTR14:[0-9]+]] { ; IS__CGSCC____-NEXT: entry: ; IS__CGSCC____-NEXT: br label [[INDIRECTGOTO:%.*]] ; IS__CGSCC____: lab0: @@ -2444,7 +2444,11 @@ define internal void @dead_with_blockaddress_users(i32* nocapture %pc) nounwind ; IS__CGSCC____-NEXT: ret void ; IS__CGSCC____: indirectgoto: ; IS__CGSCC____-NEXT: [[INDVAR]] = phi i32 [ [[INDVAR_NEXT]], [[LAB0:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; IS__CGSCC____-NEXT: indirectbr i8* undef, [label [[LAB0]], label %end] +; IS__CGSCC____-NEXT: [[PC_ADDR_0:%.*]] = getelementptr i32, i32* [[PC]], i32 [[INDVAR]] +; IS__CGSCC____-NEXT: [[TMP1_PN:%.*]] = load i32, i32* [[PC_ADDR_0]], align 4 +; IS__CGSCC____-NEXT: [[INDIRECT_GOTO_DEST_IN:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* @dead_with_blockaddress_users.l, i32 0, i32 [[TMP1_PN]] +; IS__CGSCC____-NEXT: [[INDIRECT_GOTO_DEST:%.*]] = load i8*, i8** [[INDIRECT_GOTO_DEST_IN]], align 8 +; IS__CGSCC____-NEXT: indirectbr i8* [[INDIRECT_GOTO_DEST]], [label [[LAB0]], label %end] ; entry: br label %indirectgoto @@ -2681,7 +2685,7 @@ declare void @llvm.lifetime.end.p0i8(i64 %0, i8* %1) ; IS__CGSCC____: attributes #[[ATTR11]] = { nofree norecurse noreturn nosync nounwind readnone } ; IS__CGSCC____: attributes #[[ATTR12]] = { nofree norecurse noreturn nosync nounwind readnone willreturn } ; IS__CGSCC____: attributes #[[ATTR13]] = { nofree nosync nounwind willreturn } -; IS__CGSCC____: attributes #[[ATTR14]] = { nofree norecurse nosync nounwind readnone } +; IS__CGSCC____: attributes #[[ATTR14]] = { nounwind readonly } ; IS__CGSCC____: attributes #[[ATTR15]] = { nofree nosync nounwind readnone willreturn } ; IS__CGSCC____: attributes #[[ATTR16:[0-9]+]] = { argmemonly nofree nosync nounwind willreturn } ; IS__CGSCC____: 
attributes #[[ATTR17]] = { nounwind willreturn } diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll index bf2862fcac2bd..be8df510ecfa2 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon-alloca.ll @@ -10,7 +10,7 @@ define {i8*, i8*, i32} @f(i8* %buffer, i32 %n) { ; CHECK-NEXT: [[N_VAL_SPILL_ADDR:%.*]] = getelementptr inbounds i8, i8* [[BUFFER:%.*]], i64 8 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[N_VAL_SPILL_ADDR]] to i32* ; CHECK-NEXT: store i32 [[N:%.*]], i32* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @allocate(i32 [[N]]) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @allocate(i32 [[N]]) ; CHECK-NEXT: [[DOTSPILL_ADDR:%.*]] = bitcast i8* [[BUFFER]] to i8** ; CHECK-NEXT: store i8* [[TMP1]], i8** [[DOTSPILL_ADDR]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8*, i8*, i32 } { i8* bitcast ({ i8*, i8*, i32 } (i8*, i1)* @f.resume.0 to i8*), i8* undef, i32 undef }, i8* [[TMP1]], 1 diff --git a/llvm/test/Transforms/Coroutines/coro-retcon.ll b/llvm/test/Transforms/Coroutines/coro-retcon.ll index 5c9e33897139b..7445ba8c1117d 100644 --- a/llvm/test/Transforms/Coroutines/coro-retcon.ll +++ b/llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -72,7 +72,7 @@ entry: define hidden { i8*, i8* } @g(i8* %buffer, i16* %ptr) { ; CHECK-LABEL: @g( ; CHECK-NEXT: coro.return: -; CHECK-NEXT: [[TMP0:%.*]] = tail call i8* @allocate(i32 8) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8* @allocate(i32 8) ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BUFFER:%.*]] to i8** ; CHECK-NEXT: store i8* [[TMP0]], i8** [[TMP1]], align 8 ; CHECK-NEXT: [[PTR_SPILL_ADDR:%.*]] = bitcast i8* [[TMP0]] to i16** diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll index 5abbcbc90e010..9caaacfa00d21 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll @@ -382,7 +382,7 @@ define i32 @switch_range(i32 %cond) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[S:%.*]] = urem i32 [[COND:%.*]], 3 ; CHECK-NEXT: [[S1:%.*]] = add nuw nsw i32 [[S]], 1 -; CHECK-NEXT: switch i32 [[S1]], label [[UNREACHABLE:%.*]] [ +; CHECK-NEXT: switch i32 [[S1]], label [[ENTRY_UNREACHABLEDEFAULT:%.*]] [ ; CHECK-NEXT: i32 1, label [[EXIT1:%.*]] ; CHECK-NEXT: i32 2, label [[EXIT2:%.*]] ; CHECK-NEXT: i32 3, label [[EXIT1]] @@ -391,6 +391,8 @@ define i32 @switch_range(i32 %cond) { ; CHECK-NEXT: ret i32 1 ; CHECK: exit2: ; CHECK-NEXT: ret i32 2 +; CHECK: entry.unreachabledefault: +; CHECK-NEXT: unreachable ; CHECK: unreachable: ; CHECK-NEXT: ret i32 0 ; @@ -453,10 +455,9 @@ define i8 @switch_defaultdest_multipleuse(i8 %t0) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[O:%.*]] = or i8 [[T0:%.*]], 1 ; CHECK-NEXT: [[R:%.*]] = srem i8 1, [[O]] -; CHECK-NEXT: switch i8 [[R]], label [[EXIT:%.*]] [ -; CHECK-NEXT: i8 0, label [[EXIT]] -; CHECK-NEXT: i8 1, label [[EXIT]] -; CHECK-NEXT: ] +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: entry.unreachabledefault: +; CHECK-NEXT: unreachable ; CHECK: exit: ; CHECK-NEXT: ret i8 0 ; @@ -532,7 +533,7 @@ define i1 @smin(i32 %a, i32 %b) { ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[B:%.*]], 20 ; CHECK-NEXT: br i1 [[CMP2]], label [[B_GUARD:%.*]], label [[OUT]] ; CHECK: b_guard: -; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp sle i32 [[A]], [[B]] +; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp ule i32 [[A]], [[B]] ; CHECK-NEXT: 
[[MIN:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 [[B]] ; CHECK-NEXT: ret i1 false ; CHECK: out: @@ -564,7 +565,7 @@ define i1 @smax(i32 %a, i32 %b) { ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[B:%.*]], 20 ; CHECK-NEXT: br i1 [[CMP2]], label [[B_GUARD:%.*]], label [[OUT]] ; CHECK: b_guard: -; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp sge i32 [[A]], [[B]] +; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp uge i32 [[A]], [[B]] ; CHECK-NEXT: [[MAX:%.*]] = select i1 [[SEL_CMP]], i32 [[A]], i32 [[B]] ; CHECK-NEXT: ret i1 false ; CHECK: out: @@ -737,7 +738,7 @@ define i1 @clamp_low3(i32 %a) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[A:%.*]], 5 ; CHECK-NEXT: br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]] ; CHECK: a_guard: -; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp sgt i32 [[A]], 5 +; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp ugt i32 [[A]], 5 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], -1 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[SEL_CMP]], i32 [[ADD]], i32 5 ; CHECK-NEXT: ret i1 false @@ -764,7 +765,7 @@ define i1 @clamp_low4(i32 %a) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[A:%.*]], 5 ; CHECK-NEXT: br i1 [[CMP]], label [[A_GUARD:%.*]], label [[OUT:%.*]] ; CHECK: a_guard: -; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp sle i32 [[A]], 5 +; CHECK-NEXT: [[SEL_CMP:%.*]] = icmp ule i32 [[A]], 5 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[A]], -1 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[SEL_CMP]], i32 5, i32 [[ADD]] ; CHECK-NEXT: ret i1 false @@ -933,10 +934,10 @@ define void @abs1(i32 %a, i1* %p) { ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[A]], 0 ; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[A]] ; CHECK-NEXT: store i1 true, i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[ABS]], 19 +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[ABS]], 19 ; CHECK-NEXT: store i1 [[C2]], i1* [[P]], align 1 ; CHECK-NEXT: store i1 true, i1* [[P]], align 1 -; CHECK-NEXT: [[C4:%.*]] = icmp sge i32 [[ABS]], 1 +; CHECK-NEXT: [[C4:%.*]] = icmp uge i32 [[ABS]], 1 ; CHECK-NEXT: store i1 [[C4]], i1* [[P]], align 1 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -978,10 +979,10 @@ define void @abs2(i32 %a, i1* %p) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[A]], 0 ; CHECK-NEXT: [[ABS:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[SUB]] ; CHECK-NEXT: store i1 true, i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[ABS]], 19 +; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[ABS]], 19 ; CHECK-NEXT: store i1 [[C2]], i1* [[P]], align 1 ; CHECK-NEXT: store i1 true, i1* [[P]], align 1 -; CHECK-NEXT: [[C4:%.*]] = icmp sge i32 [[ABS]], 1 +; CHECK-NEXT: [[C4:%.*]] = icmp uge i32 [[ABS]], 1 ; CHECK-NEXT: store i1 [[C4]], i1* [[P]], align 1 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/deopt.ll b/llvm/test/Transforms/CorrelatedValuePropagation/deopt.ll index af4bf6f90c77a..096c5d7e3698c 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/deopt.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/deopt.ll @@ -97,7 +97,7 @@ define void @test3(i1 %c, i1 %c2) { ; CHECK-LABEL: @test3( ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i64 0, i64 1 ; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[C2:%.*]], i64 [[SEL]], i64 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[SEL2]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SEL2]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[TAKEN:%.*]], label [[UNTAKEN:%.*]] ; CHECK: taken: ; CHECK-NEXT: call void @use() [ "deopt"(i64 2) ] @@ -122,7 +122,7 @@ define void @test4(i1 %c, i1 %c2) { ; CHECK-NEXT: [[SEL2:%.*]] = select i1 
[[C2:%.*]], i64 0, i64 1 ; CHECK-NEXT: [[ADD1:%.*]] = add nuw nsw i64 0, [[SEL]] ; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i64 [[ADD1]], [[SEL2]] -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[ADD2]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[ADD2]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[TAKEN:%.*]], label [[UNTAKEN:%.*]] ; CHECK: taken: ; CHECK-NEXT: call void @use() [ "deopt"(i64 2) ] diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/minmaxabs.ll b/llvm/test/Transforms/CorrelatedValuePropagation/minmaxabs.ll index bc0a0150de76c..5bb34185cde4d 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/minmaxabs.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/minmaxabs.ll @@ -60,7 +60,7 @@ define void @test_smax(i32 %x) { ; CHECK-LABEL: @test_smax( ; CHECK-NEXT: [[M:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 10) ; CHECK-NEXT: call void @use(i1 true) -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[M]], 10 +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[M]], 10 ; CHECK-NEXT: call void @use(i1 [[C2]]) ; CHECK-NEXT: ret void ; @@ -110,7 +110,7 @@ define void @test_abs3(i32 %x) { ; CHECK-LABEL: @test_abs3( ; CHECK-NEXT: [[A:%.*]] = call i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true) ; CHECK-NEXT: call void @use(i1 true) -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[A]], 0 +; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[A]], 0 ; CHECK-NEXT: call void @use(i1 [[C2]]) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll index b4e10110f7490..f3943edbd8fc8 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflow_predicate.ll @@ -113,7 +113,7 @@ define i1 @sadd_ov_true(i8 %x, i8* %px, i1* %pc) { ; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1 ; CHECK-NEXT: br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]] ; CHECK: overflow: -; CHECK-NEXT: [[C1:%.*]] = icmp sgt i8 [[X]], 28 +; CHECK-NEXT: [[C1:%.*]] = icmp ugt i8 [[X]], 28 ; CHECK-NEXT: store i1 [[C1]], i1* [[PC:%.*]], align 1 ; CHECK-NEXT: ret i1 true ; CHECK: trap: @@ -241,7 +241,7 @@ define i1 @ssub_ov_true(i8 %x, i8* %px, i1* %pc) { ; CHECK-NEXT: [[OV:%.*]] = extractvalue { i8, i1 } [[VAL_OV]], 1 ; CHECK-NEXT: br i1 [[OV]], label [[OVERFLOW:%.*]], label [[TRAP:%.*]] ; CHECK: overflow: -; CHECK-NEXT: [[C1:%.*]] = icmp slt i8 [[X]], -29 +; CHECK-NEXT: [[C1:%.*]] = icmp ult i8 [[X]], -29 ; CHECK-NEXT: store i1 [[C1]], i1* [[PC:%.*]], align 1 ; CHECK-NEXT: ret i1 true ; CHECK: trap: diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll index 89bcf9fd15f51..570b5dcad02f2 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll @@ -64,7 +64,7 @@ define i32 @test3(i32 %c) nounwind { ; CHECK: if.then: ; CHECK-NEXT: ret i32 1 ; CHECK: if.end: -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[C]], 3 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[C]], 3 ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN2:%.*]], label [[IF_END8:%.*]] ; CHECK: if.then2: ; CHECK-NEXT: br i1 true, label [[IF_THEN4:%.*]], label [[IF_END6:%.*]] diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll index 9150e8170cc2a..8da9a203be562 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll +++ 
b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll @@ -128,10 +128,10 @@ define void @test5(i32 %n) { ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[DIV1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[A]], 4 +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) ; CHECK-NEXT: [[DIV1]] = udiv i32 [[A]], 6 -; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp sgt i32 [[DIV1]], 8 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ugt i32 [[DIV1]], 8 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/srem.ll b/llvm/test/Transforms/CorrelatedValuePropagation/srem.ll index 192d9160ba40d..c3df87e5969fb 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/srem.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/srem.ll @@ -41,10 +41,10 @@ define void @test4(i32 %n) { ; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[EXIT:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[REM1:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[A]], 4 +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i32 [[A]], 4 ; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) ; CHECK-NEXT: [[REM1]] = urem i32 [[A]], 17 -; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp sgt i32 [[REM1]], 8 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp ugt i32 [[REM1]], 8 ; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOP]], label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/FunctionAttrs/noreturn.ll b/llvm/test/Transforms/FunctionAttrs/noreturn.ll index 098788f93af8f..eba56c9630adb 100644 --- a/llvm/test/Transforms/FunctionAttrs/noreturn.ll +++ b/llvm/test/Transforms/FunctionAttrs/noreturn.ll @@ -71,3 +71,20 @@ define void @callsite_noreturn() { call i32 @f() noreturn ret void } + +; CHECK: Function Attrs: {{.*}}noreturn +; CHECK-NEXT: @unreachable +define void @unreachable() { + unreachable +} + +; CHECK-NOT: Function Attrs: {{.*}}noreturn +; CHECK: @coro +define void @coro() "coroutine.presplit"="1" { + call token @llvm.coro.id.retcon.once(i32 0, i32 0, i8* null, i8* bitcast(void() *@coro to i8*), i8* null, i8* null) + call i1 @llvm.coro.end(i8* null, i1 false) + unreachable +} + +declare token @llvm.coro.id.retcon.once(i32 %size, i32 %align, i8* %buffer, i8* %prototype, i8* %alloc, i8* %free) +declare i1 @llvm.coro.end(i8*, i1) \ No newline at end of file diff --git a/llvm/test/Transforms/GVN/gvn-eliminate-duplicating-phis.ll b/llvm/test/Transforms/GVN/gvn-eliminate-duplicating-phis.ll index 3db03042f9685..0c03574733f17 100644 --- a/llvm/test/Transforms/GVN/gvn-eliminate-duplicating-phis.ll +++ b/llvm/test/Transforms/GVN/gvn-eliminate-duplicating-phis.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -gvn -indvars -S %s | FileCheck %s +target triple = "aarch64--linux-gnu" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" declare void @escape(i32* %ptr) -declare void @foo(i64 %v) +declare void @foo(i64 %v) readonly define void @non_local_load(i32* %ptr) { ; CHECK-LABEL: @non_local_load( @@ -44,17 +46,14 @@ define void @non_local_load_with_iv_zext(i32* %ptr) { ; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 
[[INDVARS_IV_NEXT:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[VAL_INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL_INC:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[VAL_INC]] = add nuw nsw i32 [[VAL]], 1 ; CHECK-NEXT: store i32 [[VAL_INC]], i32* [[PTR]], align 4 ; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP_LOOP_CRIT_EDGE]] -; CHECK: loop.loop_crit_edge: -; CHECK-NEXT: [[VAL_PRE]] = load i32, i32* [[PTR]], align 4 -; CHECK-NEXT: br label [[LOOP]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -85,21 +84,15 @@ define void @two_non_local_loads(i32* %ptr1) { ; CHECK-NEXT: store i32 0, i32* [[PTR2]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[VAL2:%.*]] = phi i32 [ [[VAL2_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL1_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[VAL1_INC:%.*]] = add i32 [[VAL1]], 1 -; CHECK-NEXT: store i32 [[VAL1_INC]], i32* [[PTR1]], align 4 -; CHECK-NEXT: [[VAL2_INC:%.*]] = add i32 [[VAL2]], 1 +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[VAL2:%.*]] = phi i32 [ [[VAL2_INC:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[VAL2_INC]] = add nuw nsw i32 [[VAL2]], 1 +; CHECK-NEXT: store i32 [[VAL2_INC]], i32* [[PTR1]], align 4 ; CHECK-NEXT: store i32 [[VAL2_INC]], i32* [[PTR2]], align 4 ; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP_LOOP_CRIT_EDGE]] -; CHECK: loop.loop_crit_edge: -; CHECK-NEXT: [[VAL1_PRE]] = load i32, i32* [[PTR1]], align 4 -; CHECK-NEXT: [[VAL2_PRE]] = load i32, i32* [[PTR2]], align 4 -; CHECK-NEXT: br label [[LOOP]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/GlobalDCE/global-ifunc.ll b/llvm/test/Transforms/GlobalDCE/global-ifunc.ll index 8022452c34856..e12cead897f91 100644 --- a/llvm/test/Transforms/GlobalDCE/global-ifunc.ll +++ b/llvm/test/Transforms/GlobalDCE/global-ifunc.ll @@ -2,12 +2,12 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -@if = ifunc void (), void ()* @fn +@if = ifunc void (), void ()* ()* @fn -define internal void @fn() { +define internal void ()* @fn() { entry: - ret void + ret void ()* null } -; CHECK-DAG: @if = ifunc void (), void ()* @fn -; CHECK-DAG: define internal void @fn( +; CHECK-DAG: @if = ifunc void (), void ()* ()* @fn +; CHECK-DAG: define internal void ()* @fn( diff --git a/llvm/test/Transforms/IRCE/conjunctive-checks.ll b/llvm/test/Transforms/IRCE/conjunctive-checks.ll index 39ce32ed84052..c589d64ca40e6 100644 --- 
a/llvm/test/Transforms/IRCE/conjunctive-checks.ll +++ b/llvm/test/Transforms/IRCE/conjunctive-checks.ll @@ -1,25 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -verify-loop-info -irce < %s | FileCheck %s ; RUN: opt -S -verify-loop-info -passes='require,irce' < %s | FileCheck %s define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) { ; CHECK-LABEL: @f_0( - -; CHECK: loop.preheader: -; CHECK: [[len_sub:[^ ]+]] = add nsw i32 %len, -4 -; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 %n, i32 [[len_sub]]) -; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[exit_main_loop_at_hiclamp]], i32 0) -; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] -; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader2:[^ ,]+]], label %main.pseudo.exit - -; CHECK: [[loop_preheader2]]: -; CHECK: br label %loop - - entry: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[LEN]], -4 +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[N]], i32 [[TMP0]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader1: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_FOR_ABC:%.*]] = add i32 [[IDX]], 4 +; CHECK-NEXT: [[ABC_ACTUAL:%.*]] = icmp slt i32 [[IDX_FOR_ABC]], [[LEN]] +; CHECK-NEXT: [[COND:%.*]] = load volatile i1, i1* [[COND_BUF:%.*]], align 1 +; CHECK-NEXT: [[ABC:%.*]] = and i1 [[COND]], true +; CHECK-NEXT: br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX_FOR_ABC]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit2: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit.loopexit: +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: 
+; CHECK-NEXT: ret void +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ], [ [[IDX_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_FOR_ABC_POSTLOOP:%.*]] = add i32 [[IDX_POSTLOOP]], 4 +; CHECK-NEXT: [[ABC_ACTUAL_POSTLOOP:%.*]] = icmp slt i32 [[IDX_FOR_ABC_POSTLOOP]], [[LEN]] +; CHECK-NEXT: [[COND_POSTLOOP:%.*]] = load volatile i1, i1* [[COND_BUF]], align 1 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = and i1 [[COND_POSTLOOP]], [[ABC_ACTUAL_POSTLOOP]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof [[PROF1]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_FOR_ABC_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[N]] +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]], !irce.loop.clone !7 +; +entry: %len = load i32, i32* %a_len_ptr, !range !0 %first.itr.check = icmp sgt i32 %n, 0 br i1 %first.itr.check, label %loop, label %exit - loop: +loop: %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] %idx.next = add i32 %idx, 1 %idx.for.abc = add i32 %idx, 4 @@ -28,43 +80,96 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) { %abc = and i1 %cond, %abc.actual br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 -; CHECK: loop: -; CHECK: %cond = load volatile i1, i1* %cond_buf -; CHECK: %abc = and i1 %cond, true -; CHECK: br i1 %abc, label %in.bounds, label %[[loop_exit:[^ ,]+]], !prof !1 - -; CHECK: [[loop_exit]]: -; CHECK: br label %out.of.bounds - - in.bounds: +in.bounds: %addr = getelementptr i32, i32* %arr, i32 %idx.for.abc store i32 0, i32* %addr %next = icmp slt i32 %idx.next, %n br i1 %next, label %loop, label %exit - out.of.bounds: +out.of.bounds: ret void - exit: +exit: ret void } define void @f_1( - i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) { ; CHECK-LABEL: @f_1( - -; CHECK: loop.preheader: -; CHECK: [[smax_len:[^ ]+]] = call i32 @llvm.smin.i32(i32 %len.b, i32 %len.a) -; CHECK: [[upper_limit_loclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 [[smax_len]], i32 %n) -; CHECK: [[upper_limit:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[upper_limit_loclamp]], i32 0) - - entry: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN_A:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[LEN_B:%.*]] = load i32, i32* [[B_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN_B]], i32 [[LEN_A]]) +; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN]], i32 [[N]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN1]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader2: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ] +; 
CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[ABC_A:%.*]] = icmp slt i32 [[IDX]], [[LEN_A]] +; CHECK-NEXT: [[ABC_B:%.*]] = icmp slt i32 [[IDX]], [[LEN_B]] +; CHECK-NEXT: [[ABC:%.*]] = and i1 true, true +; CHECK-NEXT: br i1 [[ABC]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof [[PROF1]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR_A:%.*]] = getelementptr i32, i32* [[ARR_A:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_A]], align 4 +; CHECK-NEXT: [[ADDR_B:%.*]] = getelementptr i32, i32* [[ARR_B:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 -1, i32* [[ADDR_B]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit3: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit.loopexit: +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ], [ [[IDX_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[ABC_A_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[LEN_A]] +; CHECK-NEXT: [[ABC_B_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[LEN_B]] +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = and i1 [[ABC_A_POSTLOOP]], [[ABC_B_POSTLOOP]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof [[PROF1]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_A_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR_A]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_A_POSTLOOP]], align 4 +; CHECK-NEXT: [[ADDR_B_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR_B]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 -1, i32* [[ADDR_B_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[N]] +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone !7 +; + i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) { + + +entry: %len.a = load i32, i32* %a_len_ptr, !range !0 %len.b = load i32, i32* %b_len_ptr, !range !0 %first.itr.check = icmp sgt i32 %n, 0 br i1 %first.itr.check, label %loop, label %exit - loop: +loop: %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] %idx.next = add i32 %idx, 1 %abc.a = icmp slt i32 %idx, %len.a @@ -72,15 +177,7 @@ define void @f_1( %abc = and i1 %abc.a, %abc.b br i1 %abc, label %in.bounds, label %out.of.bounds, 
!prof !1 -; CHECK: loop: -; CHECK: %abc = and i1 true, true -; CHECK: br i1 %abc, label %in.bounds, label %[[oob_loopexit:[^ ,]+]], !prof !1 - -; CHECK: [[oob_loopexit]]: -; CHECK-NEXT: br label %out.of.bounds - - - in.bounds: +in.bounds: %addr.a = getelementptr i32, i32* %arr_a, i32 %idx store i32 0, i32* %addr.a %addr.b = getelementptr i32, i32* %arr_b, i32 %idx @@ -88,10 +185,10 @@ define void @f_1( %next = icmp slt i32 %idx.next, %n br i1 %next, label %loop, label %exit - out.of.bounds: +out.of.bounds: ret void - exit: +exit: ret void } diff --git a/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll b/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll index 778b755521846..ddb48c1ab140e 100644 --- a/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll +++ b/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -1,60 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -verify-loop-info -irce -S < %s | FileCheck %s ; RUN: opt -verify-loop-info -passes='require,irce' -S < %s | FileCheck %s define void @multiple_access_no_preloop( - i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) { +; CHECK-LABEL: @multiple_access_no_preloop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN_A:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[LEN_B:%.*]] = load i32, i32* [[B_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN_B]], i32 [[LEN_A]]) +; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN]], i32 [[N]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN1]], i32 0) +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader2: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS_B:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[ABC_A:%.*]] = icmp slt i32 [[IDX]], [[LEN_A]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS_A:%.*]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: in.bounds.a: +; CHECK-NEXT: [[ADDR_A:%.*]] = getelementptr i32, i32* [[ARR_A:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_A]], align 4 +; CHECK-NEXT: [[ABC_B:%.*]] = icmp slt i32 [[IDX]], [[LEN_B]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS_B]], label [[OUT_OF_BOUNDS_LOOPEXIT3]], !prof [[PROF1]] +; CHECK: in.bounds.b: +; CHECK-NEXT: [[ADDR_B:%.*]] = getelementptr i32, i32* [[ARR_B:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 -1, i32* [[ADDR_B]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS_B]] ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], 
[[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit3: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit.loopexit: +; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_B_POSTLOOP:%.*]] ], [ [[IDX_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[ABC_A_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[LEN_A]] +; CHECK-NEXT: br i1 [[ABC_A_POSTLOOP]], label [[IN_BOUNDS_A_POSTLOOP:%.*]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]], !prof [[PROF1]] +; CHECK: in.bounds.a.postloop: +; CHECK-NEXT: [[ADDR_A_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR_A]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_A_POSTLOOP]], align 4 +; CHECK-NEXT: [[ABC_B_POSTLOOP:%.*]] = icmp slt i32 [[IDX_POSTLOOP]], [[LEN_B]] +; CHECK-NEXT: br i1 [[ABC_B_POSTLOOP]], label [[IN_BOUNDS_B_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT]], !prof [[PROF1]] +; CHECK: in.bounds.b.postloop: +; CHECK-NEXT: [[ADDR_B_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR_B]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 -1, i32* [[ADDR_B_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], [[N]] +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]], !irce.loop.clone !7 +; + i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) { - entry: + entry: %len.a = load i32, i32* %a_len_ptr, !range !0 %len.b = load i32, i32* %b_len_ptr, !range !0 %first.itr.check = icmp sgt i32 %n, 0 br i1 %first.itr.check, label %loop, label %exit - loop: + loop: %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds.b ] %idx.next = add i32 %idx, 1 %abc.a = icmp slt i32 %idx, %len.a br i1 %abc.a, label %in.bounds.a, label %out.of.bounds, !prof !1 - in.bounds.a: + in.bounds.a: %addr.a = getelementptr i32, i32* %arr_a, i32 %idx store i32 0, i32* %addr.a %abc.b = icmp slt i32 %idx, %len.b br i1 %abc.b, label %in.bounds.b, label %out.of.bounds, !prof !1 - in.bounds.b: + in.bounds.b: %addr.b = getelementptr i32, i32* %arr_b, i32 %idx store i32 -1, i32* %addr.b %next = icmp slt i32 %idx.next, %n br i1 %next, label %loop, label %exit - out.of.bounds: + out.of.bounds: ret void - exit: + exit: ret void } -; CHECK-LABEL: @multiple_access_no_preloop( - -; CHECK: loop.preheader: -; CHECK: [[smax_len:[^ ]+]] = call i32 @llvm.smin.i32(i32 %len.b, i32 %len.a) -; CHECK: [[upper_limit_loclamp:[^ ]+]] = call i32 @llvm.smin.i32(i32 [[smax_len]], i32 %n) -; CHECK: [[upper_limit:[^ ]+]] = call i32 @llvm.smax.i32(i32 [[upper_limit_loclamp]], i32 0) - -; CHECK: loop: -; CHECK: br i1 true, label %in.bounds.a, label %out.of.bounds - -; CHECK: in.bounds.a: -; CHECK: br i1 true, label %in.bounds.b, label %out.of.bounds - -; CHECK: in.bounds.b: -; CHECK: [[main_loop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[upper_limit]] -; CHECK: br i1 [[main_loop_cond]], label %loop, label %main.exit.selector - -; CHECK: in.bounds.b.postloop: -; CHECK: 
%next.postloop = icmp slt i32 %idx.next.postloop, %n -; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit - !0 = !{i32 0, i32 2147483647} !1 = !{!"branch_weights", i32 128, i32 4} diff --git a/llvm/test/Transforms/IRCE/ranges_of_different_types.ll b/llvm/test/Transforms/IRCE/ranges_of_different_types.ll index 5a838aa318ba5..7b6eb7656d051 100644 --- a/llvm/test/Transforms/IRCE/ranges_of_different_types.ll +++ b/llvm/test/Transforms/IRCE/ranges_of_different_types.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s ; RUN: opt -verify-loop-info -irce-print-changed-loops -passes='require,irce' -S < %s 2>&1 | FileCheck %s @@ -18,19 +19,60 @@ ; %exit.mainloop.at = 101 define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_01( -; CHECK-NOT: preloop -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nsw i32 %len, -13 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smax.i32(i32 [[SMAX]], i32 0) -; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at -; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit -; CHECK: loop -; CHECK: br i1 true, label %in.bounds -; CHECK: postloop: +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[LEN]], -13 +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 101) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = add i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT1:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit1: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: 
exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = add i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp ult i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 @@ -73,28 +115,87 @@ exit: ; %exit.mainloop.at = 101 define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_02( -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add nuw nsw i32 %len, -2147483647 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smax.i32(i32 [[LEN_MINUS_SMAX]], i32 -13) -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) -; CHECK-NEXT: br i1 true, label %loop.preloop.preheader -; CHECK: loop.preloop: -; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] -; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, 1 -; CHECK-NEXT: %idx.offset.preloop = sub i32 %idx.preloop, 13 -; CHECK-NEXT: %abc.preloop = icmp ult i32 %idx.offset.preloop, %len -; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit -; CHECK: in.bounds.preloop: -; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop -; CHECK-NEXT: store i32 0, i32* %addr.preloop -; CHECK-NEXT: %next.preloop = icmp slt i32 %idx.next.preloop, 101 -; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13 -; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector -; CHECK: postloop: +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[LEN]], -2147483647 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -13) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[LEN]], [[SMAX]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 101) +; CHECK-NEXT: br i1 true, label [[LOOP_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] +; CHECK: loop.preloop.preheader: +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: mainloop: +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ [[IDX_PRELOOP_COPY:%.*]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 
[[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = sub i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP4]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ [[IDX_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit2: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds.loopexit3: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: loop.preloop: +; CHECK-NEXT: [[IDX_PRELOOP:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP:%.*]], [[IN_BOUNDS_PRELOOP:%.*]] ], [ 0, [[LOOP_PRELOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT_PRELOOP]] = add i32 [[IDX_PRELOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_PRELOOP:%.*]] = sub i32 [[IDX_PRELOOP]], 13 +; CHECK-NEXT: [[ABC_PRELOOP:%.*]] = icmp ult i32 [[IDX_OFFSET_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_PRELOOP]], label [[IN_BOUNDS_PRELOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_PRELOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp slt i32 [[IDX_NEXT_PRELOOP]], 101 +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[IDX_NEXT_PRELOOP]], 13 +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone !6 +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ] +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[IDX_NEXT_PRELOOP_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRELOOP_PSEUDO_EXIT]], label [[EXIT]] +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: [[IDX_PRELOOP_COPY]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[MAINLOOP]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = sub i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp ult i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] 
+; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP8:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 @@ -137,17 +238,60 @@ exit: ; %exit.mainloop.at = 101 define void @test_03(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_03( -; CHECK-NOT: preloop -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smin.i32(i32 %len, i32 13) -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB3]], i32 101) -; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at -; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit -; CHECK: postloop: +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN]], i32 13) +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[LEN]], [[SMIN]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 101) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = add i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT1:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit1: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ 
[[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = add i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp slt i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 @@ -190,20 +334,85 @@ exit: ; %exit.mainloop.at = 101 define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_04( -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nuw i32 %len, 13 -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB1]], i32 101) -; CHECK-NEXT: br i1 true, label %loop.preloop.preheader -; CHECK: in.bounds.preloop: -; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop -; CHECK-NEXT: store i32 0, i32* %addr.preloop -; CHECK-NEXT: %next.preloop = icmp ult i32 %idx.next.preloop, 101 -; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13 -; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector -; CHECK: postloop: +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[LEN]], 13 +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 101) +; CHECK-NEXT: br i1 true, label [[LOOP_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] +; CHECK: loop.preloop.preheader: +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: mainloop: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[INDVAR_END:%.*]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ [[IDX_PRELOOP_COPY:%.*]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = sub i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ [[IDX_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[IDX_NEXT_LCSSA]], 
[[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit2: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds.loopexit3: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: loop.preloop: +; CHECK-NEXT: [[IDX_PRELOOP:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP:%.*]], [[IN_BOUNDS_PRELOOP:%.*]] ], [ 0, [[LOOP_PRELOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT_PRELOOP]] = add i32 [[IDX_PRELOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_PRELOOP:%.*]] = sub i32 [[IDX_PRELOOP]], 13 +; CHECK-NEXT: [[ABC_PRELOOP:%.*]] = icmp slt i32 [[IDX_OFFSET_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_PRELOOP]], label [[IN_BOUNDS_PRELOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_PRELOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp ult i32 [[IDX_NEXT_PRELOOP]], 101 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[IDX_NEXT_PRELOOP]], 13 +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP10:![0-9]+]], !irce.loop.clone !6 +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[IDX_NEXT_PRELOOP_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRELOOP_PSEUDO_EXIT]], label [[EXIT]] +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: [[IDX_PRELOOP_COPY]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[MAINLOOP]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = sub i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp slt i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 @@ -231,19 +440,60 @@ exit: ; Signed latch, signed RC, positive offset. Same as test_01. 
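; For reference, a hand-written sketch (not autogenerated, with illustrative
; value names) of the bound IRCE computes for this signed, positive-offset
; shape, restating what the checks above already verify:
;
;   %t = add nsw i32 %len, -13                        ; RC limit: len - offset
;   %smin = call i32 @llvm.smin.i32(i32 %t, i32 101)  ; clamp to latch limit
;   %exit.mainloop.at = call i32 @llvm.smax.i32(i32 %smin, i32 0)
;
; For idx in [0, %exit.mainloop.at) the range check (idx + 13 <s %len) is
; provably true, so the main loop folds it to "br i1 true"; the remaining
; iterations up to 101 run in loop.postloop with the check left intact.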
define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_05( -; CHECK-NOT: preloop -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nsw i32 %len, -13 -; CHECK-NEXT: [[SMAX:%[^ ]+]] = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smax.i32(i32 [[SMAX]], i32 0) -; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at -; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit -; CHECK: loop -; CHECK: br i1 true, label %in.bounds -; CHECK: postloop: +; CHECK-LABEL: @test_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[LEN]], -13 +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 101) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = add i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT1:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit1: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = add i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp slt i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; 
CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 @@ -271,22 +521,87 @@ exit: ; Signed latch, signed RC, negative offset. Same as test_02. define void @test_06(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_06( -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add nuw nsw i32 %len, -2147483647 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smax.i32(i32 [[LEN_MINUS_SMAX]], i32 -13) -; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.smin.i32(i32 [[SUB1]], i32 101) -; CHECK-NEXT: br i1 true, label %loop.preloop.preheader -; CHECK: in.bounds.preloop: -; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop -; CHECK-NEXT: store i32 0, i32* %addr.preloop -; CHECK-NEXT: %next.preloop = icmp slt i32 %idx.next.preloop, 101 -; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp slt i32 %idx.next.preloop, 13 -; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector -; CHECK: postloop: +; CHECK-LABEL: @test_06( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[LEN]], -2147483647 +; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP0]], i32 -13) +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[LEN]], [[SMAX]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 101) +; CHECK-NEXT: br i1 true, label [[LOOP_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] +; CHECK: loop.preloop.preheader: +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: mainloop: +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ [[IDX_PRELOOP_COPY:%.*]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = sub i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP4]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ [[IDX_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; 
CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit2: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds.loopexit3: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: loop.preloop: +; CHECK-NEXT: [[IDX_PRELOOP:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP:%.*]], [[IN_BOUNDS_PRELOOP:%.*]] ], [ 0, [[LOOP_PRELOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT_PRELOOP]] = add i32 [[IDX_PRELOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_PRELOOP:%.*]] = sub i32 [[IDX_PRELOOP]], 13 +; CHECK-NEXT: [[ABC_PRELOOP:%.*]] = icmp slt i32 [[IDX_OFFSET_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_PRELOOP]], label [[IN_BOUNDS_PRELOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_PRELOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp slt i32 [[IDX_NEXT_PRELOOP]], 101 +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[IDX_NEXT_PRELOOP]], 13 +; CHECK-NEXT: br i1 [[TMP5]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP13:![0-9]+]], !irce.loop.clone !6 +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ] +; CHECK-NEXT: [[TMP6:%.*]] = icmp slt i32 [[IDX_NEXT_PRELOOP_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRELOOP_PSEUDO_EXIT]], label [[EXIT]] +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: [[IDX_PRELOOP_COPY]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[MAINLOOP]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = sub i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp slt i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP14:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 @@ -314,19 +629,60 @@ exit: ; Unsigned latch, Unsigned RC, negative offset. Same as test_03. 
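; A hand-written sketch (not autogenerated, with illustrative value names) of
; the bound IRCE computes for this unsigned shape, matching the checks in the
; test below:
;
;   %smin = call i32 @llvm.smin.i32(i32 %len, i32 13)
;   %t = sub i32 %len, %smin
;   %exit.mainloop.at = call i32 @llvm.umin.i32(i32 %t, i32 101)
;
; Within [0, %exit.mainloop.at) the unsigned range check folds to "br i1 true",
; so no preloop is needed; the tail iterations run in loop.postloop with the
; check preserved.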
define void @test_07(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_07( -; CHECK-NOT: preloop -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[SMAX1:%[^ ]+]] = call i32 @llvm.smin.i32(i32 %len, i32 13) -; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB3]], i32 101) -; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at -; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit -; CHECK: loop -; CHECK: br i1 true, label %in.bounds -; CHECK: postloop: +; CHECK-LABEL: @test_07( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN]], i32 13) +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[LEN]], [[SMIN]] +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 101) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = add i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT1:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit1: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = add i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp ult i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: 
[[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP15:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 @@ -354,20 +710,85 @@ exit: ; Unsigned latch, Unsigned RC, negative offset. Same as test_04. define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { - -; CHECK-LABEL: test_08( -; CHECK: entry: -; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, align 4, !range !0 -; CHECK-NEXT: [[SUB1:%[^ ]+]] = add nuw i32 %len, 13 -; CHECK-NEXT: %exit.mainloop.at = call i32 @llvm.umin.i32(i32 [[SUB1]], i32 101) -; CHECK-NEXT: br i1 true, label %loop.preloop.preheader -; CHECK: in.bounds.preloop: -; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop -; CHECK-NEXT: store i32 0, i32* %addr.preloop -; CHECK-NEXT: %next.preloop = icmp ult i32 %idx.next.preloop, 101 -; CHECK-NEXT: [[PRELOOP_COND:%[^ ]+]] = icmp ult i32 %idx.next.preloop, 13 -; CHECK-NEXT: br i1 [[PRELOOP_COND]], label %loop.preloop, label %preloop.exit.selector -; CHECK: postloop: +; CHECK-LABEL: @test_08( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[A_LEN_PTR:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[LEN]], 13 +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 101) +; CHECK-NEXT: br i1 true, label [[LOOP_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] +; CHECK: loop.preloop.preheader: +; CHECK-NEXT: br label [[LOOP_PRELOOP:%.*]] +; CHECK: mainloop: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[INDVAR_END:%.*]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ [[IDX_PRELOOP_COPY:%.*]], [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT: [[IDX_OFFSET:%.*]] = sub i32 [[IDX]], 13 +; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX_OFFSET]], [[LEN]] +; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] +; CHECK-NEXT: store i32 0, i32* [[ADDR]], align 4 +; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], 101 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ [[IDX_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: out.of.bounds.loopexit: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +; CHECK: out.of.bounds.loopexit2: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds.loopexit3: +; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] +; CHECK: out.of.bounds: +; CHECK-NEXT: ret void +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; 
CHECK-NEXT: ret void +; CHECK: loop.preloop: +; CHECK-NEXT: [[IDX_PRELOOP:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP:%.*]], [[IN_BOUNDS_PRELOOP:%.*]] ], [ 0, [[LOOP_PRELOOP_PREHEADER]] ] +; CHECK-NEXT: [[IDX_NEXT_PRELOOP]] = add i32 [[IDX_PRELOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_PRELOOP:%.*]] = sub i32 [[IDX_PRELOOP]], 13 +; CHECK-NEXT: [[ABC_PRELOOP:%.*]] = icmp ult i32 [[IDX_OFFSET_PRELOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_PRELOOP]], label [[IN_BOUNDS_PRELOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT:%.*]] +; CHECK: in.bounds.preloop: +; CHECK-NEXT: [[ADDR_PRELOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_PRELOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_PRELOOP]], align 4 +; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp ult i32 [[IDX_NEXT_PRELOOP]], 101 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[IDX_NEXT_PRELOOP]], 13 +; CHECK-NEXT: br i1 [[TMP4]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP16:![0-9]+]], !irce.loop.clone !6 +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[IDX_NEXT_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[IDX_NEXT_PRELOOP_LCSSA]], 101 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRELOOP_PSEUDO_EXIT]], label [[EXIT]] +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: [[IDX_PRELOOP_COPY]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ 0, [[ENTRY]] ], [ [[IDX_NEXT_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[MAINLOOP]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_POSTLOOP:%.*]] +; CHECK: loop.postloop: +; CHECK-NEXT: [[IDX_POSTLOOP:%.*]] = phi i32 [ [[IDX_COPY]], [[POSTLOOP]] ], [ [[IDX_NEXT_POSTLOOP:%.*]], [[IN_BOUNDS_POSTLOOP:%.*]] ] +; CHECK-NEXT: [[IDX_NEXT_POSTLOOP]] = add i32 [[IDX_POSTLOOP]], 1 +; CHECK-NEXT: [[IDX_OFFSET_POSTLOOP:%.*]] = sub i32 [[IDX_POSTLOOP]], 13 +; CHECK-NEXT: [[ABC_POSTLOOP:%.*]] = icmp ult i32 [[IDX_OFFSET_POSTLOOP]], [[LEN]] +; CHECK-NEXT: br i1 [[ABC_POSTLOOP]], label [[IN_BOUNDS_POSTLOOP]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]] +; CHECK: in.bounds.postloop: +; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, i32* [[ARR]], i32 [[IDX_POSTLOOP]] +; CHECK-NEXT: store i32 0, i32* [[ADDR_POSTLOOP]], align 4 +; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp ult i32 [[IDX_NEXT_POSTLOOP]], 101 +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP17:![0-9]+]], !irce.loop.clone !6 +; entry: %len = load i32, i32* %a_len_ptr, !range !0 diff --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll index 625103f6d4cb2..fefa170e81314 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll @@ -543,7 +543,7 @@ bb7: ; preds = %bb6 define void @test_12(i32* %p) { ; CHECK-LABEL: @test_12( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[P:%.*]], align 4, [[RNG0:!range !.*]] +; CHECK-NEXT: [[N:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 1) ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 ; CHECK-NEXT: br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr27133.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr27133.ll index 1262407ea826b..1a3f6cd6ab9de 100644 --- 
a/llvm/test/Transforms/IndVarSimplify/X86/pr27133.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr27133.ll @@ -1,23 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -indvars -S < %s | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc18.0.0" define i32 @fn2() personality i32 (...)* @__CxxFrameHandler3 { +; CHECK-LABEL: @fn2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[C_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: invoke void @fn1(i64 [[INDVARS_IV]]) +; CHECK-NEXT: to label [[FOR_INC]] unwind label [[CATCH_DISPATCH:%.*]] +; CHECK: catch.dispatch: +; CHECK-NEXT: [[C_0_LCSSA:%.*]] = phi i32 [ [[C_0]], [[FOR_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller +; CHECK: catch: +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [i8* null, i32 64, i8* null] +; CHECK-NEXT: catchret from [[TMP1]] to label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret i32 [[C_0_LCSSA]] +; CHECK: for.inc: +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[C_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]] +; entry: br label %for.cond for.cond: ; preds = %for.inc, %entry %c.0 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] -; CHECK: %[[WIDE:.*]] = phi i64 -; CHECK: %[[NORM:.*]] = phi i32 -; CHECK: invoke void @fn1(i64 %[[WIDE]]) %idxprom = sext i32 %c.0 to i64 invoke void @fn1(i64 %idxprom) - to label %for.inc unwind label %catch.dispatch + to label %for.inc unwind label %catch.dispatch catch.dispatch: ; preds = %for.cond %c.0.lcssa = phi i32 [ %c.0, %for.cond ] -; CHECK: %[[LCSSA:.*]] = phi i32 [ %[[NORM]], %0 = catchswitch within none [label %catch] unwind to caller catch: ; preds = %catch.dispatch @@ -25,7 +43,6 @@ catch: ; preds = %catch.dispatch catchret from %1 to label %exit exit: -; CHECK: ret i32 %[[LCSSA]] ret i32 %c.0.lcssa for.inc: ; preds = %for.cond diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll index 0859c057ed2ae..c5e7a76c64387 100644 --- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll +++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -indvars -S < %s | FileCheck %s +; RUN: opt -indvars -S < %s -indvars-predicate-loops=0 | FileCheck %s ; A collection of tests which demonstrate cases where we can use properties of the loop (i.e.
single exit, finite, mustprogress) to optimize conditions @@ -10,12 +10,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define void @slt_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @slt_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -38,13 +38,13 @@ for.end: ; preds = %for.body, %entry define void @slt_constant_rhs_maythrow(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @slt_constant_rhs_maythrow( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -68,14 +68,14 @@ for.end: ; preds = %for.body, %entry define void @slt_constant_rhs_multiexit(i16 %n.raw, i8 %start, i1 %c) mustprogress { ; CHECK-LABEL: @slt_constant_rhs_multiexit( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: br i1 [[C:%.*]], label [[LATCH]], label [[FOR_END:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -105,7 +105,7 @@ define void @slt_non_constant_rhs(i16 %n) mustprogress { ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -129,13 +129,13 @@ define void @slt_non_constant_rhs_no_mustprogress(i16 %n.raw) { ; CHECK-LABEL: @slt_non_constant_rhs_no_mustprogress( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N:%.*]] = and i16 [[N_RAW:%.*]], 255 +; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 1) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], [[N]] -; CHECK-NEXT: 
br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i16 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i16 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i16 [[INDVARS_IV_NEXT]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -416,12 +416,12 @@ for.end: ; preds = %for.body, %entry define void @sgt_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @sgt_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -448,7 +448,7 @@ define void @sgt_non_constant_rhs(i16 %n) mustprogress { ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -470,12 +470,12 @@ for.end: ; preds = %for.body, %entry define void @sle_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @sle_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ule i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -502,7 +502,7 @@ define void @sle_non_constant_rhs(i16 %n) mustprogress { ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ule i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -524,12 +524,12 @@ for.end: ; preds = %for.body, %entry define void @sge_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @sge_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp uge i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], 
label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -556,7 +556,7 @@ define void @sge_non_constant_rhs(i16 %n) mustprogress { ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 ; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp uge i16 [[ZEXT]], [[N:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i16 [[ZEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -578,12 +578,12 @@ for.end: ; preds = %for.body, %entry define void @ult_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @ult_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -632,12 +632,12 @@ for.end: ; preds = %for.body, %entry define void @ugt_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @ugt_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -656,6 +656,33 @@ for.end: ; preds = %for.body, %entry ret void } +define void @ugt_neg_non_loop(i16 %n.raw, i8 %start) mustprogress { +; CHECK-LABEL: @ugt_neg_non_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i16 [[ZEXT]], -2 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ugt i16 %zext, -2 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + define void @ugt_non_constant_rhs(i16 %n) mustprogress { ; CHECK-LABEL: @ugt_non_constant_rhs( ; CHECK-NEXT: entry: @@ -686,12 +713,12 @@ for.end: ; preds = %for.body, %entry define void @ule_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @ule_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: 
[[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp ule i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp ule i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -740,12 +767,12 @@ for.end: ; preds = %for.body, %entry define void @uge_constant_rhs(i16 %n.raw, i8 %start) mustprogress { ; CHECK-LABEL: @uge_constant_rhs( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 -; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 -; CHECK-NEXT: [[CMP:%.*]] = icmp uge i16 [[ZEXT]], 254 +; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[IV_NEXT]], [[TMP0]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -790,3 +817,238 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } + +; Show that these transformations also work with inverted operands +; We only bother to do this with slt/ult, but it applies to all predicates. + +define void @slt_constant_lhs(i16 %n.raw, i8 %start) mustprogress { +; CHECK-LABEL: @slt_constant_lhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP0]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp slt i16 254, %zext + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @slt_non_constant_lhs(i16 %n) mustprogress { +; CHECK-LABEL: @slt_non_constant_lhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[N:%.*]], [[ZEXT]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp slt i16 %n, %zext + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_constant_lhs(i16 %n.raw, i8 %start) mustprogress { +; CHECK-LABEL: @ult_constant_lhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP0]], [[IV_NEXT]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK:
for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 254, %zext + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @ult_non_constant_lhs(i16 %n) mustprogress { +; CHECK-LABEL: @ult_non_constant_lhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[N:%.*]], [[ZEXT]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %n, %zext + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress { +; CHECK-LABEL: @ult_multiuse_profit( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[START:%.*]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i16 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i16 254 to i8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP2]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[UMAX:%.*]] = call i16 @llvm.umax.i16(i16 [[TMP1]], i16 254) +; CHECK-NEXT: ret i16 [[UMAX]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret i16 %zext +} + +define i16 @ult_multiuse_profit2(i16 %n.raw, i8 %start) mustprogress { +; CHECK-LABEL: @ult_multiuse_profit2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[ZEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[ZEXT]] = zext i8 [[IV_NEXT]] to i16 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[IV2_LCSSA:%.*]] = phi i16 [ [[IV2]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i16 [[IV2_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv2 = phi i16 [%zext, %for.body], [0, %entry] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret i16 %iv2 +} + +define void @slt_restricted_rhs(i16 %n.raw) 
mustprogress { +; CHECK-LABEL: @slt_restricted_rhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N:%.*]] = and i16 [[N_RAW:%.*]], 255 +; CHECK-NEXT: [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 1) +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i16 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i16 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i16 [[INDVARS_IV_NEXT]], [[SMAX]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %n = and i16 %n.raw, 255 + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp slt i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @slt_guarded_rhs(i16 %n) mustprogress { +; CHECK-LABEL: @slt_guarded_rhs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[IN_RANGE:%.*]] = icmp ult i16 [[N:%.*]], 256 +; CHECK-NEXT: br i1 [[IN_RANGE]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[N]] to i8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %in_range = icmp ult i16 %n, 256 + br i1 %in_range, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ 0, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp slt i16 %zext, %n + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll index 469fa7acea9b6..c474899ccba08 100644 --- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -1302,3 +1302,141 @@ exit: failure: unreachable } + +declare void @foo(i64 %v) +declare void @bar(i32 %v) + +define void @test18() { +; CHECK-LABEL: @test18( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: call void @bar(i32 [[IV_NEXT]]) +; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]]) +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %val1 = phi i32 [ %val1.inc, %loop ], [ 0, %entry ] + %val1.inc = add i32 %val1, 1 + %iv.next = add i32 %iv, 1 + call void @bar(i32 %val1.inc) + 
%iv.wide = zext i32 %iv to i64 + call void @foo(i64 %iv.wide) + %loop.cond = icmp eq i32 %iv, 1000 + br i1 %loop.cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +define void @test19() { +; CHECK-LABEL: @test19( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL1:%.*]] = phi i64 [ [[VAL1_INC:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[VAL1_INC]] = add nuw nsw i64 [[VAL1]], 1 +; CHECK-NEXT: call void @foo(i64 [[VAL1_INC]]) +; CHECK-NEXT: call void @foo(i64 [[VAL1]]) +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[VAL1]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %val1 = phi i64 [ %val1.inc, %loop ], [ 0, %entry ] + %val1.inc = add i64 %val1, 1 + %iv.next = add i32 %iv, 1 + call void @foo(i64 %val1.inc) + %iv.wide = zext i32 %iv to i64 + call void @foo(i64 %iv.wide) + %loop.cond = icmp eq i32 %iv, 1000 + br i1 %loop.cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +define void @test20() { +; CHECK-LABEL: @test20( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]]) +; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]]) +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %val1 = phi i32 [ %val1.inc, %loop ], [ 0, %entry ] + %val1.inc = add i32 %val1, 1 + %iv.next = add i32 %iv, 1 + %val1.wide = zext i32 %val1 to i64 + call void @foo(i64 %val1.wide) + %iv.wide = zext i32 %iv to i64 + call void @foo(i64 %iv.wide) + %loop.cond = icmp eq i32 %iv, 1000 + br i1 %loop.cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +define void @test21(i32* %ptr) { +; CHECK-LABEL: @test21( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 0, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[VAL:%.*]] = phi i32 [ [[VAL_INC:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[VAL_INC]] = add nuw nsw i32 [[VAL]], 1 +; CHECK-NEXT: store i32 [[VAL_INC]], i32* [[PTR]], align 4 +; CHECK-NEXT: call void @foo(i64 [[INDVARS_IV]]) +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 1000 +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + store i32 0, i32* %ptr, align 4 + br label %loop + +loop: ; preds = %loop, %entry + %val = phi i32 [ %val.inc, %loop ], [ 0, %entry ] + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %val.inc = add i32 %val, 1 + store i32 %val.inc, i32* %ptr, align 4 + %iv.wide = zext i32 %iv to i64 + call void @foo(i64 %iv.wide) + %iv.next = add i32 %iv, 1 + %loop.cond = icmp eq i32 %iv, 1000 + br i1 %loop.cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} diff --git 
a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll index 3d08ae5e4012a..6b83408ed7e09 100644 --- a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll +++ b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll @@ -18,9 +18,9 @@ define i1 @print_pgm_cond_true(i32 %tmp12.reload, i32* %tmp16.out) { ; CHECK: cond_true: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 [[TMP12_RELOAD:%.*]] ; CHECK-NEXT: [[TMP16]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16_OFF:%.*]] = add i32 [[TMP16]], 31 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[TMP16_OFF]], 62 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB27_EXITSTUB:%.*]], label [[COND_NEXT23_EXITSTUB:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP16]], -32 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], -63 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB27_EXITSTUB:%.*]], label [[COND_NEXT23_EXITSTUB:%.*]] ; newFuncRoot: br label %cond_true @@ -55,9 +55,9 @@ define i1 @print_pgm_cond_true_logical(i32 %tmp12.reload, i32* %tmp16.out) { ; CHECK: cond_true: ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 [[TMP12_RELOAD:%.*]] ; CHECK-NEXT: [[TMP16]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP16_OFF:%.*]] = add i32 [[TMP16]], 31 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i32 [[TMP16_OFF]], 62 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB27_EXITSTUB:%.*]], label [[COND_NEXT23_EXITSTUB:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TMP16]], -32 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], -63 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB27_EXITSTUB:%.*]], label [[COND_NEXT23_EXITSTUB:%.*]] ; newFuncRoot: br label %cond_true diff --git a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll index ffcfe2683cf1d..a7040bbaa0c9b 100644 --- a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll +++ b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll @@ -5,9 +5,9 @@ define i1 @test(i32 %tmp6) { ; CHECK-LABEL: @test( -; CHECK-NEXT: [[TMP6_OFF:%.*]] = add i32 %tmp6, 83 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP6_OFF]], 11 -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP6:%.*]], 71 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -12 +; CHECK-NEXT: ret i1 [[TMP2]] ; %tmp7 = sdiv i32 %tmp6, 12 icmp ne i32 %tmp7, -6 @@ -16,9 +16,9 @@ define i1 @test(i32 %tmp6) { define <2 x i1> @test_vec(<2 x i32> %tmp6) { ; CHECK-LABEL: @test_vec( -; CHECK-NEXT: [[TMP6_OFF:%.*]] = add <2 x i32> %tmp6, -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i32> [[TMP6_OFF]], -; CHECK-NEXT: ret <2 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP6:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp7 = sdiv <2 x i32> %tmp6, icmp ne <2 x i32> %tmp7, diff --git a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll index bec055a2ee7cf..f5a6df2ab80f5 100644 --- a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll +++ b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll @@ -8,10 +8,10 @@ define void @f(i8* %x) nounwind { ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: ; CHECK-NEXT: [[L1:%.*]] = load i8, i8* [[X:%.*]], align 1 -; CHECK-NEXT: [[S1:%.*]] = add i8 [[L1]], -6 -; CHECK-NEXT: [[C1:%.*]] = icmp ugt i8 [[S1]], 2 -; CHECK-NEXT: [[S2:%.*]] = add i8 [[L1]], -10 -; CHECK-NEXT: [[C2:%.*]] 
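The updated checks in these range-test files encode the same exclusion interval before and after: the canonical form merely moves from `add; icmp ugt` to `add; icmp ult` against a negative bound. A standalone C++ check for the i8 pattern from 2008-08-05-And.ll (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // Old form: (l1 - 6) u> 2; new form: (l1 - 9) u< -3.
  // Both are true exactly when l1 is outside {6, 7, 8}.
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t l1 = static_cast<uint8_t>(v);
    bool oldForm = static_cast<uint8_t>(l1 - 6) > 2;
    bool newForm = static_cast<uint8_t>(l1 - 9) < static_cast<uint8_t>(-3);
    assert(oldForm == newForm);
  }
  return 0;
}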
= icmp ugt i8 [[S2]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[L1]], -9 +; CHECK-NEXT: [[C1:%.*]] = icmp ult i8 [[TMP0]], -3 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[L1]], -13 +; CHECK-NEXT: [[C2:%.*]] = icmp ult i8 [[TMP1]], -3 ; CHECK-NEXT: [[A1:%.*]] = and i1 [[C1]], [[C2]] ; CHECK-NEXT: br i1 [[A1]], label [[INCOMPATIBLE:%.*]], label [[OKAY:%.*]] ; CHECK: okay: @@ -45,10 +45,10 @@ define void @f_logical(i8* %x) nounwind { ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: ; CHECK-NEXT: [[L1:%.*]] = load i8, i8* [[X:%.*]], align 1 -; CHECK-NEXT: [[S1:%.*]] = add i8 [[L1]], -6 -; CHECK-NEXT: [[C1:%.*]] = icmp ugt i8 [[S1]], 2 -; CHECK-NEXT: [[S2:%.*]] = add i8 [[L1]], -10 -; CHECK-NEXT: [[C2:%.*]] = icmp ugt i8 [[S2]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[L1]], -9 +; CHECK-NEXT: [[C1:%.*]] = icmp ult i8 [[TMP0]], -3 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[L1]], -13 +; CHECK-NEXT: [[C2:%.*]] = icmp ult i8 [[TMP1]], -3 ; CHECK-NEXT: [[A1:%.*]] = and i1 [[C1]], [[C2]] ; CHECK-NEXT: br i1 [[A1]], label [[INCOMPATIBLE:%.*]], label [[OKAY:%.*]] ; CHECK: okay: diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmla.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmla.ll deleted file mode 100644 index 13d0f3a55c5ea..0000000000000 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmla.ll +++ /dev/null @@ -1,121 +0,0 @@ -; RUN: opt -S -instcombine < %s | FileCheck %s - -target triple = "aarch64-unknown-linux-gnu" - -define dso_local @combine_fmla( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @combine_fmla -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = call fast @llvm.aarch64.sve.fmla.nxv8f16( %5, %1, %2, %3) -; CHECK-NEXT: ret %6 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %7 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - ret %7 -} - -define dso_local @neg_combine_fmla_contract_flag_only( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_contract_flag_only -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = tail call contract @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) -; CHECK-NEXT: %7 = tail call contract @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) -; CHECK-NEXT: ret %7 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call contract @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %7 = tail call contract @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - ret %7 -} - -define dso_local @neg_combine_fmla_reassoc_flag_only( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_reassoc_flag_only -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = tail call reassoc @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) -; CHECK-NEXT: %7 = tail call reassoc @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) -; CHECK-NEXT: ret %7 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call reassoc @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %7 = tail call reassoc @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - ret %7 -} - -define dso_local @neg_combine_fmla_min_flags( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_min_flags -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = call reassoc contract @llvm.aarch64.sve.fmla.nxv8f16( %5, %1, %2, %3) -; CHECK-NEXT: ret 
%6 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call reassoc contract @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %7 = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - ret %7 -} - -define dso_local @neg_combine_fmla_no_fast_flag( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_no_fast_flag -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = tail call @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) -; CHECK-NEXT: %7 = tail call @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) -; CHECK-NEXT: ret %7 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %7 = tail call @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - ret %7 -} - -define dso_local @neg_combine_fmla_no_fmul( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_no_fmul -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %2, %3) -; CHECK-NEXT: %7 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) -; CHECK-NEXT: ret %7 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %2, %3) - %7 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - ret %7 -} - -define dso_local @neg_combine_fmla_neq_pred( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_neq_pred -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 5) -; CHECK-NEXT: %7 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %6) -; CHECK-NEXT: %8 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) -; CHECK-NEXT: %9 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %7, %1, %8) -; ret %9 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 5) - %7 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %6) - %8 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %9 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %7, %1, %8) - ret %9 -} - -define dso_local @neg_combine_fmla_two_fmul_uses( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_two_fmul_uses -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) -; CHECK-NEXT: %7 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) -; CHECK-NEXT: %8 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %7, %6) -; ret %8 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %7 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - %8 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %5, %7, %6) - ret %8 -} - -define dso_local @neg_combine_fmla_neq_flags( %0, %1, %2, %3) local_unnamed_addr #0 { -; CHECK-LABEL: @neg_combine_fmla_neq_flags -; CHECK-NEXT: %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) -; CHECK-NEXT: %6 = tail call reassoc nnan contract @llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) -; CHECK-NEXT: %7 = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) -; ret %7 - %5 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %0) - %6 = tail call reassoc nnan contract 
@llvm.aarch64.sve.fmul.nxv8f16( %5, %2, %3) - %7 = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( %5, %1, %6) - ret %7 -} - -declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() -declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) -declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) -declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) -attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll new file mode 100644 index 0000000000000..2f665d5d6610f --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine -dce < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +;; +;; Gathers. +;; + +define @test_ld1_gather_index_nxv2f64_stride1( %pred, double* %x, i64 %base) #0 { +; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to * +; CHECK-NEXT: [[LD:%.*]] = call @llvm.masked.load.nxv2f64.p0nxv2f64(* [[TMP2]], i32 1, [[PRED:%.*]], zeroinitializer) +; CHECK-NEXT: ret [[LD]] +; + %idx = tail call @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1) + %ld = tail call @llvm.aarch64.sve.ld1.gather.index.nxv2f64( %pred, double* %x, %idx) + ret %ld +} + +define @test_ld1_gather_index_nxv2f64_stride2_negtest( %pred, double* %x, i64 %base) #0 { +; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride2_negtest( +; CHECK-NEXT: [[IDX:%.*]] = tail call @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 2) +; CHECK-NEXT: [[LD:%.*]] = tail call @llvm.aarch64.sve.ld1.gather.index.nxv2f64( [[PRED:%.*]], double* [[X:%.*]], [[IDX]]) +; CHECK-NEXT: ret [[LD]] +; + %idx = tail call @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 2) + %ld = tail call @llvm.aarch64.sve.ld1.gather.index.nxv2f64( %pred, double* %x, %idx) + ret %ld +} + +define @test_ld1_gather_index_nxv2f64_stride1_align8( %pred, double* align 8 %x, i64 %base) #0 { +; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride1_align8( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to * +; CHECK-NEXT: [[LD:%.*]] = call @llvm.masked.load.nxv2f64.p0nxv2f64(* [[TMP2]], i32 8, [[PRED:%.*]], zeroinitializer) +; CHECK-NEXT: ret [[LD]] +; + %idx = tail call @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1) + %ld = tail call @llvm.aarch64.sve.ld1.gather.index.nxv2f64( %pred, double* %x, %idx) + ret %ld +} + +;; +;; Scatters. 
+;; + +define void @test_st1_scatter_index_nxv2f64_stride1( %pred, double* %x, i64 %base, %val) #0 { +; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to * +; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64( [[VAL:%.*]], * [[TMP2]], i32 1, [[PRED:%.*]]) +; CHECK-NEXT: ret void +; + %idx = tail call @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1) + tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64( %val, %pred, double* %x, %idx) + ret void +} + +define void @test_st1_scatter_index_nxv2f64_stride2_negtest( %pred, double* %x, i64 %base, %val) #0 { +; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride2_negtest( +; CHECK-NEXT: [[IDX:%.*]] = tail call @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 2) +; CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64( [[VAL:%.*]], [[PRED:%.*]], double* [[X:%.*]], [[IDX]]) +; CHECK-NEXT: ret void +; + %idx = tail call @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 2) + tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64( %val, %pred, double* %x, %idx) + ret void +} + +define void @test_st1_scatter_index_nxv2f64_stride1_align8( %pred, double* align 8 %x, i64 %base, %val) #0 { +; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride1_align8( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to * +; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64( [[VAL:%.*]], * [[TMP2]], i32 8, [[PRED:%.*]]) +; CHECK-NEXT: ret void +; + %idx = tail call @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1) + tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64( %val, %pred, double* %x, %idx) + ret void +} + +declare @llvm.aarch64.sve.index.nxv2i64(i64, i64) +declare @llvm.aarch64.sve.ld1.gather.index.nxv2f64(, double*, ) +declare void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(, , double*, ) + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladd.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladd.ll new file mode 100644 index 0000000000000..56219c2d9a9bc --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladd.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define dso_local @combine_fmla( %p, %a, %b, %c) local_unnamed_addr #0 { +; CHECK-LABEL: @combine_fmla( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call fast @llvm.aarch64.sve.fmla.nxv8f16( [[TMP1]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) + %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) + %3 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %a, %2) + ret %3 +} + +define dso_local @neg_combine_fmla_mul_first_operand( %p, %a, %b, %c) local_unnamed_addr #0 { +; CHECK-LABEL: @neg_combine_fmla_mul_first_operand( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast 
@llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[TMP2]], [[A:%.*]]) +; CHECK-NEXT: ret [[TMP3]] +; + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) + %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) + %3 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %2, %a) + ret %3 +} + +define dso_local @neg_combine_fmla_contract_flag_only( %p, %a, %b, %c) local_unnamed_addr #0 { +; CHECK-LABEL: @neg_combine_fmla_contract_flag_only( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call contract @llvm.aarch64.sve.fmla.nxv8f16( [[TMP1]], [[A:%.*]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: ret [[TMP2]] +; + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) + %2 = tail call contract @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) + %3 = tail call contract @llvm.aarch64.sve.fadd.nxv8f16( %1, %a, %2) + ret %3 +} + +define dso_local @neg_combine_fmla_no_flags( %p, %a, %b, %c) local_unnamed_addr #0 { +; CHECK-LABEL: @neg_combine_fmla_no_flags( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[A:%.*]], [[TMP2]]) +; CHECK-NEXT: ret [[TMP3]] +; + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) + %2 = tail call @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) + %3 = tail call @llvm.aarch64.sve.fadd.nxv8f16( %1, %a, %2) + ret %3 +} + +define dso_local @neg_combine_fmla_neq_pred( %p, %a, %b, %c) local_unnamed_addr #0 { +; CHECK-LABEL: @neg_combine_fmla_neq_pred( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 5) +; CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[TMP3]], [[A:%.*]], [[TMP4]]) +; CHECK-NEXT: ret [[TMP5]] +; +; ret %9 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) + %2 = tail call @llvm.aarch64.sve.ptrue.nxv16i1(i32 5) + %3 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %2) + %4 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) + %5 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %3, %a, %4) + ret %5 +} + +define dso_local @neg_combine_fmla_two_fmul_uses( %p, %a, %b, %c) local_unnamed_addr #0 { +; CHECK-LABEL: @neg_combine_fmla_two_fmul_uses( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[A:%.*]], [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[TMP3]], [[TMP2]]) +; CHECK-NEXT: ret [[TMP4]] +; +; ret %8 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) + %2 = tail call fast @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) + %3 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %a, %2) + %4 = tail call fast @llvm.aarch64.sve.fadd.nxv8f16( %1, %3, %2) + ret %4 +} + +define dso_local @neg_combine_fmla_neq_flags( %p, %a, %b, %c) local_unnamed_addr #0 
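The positive tests here fold the predicated fmul/fadd pair into fmla only when the calls carry at least the contract fast-math flag; without it, fusing would not be value-preserving, because it skips the intermediate rounding of the product. A standalone scalar illustration of that rounding difference (plain C++, separate from the SVE intrinsics):

#include <cassert>
#include <cmath>

int main() {
  double p = 1e16;      // exactly representable in double
  double prod = p * p;  // product rounded to double
  // An unfused fmul+fadd computes prod + (-prod) == 0, but a fused
  // multiply-add uses the exact product, exposing the nonzero
  // rounding error of p*p.
  double fused = std::fma(p, p, -prod);
  assert(fused != 0.0);
  return 0;
}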
{ +; CHECK-LABEL: @neg_combine_fmla_neq_flags( +; CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = tail call reassoc nnan contract @llvm.aarch64.sve.fmul.nxv8f16( [[TMP1]], [[B:%.*]], [[C:%.*]]) +; CHECK-NEXT: [[TMP3:%.*]] = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( [[TMP1]], [[A:%.*]], [[TMP2]]) +; CHECK-NEXT: ret [[TMP3]] +; +; ret %7 + %1 = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %p) + %2 = tail call reassoc nnan contract @llvm.aarch64.sve.fmul.nxv8f16( %1, %b, %c) + %3 = tail call reassoc contract @llvm.aarch64.sve.fadd.nxv8f16( %1, %a, %2) + ret %3 +} + +declare @llvm.aarch64.sve.convert.from.svbool.nxv8i1() +declare @llvm.aarch64.sve.fmul.nxv8f16(, , ) +declare @llvm.aarch64.sve.fadd.nxv8f16(, , ) +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 00a720b1da265..a9c5d2b1c1b81 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -257,8 +257,8 @@ define i1 @or_eq_with_diff_one_logical(i8 %x) { define i1 @and_ne_with_diff_one(i32 %x) { ; CHECK-LABEL: @and_ne_with_diff_one( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -39 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -41 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp ne i32 %x, 40 @@ -269,8 +269,8 @@ define i1 @and_ne_with_diff_one(i32 %x) { define i1 @and_ne_with_diff_one_logical(i32 %x) { ; CHECK-LABEL: @and_ne_with_diff_one_logical( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -39 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -41 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp ne i32 %x, 40 @@ -308,8 +308,8 @@ define i1 @or_eq_with_diff_one_signed_logical(i32 %x) { define i1 @and_ne_with_diff_one_signed(i64 %x) { ; CHECK-LABEL: @and_ne_with_diff_one_signed( -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp ne i64 %x, -1 @@ -320,8 +320,8 @@ define i1 @and_ne_with_diff_one_signed(i64 %x) { define i1 @and_ne_with_diff_one_signed_logical(i64 %x) { ; CHECK-LABEL: @and_ne_with_diff_one_signed_logical( -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -2 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp ne i64 %x, -1 @@ -346,8 +346,8 @@ define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) { define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) { ; CHECK-LABEL: @and_ne_with_diff_one_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %cmp1 = icmp ne <2 x i32> %x, @@ -508,9 +508,9 @@ define i1 @PR42691_4_logical(i32 %x) { define i1 @PR42691_5(i32 %x) { ; CHECK-LABEL: @PR42691_5( -; 
CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X_OFF]], 2147483645 -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -2147483647 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -2147483646 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp slt i32 %x, 1 %c2 = icmp eq i32 %x, 2147483647 @@ -520,9 +520,9 @@ define i1 @PR42691_5(i32 %x) { define i1 @PR42691_5_logical(i32 %x) { ; CHECK-LABEL: @PR42691_5_logical( -; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X_OFF]], 2147483645 -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -2147483647 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -2147483646 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp slt i32 %x, 1 %c2 = icmp eq i32 %x, 2147483647 @@ -532,9 +532,9 @@ define i1 @PR42691_5_logical(i32 %x) { define i1 @PR42691_6(i32 %x) { ; CHECK-LABEL: @PR42691_6( -; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X_OFF]], 2147483645 -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -2147483646 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp ult i32 %x, 2147483649 %c2 = icmp eq i32 %x, 4294967295 @@ -544,9 +544,9 @@ define i1 @PR42691_6(i32 %x) { define i1 @PR42691_6_logical(i32 %x) { ; CHECK-LABEL: @PR42691_6_logical( -; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X:%.*]], 2147483647 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X_OFF]], 2147483645 -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -2147483646 +; CHECK-NEXT: ret i1 [[TMP2]] ; %c1 = icmp ult i32 %x, 2147483649 %c2 = icmp eq i32 %x, 4294967295 diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll index 82d60ce89539e..2f3add7e77ee1 100644 --- a/llvm/test/Transforms/InstCombine/and-or.ll +++ b/llvm/test/Transforms/InstCombine/and-or.ll @@ -1,6 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +declare void @use(i8) +declare void @use_vec(<2 x i8>) + ; ((b | a) & C1) | (b & C2) -> (a & C1) | b iff C1 == ~C2 define i32 @or_and_not_constant_commute0(i32 %a, i32 %b) { @@ -68,6 +71,193 @@ define <2 x i7> @or_and_not_constant_commute0_splat(<2 x i7> %a, <2 x i7> %b) { ret <2 x i7> %t3 } +; ((x | N) & C1) | (x & C2) --> (x | N) & (C1 | C2) +; iff (C1 & C2) == 0 and (N & ~C1) == 0 + +define i8 @or_and_or_commute0(i8 %x) { +; CHECK-LABEL: @or_and_or_commute0( +; CHECK-NEXT: [[XN:%.*]] = or i8 [[X:%.*]], 16 +; CHECK-NEXT: call void @use(i8 [[XN]]) +; CHECK-NEXT: [[X1:%.*]] = and i8 [[XN]], 59 +; CHECK-NEXT: call void @use(i8 [[X1]]) +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X]], 64 +; CHECK-NEXT: call void @use(i8 [[X2]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[XN]], 123 +; CHECK-NEXT: ret i8 [[R]] +; + %xn = or i8 %x, 16 ; 0001_0000 + call void @use(i8 %xn) + %x1 = and i8 %xn, 59 ; 0011_1011 + call void @use(i8 %x1) + %x2 = and i8 %x, 64 ; 0100_0000 + call void @use(i8 %x2) + %r = or i8 %x1, %x2 + ret i8 %r +} + +define i8 @or_and_or_commute1(i8 %x) { +; CHECK-LABEL: @or_and_or_commute1( +; CHECK-NEXT: [[XN:%.*]] = or i8 [[X:%.*]], 16 +; CHECK-NEXT: call void @use(i8 [[XN]]) +; CHECK-NEXT: [[X1:%.*]] = and i8 [[XN]], 59 +; CHECK-NEXT: call void @use(i8 [[X1]]) +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X]], 64 +; CHECK-NEXT: 
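The ((x | N) & C1) | (x & C2) fold stated above is a pure bit identity under its two masking preconditions, so checking every combination of single bits proves it for any width. A standalone C++ check (illustrative only):

#include <cassert>

int main() {
  // ((x|n) & c1) | (x & c2) == (x|n) & (c1|c2)
  // given (c1 & c2) == 0 and (n & ~c1) == 0; verified per bit.
  for (int x = 0; x <= 1; ++x)
    for (int n = 0; n <= 1; ++n)
      for (int c1 = 0; c1 <= 1; ++c1)
        for (int c2 = 0; c2 <= 1; ++c2) {
          if ((c1 & c2) != 0 || (n & ~c1 & 1) != 0)
            continue; // preconditions of the fold
          int lhs = ((x | n) & c1) | (x & c2);
          int rhs = (x | n) & (c1 | c2);
          assert((lhs & 1) == (rhs & 1));
        }
  return 0;
}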
call void @use(i8 [[X2]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[XN]], 123 +; CHECK-NEXT: ret i8 [[R]] +; + %xn = or i8 %x, 16 ; 0001_0000 + call void @use(i8 %xn) + %x1 = and i8 %xn, 59 ; 0011_1011 + call void @use(i8 %x1) + %x2 = and i8 %x, 64 ; 0100_0000 + call void @use(i8 %x2) + %r = or i8 %x2, %x1 + ret i8 %r +} + +define <2 x i8> @or_and_or_commute1_splat(<2 x i8> %x) { +; CHECK-LABEL: @or_and_or_commute1_splat( +; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[XN]]) +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %xn = or <2 x i8> %x, + call void @use_vec(<2 x i8> %xn) + %x1 = and <2 x i8> %xn, + call void @use_vec(<2 x i8> %x1) + %x2 = and <2 x i8> %x, + call void @use_vec(<2 x i8> %x2) + %r = or <2 x i8> %x2, %x1 + ret <2 x i8> %r +} + +define i8 @or_and_or_commute2(i8 %x, i8 %y) { +; CHECK-LABEL: @or_and_or_commute2( +; CHECK-NEXT: [[N:%.*]] = lshr i8 [[Y:%.*]], 6 +; CHECK-NEXT: [[XN:%.*]] = or i8 [[N]], [[X:%.*]] +; CHECK-NEXT: call void @use(i8 [[XN]]) +; CHECK-NEXT: [[X1:%.*]] = and i8 [[XN]], -69 +; CHECK-NEXT: call void @use(i8 [[X1]]) +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X]], 64 +; CHECK-NEXT: call void @use(i8 [[X2]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[XN]], -5 +; CHECK-NEXT: ret i8 [[R]] +; + %n = lshr i8 %y, 6 + %xn = or i8 %n, %x + call void @use(i8 %xn) + %x1 = and i8 %xn, 187 + call void @use(i8 %x1) + %x2 = and i8 %x, 64 + call void @use(i8 %x2) + %r = or i8 %x1, %x2 + ret i8 %r +} + +define <2 x i8> @or_and_or_commute2_splat(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @or_and_or_commute2_splat( +; CHECK-NEXT: [[N:%.*]] = lshr <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[XN:%.*]] = or <2 x i8> [[N]], [[X:%.*]] +; CHECK-NEXT: call void @use_vec(<2 x i8> [[XN]]) +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[XN]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %n = lshr <2 x i8> %y, + %xn = or <2 x i8> %n, %x + call void @use_vec(<2 x i8> %xn) + %x1 = and <2 x i8> %xn, + call void @use_vec(<2 x i8> %x1) + %x2 = and <2 x i8> %x, + call void @use_vec(<2 x i8> %x2) + %r = or <2 x i8> %x1, %x2 + ret <2 x i8> %r +} + +define i8 @or_and_or_commute3(i8 %x, i8 %y) { +; CHECK-LABEL: @or_and_or_commute3( +; CHECK-NEXT: [[N:%.*]] = lshr i8 [[Y:%.*]], 6 +; CHECK-NEXT: [[XN:%.*]] = or i8 [[N]], [[X:%.*]] +; CHECK-NEXT: call void @use(i8 [[XN]]) +; CHECK-NEXT: [[X1:%.*]] = and i8 [[XN]], -69 +; CHECK-NEXT: call void @use(i8 [[X1]]) +; CHECK-NEXT: [[X2:%.*]] = and i8 [[X]], 64 +; CHECK-NEXT: call void @use(i8 [[X2]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[XN]], -5 +; CHECK-NEXT: ret i8 [[R]] +; + %n = lshr i8 %y, 6 + %xn = or i8 %n, %x + call void @use(i8 %xn) + %x1 = and i8 %xn, 187 + call void @use(i8 %x1) + %x2 = and i8 %x, 64 + call void @use(i8 %x2) + %r = or i8 %x2, %x1 + ret i8 %r +} + +define i8 @or_and2_or2(i8 %x) { +; CHECK-LABEL: @or_and2_or2( +; CHECK-NEXT: [[O1:%.*]] = or i8 [[X:%.*]], 1 +; CHECK-NEXT: call void @use(i8 [[O1]]) +; CHECK-NEXT: [[O2:%.*]] = or i8 [[X]], 2 +; CHECK-NEXT: call void @use(i8 [[O2]]) +; CHECK-NEXT: [[X1:%.*]] = and i8 [[O1]], -71 +; CHECK-NEXT: call void @use(i8 [[X1]]) +; CHECK-NEXT: [[X2:%.*]] 
= and i8 [[O2]], 66 +; CHECK-NEXT: call void @use(i8 [[X2]]) +; CHECK-NEXT: [[BITFIELD:%.*]] = and i8 [[X]], -8 +; CHECK-NEXT: [[R:%.*]] = or i8 [[BITFIELD]], 3 +; CHECK-NEXT: ret i8 [[R]] +; + %o1 = or i8 %x, 1 + call void @use(i8 %o1) + %o2 = or i8 %x, 2 + call void @use(i8 %o2) + %x1 = and i8 %o1, 185 + call void @use(i8 %x1) + %x2 = and i8 %o2, 66 + call void @use(i8 %x2) + %r = or i8 %x1, %x2 + ret i8 %r +} + +define <2 x i8> @or_and2_or2_splat(<2 x i8> %x) { +; CHECK-LABEL: @or_and2_or2_splat( +; CHECK-NEXT: [[O1:%.*]] = or <2 x i8> [[X:%.*]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[O1]]) +; CHECK-NEXT: [[O2:%.*]] = or <2 x i8> [[X]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[O2]]) +; CHECK-NEXT: [[X1:%.*]] = and <2 x i8> [[O1]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[X1]]) +; CHECK-NEXT: [[X2:%.*]] = and <2 x i8> [[O2]], +; CHECK-NEXT: call void @use_vec(<2 x i8> [[X2]]) +; CHECK-NEXT: [[BITFIELD:%.*]] = and <2 x i8> [[X]], +; CHECK-NEXT: [[R:%.*]] = or <2 x i8> [[BITFIELD]], +; CHECK-NEXT: ret <2 x i8> [[R]] +; + %o1 = or <2 x i8> %x, + call void @use_vec(<2 x i8> %o1) + %o2 = or <2 x i8> %x, + call void @use_vec(<2 x i8> %o2) + %x1 = and <2 x i8> %o1, + call void @use_vec(<2 x i8> %x1) + %x2 = and <2 x i8> %o2, + call void @use_vec(<2 x i8> %x2) + %r = or <2 x i8> %x1, %x2 + ret <2 x i8> %r +} + ; Check variants of: ; and ({x}or X, Y), C --> {x}or X, (and Y, C) ; ...in the following 5 tests. diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll index 12e161c01957a..26ced92fc8d37 100644 --- a/llvm/test/Transforms/InstCombine/and-xor-or.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -1336,3 +1336,1077 @@ define i32 @or_and_not_not_wrong_b(i32 %a, i32 %b, i32 %c, i32 %d) { %or3 = or i32 %and, %not1 ret i32 %or3 } + +; (a & ~(b | c)) | ~(a | (b ^ c)) --> (~a & b & c) | ~(b | c) + +define i32 @and_not_or_or_not_or_xor(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_commute1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_commute1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %c, %b + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_commute2(i32 %a0, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_commute2( +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] +; CHECK-NEXT: 
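The and_not_or_or_not_or_xor tests all exercise the fold noted above; since it is a bitwise identity, an exhaustive check over 1-bit operands covers every bit position of i32. A standalone C++ sketch (illustrative only):

#include <cassert>

int main() {
  // (a & ~(b|c)) | ~(a | (b^c)) == (~a & b & c) | ~(b|c), per bit.
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b)
      for (int c = 0; c <= 1; ++c) {
        int lhs = (a & ~(b | c)) | ~(a | (b ^ c));
        int rhs = (~a & b & c) | ~(b | c);
        assert((lhs & 1) == (rhs & 1));
      }
  return 0;
}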
[[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[NOT1]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %a = sdiv i32 42, %a0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %a, %not1 + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_commute3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_commute3( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[C]], [[B]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %c, %b + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_commute4(i32 %a0, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_commute4( +; CHECK-NEXT: [[A:%.*]] = sdiv i32 42, [[A0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[A]], [[NOT1]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[XOR1]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %a = sdiv i32 42, %a0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %a, %not1 + %xor1 = xor i32 %b, %c + %or2 = or i32 %a, %xor1 + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_commute5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_commute5( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %not2, %and1 + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_use1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_use1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: call void @use(i32 [[OR1]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + 
%or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + call void @use(i32 %or1) + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_use2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_use2( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: call void @use(i32 [[NOT1]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + call void @use(i32 %not1) + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_use3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_use3( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: call void @use(i32 [[AND1]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + call void @use(i32 %and1) + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_use4(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_use4( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: call void @use(i32 [[XOR1]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + call void @use(i32 %xor1) + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_use5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_use5( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: call void @use(i32 [[OR2]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + call void @use(i32 %or2) + ret i32 %or3 +} + +define i32 @and_not_or_or_not_or_xor_use6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @and_not_or_or_not_or_xor_use6( +; 
CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT1]], [[A:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[B]], [[C]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[XOR1]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: call void @use(i32 [[NOT2]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %not1 = xor i32 %or1, -1 + %and1 = and i32 %not1, %a + %xor1 = xor i32 %b, %c + %or2 = or i32 %xor1, %a + %not2 = xor i32 %or2, -1 + %or3 = or i32 %and1, %not2 + call void @use(i32 %not2) + ret i32 %or3 +} + +; (~a & b & c) | ~(a | b | c) -> ~(a | (b ^ c)) + +define i32 @not_and_and_or_not_or_or(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute1_or(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute1_or( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[C:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[B:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %c, %a + %or2 = or i32 %or1, %b + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute2_or(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute2_or( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %c + %or2 = or i32 %or1, %a + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute1_and(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute1_and( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[B]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + 
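The second fold in this file, (~a & b & c) | ~(a | b | c) -> ~(a | (b ^ c)), can be checked the same per-bit way (standalone, illustrative):

#include <cassert>

int main() {
  // (~a & b & c) | ~(a|b|c) == ~(a | (b^c)), per bit.
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b)
      for (int c = 0; c <= 1; ++c) {
        int lhs = (~a & b & c) | ~(a | b | c);
        int rhs = ~(a | (b ^ c));
        assert((lhs & 1) == (rhs & 1));
      }
  return 0;
}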
%not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %c + %and2 = and i32 %and1, %b + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute2_and(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute2_and( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %b, %c + %and2 = and i32 %and1, %not2 + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %a, %b + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute2(i32 %a, i32 %b, i32 %c0) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute2( +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[C]], [[OR1]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %c = sdiv i32 42, %c0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %a + %or2 = or i32 %c, %or1 + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute3(i32 %a, i32 %b0, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute3( +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[NOT2]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %b = sdiv i32 42, %b0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %b, %not2 + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_commute4(i32 %a, i32 %b, i32 %c0) { +; CHECK-LABEL: @not_and_and_or_not_or_or_commute4( +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; 
CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[C]], [[AND1]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR3]] +; + %c = sdiv i32 42, %c0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %c, %and1 + %or3 = or i32 %and2, %not1 + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_use1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_use1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[OR1]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + call void @use(i32 %or1) + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_use2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_use2( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[OR2]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + call void @use(i32 %or2) + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_use3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_use3( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT1]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + call void @use(i32 %not1) + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_use4(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_use4( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT2]]) +; 
CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + call void @use(i32 %not2) + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_use5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_use5( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[AND1]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + call void @use(i32 %and1) + ret i32 %or3 +} + +define i32 @not_and_and_or_not_or_or_use6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_not_or_or_use6( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[C:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C]] +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[AND2]]) +; CHECK-NEXT: ret i32 [[OR3]] +; + %or1 = or i32 %b, %a + %or2 = or i32 %or1, %c + %not1 = xor i32 %or2, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or3 = or i32 %and2, %not1 + call void @use(i32 %and2) + ret i32 %or3 +} + +; (~a & b & c) | ~(a | b) -> (c | ~b) & ~a + +define i32 @not_and_and_or_no_or(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_commute1_and(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_commute1_and( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[C:%.*]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %c, %b + %and2 = and i32 %and1, %not2 + %or2 = or i32 %and2, %not1 + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_commute2_and(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_commute2_and( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[C:%.*]] +; 
CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[B]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %c + %and2 = and i32 %and1, %b + %or2 = or i32 %and2, %not1 + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_commute1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_commute1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %a, %b + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_commute2(i32 %a, i32 %b0, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_commute2( +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[NOT2]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR2]] +; + %b = sdiv i32 42, %b0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %b, %not2 + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_commute3(i32 %a, i32 %b, i32 %c0) { +; CHECK-LABEL: @not_and_and_or_no_or_commute3( +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[C]], [[AND1]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR2]] +; + %c = sdiv i32 42, %c0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %c, %and1 + %or2 = or i32 %and2, %not1 + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT2]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + call void @use(i32 %not2) + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use2( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[B]], [[C:%.*]] +; 
CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[NOT2]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT2]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %b, %c + %and2 = and i32 %and1, %not2 + %or2 = or i32 %and2, %not1 + call void @use(i32 %not2) + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use3(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use3( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[B]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT2]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %c + %and2 = and i32 %and1, %b + %or2 = or i32 %and2, %not1 + call void @use(i32 %not2) + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use4(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use4( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[C:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[B]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT2]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %c + %and2 = and i32 %and1, %b + %or2 = or i32 %and2, %not1 + call void @use(i32 %not2) + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use5(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use5( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[OR1]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + call void @use(i32 %or1) + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use6(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use6( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT1]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + call void @use(i32 %not1) + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use7(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use7( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = 
and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[AND1]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + call void @use(i32 %and1) + ret i32 %or2 +} + +define i32 @not_and_and_or_no_or_use8(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_and_and_or_no_or_use8( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR1]], -1 +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[A]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT2]], [[B]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[AND2]]) +; CHECK-NEXT: ret i32 [[OR2]] +; + %or1 = or i32 %b, %a + %not1 = xor i32 %or1, -1 + %not2 = xor i32 %a, -1 + %and1 = and i32 %not2, %b + %and2 = and i32 %and1, %c + %or2 = or i32 %and2, %not1 + call void @use(i32 %and2) + ret i32 %or2 +} + +; (~(a | b) & c) | ~(a | (b | c)) -> ~(a | b) +; (~(a | b) & c) | ~(b | (a | c)) -> ~(a | b) + +define i32 @not_or_and_or_not_or_or(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_or_and_or_not_or_or( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR4]] +; + %or1 = or i32 %b, %c + %or2 = or i32 %or1, %a + %not1 = xor i32 %or2, -1 + %or3 = or i32 %b, %a + %not2 = xor i32 %or3, -1 + %and2 = and i32 %not2, %c + %or4 = or i32 %and2, %not1 + ret i32 %or4 +} + +define i32 @not_or_and_or_not_or_or_commute1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_or_and_or_not_or_or_commute1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[B:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR4]] +; + %or1 = or i32 %a, %c + %or2 = or i32 %or1, %b + %not1 = xor i32 %or2, -1 + %or3 = or i32 %b, %a + %not2 = xor i32 %or3, -1 + %and2 = and i32 %not2, %c + %or4 = or i32 %and2, %not1 + ret i32 %or4 +} + +define i32 @not_or_and_or_not_or_or_commute2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_or_and_or_not_or_or_commute2( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[C:%.*]], [[B:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR4]] +; + %or1 = or i32 %c, %b + %or2 = or i32 %or1, %a + %not1 = xor i32 %or2, -1 + %or3 = or i32 %b, %a + %not2 = xor i32 %or3, -1 + %and2 = and i32 %not2, %c + %or4 = or i32 %and2, %not1 + ret i32 %or4 +} + +define i32 @not_or_and_or_not_or_or_commute3(i32 %a0, i32 %b, i32 %c) { +; CHECK-LABEL: @not_or_and_or_not_or_or_commute3( +; CHECK-NEXT: [[A:%.*]] 
= sdiv i32 42, [[A0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[A]], [[OR1]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[A]], [[B]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR4]] +; + %a = sdiv i32 42, %a0 ; thwart complexity-based canonicalization + %or1 = or i32 %b, %c + %or2 = or i32 %a, %or1 + %not1 = xor i32 %or2, -1 + %or3 = or i32 %a, %b + %not2 = xor i32 %or3, -1 + %and2 = and i32 %not2, %c + %or4 = or i32 %and2, %not1 + ret i32 %or4 +} + +define i32 @not_or_and_or_not_or_or_commute4(i32 %a, i32 %b0, i32 %c) { +; CHECK-LABEL: @not_or_and_or_not_or_or_commute4( +; CHECK-NEXT: [[B:%.*]] = sdiv i32 42, [[B0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[B]], [[OR1]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR4]] +; + %b = sdiv i32 42, %b0 ; thwart complexity-based canonicalization + %or1 = or i32 %a, %c + %or2 = or i32 %b, %or1 + %not1 = xor i32 %or2, -1 + %or3 = or i32 %b, %a + %not2 = xor i32 %or3, -1 + %and2 = and i32 %not2, %c + %or4 = or i32 %and2, %not1 + ret i32 %or4 +} + +define i32 @not_or_and_or_not_or_or_commute5(i32 %a, i32 %b, i32 %c0) { +; CHECK-LABEL: @not_or_and_or_not_or_or_commute5( +; CHECK-NEXT: [[C:%.*]] = sdiv i32 42, [[C0:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[C]], [[A:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[B:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[C]], [[NOT2]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: ret i32 [[OR4]] +; + %c = sdiv i32 42, %c0 ; thwart complexity-based canonicalization + %or1 = or i32 %c, %a + %or2 = or i32 %or1, %b + %not1 = xor i32 %or2, -1 + %or3 = or i32 %b, %a + %not2 = xor i32 %or3, -1 + %and2 = and i32 %c, %not2 + %or4 = or i32 %and2, %not1 + ret i32 %or4 +} + +define i32 @not_or_and_or_not_or_or_use1(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_or_and_or_not_or_or_use1( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[B:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[A:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT1]]) +; CHECK-NEXT: ret i32 [[OR4]] +; + %or1 = or i32 %b, %c + %or2 = or i32 %or1, %a + %not1 = xor i32 %or2, -1 + %or3 = or i32 %b, %a + %not2 = xor i32 %or3, -1 + %and2 = and i32 %not2, %c + %or4 = or i32 %and2, %not1 + call void @use(i32 %not1) + ret i32 %or4 +} + +define i32 @not_or_and_or_not_or_or_use2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @not_or_and_or_not_or_or_use2( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[A:%.*]], [[C:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[OR1]], [[B:%.*]] +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[OR2]], -1 +; CHECK-NEXT: [[OR3:%.*]] = or i32 [[B]], [[A]] +; CHECK-NEXT: [[NOT2:%.*]] = xor i32 [[OR3]], -1 +; 
CHECK-NEXT: [[AND2:%.*]] = and i32 [[NOT2]], [[C]] +; CHECK-NEXT: [[OR4:%.*]] = or i32 [[AND2]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT1]]) +; CHECK-NEXT: ret i32 [[OR4]] +; + %or1 = or i32 %a, %c + %or2 = or i32 %or1, %b + %not1 = xor i32 %or2, -1 + %or3 = or i32 %b, %a + %not2 = xor i32 %or3, -1 + %and2 = and i32 %not2, %c + %or4 = or i32 %and2, %not1 + call void @use(i32 %not1) + ret i32 %or4 +} diff --git a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll index 90d19be434f25..8e8152a520a6d 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll @@ -15,8 +15,8 @@ define i1 @p0(i8 %x) { ; CHECK-LABEL: @p0( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X:%.*]], -4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], -8 ; CHECK-NEXT: ret i1 [[TMP2]] ; %tmp0 = shl i8 %x, 5 @@ -44,8 +44,8 @@ define i1 @pb(i65 %x) { define <2 x i1> @p1_vec_splat(<2 x i8> %x) { ; CHECK-LABEL: @p1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4> -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i8> [[TMP1]], <i8 7, i8 7> +; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 -4, i8 -4> +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 -8, i8 -8> ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %tmp0 = shl <2 x i8> %x, <i8 5, i8 5> @@ -115,8 +115,8 @@ declare i8 @gen8() define i1 @c0() { ; CHECK-LABEL: @c0( ; CHECK-NEXT: [[X:%.*]] = call i8 @gen8() -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], -8 ; CHECK-NEXT: ret i1 [[TMP2]] ; %x = call i8 @gen8() @@ -136,8 +136,8 @@ define i1 @n_oneuse0(i8 %x) { ; CHECK-LABEL: @n_oneuse0( ; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[X:%.*]], 5 ; CHECK-NEXT: call void @use8(i8 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i8 [[TMP1]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], -8 ; CHECK-NEXT: ret i1 [[TMP2]] ; %tmp0 = shl i8 %x, 5 diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll index 1750b5685c50d..eb2f1945dc4d5 100644 --- a/llvm/test/Transforms/InstCombine/icmp-add.ll +++ b/llvm/test/Transforms/InstCombine/icmp-add.ll @@ -363,8 +363,8 @@ define i1 @ult_add_nonuw(i8 %in) { define i1 @uge_add_nonuw(i32 %in) { ; CHECK-LABEL: @uge_add_nonuw( -; CHECK-NEXT: [[A6:%.*]] = add i32 [[IN:%.*]], 3 -; CHECK-NEXT: [[A18:%.*]] = icmp ugt i32 [[A6]], 11 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[IN:%.*]], -9 +; CHECK-NEXT: [[A18:%.*]] = icmp ult i32 [[TMP1]], -12 ; CHECK-NEXT: ret i1 [[A18]] ; %a6 = add i32 %in, 3 @@ -785,8 +785,8 @@ define <2 x i1> @ugt_offset_splat(<2 x i5> %a) { define i1 @ugt_wrong_offset(i8 %a) { ; CHECK-LABEL: @ugt_wrong_offset( -; CHECK-NEXT: [[T:%.*]] = add i8 [[A:%.*]], 123 -; CHECK-NEXT: [[OV:%.*]] = icmp ugt i8 [[T]], -5 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 127 +; CHECK-NEXT: [[OV:%.*]] = icmp ult i8 [[TMP1]], 4 ; CHECK-NEXT: ret i1 [[OV]] ; %t = add i8 %a, 123 diff --git a/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll index 6e76525bad350..5accd3e8594a3 100644 --- a/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll +++
b/llvm/test/Transforms/InstCombine/icmp-custom-dl.ll @@ -159,9 +159,13 @@ define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) { ; Don't transform non-inbounds GEPs. } +; Negative test: GEP inbounds may cross sign boundary. define i1 @test62(i8* %a) { ; CHECK-LABEL: @test62( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i32 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 10 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8* [[ARRAYIDX1]], [[ARRAYIDX2]] +; CHECK-NEXT: ret i1 [[CMP]] ; %arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1 %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10 @@ -171,7 +175,10 @@ define i1 @test62(i8* %a) { define i1 @test62_as1(i8 addrspace(1)* %a) { ; CHECK-LABEL: @test62_as1( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[A:%.*]], i16 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[A]], i16 10 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 addrspace(1)* [[ARRAYIDX1]], [[ARRAYIDX2]] +; CHECK-NEXT: ret i1 [[CMP]] ; %arrayidx1 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 10 diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll index 17a6800a7a79b..40447559bbb80 100644 --- a/llvm/test/Transforms/InstCombine/icmp-sub.ll +++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll @@ -57,8 +57,8 @@ define i1 @test_negative_nuw_and_signed_pred(i64 %x) { define i1 @test_negative_nsw_and_unsigned_pred(i64 %x) { ; CHECK-LABEL: @test_negative_nsw_and_unsigned_pred( -; CHECK-NEXT: [[NOTSUB:%.*]] = add nsw i64 [[X:%.*]], -11 -; CHECK-NEXT: [[Z:%.*]] = icmp ugt i64 [[NOTSUB]], -4 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[X:%.*]], -8 +; CHECK-NEXT: [[Z:%.*]] = icmp ult i64 [[TMP1]], 3 ; CHECK-NEXT: ret i1 [[Z]] ; %y = sub nsw i64 10, %x diff --git a/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll b/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll index 80344391ae7f3..61107cd15d6de 100644 --- a/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll +++ b/llvm/test/Transforms/InstCombine/icmp-topbitssame.ll @@ -35,8 +35,8 @@ define i1 @testi16i8_com(i16 %add) { define i1 @testi16i8_ne(i16 %add) { ; CHECK-LABEL: @testi16i8_ne( -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD:%.*]], 128 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ugt i16 [[TMP1]], 255 +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD:%.*]], -128 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i16 [[TMP1]], -256 ; CHECK-NEXT: ret i1 [[CMP_NOT_I]] ; %sh = lshr i16 %add, 8 @@ -49,8 +49,8 @@ define i1 @testi16i8_ne(i16 %add) { define i1 @testi16i8_ne_com(i16 %add) { ; CHECK-LABEL: @testi16i8_ne_com( -; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD:%.*]], 128 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ugt i16 [[TMP1]], 255 +; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[ADD:%.*]], -128 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i16 [[TMP1]], -256 ; CHECK-NEXT: ret i1 [[CMP_NOT_I]] ; %sh = lshr i16 %add, 8 @@ -77,8 +77,8 @@ define i1 @testi64i32(i64 %add) { define i1 @testi64i32_ne(i64 %add) { ; CHECK-LABEL: @testi64i32_ne( -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[ADD:%.*]], 2147483648 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[ADD:%.*]], -2147483648 +; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp ult i64 [[TMP1]], -4294967296 ; CHECK-NEXT: ret i1 [[CMP_NOT_I]] ; %sh = lshr i64 %add, 32 diff 
--git a/llvm/test/Transforms/InstCombine/icmp-trunc.ll b/llvm/test/Transforms/InstCombine/icmp-trunc.ll index 4c41892d5cd8c..02e30c620461f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-trunc.ll +++ b/llvm/test/Transforms/InstCombine/icmp-trunc.ll @@ -91,8 +91,6 @@ define <2 x i1> @ult_2044_splat(<2 x i16> %x) { ret <2 x i1> %r } -; negative test - need high-bit-mask constant - define i1 @ult_96(i32 %x) { ; CHECK-LABEL: @ult_96( ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 @@ -104,8 +102,6 @@ define i1 @ult_96(i32 %x) { ret i1 %r } -; negative test - no extra use allowed - define i1 @ult_192_use(i32 %x) { ; CHECK-LABEL: @ult_192_use( ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 @@ -119,6 +115,52 @@ define i1 @ult_192_use(i32 %x) { ret i1 %r } +define i1 @ugt_3(i32 %x) { +; CHECK-LABEL: @ugt_3( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T]], 3 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + %r = icmp ugt i8 %t, 3 + ret i1 %r +} + +define <2 x i1> @ugt_7_splat(<2 x i16> %x) { +; CHECK-LABEL: @ugt_7_splat( +; CHECK-NEXT: [[T:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i11> +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i11> [[T]], <i11 7, i11 7> +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %t = trunc <2 x i16> %x to <2 x i11> + %r = icmp ugt <2 x i11> %t, <i11 7, i11 7> + ret <2 x i1> %r +} + +define i1 @ugt_4(i32 %x) { +; CHECK-LABEL: @ugt_4( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T]], 4 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + %r = icmp ugt i8 %t, 4 + ret i1 %r +} + +define i1 @ugt_3_use(i32 %x) { +; CHECK-LABEL: @ugt_3_use( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: call void @use(i8 [[T]]) +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T]], 3 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + call void @use(i8 %t) + %r = icmp ugt i8 %t, 3 + ret i1 %r +} + define i1 @ugt_253(i32 %x) { ; CHECK-LABEL: @ugt_253( ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 @@ -130,21 +172,19 @@ define i1 @ugt_253(i32 %x) { ret i1 %r } -define <2 x i1> @ugt_239_splat(<2 x i16> %x) { -; CHECK-LABEL: @ugt_239_splat( +define <2 x i1> @ugt_2043_splat(<2 x i16> %x) { +; CHECK-LABEL: @ugt_2043_splat( ; CHECK-NEXT: [[T:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i11> -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i11> [[T]], <i11 239, i11 239> +; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i11> [[T]], <i11 -5, i11 -5> ; CHECK-NEXT: ret <2 x i1> [[R]] ; %t = trunc <2 x i16> %x to <2 x i11> - %r = icmp ugt <2 x i11> %t, <i11 239, i11 239> + %r = icmp ugt <2 x i11> %t, <i11 2043, i11 2043> ; 0b111_1111_1011 ret <2 x i1> %r } -; negative test - need inverted power-of-2 constant - -define i1 @ugt_3(i32 %x) { -; CHECK-LABEL: @ugt_3( +define i1 @ugt_252(i32 %x) { +; CHECK-LABEL: @ugt_252( ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T]], -4 ; CHECK-NEXT: ret i1 [[R]] ; @@ -154,10 +194,8 @@ ret i1 %r } -; negative test - no extra use allowed - -define i1 @ugt_2_use(i32 %x) { -; CHECK-LABEL: @ugt_2_use( +define i1 @ugt_253_use(i32 %x) { +; CHECK-LABEL: @ugt_253_use( ; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 ; CHECK-NEXT: call void @use(i8 [[T]]) ; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[T]], -3 ; @@ -168,3 +206,95 @@ %r = icmp ugt i8 %t, 253 ret i1 %r } + +define i1 @slt_0(i32 %x) { +; CHECK-LABEL: @slt_0( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[T]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + %r = icmp slt i8 %t, 0 + ret i1 %r +} + +define <2 x i1> @slt_0_splat(<2 x i16> %x) { +; CHECK-LABEL: @slt_0_splat( +; CHECK-NEXT: [[T:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i11> +; CHECK-NEXT: [[R:%.*]] = icmp slt <2 x i11> [[T]], zeroinitializer +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %t = trunc <2 x i16> %x to <2 x i11> + %r = icmp slt <2 x i11> %t, zeroinitializer + ret <2 x i1> %r +} + +define i1 @slt_1(i32 %x) { +; CHECK-LABEL: @slt_1( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[T]], 1 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + %r = icmp slt i8 %t, 1 + ret i1 %r +} + +define i1 @slt_0_use(i32 %x) { +; CHECK-LABEL: @slt_0_use( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: call void @use(i8 [[T]]) +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[T]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + call void @use(i8 %t) + %r = icmp slt i8 %t, 0 + ret i1 %r +} + +define i1 @sgt_n1(i32 %x) { +; CHECK-LABEL: @sgt_n1( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[T]], -1 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + %r = icmp sgt i8 %t, -1 + ret i1 %r +} + +define <2 x i1> @sgt_n1_splat(<2 x i16> %x) { +; CHECK-LABEL: @sgt_n1_splat( +; CHECK-NEXT: [[T:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i11> +; CHECK-NEXT: [[R:%.*]] = icmp sgt <2 x i11> [[T]], <i11 -1, i11 -1> +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %t = trunc <2 x i16> %x to <2 x i11> + %r = icmp sgt <2 x i11> %t, <i11 -1, i11 -1> + ret <2 x i1> %r +} + +define i1 @sgt_0(i32 %x) { +; CHECK-LABEL: @sgt_0( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[T]], 0 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + %r = icmp sgt i8 %t, 0 + ret i1 %r +} + +define i1 @sgt_n1_use(i32 %x) { +; CHECK-LABEL: @sgt_n1_use( +; CHECK-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: call void @use(i8 [[T]]) +; CHECK-NEXT: [[R:%.*]] = icmp sgt i8 [[T]], -1 +; CHECK-NEXT: ret i1 [[R]] +; + %t = trunc i32 %x to i8 + call void @use(i8 %t) + %r = icmp sgt i8 %t, -1 + ret i1 %r +} diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 3122743eee065..dc7282cd1ad10 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -1129,9 +1129,13 @@ define void @test58() { } declare i32 @test58_d(i64) +; Negative test: GEP inbounds may cross sign boundary.
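+; (Illustrative example, not from the original patch: with 64-bit pointers, if %a sat at 0x7ffffffffffffff8, then %a+1 is still in the positive signed range while %a+10 lands in the negative half, so the signed compare yields false and the old fold to 'true' would be wrong; inbounds only rules out wrapping the unsigned address space.)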
define i1 @test62(i8* %a) { ; CHECK-LABEL: @test62( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 10 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8* [[ARRAYIDX1]], [[ARRAYIDX2]] +; CHECK-NEXT: ret i1 [[CMP]] ; %arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1 %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10 @@ -1141,7 +1145,10 @@ define i1 @test62(i8* %a) { define i1 @test62_as1(i8 addrspace(1)* %a) { ; CHECK-LABEL: @test62_as1( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[A:%.*]], i16 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[A]], i16 10 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 addrspace(1)* [[ARRAYIDX1]], [[ARRAYIDX2]] +; CHECK-NEXT: ret i1 [[CMP]] ; %arrayidx1 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 1 %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 10 diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll index ec47af3ce2e1f..b84a885886584 100644 --- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -696,7 +696,7 @@ define <5 x float> @insert_undemanded_element_unequal_length_op0(<4 x float> %x, ; CHECK-LABEL: @insert_undemanded_element_unequal_length_op0( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[INS]], <4 x float> [[Y:%.*]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 42.0, i32 3 @@ -709,7 +709,7 @@ define <5 x float> @insert_undemanded_element_unequal_length_op1(<4 x float> %x, ; CHECK-LABEL: @insert_undemanded_element_unequal_length_op1( ; CHECK-NEXT: [[INS:%.*]] = insertelement <4 x float> [[X:%.*]], float 4.200000e+01, i32 3 ; CHECK-NEXT: call void @use(<4 x float> [[INS]]) -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[INS]], <5 x i32> +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[X]], <5 x i32> ; CHECK-NEXT: ret <5 x float> [[S]] ; %ins = insertelement <4 x float> %x, float 42.0, i32 3 diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll index 92bf377d016c8..0176093e31f47 100644 --- a/llvm/test/Transforms/InstCombine/load-cmp.ll +++ b/llvm/test/Transforms/InstCombine/load-cmp.ll @@ -145,8 +145,8 @@ define i1 @test6(i32 %X) { define i1 @test7(i32 %X) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[R:%.*]] = icmp ugt i32 [[TMP1]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -4 +; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], -3 ; CHECK-NEXT: ret i1 [[R]] ; %P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll index ec2f47d240b86..43e97cca05bab 100644 --- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll @@ -19,8 +19,8 @@ define i32 @foo(i32 %a, i32 %b, 
i32 %c, i32 %d) { define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[E_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E_NOT]], i32 [[C:%.*]], i32 [[D:%.*]] +; CHECK-NEXT: [[E:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E]], i32 [[C:%.*]], i32 [[D:%.*]] ; CHECK-NEXT: ret i32 [[TMP1]] ; %e = icmp slt i32 %a, %b @@ -472,9 +472,9 @@ define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) { define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) { ; CHECK-LABEL: @vec_of_casted_bools( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP2]], <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP1]], <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: ret i4 [[TMP4]] ; @@ -582,8 +582,8 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) { ; CHECK-LABEL: @allSignBits( -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp slt i32 [[COND:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 [[COND:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]] ; CHECK-NEXT: ret i32 [[TMP1]] ; %bitmask = ashr i32 %cond, 31 diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index 2d22f565473ae..610eb20eaf3e2 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -19,8 +19,8 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) { define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[E_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E_NOT]], i32 [[C:%.*]], i32 [[D:%.*]] +; CHECK-NEXT: [[E:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[E]], i32 [[C:%.*]], i32 [[D:%.*]] ; CHECK-NEXT: ret i32 [[TMP1]] ; %e = icmp slt i32 %a, %b @@ -472,9 +472,9 @@ define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) { define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) { ; CHECK-LABEL: @vec_of_casted_bools( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1> -; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP2]], <4 x i1> [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP1]], <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 ; CHECK-NEXT: ret i4 [[TMP4]] ; @@ -582,8 +582,8 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) { ; CHECK-LABEL: @allSignBits( -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp slt i32 [[COND:%.*]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[TVAL:%.*]], i32 [[FVAL:%.*]] +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 
[[COND:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]] +; CHECK-NEXT: ret i32 [[TMP1]] ; %bitmask = ashr i32 %cond, 31 @@ -682,3 +682,88 @@ define <4 x i32> @computesignbits_through_two_input_shuffle(<4 x i32> %x, <4 x i ret <4 x i32> %sel } +define <2 x i64> @bitcast_vec_cond(<16 x i1> %cond, <2 x i64> %c, <2 x i64> %d) { +; CHECK-LABEL: @bitcast_vec_cond( +; CHECK-NEXT: [[S:%.*]] = sext <16 x i1> [[COND:%.*]] to <16 x i8> +; CHECK-NEXT: [[T9:%.*]] = bitcast <16 x i8> [[S]] to <2 x i64> +; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i64> [[T9]], <i64 -1, i64 -1> +; CHECK-NEXT: [[T11:%.*]] = and <2 x i64> [[NOTT9]], [[C:%.*]] +; CHECK-NEXT: [[T12:%.*]] = and <2 x i64> [[T9]], [[D:%.*]] +; CHECK-NEXT: [[R:%.*]] = or <2 x i64> [[T11]], [[T12]] +; CHECK-NEXT: ret <2 x i64> [[R]] +; + %s = sext <16 x i1> %cond to <16 x i8> + %t9 = bitcast <16 x i8> %s to <2 x i64> + %nott9 = xor <2 x i64> %t9, <i64 -1, i64 -1> + %t11 = and <2 x i64> %nott9, %c + %t12 = and <2 x i64> %t9, %d + %r = or <2 x i64> %t11, %t12 + ret <2 x i64> %r +} + +define <8 x i3> @bitcast_vec_cond_commute1(<3 x i1> %cond, <8 x i3> %pc, <8 x i3> %d) { +; CHECK-LABEL: @bitcast_vec_cond_commute1( +; CHECK-NEXT: [[C:%.*]] = mul <8 x i3> [[PC:%.*]], [[PC]] +; CHECK-NEXT: [[S:%.*]] = sext <3 x i1> [[COND:%.*]] to <3 x i8> +; CHECK-NEXT: [[T9:%.*]] = bitcast <3 x i8> [[S]] to <8 x i3> +; CHECK-NEXT: [[NOTT9:%.*]] = xor <8 x i3> [[T9]], <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1> +; CHECK-NEXT: [[T11:%.*]] = and <8 x i3> [[C]], [[NOTT9]] +; CHECK-NEXT: [[T12:%.*]] = and <8 x i3> [[T9]], [[D:%.*]] +; CHECK-NEXT: [[R:%.*]] = or <8 x i3> [[T11]], [[T12]] +; CHECK-NEXT: ret <8 x i3> [[R]] +; + %c = mul <8 x i3> %pc, %pc ; thwart complexity-based canonicalization + %s = sext <3 x i1> %cond to <3 x i8> + %t9 = bitcast <3 x i8> %s to <8 x i3> + %nott9 = xor <8 x i3> %t9, <i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1, i3 -1> + %t11 = and <8 x i3> %c, %nott9 + %t12 = and <8 x i3> %t9, %d + %r = or <8 x i3> %t11, %t12 + ret <8 x i3> %r +} + +define <2 x i16> @bitcast_vec_cond_commute2(<4 x i1> %cond, <2 x i16> %pc, <2 x i16> %pd) { +; CHECK-LABEL: @bitcast_vec_cond_commute2( +; CHECK-NEXT: [[C:%.*]] = mul <2 x i16> [[PC:%.*]], [[PC]] +; CHECK-NEXT: [[D:%.*]] = mul <2 x i16> [[PD:%.*]], [[PD]] +; CHECK-NEXT: [[S:%.*]] = sext <4 x i1> [[COND:%.*]] to <4 x i8> +; CHECK-NEXT: [[T9:%.*]] = bitcast <4 x i8> [[S]] to <2 x i16> +; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i16> [[T9]], <i16 -1, i16 -1> +; CHECK-NEXT: [[T11:%.*]] = and <2 x i16> [[C]], [[NOTT9]] +; CHECK-NEXT: [[T12:%.*]] = and <2 x i16> [[D]], [[T9]] +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[T11]], [[T12]] +; CHECK-NEXT: ret <2 x i16> [[R]] +; + %c = mul <2 x i16> %pc, %pc ; thwart complexity-based canonicalization + %d = mul <2 x i16> %pd, %pd ; thwart complexity-based canonicalization + %s = sext <4 x i1> %cond to <4 x i8> + %t9 = bitcast <4 x i8> %s to <2 x i16> + %nott9 = xor <2 x i16> %t9, <i16 -1, i16 -1> + %t11 = and <2 x i16> %c, %nott9 + %t12 = and <2 x i16> %d, %t9 + %r = or <2 x i16> %t11, %t12 + ret <2 x i16> %r +} + +define <2 x i16> @bitcast_vec_cond_commute3(<4 x i8> %cond, <2 x i16> %pc, <2 x i16> %pd) { +; CHECK-LABEL: @bitcast_vec_cond_commute3( +; CHECK-NEXT: [[C:%.*]] = mul <2 x i16> [[PC:%.*]], [[PC]] +; CHECK-NEXT: [[D:%.*]] = mul <2 x i16> [[PD:%.*]], [[PD]] +; CHECK-NEXT: [[S:%.*]] = ashr <4 x i8> [[COND:%.*]], <i8 7, i8 7, i8 7, i8 7> +; CHECK-NEXT: [[T9:%.*]] = bitcast <4 x i8> [[S]] to <2 x i16> +; CHECK-NEXT: [[NOTT9:%.*]] = xor <2 x i16> [[T9]], <i16 -1, i16 -1> +; CHECK-NEXT: [[T11:%.*]] = and <2 x i16> [[C]], [[NOTT9]] +; CHECK-NEXT: [[T12:%.*]] = and <2 x i16> [[D]], [[T9]] +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[T11]], [[T12]] +; CHECK-NEXT: ret <2 x i16> [[R]] +; + %c = mul <2 x i16> %pc, %pc ; thwart complexity-based canonicalization + %d = mul <2 x i16> %pd, %pd ; thwart complexity-based canonicalization + %s = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7> + %t9 = bitcast <4 x i8> %s to <2 x i16> + %nott9 = xor <2 x i16> %t9, <i16 -1, i16 -1> + %t11 = and <2 x i16> %c, %nott9 + %t12 = and <2 x i16> %d, %t9 + %r = or <2 x i16> %t11, %t12 + ret <2 x i16> %r +} diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll index 03500298dbf7e..82c3ba8c3c29c 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -1120,6 +1120,10 @@ define i32 @add_umax_wrong_pred(i32 %x) { ; Negative test +; Without the nuw that would allow pushing the add through the umax, the +; add + icmp ugt combination can be interpreted as a range check, and would +; normally be canonicalized to use ult instead. However, this is not done when +; used as part of a umax to avoid breaking the SPF pattern. define i32 @add_umax_wrong_wrap(i32 %x) { ; CHECK-LABEL: @add_umax_wrong_wrap( ; CHECK-NEXT: [[A:%.*]] = add nsw i32 [[X:%.*]], 15 diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index 7b76c3ad74226..6c55c23750100 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -2129,3 +2129,26 @@ define <3 x i8> @umax_vector_splat_undef(<3 x i8> %x) { %r = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %a, <3 x i8> ) ret <3 x i8> %r } + +define <3 x i8> @umax_sub_vec(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @umax_sub_vec( +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.usub.sat.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) +; CHECK-NEXT: ret <3 x i8> [[TMP1]] +; + %u = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %x, <3 x i8> %y) + %r = sub <3 x i8> %u, %y + ret <3 x i8> %r +} + +define i8 @umax_sub_use(i8 %x, i8 %y) { +; CHECK-LABEL: @umax_sub_use( +; CHECK-NEXT: [[U:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: call void @use(i8 [[U]]) +; CHECK-NEXT: [[R:%.*]] = sub i8 [[U]], [[Y]] +; CHECK-NEXT: ret i8 [[R]] +; + %u = call i8 @llvm.umax.i8(i8 %x, i8 %y) + call void @use(i8 %u) + %r = sub i8 %u, %y + ret i8 %r +} diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index fcc6a0fbac6fd..9ed333325819d 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -99,9 +99,9 @@ define i32 @test17(i32 %A) { define i1 @test18(i32 %A) { ; CHECK-LABEL: @test18( -; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -50 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A_OFF]], 49 -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -100 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -50 +; CHECK-NEXT: ret i1 [[TMP2]] ; %B = icmp sge i32 %A, 100 %C = icmp slt i32 %A, 50 @@ -111,9 +111,9 @@ define i1 @test18_logical(i32 %A) { ; CHECK-LABEL: @test18_logical( -; CHECK-NEXT: [[A_OFF:%.*]] = add i32 [[A:%.*]], -50 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A_OFF]], 49 -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A:%.*]], -100 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], -50 +; CHECK-NEXT: ret i1 [[TMP2]] ; %B = icmp sge i32 %A, 100 %C = icmp slt i32 %A, 50 @@ -341,9 +341,9 @@ define i32 @test30(i32 %A) { ; CHECK-NEXT: [[E:%.*]] = or i32 [[D]], 32962 ;
CHECK-NEXT: ret i32 [[E]] ; - %B = or i32 %A, 32962 - %C = and i32 %A, -65536 - %D = and i32 %B, 40186 + %B = or i32 %A, 32962 ; 0b1000_0000_1100_0010 + %C = and i32 %A, -65536 ; 0xffff0000 + %D = and i32 %B, 40186 ; 0b1001_1100_1111_1010 %E = or i32 %D, %C ret i32 %E } diff --git a/llvm/test/Transforms/InstCombine/overflow.ll b/llvm/test/Transforms/InstCombine/overflow.ll index 6205a02776cce..153522ef6704b 100644 --- a/llvm/test/Transforms/InstCombine/overflow.ll +++ b/llvm/test/Transforms/InstCombine/overflow.ll @@ -11,7 +11,7 @@ define i32 @test1(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: [[TMP0:%.*]] = extractvalue { i32, i1 } [[SADD]], 1 ; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2 +; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #[[ATTR2:[0-9]+]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0 @@ -49,7 +49,7 @@ define i32 @test2(i32 %a, i32 %b, i64* %P) nounwind ssp { ; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[ADD_OFF]], 4294967295 ; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2 +; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #[[ATTR2]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 @@ -82,11 +82,11 @@ define i64 @test3(i32 %a, i32 %b) nounwind ssp { ; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 ; CHECK-NEXT: [[CONV2:%.*]] = sext i32 [[B:%.*]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV2]], [[CONV]] -; CHECK-NEXT: [[ADD_OFF:%.*]] = add nsw i64 [[ADD]], 2147483648 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[ADD_OFF]], 4294967295 -; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[ADD]], -2147483648 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], -4294967296 +; CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2 +; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #[[ATTR2]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: ret i64 [[ADD]] @@ -116,7 +116,7 @@ define zeroext i8 @test4(i8 signext %a, i8 signext %b) nounwind ssp { ; CHECK-NEXT: [[CMP:%.*]] = extractvalue { i8, i1 } [[SADD]], 1 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2 +; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #[[ATTR2]] ; CHECK-NEXT: unreachable ; CHECK: if.end: ; CHECK-NEXT: [[SADD_RESULT:%.*]] = extractvalue { i8, i1 } [[SADD]], 0 @@ -146,11 +146,11 @@ define i32 @test8(i64 %a, i64 %b) nounwind ssp { ; CHECK-LABEL: @test8( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[ADD_OFF:%.*]] = add i64 [[ADD]], 2147483648 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[ADD_OFF]], 4294967295 -; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ADD]], -2147483648 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], -4294967296 +; CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; CHECK: if.then: -; CHECK-NEXT: tail call void @throwAnExceptionOrWhatever() #2 +; CHECK-NEXT: tail call void 
@throwAnExceptionOrWhatever() #[[ATTR2]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: ; CHECK-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll index b9a4771f6eeb4..bf30fdee07cc1 100644 --- a/llvm/test/Transforms/InstCombine/sadd_sat.ll +++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll @@ -695,6 +695,191 @@ entry: ret i64 %spec.store.select8 } +define i32 @ashrA(i64 %a, i32 %b) { +; CHECK-LABEL: @ashrA( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[TMP2]] +; +entry: + %conv = ashr i64 %a, 32 + %conv1 = sext i32 %b to i64 + %add = add i64 %conv1, %conv + %spec.store.select = call i64 @llvm.smin.i64(i64 %add, i64 2147483647) + %spec.store.select8 = call i64 @llvm.smax.i64(i64 %spec.store.select, i64 -2147483648) + %conv7 = trunc i64 %spec.store.select8 to i32 + ret i32 %conv7 +} + +define i32 @ashrB(i32 %a, i64 %b) { +; CHECK-LABEL: @ashrB( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[B:%.*]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP1]], i32 [[A:%.*]]) +; CHECK-NEXT: ret i32 [[TMP2]] +; +entry: + %conv = sext i32 %a to i64 + %conv1 = ashr i64 %b, 32 + %add = add i64 %conv1, %conv + %0 = icmp sgt i64 %add, -2147483648 + %spec.store.select = select i1 %0, i64 %add, i64 -2147483648 + %1 = icmp slt i64 %spec.store.select, 2147483647 + %spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647 + %conv7 = trunc i64 %spec.store.select8 to i32 + ret i32 %conv7 +} + +define i32 @ashrAB(i64 %a, i64 %b) { +; CHECK-LABEL: @ashrAB( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[B:%.*]], 32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP2]], i32 [[TMP3]]) +; CHECK-NEXT: ret i32 [[TMP4]] +; +entry: + %conv = ashr i64 %a, 32 + %conv1 = ashr i64 %b, 32 + %add = add i64 %conv1, %conv + %0 = icmp sgt i64 %add, -2147483648 + %spec.store.select = select i1 %0, i64 %add, i64 -2147483648 + %1 = icmp slt i64 %spec.store.select, 2147483647 + %spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647 + %conv7 = trunc i64 %spec.store.select8 to i32 + ret i32 %conv7 +} + +define i32 @ashrA31(i64 %a, i32 %b) { +; CHECK-LABEL: @ashrA31( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 31 +; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV]], [[CONV1]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648 +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[SPEC_STORE_SELECT]], 2147483647 +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select i1 [[TMP1]], i64 [[SPEC_STORE_SELECT]], i64 2147483647 +; CHECK-NEXT: [[CONV7:%.*]] = trunc i64 [[SPEC_STORE_SELECT8]] to i32 +; CHECK-NEXT: ret i32 [[CONV7]] +; +entry: + %conv = ashr i64 %a, 31 + %conv1 = sext i32 %b to i64 + %add = add i64 %conv1, %conv + %0 = icmp sgt i64 %add, -2147483648 + %spec.store.select = select i1 %0, i64 %add, i64 -2147483648 + %1 = icmp slt i64 
%spec.store.select, 2147483647 + %spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647 + %conv7 = trunc i64 %spec.store.select8 to i32 + ret i32 %conv7 +} + +define i32 @ashrA33(i64 %a, i32 %b) { +; CHECK-LABEL: @ashrA33( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = ashr i64 [[A:%.*]], 33 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[CONV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] +; +entry: + %conv = ashr i64 %a, 33 + %conv1 = sext i32 %b to i64 + %add = add i64 %conv1, %conv + %0 = icmp sgt i64 %add, -2147483648 + %spec.store.select = select i1 %0, i64 %add, i64 -2147483648 + %1 = icmp slt i64 %spec.store.select, 2147483647 + %spec.store.select8 = select i1 %1, i64 %spec.store.select, i64 2147483647 + %conv7 = trunc i64 %spec.store.select8 to i32 + ret i32 %conv7 +} + +define <2 x i8> @ashrv2i8(<2 x i16> %a, <2 x i8> %b) { +; CHECK-LABEL: @ashrv2i8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = ashr <2 x i16> [[A:%.*]], +; CHECK-NEXT: [[CONV1:%.*]] = sext <2 x i8> [[B:%.*]] to <2 x i16> +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i16> [[CONV]], [[CONV1]] +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt <2 x i16> [[ADD]], <i16 -128, i16 -128> +; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select <2 x i1> [[TMP0]], <2 x i16> [[ADD]], <2 x i16> <i16 -128, i16 -128> +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> [[SPEC_STORE_SELECT]], <i16 127, i16 127> +; CHECK-NEXT: [[SPEC_STORE_SELECT8:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> <i16 127, i16 127> +; CHECK-NEXT: [[CONV7:%.*]] = trunc <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8> +; CHECK-NEXT: ret <2 x i8> [[CONV7]] +; +entry: + %conv = ashr <2 x i16> %a, + %conv1 = sext <2 x i8> %b to <2 x i16> + %add = add <2 x i16> %conv1, %conv + %0 = icmp sgt <2 x i16> %add, <i16 -128, i16 -128> + %spec.store.select = select <2 x i1> %0, <2 x i16> %add, <2 x i16> <i16 -128, i16 -128> + %1 = icmp slt <2 x i16> %spec.store.select, <i16 127, i16 127> + %spec.store.select8 = select <2 x i1> %1, <2 x i16> %spec.store.select, <2 x i16> <i16 127, i16 127> + %conv7 = trunc <2 x i16> %spec.store.select8 to <2 x i8> + ret <2 x i8> %conv7 +} + +define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) { +; CHECK-LABEL: @ashrv2i8_s( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i16> [[TMP0]] to <2 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]]) +; CHECK-NEXT: ret <2 x i8> [[TMP2]] +; +entry: + %conv = ashr <2 x i16> %a, <i16 8, i16 8> + %conv1 = sext <2 x i8> %b to <2 x i16> + %add = add <2 x i16> %conv1, %conv + %0 = icmp sgt <2 x i16> %add, <i16 -128, i16 -128> + %spec.store.select = select <2 x i1> %0, <2 x i16> %add, <2 x i16> <i16 -128, i16 -128> + %1 = icmp slt <2 x i16> %spec.store.select, <i16 127, i16 127> + %spec.store.select8 = select <2 x i1> %1, <2 x i16> %spec.store.select, <2 x i16> <i16 127, i16 127> + %conv7 = trunc <2 x i16> %spec.store.select8 to <2 x i8> + ret <2 x i8> %conv7 +} + +define i16 @or(i8 %X, i16 %Y) { +; CHECK-LABEL: @or( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i16 [[Y:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = or i8 [[TMP1]], -16 +; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[X:%.*]], i8 [[TMP2]]) +; CHECK-NEXT: [[L12:%.*]] = sext i8 [[TMP3]] to i16 +; CHECK-NEXT: ret i16 [[L12]] +; + %conv10 = sext i8 %X to i16 + %conv14 = or i16 %Y, 65520 + %sub = sub nsw i16 %conv10, %conv14 + %l9 = icmp sgt i16 %sub, -128 + %l10 = select i1 %l9, i16 %sub, i16 -128 + %l11 = icmp slt i16 %l10, 127 + %l12 = select i1 %l11, i16 %l10, i16 127 + ret i16 %l12 +} + +define i16 @const(i8 %X) { +; CHECK-LABEL: @const( +; CHECK-NEXT:
[[CONV10:%.*]] = sext i8 [[X:%.*]] to i16 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[CONV10]], 117 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[CONV10]], i16 117 +; CHECK-NEXT: [[L12:%.*]] = add nsw i16 [[TMP2]], 10 +; CHECK-NEXT: ret i16 [[L12]] +; + %conv10 = sext i8 %X to i16 + %sub = add i16 %conv10, 10 + %l9 = icmp sgt i16 %sub, -128 + %l10 = select i1 %l9, i16 %sub, i16 -128 + %l11 = icmp slt i16 %l10, 127 + %l12 = select i1 %l11, i16 %l10, i16 127 + ret i16 %l12 +} + declare void @use64(i64) declare i64 @llvm.smin.i64(i64, i64) declare i64 @llvm.smax.i64(i64, i64) diff --git a/llvm/test/Transforms/InstCombine/smulo.ll b/llvm/test/Transforms/InstCombine/smulo.ll index c909ed1d45ae3..42ca6d7f2b5a3 100644 --- a/llvm/test/Transforms/InstCombine/smulo.ll +++ b/llvm/test/Transforms/InstCombine/smulo.ll @@ -46,8 +46,8 @@ define i1 @test_constant2(i8 %a) { define i1 @test_constant3(i8 %a) { ; CHECK-LABEL: @test_constant3( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 42 -; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[TMP1]], 84 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], -43 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i8 [[TMP1]], -85 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 3) @@ -57,8 +57,8 @@ define i1 @test_constant3(i8 %a) { define i1 @test_constant4(i8 %a) { ; CHECK-LABEL: @test_constant4( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 32 -; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[TMP1]], 63 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], -32 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i8 [[TMP1]], -64 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 4) @@ -69,8 +69,8 @@ define i1 @test_constant4(i8 %a) { define i1 @test_constant127(i8 %a) { ; CHECK-LABEL: @test_constant127( -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], 1 -; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ugt i8 [[TMP1]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[A:%.*]], -2 +; CHECK-NEXT: [[OVERFLOW:%.*]] = icmp ult i8 [[TMP1]], -3 ; CHECK-NEXT: ret i1 [[OVERFLOW]] ; %res = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 127) diff --git a/llvm/test/Transforms/InstCombine/xor-ashr.ll b/llvm/test/Transforms/InstCombine/xor-ashr.ll index 570a2b33fd59a..a4da030ac6c56 100644 --- a/llvm/test/Transforms/InstCombine/xor-ashr.ll +++ b/llvm/test/Transforms/InstCombine/xor-ashr.ll @@ -90,6 +90,23 @@ define i8 @wrongimm(i16 %add) { ret i8 %x } +; PR52397 Some of the lanes of the xor/ashr are unused, becoming poison. 
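+; (Reasoning sketch, inferred from the reduced test below: the trailing shufflevector reads only four of the six lanes, so two lanes of the ashr/xor are demanded by nothing and may legitimately become poison; the fold has to stay valid in that case.)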
+define <4 x i32> @vectorpoison(<6 x i32> %0) { +; CHECK-LABEL: @vectorpoison( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG:%.*]] = ashr <6 x i32> [[TMP0:%.*]], +; CHECK-NEXT: [[SHR:%.*]] = xor <6 x i32> [[NEG]], +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <6 x i32> [[SHR]], <6 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; +entry: + %neg = xor <6 x i32> %0, + %shr = ashr <6 x i32> %neg, + %1 = shufflevector <6 x i32> %shr, <6 x i32> poison, <4 x i32> + ret <4 x i32> %1 +} + + ; One use define i16 @extrause(i16 %add) { diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index 834b0befac05f..1aebe5461cf64 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -108,7 +108,7 @@ define i1 @gep6(%gept* %x) { define i1 @gep7(%gept* %x) { ; CHECK-LABEL: @gep7( ; CHECK-NEXT: [[A:%.*]] = getelementptr [[GEPT:%.*]], %gept* [[X:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32* [[A]], getelementptr (%gept, %gept* @gepz, i32 0, i32 0) +; CHECK-NEXT: [[EQUAL:%.*]] = icmp eq i32* [[A]], getelementptr ([[GEPT]], %gept* @gepz, i32 0, i32 0) ; CHECK-NEXT: ret i1 [[EQUAL]] ; %a = getelementptr %gept, %gept* %x, i64 0, i32 0 @@ -294,9 +294,13 @@ define i1 @gep17() { ret i1 %cmp } +; Negative test: GEP inbounds may cross sign boundary. define i1 @gep_same_base_constant_indices(i8* %a) { ; CHECK-LABEL: @gep_same_base_constant_indices( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 10 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8* [[ARRAYIDX1]], [[ARRAYIDX2]] +; CHECK-NEXT: ret i1 [[CMP]] ; %arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1 %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10 diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll index 0a23239a09b0d..44abb97f205bf 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll @@ -351,6 +351,167 @@ define void @store_min_ge_type(<8 x i64>* %ptr, <4 x i64> %v0, <4 x i64> %v1) #2 ret void } +define void @load_double_factor4(<16 x double>* %ptr) #0 { +; CHECK-LABEL: @load_double_factor4( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to double* +; CHECK-NEXT: [[LDN:%.*]] = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( [[TMP1]], double* [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[LDN]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64( [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[LDN]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64( [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[LDN]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64( [[TMP7]], i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[LDN]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64( [[TMP9]], i64 0) +; CHECK-NEXT: ret void +; + %interleaved.vec = load <16 x double>, <16 x double>* %ptr, align 4 + %v0 = shufflevector <16 x 
double> %interleaved.vec, <16 x double> poison, <4 x i32> + %v1 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> + %v2 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> + %v3 = shufflevector <16 x double> %interleaved.vec, <16 x double> poison, <4 x i32> + ret void +} + +define void @load_float_factor3(<24 x float>* %ptr) #0 { +; CHECK-LABEL: @load_float_factor3( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <24 x float>* [[PTR:%.*]] to float* +; CHECK-NEXT: [[LDN:%.*]] = call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( [[TMP1]], float* [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , } [[LDN]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32( [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , } [[LDN]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32( [[TMP5]], i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , } [[LDN]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.experimental.vector.extract.v8f32.nxv4f32( [[TMP7]], i64 0) +; CHECK-NEXT: ret void +; + %interleaved.vec = load <24 x float>, <24 x float>* %ptr, align 4 + %v0 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> + %v1 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> + %v2 = shufflevector <24 x float> %interleaved.vec, <24 x float> poison, <8 x i32> + ret void +} + +define void @load_half_factor2(<32 x half>* %ptr) #0 { +; CHECK-LABEL: @load_half_factor2( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x half>* [[PTR:%.*]] to half* +; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv8f16( [[TMP1]], half* [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x half> @llvm.experimental.vector.extract.v16f16.nxv8f16( [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x half> @llvm.experimental.vector.extract.v16f16.nxv8f16( [[TMP5]], i64 0) +; CHECK-NEXT: ret void +; + %interleaved.vec = load <32 x half>, <32 x half>* %ptr, align 4 + %v0 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> + %v1 = shufflevector <32 x half> %interleaved.vec, <32 x half> poison, <16 x i32> + ret void +} + +define void @load_bfloat_factor2(<32 x bfloat>* %ptr) #0 { +; CHECK-LABEL: @load_bfloat_factor2( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x bfloat>* [[PTR:%.*]] to bfloat* +; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv8bf16( [[TMP1]], bfloat* [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16( [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16( [[TMP5]], i64 0) +; CHECK-NEXT: ret void +; + %interleaved.vec = load <32 x bfloat>, <32 x bfloat>* %ptr, align 4 + %v0 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> + %v1 = shufflevector <32 x bfloat> %interleaved.vec, <32 x bfloat> poison, <16 x i32> + ret void +} + +define void 
@store_double_factor4(<16 x double>* %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) #0 { +; CHECK-LABEL: @store_double_factor4( +; CHECK-NEXT: [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vector.insert.nxv2f64.v4f64( undef, <4 x double> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.vector.insert.nxv2f64.v4f64( undef, <4 x double> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.vector.insert.nxv2f64.v4f64( undef, <4 x double> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.experimental.vector.insert.nxv2f64.v4f64( undef, <4 x double> [[TMP8]], i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x double>* [[PTR:%.*]] to double* +; CHECK-NEXT: call void @llvm.aarch64.sve.st4.nxv2f64( [[TMP3]], [[TMP5]], [[TMP7]], [[TMP9]], [[TMP1]], double* [[TMP10]]) +; CHECK-NEXT: ret void +; + %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> + %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> + %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> + store <16 x double> %interleaved.vec, <16 x double>* %ptr, align 4 + ret void +} + +define void @store_float_factor3(<24 x float>* %ptr, <8 x float> %v0, <8 x float> %v1, <8 x float> %v2) #0 { +; CHECK-LABEL: @store_float_factor3( +; CHECK-NEXT: [[S0:%.*]] = shufflevector <8 x float> [[V0:%.*]], <8 x float> [[V1:%.*]], <16 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x float> [[V2:%.*]], <8 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vector.insert.nxv4f32.v8f32( undef, <8 x float> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.vector.insert.nxv4f32.v8f32( undef, <8 x float> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> [[S0]], <16 x float> [[S1]], <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.experimental.vector.insert.nxv4f32.v8f32( undef, <8 x float> [[TMP6]], i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <24 x float>* [[PTR:%.*]] to float* +; CHECK-NEXT: call void @llvm.aarch64.sve.st3.nxv4f32( [[TMP3]], [[TMP5]], [[TMP7]], [[TMP1]], float* [[TMP8]]) +; CHECK-NEXT: ret void +; + %s0 = shufflevector <8 x float> %v0, <8 x float> %v1, <16 x i32> + %s1 = shufflevector <8 x float> %v2, <8 x float> poison, <16 x i32> + %interleaved.vec = shufflevector <16 x float> %s0, <16 x float> %s1, <24 x i32> + store <24 x float> %interleaved.vec, <24 x float>* %ptr, align 4 + ret void +} + +define void @store_half_factor2(<32 x half>* %ptr, <16 x half> %v0, <16 x half> %v1) #0 { +; CHECK-LABEL: @store_half_factor2( +; 
CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x half> [[V0:%.*]], <16 x half> [[V1:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vector.insert.nxv8f16.v16f16( undef, <16 x half> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x half> [[V0]], <16 x half> [[V1]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.vector.insert.nxv8f16.v16f16( undef, <16 x half> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x half>* [[PTR:%.*]] to half* +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8f16( [[TMP3]], [[TMP5]], [[TMP1]], half* [[TMP6]]) +; CHECK-NEXT: ret void +; + %interleaved.vec = shufflevector <16 x half> %v0, <16 x half> %v1, <32 x i32> + store <32 x half> %interleaved.vec, <32 x half>* %ptr, align 4 + ret void +} + + +define void @store_bfloat_factor2(<32 x bfloat>* %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1) #0 { +; CHECK-LABEL: @store_bfloat_factor2( +; CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x bfloat> [[V0:%.*]], <16 x bfloat> [[V1:%.*]], <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = call @llvm.experimental.vector.insert.nxv8bf16.v16bf16( undef, <16 x bfloat> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x bfloat> [[V0]], <16 x bfloat> [[V1]], <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call @llvm.experimental.vector.insert.nxv8bf16.v16bf16( undef, <16 x bfloat> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x bfloat>* [[PTR:%.*]] to bfloat* +; CHECK-NEXT: call void @llvm.aarch64.sve.st2.nxv8bf16( [[TMP3]], [[TMP5]], [[TMP1]], bfloat* [[TMP6]]) +; CHECK-NEXT: ret void +; + %interleaved.vec = shufflevector <16 x bfloat> %v0, <16 x bfloat> %v1, <32 x i32> + store <32 x bfloat> %interleaved.vec, <32 x bfloat>* %ptr, align 4 + ret void +} + attributes #0 = { vscale_range(2,2) "target-features"="+sve" } attributes #1 = { vscale_range(2,4) "target-features"="+sve" } attributes #2 = { vscale_range(4,4) "target-features"="+sve" } diff --git a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll index 98c78ff87f87c..bf264aac3a49e 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll @@ -193,28 +193,56 @@ unreachable.exit: define void @peel_exits_to_blocks_branch_to_unreachable_block(i32* %ptr, i32 %N, i32 %x, i1 %c.1) { ; CHECK-LABEL: @peel_exits_to_blocks_branch_to_unreachable_block( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]] +; CHECK: loop.header.peel.begin: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]] +; CHECK: loop.header.peel: +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ult i32 1, 2 +; CHECK-NEXT: br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]] +; CHECK: else.peel: +; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]] +; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[EXIT_2:%.*]], label [[LOOP_LATCH_PEEL:%.*]] +; CHECK: then.peel: +; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH_PEEL]] +; CHECK: loop.latch.peel: +; CHECK-NEXT: [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ] +; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1 +; CHECK-NEXT: store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4 +; CHECK-NEXT: [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1 +; 
CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp ult i32 1, 1000 +; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT:%.*]] +; CHECK: loop.header.peel.next: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]] +; CHECK: loop.header.peel.next1: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], 2 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: else: -; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[C_2]], label [[EXIT_2_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]] ; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]] +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void +; CHECK: exit.1.loopexit: +; CHECK-NEXT: br label [[EXIT_1]] ; CHECK: exit.1: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[UNREACHABLE_TERM:%.*]] +; CHECK: exit.2.loopexit: +; CHECK-NEXT: br label [[EXIT_2]] ; CHECK: exit.2: ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[UNREACHABLE_TERM]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll new file mode 100644 index 0000000000000..b70510b84eb77 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization-cost-tuning.ll @@ -0,0 +1,54 @@ +; REQUIRES: asserts +; RUN: opt -mtriple=aarch64 -mattr=+sve -scalable-vectorization=on \ +; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4 + +; RUN: opt -mtriple=aarch64 -mattr=+sve -mcpu=generic -scalable-vectorization=on \ +; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=GENERIC,VF-VSCALE4 + +; RUN: opt -mtriple=aarch64 -mcpu=neoverse-v1 -scalable-vectorization=on \ +; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=NEOVERSE-V1,VF-VSCALE4 + +; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 -scalable-vectorization=on \ +; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: | FileCheck %s 
--check-prefixes=NEOVERSE-N2,VF-4 + +; RUN: opt -mtriple=aarch64 -mcpu=neoverse-n2 -scalable-vectorization=preferred \ +; RUN: -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=NEOVERSE-N2,VF-VSCALE4 + +; GENERIC: LV: Vector loop of width vscale x 2 costs: 3 (assuming a minimum vscale of 2). +; GENERIC: LV: Vector loop of width vscale x 4 costs: 1 (assuming a minimum vscale of 2). + +; NEOVERSE-V1: LV: Vector loop of width vscale x 2 costs: 3 (assuming a minimum vscale of 2). +; NEOVERSE-V1: LV: Vector loop of width vscale x 4 costs: 1 (assuming a minimum vscale of 2). + +; NEOVERSE-N2: LV: Vector loop of width vscale x 2 costs: 6 (assuming a minimum vscale of 1). +; NEOVERSE-N2: LV: Vector loop of width vscale x 4 costs: 3 (assuming a minimum vscale of 1). + +; VF-4: <4 x i32> +; VF-VSCALE4: +define void @test0(i32* %a, i8* %b, i32* %c) #0 { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv + %1 = load i8, i8* %arrayidx2, align 4 + %zext = zext i8 %1 to i32 + %add = add nsw i32 %zext, %0 + %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv + store i32 %add, i32* %arrayidx5, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll index 8d53ae5a0b5d9..ea9860c73f999 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll @@ -10,7 +10,7 @@ define void @test0(i32* %a, i8* %b, i32* %c) #0 { ; CHECK: LV: Checking a loop in "test0" ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 -; CHECK_SCALABLE_ON: LV: Selecting VF: 4 +; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF @@ -43,7 +43,7 @@ exit: define void @test1(i32* %a, i8* %b) #0 { ; CHECK: LV: Checking a loop in "test1" ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 -; CHECK_SCALABLE_ON: LV: Selecting VF: 4 +; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF @@ -79,7 +79,7 @@ define void @test2(i32* %a, i8* %b) #0 { ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4 ; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2 -; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4 +; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 2 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF ; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4 ; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll index 246dcd2370880..73b9853b7171b 100644 --- 
a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll @@ -187,9 +187,9 @@ exit: ; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF. ; CHECK-DBG: remark: :0:0: User-specified vectorization factor vscale x 4 is unsafe. Ignoring the hint to let the compiler pick a more suitable value. ; CHECK-DBG: Found feasible scalable VF = vscale x 2 -; CHECK-DBG: LV: Selecting VF: 4. +; CHECK-DBG: LV: Selecting VF: vscale x 2. ; CHECK-LABEL: @test4 -; CHECK: <4 x i32> +; CHECK: define void @test4(i32* %a, i32* %b) #0 { entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll index 6ff6c16c69cb2..7ab65a9fc3577 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll @@ -49,6 +49,52 @@ for.end: ret float %add } +; Same as above but where fadd has a fast-math flag. +define float @fadd_strict_fmf(float* noalias nocapture readonly %a, i64 %n) { +; CHECK-ORDERED-LABEL: @fadd_strict_fmf +; CHECK-ORDERED: vector.body: +; CHECK-ORDERED: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[RDX:%.*]], %vector.body ] +; CHECK-ORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>* +; CHECK-ORDERED: [[RDX]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float [[VEC_PHI]], <8 x float> [[LOAD_VEC]]) +; CHECK-ORDERED: for.end: +; CHECK-ORDERED: [[RES:%.*]] = phi float [ [[SCALAR:%.*]], %for.body ], [ [[RDX]], %middle.block ] +; CHECK-ORDERED: ret float [[RES]] + +; CHECK-UNORDERED-LABEL: @fadd_strict_fmf +; CHECK-UNORDERED: vector.body: +; CHECK-UNORDERED: [[VEC_PHI:%.*]] = phi <8 x float> [ , %vector.ph ], [ [[FADD_VEC:%.*]], %vector.body ] +; CHECK-UNORDERED: [[LOAD_VEC:%.*]] = load <8 x float>, <8 x float>* +; CHECK-UNORDERED: [[FADD_VEC]] = fadd nnan <8 x float> [[LOAD_VEC]], [[VEC_PHI]] +; CHECK-UNORDERED-NOT: @llvm.vector.reduce.fadd +; CHECK-UNORDERED: middle.block: +; CHECK-UNORDERED: [[RDX:%.*]] = call nnan float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> [[FADD_VEC]]) +; CHECK-UNORDERED: for.body: +; CHECK-UNORDERED: [[LOAD:%.*]] = load float, float* +; CHECK-UNORDERED: [[FADD:%.*]] = fadd nnan float [[LOAD]], {{.*}} +; CHECK-UNORDERED: for.end: +; CHECK-UNORDERED: [[RES:%.*]] = phi float [ [[FADD]], %for.body ], [ [[RDX]], %middle.block ] +; CHECK-UNORDERED: ret float [[RES]] + +; CHECK-NOT-VECTORIZED-LABEL: @fadd_strict_fmf +; CHECK-NOT-VECTORIZED-NOT: vector.body + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds float, float* %a, i64 %iv + %0 = load float, float* %arrayidx, align 4 + %add = fadd nnan float %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret float %add +} + define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) { ; CHECK-ORDERED-LABEL: @fadd_strict_unroll ; CHECK-ORDERED: vector.body: @@ -767,6 +813,79 @@ for.end: ret float %add } +; Same as above but where fadd has a fast-math flag. 
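+; Below, the in-order (ORDERED) lowering must chain the four partial sums
+; serially, e.g. (with placeholder names):
+;   %f1 = fadd nnan float %phi, %a0
+;   %f2 = fadd nnan float %f1, %a1
+; while the UNORDERED lowering keeps four independent accumulators and only
+; combines them pairwise in middle.block, i.e. it reassociates the reduction.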
+define float @fadd_scalar_vf_fmf(float* noalias nocapture readonly %a, i64 %n) { +; CHECK-ORDERED-LABEL: @fadd_scalar_vf_fmf +; CHECK-ORDERED: vector.body: +; CHECK-ORDERED: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[FADD4:%.*]], %vector.body ] +; CHECK-ORDERED: [[LOAD1:%.*]] = load float, float* +; CHECK-ORDERED: [[LOAD2:%.*]] = load float, float* +; CHECK-ORDERED: [[LOAD3:%.*]] = load float, float* +; CHECK-ORDERED: [[LOAD4:%.*]] = load float, float* +; CHECK-ORDERED: [[FADD1:%.*]] = fadd nnan float [[VEC_PHI]], [[LOAD1]] +; CHECK-ORDERED: [[FADD2:%.*]] = fadd nnan float [[FADD1]], [[LOAD2]] +; CHECK-ORDERED: [[FADD3:%.*]] = fadd nnan float [[FADD2]], [[LOAD3]] +; CHECK-ORDERED: [[FADD4]] = fadd nnan float [[FADD3]], [[LOAD4]] +; CHECK-ORDERED-NOT: @llvm.vector.reduce.fadd +; CHECK-ORDERED: scalar.ph: +; CHECK-ORDERED: [[MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %entry ], [ [[FADD4]], %middle.block ] +; CHECK-ORDERED: for.body: +; CHECK-ORDERED: [[SUM_07:%.*]] = phi float [ [[MERGE_RDX]], %scalar.ph ], [ [[FADD5:%.*]], %for.body ] +; CHECK-ORDERED: [[LOAD5:%.*]] = load float, float* +; CHECK-ORDERED: [[FADD5]] = fadd nnan float [[LOAD5]], [[SUM_07]] +; CHECK-ORDERED: for.end: +; CHECK-ORDERED: [[RES:%.*]] = phi float [ [[FADD5]], %for.body ], [ [[FADD4]], %middle.block ] +; CHECK-ORDERED: ret float [[RES]] + +; CHECK-UNORDERED-LABEL: @fadd_scalar_vf_fmf +; CHECK-UNORDERED: vector.body: +; CHECK-UNORDERED: [[VEC_PHI1:%.*]] = phi float [ 0.000000e+00, %vector.ph ], [ [[FADD1:%.*]], %vector.body ] +; CHECK-UNORDERED: [[VEC_PHI2:%.*]] = phi float [ -0.000000e+00, %vector.ph ], [ [[FADD2:%.*]], %vector.body ] +; CHECK-UNORDERED: [[VEC_PHI3:%.*]] = phi float [ -0.000000e+00, %vector.ph ], [ [[FADD3:%.*]], %vector.body ] +; CHECK-UNORDERED: [[VEC_PHI4:%.*]] = phi float [ -0.000000e+00, %vector.ph ], [ [[FADD4:%.*]], %vector.body ] +; CHECK-UNORDERED: [[LOAD1:%.*]] = load float, float* +; CHECK-UNORDERED: [[LOAD2:%.*]] = load float, float* +; CHECK-UNORDERED: [[LOAD3:%.*]] = load float, float* +; CHECK-UNORDERED: [[LOAD4:%.*]] = load float, float* +; CHECK-UNORDERED: [[FADD1]] = fadd nnan float [[LOAD1]], [[VEC_PHI1]] +; CHECK-UNORDERED: [[FADD2]] = fadd nnan float [[LOAD2]], [[VEC_PHI2]] +; CHECK-UNORDERED: [[FADD3]] = fadd nnan float [[LOAD3]], [[VEC_PHI3]] +; CHECK-UNORDERED: [[FADD4]] = fadd nnan float [[LOAD4]], [[VEC_PHI4]] +; CHECK-UNORDERED-NOT: @llvm.vector.reduce.fadd +; CHECK-UNORDERED: middle.block: +; CHECK-UNORDERED: [[BIN_RDX1:%.*]] = fadd nnan float [[FADD2]], [[FADD1]] +; CHECK-UNORDERED: [[BIN_RDX2:%.*]] = fadd nnan float [[FADD3]], [[BIN_RDX1]] +; CHECK-UNORDERED: [[BIN_RDX3:%.*]] = fadd nnan float [[FADD4]], [[BIN_RDX2]] +; CHECK-UNORDERED: scalar.ph: +; CHECK-UNORDERED: [[MERGE_RDX:%.*]] = phi float [ 0.000000e+00, %entry ], [ [[BIN_RDX3]], %middle.block ] +; CHECK-UNORDERED: for.body: +; CHECK-UNORDERED: [[SUM_07:%.*]] = phi float [ [[MERGE_RDX]], %scalar.ph ], [ [[FADD5:%.*]], %for.body ] +; CHECK-UNORDERED: [[LOAD5:%.*]] = load float, float* +; CHECK-UNORDERED: [[FADD5]] = fadd nnan float [[LOAD5]], [[SUM_07]] +; CHECK-UNORDERED: for.end: +; CHECK-UNORDERED: [[RES:%.*]] = phi float [ [[FADD5]], %for.body ], [ [[BIN_RDX3]], %middle.block ] +; CHECK-UNORDERED: ret float [[RES]] + +; CHECK-NOT-VECTORIZED-LABEL: @fadd_scalar_vf_fmf +; CHECK-NOT-VECTORIZED-NOT: vector.body + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx =
getelementptr inbounds float, float* %a, i64 %iv + %0 = load float, float* %arrayidx, align 4 + %add = fadd nnan float %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !4 + +for.end: + ret float %add +} + ; Test case where the reduction step is a first-order recurrence. define double @reduction_increment_by_first_order_recurrence() { ; CHECK-ORDERED-LABEL: @reduction_increment_by_first_order_recurrence( diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll new file mode 100644 index 0000000000000..d175eed822c1f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; REQUIRES: asserts +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 --debug-only=loop-vectorize -force-target-instruction-cost=1 -S -scalable-vectorization=preferred 2>%t | FileCheck %s +; RUN: cat %t | FileCheck %s --check-prefix=DEBUG +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s +; RUN: cat %t | FileCheck %s --check-prefix=DEBUG +; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 --debug-only=loop-vectorize -S -scalable-vectorization=preferred 2>%t | FileCheck %s +; RUN: cat %t | FileCheck %s --check-prefix=DEBUG-FORCED + +target triple = "aarch64-linux-gnu" + +; DEBUG: LV: Checking a loop in "f1" +; DEBUG: LEV: Epilogue vectorization using scalable vectors not yet supported. Converting to fixed-width (VF=16) instead +; DEBUG: Create Skeleton for epilogue vectorized loop (first pass) +; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1 + +; DEBUG-FORCED: LV: Checking a loop in "f1" +; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced. 
+; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass) +; DEBUG-FORCED: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1 + +define void @f1(i8* %A) #0 { +; CHECK-LABEL: @f1( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 32 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 32 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to * +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i32 0), poison, zeroinitializer), * [[TMP15]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], 16 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i32 [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to * +; CHECK-NEXT: store shufflevector ( insertelement ( poison, i8 1, i32 0), poison, zeroinitializer), * [[TMP19]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP21]] 
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* [[TMP22]], i32 0 +; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to <8 x i8>* +; CHECK-NEXT: store <8 x i8> , <8 x i8>* [[TMP24]], align 1 +; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 +; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[CMP_N3:%.*]] = icmp eq i64 1024, 1024 +; CHECK-NEXT: br i1 [[CMP_N3]], label [[EXIT_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[IV]] +; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i8, i8* %A, i64 %iv + store i8 1, i8* %arrayidx, align 1 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp ne i64 %iv.next, 1024 + br i1 %exitcond, label %for.body, label %exit + +exit: + ret void +} + +attributes #0 = { "target-features"="+sve" } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll new file mode 100644 index 0000000000000..152e4de6be78d --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-fold-uniform-memops.ll @@ -0,0 +1,119 @@ +; RUN: opt -loop-vectorize -scalable-vectorization=off -force-vector-width=4 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s + +; NOTE: These tests aren't really target-specific, but it's convenient to target AArch64 +; so that TTI.isLegalMaskedLoad can return true. 
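+; With -prefer-predicate-over-epilogue=predicate-dont-vectorize the tail is
+; folded into the vector body: each iteration computes a lane mask by comparing
+; the vector IV against the backedge-taken count, and stores become masked
+; intrinsics of the shape
+;   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v, <4 x i32>* %p, i32 4, <4 x i1> %mask)
+; (%v, %p and %mask are placeholder names; the CHECK lines below show the
+; concrete values).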
+ +target triple = "aarch64-linux-gnu" + +define void @uniform_load(i32* noalias %dst, i32* noalias readonly %src, i64 %n) #0 { +; CHECK-LABEL: @uniform_load( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %pred.load.continue8 ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[INDUCTION]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label %pred.load.continue +; CHECK: pred.load.if: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[SRC:%.*]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 +; CHECK-NEXT: br label %pred.load.continue +; CHECK: pred.load.continue: +; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, %vector.body ], [ [[TMP4]], [[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF3:%.*]], label %pred.load.continue4 +; CHECK: pred.load.if3: +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[SRC]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP7]], i32 1 +; CHECK-NEXT: br label %pred.load.continue4 +; CHECK: pred.load.continue4: +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ [[TMP5]], %pred.load.continue ], [ [[TMP8]], %pred.load.if3 ] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF5:%.*]], label %pred.load.continue6 +; CHECK: pred.load.if5: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[SRC]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i32 2 +; CHECK-NEXT: br label %pred.load.continue6 +; CHECK: pred.load.continue6: +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], %pred.load.continue4 ], [ [[TMP12]], %pred.load.if5 ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF7:%.*]], label %pred.load.continue8 +; CHECK: pred.load.if7: +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[SRC]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i32 3 +; CHECK-NEXT: br label %pred.load.continue8 +; CHECK: pred.load.continue8: +; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], %pred.load.continue6 ], [ [[TMP16]], [[PRED_LOAD_IF7]] ] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <4 x i32>* +; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[TMP17]], <4 x i32>* [[TMP20]], i32 4, <4 x i1> [[TMP1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], %n.vec +; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label %vector.body + +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %val = load i32, i32* %src, align 4 + %arrayidx = getelementptr inbounds i32, 
i32* %dst, i64 %indvars.iv + store i32 %val, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +; The original loop had a conditional uniform load. In this case we actually +; do need to perform conditional loads and so we end up using a gather instead. +; However, we at least ensure the mask is the overlap of the loop predicate +; and the original condition. +define void @cond_uniform_load(i32* nocapture %dst, i32* nocapture readonly %src, i32* nocapture readonly %cond, i64 %n) #0 { +; CHECK-LABEL: @cond_uniform_load( +; CHECK: vector.ph: +; CHECK: [[TMP1:%.*]] = insertelement <4 x i32*> poison, i32* %src, i32 0 +; CHECK-NEXT: [[SRC_SPLAT:%.*]] = shufflevector <4 x i32*> [[TMP1]], <4 x i32*> poison, <4 x i32> zeroinitializer +; CHECK: vector.body: +; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[IDX_NEXT:%.*]], %vector.body ] +; CHECK: [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[IDX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[TMP2]], +; CHECK: [[LOOP_PRED:%.*]] = icmp ule <4 x i64> [[INDUCTION]] +; CHECK: [[COND_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* {{%.*}}, i32 4, <4 x i1> [[LOOP_PRED]], <4 x i32> poison) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[COND_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], +; CHECK-NEXT: [[MASK:%.*]] = select <4 x i1> [[LOOP_PRED]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer +; CHECK-NEXT: call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[SRC_SPLAT]], i32 4, <4 x i1> [[MASK]], <4 x i32> undef) +entry: + br label %for.body + +for.body: ; preds = %entry, %if.end + %index = phi i64 [ %index.next, %if.end ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %index + %0 = load i32, i32* %arrayidx, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %for.body + %1 = load i32, i32* %src, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + %val.0 = phi i32 [ %1, %if.then ], [ 0, %for.body ] + %arrayidx1 = getelementptr inbounds i32, i32* %dst, i64 %index + store i32 %val.0, i32* %arrayidx1, align 4 + %index.next = add nuw i64 %index, 1 + %exitcond.not = icmp eq i64 %index.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { "target-features"="+neon,+sve,+v8.1a" } diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 6de7b0737347d..b14ac068c759b 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -851,8 +851,10 @@ for.end: } ; Ensure that the shuffle vector for first order recurrence is inserted -; correctly after all the phis. These new phis correspond to new IVs -; that are generated by optimizing non-free truncs of IVs to IVs themselves +; correctly after all the phis. These new phis correspond to new IVs +; that are generated by optimizing non-free truncs of IVs to IVs themselves. +; This also ensures the first-order recurrence splice recipe is placed +; correctly if it is fed by an induction. 
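+; For reference, at VF=4 the recurrence splice is a shuffle of the previous
+; and current vector values, along the lines of (placeholder names):
+;   %splice = shufflevector <4 x i32> %prev, <4 x i32> %cur, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; so the splice must be emitted only after the induction phis that may feed it.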
define i64 @trunc_with_first_order_recurrence() { ; CHECK-LABEL: trunc_with_first_order_recurrence ; CHECK-LABEL: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll index c42bb332ab282..61c5f3824c11a 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll @@ -1,11 +1,12 @@ ; REQUIRES: asserts -; RUN: opt < %s -passes='loop-vectorize' -force-vector-width=2 -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S -scalable-vectorization=on 2>&1 | FileCheck %s +; RUN: opt < %s -passes='loop-vectorize' -force-target-supports-scalable-vectors=true -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 --debug-only=loop-vectorize -S -scalable-vectorization=on 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" ; Currently we cannot handle scalable vectorization factors. ; CHECK: LV: Checking a loop in "f1" -; CHECK: LEV: Epilogue vectorization for scalable vectors not yet supported. +; CHECK: LEV: Epilogue vectorization factor is forced. +; CHECK: Epilogue Loop VF:2, Epilogue Loop UF:1 define void @f1(i8* %A) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll index d82a4df564574..a99f6610e9186 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll @@ -1016,16 +1016,14 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer -; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP40]]) -; CHECK-NEXT: [[TMP42:%.*]] = fadd float [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer -; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP43]]) -; CHECK-NEXT: [[TMP45]] = fadd float [[TMP44]], [[TMP42]] +; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer +; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP40]]) +; CHECK-NEXT: [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer +; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 -; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 +; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -1033,7 +1031,7 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: @@ -1124,12 +1122,12 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] ; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> +; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]]) -; CHECK-NEXT: [[TMP42:%.*]] = fmul float [[TMP41]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> +; CHECK-NEXT: [[TMP42:%.*]] = fmul fast float [[TMP41]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> ; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]]) -; CHECK-NEXT: [[TMP45]] = fmul float [[TMP44]], [[TMP42]] +; CHECK-NEXT: [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 15c5f774a7711..76ef0faf8fd81 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -558,13 +558,11 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP5:%.*]] = fadd float [[TMP4]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[WIDE_LOAD1]]) -; CHECK-NEXT: [[TMP7]] = fadd float [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP4]], <4 x float> [[WIDE_LOAD1]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: 
[[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -572,7 +570,7 @@ define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK: for.body: ; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: @@ -613,9 +611,9 @@ define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP5:%.*]] = fmul float [[TMP4]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD1]]) -; CHECK-NEXT: [[TMP7]] = fmul float [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP7]] = fmul fast float [[TMP6]], [[TMP5]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 64aebe45e0cbf..7515fa70b6e3a 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -71,3 +71,37 @@ for.end: %tmp1 = phi i32 [ %r.next, %for.body ] ret i32 %tmp1 } + +define i32 @pr51794_signed_negative(i16 %iv.start, i32 %xor.start) { +; CHECK-LABEL: define {{.*}} @pr51794_signed_negative +; CHECK: [[XOR_START:%.+]] = insertelement <4 x i32> zeroinitializer, i32 %xor.start, i32 0 +; CHECK-LABEL: vector.body: +; CHECK: [[XOR_RED:%.+]] = phi <4 x i32> [ [[XOR_START]], %vector.ph ], [ [[XOR_SEXT:%.+]], %vector.body ] +; CHECK: [[AND:%.+]] = and <4 x i32> [[XOR_RED]], +; CHECK-NEXT: [[XOR:%.+]] = xor <4 x i32> [[AND]], +; CHECK: [[XOR_TRUNC:%.+]] = trunc <4 x i32> [[XOR]] to <4 x i1> +; CHECK-NEXT: [[XOR_SEXT]] = sext <4 x i1> [[XOR_TRUNC]] to <4 x i32> +; +; CHECK-LABEL: middle.block: +; CHECK-NEXT: [[RES_TRUNC:%.+]] = trunc <4 x i32> [[XOR_SEXT]] to <4 x i1> +; CHECK-NEXT: [[RES_RED:%.+]] = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> [[RES_TRUNC]]) +; CHECK-NEXT: sext i1 [[RES_RED]] to i32 +; +entry: + br label %loop + +loop: + %xor.red = phi i32 [ %xor.start, %entry ], [ %xor, %loop ] + %iv = phi i16 [ %iv.start, %entry ], [ %iv.next, %loop ] + %iv.next = add i16 %iv, -1 + %and = and i32 %xor.red, 1 + %xor = xor i32 %and, -1 + %tobool.not = icmp eq i16 %iv.next, 0 + br i1 %tobool.not, label %exit, label %loop + +exit: + %xor.lcssa = phi i32 [ %xor, %loop ] + ret i32 %xor.lcssa +} + + diff --git a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll index fd37d8659bef6..c8b23fba825bb 100644 --- 
a/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-inductions.ll @@ -146,7 +146,7 @@ define void @add_unique_indf32(float* noalias nocapture %a, i64 %n) { ; CHECK-NEXT: %[[INDINIT:.*]] = fadd %[[TMP2]], shufflevector ( insertelement ( poison, float 0.000000e+00, i32 0), poison, zeroinitializer) ; CHECK-NEXT: %[[VSCALE:.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: %[[TMP3:.*]] = shl i32 %8, 2 -; CHECK-NEXT: %[[TMP4:.*]] = sitofp i32 %[[TMP3]] to float +; CHECK-NEXT: %[[TMP4:.*]] = uitofp i32 %[[TMP3]] to float ; CHECK-NEXT: %[[INC:.*]] = fmul float %[[TMP4]], 2.000000e+00 ; CHECK-NEXT: %[[TMP5:.*]] = insertelement poison, float %[[INC]], i32 0 ; CHECK-NEXT: %[[VECINC:.*]] = shufflevector %[[TMP5]], poison, zeroinitializer diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll index 7ff4541a740c3..a1f7a7ab694ba 100644 --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -6,6 +6,7 @@ @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode = weak constant i8 1 @llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata" +@G = external global i8 ; Function Attrs: convergent norecurse nounwind define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { @@ -16,6 +17,7 @@ define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { ; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] ; CHECK: user_code.entry: +; CHECK-NEXT: store i8 0, i8* @G, align 1 ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; CHECK: worker.exit: @@ -27,6 +29,12 @@ entry: br i1 %exec_user_code, label %user_code.entry, label %worker.exit user_code.entry: ; preds = %entry + ; Ensure we see a 0 here as the kernel doesn't have parallel regions and we want + ; generic execution. + ; TODO: This is not perfect. We should rather go for SPMD mode and tell the runtime + ; to only spawn a single thread. Further, we then should not guard any code. 
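+  ; As the CHECK lines above show, the call below is expected to be folded to
+  ; the constant 0 (generic mode), leaving only the plain store to @G.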
+ %isSPMD = call i8 @__kmpc_is_spmd_exec_mode() + store i8 %isSPMD, i8* @G call void @bar() #2 call void @__kmpc_target_deinit(%struct.ident_t* @1, i8 1, i1 true) ret void @@ -35,6 +43,8 @@ worker.exit: ; preds = %entry ret void } +declare i8 @__kmpc_is_spmd_exec_mode() + declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll index d2c2985902207..dfd38bcd33939 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -915,7 +915,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -943,7 +943,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1040,7 +1040,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1074,7 +1074,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1188,7 +1188,7 @@ 
attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1218,7 +1218,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1314,7 +1314,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1342,7 +1342,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1438,7 +1438,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** 
[[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1466,7 +1466,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1562,7 +1562,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1586,7 +1586,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1662,7 +1662,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1680,7 +1680,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = 
icmp eq i32 [[TMP0]], -1 @@ -1885,7 +1885,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -1912,7 +1912,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2009,7 +2009,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -2042,7 +2042,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2156,7 +2156,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to 
void (i16, i32)* @@ -2185,7 +2185,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2281,7 +2281,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -2308,7 +2308,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2404,7 +2404,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -2431,7 +2431,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2527,7 +2527,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], 
label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -2550,7 +2550,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2626,7 +2626,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -2643,7 +2643,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll index a817c384719a6..602d4f0be5779 100644 --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -8,9 +8,9 @@ target triple = "nvptx64" @G = external global i32 ;. 
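; A short aside on the exec-mode encoding the CHECK updates just below rely on (hedged: the values come from LLVM's OMPTgtExecModeFlags and are an assumption here, not stated by the patch): generic = 1, SPMD = 2, generic-SPMD = 3, where generic-SPMD marks a kernel written in generic form that OpenMPOpt proved safe to execute in SPMD mode. The input kernels in this test now start in generic mode (i8 1) rather than SPMD (i8 2) so that the pass itself performs the SPMD-ization, which is why the expected *_EXEC_MODE globals change from 1 to 3.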
-; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[KERNEL0_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = external global i32 -; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 @@ -25,11 +25,11 @@ define weak void @kernel0() #0 { ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) ret void } @@ -43,9 +43,9 @@ define weak void @kernel1() #0 { ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; - %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) + %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 1, i1 false, i1 false) call void @helper1() - call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) + call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 false) ret void } diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll index 1c043b317fc8f..d1a04de1f2f62 100644 --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -125,10 +125,10 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-LABEL: define {{[^@]+}}@foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) -; CHECK-NEXT: [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: [[X:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @unknown_no_openmp() -; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR5:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR4]] +; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly [[X]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR5]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -139,13 +139,13 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] ; CHECK: master1: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR5]] +; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR6]] ; CHECK-NEXT: br 
label [[NEXT:%.*]] ; CHECK: next: ; CHECK-NEXT: call void @unknown_no_openmp() ; CHECK-NEXT: br label [[MASTER2:%.*]] ; CHECK: master2: -; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR5]] +; CHECK-NEXT: call void @use.internalized(i8* nofree addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR6]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -158,9 +158,9 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call i8* @__kmpc_alloc_shared(i64 24) #[[ATTR4]], !dbg [[DBG9:![0-9]+]] -; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR5]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR4]] +; CHECK-NEXT: [[Z:%.*]] = call i8* @__kmpc_alloc_shared(i64 24) #[[ATTR5]], !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: call void @use.internalized(i8* nofree [[Z]]) #[[ATTR6]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR5]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) @@ -184,10 +184,11 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp" ;. ; CHECK: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn writeonly } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind readnone } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR4]] = { nounwind } -; CHECK: attributes #[[ATTR5]] = { nounwind writeonly } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind readnone speculatable } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR5]] = { nounwind } +; CHECK: attributes #[[ATTR6]] = { nounwind writeonly } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c") diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index baf61407e700c..ad3dc488dc147 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -187,7 +187,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 { ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -211,7 +211,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -236,7 +236,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 { ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -259,7 +259,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_l5() #0 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) 
; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -618,7 +618,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20() ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -642,7 +642,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20() ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -667,7 +667,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20() ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -690,7 +690,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_stack_var_l20() ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1055,7 +1055,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35( ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 
[[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1079,7 +1079,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35( ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1104,7 +1104,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35( ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -1127,7 +1127,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_l35( ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1532,7 +1532,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guar ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; 
AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -1556,7 +1556,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guar ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1581,7 +1581,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guar ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -1604,7 +1604,7 @@ define weak void @__omp_offloading_14_a34ca11_sequential_loop_to_shared_var_guar ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2009,7 +2009,7 @@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; AMDGPU-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU: worker_state_machine.begin: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -2027,7 +2027,7 @@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; 
AMDGPU-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU: worker_state_machine.done.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU: thread.user_code.check: ; AMDGPU-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2050,7 +2050,7 @@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; NVPTX-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX: worker_state_machine.begin: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -2067,7 +2067,7 @@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; NVPTX-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX: worker_state_machine.done.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX: thread.user_code.check: ; NVPTX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2091,7 +2091,7 @@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; AMDGPU-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; AMDGPU-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; AMDGPU-DISABLED: worker_state_machine.begin: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast i8* addrspace(5)* [[WORKER_WORK_FN_ADDR]] to i8** ; AMDGPU-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR_GENERIC]]) ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR_GENERIC]], align 8 @@ -2109,7 +2109,7 @@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; AMDGPU-DISABLED: worker_state_machine.done.barrier: -; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; AMDGPU-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; AMDGPU-DISABLED: thread.user_code.check: ; AMDGPU-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -2133,7 +2133,7 
@@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; NVPTX-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; NVPTX-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; NVPTX-DISABLED: worker_state_machine.begin: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -2150,7 +2150,7 @@ define weak void @__omp_offloading_14_a34ca11_do_not_spmdize_target_l65() #0 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; NVPTX-DISABLED: worker_state_machine.done.barrier: -; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; NVPTX-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; NVPTX-DISABLED: thread.user_code.check: ; NVPTX-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll index a2a6d6de13729..f2a75be2990fc 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -180,7 +180,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK-DISABLED-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1 ; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]] ; CHECK-DISABLED: worker_state_machine.begin: -; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]]) ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8 ; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)* @@ -201,7 +201,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N) ; CHECK-DISABLED-NEXT: call void @__kmpc_kernel_end_parallel() ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_DONE_BARRIER]] ; CHECK-DISABLED: worker_state_machine.done.barrier: -; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) ; CHECK-DISABLED-NEXT: br label [[WORKER_STATE_MACHINE_BEGIN]] ; CHECK-DISABLED: thread.user_code.check: ; CHECK-DISABLED-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr52253.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr52253.ll new file mode 100644 index 0000000000000..099ffd02f9566 --- 
/dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr52253.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O3 -S < %s | FileCheck %s +; RUN: opt -instcombine -sccp -bdce -S < %s | FileCheck %s +; RUN: opt -aggressive-instcombine -instcombine -sccp -bdce -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i1 @foo(i32 %t4, i32 %t10) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: ret i1 false +; + %t09 = shl i32 %t10, 24 + %t010 = ashr exact i32 %t09, 24 + %t011 = add nsw i32 %t010, 979 + %t11 = trunc i32 %t4 to i8 + %t12 = icmp eq i8 %t11, 0 + %t14 = zext i1 %t12 to i32 + %t15 = shl i32 %t4, %t14 + %t17 = and i32 %t15, 255 + %t18 = icmp eq i32 %t011, %t17 + ret i1 %t18 +} + +define i1 @bar(i32 %t4, i32 %t10) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: ret i1 false +; + %t09 = shl i32 %t10, 24 + %t010 = ashr exact i32 %t09, 24 + %t011 = add nsw i32 %t010, 979 + %t11 = trunc i32 %t4 to i8 + %t12 = icmp eq i8 %t11, 0 + %t14 = zext i1 %t12 to i8 + %t15 = shl i8 %t11, %t14 + %t17 = zext i8 %t15 to i32 + %t18 = icmp eq i32 %t011, %t17 + ret i1 %t18 +} + +define i1 @foobar(i32 %t4, i32 %t10) { +; CHECK-LABEL: @foobar( +; CHECK-NEXT: ret i1 false +; + %t09 = shl i32 %t10, 24 + %t010 = ashr exact i32 %t09, 24 + %t011 = add nsw i32 %t010, 979 + %t11 = trunc i32 %t4 to i8 + %t12 = icmp eq i8 %t11, 0 + %t13 = zext i8 %t11 to i32 + %t14 = select i1 %t12, i32 1, i32 0 + %t15 = shl nuw nsw i32 %t13, %t14 + %t16 = trunc i32 %t15 to i8 + %t17 = zext i8 %t16 to i32 + %t18 = icmp eq i32 %t011, %t17 + ret i1 %t18 +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll new file mode 100644 index 0000000000000..a658b19898896 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O2 -S -mattr=sse < %s | FileCheck %s + +; This file should represent the nearly raw (mem2reg was run to make it more direct) +; IR for code written using x86 SSE intrinsics to compute integer abs/max functions. 
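; A sketch of the idiom, assuming only the standard semantics of these SSE intrinsics (this note is annotation, not part of the original header): @cmpgt_i32_sel_m128i below is the classic pre-SSE4.1 blend,
;   r = (~m & c) | (m & d)   with m = (a > b) ? -1 : 0 per 32-bit lane,
; so abs_v4i32 and max_v4i32 are lane-wise selects that the pipeline would ideally collapse to @llvm.abs.v4i32 / @llvm.smax.v4i32; the CHECK lines record how far -O2 actually gets.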
+; +; https://llvm.org/PR34047 + +define available_externally <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) { + %call = call <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %a, <2 x i64> %b) + %call1 = call <2 x i64> @_mm_andnot_si128(<2 x i64> %call, <2 x i64> %c) + %call2 = call <2 x i64> @_mm_and_si128(<2 x i64> %call, <2 x i64> %d) + %call3 = call <2 x i64> @_mm_or_si128(<2 x i64> %call1, <2 x i64> %call2) + ret <2 x i64> %call3 +} + +define internal <2 x i64> @_mm_set1_epi32(i32 %__i) { + %call = call <2 x i64> @_mm_set_epi32(i32 %__i, i32 %__i, i32 %__i, i32 %__i) + ret <2 x i64> %call +} + +define internal <2 x i64> @_mm_sub_epi32(<2 x i64> %__a, <2 x i64> %__b) { + %t0 = bitcast <2 x i64> %__a to <4 x i32> + %t1 = bitcast <2 x i64> %__b to <4 x i32> + %sub = sub <4 x i32> %t0, %t1 + %t2 = bitcast <4 x i32> %sub to <2 x i64> + ret <2 x i64> %t2 +} + +define internal <2 x i64> @_mm_setzero_si128() { + ret <2 x i64> zeroinitializer +} + +define internal <2 x i64> @_mm_cmpgt_epi32(<2 x i64> %__a, <2 x i64> %__b) { + %t0 = bitcast <2 x i64> %__a to <4 x i32> + %t1 = bitcast <2 x i64> %__b to <4 x i32> + %cmp = icmp sgt <4 x i32> %t0, %t1 + %sext = sext <4 x i1> %cmp to <4 x i32> + %t2 = bitcast <4 x i32> %sext to <2 x i64> + ret <2 x i64> %t2 +} + +define internal <2 x i64> @_mm_or_si128(<2 x i64> %__a, <2 x i64> %__b) { + %or = or <2 x i64> %__a, %__b + ret <2 x i64> %or +} + +define internal <2 x i64> @_mm_andnot_si128(<2 x i64> %__a, <2 x i64> %__b) { + %neg = xor <2 x i64> %__a, <i64 -1, i64 -1> + %and = and <2 x i64> %neg, %__b + ret <2 x i64> %and +} + +define internal <2 x i64> @_mm_and_si128(<2 x i64> %__a, <2 x i64> %__b) { + %and = and <2 x i64> %__a, %__b + ret <2 x i64> %and +} + +define internal <2 x i64> @_mm_set_epi32(i32 %__i3, i32 %__i2, i32 %__i1, i32 %__i0) { + %vecinit = insertelement <4 x i32> undef, i32 %__i0, i32 0 + %vecinit1 = insertelement <4 x i32> %vecinit, i32 %__i1, i32 1 + %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %__i2, i32 2 + %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %__i3, i32 3 + %t0 = bitcast <4 x i32> %vecinit3 to <2 x i64> + ret <2 x i64> %t0 +} + +define <2 x i64> @abs_v4i32(<2 x i64> %x) { +; CHECK-LABEL: @abs_v4i32( +; CHECK-NEXT: [[T1_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[T1_I]] +; CHECK-NEXT: [[T1_I_LOBIT:%.*]] = ashr <4 x i32> [[T1_I]], <i32 31, i32 31, i32 31, i32 31> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[T1_I_LOBIT]] to <2 x i64> +; CHECK-NEXT: [[T2_I_I:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1> +; CHECK-NEXT: [[AND_I_I1:%.*]] = and <4 x i32> [[T1_I_LOBIT]], [[SUB_I]] +; CHECK-NEXT: [[AND_I_I:%.*]] = bitcast <4 x i32> [[AND_I_I1]] to <2 x i64> +; CHECK-NEXT: [[AND_I1_I:%.*]] = and <2 x i64> [[T2_I_I]], [[X]] +; CHECK-NEXT: [[OR_I_I:%.*]] = or <2 x i64> [[AND_I1_I]], [[AND_I_I]] +; CHECK-NEXT: ret <2 x i64> [[OR_I_I]] +; + %call = call <2 x i64> @_mm_set1_epi32(i32 -1) + %call1 = call <2 x i64> @_mm_setzero_si128() + %call2 = call <2 x i64> @_mm_sub_epi32(<2 x i64> %call1, <2 x i64> %x) + %call3 = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %call, <2 x i64> %call2, <2 x i64> %x) + ret <2 x i64> %call3 +} + +define <2 x i64> @max_v4i32(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: @max_v4i32( +; CHECK-NEXT: [[T0_I_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[T1_I_I:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32> +; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt <4 x i32> [[T0_I_I]], [[T1_I_I]] +; CHECK-NEXT: [[SEXT_I_I:%.*]] = sext <4 x
i1> [[CMP_I_I]] to <4 x i32> +; CHECK-NEXT: [[T2_I_I:%.*]] = bitcast <4 x i32> [[SEXT_I_I]] to <2 x i64> +; CHECK-NEXT: [[NEG_I_I:%.*]] = xor <2 x i64> [[T2_I_I]], +; CHECK-NEXT: [[AND_I_I:%.*]] = and <2 x i64> [[NEG_I_I]], [[Y]] +; CHECK-NEXT: [[AND_I1_I:%.*]] = and <2 x i64> [[T2_I_I]], [[X]] +; CHECK-NEXT: [[OR_I_I:%.*]] = or <2 x i64> [[AND_I1_I]], [[AND_I_I]] +; CHECK-NEXT: ret <2 x i64> [[OR_I_I]] +; + %call = call <2 x i64> @cmpgt_i32_sel_m128i(<2 x i64> %x, <2 x i64> %y, <2 x i64> %y, <2 x i64> %x) + ret <2 x i64> %call +} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll index 2fafc768bd962..dd2742c209e2d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll @@ -583,7 +583,7 @@ define i32 @test_separate_anyof_v4si(<4 x i32> %t) { ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[RETURN:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[T_FR]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4 ; CHECK-NEXT: [[DOTNOT7:%.*]] = icmp eq i4 [[TMP3]], 0 ; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/PhaseOrdering/deletion-of-loops-that-became-side-effect-free.ll b/llvm/test/Transforms/PhaseOrdering/deletion-of-loops-that-became-side-effect-free.ll index ec8db3cceeb11..99a52acd3b2b1 100644 --- a/llvm/test/Transforms/PhaseOrdering/deletion-of-loops-that-became-side-effect-free.ll +++ b/llvm/test/Transforms/PhaseOrdering/deletion-of-loops-that-became-side-effect-free.ll @@ -11,17 +11,8 @@ define dso_local zeroext i1 @is_not_empty_variant1(%struct.node* %p) { ; ALL-LABEL: @is_not_empty_variant1( ; ALL-NEXT: entry: -; ALL-NEXT: [[TOBOOL_NOT3_I:%.*]] = icmp eq %struct.node* [[P:%.*]], null -; ALL-NEXT: br i1 [[TOBOOL_NOT3_I]], label [[COUNT_NODES_VARIANT1_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -; ALL: while.body.i: -; ALL-NEXT: [[P_ADDR_04_I:%.*]] = phi %struct.node* [ [[TMP0:%.*]], [[WHILE_BODY_I]] ], [ [[P]], [[ENTRY:%.*]] ] -; ALL-NEXT: [[NEXT_I:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], %struct.node* [[P_ADDR_04_I]], i64 0, i32 0 -; ALL-NEXT: [[TMP0]] = load %struct.node*, %struct.node** [[NEXT_I]], align 8 -; ALL-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq %struct.node* [[TMP0]], null -; ALL-NEXT: br i1 [[TOBOOL_NOT_I]], label [[COUNT_NODES_VARIANT1_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP0:![0-9]+]] -; ALL: count_nodes_variant1.exit: -; ALL-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL_NOT3_I]], true -; ALL-NEXT: ret i1 [[TMP1]] +; ALL-NEXT: [[TOBOOL_NOT3_I:%.*]] = icmp ne %struct.node* [[P:%.*]], null +; ALL-NEXT: ret i1 [[TOBOOL_NOT3_I]] ; entry: %p.addr = alloca %struct.node*, align 8 @@ -113,39 +104,13 @@ while.end: define dso_local zeroext i1 @is_not_empty_variant3(%struct.node* %p) { ; O3-LABEL: @is_not_empty_variant3( ; O3-NEXT: entry: -; O3-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp eq %struct.node* [[P:%.*]], null -; O3-NEXT: br i1 [[TOBOOL_NOT4_I]], label [[COUNT_NODES_VARIANT3_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -; O3: while.body.i: -; O3-NEXT: [[SIZE_06_I:%.*]] = phi i64 [ [[INC_I:%.*]], [[WHILE_BODY_I]] ], [ 0, [[ENTRY:%.*]] ] -; O3-NEXT: [[P_ADDR_05_I:%.*]] = phi %struct.node* [ [[TMP0:%.*]], [[WHILE_BODY_I]] ], [ [[P]], [[ENTRY]] ] -; O3-NEXT: 
[[CMP_I:%.*]] = icmp ne i64 [[SIZE_06_I]], -1 -; O3-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) #[[ATTR3:[0-9]+]] -; O3-NEXT: [[NEXT_I:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], %struct.node* [[P_ADDR_05_I]], i64 0, i32 0 -; O3-NEXT: [[TMP0]] = load %struct.node*, %struct.node** [[NEXT_I]], align 8 -; O3-NEXT: [[INC_I]] = add nuw i64 [[SIZE_06_I]], 1 -; O3-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq %struct.node* [[TMP0]], null -; O3-NEXT: br i1 [[TOBOOL_NOT_I]], label [[COUNT_NODES_VARIANT3_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP2:![0-9]+]] -; O3: count_nodes_variant3.exit: -; O3-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL_NOT4_I]], true -; O3-NEXT: ret i1 [[TMP1]] +; O3-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne %struct.node* [[P:%.*]], null +; O3-NEXT: ret i1 [[TOBOOL_NOT4_I]] ; ; O2-LABEL: @is_not_empty_variant3( ; O2-NEXT: entry: -; O2-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp eq %struct.node* [[P:%.*]], null -; O2-NEXT: br i1 [[TOBOOL_NOT4_I]], label [[COUNT_NODES_VARIANT3_EXIT:%.*]], label [[WHILE_BODY_I:%.*]] -; O2: while.body.i: -; O2-NEXT: [[SIZE_06_I:%.*]] = phi i64 [ [[INC_I:%.*]], [[WHILE_BODY_I]] ], [ 0, [[ENTRY:%.*]] ] -; O2-NEXT: [[P_ADDR_05_I:%.*]] = phi %struct.node* [ [[TMP0:%.*]], [[WHILE_BODY_I]] ], [ [[P]], [[ENTRY]] ] -; O2-NEXT: [[CMP_I:%.*]] = icmp ne i64 [[SIZE_06_I]], -1 -; O2-NEXT: tail call void @llvm.assume(i1 [[CMP_I]]) #[[ATTR3:[0-9]+]] -; O2-NEXT: [[NEXT_I:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], %struct.node* [[P_ADDR_05_I]], i64 0, i32 0 -; O2-NEXT: [[TMP0]] = load %struct.node*, %struct.node** [[NEXT_I]], align 8 -; O2-NEXT: [[INC_I]] = add nuw i64 [[SIZE_06_I]], 1 -; O2-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq %struct.node* [[TMP0]], null -; O2-NEXT: br i1 [[TOBOOL_NOT_I]], label [[COUNT_NODES_VARIANT3_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP2:![0-9]+]] -; O2: count_nodes_variant3.exit: -; O2-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL_NOT4_I]], true -; O2-NEXT: ret i1 [[TMP1]] +; O2-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne %struct.node* [[P:%.*]], null +; O2-NEXT: ret i1 [[TOBOOL_NOT4_I]] ; ; O1-LABEL: @is_not_empty_variant3( ; O1-NEXT: entry: @@ -160,7 +125,7 @@ define dso_local zeroext i1 @is_not_empty_variant3(%struct.node* %p) { ; O1-NEXT: [[TMP0]] = load %struct.node*, %struct.node** [[NEXT_I]], align 8 ; O1-NEXT: [[INC_I]] = add i64 [[SIZE_06_I]], 1 ; O1-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq %struct.node* [[TMP0]], null -; O1-NEXT: br i1 [[TOBOOL_NOT_I]], label [[COUNT_NODES_VARIANT3_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP2:![0-9]+]] +; O1-NEXT: br i1 [[TOBOOL_NOT_I]], label [[COUNT_NODES_VARIANT3_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP0:![0-9]+]] ; O1: count_nodes_variant3.exit.loopexit: ; O1-NEXT: [[PHI_CMP:%.*]] = icmp ne i64 [[INC_I]], 0 ; O1-NEXT: br label [[COUNT_NODES_VARIANT3_EXIT]] diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll index b39f2985b8a38..3004aae538cef 100644 --- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll +++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll @@ -1,24 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -ipsccp -S | FileCheck %s ; x = [100, 301) define internal i1 @f.trunc(i32 %x) { -; CHECK-LABEL: define internal i1 @f.trunc(i32 %x) { -; CHECK-NEXT: %t.1 = trunc i32 %x to i16 -; CHECK-NEXT: %c.2 = icmp sgt i16 %t.1, 299 -; CHECK-NEXT: %c.4 = icmp slt i16 %t.1, 101 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; 
CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %t.2 = trunc i32 %x to i8 -; CHECK-NEXT: %c.5 = icmp sgt i8 %t.2, 44 -; CHECK-NEXT: %c.6 = icmp sgt i8 %t.2, 43 -; CHECK-NEXT: %c.7 = icmp slt i8 %t.2, 100 -; CHECK-NEXT: %c.8 = icmp slt i8 %t.2, 101 -; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: %res.6 = add i1 %res.5, %c.7 -; CHECK-NEXT: %res.7 = add i1 %res.6, %c.8 -; CHECK-NEXT: ret i1 %res.7 +; CHECK-LABEL: @f.trunc( +; CHECK-NEXT: [[T_1:%.*]] = trunc i32 [[X:%.*]] to i16 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i16 [[T_1]], 299 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i16 [[T_1]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[T_2:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: [[C_5:%.*]] = icmp sgt i8 [[T_2]], 44 +; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i8 [[T_2]], 43 +; CHECK-NEXT: [[C_7:%.*]] = icmp slt i8 [[T_2]], 100 +; CHECK-NEXT: [[C_8:%.*]] = icmp slt i8 [[T_2]], 101 +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], [[C_5]] +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: [[RES_6:%.*]] = add i1 [[RES_5]], [[C_7]] +; CHECK-NEXT: [[RES_7:%.*]] = add i1 [[RES_6]], [[C_8]] +; CHECK-NEXT: ret i1 [[RES_7]] +; %t.1 = trunc i32 %x to i16 %c.1 = icmp sgt i16 %t.1, 300 @@ -41,11 +43,11 @@ define internal i1 @f.trunc(i32 %x) { } define i1 @caller1() { -; CHECK-LABEL: define i1 @caller1() { -; CHECK-NEXT: %call.1 = tail call i1 @f.trunc(i32 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.trunc(i32 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller1( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.trunc(i32 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.trunc(i32 300) +; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] +; CHECK-NEXT: ret i1 [[RES]] ; %call.1 = tail call i1 @f.trunc(i32 100) %call.2 = tail call i1 @f.trunc(i32 300) @@ -56,22 +58,23 @@ define i1 @caller1() { ; x = [100, 301) define internal i1 @f.zext(i32 %x, i32 %y) { -; CHECK-LABEL: define internal i1 @f.zext(i32 %x, i32 %y) { -; CHECK-NEXT: %t.1 = zext i32 %x to i64 -; CHECK-NEXT: %c.2 = icmp sgt i64 %t.1, 299 -; CHECK-NEXT: %c.4 = icmp slt i64 %t.1, 101 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %t.2 = zext i32 %y to i64 -; CHECK-NEXT: %c.5 = icmp sgt i64 %t.2, 300 -; CHECK-NEXT: %c.6 = icmp sgt i64 %t.2, 299 -; CHECK-NEXT: %c.8 = icmp slt i64 %t.2, 1 -; CHECK-NEXT: %res.4 = add i1 %res.3, %c.5 -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: %res.6 = add i1 %res.5, false -; CHECK-NEXT: %res.7 = add i1 %res.6, %c.8 -; CHECK-NEXT: ret i1 %res.7 +; CHECK-LABEL: @f.zext( +; CHECK-NEXT: [[T_1:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[T_1]], 299 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[T_1]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[T_2:%.*]] = zext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[C_5:%.*]] = icmp sgt i64 [[T_2]], 300 +; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 299 +; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], 1 +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], [[C_5]] +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: 
[[RES_6:%.*]] = add i1 [[RES_5]], false +; CHECK-NEXT: [[RES_7:%.*]] = add i1 [[RES_6]], [[C_8]] +; CHECK-NEXT: ret i1 [[RES_7]] +; %t.1 = zext i32 %x to i64 %c.1 = icmp sgt i64 %t.1, 300 @@ -94,11 +97,11 @@ define internal i1 @f.zext(i32 %x, i32 %y) { } define i1 @caller.zext() { -; CHECK-LABEL: define i1 @caller.zext() { -; CHECK-NEXT: %call.1 = tail call i1 @f.zext(i32 100, i32 -120) -; CHECK-NEXT: %call.2 = tail call i1 @f.zext(i32 300, i32 900) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.zext( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.zext(i32 100, i32 -120) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.zext(i32 300, i32 900) +; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] +; CHECK-NEXT: ret i1 [[RES]] ; %call.1 = tail call i1 @f.zext(i32 100, i32 -120) %call.2 = tail call i1 @f.zext(i32 300, i32 900) @@ -108,21 +111,21 @@ define i1 @caller.zext() { ; x = [100, 301) define internal i1 @f.sext(i32 %x, i32 %y) { -; CHECK-LABEL: define internal i1 @f.sext(i32 %x, i32 %y) { -; CHECK-NEXT: [[T_1:%.*]] = zext i32 %x to i64 -; CHECK-NEXT: %c.2 = icmp sgt i64 [[T_1]], 299 -; CHECK-NEXT: %c.4 = icmp slt i64 [[T_1]], 101 -; CHECK-NEXT: %res.1 = add i1 false, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, false -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: %t.2 = sext i32 %y to i64 -; CHECK-NEXT: %c.6 = icmp sgt i64 %t.2, 899 -; CHECK-NEXT: %c.8 = icmp slt i64 %t.2, -119 -; CHECK-NEXT: %res.4 = add i1 %res.3, false -; CHECK-NEXT: %res.5 = add i1 %res.4, %c.6 -; CHECK-NEXT: %res.6 = add i1 %res.5, false -; CHECK-NEXT: %res.7 = add i1 %res.6, %c.8 -; CHECK-NEXT: ret i1 %res.7 +; CHECK-LABEL: @f.sext( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[X:%.*]] to i64 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TMP1]], 299 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[TMP1]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 false, [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], false +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: [[T_2:%.*]] = sext i32 [[Y:%.*]] to i64 +; CHECK-NEXT: [[C_6:%.*]] = icmp sgt i64 [[T_2]], 899 +; CHECK-NEXT: [[C_8:%.*]] = icmp slt i64 [[T_2]], -119 +; CHECK-NEXT: [[RES_4:%.*]] = add i1 [[RES_3]], false +; CHECK-NEXT: [[RES_5:%.*]] = add i1 [[RES_4]], [[C_6]] +; CHECK-NEXT: [[RES_6:%.*]] = add i1 [[RES_5]], false +; CHECK-NEXT: [[RES_7:%.*]] = add i1 [[RES_6]], [[C_8]] +; CHECK-NEXT: ret i1 [[RES_7]] ; %t.1 = sext i32 %x to i64 %c.1 = icmp sgt i64 %t.1, 300 @@ -145,11 +148,11 @@ define internal i1 @f.sext(i32 %x, i32 %y) { } define i1 @caller.sext() { -; CHECK-LABEL: define i1 @caller.sext() { -; CHECK-NEXT: %call.1 = tail call i1 @f.sext(i32 100, i32 -120) -; CHECK-NEXT: %call.2 = tail call i1 @f.sext(i32 300, i32 900) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.sext( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.sext(i32 100, i32 -120) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.sext(i32 300, i32 900) +; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] +; CHECK-NEXT: ret i1 [[RES]] ; %call.1 = tail call i1 @f.sext(i32 100, i32 -120) %call.2 = tail call i1 @f.sext(i32 300, i32 900) @@ -159,18 +162,18 @@ define i1 @caller.sext() { ; There's nothing we can do besides going to the full range or overdefined. 
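; For illustration only (not part of the test input): with %x known to be in
; [100, 301), SCCP carries that range through integer casts, e.g.
;   %t = zext i32 %x to i64     ; the range [100, 301) survives the zext
;   %c = icmp sgt i64 %t, 300   ; folds to false, as %c.1 does in @f.zext above
; The @f.fptosi test below shows the same range being lost once the value
; round-trips through 'double'.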
define internal i1 @f.fptosi(i32 %x) { -; CHECK-LABEL: define internal i1 @f.fptosi(i32 %x) { -; CHECK-NEXT: %to.double = sitofp i32 %x to double -; CHECK-NEXT: %add = fadd double 0.000000e+00, %to.double -; CHECK-NEXT: %to.i32 = fptosi double %add to i32 -; CHECK-NEXT: %c.1 = icmp sgt i32 %to.i32, 300 -; CHECK-NEXT: %c.2 = icmp sgt i32 %to.i32, 299 -; CHECK-NEXT: %c.3 = icmp slt i32 %to.i32, 100 -; CHECK-NEXT: %c.4 = icmp slt i32 %to.i32, 101 -; CHECK-NEXT: %res.1 = add i1 %c.1, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, %c.3 -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: ret i1 %res.3 +; CHECK-LABEL: @f.fptosi( +; CHECK-NEXT: [[TO_DOUBLE:%.*]] = sitofp i32 [[X:%.*]] to double +; CHECK-NEXT: [[ADD:%.*]] = fadd double 0.000000e+00, [[TO_DOUBLE]] +; CHECK-NEXT: [[TO_I32:%.*]] = fptosi double [[ADD]] to i32 +; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i32 [[TO_I32]], 300 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i32 [[TO_I32]], 299 +; CHECK-NEXT: [[C_3:%.*]] = icmp slt i32 [[TO_I32]], 100 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i32 [[TO_I32]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 [[C_1]], [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], [[C_3]] +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: ret i1 [[RES_3]] ; %to.double = sitofp i32 %x to double %add = fadd double 0.000000e+00, %to.double @@ -186,11 +189,11 @@ define internal i1 @f.fptosi(i32 %x) { } define i1 @caller.fptosi() { -; CHECK-LABEL: define i1 @caller.fptosi() { -; CHECK-NEXT: %call.1 = tail call i1 @f.fptosi(i32 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.fptosi(i32 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.fptosi( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fptosi(i32 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fptosi(i32 300) +; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] +; CHECK-NEXT: ret i1 [[RES]] ; %call.1 = tail call i1 @f.fptosi(i32 100) %call.2 = tail call i1 @f.fptosi(i32 300) @@ -200,18 +203,18 @@ define i1 @caller.fptosi() { ; There's nothing we can do besides going to the full range or overdefined. 
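; For illustration only (not part of the test input): the callers below pass
; known constants (@caller.fpext uses i16 100 and i16 300), yet the float
; round-trip
;   %f = sitofp i16 %x to float
;   %i = fptoui float %f to i64
; leaves %i overdefined in SCCP's lattice, so none of the four icmps fold.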
define internal i1 @f.fpext(i16 %x) { -; CHECK-LABEL: define internal i1 @f.fpext(i16 %x) { -; CHECK-NEXT: %to.float = sitofp i16 %x to float -; CHECK-NEXT: %to.double = fpext float %to.float to double -; CHECK-NEXT: %to.i64 = fptoui float %to.float to i64 -; CHECK-NEXT: %c.1 = icmp sgt i64 %to.i64, 300 -; CHECK-NEXT: %c.2 = icmp sgt i64 %to.i64, 299 -; CHECK-NEXT: %c.3 = icmp slt i64 %to.i64, 100 -; CHECK-NEXT: %c.4 = icmp slt i64 %to.i64, 101 -; CHECK-NEXT: %res.1 = add i1 %c.1, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, %c.3 -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: ret i1 %res.3 +; CHECK-LABEL: @f.fpext( +; CHECK-NEXT: [[TO_FLOAT:%.*]] = sitofp i16 [[X:%.*]] to float +; CHECK-NEXT: [[TO_DOUBLE:%.*]] = fpext float [[TO_FLOAT]] to double +; CHECK-NEXT: [[TO_I64:%.*]] = fptoui float [[TO_FLOAT]] to i64 +; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TO_I64]], 299 +; CHECK-NEXT: [[C_3:%.*]] = icmp slt i64 [[TO_I64]], 100 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[TO_I64]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 [[C_1]], [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], [[C_3]] +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: ret i1 [[RES_3]] ; %to.float = sitofp i16 %x to float %to.double = fpext float %to.float to double @@ -228,11 +231,11 @@ define internal i1 @f.fpext(i16 %x) { ; There's nothing we can do besides going to the full range or overdefined. define i1 @caller.fpext() { -; CHECK-LABEL: define i1 @caller.fpext() { -; CHECK-NEXT: %call.1 = tail call i1 @f.fpext(i16 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.fpext(i16 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.fpext( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.fpext(i16 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.fpext(i16 300) +; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] +; CHECK-NEXT: ret i1 [[RES]] ; %call.1 = tail call i1 @f.fpext(i16 100) %call.2 = tail call i1 @f.fpext(i16 300) @@ -242,17 +245,17 @@ define i1 @caller.fpext() { ; There's nothing we can do besides going to the full range or overdefined. 
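; For illustration only (not part of the test input): the pointer round-trip
;   %p = inttoptr i64 %x to i8*
;   %i = ptrtoint i8* %p to i64
; gets the same treatment: SCCP does not model pointers as integer ranges, so
; %i stays overdefined even though the call sites pass i64 100 and i64 300.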
define internal i1 @f.inttoptr.ptrtoint(i64 %x) { -; CHECK-LABEL: define internal i1 @f.inttoptr.ptrtoint(i64 %x) { -; CHECK-NEXT: %to.ptr = inttoptr i64 %x to i8* -; CHECK-NEXT: %to.i64 = ptrtoint i8* %to.ptr to i64 -; CHECK-NEXT: %c.1 = icmp sgt i64 %to.i64, 300 -; CHECK-NEXT: %c.2 = icmp sgt i64 %to.i64, 299 -; CHECK-NEXT: %c.3 = icmp slt i64 %to.i64, 100 -; CHECK-NEXT: %c.4 = icmp slt i64 %to.i64, 101 -; CHECK-NEXT: %res.1 = add i1 %c.1, %c.2 -; CHECK-NEXT: %res.2 = add i1 %res.1, %c.3 -; CHECK-NEXT: %res.3 = add i1 %res.2, %c.4 -; CHECK-NEXT: ret i1 %res.3 +; CHECK-LABEL: @f.inttoptr.ptrtoint( +; CHECK-NEXT: [[TO_PTR:%.*]] = inttoptr i64 [[X:%.*]] to i8* +; CHECK-NEXT: [[TO_I64:%.*]] = ptrtoint i8* [[TO_PTR]] to i64 +; CHECK-NEXT: [[C_1:%.*]] = icmp sgt i64 [[TO_I64]], 300 +; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i64 [[TO_I64]], 299 +; CHECK-NEXT: [[C_3:%.*]] = icmp slt i64 [[TO_I64]], 100 +; CHECK-NEXT: [[C_4:%.*]] = icmp slt i64 [[TO_I64]], 101 +; CHECK-NEXT: [[RES_1:%.*]] = add i1 [[C_1]], [[C_2]] +; CHECK-NEXT: [[RES_2:%.*]] = add i1 [[RES_1]], [[C_3]] +; CHECK-NEXT: [[RES_3:%.*]] = add i1 [[RES_2]], [[C_4]] +; CHECK-NEXT: ret i1 [[RES_3]] ; %to.ptr = inttoptr i64 %x to i8* %to.i64 = ptrtoint i8* %to.ptr to i64 @@ -267,11 +270,11 @@ define internal i1 @f.inttoptr.ptrtoint(i64 %x) { } define i1 @caller.inttoptr.ptrtoint() { -; CHECK-LABEL: define i1 @caller.inttoptr.ptrtoint() { -; CHECK-NEXT: %call.1 = tail call i1 @f.inttoptr.ptrtoint(i64 100) -; CHECK-NEXT: %call.2 = tail call i1 @f.inttoptr.ptrtoint(i64 300) -; CHECK-NEXT: %res = and i1 %call.1, %call.2 -; CHECK-NEXT: ret i1 %res +; CHECK-LABEL: @caller.inttoptr.ptrtoint( +; CHECK-NEXT: [[CALL_1:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 100) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call i1 @f.inttoptr.ptrtoint(i64 300) +; CHECK-NEXT: [[RES:%.*]] = and i1 [[CALL_1]], [[CALL_2]] +; CHECK-NEXT: ret i1 [[RES]] ; %call.1 = tail call i1 @f.inttoptr.ptrtoint(i64 100) %call.2 = tail call i1 @f.inttoptr.ptrtoint(i64 300) @@ -281,12 +284,12 @@ define i1 @caller.inttoptr.ptrtoint() { ; Make sure we do not create constant ranges for int to fp casts. define i1 @int_range_to_double_cast(i32 %a) { -; CHECK-LABEL: define i1 @int_range_to_double_cast(i32 %a) -; CHECK-NEXT: %r = and i32 %a, 255 -; CHECK-NEXT: %tmp4 = sitofp i32 %r to double -; CHECK-NEXT: %tmp10 = fadd double 0.000000e+00, %tmp4 -; CHECK-NEXT: %tmp11 = fcmp olt double %tmp4, %tmp10 -; CHECK-NEXT: ret i1 %tmp11 +; CHECK-LABEL: @int_range_to_double_cast( +; CHECK-NEXT: [[R:%.*]] = and i32 [[A:%.*]], 255 +; CHECK-NEXT: [[TMP4:%.*]] = sitofp i32 [[R]] to double +; CHECK-NEXT: [[TMP10:%.*]] = fadd double 0.000000e+00, [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = fcmp olt double [[TMP4]], [[TMP10]] +; CHECK-NEXT: ret i1 [[TMP11]] ; %r = and i32 %a, 255 %tmp4 = sitofp i32 %r to double @@ -297,13 +300,14 @@ define i1 @int_range_to_double_cast(i32 %a) { ; Make sure we do not use ranges to propagate info from vectors. 
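; For illustration only (not part of the test input): SCCP's range lattice is
; scalar-only, so in the test below a sequence like
;   %rem  = srem <8 x i16> <constants>, %vecinit7
;   %cast = bitcast <8 x i16> %rem to i128
; yields no range for %cast, and the srem/bitcast/trunc chain is kept as-is
; rather than being folded from per-element ranges.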
define i16 @vector_binop_and_cast() { -; CHECK-LABEL: define i16 @vector_binop_and_cast( +; CHECK-LABEL: @vector_binop_and_cast( ; CHECK-NEXT: entry: -; CHECK-NEXT: %vecinit7 = insertelement <8 x i16> , i16 undef, i32 0 -; CHECK-NEXT: %rem = srem <8 x i16> , %vecinit7 -; CHECK-NEXT: %0 = bitcast <8 x i16> %rem to i128 -; CHECK-NEXT: %1 = trunc i128 %0 to i16 -; CHECK-NEXT: ret i16 %1 +; CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x i16> , i16 undef, i32 0 +; CHECK-NEXT: [[REM:%.*]] = srem <8 x i16> , [[VECINIT7]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[REM]] to i128 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i128 [[TMP0]] to i16 +; CHECK-NEXT: ret i16 [[TMP1]] +; entry: %vecinit7 = insertelement <8 x i16> , i16 undef, i32 0 %rem = srem <8 x i16> , %vecinit7 @@ -311,3 +315,25 @@ entry: %1 = trunc i128 %0 to i16 ret i16 %1 } + +define internal i64 @f.sext_to_zext(i32 %t) { +; CHECK-LABEL: @f.sext_to_zext( +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[T:%.*]] to i64 +; CHECK-NEXT: ret i64 [[TMP1]] +; + %a = sext i32 %t to i64 + ret i64 %a +} + +define i64 @caller.sext_to_zext(i32 %i) { +; CHECK-LABEL: @caller.sext_to_zext( +; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[I:%.*]], 9 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: [[T:%.*]] = call i64 @f.sext_to_zext(i32 [[CONV]]) +; CHECK-NEXT: ret i64 [[T]] +; + %cmp = icmp sle i32 %i, 9 + %conv = zext i1 %cmp to i32 + %t = call i64 @f.sext_to_zext(i32 %conv) + ret i64 %t +} diff --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll new file mode 100644 index 0000000000000..eafd2090fd2e4 --- /dev/null +++ b/llvm/test/Transforms/SCCP/overdefined-ext.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -sccp -S | FileCheck %s + +define i32 @zext_lshr(i1 %t0) { +; CHECK-LABEL: @zext_lshr( +; CHECK-NEXT: [[T1:%.*]] = zext i1 [[T0:%.*]] to i32 +; CHECK-NEXT: ret i32 0 +; + %t1 = zext i1 %t0 to i32 + %t2 = lshr i32 %t1, 1 + ret i32 %t2 +} + +define i1 @zext_icmp(i1 %t0) { +; CHECK-LABEL: @zext_icmp( +; CHECK-NEXT: [[T1:%.*]] = zext i1 [[T0:%.*]] to i32 +; CHECK-NEXT: ret i1 false +; + %t1 = zext i1 %t0 to i32 + %t2 = icmp eq i32 %t1, 2 + ret i1 %t2 +} + +; negative test. SCCP operates poorly with vector ranges + +define <2 x i1> @zext_vector(<2 x i1> %t0) { +; CHECK-LABEL: @zext_vector( +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], +; CHECK-NEXT: ret <2 x i1> [[T2]] +; + %t1 = zext <2 x i1> %t0 to <2 x i32> + %t2 = icmp eq <2 x i32> %t1, + ret <2 x i1> %t2 +} + +; negative test. 
SCCP operates poorly with vector ranges + +define <2 x i1> @zext_vector2(<2 x i1> %t0) { +; CHECK-LABEL: @zext_vector2( +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]] +; CHECK-NEXT: ret <2 x i1> [[T3]] +; + %t1 = zext <2 x i1> %t0 to <2 x i32> + %t2 = add <2 x i32> %t1, + %t3 = icmp eq <2 x i32> %t1, %t2 + ret <2 x i1> %t3 +} + +; negative test: %t2 can be replaced by %t1, but SCCP operates by ranges only + +define i32 @sext_ashr(i1 %t0) { +; CHECK-LABEL: @sext_ashr( +; CHECK-NEXT: [[T1:%.*]] = sext i1 [[T0:%.*]] to i32 +; CHECK-NEXT: [[T2:%.*]] = ashr i32 [[T1]], 1 +; CHECK-NEXT: ret i32 [[T2]] +; + %t1 = sext i1 %t0 to i32 + %t2 = ashr i32 %t1, 1 + ret i32 %t2 +} + +define i1 @sext_icmp(i1 %t0) { +; CHECK-LABEL: @sext_icmp( +; CHECK-NEXT: [[T1:%.*]] = sext i1 [[T0:%.*]] to i32 +; CHECK-NEXT: ret i1 false +; + %t1 = sext i1 %t0 to i32 + %t2 = icmp eq i32 %t1, 2 + ret i1 %t2 +} + +; negative test. SCCP operates poorly with vector ranges + +define <2 x i1> @sext_vector(<2 x i1> %t0) { +; CHECK-LABEL: @sext_vector( +; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T1]], +; CHECK-NEXT: ret <2 x i1> [[T2]] +; + %t1 = sext <2 x i1> %t0 to <2 x i32> + %t2 = icmp eq <2 x i32> %t1, + ret <2 x i1> %t2 +} + +; negative test. SCCP operates poorly with vector ranges + +define <2 x i1> @sext_vector2(<2 x i1> %t0) { +; CHECK-LABEL: @sext_vector2( +; CHECK-NEXT: [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = add <2 x i32> [[T1]], +; CHECK-NEXT: [[T3:%.*]] = icmp eq <2 x i32> [[T1]], [[T2]] +; CHECK-NEXT: ret <2 x i1> [[T3]] +; + %t1 = sext <2 x i1> %t0 to <2 x i32> + %t2 = add <2 x i32> %t1, + %t3 = icmp eq <2 x i32> %t1, %t2 + ret <2 x i1> %t3 +} diff --git a/llvm/test/Transforms/SCCP/pr52253.ll b/llvm/test/Transforms/SCCP/pr52253.ll new file mode 100644 index 0000000000000..8cef4f8bcd2ef --- /dev/null +++ b/llvm/test/Transforms/SCCP/pr52253.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -sccp -S | FileCheck %s + +define i1 @foo(i32 %t4, i32 %t10) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[T09:%.*]] = shl i32 [[T10:%.*]], 24 +; CHECK-NEXT: [[T010:%.*]] = ashr exact i32 [[T09]], 24 +; CHECK-NEXT: [[T011:%.*]] = add nsw i32 [[T010]], 979 +; CHECK-NEXT: [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8 +; CHECK-NEXT: [[T12:%.*]] = icmp eq i8 [[T11]], 0 +; CHECK-NEXT: [[T14:%.*]] = zext i1 [[T12]] to i32 +; CHECK-NEXT: [[T15:%.*]] = shl i32 [[T4]], [[T14]] +; CHECK-NEXT: [[T17:%.*]] = and i32 [[T15]], 255 +; CHECK-NEXT: ret i1 false +; + %t09 = shl i32 %t10, 24 + %t010 = ashr exact i32 %t09, 24 + %t011 = add nsw i32 %t010, 979 + %t11 = trunc i32 %t4 to i8 + %t12 = icmp eq i8 %t11, 0 + %t14 = zext i1 %t12 to i32 + %t15 = shl i32 %t4, %t14 + %t17 = and i32 %t15, 255 + %t18 = icmp eq i32 %t011, %t17 + ret i1 %t18 +} + +define i1 @bar(i32 %t4, i32 %t10) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: [[T09:%.*]] = shl i32 [[T10:%.*]], 24 +; CHECK-NEXT: [[T010:%.*]] = ashr exact i32 [[T09]], 24 +; CHECK-NEXT: [[T011:%.*]] = add nsw i32 [[T010]], 979 +; CHECK-NEXT: [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8 +; CHECK-NEXT: [[T12:%.*]] = icmp eq i8 [[T11]], 0 +; CHECK-NEXT: [[T14:%.*]] = zext i1 [[T12]] to i8 +; CHECK-NEXT: [[T15:%.*]] = shl i8 [[T11]], [[T14]] +; CHECK-NEXT: [[T17:%.*]] = zext i8 [[T15]] to i32 +; CHECK-NEXT: ret i1 false +; + %t09 = 
shl i32 %t10, 24 + %t010 = ashr exact i32 %t09, 24 + %t011 = add nsw i32 %t010, 979 + %t11 = trunc i32 %t4 to i8 + %t12 = icmp eq i8 %t11, 0 + %t14 = zext i1 %t12 to i8 + %t15 = shl i8 %t11, %t14 + %t17 = zext i8 %t15 to i32 + %t18 = icmp eq i32 %t011, %t17 + ret i1 %t18 +} + +define i1 @foobar(i32 %t4, i32 %t10) { +; CHECK-LABEL: @foobar( +; CHECK-NEXT: [[T09:%.*]] = shl i32 [[T10:%.*]], 24 +; CHECK-NEXT: [[T010:%.*]] = ashr exact i32 [[T09]], 24 +; CHECK-NEXT: [[T011:%.*]] = add nsw i32 [[T010]], 979 +; CHECK-NEXT: [[T11:%.*]] = trunc i32 [[T4:%.*]] to i8 +; CHECK-NEXT: [[T12:%.*]] = icmp eq i8 [[T11]], 0 +; CHECK-NEXT: [[T13:%.*]] = zext i8 [[T11]] to i32 +; CHECK-NEXT: [[T14:%.*]] = select i1 [[T12]], i32 1, i32 0 +; CHECK-NEXT: [[T15:%.*]] = shl nuw nsw i32 [[T13]], [[T14]] +; CHECK-NEXT: [[T16:%.*]] = trunc i32 [[T15]] to i8 +; CHECK-NEXT: [[T17:%.*]] = zext i8 [[T16]] to i32 +; CHECK-NEXT: ret i1 false +; + %t09 = shl i32 %t10, 24 + %t010 = ashr exact i32 %t09, 24 + %t011 = add nsw i32 %t010, 979 + + %t11 = trunc i32 %t4 to i8 + %t12 = icmp eq i8 %t11, 0 + %t13 = zext i8 %t11 to i32 + %t14 = select i1 %t12, i32 1, i32 0 + %t15 = shl nuw nsw i32 %t13, %t14 + %t16 = trunc i32 %t15 to i8 + %t17 = zext i8 %t16 to i32 + + %t18 = icmp eq i32 %t011, %t17 + ret i1 %t18 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll new file mode 100644 index 0000000000000..6794553ffd6d0 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr52275.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- | FileCheck %s + +define <4 x i8> @pr52275(<4 x i8> %v, i8* %x) { +; CHECK-LABEL: @pr52275( +; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[X]] to <2 x i8>* +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[V11:%.*]] = shufflevector <4 x i8> [[V:%.*]], <4 x i8> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[V2:%.*]] = add <4 x i8> [[V11]], [[V11]] +; CHECK-NEXT: ret <4 x i8> [[V2]] +; + %x0 = load i8, i8* %x, align 4 + %g1 = getelementptr inbounds i8, i8* %x, i64 1 + %x1 = load i8, i8* %g1, align 4 + %v0 = insertelement <4 x i8> %v, i8 %x0, i64 0 + %v1 = insertelement <4 x i8> %v0, i8 %x1, i64 1 + %v2 = add <4 x i8> %v0, %v1 + ret <4 x i8> %v2 +} diff --git a/llvm/test/Transforms/SampleProfile/profile-format-compress.ll b/llvm/test/Transforms/SampleProfile/profile-format-compress.ll index 8303db8f8888e..be4e27c9641b2 100644 --- a/llvm/test/Transforms/SampleProfile/profile-format-compress.ll +++ b/llvm/test/Transforms/SampleProfile/profile-format-compress.ll @@ -101,7 +101,7 @@ attributes #0 = { "use-sample-profile" } !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) !1 = !DIFile(filename: "calls.cc", directory: ".") !2 = !{} -!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!4 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: 
false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) !5 = !DIFile(filename: "calls.cc", directory: ".") !6 = !DISubroutineType(types: !2) !7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) diff --git a/llvm/test/Transforms/SampleProfile/profile-format.ll b/llvm/test/Transforms/SampleProfile/profile-format.ll index 72765c3054907..f0df061e12145 100644 --- a/llvm/test/Transforms/SampleProfile/profile-format.ll +++ b/llvm/test/Transforms/SampleProfile/profile-format.ll @@ -110,7 +110,7 @@ attributes #0 = { "use-sample-profile" } !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) !1 = !DIFile(filename: "calls.cc", directory: ".") !2 = !{} -!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) +!4 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, retainedNodes: !2) !5 = !DIFile(filename: "calls.cc", directory: ".") !6 = !DISubroutineType(types: !2) !7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe.mir b/llvm/test/Transforms/SampleProfile/pseudo-probe.mir index 8175a4769c850..d85c1826870b9 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe.mir +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe.mir @@ -19,11 +19,11 @@ body: | bb.2: PSEUDO_PROBE 6699318081062747564, 3, 0, 0 PSEUDO_PROBE 6699318081062747564, 4, 0, 0 - RETQ + RET64 bb.1: PSEUDO_PROBE 6699318081062747564, 2, 0, 0 PSEUDO_PROBE 6699318081062747564, 4, 0, 0 - RETQ + RET64 ... diff --git a/llvm/test/Transforms/Scalarizer/dbg-invariant.ll b/llvm/test/Transforms/Scalarizer/dbg-invariant.ll new file mode 100644 index 0000000000000..ff334ebae3957 --- /dev/null +++ b/llvm/test/Transforms/Scalarizer/dbg-invariant.ll @@ -0,0 +1,64 @@ +; RUN: opt -strip-debug -passes=scalarizer -S < %s | FileCheck %s +; RUN: opt -passes=scalarizer -S < %s | FileCheck %s + +; This input caused the scalarizer to violate a debug info +; invariance by using the wrong insertion point. 
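; Note that the two RUN lines above share one set of CHECKs: scalarization
; must produce identical output whether the debug intrinsics are stripped
; first or kept, which is the debug-info invariant this test guards.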
+ +; CHECK: %0 = load <8 x i16> + +; CHECK: %.i0 = extractelement <8 x i16> %0, i32 0 +; CHECK-NEXT: %.i01 = add i16 %.i0, 28690 +; CHECK: %.i1 = extractelement <8 x i16> %0, i32 1 +; CHECK-NEXT: %.i12 = add i16 %.i1, 28690 +; CHECK: %.i2 = extractelement <8 x i16> %0, i32 2 +; CHECK-NEXT: %.i23 = add i16 %.i2, 28690 +; CHECK: %.i3 = extractelement <8 x i16> %0, i32 3 +; CHECK-NEXT: %.i34 = add i16 %.i3, 28690 +; CHECK: %.i4 = extractelement <8 x i16> %0, i32 4 +; CHECK-NEXT: %.i45 = add i16 %.i4, 28690 +; CHECK: %.i5 = extractelement <8 x i16> %0, i32 5 +; CHECK-NEXT: %.i56 = add i16 %.i5, 28690 +; CHECK: %.i6 = extractelement <8 x i16> %0, i32 6 +; CHECK-NEXT: %.i67 = add i16 %.i6, 28690 +; CHECK: %.i7 = extractelement <8 x i16> %0, i32 7 +; CHECK-NEXT: = add i16 %.i7, 28690 + +@d = external global [8 x i16], align 1 +@e = external global i16, align 1 + +; Function Attrs: nofree norecurse nounwind +define dso_local void @foo() local_unnamed_addr #0 !dbg !7 { +entry: + %0 = load <8 x i16>, <8 x i16>* bitcast ([8 x i16]* @d to <8 x i16>*), align 1 + call void @llvm.dbg.value(metadata i16 0, metadata !11, metadata !DIExpression()), !dbg !13 + %1 = add <8 x i16> %0, , !dbg !13 + store <8 x i16> %1, <8 x i16>* bitcast ([8 x i16]* @d to <8 x i16>*), align 1, !dbg !13 + %2 = extractelement <8 x i16> %1, i32 7, !dbg !13 + store i16 %2, i16* @e, align 1, !dbg !13 + ret void +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { nofree norecurse nounwind } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "foo.c", directory: "/") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 1} +!6 = !{!"clang version 11.0.0 "} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, type: !8, scopeLine: 4, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !10) +!8 = !DISubroutineType(types: !9) +!9 = !{null} +!10 = !{!11} +!11 = !DILocalVariable(name: "i", scope: !7, file: !1, line: 5, type: !12) +!12 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed) +!13 = !DILocation(line: 0, scope: !7) diff --git a/llvm/test/Transforms/Scalarizer/phi-order.ll b/llvm/test/Transforms/Scalarizer/phi-order.ll new file mode 100644 index 0000000000000..523daf3c900ae --- /dev/null +++ b/llvm/test/Transforms/Scalarizer/phi-order.ll @@ -0,0 +1,29 @@ +; RUN: opt %s -passes='function(scalarizer)' -S -o - | FileCheck %s + +; This input caused the scalarizer to insert non-PHI nodes +; in between PHI nodes (%1 and %2). 
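; For illustration only (not part of the test input; names hypothetical):
; LLVM IR requires every phi to appear before any non-phi in its block, so
; the scalarizer has to place its insertelement chain after the final phi:
;   %a.i0 = phi float [ 0.0, %.entry ], [ %a.i0, %3 ]   ; phis first
;   %b    = phi float [ 1.0, %.entry ], [ 2.0, %3 ]
;   %v0   = insertelement <3 x float> poison, float %a.i0, i32 0  ; then the rest
; The CHECK lines below verify exactly this ordering.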
+ +define <3 x float> @func(i32 %inval) { +.entry: + br label %0 + +0: ; preds = %3, %.entry +; CHECK: %.i01 = phi float [ 0.000000e+00, %.entry ], [ %.i01, %3 ] +; CHECK-NEXT: %.i12 = phi float [ 0.000000e+00, %.entry ], [ %.i12, %3 ] +; CHECK-NEXT: %.i23 = phi float [ 0.000000e+00, %.entry ], [ %.i23, %3 ] +; CHECK-NEXT: %1 = phi float [ 1.000000e+00, %.entry ], [ 2.000000e+00, %3 ] +; CHECK-NEXT: %.upto0 = insertelement <3 x float> poison, float %.i01, i32 0 +; CHECK-NEXT: %.upto1 = insertelement <3 x float> %.upto0, float %.i12, i32 1 +; CHECK-NEXT: %2 = insertelement <3 x float> %.upto1, float %.i23, i32 2 + %1 = phi <3 x float> [ , %.entry], [ %1, %3 ] + %2 = phi float [ 1.0, %.entry], [ 2.0, %3 ] + br label %3 + +3: ; preds = %0 + %cond = icmp eq i32 %inval, 0 + br i1 %cond, label %0, label %exit + +exit: + ret <3 x float> %1 +} + diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 52968ad8436fd..6d06bddecf918 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -399,3 +399,6 @@ def have_ld64_plugin_support(): if config.expensive_checks: config.available_features.add('expensive_checks') + +if "MemoryWithOrigins" in config.llvm_use_sanitizer: + config.available_features.add('use_msan_with_origins') diff --git a/llvm/test/tools/llvm-cfi-verify/X86/blacklist-expected-unprotected.s b/llvm/test/tools/llvm-cfi-verify/X86/ignorelist-expected-unprotected.s similarity index 69% rename from llvm/test/tools/llvm-cfi-verify/X86/blacklist-expected-unprotected.s rename to llvm/test/tools/llvm-cfi-verify/X86/ignorelist-expected-unprotected.s index dccbd69181216..b1ccc5f7e298e 100644 --- a/llvm/test/tools/llvm-cfi-verify/X86/blacklist-expected-unprotected.s +++ b/llvm/test/tools/llvm-cfi-verify/X86/ignorelist-expected-unprotected.s @@ -1,11 +1,11 @@ # RUN: llvm-mc %S/Inputs/unprotected-lineinfo.s -filetype obj \ # RUN: -triple x86_64-linux-elf -o %t.o -# RUN: echo "src:*tiny*" > %t.blacklist.txt -# RUN: llvm-cfi-verify %t.o %t.blacklist.txt | FileCheck %s +# RUN: echo "src:*tiny*" > %t.ignorelist.txt +# RUN: llvm-cfi-verify %t.o %t.ignorelist.txt | FileCheck %s # CHECK-LABEL: {{^Instruction: .* \(FAIL_BAD_CONDITIONAL_BRANCH\)}} # CHECK-NEXT: tiny.cc:11 -# CHECK-NEXT: {{^Blacklist Match:.*blacklist\.txt:1$}} +# CHECK-NEXT: {{^Ignorelist Match:.*ignorelist\.txt:1$}} # CHECK-NEXT: ====> Expected Unprotected # CHECK: Expected Protected: 0 (0.00%) @@ -13,5 +13,5 @@ # CHECK: Expected Unprotected: 1 (100.00%) # CHECK: Unexpected Unprotected (BAD): 0 (0.00%) -# Source: (blacklist.txt): +# Source: (ignorelist.txt): # src:*tiny* diff --git a/llvm/test/tools/llvm-cfi-verify/X86/blacklist-match-fun.s b/llvm/test/tools/llvm-cfi-verify/X86/ignorelist-match-fun.s similarity index 69% rename from llvm/test/tools/llvm-cfi-verify/X86/blacklist-match-fun.s rename to llvm/test/tools/llvm-cfi-verify/X86/ignorelist-match-fun.s index 21e1ffe7c5c97..2e62adfb3e71b 100644 --- a/llvm/test/tools/llvm-cfi-verify/X86/blacklist-match-fun.s +++ b/llvm/test/tools/llvm-cfi-verify/X86/ignorelist-match-fun.s @@ -1,11 +1,11 @@ # RUN: llvm-mc %S/Inputs/unprotected-fullinfo.s -filetype obj \ # RUN: -triple x86_64-linux-elf -o %t.o -# RUN: echo "fun:*main*" > %t.blacklist.txt -# RUN: llvm-cfi-verify %t.o %t.blacklist.txt | FileCheck %s +# RUN: echo "fun:*main*" > %t.ignorelist.txt +# RUN: llvm-cfi-verify %t.o %t.ignorelist.txt | FileCheck %s # CHECK-LABEL: {{^Instruction: .* \(FAIL_BAD_CONDITIONAL_BRANCH\)}} # CHECK-NEXT: tiny.cc:11 -# CHECK-NEXT: {{^Blacklist Match:.*blacklist\.txt:1$}} +# CHECK-NEXT: {{^Ignorelist 
Match:.*ignorelist\.txt:1$}} # CHECK-NEXT: ====> Expected Unprotected # CHECK: Expected Protected: 0 (0.00%) @@ -13,5 +13,5 @@ # CHECK: Expected Unprotected: 1 (100.00%) # CHECK: Unexpected Unprotected (BAD): 0 (0.00%) -# Source: (blacklist.txt): +# Source: (ignorelist.txt): # fun:*main* diff --git a/llvm/test/tools/llvm-cfi-verify/X86/blacklist-unexpected-protected.s b/llvm/test/tools/llvm-cfi-verify/X86/ignorelist-unexpected-protected.s similarity index 68% rename from llvm/test/tools/llvm-cfi-verify/X86/blacklist-unexpected-protected.s rename to llvm/test/tools/llvm-cfi-verify/X86/ignorelist-unexpected-protected.s index 2a32e78c8df62..7147ca28794a3 100644 --- a/llvm/test/tools/llvm-cfi-verify/X86/blacklist-unexpected-protected.s +++ b/llvm/test/tools/llvm-cfi-verify/X86/ignorelist-unexpected-protected.s @@ -1,11 +1,11 @@ # RUN: llvm-mc %S/Inputs/protected-lineinfo.s -filetype obj \ # RUN: -triple x86_64-linux-elf -o %t.o -# RUN: echo "src:*tiny*" > %t.blacklist.txt -# RUN: llvm-cfi-verify %t.o %t.blacklist.txt | FileCheck %s +# RUN: echo "src:*tiny*" > %t.ignorelist.txt +# RUN: llvm-cfi-verify %t.o %t.ignorelist.txt | FileCheck %s # CHECK-LABEL: {{^Instruction: .* \(PROTECTED\)}} # CHECK-NEXT: tiny.cc:11 -# CHECK-NEXT: {{^Blacklist Match:.*blacklist\.txt:1$}} +# CHECK-NEXT: {{^Ignorelist Match:.*ignorelist\.txt:1$}} # CHECK-NEXT: ====> Unexpected Protected # CHECK: Expected Protected: 0 (0.00%) @@ -13,5 +13,5 @@ # CHECK: Expected Unprotected: 0 (0.00%) # CHECK: Unexpected Unprotected (BAD): 0 (0.00%) -# Source: (blacklist.txt): +# Source: (ignorelist.txt): # src:*tiny* diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s index cd62bf9159898..217f001a0008d 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types.s @@ -112,7 +112,7 @@ # CHECK: DW_AT_type{{.*}}"int *const (&)[1]") # CHECK: DW_AT_type{{.*}}"int *const[1]") # CHECK: DW_AT_type{{.*}}"const int (&)[1]") -# CHECK: DW_AT_type{{.*}}"const int [1]") +# CHECK: DW_AT_type{{.*}}"const int[1]") # subroutine types # CHECK: DW_AT_type{{.*}}"int ()" @@ -129,7 +129,7 @@ # qualified types # CHECK: DW_AT_type{{.*}}"ns::inner" # CHECK: DW_AT_type{{.*}}"ns::inner ()" -# CHECK: DW_AT_type{{.*}}"ns::inner [1]" +# CHECK: DW_AT_type{{.*}}"ns::inner[1]" # CHECK: DW_AT_type{{.*}}"ns::inner *" # CHECK: DW_AT_type{{.*}}"ns::inner (ns::inner::*)(ns::inner)" # CHECK: DW_AT_type{{.*}}"const ns::inner" diff --git a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types_handcrafted.s b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types_handcrafted.s index 8670b6ec78d95..875395e634c17 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types_handcrafted.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/prettyprint_types_handcrafted.s @@ -7,7 +7,7 @@ # CHECK: DW_TAG_variable # array_type -# CHECK: DW_AT_type{{.*}}"int{{ }} +# CHECK: DW_AT_type{{.*}}"int # Testing with a default lower bound of 0 and the following explicit bounds: # lower_bound(1) # CHECK-NOT: {{.}} @@ -34,7 +34,7 @@ # array_type with a language with a default lower bound of 1 instead of 0 and # an upper bound of 2. 
This describes an array with 2 elements (whereas with a # default lower bound of 0 it would be an array of 3 elements) -# CHECK: DW_AT_type{{.*}}"int [2]" +# CHECK: DW_AT_type{{.*}}"int[2]" .section .debug_str,"MS",@progbits,1 .Lint_name: diff --git a/llvm/test/tools/llvm-dwarfdump/X86/simplified-template-names.s b/llvm/test/tools/llvm-dwarfdump/X86/simplified-template-names.s new file mode 100644 index 0000000000000..a04750cd75970 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/X86/simplified-template-names.s @@ -0,0 +1,11974 @@ +# RUN: llvm-mc < %s -filetype obj -triple x86_64 -o - \ +# RUN: | llvm-dwarfdump --verify - | FileCheck %s + +# Checking the LLVM side of cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp +# Compile that file with `-g -Xclang -gsimple-template-names=mangled -Xclang -debug-forward-template-params -S` +# to (re)generate this assembly file - while it might be slightly overkill in +# some ways, it seems small/simple enough to keep this as an exact match for +# that end to end test. + +# CHECK: No errors. + .text + .file "simplified_template_names.cpp" + .file 1 "./" "cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp" + .file 2 "/usr/include/x86_64-linux-gnu/bits" "types.h" + .file 3 "/usr/include/x86_64-linux-gnu/bits" "stdint-intn.h" + .file 4 "/usr/local/google/home/blaikie/install/bin/../lib/gcc/x86_64-pc-linux-gnu/10.0.0/../../../../include/c++/10.0.0" "cstdint" + .file 5 "/usr/include" "stdint.h" + .file 6 "/usr/include/x86_64-linux-gnu/bits" "stdint-uintn.h" + .globl _Zli5_suffy # -- Begin function _Zli5_suffy + .p2align 4, 0x90 + .type _Zli5_suffy,@function +_Zli5_suffy: # @_Zli5_suffy +.Lfunc_begin0: + .loc 1 134 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:134:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) +.Ltmp0: + .loc 1 134 44 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:134:44 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp1: +.Lfunc_end0: + .size _Zli5_suffy, .Lfunc_end0-_Zli5_suffy + .cfi_endproc + # -- End function + .globl main # -- Begin function main + .p2align 4, 0x90 + .type main,@function +main: # @main +.Lfunc_begin1: + .loc 1 166 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:166:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $64, %rsp +.Ltmp2: + .loc 1 168 8 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:8 + movb .L__const.main.L, %al + movb %al, -16(%rbp) + .loc 1 169 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:169:3 + callq _Z2f1IJiEEvv + .loc 1 170 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:170:3 + callq _Z2f1IJfEEvv + .loc 1 171 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:171:3 + callq _Z2f1IJbEEvv + .loc 1 172 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:172:3 + callq _Z2f1IJdEEvv + .loc 1 173 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:173:3 + callq _Z2f1IJlEEvv + .loc 1 174 3 # 
cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:174:3 + callq _Z2f1IJsEEvv + .loc 1 175 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:175:3 + callq _Z2f1IJjEEvv + .loc 1 176 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:176:3 + callq _Z2f1IJyEEvv + .loc 1 177 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:177:3 + callq _Z2f1IJxEEvv + .loc 1 178 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:178:3 + callq _Z2f1IJ3udtEEvv + .loc 1 179 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:179:3 + callq _Z2f1IJN2ns3udtEEEvv + .loc 1 180 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:180:3 + callq _Z2f1IJPN2ns3udtEEEvv + .loc 1 181 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:181:3 + callq _Z2f1IJN2ns5inner3udtEEEvv + .loc 1 182 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:182:3 + callq _Z2f1IJ2t1IJiEEEEvv + .loc 1 183 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:183:3 + callq _Z2f1IJifEEvv + .loc 1 184 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:184:3 + callq _Z2f1IJPiEEvv + .loc 1 185 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:185:3 + callq _Z2f1IJRiEEvv + .loc 1 186 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:186:3 + callq _Z2f1IJOiEEvv + .loc 1 187 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:187:3 + callq _Z2f1IJKiEEvv + .loc 1 189 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:189:3 + callq _Z2f1IJvEEvv + .loc 1 190 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:190:3 + callq _Z2f1IJN11outer_class11inner_classEEEvv + .loc 1 191 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:191:3 + callq _Z2f1IJmEEvv + .loc 1 192 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:192:3 + callq _Z2f2ILb1ELi3EEvv + .loc 1 193 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:193:3 + callq _Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv + .loc 1 194 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:194:3 + callq _Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv + .loc 1 195 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:195:3 + callq _Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv + .loc 1 196 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:196:3 + callq _Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv + .loc 1 197 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:197:3 + callq _Z2f3IPiJXadL_Z1iEEEEvv + .loc 1 198 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:198:3 + callq _Z2f3IPiJLS0_0EEEvv + .loc 1 200 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:200:3 + callq _Z2f3ImJLm1EEEvv + .loc 1 201 3 # 
cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:201:3 + callq _Z2f3IyJLy1EEEvv + .loc 1 202 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:202:3 + callq _Z2f3IlJLl1EEEvv + .loc 1 203 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:203:3 + callq _Z2f3IjJLj1EEEvv + .loc 1 204 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:204:3 + callq _Z2f3IsJLs1EEEvv + .loc 1 205 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:205:3 + callq _Z2f3IhJLh0EEEvv + .loc 1 206 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:206:3 + callq _Z2f3IaJLa0EEEvv + .loc 1 207 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:207:3 + callq _Z2f3ItJLt1ELt2EEEvv + .loc 1 208 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:208:3 + callq _Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv + .loc 1 209 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:209:3 + callq _Z2f3InJLn18446744073709551614EEEvv + .loc 1 210 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:210:3 + callq _Z2f4IjLj3EEvv + .loc 1 211 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:211:3 + callq _Z2f1IJ2t3IiLb0EEEEvv + .loc 1 212 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:212:3 + callq _Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv + .loc 1 213 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:213:3 + callq _Z2f1IJZ4mainE3$_1EEvv + .loc 1 215 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:215:3 + callq _Z2f1IJFifEEEvv + .loc 1 216 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:216:3 + callq _Z2f1IJRKiEEvv + .loc 1 217 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:217:3 + callq _Z2f1IJRPKiEEvv + .loc 1 218 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:218:3 + callq _Z2f1IJN12_GLOBAL__N_12t5EEEvv + .loc 1 219 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:219:3 + callq _Z2f1IJDnEEvv + .loc 1 220 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:220:3 + callq _Z2f1IJPlS0_EEvv + .loc 1 221 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:221:3 + callq _Z2f1IJPlP3udtEEvv + .loc 1 222 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:222:3 + callq _Z2f1IJKPvEEvv + .loc 1 223 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:223:3 + callq _Z2f1IJPKPKvEEvv + .loc 1 224 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:224:3 + callq _Z2f1IJFvvEEEvv + .loc 1 225 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:225:3 + callq _Z2f1IJPFvvEEEvv + .loc 1 226 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:226:3 + callq _Z2f1IJPZ4mainE3$_1EEvv + .loc 1 227 3 # 
cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:227:3 + callq _Z2f1IJZ4mainE3$_2EEvv + .loc 1 228 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:228:3 + callq _Z2f1IJPZ4mainE3$_2EEvv + .loc 1 229 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:229:3 + callq _Z2f5IJ2t1IJiEEEiEvv + .loc 1 230 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:230:3 + callq _Z2f5IJEiEvv + .loc 1 231 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:231:3 + callq _Z2f6I2t1IJiEEJEEvv + .loc 1 232 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:232:3 + callq _Z2f1IJEEvv + .loc 1 233 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:233:3 + callq _Z2f1IJPKvS1_EEvv + .loc 1 234 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:234:3 + callq _Z2f1IJP2t1IJPiEEEEvv + .loc 1 235 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:235:3 + callq _Z2f1IJA_PiEEvv + .loc 1 237 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:237:6 + leaq -40(%rbp), %rdi + movl $1, %esi + callq _ZN2t6lsIiEEvi + .loc 1 238 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:238:6 + leaq -40(%rbp), %rdi + movl $1, %esi + callq _ZN2t6ltIiEEvi + .loc 1 239 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:239:6 + leaq -40(%rbp), %rdi + movl $1, %esi + callq _ZN2t6leIiEEvi + .loc 1 240 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:240:6 + leaq -40(%rbp), %rdi + callq _ZN2t6cvP2t1IJfEEIiEEv + .loc 1 241 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:241:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6miIiEEvi + .loc 1 242 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:242:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6mlIiEEvi + .loc 1 243 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:243:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6dvIiEEvi + .loc 1 244 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:244:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6rmIiEEvi + .loc 1 245 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:245:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6eoIiEEvi + .loc 1 246 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:246:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6anIiEEvi + .loc 1 247 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:247:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6orIiEEvi + .loc 1 248 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:248:6 + leaq -40(%rbp), %rdi + callq _ZN2t6coIiEEvv + .loc 1 249 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:249:6 + leaq -40(%rbp), %rdi + callq _ZN2t6ntIiEEvv + .loc 1 250 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:250:6 + leaq -40(%rbp), %rdi + movl $3, 
%esi + callq _ZN2t6aSIiEEvi + .loc 1 251 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:251:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6gtIiEEvi + .loc 1 252 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:252:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6cmIiEEvi + .loc 1 253 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:253:6 + leaq -40(%rbp), %rdi + callq _ZN2t6clIiEEvv + .loc 1 254 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:254:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6ixIiEEvi + .loc 1 255 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:255:6 + leaq -40(%rbp), %rdi + movl $3, %esi + callq _ZN2t6ssIiEEvi + .loc 1 256 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:256:3 + xorl %eax, %eax + movl %eax, %edi + xorl %esi, %esi + callq _ZN2t6nwIiEEPvmT_ + .loc 1 257 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:257:3 + xorl %eax, %eax + movl %eax, %edi + xorl %esi, %esi + callq _ZN2t6naIiEEPvmT_ + .loc 1 258 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:258:3 + xorl %eax, %eax + movl %eax, %edi + xorl %esi, %esi + callq _ZN2t6dlIiEEvPvT_ + .loc 1 259 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:259:3 + xorl %eax, %eax + movl %eax, %edi + xorl %esi, %esi + callq _ZN2t6daIiEEvPvT_ + .loc 1 260 6 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:260:6 + leaq -40(%rbp), %rdi + callq _ZN2t6awIiEEiv + .loc 1 261 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:261:3 + movl $42, %edi + callq _Zli5_suffy + .loc 1 263 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:263:3 + callq _Z2f1IJZ4mainE2t7EEvv + .loc 1 264 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:264:3 + callq _Z2f1IJRA3_iEEvv + .loc 1 265 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:265:3 + callq _Z2f1IJPA3_iEEvv + .loc 1 266 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:266:3 + callq _Z2f7I2t1Evv + .loc 1 267 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:267:3 + callq _Z2f8I2t1iEvv + .loc 1 269 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:269:3 + callq _ZN2ns8ttp_userINS_5inner3ttpEEEvv + .loc 1 270 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:270:3 + callq _Z2f1IJPiPDnEEvv + .loc 1 272 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:272:3 + callq _Z2f1IJ2t7IiEEEvv + .loc 1 273 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:273:3 + callq _Z2f7IN2ns3inl2t9EEvv + .loc 1 274 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:274:3 + callq _Z2f1IJU7_AtomiciEEvv + .loc 1 275 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:275:3 + callq _Z2f1IJilVcEEvv + .loc 1 276 3 # 
cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:276:3 + callq _Z2f1IJDv2_iEEvv + .loc 1 277 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:277:3 + callq _Z2f1IJVKPiEEvv + .loc 1 278 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:278:3 + callq _Z2f1IJVKvEEvv + .loc 1 279 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:279:3 + callq _Z2f1IJ2t1IJZ4mainE3$_1EEEEvv + .loc 1 280 7 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:280:7 + leaq -56(%rbp), %rdi + callq _ZN3t10C2IvEEv + .loc 1 281 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:281:3 + callq _Z2f1IJM3udtKFvvEEEvv + .loc 1 282 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:282:3 + callq _Z2f1IJM3udtVFvvREEEvv + .loc 1 283 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:283:3 + callq _Z2f1IJM3udtVKFvvOEEEvv + .loc 1 284 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:284:3 + callq _Z2f9IiEPFvvEv + .loc 1 285 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:285:3 + callq _Z2f1IJKPFvvEEEvv + .loc 1 286 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:286:3 + callq _Z2f1IJRA1_KcEEvv + .loc 1 287 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:287:3 + callq _Z2f1IJKFvvREEEvv + .loc 1 288 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:288:3 + callq _Z2f1IJVFvvOEEEvv + .loc 1 289 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:289:3 + callq _Z2f1IJVKFvvEEEvv + .loc 1 290 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:290:3 + callq _Z2f1IJA1_KPiEEvv + .loc 1 291 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:291:3 + callq _Z2f1IJRA1_KPiEEvv + .loc 1 292 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:292:3 + callq _Z2f1IJRKM3udtFvvEEEvv + .loc 1 293 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:293:3 + callq _Z2f1IJFPFvfEiEEEvv + .loc 1 295 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:295:3 + callq _Z2f1IJPDoFvvEEEvv + .loc 1 296 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:296:3 + callq _Z2f1IJFvZ4mainE3$_2EEEvv + .loc 1 298 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:298:3 + callq _Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv + .loc 1 299 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:299:3 + callq _Z2f1IJFvZ4mainE2t8EEEvv + .loc 1 300 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:300:3 + callq _Z19operator_not_reallyIiEvv + .loc 1 301 1 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:301:1 + xorl %eax, %eax + addq $64, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp3: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + # -- End function + .section .text._Z2f1IJiEEvv,"axG",@progbits,_Z2f1IJiEEvv,comdat + .weak 
+ .p2align 4, 0x90
+ .type _Z2f1IJiEEvv,@function
+_Z2f1IJiEEvv: # @_Z2f1IJiEEvv
+.Lfunc_begin2:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp4:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp5:
+.Lfunc_end2:
+ .size _Z2f1IJiEEvv, .Lfunc_end2-_Z2f1IJiEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJfEEvv,"axG",@progbits,_Z2f1IJfEEvv,comdat
+ .weak _Z2f1IJfEEvv # -- Begin function _Z2f1IJfEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJfEEvv,@function
+_Z2f1IJfEEvv: # @_Z2f1IJfEEvv
+.Lfunc_begin3:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp6:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp7:
+.Lfunc_end3:
+ .size _Z2f1IJfEEvv, .Lfunc_end3-_Z2f1IJfEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJbEEvv,"axG",@progbits,_Z2f1IJbEEvv,comdat
+ .weak _Z2f1IJbEEvv # -- Begin function _Z2f1IJbEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJbEEvv,@function
+_Z2f1IJbEEvv: # @_Z2f1IJbEEvv
+.Lfunc_begin4:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp8:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp9:
+.Lfunc_end4:
+ .size _Z2f1IJbEEvv, .Lfunc_end4-_Z2f1IJbEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJdEEvv,"axG",@progbits,_Z2f1IJdEEvv,comdat
+ .weak _Z2f1IJdEEvv # -- Begin function _Z2f1IJdEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJdEEvv,@function
+_Z2f1IJdEEvv: # @_Z2f1IJdEEvv
+.Lfunc_begin5:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp10:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp11:
+.Lfunc_end5:
+ .size _Z2f1IJdEEvv, .Lfunc_end5-_Z2f1IJdEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJlEEvv,"axG",@progbits,_Z2f1IJlEEvv,comdat
+ .weak _Z2f1IJlEEvv # -- Begin function _Z2f1IJlEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJlEEvv,@function
+_Z2f1IJlEEvv: # @_Z2f1IJlEEvv
+.Lfunc_begin6:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp12:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp13:
+.Lfunc_end6:
+ .size _Z2f1IJlEEvv, .Lfunc_end6-_Z2f1IJlEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJsEEvv,"axG",@progbits,_Z2f1IJsEEvv,comdat
+ .weak _Z2f1IJsEEvv # -- Begin function _Z2f1IJsEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJsEEvv,@function
+_Z2f1IJsEEvv: # @_Z2f1IJsEEvv
+.Lfunc_begin7:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp14:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp15:
+.Lfunc_end7:
+ .size _Z2f1IJsEEvv, .Lfunc_end7-_Z2f1IJsEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJjEEvv,"axG",@progbits,_Z2f1IJjEEvv,comdat
+ .weak _Z2f1IJjEEvv # -- Begin function _Z2f1IJjEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJjEEvv,@function
+_Z2f1IJjEEvv: # @_Z2f1IJjEEvv
+.Lfunc_begin8:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp16:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp17:
+.Lfunc_end8:
+ .size _Z2f1IJjEEvv, .Lfunc_end8-_Z2f1IJjEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJyEEvv,"axG",@progbits,_Z2f1IJyEEvv,comdat
+ .weak _Z2f1IJyEEvv # -- Begin function _Z2f1IJyEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJyEEvv,@function
+_Z2f1IJyEEvv: # @_Z2f1IJyEEvv
+.Lfunc_begin9:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp18:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp19:
+.Lfunc_end9:
+ .size _Z2f1IJyEEvv, .Lfunc_end9-_Z2f1IJyEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJxEEvv,"axG",@progbits,_Z2f1IJxEEvv,comdat
+ .weak _Z2f1IJxEEvv # -- Begin function _Z2f1IJxEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJxEEvv,@function
+_Z2f1IJxEEvv: # @_Z2f1IJxEEvv
+.Lfunc_begin10:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp20:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp21:
+.Lfunc_end10:
+ .size _Z2f1IJxEEvv, .Lfunc_end10-_Z2f1IJxEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJ3udtEEvv,"axG",@progbits,_Z2f1IJ3udtEEvv,comdat
+ .weak _Z2f1IJ3udtEEvv # -- Begin function _Z2f1IJ3udtEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJ3udtEEvv,@function
+_Z2f1IJ3udtEEvv: # @_Z2f1IJ3udtEEvv
+.Lfunc_begin11:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp22:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp23:
+.Lfunc_end11:
+ .size _Z2f1IJ3udtEEvv, .Lfunc_end11-_Z2f1IJ3udtEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJN2ns3udtEEEvv,"axG",@progbits,_Z2f1IJN2ns3udtEEEvv,comdat
+ .weak _Z2f1IJN2ns3udtEEEvv # -- Begin function _Z2f1IJN2ns3udtEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJN2ns3udtEEEvv,@function
+_Z2f1IJN2ns3udtEEEvv: # @_Z2f1IJN2ns3udtEEEvv
+.Lfunc_begin12:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp24:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp25:
+.Lfunc_end12:
+ .size _Z2f1IJN2ns3udtEEEvv, .Lfunc_end12-_Z2f1IJN2ns3udtEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJPN2ns3udtEEEvv,"axG",@progbits,_Z2f1IJPN2ns3udtEEEvv,comdat
+ .weak _Z2f1IJPN2ns3udtEEEvv # -- Begin function _Z2f1IJPN2ns3udtEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJPN2ns3udtEEEvv,@function
+_Z2f1IJPN2ns3udtEEEvv: # @_Z2f1IJPN2ns3udtEEEvv
+.Lfunc_begin13:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp26:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp27:
+.Lfunc_end13:
+ .size _Z2f1IJPN2ns3udtEEEvv, .Lfunc_end13-_Z2f1IJPN2ns3udtEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJN2ns5inner3udtEEEvv,"axG",@progbits,_Z2f1IJN2ns5inner3udtEEEvv,comdat
+ .weak _Z2f1IJN2ns5inner3udtEEEvv # -- Begin function _Z2f1IJN2ns5inner3udtEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJN2ns5inner3udtEEEvv,@function
+_Z2f1IJN2ns5inner3udtEEEvv: # @_Z2f1IJN2ns5inner3udtEEEvv
+.Lfunc_begin14:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp28:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp29:
+.Lfunc_end14:
+ .size _Z2f1IJN2ns5inner3udtEEEvv, .Lfunc_end14-_Z2f1IJN2ns5inner3udtEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJ2t1IJiEEEEvv,"axG",@progbits,_Z2f1IJ2t1IJiEEEEvv,comdat
+ .weak _Z2f1IJ2t1IJiEEEEvv # -- Begin function _Z2f1IJ2t1IJiEEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJ2t1IJiEEEEvv,@function
+_Z2f1IJ2t1IJiEEEEvv: # @_Z2f1IJ2t1IJiEEEEvv
+.Lfunc_begin15:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp30:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp31:
+.Lfunc_end15:
+ .size _Z2f1IJ2t1IJiEEEEvv, .Lfunc_end15-_Z2f1IJ2t1IJiEEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJifEEvv,"axG",@progbits,_Z2f1IJifEEvv,comdat
+ .weak _Z2f1IJifEEvv # -- Begin function _Z2f1IJifEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJifEEvv,@function
+_Z2f1IJifEEvv: # @_Z2f1IJifEEvv
+.Lfunc_begin16:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp32:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp33:
+.Lfunc_end16:
+ .size _Z2f1IJifEEvv, .Lfunc_end16-_Z2f1IJifEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJPiEEvv,"axG",@progbits,_Z2f1IJPiEEvv,comdat
+ .weak _Z2f1IJPiEEvv # -- Begin function _Z2f1IJPiEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJPiEEvv,@function
+_Z2f1IJPiEEvv: # @_Z2f1IJPiEEvv
+.Lfunc_begin17:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp34:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp35:
+.Lfunc_end17:
+ .size _Z2f1IJPiEEvv, .Lfunc_end17-_Z2f1IJPiEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJRiEEvv,"axG",@progbits,_Z2f1IJRiEEvv,comdat
+ .weak _Z2f1IJRiEEvv # -- Begin function _Z2f1IJRiEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJRiEEvv,@function
+_Z2f1IJRiEEvv: # @_Z2f1IJRiEEvv
+.Lfunc_begin18:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp36:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp37:
+.Lfunc_end18:
+ .size _Z2f1IJRiEEvv, .Lfunc_end18-_Z2f1IJRiEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJOiEEvv,"axG",@progbits,_Z2f1IJOiEEvv,comdat
+ .weak _Z2f1IJOiEEvv # -- Begin function _Z2f1IJOiEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJOiEEvv,@function
+_Z2f1IJOiEEvv: # @_Z2f1IJOiEEvv
+.Lfunc_begin19:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp38:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp39:
+.Lfunc_end19:
+ .size _Z2f1IJOiEEvv, .Lfunc_end19-_Z2f1IJOiEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJKiEEvv,"axG",@progbits,_Z2f1IJKiEEvv,comdat
+ .weak _Z2f1IJKiEEvv # -- Begin function _Z2f1IJKiEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJKiEEvv,@function
+_Z2f1IJKiEEvv: # @_Z2f1IJKiEEvv
+.Lfunc_begin20:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp40:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp41:
+.Lfunc_end20:
+ .size _Z2f1IJKiEEvv, .Lfunc_end20-_Z2f1IJKiEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJvEEvv,"axG",@progbits,_Z2f1IJvEEvv,comdat
+ .weak _Z2f1IJvEEvv # -- Begin function _Z2f1IJvEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJvEEvv,@function
+_Z2f1IJvEEvv: # @_Z2f1IJvEEvv
+.Lfunc_begin21:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp42:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp43:
+.Lfunc_end21:
+ .size _Z2f1IJvEEvv, .Lfunc_end21-_Z2f1IJvEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJN11outer_class11inner_classEEEvv,"axG",@progbits,_Z2f1IJN11outer_class11inner_classEEEvv,comdat
+ .weak _Z2f1IJN11outer_class11inner_classEEEvv # -- Begin function _Z2f1IJN11outer_class11inner_classEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJN11outer_class11inner_classEEEvv,@function
+_Z2f1IJN11outer_class11inner_classEEEvv: # @_Z2f1IJN11outer_class11inner_classEEEvv
+.Lfunc_begin22:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp44:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp45:
+.Lfunc_end22:
+ .size _Z2f1IJN11outer_class11inner_classEEEvv, .Lfunc_end22-_Z2f1IJN11outer_class11inner_classEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJmEEvv,"axG",@progbits,_Z2f1IJmEEvv,comdat
+ .weak _Z2f1IJmEEvv # -- Begin function _Z2f1IJmEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJmEEvv,@function
+_Z2f1IJmEEvv: # @_Z2f1IJmEEvv
+.Lfunc_begin23:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp46:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp47:
+.Lfunc_end23:
+ .size _Z2f1IJmEEvv, .Lfunc_end23-_Z2f1IJmEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f2ILb1ELi3EEvv,"axG",@progbits,_Z2f2ILb1ELi3EEvv,comdat
+ .weak _Z2f2ILb1ELi3EEvv # -- Begin function _Z2f2ILb1ELi3EEvv
+ .p2align 4, 0x90
+ .type _Z2f2ILb1ELi3EEvv,@function
+_Z2f2ILb1ELi3EEvv: # @_Z2f2ILb1ELi3EEvv
+.Lfunc_begin24:
+ .loc 1 31 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:31:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp48:
+ .loc 1 32 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:32:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp49:
+.Lfunc_end24:
+ .size _Z2f2ILb1ELi3EEvv, .Lfunc_end24-_Z2f2ILb1ELi3EEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv,"axG",@progbits,_Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv,comdat
+ .weak _Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv # -- Begin function _Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv,@function
+_Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv: # @_Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv
+.Lfunc_begin25:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp50:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp51:
+.Lfunc_end25:
+ .size _Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv, .Lfunc_end25-_Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv,"axG",@progbits,_Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv,comdat
+ .weak _Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv # -- Begin function _Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv,@function
+_Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv: # @_Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv
+.Lfunc_begin26:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp52:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp53:
+.Lfunc_end26:
+ .size _Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv, .Lfunc_end26-_Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv,"axG",@progbits,_Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv,comdat
+ .weak _Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv # -- Begin function _Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv,@function
+_Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv: # @_Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv
+.Lfunc_begin27:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp54:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp55:
+.Lfunc_end27:
+ .size _Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv, .Lfunc_end27-_Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv
+ .cfi_endproc
+ # -- End function
+ .text
+ .p2align 4, 0x90 # -- Begin function _Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv
+ .type _Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv,@function
+_Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv: # @"_Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv"
+.Lfunc_begin28:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp56:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp57:
+.Lfunc_end28:
+ .size _Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv, .Lfunc_end28-_Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IPiJXadL_Z1iEEEEvv,"axG",@progbits,_Z2f3IPiJXadL_Z1iEEEEvv,comdat
+ .weak _Z2f3IPiJXadL_Z1iEEEEvv # -- Begin function _Z2f3IPiJXadL_Z1iEEEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IPiJXadL_Z1iEEEEvv,@function
+_Z2f3IPiJXadL_Z1iEEEEvv: # @_Z2f3IPiJXadL_Z1iEEEEvv
+.Lfunc_begin29:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp58:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp59:
+.Lfunc_end29:
+ .size _Z2f3IPiJXadL_Z1iEEEEvv, .Lfunc_end29-_Z2f3IPiJXadL_Z1iEEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IPiJLS0_0EEEvv,"axG",@progbits,_Z2f3IPiJLS0_0EEEvv,comdat
+ .weak _Z2f3IPiJLS0_0EEEvv # -- Begin function _Z2f3IPiJLS0_0EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IPiJLS0_0EEEvv,@function
+_Z2f3IPiJLS0_0EEEvv: # @_Z2f3IPiJLS0_0EEEvv
+.Lfunc_begin30:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp60:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp61:
+.Lfunc_end30:
+ .size _Z2f3IPiJLS0_0EEEvv, .Lfunc_end30-_Z2f3IPiJLS0_0EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3ImJLm1EEEvv,"axG",@progbits,_Z2f3ImJLm1EEEvv,comdat
+ .weak _Z2f3ImJLm1EEEvv # -- Begin function _Z2f3ImJLm1EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3ImJLm1EEEvv,@function
+_Z2f3ImJLm1EEEvv: # @_Z2f3ImJLm1EEEvv
+.Lfunc_begin31:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp62:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp63:
+.Lfunc_end31:
+ .size _Z2f3ImJLm1EEEvv, .Lfunc_end31-_Z2f3ImJLm1EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IyJLy1EEEvv,"axG",@progbits,_Z2f3IyJLy1EEEvv,comdat
+ .weak _Z2f3IyJLy1EEEvv # -- Begin function _Z2f3IyJLy1EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IyJLy1EEEvv,@function
+_Z2f3IyJLy1EEEvv: # @_Z2f3IyJLy1EEEvv
+.Lfunc_begin32:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp64:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp65:
+.Lfunc_end32:
+ .size _Z2f3IyJLy1EEEvv, .Lfunc_end32-_Z2f3IyJLy1EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IlJLl1EEEvv,"axG",@progbits,_Z2f3IlJLl1EEEvv,comdat
+ .weak _Z2f3IlJLl1EEEvv # -- Begin function _Z2f3IlJLl1EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IlJLl1EEEvv,@function
+_Z2f3IlJLl1EEEvv: # @_Z2f3IlJLl1EEEvv
+.Lfunc_begin33:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp66:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp67:
+.Lfunc_end33:
+ .size _Z2f3IlJLl1EEEvv, .Lfunc_end33-_Z2f3IlJLl1EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IjJLj1EEEvv,"axG",@progbits,_Z2f3IjJLj1EEEvv,comdat
+ .weak _Z2f3IjJLj1EEEvv # -- Begin function _Z2f3IjJLj1EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IjJLj1EEEvv,@function
+_Z2f3IjJLj1EEEvv: # @_Z2f3IjJLj1EEEvv
+.Lfunc_begin34:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp68:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp69:
+.Lfunc_end34:
+ .size _Z2f3IjJLj1EEEvv, .Lfunc_end34-_Z2f3IjJLj1EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IsJLs1EEEvv,"axG",@progbits,_Z2f3IsJLs1EEEvv,comdat
+ .weak _Z2f3IsJLs1EEEvv # -- Begin function _Z2f3IsJLs1EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IsJLs1EEEvv,@function
+_Z2f3IsJLs1EEEvv: # @_Z2f3IsJLs1EEEvv
+.Lfunc_begin35:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp70:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp71:
+.Lfunc_end35:
+ .size _Z2f3IsJLs1EEEvv, .Lfunc_end35-_Z2f3IsJLs1EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IhJLh0EEEvv,"axG",@progbits,_Z2f3IhJLh0EEEvv,comdat
+ .weak _Z2f3IhJLh0EEEvv # -- Begin function _Z2f3IhJLh0EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IhJLh0EEEvv,@function
+_Z2f3IhJLh0EEEvv: # @_Z2f3IhJLh0EEEvv
+.Lfunc_begin36:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp72:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp73:
+.Lfunc_end36:
+ .size _Z2f3IhJLh0EEEvv, .Lfunc_end36-_Z2f3IhJLh0EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IaJLa0EEEvv,"axG",@progbits,_Z2f3IaJLa0EEEvv,comdat
+ .weak _Z2f3IaJLa0EEEvv # -- Begin function _Z2f3IaJLa0EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IaJLa0EEEvv,@function
+_Z2f3IaJLa0EEEvv: # @_Z2f3IaJLa0EEEvv
+.Lfunc_begin37:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp74:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp75:
+.Lfunc_end37:
+ .size _Z2f3IaJLa0EEEvv, .Lfunc_end37-_Z2f3IaJLa0EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3ItJLt1ELt2EEEvv,"axG",@progbits,_Z2f3ItJLt1ELt2EEEvv,comdat
+ .weak _Z2f3ItJLt1ELt2EEEvv # -- Begin function _Z2f3ItJLt1ELt2EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3ItJLt1ELt2EEEvv,@function
+_Z2f3ItJLt1ELt2EEEvv: # @_Z2f3ItJLt1ELt2EEEvv
+.Lfunc_begin38:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp76:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp77:
+.Lfunc_end38:
+ .size _Z2f3ItJLt1ELt2EEEvv, .Lfunc_end38-_Z2f3ItJLt1ELt2EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv,"axG",@progbits,_Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv,comdat
+ .weak _Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv # -- Begin function _Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv,@function
+_Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv: # @_Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv
+.Lfunc_begin39:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp78:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp79:
+.Lfunc_end39:
+ .size _Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv, .Lfunc_end39-_Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f3InJLn18446744073709551614EEEvv,"axG",@progbits,_Z2f3InJLn18446744073709551614EEEvv,comdat
+ .weak _Z2f3InJLn18446744073709551614EEEvv # -- Begin function _Z2f3InJLn18446744073709551614EEEvv
+ .p2align 4, 0x90
+ .type _Z2f3InJLn18446744073709551614EEEvv,@function
+_Z2f3InJLn18446744073709551614EEEvv: # @_Z2f3InJLn18446744073709551614EEEvv
+.Lfunc_begin40:
+ .loc 1 34 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:34:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp80:
+ .loc 1 35 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:35:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp81:
+.Lfunc_end40:
+ .size _Z2f3InJLn18446744073709551614EEEvv, .Lfunc_end40-_Z2f3InJLn18446744073709551614EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f4IjLj3EEvv,"axG",@progbits,_Z2f4IjLj3EEvv,comdat
+ .weak _Z2f4IjLj3EEvv # -- Begin function _Z2f4IjLj3EEvv
+ .p2align 4, 0x90
+ .type _Z2f4IjLj3EEvv,@function
+_Z2f4IjLj3EEvv: # @_Z2f4IjLj3EEvv
+.Lfunc_begin41:
+ .loc 1 37 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:37:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp82:
+ .loc 1 38 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:38:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp83:
+.Lfunc_end41:
+ .size _Z2f4IjLj3EEvv, .Lfunc_end41-_Z2f4IjLj3EEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJ2t3IiLb0EEEEvv,"axG",@progbits,_Z2f1IJ2t3IiLb0EEEEvv,comdat
+ .weak _Z2f1IJ2t3IiLb0EEEEvv # -- Begin function _Z2f1IJ2t3IiLb0EEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJ2t3IiLb0EEEEvv,@function
+_Z2f1IJ2t3IiLb0EEEEvv: # @_Z2f1IJ2t3IiLb0EEEEvv
+.Lfunc_begin42:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp84:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp85:
+.Lfunc_end42:
+ .size _Z2f1IJ2t3IiLb0EEEEvv, .Lfunc_end42-_Z2f1IJ2t3IiLb0EEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv,"axG",@progbits,_Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv,comdat
+ .weak _Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv # -- Begin function _Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv,@function
+_Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv: # @_Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv
+.Lfunc_begin43:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp86:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp87:
+.Lfunc_end43:
+ .size _Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv, .Lfunc_end43-_Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv
+ .cfi_endproc
+ # -- End function
+ .text
+ .p2align 4, 0x90 # -- Begin function _Z2f1IJZ4mainE3$_1EEvv
+ .type _Z2f1IJZ4mainE3$_1EEvv,@function
+_Z2f1IJZ4mainE3$_1EEvv: # @"_Z2f1IJZ4mainE3$_1EEvv"
+.Lfunc_begin44:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp88:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp89:
+.Lfunc_end44:
+ .size _Z2f1IJZ4mainE3$_1EEvv, .Lfunc_end44-_Z2f1IJZ4mainE3$_1EEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJFifEEEvv,"axG",@progbits,_Z2f1IJFifEEEvv,comdat
+ .weak _Z2f1IJFifEEEvv # -- Begin function _Z2f1IJFifEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJFifEEEvv,@function
+_Z2f1IJFifEEEvv: # @_Z2f1IJFifEEEvv
+.Lfunc_begin45:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp90:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp91:
+.Lfunc_end45:
+ .size _Z2f1IJFifEEEvv, .Lfunc_end45-_Z2f1IJFifEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJRKiEEvv,"axG",@progbits,_Z2f1IJRKiEEvv,comdat
+ .weak _Z2f1IJRKiEEvv # -- Begin function _Z2f1IJRKiEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJRKiEEvv,@function
+_Z2f1IJRKiEEvv: # @_Z2f1IJRKiEEvv
+.Lfunc_begin46:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp92:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp93:
+.Lfunc_end46:
+ .size _Z2f1IJRKiEEvv, .Lfunc_end46-_Z2f1IJRKiEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJRPKiEEvv,"axG",@progbits,_Z2f1IJRPKiEEvv,comdat
+ .weak _Z2f1IJRPKiEEvv # -- Begin function _Z2f1IJRPKiEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJRPKiEEvv,@function
+_Z2f1IJRPKiEEvv: # @_Z2f1IJRPKiEEvv
+.Lfunc_begin47:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp94:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp95:
+.Lfunc_end47:
+ .size _Z2f1IJRPKiEEvv, .Lfunc_end47-_Z2f1IJRPKiEEvv
+ .cfi_endproc
+ # -- End function
+ .text
+ .p2align 4, 0x90 # -- Begin function _Z2f1IJN12_GLOBAL__N_12t5EEEvv
+ .type _Z2f1IJN12_GLOBAL__N_12t5EEEvv,@function
+_Z2f1IJN12_GLOBAL__N_12t5EEEvv: # @_Z2f1IJN12_GLOBAL__N_12t5EEEvv
+.Lfunc_begin48:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp96:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp97:
+.Lfunc_end48:
+ .size _Z2f1IJN12_GLOBAL__N_12t5EEEvv, .Lfunc_end48-_Z2f1IJN12_GLOBAL__N_12t5EEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJDnEEvv,"axG",@progbits,_Z2f1IJDnEEvv,comdat
+ .weak _Z2f1IJDnEEvv # -- Begin function _Z2f1IJDnEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJDnEEvv,@function
+_Z2f1IJDnEEvv: # @_Z2f1IJDnEEvv
+.Lfunc_begin49:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp98:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp99:
+.Lfunc_end49:
+ .size _Z2f1IJDnEEvv, .Lfunc_end49-_Z2f1IJDnEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJPlS0_EEvv,"axG",@progbits,_Z2f1IJPlS0_EEvv,comdat
+ .weak _Z2f1IJPlS0_EEvv # -- Begin function _Z2f1IJPlS0_EEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJPlS0_EEvv,@function
+_Z2f1IJPlS0_EEvv: # @_Z2f1IJPlS0_EEvv
+.Lfunc_begin50:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp100:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp101:
+.Lfunc_end50:
+ .size _Z2f1IJPlS0_EEvv, .Lfunc_end50-_Z2f1IJPlS0_EEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJPlP3udtEEvv,"axG",@progbits,_Z2f1IJPlP3udtEEvv,comdat
+ .weak _Z2f1IJPlP3udtEEvv # -- Begin function _Z2f1IJPlP3udtEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJPlP3udtEEvv,@function
+_Z2f1IJPlP3udtEEvv: # @_Z2f1IJPlP3udtEEvv
+.Lfunc_begin51:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp102:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp103:
+.Lfunc_end51:
+ .size _Z2f1IJPlP3udtEEvv, .Lfunc_end51-_Z2f1IJPlP3udtEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJKPvEEvv,"axG",@progbits,_Z2f1IJKPvEEvv,comdat
+ .weak _Z2f1IJKPvEEvv # -- Begin function _Z2f1IJKPvEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJKPvEEvv,@function
+_Z2f1IJKPvEEvv: # @_Z2f1IJKPvEEvv
+.Lfunc_begin52:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp104:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp105:
+.Lfunc_end52:
+ .size _Z2f1IJKPvEEvv, .Lfunc_end52-_Z2f1IJKPvEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJPKPKvEEvv,"axG",@progbits,_Z2f1IJPKPKvEEvv,comdat
+ .weak _Z2f1IJPKPKvEEvv # -- Begin function _Z2f1IJPKPKvEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJPKPKvEEvv,@function
+_Z2f1IJPKPKvEEvv: # @_Z2f1IJPKPKvEEvv
+.Lfunc_begin53:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp106:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp107:
+.Lfunc_end53:
+ .size _Z2f1IJPKPKvEEvv, .Lfunc_end53-_Z2f1IJPKPKvEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJFvvEEEvv,"axG",@progbits,_Z2f1IJFvvEEEvv,comdat
+ .weak _Z2f1IJFvvEEEvv # -- Begin function _Z2f1IJFvvEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJFvvEEEvv,@function
+_Z2f1IJFvvEEEvv: # @_Z2f1IJFvvEEEvv
+.Lfunc_begin54:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp108:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp109:
+.Lfunc_end54:
+ .size _Z2f1IJFvvEEEvv, .Lfunc_end54-_Z2f1IJFvvEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJPFvvEEEvv,"axG",@progbits,_Z2f1IJPFvvEEEvv,comdat
+ .weak _Z2f1IJPFvvEEEvv # -- Begin function _Z2f1IJPFvvEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJPFvvEEEvv,@function
+_Z2f1IJPFvvEEEvv: # @_Z2f1IJPFvvEEEvv
+.Lfunc_begin55:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp110:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp111:
+.Lfunc_end55:
+ .size _Z2f1IJPFvvEEEvv, .Lfunc_end55-_Z2f1IJPFvvEEEvv
+ .cfi_endproc
+ # -- End function
+ .text
+ .p2align 4, 0x90 # -- Begin function _Z2f1IJPZ4mainE3$_1EEvv
+ .type _Z2f1IJPZ4mainE3$_1EEvv,@function
+_Z2f1IJPZ4mainE3$_1EEvv: # @"_Z2f1IJPZ4mainE3$_1EEvv"
+.Lfunc_begin56:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp112:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp113:
+.Lfunc_end56:
+ .size _Z2f1IJPZ4mainE3$_1EEvv, .Lfunc_end56-_Z2f1IJPZ4mainE3$_1EEvv
+ .cfi_endproc
+ # -- End function
+ .p2align 4, 0x90 # -- Begin function _Z2f1IJZ4mainE3$_2EEvv
+ .type _Z2f1IJZ4mainE3$_2EEvv,@function
+_Z2f1IJZ4mainE3$_2EEvv: # @"_Z2f1IJZ4mainE3$_2EEvv"
+.Lfunc_begin57:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp114:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp115:
+.Lfunc_end57:
+ .size _Z2f1IJZ4mainE3$_2EEvv, .Lfunc_end57-_Z2f1IJZ4mainE3$_2EEvv
+ .cfi_endproc
+ # -- End function
+ .p2align 4, 0x90 # -- Begin function _Z2f1IJPZ4mainE3$_2EEvv
+ .type _Z2f1IJPZ4mainE3$_2EEvv,@function
+_Z2f1IJPZ4mainE3$_2EEvv: # @"_Z2f1IJPZ4mainE3$_2EEvv"
+.Lfunc_begin58:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp116:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp117:
+.Lfunc_end58:
+ .size _Z2f1IJPZ4mainE3$_2EEvv, .Lfunc_end58-_Z2f1IJPZ4mainE3$_2EEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f5IJ2t1IJiEEEiEvv,"axG",@progbits,_Z2f5IJ2t1IJiEEEiEvv,comdat
+ .weak _Z2f5IJ2t1IJiEEEiEvv # -- Begin function _Z2f5IJ2t1IJiEEEiEvv
+ .p2align 4, 0x90
+ .type _Z2f5IJ2t1IJiEEEiEvv,@function
+_Z2f5IJ2t1IJiEEEiEvv: # @_Z2f5IJ2t1IJiEEEiEvv
+.Lfunc_begin59:
+ .loc 1 54 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:54:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp118:
+ .loc 1 54 13 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:54:13
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp119:
+.Lfunc_end59:
+ .size _Z2f5IJ2t1IJiEEEiEvv, .Lfunc_end59-_Z2f5IJ2t1IJiEEEiEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f5IJEiEvv,"axG",@progbits,_Z2f5IJEiEvv,comdat
+ .weak _Z2f5IJEiEvv # -- Begin function _Z2f5IJEiEvv
+ .p2align 4, 0x90
+ .type _Z2f5IJEiEvv,@function
+_Z2f5IJEiEvv: # @_Z2f5IJEiEvv
+.Lfunc_begin60:
+ .loc 1 54 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:54:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp120:
+ .loc 1 54 13 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:54:13
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp121:
+.Lfunc_end60:
+ .size _Z2f5IJEiEvv, .Lfunc_end60-_Z2f5IJEiEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f6I2t1IJiEEJEEvv,"axG",@progbits,_Z2f6I2t1IJiEEJEEvv,comdat
+ .weak _Z2f6I2t1IJiEEJEEvv # -- Begin function _Z2f6I2t1IJiEEJEEvv
+ .p2align 4, 0x90
+ .type _Z2f6I2t1IJiEEJEEvv,@function
+_Z2f6I2t1IJiEEJEEvv: # @_Z2f6I2t1IJiEEJEEvv
+.Lfunc_begin61:
+ .loc 1 56 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:56:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp122:
+ .loc 1 56 13 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:56:13
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp123:
+.Lfunc_end61:
+ .size _Z2f6I2t1IJiEEJEEvv, .Lfunc_end61-_Z2f6I2t1IJiEEJEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJEEvv,"axG",@progbits,_Z2f1IJEEvv,comdat
+ .weak _Z2f1IJEEvv # -- Begin function _Z2f1IJEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJEEvv,@function
+_Z2f1IJEEvv: # @_Z2f1IJEEvv
+.Lfunc_begin62:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp124:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp125:
+.Lfunc_end62:
+ .size _Z2f1IJEEvv, .Lfunc_end62-_Z2f1IJEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJPKvS1_EEvv,"axG",@progbits,_Z2f1IJPKvS1_EEvv,comdat
+ .weak _Z2f1IJPKvS1_EEvv # -- Begin function _Z2f1IJPKvS1_EEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJPKvS1_EEvv,@function
+_Z2f1IJPKvS1_EEvv: # @_Z2f1IJPKvS1_EEvv
+.Lfunc_begin63:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp126:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp127:
+.Lfunc_end63:
+ .size _Z2f1IJPKvS1_EEvv, .Lfunc_end63-_Z2f1IJPKvS1_EEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJP2t1IJPiEEEEvv,"axG",@progbits,_Z2f1IJP2t1IJPiEEEEvv,comdat
+ .weak _Z2f1IJP2t1IJPiEEEEvv # -- Begin function _Z2f1IJP2t1IJPiEEEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJP2t1IJPiEEEEvv,@function
+_Z2f1IJP2t1IJPiEEEEvv: # @_Z2f1IJP2t1IJPiEEEEvv
+.Lfunc_begin64:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp128:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp129:
+.Lfunc_end64:
+ .size _Z2f1IJP2t1IJPiEEEEvv, .Lfunc_end64-_Z2f1IJP2t1IJPiEEEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._Z2f1IJA_PiEEvv,"axG",@progbits,_Z2f1IJA_PiEEvv,comdat
+ .weak _Z2f1IJA_PiEEvv # -- Begin function _Z2f1IJA_PiEEvv
+ .p2align 4, 0x90
+ .type _Z2f1IJA_PiEEvv,@function
+_Z2f1IJA_PiEEvv: # @_Z2f1IJA_PiEEvv
+.Lfunc_begin65:
+ .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+.Ltmp130:
+ .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp131:
+.Lfunc_end65:
+ .size _Z2f1IJA_PiEEvv, .Lfunc_end65-_Z2f1IJA_PiEEvv
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6lsIiEEvi,"axG",@progbits,_ZN2t6lsIiEEvi,comdat
+ .weak _ZN2t6lsIiEEvi # -- Begin function _ZN2t6lsIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6lsIiEEvi,@function
+_ZN2t6lsIiEEvi: # @_ZN2t6lsIiEEvi
+.Lfunc_begin66:
+ .loc 1 59 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:59:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp132:
+ .loc 1 60 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:60:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp133:
+.Lfunc_end66:
+ .size _ZN2t6lsIiEEvi, .Lfunc_end66-_ZN2t6lsIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6ltIiEEvi,"axG",@progbits,_ZN2t6ltIiEEvi,comdat
+ .weak _ZN2t6ltIiEEvi # -- Begin function _ZN2t6ltIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6ltIiEEvi,@function
+_ZN2t6ltIiEEvi: # @_ZN2t6ltIiEEvi
+.Lfunc_begin67:
+ .loc 1 62 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:62:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp134:
+ .loc 1 63 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:63:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp135:
+.Lfunc_end67:
+ .size _ZN2t6ltIiEEvi, .Lfunc_end67-_ZN2t6ltIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6leIiEEvi,"axG",@progbits,_ZN2t6leIiEEvi,comdat
+ .weak _ZN2t6leIiEEvi # -- Begin function _ZN2t6leIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6leIiEEvi,@function
+_ZN2t6leIiEEvi: # @_ZN2t6leIiEEvi
+.Lfunc_begin68:
+ .loc 1 65 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:65:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp136:
+ .loc 1 66 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:66:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp137:
+.Lfunc_end68:
+ .size _ZN2t6leIiEEvi, .Lfunc_end68-_ZN2t6leIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6cvP2t1IJfEEIiEEv,"axG",@progbits,_ZN2t6cvP2t1IJfEEIiEEv,comdat
+ .weak _ZN2t6cvP2t1IJfEEIiEEv # -- Begin function _ZN2t6cvP2t1IJfEEIiEEv
+ .p2align 4, 0x90
+ .type _ZN2t6cvP2t1IJfEEIiEEv,@function
+_ZN2t6cvP2t1IJfEEIiEEv: # @_ZN2t6cvP2t1IJfEEIiEEv
+.Lfunc_begin69:
+ .loc 1 68 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:68:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+.Ltmp138:
+ .loc 1 69 5 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:69:5
+ xorl %eax, %eax
+ # kill: def $rax killed $eax
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp139:
+.Lfunc_end69:
+ .size _ZN2t6cvP2t1IJfEEIiEEv, .Lfunc_end69-_ZN2t6cvP2t1IJfEEIiEEv
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6miIiEEvi,"axG",@progbits,_ZN2t6miIiEEvi,comdat
+ .weak _ZN2t6miIiEEvi # -- Begin function _ZN2t6miIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6miIiEEvi,@function
+_ZN2t6miIiEEvi: # @_ZN2t6miIiEEvi
+.Lfunc_begin70:
+ .loc 1 72 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:72:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp140:
+ .loc 1 73 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:73:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp141:
+.Lfunc_end70:
+ .size _ZN2t6miIiEEvi, .Lfunc_end70-_ZN2t6miIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6mlIiEEvi,"axG",@progbits,_ZN2t6mlIiEEvi,comdat
+ .weak _ZN2t6mlIiEEvi # -- Begin function _ZN2t6mlIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6mlIiEEvi,@function
+_ZN2t6mlIiEEvi: # @_ZN2t6mlIiEEvi
+.Lfunc_begin71:
+ .loc 1 75 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:75:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp142:
+ .loc 1 76 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:76:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp143:
+.Lfunc_end71:
+ .size _ZN2t6mlIiEEvi, .Lfunc_end71-_ZN2t6mlIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6dvIiEEvi,"axG",@progbits,_ZN2t6dvIiEEvi,comdat
+ .weak _ZN2t6dvIiEEvi # -- Begin function _ZN2t6dvIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6dvIiEEvi,@function
+_ZN2t6dvIiEEvi: # @_ZN2t6dvIiEEvi
+.Lfunc_begin72:
+ .loc 1 78 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:78:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp144:
+ .loc 1 79 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:79:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp145:
+.Lfunc_end72:
+ .size _ZN2t6dvIiEEvi, .Lfunc_end72-_ZN2t6dvIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6rmIiEEvi,"axG",@progbits,_ZN2t6rmIiEEvi,comdat
+ .weak _ZN2t6rmIiEEvi # -- Begin function _ZN2t6rmIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6rmIiEEvi,@function
+_ZN2t6rmIiEEvi: # @_ZN2t6rmIiEEvi
+.Lfunc_begin73:
+ .loc 1 81 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:81:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp146:
+ .loc 1 82 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:82:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp147:
+.Lfunc_end73:
+ .size _ZN2t6rmIiEEvi, .Lfunc_end73-_ZN2t6rmIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6eoIiEEvi,"axG",@progbits,_ZN2t6eoIiEEvi,comdat
+ .weak _ZN2t6eoIiEEvi # -- Begin function _ZN2t6eoIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6eoIiEEvi,@function
+_ZN2t6eoIiEEvi: # @_ZN2t6eoIiEEvi
+.Lfunc_begin74:
+ .loc 1 84 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:84:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp148:
+ .loc 1 85 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:85:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp149:
+.Lfunc_end74:
+ .size _ZN2t6eoIiEEvi, .Lfunc_end74-_ZN2t6eoIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6anIiEEvi,"axG",@progbits,_ZN2t6anIiEEvi,comdat
+ .weak _ZN2t6anIiEEvi # -- Begin function _ZN2t6anIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6anIiEEvi,@function
+_ZN2t6anIiEEvi: # @_ZN2t6anIiEEvi
+.Lfunc_begin75:
+ .loc 1 87 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:87:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movq %rdi, -8(%rbp)
+ movl %esi, -12(%rbp)
+.Ltmp150:
+ .loc 1 88 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:88:3
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp151:
+.Lfunc_end75:
+ .size _ZN2t6anIiEEvi, .Lfunc_end75-_ZN2t6anIiEEvi
+ .cfi_endproc
+ # -- End function
+ .section .text._ZN2t6orIiEEvi,"axG",@progbits,_ZN2t6orIiEEvi,comdat
+ .weak _ZN2t6orIiEEvi # -- Begin function _ZN2t6orIiEEvi
+ .p2align 4, 0x90
+ .type _ZN2t6orIiEEvi,@function
+_ZN2t6orIiEEvi: # @_ZN2t6orIiEEvi
+.Lfunc_begin76:
+ .loc 1 90 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:90:0
+ .cfi_startproc
+# %bb.0: # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
.cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp152: + .loc 1 91 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:91:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp153: +.Lfunc_end76: + .size _ZN2t6orIiEEvi, .Lfunc_end76-_ZN2t6orIiEEvi + .cfi_endproc + # -- End function + .section .text._ZN2t6coIiEEvv,"axG",@progbits,_ZN2t6coIiEEvv,comdat + .weak _ZN2t6coIiEEvv # -- Begin function _ZN2t6coIiEEvv + .p2align 4, 0x90 + .type _ZN2t6coIiEEvv,@function +_ZN2t6coIiEEvv: # @_ZN2t6coIiEEvv +.Lfunc_begin77: + .loc 1 93 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:93:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) +.Ltmp154: + .loc 1 94 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:94:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp155: +.Lfunc_end77: + .size _ZN2t6coIiEEvv, .Lfunc_end77-_ZN2t6coIiEEvv + .cfi_endproc + # -- End function + .section .text._ZN2t6ntIiEEvv,"axG",@progbits,_ZN2t6ntIiEEvv,comdat + .weak _ZN2t6ntIiEEvv # -- Begin function _ZN2t6ntIiEEvv + .p2align 4, 0x90 + .type _ZN2t6ntIiEEvv,@function +_ZN2t6ntIiEEvv: # @_ZN2t6ntIiEEvv +.Lfunc_begin78: + .loc 1 96 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:96:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) +.Ltmp156: + .loc 1 97 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:97:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp157: +.Lfunc_end78: + .size _ZN2t6ntIiEEvv, .Lfunc_end78-_ZN2t6ntIiEEvv + .cfi_endproc + # -- End function + .section .text._ZN2t6aSIiEEvi,"axG",@progbits,_ZN2t6aSIiEEvi,comdat + .weak _ZN2t6aSIiEEvi # -- Begin function _ZN2t6aSIiEEvi + .p2align 4, 0x90 + .type _ZN2t6aSIiEEvi,@function +_ZN2t6aSIiEEvi: # @_ZN2t6aSIiEEvi +.Lfunc_begin79: + .loc 1 99 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:99:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp158: + .loc 1 100 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:100:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp159: +.Lfunc_end79: + .size _ZN2t6aSIiEEvi, .Lfunc_end79-_ZN2t6aSIiEEvi + .cfi_endproc + # -- End function + .section .text._ZN2t6gtIiEEvi,"axG",@progbits,_ZN2t6gtIiEEvi,comdat + .weak _ZN2t6gtIiEEvi # -- Begin function _ZN2t6gtIiEEvi + .p2align 4, 0x90 + .type _ZN2t6gtIiEEvi,@function +_ZN2t6gtIiEEvi: # @_ZN2t6gtIiEEvi +.Lfunc_begin80: + .loc 1 102 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:102:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp160: + .loc 1 103 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:103:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp161: 
+.Lfunc_end80: + .size _ZN2t6gtIiEEvi, .Lfunc_end80-_ZN2t6gtIiEEvi + .cfi_endproc + # -- End function + .section .text._ZN2t6cmIiEEvi,"axG",@progbits,_ZN2t6cmIiEEvi,comdat + .weak _ZN2t6cmIiEEvi # -- Begin function _ZN2t6cmIiEEvi + .p2align 4, 0x90 + .type _ZN2t6cmIiEEvi,@function +_ZN2t6cmIiEEvi: # @_ZN2t6cmIiEEvi +.Lfunc_begin81: + .loc 1 105 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:105:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp162: + .loc 1 106 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:106:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp163: +.Lfunc_end81: + .size _ZN2t6cmIiEEvi, .Lfunc_end81-_ZN2t6cmIiEEvi + .cfi_endproc + # -- End function + .section .text._ZN2t6clIiEEvv,"axG",@progbits,_ZN2t6clIiEEvv,comdat + .weak _ZN2t6clIiEEvv # -- Begin function _ZN2t6clIiEEvv + .p2align 4, 0x90 + .type _ZN2t6clIiEEvv,@function +_ZN2t6clIiEEvv: # @_ZN2t6clIiEEvv +.Lfunc_begin82: + .loc 1 108 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:108:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) +.Ltmp164: + .loc 1 109 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:109:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp165: +.Lfunc_end82: + .size _ZN2t6clIiEEvv, .Lfunc_end82-_ZN2t6clIiEEvv + .cfi_endproc + # -- End function + .section .text._ZN2t6ixIiEEvi,"axG",@progbits,_ZN2t6ixIiEEvi,comdat + .weak _ZN2t6ixIiEEvi # -- Begin function _ZN2t6ixIiEEvi + .p2align 4, 0x90 + .type _ZN2t6ixIiEEvi,@function +_ZN2t6ixIiEEvi: # @_ZN2t6ixIiEEvi +.Lfunc_begin83: + .loc 1 111 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:111:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp166: + .loc 1 112 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:112:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp167: +.Lfunc_end83: + .size _ZN2t6ixIiEEvi, .Lfunc_end83-_ZN2t6ixIiEEvi + .cfi_endproc + # -- End function + .section .text._ZN2t6ssIiEEvi,"axG",@progbits,_ZN2t6ssIiEEvi,comdat + .weak _ZN2t6ssIiEEvi # -- Begin function _ZN2t6ssIiEEvi + .p2align 4, 0x90 + .type _ZN2t6ssIiEEvi,@function +_ZN2t6ssIiEEvi: # @_ZN2t6ssIiEEvi +.Lfunc_begin84: + .loc 1 114 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:114:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp168: + .loc 1 115 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:115:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp169: +.Lfunc_end84: + .size _ZN2t6ssIiEEvi, .Lfunc_end84-_ZN2t6ssIiEEvi + .cfi_endproc + # -- End function + .section .text._ZN2t6nwIiEEPvmT_,"axG",@progbits,_ZN2t6nwIiEEPvmT_,comdat + .weak _ZN2t6nwIiEEPvmT_ # -- Begin function _ZN2t6nwIiEEPvmT_ + .p2align 4, 0x90 + .type 
_ZN2t6nwIiEEPvmT_,@function +_ZN2t6nwIiEEPvmT_: # @_ZN2t6nwIiEEPvmT_ +.Lfunc_begin85: + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Lfunc_end85: + .size _ZN2t6nwIiEEPvmT_, .Lfunc_end85-_ZN2t6nwIiEEPvmT_ + .cfi_endproc + .file 7 "/usr/local/google/home/blaikie/install/bin/../lib/gcc/x86_64-pc-linux-gnu/10.0.0/../../../../include/c++/10.0.0/x86_64-pc-linux-gnu/bits" "c++config.h" + # -- End function + .section .text._ZN2t6naIiEEPvmT_,"axG",@progbits,_ZN2t6naIiEEPvmT_,comdat + .weak _ZN2t6naIiEEPvmT_ # -- Begin function _ZN2t6naIiEEPvmT_ + .p2align 4, 0x90 + .type _ZN2t6naIiEEPvmT_,@function +_ZN2t6naIiEEPvmT_: # @_ZN2t6naIiEEPvmT_ +.Lfunc_begin86: + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Lfunc_end86: + .size _ZN2t6naIiEEPvmT_, .Lfunc_end86-_ZN2t6naIiEEPvmT_ + .cfi_endproc + # -- End function + .section .text._ZN2t6dlIiEEvPvT_,"axG",@progbits,_ZN2t6dlIiEEvPvT_,comdat + .weak _ZN2t6dlIiEEvPvT_ # -- Begin function _ZN2t6dlIiEEvPvT_ + .p2align 4, 0x90 + .type _ZN2t6dlIiEEvPvT_,@function +_ZN2t6dlIiEEvPvT_: # @_ZN2t6dlIiEEvPvT_ +.Lfunc_begin87: + .loc 1 121 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:121:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp170: + .loc 1 122 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:122:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp171: +.Lfunc_end87: + .size _ZN2t6dlIiEEvPvT_, .Lfunc_end87-_ZN2t6dlIiEEvPvT_ + .cfi_endproc + # -- End function + .section .text._ZN2t6daIiEEvPvT_,"axG",@progbits,_ZN2t6daIiEEvPvT_,comdat + .weak _ZN2t6daIiEEvPvT_ # -- Begin function _ZN2t6daIiEEvPvT_ + .p2align 4, 0x90 + .type _ZN2t6daIiEEvPvT_,@function +_ZN2t6daIiEEvPvT_: # @_ZN2t6daIiEEvPvT_ +.Lfunc_begin88: + .loc 1 128 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:128:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) + movl %esi, -12(%rbp) +.Ltmp172: + .loc 1 129 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:129:3 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp173: +.Lfunc_end88: + .size _ZN2t6daIiEEvPvT_, .Lfunc_end88-_ZN2t6daIiEEvPvT_ + .cfi_endproc + # -- End function + .section .text._ZN2t6awIiEEiv,"axG",@progbits,_ZN2t6awIiEEiv,comdat + .weak _ZN2t6awIiEEiv # -- Begin function _ZN2t6awIiEEiv + .p2align 4, 0x90 + .type _ZN2t6awIiEEiv,@function +_ZN2t6awIiEEiv: # @_ZN2t6awIiEEiv +.Lfunc_begin89: + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) +.Lfunc_end89: + .size _ZN2t6awIiEEiv, .Lfunc_end89-_ZN2t6awIiEEiv + .cfi_endproc + # -- End function + .text + .p2align 4, 0x90 # -- Begin function _Z2f1IJZ4mainE2t7EEvv + .type _Z2f1IJZ4mainE2t7EEvv,@function +_Z2f1IJZ4mainE2t7EEvv: # @_Z2f1IJZ4mainE2t7EEvv +.Lfunc_begin90: + .loc 1 26 0 # 
cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp174: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp175: +.Lfunc_end90: + .size _Z2f1IJZ4mainE2t7EEvv, .Lfunc_end90-_Z2f1IJZ4mainE2t7EEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJRA3_iEEvv,"axG",@progbits,_Z2f1IJRA3_iEEvv,comdat + .weak _Z2f1IJRA3_iEEvv # -- Begin function _Z2f1IJRA3_iEEvv + .p2align 4, 0x90 + .type _Z2f1IJRA3_iEEvv,@function +_Z2f1IJRA3_iEEvv: # @_Z2f1IJRA3_iEEvv +.Lfunc_begin91: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp176: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp177: +.Lfunc_end91: + .size _Z2f1IJRA3_iEEvv, .Lfunc_end91-_Z2f1IJRA3_iEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJPA3_iEEvv,"axG",@progbits,_Z2f1IJPA3_iEEvv,comdat + .weak _Z2f1IJPA3_iEEvv # -- Begin function _Z2f1IJPA3_iEEvv + .p2align 4, 0x90 + .type _Z2f1IJPA3_iEEvv,@function +_Z2f1IJPA3_iEEvv: # @_Z2f1IJPA3_iEEvv +.Lfunc_begin92: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp178: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp179: +.Lfunc_end92: + .size _Z2f1IJPA3_iEEvv, .Lfunc_end92-_Z2f1IJPA3_iEEvv + .cfi_endproc + # -- End function + .section .text._Z2f7I2t1Evv,"axG",@progbits,_Z2f7I2t1Evv,comdat + .weak _Z2f7I2t1Evv # -- Begin function _Z2f7I2t1Evv + .p2align 4, 0x90 + .type _Z2f7I2t1Evv,@function +_Z2f7I2t1Evv: # @_Z2f7I2t1Evv +.Lfunc_begin93: + .loc 1 135 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:135:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp180: + .loc 1 135 53 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:135:53 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp181: +.Lfunc_end93: + .size _Z2f7I2t1Evv, .Lfunc_end93-_Z2f7I2t1Evv + .cfi_endproc + # -- End function + .section .text._Z2f8I2t1iEvv,"axG",@progbits,_Z2f8I2t1iEvv,comdat + .weak _Z2f8I2t1iEvv # -- Begin function _Z2f8I2t1iEvv + .p2align 4, 0x90 + .type _Z2f8I2t1iEvv,@function +_Z2f8I2t1iEvv: # @_Z2f8I2t1iEvv +.Lfunc_begin94: + .loc 1 136 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:136:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp182: + .loc 1 136 66 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:136:66 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp183: 
+.Lfunc_end94: + .size _Z2f8I2t1iEvv, .Lfunc_end94-_Z2f8I2t1iEvv + .cfi_endproc + # -- End function + .section .text._ZN2ns8ttp_userINS_5inner3ttpEEEvv,"axG",@progbits,_ZN2ns8ttp_userINS_5inner3ttpEEEvv,comdat + .weak _ZN2ns8ttp_userINS_5inner3ttpEEEvv # -- Begin function _ZN2ns8ttp_userINS_5inner3ttpEEEvv + .p2align 4, 0x90 + .type _ZN2ns8ttp_userINS_5inner3ttpEEEvv,@function +_ZN2ns8ttp_userINS_5inner3ttpEEEvv: # @_ZN2ns8ttp_userINS_5inner3ttpEEEvv +.Lfunc_begin95: + .loc 1 19 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:19:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp184: + .loc 1 19 19 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:19:19 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp185: +.Lfunc_end95: + .size _ZN2ns8ttp_userINS_5inner3ttpEEEvv, .Lfunc_end95-_ZN2ns8ttp_userINS_5inner3ttpEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJPiPDnEEvv,"axG",@progbits,_Z2f1IJPiPDnEEvv,comdat + .weak _Z2f1IJPiPDnEEvv # -- Begin function _Z2f1IJPiPDnEEvv + .p2align 4, 0x90 + .type _Z2f1IJPiPDnEEvv,@function +_Z2f1IJPiPDnEEvv: # @_Z2f1IJPiPDnEEvv +.Lfunc_begin96: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp186: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp187: +.Lfunc_end96: + .size _Z2f1IJPiPDnEEvv, .Lfunc_end96-_Z2f1IJPiPDnEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJ2t7IiEEEvv,"axG",@progbits,_Z2f1IJ2t7IiEEEvv,comdat + .weak _Z2f1IJ2t7IiEEEvv # -- Begin function _Z2f1IJ2t7IiEEEvv + .p2align 4, 0x90 + .type _Z2f1IJ2t7IiEEEvv,@function +_Z2f1IJ2t7IiEEEvv: # @_Z2f1IJ2t7IiEEEvv +.Lfunc_begin97: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp188: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp189: +.Lfunc_end97: + .size _Z2f1IJ2t7IiEEEvv, .Lfunc_end97-_Z2f1IJ2t7IiEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f7IN2ns3inl2t9EEvv,"axG",@progbits,_Z2f7IN2ns3inl2t9EEvv,comdat + .weak _Z2f7IN2ns3inl2t9EEvv # -- Begin function _Z2f7IN2ns3inl2t9EEvv + .p2align 4, 0x90 + .type _Z2f7IN2ns3inl2t9EEvv,@function +_Z2f7IN2ns3inl2t9EEvv: # @_Z2f7IN2ns3inl2t9EEvv +.Lfunc_begin98: + .loc 1 135 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:135:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp190: + .loc 1 135 53 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:135:53 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp191: +.Lfunc_end98: + .size _Z2f7IN2ns3inl2t9EEvv, .Lfunc_end98-_Z2f7IN2ns3inl2t9EEvv + .cfi_endproc + # -- End function + .section 
.text._Z2f1IJU7_AtomiciEEvv,"axG",@progbits,_Z2f1IJU7_AtomiciEEvv,comdat + .weak _Z2f1IJU7_AtomiciEEvv # -- Begin function _Z2f1IJU7_AtomiciEEvv + .p2align 4, 0x90 + .type _Z2f1IJU7_AtomiciEEvv,@function +_Z2f1IJU7_AtomiciEEvv: # @_Z2f1IJU7_AtomiciEEvv +.Lfunc_begin99: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp192: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp193: +.Lfunc_end99: + .size _Z2f1IJU7_AtomiciEEvv, .Lfunc_end99-_Z2f1IJU7_AtomiciEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJilVcEEvv,"axG",@progbits,_Z2f1IJilVcEEvv,comdat + .weak _Z2f1IJilVcEEvv # -- Begin function _Z2f1IJilVcEEvv + .p2align 4, 0x90 + .type _Z2f1IJilVcEEvv,@function +_Z2f1IJilVcEEvv: # @_Z2f1IJilVcEEvv +.Lfunc_begin100: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp194: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp195: +.Lfunc_end100: + .size _Z2f1IJilVcEEvv, .Lfunc_end100-_Z2f1IJilVcEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJDv2_iEEvv,"axG",@progbits,_Z2f1IJDv2_iEEvv,comdat + .weak _Z2f1IJDv2_iEEvv # -- Begin function _Z2f1IJDv2_iEEvv + .p2align 4, 0x90 + .type _Z2f1IJDv2_iEEvv,@function +_Z2f1IJDv2_iEEvv: # @_Z2f1IJDv2_iEEvv +.Lfunc_begin101: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp196: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp197: +.Lfunc_end101: + .size _Z2f1IJDv2_iEEvv, .Lfunc_end101-_Z2f1IJDv2_iEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJVKPiEEvv,"axG",@progbits,_Z2f1IJVKPiEEvv,comdat + .weak _Z2f1IJVKPiEEvv # -- Begin function _Z2f1IJVKPiEEvv + .p2align 4, 0x90 + .type _Z2f1IJVKPiEEvv,@function +_Z2f1IJVKPiEEvv: # @_Z2f1IJVKPiEEvv +.Lfunc_begin102: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp198: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp199: +.Lfunc_end102: + .size _Z2f1IJVKPiEEvv, .Lfunc_end102-_Z2f1IJVKPiEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJVKvEEvv,"axG",@progbits,_Z2f1IJVKvEEvv,comdat + .weak _Z2f1IJVKvEEvv # -- Begin function _Z2f1IJVKvEEvv + .p2align 4, 0x90 + .type _Z2f1IJVKvEEvv,@function +_Z2f1IJVKvEEvv: # @_Z2f1IJVKvEEvv +.Lfunc_begin103: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # 
%entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp200: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp201: +.Lfunc_end103: + .size _Z2f1IJVKvEEvv, .Lfunc_end103-_Z2f1IJVKvEEvv + .cfi_endproc + # -- End function + .text + .p2align 4, 0x90 # -- Begin function _Z2f1IJ2t1IJZ4mainE3$_1EEEEvv + .type _Z2f1IJ2t1IJZ4mainE3$_1EEEEvv,@function +_Z2f1IJ2t1IJZ4mainE3$_1EEEEvv: # @"_Z2f1IJ2t1IJZ4mainE3$_1EEEEvv" +.Lfunc_begin104: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp202: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp203: +.Lfunc_end104: + .size _Z2f1IJ2t1IJZ4mainE3$_1EEEEvv, .Lfunc_end104-_Z2f1IJ2t1IJZ4mainE3$_1EEEEvv + .cfi_endproc + # -- End function + .section .text._ZN3t10C2IvEEv,"axG",@progbits,_ZN3t10C2IvEEv,comdat + .weak _ZN3t10C2IvEEv # -- Begin function _ZN3t10C2IvEEv + .p2align 4, 0x90 + .type _ZN3t10C2IvEEv,@function +_ZN3t10C2IvEEv: # @_ZN3t10C2IvEEv +.Lfunc_begin105: + .loc 1 159 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:159:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + movq %rdi, -8(%rbp) +.Ltmp204: + .loc 1 159 11 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:159:11 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp205: +.Lfunc_end105: + .size _ZN3t10C2IvEEv, .Lfunc_end105-_ZN3t10C2IvEEv + .cfi_endproc + # -- End function + .section .text._Z2f1IJM3udtKFvvEEEvv,"axG",@progbits,_Z2f1IJM3udtKFvvEEEvv,comdat + .weak _Z2f1IJM3udtKFvvEEEvv # -- Begin function _Z2f1IJM3udtKFvvEEEvv + .p2align 4, 0x90 + .type _Z2f1IJM3udtKFvvEEEvv,@function +_Z2f1IJM3udtKFvvEEEvv: # @_Z2f1IJM3udtKFvvEEEvv +.Lfunc_begin106: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp206: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp207: +.Lfunc_end106: + .size _Z2f1IJM3udtKFvvEEEvv, .Lfunc_end106-_Z2f1IJM3udtKFvvEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJM3udtVFvvREEEvv,"axG",@progbits,_Z2f1IJM3udtVFvvREEEvv,comdat + .weak _Z2f1IJM3udtVFvvREEEvv # -- Begin function _Z2f1IJM3udtVFvvREEEvv + .p2align 4, 0x90 + .type _Z2f1IJM3udtVFvvREEEvv,@function +_Z2f1IJM3udtVFvvREEEvv: # @_Z2f1IJM3udtVFvvREEEvv +.Lfunc_begin107: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp208: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp209: 
+.Lfunc_end107: + .size _Z2f1IJM3udtVFvvREEEvv, .Lfunc_end107-_Z2f1IJM3udtVFvvREEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJM3udtVKFvvOEEEvv,"axG",@progbits,_Z2f1IJM3udtVKFvvOEEEvv,comdat + .weak _Z2f1IJM3udtVKFvvOEEEvv # -- Begin function _Z2f1IJM3udtVKFvvOEEEvv + .p2align 4, 0x90 + .type _Z2f1IJM3udtVKFvvOEEEvv,@function +_Z2f1IJM3udtVKFvvOEEEvv: # @_Z2f1IJM3udtVKFvvOEEEvv +.Lfunc_begin108: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp210: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp211: +.Lfunc_end108: + .size _Z2f1IJM3udtVKFvvOEEEvv, .Lfunc_end108-_Z2f1IJM3udtVKFvvOEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f9IiEPFvvEv,"axG",@progbits,_Z2f9IiEPFvvEv,comdat + .weak _Z2f9IiEPFvvEv # -- Begin function _Z2f9IiEPFvvEv + .p2align 4, 0x90 + .type _Z2f9IiEPFvvEv,@function +_Z2f9IiEPFvvEv: # @_Z2f9IiEPFvvEv +.Lfunc_begin109: + .loc 1 154 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:154:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp212: + .loc 1 155 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:155:3 + xorl %eax, %eax + # kill: def $rax killed $eax + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp213: +.Lfunc_end109: + .size _Z2f9IiEPFvvEv, .Lfunc_end109-_Z2f9IiEPFvvEv + .cfi_endproc + # -- End function + .section .text._Z2f1IJKPFvvEEEvv,"axG",@progbits,_Z2f1IJKPFvvEEEvv,comdat + .weak _Z2f1IJKPFvvEEEvv # -- Begin function _Z2f1IJKPFvvEEEvv + .p2align 4, 0x90 + .type _Z2f1IJKPFvvEEEvv,@function +_Z2f1IJKPFvvEEEvv: # @_Z2f1IJKPFvvEEEvv +.Lfunc_begin110: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp214: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp215: +.Lfunc_end110: + .size _Z2f1IJKPFvvEEEvv, .Lfunc_end110-_Z2f1IJKPFvvEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJRA1_KcEEvv,"axG",@progbits,_Z2f1IJRA1_KcEEvv,comdat + .weak _Z2f1IJRA1_KcEEvv # -- Begin function _Z2f1IJRA1_KcEEvv + .p2align 4, 0x90 + .type _Z2f1IJRA1_KcEEvv,@function +_Z2f1IJRA1_KcEEvv: # @_Z2f1IJRA1_KcEEvv +.Lfunc_begin111: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp216: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp217: +.Lfunc_end111: + .size _Z2f1IJRA1_KcEEvv, .Lfunc_end111-_Z2f1IJRA1_KcEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJKFvvREEEvv,"axG",@progbits,_Z2f1IJKFvvREEEvv,comdat + .weak _Z2f1IJKFvvREEEvv # -- Begin function _Z2f1IJKFvvREEEvv + 
.p2align 4, 0x90 + .type _Z2f1IJKFvvREEEvv,@function +_Z2f1IJKFvvREEEvv: # @_Z2f1IJKFvvREEEvv +.Lfunc_begin112: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp218: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp219: +.Lfunc_end112: + .size _Z2f1IJKFvvREEEvv, .Lfunc_end112-_Z2f1IJKFvvREEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJVFvvOEEEvv,"axG",@progbits,_Z2f1IJVFvvOEEEvv,comdat + .weak _Z2f1IJVFvvOEEEvv # -- Begin function _Z2f1IJVFvvOEEEvv + .p2align 4, 0x90 + .type _Z2f1IJVFvvOEEEvv,@function +_Z2f1IJVFvvOEEEvv: # @_Z2f1IJVFvvOEEEvv +.Lfunc_begin113: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp220: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp221: +.Lfunc_end113: + .size _Z2f1IJVFvvOEEEvv, .Lfunc_end113-_Z2f1IJVFvvOEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJVKFvvEEEvv,"axG",@progbits,_Z2f1IJVKFvvEEEvv,comdat + .weak _Z2f1IJVKFvvEEEvv # -- Begin function _Z2f1IJVKFvvEEEvv + .p2align 4, 0x90 + .type _Z2f1IJVKFvvEEEvv,@function +_Z2f1IJVKFvvEEEvv: # @_Z2f1IJVKFvvEEEvv +.Lfunc_begin114: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp222: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp223: +.Lfunc_end114: + .size _Z2f1IJVKFvvEEEvv, .Lfunc_end114-_Z2f1IJVKFvvEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJA1_KPiEEvv,"axG",@progbits,_Z2f1IJA1_KPiEEvv,comdat + .weak _Z2f1IJA1_KPiEEvv # -- Begin function _Z2f1IJA1_KPiEEvv + .p2align 4, 0x90 + .type _Z2f1IJA1_KPiEEvv,@function +_Z2f1IJA1_KPiEEvv: # @_Z2f1IJA1_KPiEEvv +.Lfunc_begin115: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp224: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp225: +.Lfunc_end115: + .size _Z2f1IJA1_KPiEEvv, .Lfunc_end115-_Z2f1IJA1_KPiEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJRA1_KPiEEvv,"axG",@progbits,_Z2f1IJRA1_KPiEEvv,comdat + .weak _Z2f1IJRA1_KPiEEvv # -- Begin function _Z2f1IJRA1_KPiEEvv + .p2align 4, 0x90 + .type _Z2f1IJRA1_KPiEEvv,@function +_Z2f1IJRA1_KPiEEvv: # @_Z2f1IJRA1_KPiEEvv +.Lfunc_begin116: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + 
.cfi_def_cfa_register %rbp +.Ltmp226: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp227: +.Lfunc_end116: + .size _Z2f1IJRA1_KPiEEvv, .Lfunc_end116-_Z2f1IJRA1_KPiEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJRKM3udtFvvEEEvv,"axG",@progbits,_Z2f1IJRKM3udtFvvEEEvv,comdat + .weak _Z2f1IJRKM3udtFvvEEEvv # -- Begin function _Z2f1IJRKM3udtFvvEEEvv + .p2align 4, 0x90 + .type _Z2f1IJRKM3udtFvvEEEvv,@function +_Z2f1IJRKM3udtFvvEEEvv: # @_Z2f1IJRKM3udtFvvEEEvv +.Lfunc_begin117: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp228: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp229: +.Lfunc_end117: + .size _Z2f1IJRKM3udtFvvEEEvv, .Lfunc_end117-_Z2f1IJRKM3udtFvvEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJFPFvfEiEEEvv,"axG",@progbits,_Z2f1IJFPFvfEiEEEvv,comdat + .weak _Z2f1IJFPFvfEiEEEvv # -- Begin function _Z2f1IJFPFvfEiEEEvv + .p2align 4, 0x90 + .type _Z2f1IJFPFvfEiEEEvv,@function +_Z2f1IJFPFvfEiEEEvv: # @_Z2f1IJFPFvfEiEEEvv +.Lfunc_begin118: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp230: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp231: +.Lfunc_end118: + .size _Z2f1IJFPFvfEiEEEvv, .Lfunc_end118-_Z2f1IJFPFvfEiEEEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJPDoFvvEEEvv,"axG",@progbits,_Z2f1IJPDoFvvEEEvv,comdat + .weak _Z2f1IJPDoFvvEEEvv # -- Begin function _Z2f1IJPDoFvvEEEvv + .p2align 4, 0x90 + .type _Z2f1IJPDoFvvEEEvv,@function +_Z2f1IJPDoFvvEEEvv: # @_Z2f1IJPDoFvvEEEvv +.Lfunc_begin119: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp232: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp233: +.Lfunc_end119: + .size _Z2f1IJPDoFvvEEEvv, .Lfunc_end119-_Z2f1IJPDoFvvEEEvv + .cfi_endproc + # -- End function + .text + .p2align 4, 0x90 # -- Begin function _Z2f1IJFvZ4mainE3$_2EEEvv + .type _Z2f1IJFvZ4mainE3$_2EEEvv,@function +_Z2f1IJFvZ4mainE3$_2EEEvv: # @"_Z2f1IJFvZ4mainE3$_2EEEvv" +.Lfunc_begin120: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp234: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp235: +.Lfunc_end120: + .size _Z2f1IJFvZ4mainE3$_2EEEvv, .Lfunc_end120-_Z2f1IJFvZ4mainE3$_2EEEvv + .cfi_endproc + # -- End 
function + .p2align 4, 0x90 # -- Begin function _Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv + .type _Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv,@function +_Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv: # @"_Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv" +.Lfunc_begin121: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp236: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp237: +.Lfunc_end121: + .size _Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv, .Lfunc_end121-_Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv + .cfi_endproc + # -- End function + .p2align 4, 0x90 # -- Begin function _Z2f1IJFvZ4mainE2t8EEEvv + .type _Z2f1IJFvZ4mainE2t8EEEvv,@function +_Z2f1IJFvZ4mainE2t8EEEvv: # @_Z2f1IJFvZ4mainE2t8EEEvv +.Lfunc_begin122: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp238: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp239: +.Lfunc_end122: + .size _Z2f1IJFvZ4mainE2t8EEEvv, .Lfunc_end122-_Z2f1IJFvZ4mainE2t8EEEvv + .cfi_endproc + # -- End function + .section .text._Z19operator_not_reallyIiEvv,"axG",@progbits,_Z19operator_not_reallyIiEvv,comdat + .weak _Z19operator_not_reallyIiEvv # -- Begin function _Z19operator_not_reallyIiEvv + .p2align 4, 0x90 + .type _Z19operator_not_reallyIiEvv,@function +_Z19operator_not_reallyIiEvv: # @_Z19operator_not_reallyIiEvv +.Lfunc_begin123: + .loc 1 163 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:163:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp240: + .loc 1 164 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:164:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp241: +.Lfunc_end123: + .size _Z19operator_not_reallyIiEvv, .Lfunc_end123-_Z19operator_not_reallyIiEvv + .cfi_endproc + # -- End function + .text + .globl _ZN2t83memEv # -- Begin function _ZN2t83memEv + .p2align 4, 0x90 + .type _ZN2t83memEv,@function +_ZN2t83memEv: # @_ZN2t83memEv +.Lfunc_begin124: + .loc 1 302 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:302:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movq %rdi, -8(%rbp) +.Ltmp242: + .loc 1 304 3 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:304:3 + callq _Z2f1IJZN2t83memEvE2t7EEvv + .loc 1 305 3 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:305:3 + callq _Z2f1IJM2t8FvvEEEvv + .loc 1 306 1 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:306:1 + addq $16, %rsp + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp243: +.Lfunc_end124: + .size _ZN2t83memEv, .Lfunc_end124-_ZN2t83memEv + .cfi_endproc + # -- End function + .p2align 4, 0x90 # -- Begin function 
_Z2f1IJZN2t83memEvE2t7EEvv + .type _Z2f1IJZN2t83memEvE2t7EEvv,@function +_Z2f1IJZN2t83memEvE2t7EEvv: # @_Z2f1IJZN2t83memEvE2t7EEvv +.Lfunc_begin125: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp244: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp245: +.Lfunc_end125: + .size _Z2f1IJZN2t83memEvE2t7EEvv, .Lfunc_end125-_Z2f1IJZN2t83memEvE2t7EEvv + .cfi_endproc + # -- End function + .section .text._Z2f1IJM2t8FvvEEEvv,"axG",@progbits,_Z2f1IJM2t8FvvEEEvv,comdat + .weak _Z2f1IJM2t8FvvEEEvv # -- Begin function _Z2f1IJM2t8FvvEEEvv + .p2align 4, 0x90 + .type _Z2f1IJM2t8FvvEEEvv,@function +_Z2f1IJM2t8FvvEEEvv: # @_Z2f1IJM2t8FvvEEEvv +.Lfunc_begin126: + .loc 1 26 0 # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:26:0 + .cfi_startproc +# %bb.0: # %entry + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp +.Ltmp246: + .loc 1 29 1 prologue_end # cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:29:1 + popq %rbp + .cfi_def_cfa %rsp, 8 + retq +.Ltmp247: +.Lfunc_end126: + .size _Z2f1IJM2t8FvvEEEvv, .Lfunc_end126-_Z2f1IJM2t8FvvEEEvv + .cfi_endproc + # -- End function + .type i,@object # @i + .data + .globl i + .p2align 2 +i: + .long 3 # 0x3 + .size i, 4 + + .type .L__const.main.L,@object # @__const.main.L + .section .rodata,"a",@progbits +.L__const.main.L: + .zero 1 + .size .L__const.main.L, 1 + + .section ".linker-options","e",@llvm_linker_options + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 14 # DW_FORM_strp + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 14 # DW_FORM_strp + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 85 # DW_AT_ranges + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 36 # DW_TAG_base_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 62 # DW_AT_encoding + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 57 # DW_TAG_namespace + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 5 # Abbreviation Code + .byte 4 # DW_TAG_enumeration_type + .byte 1 # DW_CHILDREN_yes + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # 
DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 6 # Abbreviation Code + .byte 40 # DW_TAG_enumerator + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 28 # DW_AT_const_value + .byte 15 # DW_FORM_udata + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 7 # Abbreviation Code + .byte 4 # DW_TAG_enumeration_type + .byte 1 # DW_CHILDREN_yes + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 109 # DW_AT_enum_class + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 8 # Abbreviation Code + .byte 40 # DW_TAG_enumerator + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 28 # DW_AT_const_value + .byte 13 # DW_FORM_sdata + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 9 # Abbreviation Code + .byte 4 # DW_TAG_enumeration_type + .byte 1 # DW_CHILDREN_yes + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 10 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 11 # Abbreviation Code + .ascii "\206\202\001" # DW_TAG_GNU_template_template_param + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .ascii "\220B" # DW_AT_GNU_template_name + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 12 # Abbreviation Code + .byte 19 # DW_TAG_structure_type + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 13 # Abbreviation Code + .byte 19 # DW_TAG_structure_type + .byte 1 # DW_CHILDREN_yes + .byte 54 # DW_AT_calling_convention + .byte 11 # DW_FORM_data1 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 14 # Abbreviation Code + .byte 47 # DW_TAG_template_type_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 15 # Abbreviation Code + .byte 48 # DW_TAG_template_value_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 28 # DW_AT_const_value + .byte 15 # DW_FORM_udata + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 16 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + 
.byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 17 # Abbreviation Code + .byte 47 # DW_TAG_template_type_parameter + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 18 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 19 # Abbreviation Code + .byte 8 # DW_TAG_imported_declaration + .byte 0 # DW_CHILDREN_no + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 24 # DW_AT_import + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 20 # Abbreviation Code + .byte 22 # DW_TAG_typedef + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 5 # DW_FORM_data2 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 21 # Abbreviation Code + .byte 22 # DW_TAG_typedef + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 22 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 23 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 24 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 25 # Abbreviation Code + .byte 52 # DW_TAG_variable + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 5 # DW_FORM_data2 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 26 # Abbreviation Code + .byte 58 # DW_TAG_imported_module + .byte 0 # DW_CHILDREN_no + .byte 58 # DW_AT_decl_file + 
.byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 5 # DW_FORM_data2 + .byte 24 # DW_AT_import + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 27 # Abbreviation Code + .byte 2 # DW_TAG_class_type + .byte 0 # DW_CHILDREN_no + .byte 54 # DW_AT_calling_convention + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 28 # Abbreviation Code + .byte 19 # DW_TAG_structure_type + .byte 0 # DW_CHILDREN_no + .byte 54 # DW_AT_calling_convention + .byte 11 # DW_FORM_data1 + .byte 11 # DW_AT_byte_size + .byte 11 # DW_FORM_data1 + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 29 # Abbreviation Code + .ascii "\207\202\001" # DW_TAG_GNU_template_parameter_pack + .byte 1 # DW_CHILDREN_yes + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 30 # Abbreviation Code + .byte 47 # DW_TAG_template_type_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 31 # Abbreviation Code + .byte 47 # DW_TAG_template_type_parameter + .byte 0 # DW_CHILDREN_no + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 32 # Abbreviation Code + .byte 48 # DW_TAG_template_value_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 28 # DW_AT_const_value + .byte 13 # DW_FORM_sdata + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 33 # Abbreviation Code + .byte 48 # DW_TAG_template_value_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 28 # DW_AT_const_value + .byte 15 # DW_FORM_udata + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 34 # Abbreviation Code + .byte 48 # DW_TAG_template_value_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 28 # DW_AT_const_value + .byte 13 # DW_FORM_sdata + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 35 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 36 # Abbreviation Code + .byte 48 # DW_TAG_template_value_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 37 # Abbreviation Code + .byte 48 # DW_TAG_template_value_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 28 # DW_AT_const_value + .byte 10 # DW_FORM_block1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 38 # Abbreviation Code + .ascii "\207\202\001" # DW_TAG_GNU_template_parameter_pack + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 39 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # 
DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 40 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 41 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 60 # DW_AT_declaration + .byte 25 # DW_FORM_flag_present + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 42 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 43 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 100 # DW_AT_object_pointer + .byte 19 # DW_FORM_ref4 + .byte 71 # DW_AT_specification + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 44 # Abbreviation Code + .byte 5 # DW_TAG_formal_parameter + .byte 0 # DW_CHILDREN_no + .byte 2 # DW_AT_location + .byte 24 # DW_FORM_exprloc + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 52 # DW_AT_artificial + .byte 25 # DW_FORM_flag_present + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 45 # Abbreviation Code + .byte 15 # DW_TAG_pointer_type + .byte 0 # DW_CHILDREN_no + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 46 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 71 # DW_AT_specification + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 47 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 100 # DW_AT_object_pointer + .byte 19 # DW_FORM_ref4 + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 71 # DW_AT_specification + .byte 19 # DW_FORM_ref4 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 48 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 1 # DW_FORM_addr + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 64 # DW_AT_frame_base + .byte 24 # DW_FORM_exprloc + .byte 110 # DW_AT_linkage_name + .byte 14 # DW_FORM_strp + .byte 3 # DW_AT_name + .byte 14 # DW_FORM_strp + .byte 58 # DW_AT_decl_file + .byte 11 # DW_FORM_data1 + .byte 59 # DW_AT_decl_line + .byte 11 # DW_FORM_data1 + .byte 73 # DW_AT_type + .byte 19 # DW_FORM_ref4 + .byte 63 # DW_AT_external + .byte 25 # DW_FORM_flag_present + 
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 49 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 1 # DW_CHILDREN_yes
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .byte 100 # DW_AT_object_pointer
+ .byte 19 # DW_FORM_ref4
+ .byte 59 # DW_AT_decl_line
+ .byte 5 # DW_FORM_data2
+ .byte 71 # DW_AT_specification
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 50 # Abbreviation Code
+ .byte 16 # DW_TAG_reference_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 51 # Abbreviation Code
+ .byte 66 # DW_TAG_rvalue_reference_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 52 # Abbreviation Code
+ .byte 38 # DW_TAG_const_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 53 # Abbreviation Code
+ .byte 19 # DW_TAG_structure_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 60 # DW_AT_declaration
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 54 # Abbreviation Code
+ .byte 21 # DW_TAG_subroutine_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 55 # Abbreviation Code
+ .byte 57 # DW_TAG_namespace
+ .byte 1 # DW_CHILDREN_yes
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 56 # Abbreviation Code
+ .byte 59 # DW_TAG_unspecified_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 57 # Abbreviation Code
+ .byte 38 # DW_TAG_const_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 58 # Abbreviation Code
+ .byte 21 # DW_TAG_subroutine_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 59 # Abbreviation Code
+ .byte 1 # DW_TAG_array_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 60 # Abbreviation Code
+ .byte 33 # DW_TAG_subrange_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 61 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 62 # Abbreviation Code
+ .byte 33 # DW_TAG_subrange_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 55 # DW_AT_count
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 63 # Abbreviation Code
+ .byte 53 # DW_TAG_volatile_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 64 # Abbreviation Code
+ .byte 1 # DW_TAG_array_type
+ .byte 1 # DW_CHILDREN_yes
+ .ascii "\207B" # DW_AT_GNU_vector
+ .byte 25 # DW_FORM_flag_present
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 65 # Abbreviation Code
+ .byte 53 # DW_TAG_volatile_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 66 # Abbreviation Code
+ .byte 31 # DW_TAG_ptr_to_member_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 29 # DW_AT_containing_type
+ .byte 19 # DW_FORM_ref4
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 67 # Abbreviation Code
+ .byte 21 # DW_TAG_subroutine_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 68 # Abbreviation Code
+ .byte 21 # DW_TAG_subroutine_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 119 # DW_AT_reference
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 69 # Abbreviation Code
+ .byte 21 # DW_TAG_subroutine_type
+ .byte 1 # DW_CHILDREN_yes
+ .byte 120 # DW_AT_rvalue_reference
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 70 # Abbreviation Code
+ .byte 21 # DW_TAG_subroutine_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 119 # DW_AT_reference
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 71 # Abbreviation Code
+ .byte 21 # DW_TAG_subroutine_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 120 # DW_AT_rvalue_reference
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x3300 DW_TAG_compile_unit
+ .long .Linfo_string0 # DW_AT_producer
+ .short 33 # DW_AT_language
+ .long .Linfo_string1 # DW_AT_name
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Linfo_string2 # DW_AT_comp_dir
+ .quad 0 # DW_AT_low_pc
+ .long .Ldebug_ranges0 # DW_AT_ranges
+ .byte 2 # Abbrev [2] 0x2a:0x15 DW_TAG_variable
+ .long .Linfo_string3 # DW_AT_name
+ .long 63 # DW_AT_type
+ # DW_AT_external
+ .byte 1 # DW_AT_decl_file
+ .byte 47 # DW_AT_decl_line
+ .byte 9 # DW_AT_location
+ .byte 3
+ .quad i
+ .byte 3 # Abbrev [3] 0x3f:0x7 DW_TAG_base_type
+ .long .Linfo_string4 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 4 # Abbrev [4] 0x46:0xa6 DW_TAG_namespace
+ .long .Linfo_string5 # DW_AT_name
+ .byte 5 # Abbrev [5] 0x4b:0x1f DW_TAG_enumeration_type
+ .long 236 # DW_AT_type
+ .long .Linfo_string10 # DW_AT_name
+ .byte 4 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 20 # DW_AT_decl_line
+ .byte 6 # Abbrev [6] 0x57:0x6 DW_TAG_enumerator
+ .long .Linfo_string7 # DW_AT_name
+ .byte 0 # DW_AT_const_value
+ .byte 6 # Abbrev [6] 0x5d:0x6 DW_TAG_enumerator
+ .long .Linfo_string8 # DW_AT_name
+ .byte 1 # DW_AT_const_value
+ .byte 6 # Abbrev [6] 0x63:0x6 DW_TAG_enumerator
+ .long .Linfo_string9 # DW_AT_name
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 7 # Abbrev [7] 0x6a:0x1f DW_TAG_enumeration_type
+ .long 63 # DW_AT_type
+ # DW_AT_enum_class
+ .long .Linfo_string11 # DW_AT_name
+ .byte 4 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 21 # DW_AT_decl_line
+ .byte 8 # Abbrev [8] 0x76:0x6 DW_TAG_enumerator
+ .long .Linfo_string7 # DW_AT_name
+ .byte 0 # DW_AT_const_value
+ .byte 8 # Abbrev [8] 0x7c:0x6 DW_TAG_enumerator
+ .long .Linfo_string8 # DW_AT_name
+ .byte 1 # DW_AT_const_value
+ .byte 8 # Abbrev [8] 0x82:0x6 DW_TAG_enumerator
+ .long .Linfo_string9 # DW_AT_name
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 5 # Abbrev [5] 0x89:0x14 DW_TAG_enumeration_type
+ .long 243 # DW_AT_type
+ .long .Linfo_string14 # DW_AT_name
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 23 # DW_AT_decl_line
+ .byte 6 # Abbrev [6] 0x95:0x7 DW_TAG_enumerator
+ .long .Linfo_string13 # DW_AT_name
+ .ascii "\377\001" # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 9 # Abbrev [9] 0x9d:0x1b DW_TAG_enumeration_type
+ .long 236 # DW_AT_type
+ .byte 4 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 22 # DW_AT_decl_line
+ .byte 6 # Abbrev [6] 0xa5:0x6 DW_TAG_enumerator
+ .long .Linfo_string15 # DW_AT_name
+ .byte 0 # DW_AT_const_value
+ .byte 6 # Abbrev [6] 0xab:0x6 DW_TAG_enumerator
+ .long .Linfo_string16 # DW_AT_name
+ .byte 1 # DW_AT_const_value
+ .byte 6 # Abbrev [6] 0xb1:0x6 DW_TAG_enumerator
+ .long .Linfo_string17 # DW_AT_name
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xb8:0x23 DW_TAG_subprogram
+ .quad .Lfunc_begin95 # DW_AT_low_pc
+ .long .Lfunc_end95-.Lfunc_begin95 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string294 # DW_AT_linkage_name
+ .long .Linfo_string295 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 19 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 11 # Abbrev [11] 0xd1:0x9 DW_TAG_GNU_template_template_param
+ .long .Linfo_string18 # DW_AT_name
+ .long .Linfo_string293 # DW_AT_GNU_template_name
+ .byte 0 # End Of Children Mark
+ .byte 12 # Abbrev [12] 0xdb:0x5 DW_TAG_structure_type
+ .long .Linfo_string156 # DW_AT_name
+ # DW_AT_declaration
+ .byte 4 # Abbrev [4] 0xe0:0xb DW_TAG_namespace
+ .long .Linfo_string163 # DW_AT_name
+ .byte 12 # Abbrev [12] 0xe5:0x5 DW_TAG_structure_type
+ .long .Linfo_string156 # DW_AT_name
+ # DW_AT_declaration
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 3 # Abbrev [3] 0xec:0x7 DW_TAG_base_type
+ .long .Linfo_string6 # DW_AT_name
+ .byte 7 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 3 # Abbrev [3] 0xf3:0x7 DW_TAG_base_type
+ .long .Linfo_string12 # DW_AT_name
+ .byte 8 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 13 # Abbrev [13] 0xfa:0x1d DW_TAG_structure_type
+ .byte 5 # DW_AT_calling_convention
+ .long .Linfo_string21 # DW_AT_name
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 41 # DW_AT_decl_line
+ .byte 14 # Abbrev [14] 0x103:0x9 DW_TAG_template_type_parameter
+ .long 63 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 15 # Abbrev [15] 0x10c:0xa DW_TAG_template_value_parameter
+ .long 279 # DW_AT_type
+ .long .Linfo_string20 # DW_AT_name
+ .byte 0 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 3 # Abbrev [3] 0x117:0x7 DW_TAG_base_type
+ .long .Linfo_string19 # DW_AT_name
+ .byte 2 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 13 # Abbrev [13] 0x11e:0x1c DW_TAG_structure_type
+ .byte 5 # DW_AT_calling_convention
+ .long .Linfo_string22 # DW_AT_name
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 157 # DW_AT_decl_line
+ .byte 16 # Abbrev [16] 0x127:0x12 DW_TAG_subprogram
+ .long .Linfo_string128 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 159 # DW_AT_decl_line
+ # DW_AT_declaration
+ # DW_AT_external
+ .byte 17 # Abbrev [17] 0x12e:0x5 DW_TAG_template_type_parameter
+ .long .Linfo_string18 # DW_AT_name
+ .byte 18 # Abbrev [18] 0x133:0x5 DW_TAG_formal_parameter
+ .long 7960 # DW_AT_type
+ # DW_AT_artificial
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 4 # Abbrev [4] 0x13a:0xd6 DW_TAG_namespace
+ .long .Linfo_string23 # DW_AT_name
+ .byte 19 # Abbrev [19] 0x13f:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 47 # DW_AT_decl_line
+ .long 528 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x146:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 48 # DW_AT_decl_line
+ .long 557 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x14d:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 49 # DW_AT_decl_line
+ .long 586 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x154:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 50 # DW_AT_decl_line
+ .long 608 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x15b:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 52 # DW_AT_decl_line
+ .long 637 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x162:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 53 # DW_AT_decl_line
+ .long 648 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x169:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 54 # DW_AT_decl_line
+ .long 659 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x170:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 55 # DW_AT_decl_line
+ .long 670 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x177:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 57 # DW_AT_decl_line
+ .long 681 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x17e:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 58 # DW_AT_decl_line
+ .long 703 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x185:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 59 # DW_AT_decl_line
+ .long 725 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x18c:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 60 # DW_AT_decl_line
+ .long 747 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x193:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 62 # DW_AT_decl_line
+ .long 769 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x19a:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 63 # DW_AT_decl_line
+ .long 791 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1a1:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 65 # DW_AT_decl_line
+ .long 802 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1a8:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 66 # DW_AT_decl_line
+ .long 824 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1af:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 67 # DW_AT_decl_line
+ .long 853 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1b6:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 68 # DW_AT_decl_line
+ .long 875 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1bd:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 70 # DW_AT_decl_line
+ .long 904 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1c4:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 71 # DW_AT_decl_line
+ .long 915 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1cb:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 72 # DW_AT_decl_line
+ .long 926 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1d2:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 73 # DW_AT_decl_line
+ .long 937 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1d9:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 75 # DW_AT_decl_line
+ .long 948 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1e0:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 76 # DW_AT_decl_line
+ .long 970 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1e7:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 77 # DW_AT_decl_line
+ .long 992 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1ee:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 78 # DW_AT_decl_line
+ .long 1014 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1f5:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 80 # DW_AT_decl_line
+ .long 1036 # DW_AT_import
+ .byte 19 # Abbrev [19] 0x1fc:0x7 DW_TAG_imported_declaration
+ .byte 4 # DW_AT_decl_file
+ .byte 81 # DW_AT_decl_line
+ .long 1058 # DW_AT_import
+ .byte 20 # Abbrev [20] 0x203:0xc DW_TAG_typedef
+ .long 897 # DW_AT_type
+ .long .Linfo_string119 # DW_AT_name
+ .byte 7 # DW_AT_decl_file
+ .short 260 # DW_AT_decl_line
+ .byte 0 # End Of Children Mark
+ .byte 21 # Abbrev [21] 0x210:0xb DW_TAG_typedef
+ .long 539 # DW_AT_type
+ .long .Linfo_string26 # DW_AT_name
+ .byte 3 # DW_AT_decl_file
+ .byte 24 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x21b:0xb DW_TAG_typedef
+ .long 550 # DW_AT_type
+ .long .Linfo_string25 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 37 # DW_AT_decl_line
+ .byte 3 # Abbrev [3] 0x226:0x7 DW_TAG_base_type
+ .long .Linfo_string24 # DW_AT_name
+ .byte 6 # DW_AT_encoding
+ .byte 1 # DW_AT_byte_size
+ .byte 21 # Abbrev [21] 0x22d:0xb DW_TAG_typedef
+ .long 568 # DW_AT_type
+ .long .Linfo_string29 # DW_AT_name
+ .byte 3 # DW_AT_decl_file
+ .byte 25 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x238:0xb DW_TAG_typedef
+ .long 579 # DW_AT_type
+ .long .Linfo_string28 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 39 # DW_AT_decl_line
+ .byte 3 # Abbrev [3] 0x243:0x7 DW_TAG_base_type
+ .long .Linfo_string27 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 2 # DW_AT_byte_size
+ .byte 21 # Abbrev [21] 0x24a:0xb DW_TAG_typedef
+ .long 597 # DW_AT_type
+ .long .Linfo_string31 # DW_AT_name
+ .byte 3 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x255:0xb DW_TAG_typedef
+ .long 63 # DW_AT_type
+ .long .Linfo_string30 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 41 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x260:0xb DW_TAG_typedef
+ .long 619 # DW_AT_type
+ .long .Linfo_string34 # DW_AT_name
+ .byte 3 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x26b:0xb DW_TAG_typedef
+ .long 630 # DW_AT_type
+ .long .Linfo_string33 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 44 # DW_AT_decl_line
+ .byte 3 # Abbrev [3] 0x276:0x7 DW_TAG_base_type
+ .long .Linfo_string32 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 8 # DW_AT_byte_size
+ .byte 21 # Abbrev [21] 0x27d:0xb DW_TAG_typedef
+ .long 550 # DW_AT_type
+ .long .Linfo_string35 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 58 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x288:0xb DW_TAG_typedef
+ .long 630 # DW_AT_type
+ .long .Linfo_string36 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 60 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x293:0xb DW_TAG_typedef
+ .long 630 # DW_AT_type
+ .long .Linfo_string37 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 61 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x29e:0xb DW_TAG_typedef
+ .long 630 # DW_AT_type
+ .long .Linfo_string38 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 62 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2a9:0xb DW_TAG_typedef
+ .long 692 # DW_AT_type
+ .long .Linfo_string40 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 43 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2b4:0xb DW_TAG_typedef
+ .long 539 # DW_AT_type
+ .long .Linfo_string39 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 52 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2bf:0xb DW_TAG_typedef
+ .long 714 # DW_AT_type
+ .long .Linfo_string42 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 44 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2ca:0xb DW_TAG_typedef
+ .long 568 # DW_AT_type
+ .long .Linfo_string41 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 54 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2d5:0xb DW_TAG_typedef
+ .long 736 # DW_AT_type
+ .long .Linfo_string44 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 45 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2e0:0xb DW_TAG_typedef
+ .long 597 # DW_AT_type
+ .long .Linfo_string43 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 56 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2eb:0xb DW_TAG_typedef
+ .long 758 # DW_AT_type
+ .long .Linfo_string46 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 46 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x2f6:0xb DW_TAG_typedef
+ .long 619 # DW_AT_type
+ .long .Linfo_string45 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 58 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x301:0xb DW_TAG_typedef
+ .long 780 # DW_AT_type
+ .long .Linfo_string48 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 101 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x30c:0xb DW_TAG_typedef
+ .long 630 # DW_AT_type
+ .long .Linfo_string47 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 72 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x317:0xb DW_TAG_typedef
+ .long 630 # DW_AT_type
+ .long .Linfo_string49 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 87 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x322:0xb DW_TAG_typedef
+ .long 813 # DW_AT_type
+ .long .Linfo_string51 # DW_AT_name
+ .byte 6 # DW_AT_decl_file
+ .byte 24 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x32d:0xb DW_TAG_typedef
+ .long 243 # DW_AT_type
+ .long .Linfo_string50 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 38 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x338:0xb DW_TAG_typedef
+ .long 835 # DW_AT_type
+ .long .Linfo_string54 # DW_AT_name
+ .byte 6 # DW_AT_decl_file
+ .byte 25 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x343:0xb DW_TAG_typedef
+ .long 846 # DW_AT_type
+ .long .Linfo_string53 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 40 # DW_AT_decl_line
+ .byte 3 # Abbrev [3] 0x34e:0x7 DW_TAG_base_type
+ .long .Linfo_string52 # DW_AT_name
+ .byte 7 # DW_AT_encoding
+ .byte 2 # DW_AT_byte_size
+ .byte 21 # Abbrev [21] 0x355:0xb DW_TAG_typedef
+ .long 864 # DW_AT_type
+ .long .Linfo_string56 # DW_AT_name
+ .byte 6 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x360:0xb DW_TAG_typedef
+ .long 236 # DW_AT_type
+ .long .Linfo_string55 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 42 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x36b:0xb DW_TAG_typedef
+ .long 886 # DW_AT_type
+ .long .Linfo_string59 # DW_AT_name
+ .byte 6 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x376:0xb DW_TAG_typedef
+ .long 897 # DW_AT_type
+ .long .Linfo_string58 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 45 # DW_AT_decl_line
+ .byte 3 # Abbrev [3] 0x381:0x7 DW_TAG_base_type
+ .long .Linfo_string57 # DW_AT_name
+ .byte 7 # DW_AT_encoding
+ .byte 8 # DW_AT_byte_size
+ .byte 21 # Abbrev [21] 0x388:0xb DW_TAG_typedef
+ .long 243 # DW_AT_type
+ .long .Linfo_string60 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 71 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x393:0xb DW_TAG_typedef
+ .long 897 # DW_AT_type
+ .long .Linfo_string61 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 73 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x39e:0xb DW_TAG_typedef
+ .long 897 # DW_AT_type
+ .long .Linfo_string62 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 74 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3a9:0xb DW_TAG_typedef
+ .long 897 # DW_AT_type
+ .long .Linfo_string63 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 75 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3b4:0xb DW_TAG_typedef
+ .long 959 # DW_AT_type
+ .long .Linfo_string65 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 49 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3bf:0xb DW_TAG_typedef
+ .long 813 # DW_AT_type
+ .long .Linfo_string64 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 53 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3ca:0xb DW_TAG_typedef
+ .long 981 # DW_AT_type
+ .long .Linfo_string67 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 50 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3d5:0xb DW_TAG_typedef
+ .long 835 # DW_AT_type
+ .long .Linfo_string66 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 55 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3e0:0xb DW_TAG_typedef
+ .long 1003 # DW_AT_type
+ .long .Linfo_string69 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 51 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3eb:0xb DW_TAG_typedef
+ .long 864 # DW_AT_type
+ .long .Linfo_string68 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 57 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x3f6:0xb DW_TAG_typedef
+ .long 1025 # DW_AT_type
+ .long .Linfo_string71 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 52 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x401:0xb DW_TAG_typedef
+ .long 886 # DW_AT_type
+ .long .Linfo_string70 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 59 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x40c:0xb DW_TAG_typedef
+ .long 1047 # DW_AT_type
+ .long .Linfo_string73 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 102 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x417:0xb DW_TAG_typedef
+ .long 897 # DW_AT_type
+ .long .Linfo_string72 # DW_AT_name
+ .byte 2 # DW_AT_decl_file
+ .byte 73 # DW_AT_decl_line
+ .byte 21 # Abbrev [21] 0x422:0xb DW_TAG_typedef
+ .long 897 # DW_AT_type
+ .long .Linfo_string74 # DW_AT_name
+ .byte 5 # DW_AT_decl_file
+ .byte 90 # DW_AT_decl_line
+ .byte 10 # Abbrev [10] 0x42d:0x24 DW_TAG_subprogram
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string132 # DW_AT_linkage_name
+ .long .Linfo_string133 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 134 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 22 # Abbrev [22] 0x446:0xa DW_TAG_formal_parameter
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .byte 1 # DW_AT_decl_file
+ .byte 134 # DW_AT_decl_line
+ .long 9336 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 23 # Abbrev [23] 0x451:0x9a DW_TAG_subprogram
+ .quad .Lfunc_begin1 # DW_AT_low_pc
+ .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string134 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 166 # DW_AT_decl_line
+ .long 63 # DW_AT_type
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x46a:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string189 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 167 # DW_AT_decl_line
+ .long 1243 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x478:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string357 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 168 # DW_AT_decl_line
+ .long 1238 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x486:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 104
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 199 # DW_AT_decl_line
+ .long 9929 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x494:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 96
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 214 # DW_AT_decl_line
+ .long 9949 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x4a2:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 88
+ .long .Linfo_string363 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 236 # DW_AT_decl_line
+ .long 5142 # DW_AT_type
+ .byte 25 # Abbrev [25] 0x4b0:0xf DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 80
+ .long .Linfo_string364 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .short 271 # DW_AT_decl_line
+ .long 9978 # DW_AT_type
+ .byte 25 # Abbrev [25] 0x4bf:0xf DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 72
+ .long .Linfo_string366 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .short 280 # DW_AT_decl_line
+ .long 286 # DW_AT_type
+ .byte 26 # Abbrev [26] 0x4ce:0x8 DW_TAG_imported_module
+ .byte 1 # DW_AT_decl_file
+ .short 268 # DW_AT_decl_line
+ .long 70 # DW_AT_import
+ .byte 27 # Abbrev [27] 0x4d6:0x5 DW_TAG_class_type
+ .byte 5 # DW_AT_calling_convention
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 168 # DW_AT_decl_line
+ .byte 28 # Abbrev [28] 0x4db:0x5 DW_TAG_structure_type
+ .byte 5 # DW_AT_calling_convention
+ .byte 1 # DW_AT_byte_size
+ .byte 1 # DW_AT_decl_file
+ .byte 167 # DW_AT_decl_line
+ .byte 12 # Abbrev [12] 0x4e0:0x5 DW_TAG_structure_type
+ .long .Linfo_string281 # DW_AT_name
+ # DW_AT_declaration
+ .byte 12 # Abbrev [12] 0x4e5:0x5 DW_TAG_structure_type
+ .long .Linfo_string131 # DW_AT_name
+ # DW_AT_declaration
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x4eb:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin2 # DW_AT_low_pc
+ .long .Lfunc_end2-.Lfunc_begin2 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string135 # DW_AT_linkage_name
+ .long .Linfo_string136 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x504:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 9360 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x512:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 9989 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x520:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x525:0x5 DW_TAG_template_type_parameter
+ .long 63 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x52c:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin3 # DW_AT_low_pc
+ .long .Lfunc_end3-.Lfunc_begin3 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string137 # DW_AT_linkage_name
+ .long .Linfo_string138 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x545:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 6062 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x553:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10011 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x561:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x566:0x5 DW_TAG_template_type_parameter
+ .long 6083 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x56d:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin4 # DW_AT_low_pc
+ .long .Lfunc_end4-.Lfunc_begin4 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string139 # DW_AT_linkage_name
+ .long .Linfo_string140 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x586:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10033 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x594:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10054 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x5a2:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x5a7:0x5 DW_TAG_template_type_parameter
+ .long 279 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x5ae:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin5 # DW_AT_low_pc
+ .long .Lfunc_end5-.Lfunc_begin5 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string142 # DW_AT_linkage_name
+ .long .Linfo_string143 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x5c7:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10076 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x5d5:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10097 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x5e3:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x5e8:0x5 DW_TAG_template_type_parameter
+ .long 9329 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x5ef:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin6 # DW_AT_low_pc
+ .long .Lfunc_end6-.Lfunc_begin6 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string144 # DW_AT_linkage_name
+ .long .Linfo_string145 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x608:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10119 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x616:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10140 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x624:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x629:0x5 DW_TAG_template_type_parameter
+ .long 630 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x630:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin7 # DW_AT_low_pc
+ .long .Lfunc_end7-.Lfunc_begin7 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string146 # DW_AT_linkage_name
+ .long .Linfo_string147 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x649:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10162 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x657:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10183 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x665:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x66a:0x5 DW_TAG_template_type_parameter
+ .long 579 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x671:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin8 # DW_AT_low_pc
+ .long .Lfunc_end8-.Lfunc_begin8 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string148 # DW_AT_linkage_name
+ .long .Linfo_string149 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x68a:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10205 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x698:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10226 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x6a6:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x6ab:0x5 DW_TAG_template_type_parameter
+ .long 236 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x6b2:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin9 # DW_AT_low_pc
+ .long .Lfunc_end9-.Lfunc_begin9 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string151 # DW_AT_linkage_name
+ .long .Linfo_string152 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x6cb:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10248 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x6d9:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10269 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x6e7:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x6ec:0x5 DW_TAG_template_type_parameter
+ .long 9336 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x6f3:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin10 # DW_AT_low_pc
+ .long .Lfunc_end10-.Lfunc_begin10 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string154 # DW_AT_linkage_name
+ .long .Linfo_string155 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x70c:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10291 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x71a:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10312 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x728:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x72d:0x5 DW_TAG_template_type_parameter
+ .long 9343 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x734:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin11 # DW_AT_low_pc
+ .long .Lfunc_end11-.Lfunc_begin11 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string157 # DW_AT_linkage_name
+ .long .Linfo_string158 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x74d:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10334 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x75b:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10355 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x769:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x76e:0x5 DW_TAG_template_type_parameter
+ .long 9350 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x775:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin12 # DW_AT_low_pc
+ .long .Lfunc_end12-.Lfunc_begin12 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string159 # DW_AT_linkage_name
+ .long .Linfo_string160 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x78e:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10377 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x79c:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10398 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x7aa:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x7af:0x5 DW_TAG_template_type_parameter
+ .long 219 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x7b6:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin13 # DW_AT_low_pc
+ .long .Lfunc_end13-.Lfunc_begin13 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string161 # DW_AT_linkage_name
+ .long .Linfo_string162 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x7cf:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10420 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x7dd:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10441 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x7eb:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x7f0:0x5 DW_TAG_template_type_parameter
+ .long 9355 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x7f7:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin14 # DW_AT_low_pc
+ .long .Lfunc_end14-.Lfunc_begin14 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string164 # DW_AT_linkage_name
+ .long .Linfo_string165 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x810:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10463 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x81e:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10484 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x82c:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x831:0x5 DW_TAG_template_type_parameter
+ .long 229 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x838:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin15 # DW_AT_low_pc
+ .long .Lfunc_end15-.Lfunc_begin15 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string167 # DW_AT_linkage_name
+ .long .Linfo_string168 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x851:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10506 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x85f:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10527 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x86d:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x872:0x5 DW_TAG_template_type_parameter
+ .long 9360 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x879:0x46 DW_TAG_subprogram
+ .quad .Lfunc_begin16 # DW_AT_low_pc
+ .long .Lfunc_end16-.Lfunc_begin16 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string169 # DW_AT_linkage_name
+ .long .Linfo_string170 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x892:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10549 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x8a0:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10575 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x8ae:0x10 DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x8b3:0x5 DW_TAG_template_type_parameter
+ .long 63 # DW_AT_type
+ .byte 30 # Abbrev [30] 0x8b8:0x5 DW_TAG_template_type_parameter
+ .long 6083 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x8bf:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin17 # DW_AT_low_pc
+ .long .Lfunc_end17-.Lfunc_begin17 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string171 # DW_AT_linkage_name
+ .long .Linfo_string172 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x8d8:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 9545 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x8e6:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10602 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x8f4:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x8f9:0x5 DW_TAG_template_type_parameter
+ .long 9381 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x900:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin18 # DW_AT_low_pc
+ .long .Lfunc_end18-.Lfunc_begin18 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string173 # DW_AT_linkage_name
+ .long .Linfo_string174 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x919:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10624 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x927:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10645 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x935:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x93a:0x5 DW_TAG_template_type_parameter
+ .long 9386 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x941:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin19 # DW_AT_low_pc
+ .long .Lfunc_end19-.Lfunc_begin19 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string175 # DW_AT_linkage_name
+ .long .Linfo_string176 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x95a:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10667 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x968:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10688 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x976:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x97b:0x5 DW_TAG_template_type_parameter
+ .long 9391 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x982:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin20 # DW_AT_low_pc
+ .long .Lfunc_end20-.Lfunc_begin20 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string177 # DW_AT_linkage_name
+ .long .Linfo_string178 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x99b:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10710 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x9a9:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10731 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x9b7:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0x9bc:0x5 DW_TAG_template_type_parameter
+ .long 9396 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0x9c3:0x3d DW_TAG_subprogram
+ .quad .Lfunc_begin21 # DW_AT_low_pc
+ .long .Lfunc_end21-.Lfunc_begin21 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string179 # DW_AT_linkage_name
+ .long .Linfo_string180 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0x9dc:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10753 # DW_AT_type
+ .byte 24 # Abbrev [24] 0x9ea:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10770 # DW_AT_type
+ .byte 29 # Abbrev [29] 0x9f8:0x7 DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 31 # Abbrev [31] 0x9fd:0x1 DW_TAG_template_type_parameter
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xa00:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin22 # DW_AT_low_pc
+ .long .Lfunc_end22-.Lfunc_begin22 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string183 # DW_AT_linkage_name
+ .long .Linfo_string184 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0xa19:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10788 # DW_AT_type
+ .byte 24 # Abbrev [24] 0xa27:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10809 # DW_AT_type
+ .byte 29 # Abbrev [29] 0xa35:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0xa3a:0x5 DW_TAG_template_type_parameter
+ .long 9410 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xa41:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin23 # DW_AT_low_pc
+ .long .Lfunc_end23-.Lfunc_begin23 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string185 # DW_AT_linkage_name
+ .long .Linfo_string186 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0xa5a:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10831 # DW_AT_type
+ .byte 24 # Abbrev [24] 0xa68:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10852 # DW_AT_type
+ .byte 29 # Abbrev [29] 0xa76:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0xa7b:0x5 DW_TAG_template_type_parameter
+ .long 897 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xa82:0x2e DW_TAG_subprogram
+ .quad .Lfunc_begin24 # DW_AT_low_pc
+ .long .Lfunc_end24-.Lfunc_begin24 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string187 # DW_AT_linkage_name
+ .long .Linfo_string188 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 31 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 15 # Abbrev [15] 0xa9b:0xa DW_TAG_template_value_parameter
+ .long 279 # DW_AT_type
+ .long .Linfo_string20 # DW_AT_name
+ .byte 1 # DW_AT_const_value
+ .byte 32 # Abbrev [32] 0xaa5:0xa DW_TAG_template_value_parameter
+ .long 63 # DW_AT_type
+ .long .Linfo_string3 # DW_AT_name
+ .byte 3 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xab0:0x35 DW_TAG_subprogram
+ .quad .Lfunc_begin25 # DW_AT_low_pc
+ .long .Lfunc_end25-.Lfunc_begin25 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string190 # DW_AT_linkage_name
+ .long .Linfo_string191 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xac9:0x9 DW_TAG_template_type_parameter
+ .long 75 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xad2:0x12 DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xad7:0x6 DW_TAG_template_value_parameter
+ .long 75 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 33 # Abbrev [33] 0xadd:0x6 DW_TAG_template_value_parameter
+ .long 75 # DW_AT_type
+ .byte 2 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xae5:0x35 DW_TAG_subprogram
+ .quad .Lfunc_begin26 # DW_AT_low_pc
+ .long .Lfunc_end26-.Lfunc_begin26 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string192 # DW_AT_linkage_name
+ .long .Linfo_string193 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xafe:0x9 DW_TAG_template_type_parameter
+ .long 106 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xb07:0x12 DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 34 # Abbrev [34] 0xb0c:0x6 DW_TAG_template_value_parameter
+ .long 106 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xb12:0x6 DW_TAG_template_value_parameter
+ .long 106 # DW_AT_type
+ .byte 2 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xb1a:0x30 DW_TAG_subprogram
+ .quad .Lfunc_begin27 # DW_AT_low_pc
+ .long .Lfunc_end27-.Lfunc_begin27 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string194 # DW_AT_linkage_name
+ .long .Linfo_string195 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xb33:0x9 DW_TAG_template_type_parameter
+ .long 137 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xb3c:0xd DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xb41:0x7 DW_TAG_template_value_parameter
+ .long 137 # DW_AT_type
+ .ascii "\377\001" # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 35 # Abbrev [35] 0xb4a:0x35 DW_TAG_subprogram
+ .quad .Lfunc_begin28 # DW_AT_low_pc
+ .long .Lfunc_end28-.Lfunc_begin28 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string196 # DW_AT_linkage_name
+ .long .Linfo_string197 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ .byte 14 # Abbrev [14] 0xb63:0x9 DW_TAG_template_type_parameter
+ .long 157 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xb6c:0x12 DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xb71:0x6 DW_TAG_template_value_parameter
+ .long 157 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 33 # Abbrev [33] 0xb77:0x6 DW_TAG_template_value_parameter
+ .long 157 # DW_AT_type
+ .byte 2 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xb7f:0x39 DW_TAG_subprogram
+ .quad .Lfunc_begin29 # DW_AT_low_pc
+ .long .Lfunc_end29-.Lfunc_begin29 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string198 # DW_AT_linkage_name
+ .long .Linfo_string199 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xb98:0x9 DW_TAG_template_type_parameter
+ .long 9381 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xba1:0x16 DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 36 # Abbrev [36] 0xba6:0x10 DW_TAG_template_value_parameter
+ .long 9381 # DW_AT_type
+ .byte 10 # DW_AT_location
+ .byte 3
+ .quad i
+ .byte 159
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xbb8:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin30 # DW_AT_low_pc
+ .long .Lfunc_end30-.Lfunc_begin30 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string200 # DW_AT_linkage_name
+ .long .Linfo_string201 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xbd1:0x9 DW_TAG_template_type_parameter
+ .long 9381 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xbda:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xbdf:0x6 DW_TAG_template_value_parameter
+ .long 9381 # DW_AT_type
+ .byte 0 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xbe7:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin31 # DW_AT_low_pc
+ .long .Lfunc_end31-.Lfunc_begin31 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string202 # DW_AT_linkage_name
+ .long .Linfo_string203 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xc00:0x9 DW_TAG_template_type_parameter
+ .long 897 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xc09:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xc0e:0x6 DW_TAG_template_value_parameter
+ .long 897 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xc16:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin32 # DW_AT_low_pc
+ .long .Lfunc_end32-.Lfunc_begin32 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string204 # DW_AT_linkage_name
+ .long .Linfo_string205 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xc2f:0x9 DW_TAG_template_type_parameter
+ .long 9336 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xc38:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xc3d:0x6 DW_TAG_template_value_parameter
+ .long 9336 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xc45:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin33 # DW_AT_low_pc
+ .long .Lfunc_end33-.Lfunc_begin33 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string206 # DW_AT_linkage_name
+ .long .Linfo_string207 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xc5e:0x9 DW_TAG_template_type_parameter
+ .long 630 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xc67:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 34 # Abbrev [34] 0xc6c:0x6 DW_TAG_template_value_parameter
+ .long 630 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xc74:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin34 # DW_AT_low_pc
+ .long .Lfunc_end34-.Lfunc_begin34 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string208 # DW_AT_linkage_name
+ .long .Linfo_string209 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xc8d:0x9 DW_TAG_template_type_parameter
+ .long 236 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xc96:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xc9b:0x6 DW_TAG_template_value_parameter
+ .long 236 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xca3:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin35 # DW_AT_low_pc
+ .long .Lfunc_end35-.Lfunc_begin35 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string210 # DW_AT_linkage_name
+ .long .Linfo_string211 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xcbc:0x9 DW_TAG_template_type_parameter
+ .long 579 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xcc5:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 34 # Abbrev [34] 0xcca:0x6 DW_TAG_template_value_parameter
+ .long 579 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xcd2:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin36 # DW_AT_low_pc
+ .long .Lfunc_end36-.Lfunc_begin36 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string212 # DW_AT_linkage_name
+ .long .Linfo_string213 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xceb:0x9 DW_TAG_template_type_parameter
+ .long 243 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xcf4:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xcf9:0x6 DW_TAG_template_value_parameter
+ .long 243 # DW_AT_type
+ .byte 0 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xd01:0x2f DW_TAG_subprogram
+ .quad .Lfunc_begin37 # DW_AT_low_pc
+ .long .Lfunc_end37-.Lfunc_begin37 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string214 # DW_AT_linkage_name
+ .long .Linfo_string215 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xd1a:0x9 DW_TAG_template_type_parameter
+ .long 550 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xd23:0xc DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 34 # Abbrev [34] 0xd28:0x6 DW_TAG_template_value_parameter
+ .long 550 # DW_AT_type
+ .byte 0 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xd30:0x35 DW_TAG_subprogram
+ .quad .Lfunc_begin38 # DW_AT_low_pc
+ .long .Lfunc_end38-.Lfunc_begin38 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string216 # DW_AT_linkage_name
+ .long .Linfo_string217 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xd49:0x9 DW_TAG_template_type_parameter
+ .long 846 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xd52:0x12 DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xd57:0x6 DW_TAG_template_value_parameter
+ .long 846 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 33 # Abbrev [33] 0xd5d:0x6 DW_TAG_template_value_parameter
+ .long 846 # DW_AT_type
+ .byte 2 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xd65:0x6d DW_TAG_subprogram
+ .quad .Lfunc_begin39 # DW_AT_low_pc
+ .long .Lfunc_end39-.Lfunc_begin39 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string219 # DW_AT_linkage_name
+ .long .Linfo_string220 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xd7e:0x9 DW_TAG_template_type_parameter
+ .long 9416 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xd87:0x4a DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 34 # Abbrev [34] 0xd8c:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 0 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xd92:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 1 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xd98:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 6 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xd9e:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 7 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xda4:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 13 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xdaa:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 14 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xdb0:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 31 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xdb6:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 32 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xdbc:0x6 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .byte 33 # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xdc2:0x7 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .asciz "\377" # DW_AT_const_value
+ .byte 34 # Abbrev [34] 0xdc9:0x7 DW_TAG_template_value_parameter
+ .long 9416 # DW_AT_type
+ .ascii "\200\177" # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xdd2:0x3f DW_TAG_subprogram
+ .quad .Lfunc_begin40 # DW_AT_low_pc
+ .long .Lfunc_end40-.Lfunc_begin40 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string222 # DW_AT_linkage_name
+ .long .Linfo_string223 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 34 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xdeb:0x9 DW_TAG_template_type_parameter
+ .long 9423 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 29 # Abbrev [29] 0xdf4:0x1c DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string189 # DW_AT_name
+ .byte 37 # Abbrev [37] 0xdf9:0x16 DW_TAG_template_value_parameter
+ .long 9423 # DW_AT_type
+ .byte 16 # DW_AT_const_value
+ .byte 254
+ .byte 255
+ .byte 255
+ .byte 255
+ .byte 255
+ .byte 255
+ .byte 255
+ .byte 255
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xe11:0x29 DW_TAG_subprogram
+ .quad .Lfunc_begin41 # DW_AT_low_pc
+ .long .Lfunc_end41-.Lfunc_begin41 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string224 # DW_AT_linkage_name
+ .long .Linfo_string225 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 37 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 14 # Abbrev [14] 0xe2a:0x9 DW_TAG_template_type_parameter
+ .long 236 # DW_AT_type
+ .long .Linfo_string18 # DW_AT_name
+ .byte 33 # Abbrev [33] 0xe33:0x6 DW_TAG_template_value_parameter
+ .long 236 # DW_AT_type
+ .byte 3 # DW_AT_const_value
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xe3a:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin42 # DW_AT_low_pc
+ .long .Lfunc_end42-.Lfunc_begin42 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string226 # DW_AT_linkage_name
+ .long .Linfo_string227 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0xe53:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10874 # DW_AT_type
+ .byte 24 # Abbrev [24] 0xe61:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10895 # DW_AT_type
+ .byte 29 # Abbrev [29] 0xe6f:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0xe74:0x5 DW_TAG_template_type_parameter
+ .long 250 # DW_AT_type
+ .byte 0 # End Of Children Mark
+ .byte 0 # End Of Children Mark
+ .byte 10 # Abbrev [10] 0xe7b:0x41 DW_TAG_subprogram
+ .quad .Lfunc_begin43 # DW_AT_low_pc
+ .long .Lfunc_end43-.Lfunc_begin43 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string229 # DW_AT_linkage_name
+ .long .Linfo_string230 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 26 # DW_AT_decl_line
+ # DW_AT_external
+ .byte 24 # Abbrev [24] 0xe94:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 120
+ .long .Linfo_string361 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 27 # DW_AT_decl_line
+ .long 10917 # DW_AT_type
+ .byte 24 # Abbrev [24] 0xea2:0xe DW_TAG_variable
+ .byte 2 # DW_AT_location
+ .byte 145
+ .byte 112
+ .long .Linfo_string358 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 28 # DW_AT_decl_line
+ .long 10938 # DW_AT_type
+ .byte 29 # Abbrev [29] 0xeb0:0xb DW_TAG_GNU_template_parameter_pack
+ .long .Linfo_string84 # DW_AT_name
+ .byte 30 # Abbrev [30] 0xeb5:0x5 DW_TAG_template_type_parameter
+ .long 9430 # DW_AT_type
+ .byte 0
# End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0xebc:0x41 DW_TAG_subprogram + .quad .Lfunc_begin44 # DW_AT_low_pc + .long .Lfunc_end44-.Lfunc_begin44 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string231 # DW_AT_linkage_name + .long .Linfo_string232 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0xed5:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 9663 # DW_AT_type + .byte 24 # Abbrev [24] 0xee3:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 10960 # DW_AT_type + .byte 29 # Abbrev [29] 0xef1:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0xef6:0x5 DW_TAG_template_type_parameter + .long 1238 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0xefd:0x41 DW_TAG_subprogram + .quad .Lfunc_begin45 # DW_AT_low_pc + .long .Lfunc_end45-.Lfunc_begin45 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string233 # DW_AT_linkage_name + .long .Linfo_string234 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0xf16:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 10982 # DW_AT_type + .byte 24 # Abbrev [24] 0xf24:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11003 # DW_AT_type + .byte 29 # Abbrev [29] 0xf32:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0xf37:0x5 DW_TAG_template_type_parameter + .long 9455 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0xf3e:0x41 DW_TAG_subprogram + .quad .Lfunc_begin46 # DW_AT_low_pc + .long .Lfunc_end46-.Lfunc_begin46 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string235 # DW_AT_linkage_name + .long .Linfo_string236 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0xf57:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11025 # DW_AT_type + .byte 24 # Abbrev [24] 0xf65:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11046 # DW_AT_type + .byte 29 # Abbrev [29] 0xf73:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0xf78:0x5 DW_TAG_template_type_parameter + .long 9466 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0xf7f:0x41 DW_TAG_subprogram + .quad .Lfunc_begin47 # DW_AT_low_pc + .long .Lfunc_end47-.Lfunc_begin47 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string237 # DW_AT_linkage_name + .long .Linfo_string238 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + 
.byte 24 # Abbrev [24] 0xf98:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11068 # DW_AT_type + .byte 24 # Abbrev [24] 0xfa6:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11089 # DW_AT_type + .byte 29 # Abbrev [29] 0xfb4:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0xfb9:0x5 DW_TAG_template_type_parameter + .long 9471 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0xfc0:0x41 DW_TAG_subprogram + .quad .Lfunc_begin48 # DW_AT_low_pc + .long .Lfunc_end48-.Lfunc_begin48 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string240 # DW_AT_linkage_name + .long .Linfo_string241 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0xfd9:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11111 # DW_AT_type + .byte 24 # Abbrev [24] 0xfe7:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11132 # DW_AT_type + .byte 29 # Abbrev [29] 0xff5:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0xffa:0x5 DW_TAG_template_type_parameter + .long 9482 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1001:0x41 DW_TAG_subprogram + .quad .Lfunc_begin49 # DW_AT_low_pc + .long .Lfunc_end49-.Lfunc_begin49 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string243 # DW_AT_linkage_name + .long .Linfo_string244 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x101a:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11154 # DW_AT_type + .byte 24 # Abbrev [24] 0x1028:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11175 # DW_AT_type + .byte 29 # Abbrev [29] 0x1036:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x103b:0x5 DW_TAG_template_type_parameter + .long 9488 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1042:0x46 DW_TAG_subprogram + .quad .Lfunc_begin50 # DW_AT_low_pc + .long .Lfunc_end50-.Lfunc_begin50 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string245 # DW_AT_linkage_name + .long .Linfo_string246 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x105b:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11197 # DW_AT_type + .byte 24 # Abbrev [24] 0x1069:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + 
.byte 28 # DW_AT_decl_line + .long 11223 # DW_AT_type + .byte 29 # Abbrev [29] 0x1077:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x107c:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 30 # Abbrev [30] 0x1081:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1088:0x46 DW_TAG_subprogram + .quad .Lfunc_begin51 # DW_AT_low_pc + .long .Lfunc_end51-.Lfunc_begin51 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string247 # DW_AT_linkage_name + .long .Linfo_string248 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x10a1:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11250 # DW_AT_type + .byte 24 # Abbrev [24] 0x10af:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11276 # DW_AT_type + .byte 29 # Abbrev [29] 0x10bd:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x10c2:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 30 # Abbrev [30] 0x10c7:0x5 DW_TAG_template_type_parameter + .long 9498 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x10ce:0x41 DW_TAG_subprogram + .quad .Lfunc_begin52 # DW_AT_low_pc + .long .Lfunc_end52-.Lfunc_begin52 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string249 # DW_AT_linkage_name + .long .Linfo_string250 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x10e7:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11303 # DW_AT_type + .byte 24 # Abbrev [24] 0x10f5:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11324 # DW_AT_type + .byte 29 # Abbrev [29] 0x1103:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1108:0x5 DW_TAG_template_type_parameter + .long 9503 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x110f:0x41 DW_TAG_subprogram + .quad .Lfunc_begin53 # DW_AT_low_pc + .long .Lfunc_end53-.Lfunc_begin53 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string251 # DW_AT_linkage_name + .long .Linfo_string252 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1128:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11346 # DW_AT_type + .byte 24 # Abbrev [24] 0x1136:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11367 # DW_AT_type + .byte 29 # Abbrev [29] 0x1144:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # 
Abbrev [30] 0x1149:0x5 DW_TAG_template_type_parameter + .long 9508 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1150:0x41 DW_TAG_subprogram + .quad .Lfunc_begin54 # DW_AT_low_pc + .long .Lfunc_end54-.Lfunc_begin54 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string253 # DW_AT_linkage_name + .long .Linfo_string254 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1169:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11389 # DW_AT_type + .byte 24 # Abbrev [24] 0x1177:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11410 # DW_AT_type + .byte 29 # Abbrev [29] 0x1185:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x118a:0x5 DW_TAG_template_type_parameter + .long 9524 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1191:0x41 DW_TAG_subprogram + .quad .Lfunc_begin55 # DW_AT_low_pc + .long .Lfunc_end55-.Lfunc_begin55 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string255 # DW_AT_linkage_name + .long .Linfo_string256 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x11aa:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11432 # DW_AT_type + .byte 24 # Abbrev [24] 0x11b8:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11453 # DW_AT_type + .byte 29 # Abbrev [29] 0x11c6:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x11cb:0x5 DW_TAG_template_type_parameter + .long 9525 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x11d2:0x41 DW_TAG_subprogram + .quad .Lfunc_begin56 # DW_AT_low_pc + .long .Lfunc_end56-.Lfunc_begin56 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string257 # DW_AT_linkage_name + .long .Linfo_string258 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x11eb:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11475 # DW_AT_type + .byte 24 # Abbrev [24] 0x11f9:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11496 # DW_AT_type + .byte 29 # Abbrev [29] 0x1207:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x120c:0x5 DW_TAG_template_type_parameter + .long 9530 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x1213:0x41 DW_TAG_subprogram + .quad .Lfunc_begin57 # DW_AT_low_pc + .long .Lfunc_end57-.Lfunc_begin57 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string259 # DW_AT_linkage_name + .long 
.Linfo_string260 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x122c:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11518 # DW_AT_type + .byte 24 # Abbrev [24] 0x123a:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11539 # DW_AT_type + .byte 29 # Abbrev [29] 0x1248:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x124d:0x5 DW_TAG_template_type_parameter + .long 1243 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x1254:0x41 DW_TAG_subprogram + .quad .Lfunc_begin58 # DW_AT_low_pc + .long .Lfunc_end58-.Lfunc_begin58 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string261 # DW_AT_linkage_name + .long .Linfo_string262 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x126d:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11561 # DW_AT_type + .byte 24 # Abbrev [24] 0x127b:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11582 # DW_AT_type + .byte 29 # Abbrev [29] 0x1289:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x128e:0x5 DW_TAG_template_type_parameter + .long 9535 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1295:0x2e DW_TAG_subprogram + .quad .Lfunc_begin59 # DW_AT_low_pc + .long .Lfunc_end59-.Lfunc_begin59 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string265 # DW_AT_linkage_name + .long .Linfo_string266 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 54 # DW_AT_decl_line + # DW_AT_external + .byte 29 # Abbrev [29] 0x12ae:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string263 # DW_AT_name + .byte 30 # Abbrev [30] 0x12b3:0x5 DW_TAG_template_type_parameter + .long 9360 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 14 # Abbrev [14] 0x12b9:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string264 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x12c3:0x28 DW_TAG_subprogram + .quad .Lfunc_begin60 # DW_AT_low_pc + .long .Lfunc_end60-.Lfunc_begin60 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string267 # DW_AT_linkage_name + .long .Linfo_string268 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 54 # DW_AT_decl_line + # DW_AT_external + .byte 38 # Abbrev [38] 0x12dc:0x5 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string263 # DW_AT_name + .byte 14 # Abbrev [14] 0x12e1:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string264 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x12eb:0x28 DW_TAG_subprogram + .quad .Lfunc_begin61 # DW_AT_low_pc + .long .Lfunc_end61-.Lfunc_begin61 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string269 # DW_AT_linkage_name + .long .Linfo_string270 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 56 # DW_AT_decl_line + # DW_AT_external + 
.byte 14 # Abbrev [14] 0x1304:0x9 DW_TAG_template_type_parameter + .long 9360 # DW_AT_type + .long .Linfo_string263 # DW_AT_name + .byte 38 # Abbrev [38] 0x130d:0x5 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string264 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1313:0x3b DW_TAG_subprogram + .quad .Lfunc_begin62 # DW_AT_low_pc + .long .Lfunc_end62-.Lfunc_begin62 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string271 # DW_AT_linkage_name + .long .Linfo_string272 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x132c:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11604 # DW_AT_type + .byte 24 # Abbrev [24] 0x133a:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11619 # DW_AT_type + .byte 38 # Abbrev [38] 0x1348:0x5 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x134e:0x46 DW_TAG_subprogram + .quad .Lfunc_begin63 # DW_AT_low_pc + .long .Lfunc_end63-.Lfunc_begin63 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string273 # DW_AT_linkage_name + .long .Linfo_string274 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1367:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11635 # DW_AT_type + .byte 24 # Abbrev [24] 0x1375:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11661 # DW_AT_type + .byte 29 # Abbrev [29] 0x1383:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1388:0x5 DW_TAG_template_type_parameter + .long 9518 # DW_AT_type + .byte 30 # Abbrev [30] 0x138d:0x5 DW_TAG_template_type_parameter + .long 9518 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1394:0x41 DW_TAG_subprogram + .quad .Lfunc_begin64 # DW_AT_low_pc + .long .Lfunc_end64-.Lfunc_begin64 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string276 # DW_AT_linkage_name + .long .Linfo_string277 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x13ad:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11688 # DW_AT_type + .byte 24 # Abbrev [24] 0x13bb:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11709 # DW_AT_type + .byte 29 # Abbrev [29] 0x13c9:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x13ce:0x5 DW_TAG_template_type_parameter + .long 9540 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x13d5:0x41 DW_TAG_subprogram + .quad .Lfunc_begin65 # DW_AT_low_pc + .long .Lfunc_end65-.Lfunc_begin65 
# DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string279 # DW_AT_linkage_name + .long .Linfo_string280 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x13ee:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11731 # DW_AT_type + .byte 24 # Abbrev [24] 0x13fc:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11752 # DW_AT_type + .byte 29 # Abbrev [29] 0x140a:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x140f:0x5 DW_TAG_template_type_parameter + .long 9566 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x1416:0x2e9 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string75 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 57 # DW_AT_decl_line + .byte 39 # Abbrev [39] 0x141f:0x1f DW_TAG_subprogram + .long .Linfo_string76 # DW_AT_linkage_name + .long .Linfo_string77 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 59 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x142a:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1433:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x1438:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x143e:0x1f DW_TAG_subprogram + .long .Linfo_string78 # DW_AT_linkage_name + .long .Linfo_string79 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 62 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1449:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1452:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x1457:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x145d:0x1f DW_TAG_subprogram + .long .Linfo_string80 # DW_AT_linkage_name + .long .Linfo_string81 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 65 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1468:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1471:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x1476:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 41 # Abbrev [41] 0x147c:0x1e DW_TAG_subprogram + .long .Linfo_string82 # DW_AT_linkage_name + .long .Linfo_string83 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 68 # DW_AT_decl_line + .long 6057 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x148b:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1494:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x149a:0x1f DW_TAG_subprogram + .long .Linfo_string87 # DW_AT_linkage_name + .long 
.Linfo_string88 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 72 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x14a5:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x14ae:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x14b3:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x14b9:0x1f DW_TAG_subprogram + .long .Linfo_string89 # DW_AT_linkage_name + .long .Linfo_string90 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 75 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x14c4:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x14cd:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x14d2:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x14d8:0x1f DW_TAG_subprogram + .long .Linfo_string91 # DW_AT_linkage_name + .long .Linfo_string92 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 78 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x14e3:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x14ec:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x14f1:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x14f7:0x1f DW_TAG_subprogram + .long .Linfo_string93 # DW_AT_linkage_name + .long .Linfo_string94 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 81 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1502:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x150b:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x1510:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x1516:0x1f DW_TAG_subprogram + .long .Linfo_string95 # DW_AT_linkage_name + .long .Linfo_string96 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 84 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1521:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x152a:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x152f:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x1535:0x1f DW_TAG_subprogram + .long .Linfo_string97 # DW_AT_linkage_name + .long .Linfo_string98 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 87 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1540:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1549:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x154e:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x1554:0x1f DW_TAG_subprogram + .long .Linfo_string99 # DW_AT_linkage_name + .long .Linfo_string100 # DW_AT_name + 
.byte 1 # DW_AT_decl_file + .byte 90 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x155f:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1568:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x156d:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x1573:0x1a DW_TAG_subprogram + .long .Linfo_string101 # DW_AT_linkage_name + .long .Linfo_string102 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 93 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x157e:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1587:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x158d:0x1a DW_TAG_subprogram + .long .Linfo_string103 # DW_AT_linkage_name + .long .Linfo_string104 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 96 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1598:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x15a1:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x15a7:0x1f DW_TAG_subprogram + .long .Linfo_string105 # DW_AT_linkage_name + .long .Linfo_string106 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 99 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x15b2:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x15bb:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x15c0:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x15c6:0x1f DW_TAG_subprogram + .long .Linfo_string107 # DW_AT_linkage_name + .long .Linfo_string108 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 102 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x15d1:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x15da:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x15df:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x15e5:0x1f DW_TAG_subprogram + .long .Linfo_string109 # DW_AT_linkage_name + .long .Linfo_string110 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 105 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x15f0:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x15f9:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x15fe:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x1604:0x1a DW_TAG_subprogram + .long .Linfo_string111 # DW_AT_linkage_name + .long .Linfo_string112 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 108 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x160f:0x9 DW_TAG_template_type_parameter + .long 63 # 
DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1618:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x161e:0x1f DW_TAG_subprogram + .long .Linfo_string113 # DW_AT_linkage_name + .long .Linfo_string114 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 111 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1629:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1632:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x1637:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x163d:0x1f DW_TAG_subprogram + .long .Linfo_string115 # DW_AT_linkage_name + .long .Linfo_string116 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 114 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x1648:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x1651:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 40 # Abbrev [40] 0x1656:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 41 # Abbrev [41] 0x165c:0x23 DW_TAG_subprogram + .long .Linfo_string117 # DW_AT_linkage_name + .long .Linfo_string118 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 117 # DW_AT_decl_line + .long 6930 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x166b:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 40 # Abbrev [40] 0x1674:0x5 DW_TAG_formal_parameter + .long 515 # DW_AT_type + .byte 40 # Abbrev [40] 0x1679:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 41 # Abbrev [41] 0x167f:0x23 DW_TAG_subprogram + .long .Linfo_string120 # DW_AT_linkage_name + .long .Linfo_string121 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 124 # DW_AT_decl_line + .long 6930 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x168e:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 40 # Abbrev [40] 0x1697:0x5 DW_TAG_formal_parameter + .long 515 # DW_AT_type + .byte 40 # Abbrev [40] 0x169c:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x16a2:0x1f DW_TAG_subprogram + .long .Linfo_string122 # DW_AT_linkage_name + .long .Linfo_string123 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 121 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x16ad:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 40 # Abbrev [40] 0x16b6:0x5 DW_TAG_formal_parameter + .long 6930 # DW_AT_type + .byte 40 # Abbrev [40] 0x16bb:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 39 # Abbrev [39] 0x16c1:0x1f DW_TAG_subprogram + .long .Linfo_string124 # DW_AT_linkage_name + .long .Linfo_string125 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 128 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x16cc:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 40 # Abbrev [40] 0x16d5:0x5 DW_TAG_formal_parameter + 
.long 6930 # DW_AT_type + .byte 40 # Abbrev [40] 0x16da:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 41 # Abbrev [41] 0x16e0:0x1e DW_TAG_subprogram + .long .Linfo_string126 # DW_AT_linkage_name + .long .Linfo_string127 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 131 # DW_AT_decl_line + .long 63 # DW_AT_type + # DW_AT_declaration + # DW_AT_external + .byte 14 # Abbrev [14] 0x16ef:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 18 # Abbrev [18] 0x16f8:0x5 DW_TAG_formal_parameter + .long 5887 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x16ff:0x5 DW_TAG_pointer_type + .long 5142 # DW_AT_type + .byte 43 # Abbrev [43] 0x1704:0x37 DW_TAG_subprogram + .quad .Lfunc_begin66 # DW_AT_low_pc + .long .Lfunc_end66-.Lfunc_begin66 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 5915 # DW_AT_object_pointer + .long 5151 # DW_AT_specification + .byte 44 # Abbrev [44] 0x171b:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1727:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 59 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1731:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x173b:0x37 DW_TAG_subprogram + .quad .Lfunc_begin67 # DW_AT_low_pc + .long .Lfunc_end67-.Lfunc_begin67 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 5970 # DW_AT_object_pointer + .long 5182 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1752:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x175e:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 62 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1768:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1772:0x37 DW_TAG_subprogram + .quad .Lfunc_begin68 # DW_AT_low_pc + .long .Lfunc_end68-.Lfunc_begin68 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6025 # DW_AT_object_pointer + .long 5213 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1789:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1795:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 65 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x179f:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x17a9:0x5 DW_TAG_pointer_type + .long 6062 # DW_AT_type + .byte 13 # Abbrev [13] 0x17ae:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string86 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x17b7:0xb 
DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x17bc:0x5 DW_TAG_template_type_parameter + .long 6083 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 3 # Abbrev [3] 0x17c3:0x7 DW_TAG_base_type + .long .Linfo_string85 # DW_AT_name + .byte 4 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 43 # Abbrev [43] 0x17ca:0x2d DW_TAG_subprogram + .quad .Lfunc_begin69 # DW_AT_low_pc + .long .Lfunc_end69-.Lfunc_begin69 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6113 # DW_AT_object_pointer + .long 5244 # DW_AT_specification + .byte 44 # Abbrev [44] 0x17e1:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 14 # Abbrev [14] 0x17ed:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x17f7:0x37 DW_TAG_subprogram + .quad .Lfunc_begin70 # DW_AT_low_pc + .long .Lfunc_end70-.Lfunc_begin70 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6158 # DW_AT_object_pointer + .long 5274 # DW_AT_specification + .byte 44 # Abbrev [44] 0x180e:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x181a:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 72 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1824:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x182e:0x37 DW_TAG_subprogram + .quad .Lfunc_begin71 # DW_AT_low_pc + .long .Lfunc_end71-.Lfunc_begin71 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6213 # DW_AT_object_pointer + .long 5305 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1845:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1851:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 75 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x185b:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1865:0x37 DW_TAG_subprogram + .quad .Lfunc_begin72 # DW_AT_low_pc + .long .Lfunc_end72-.Lfunc_begin72 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6268 # DW_AT_object_pointer + .long 5336 # DW_AT_specification + .byte 44 # Abbrev [44] 0x187c:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1888:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 78 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1892:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x189c:0x37 DW_TAG_subprogram + .quad .Lfunc_begin73 # DW_AT_low_pc + .long .Lfunc_end73-.Lfunc_begin73 # DW_AT_high_pc + 
.byte 1 # DW_AT_frame_base + .byte 86 + .long 6323 # DW_AT_object_pointer + .long 5367 # DW_AT_specification + .byte 44 # Abbrev [44] 0x18b3:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x18bf:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 81 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x18c9:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x18d3:0x37 DW_TAG_subprogram + .quad .Lfunc_begin74 # DW_AT_low_pc + .long .Lfunc_end74-.Lfunc_begin74 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6378 # DW_AT_object_pointer + .long 5398 # DW_AT_specification + .byte 44 # Abbrev [44] 0x18ea:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x18f6:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 84 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1900:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x190a:0x37 DW_TAG_subprogram + .quad .Lfunc_begin75 # DW_AT_low_pc + .long .Lfunc_end75-.Lfunc_begin75 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6433 # DW_AT_object_pointer + .long 5429 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1921:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x192d:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 87 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1937:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1941:0x37 DW_TAG_subprogram + .quad .Lfunc_begin76 # DW_AT_low_pc + .long .Lfunc_end76-.Lfunc_begin76 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6488 # DW_AT_object_pointer + .long 5460 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1958:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1964:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 90 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x196e:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1978:0x2d DW_TAG_subprogram + .quad .Lfunc_begin77 # DW_AT_low_pc + .long .Lfunc_end77-.Lfunc_begin77 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6543 # DW_AT_object_pointer + .long 5491 # DW_AT_specification + .byte 44 # Abbrev [44] 0x198f:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 14 # Abbrev [14] 
0x199b:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x19a5:0x2d DW_TAG_subprogram + .quad .Lfunc_begin78 # DW_AT_low_pc + .long .Lfunc_end78-.Lfunc_begin78 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6588 # DW_AT_object_pointer + .long 5517 # DW_AT_specification + .byte 44 # Abbrev [44] 0x19bc:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 14 # Abbrev [14] 0x19c8:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x19d2:0x37 DW_TAG_subprogram + .quad .Lfunc_begin79 # DW_AT_low_pc + .long .Lfunc_end79-.Lfunc_begin79 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6633 # DW_AT_object_pointer + .long 5543 # DW_AT_specification + .byte 44 # Abbrev [44] 0x19e9:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x19f5:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 99 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x19ff:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1a09:0x37 DW_TAG_subprogram + .quad .Lfunc_begin80 # DW_AT_low_pc + .long .Lfunc_end80-.Lfunc_begin80 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6688 # DW_AT_object_pointer + .long 5574 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1a20:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1a2c:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 102 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1a36:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1a40:0x37 DW_TAG_subprogram + .quad .Lfunc_begin81 # DW_AT_low_pc + .long .Lfunc_end81-.Lfunc_begin81 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6743 # DW_AT_object_pointer + .long 5605 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1a57:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1a63:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 105 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1a6d:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1a77:0x2d DW_TAG_subprogram + .quad .Lfunc_begin82 # DW_AT_low_pc + .long .Lfunc_end82-.Lfunc_begin82 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6798 # DW_AT_object_pointer + .long 5636 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1a8e:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long 
.Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 14 # Abbrev [14] 0x1a9a:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1aa4:0x37 DW_TAG_subprogram + .quad .Lfunc_begin83 # DW_AT_low_pc + .long .Lfunc_end83-.Lfunc_begin83 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6843 # DW_AT_object_pointer + .long 5662 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1abb:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1ac7:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 111 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1ad1:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 43 # Abbrev [43] 0x1adb:0x37 DW_TAG_subprogram + .quad .Lfunc_begin84 # DW_AT_low_pc + .long .Lfunc_end84-.Lfunc_begin84 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 6898 # DW_AT_object_pointer + .long 5693 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1af2:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 11774 # DW_AT_type + # DW_AT_artificial + .byte 22 # Abbrev [22] 0x1afe:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 114 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1b08:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 45 # Abbrev [45] 0x1b12:0x1 DW_TAG_pointer_type + .byte 46 # Abbrev [46] 0x1b13:0x1d DW_TAG_subprogram + .quad .Lfunc_begin85 # DW_AT_low_pc + .long .Lfunc_end85-.Lfunc_begin85 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 5724 # DW_AT_specification + .byte 14 # Abbrev [14] 0x1b26:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 46 # Abbrev [46] 0x1b30:0x1d DW_TAG_subprogram + .quad .Lfunc_begin86 # DW_AT_low_pc + .long .Lfunc_end86-.Lfunc_begin86 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 5759 # DW_AT_specification + .byte 14 # Abbrev [14] 0x1b43:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 46 # Abbrev [46] 0x1b4d:0x31 DW_TAG_subprogram + .quad .Lfunc_begin87 # DW_AT_low_pc + .long .Lfunc_end87-.Lfunc_begin87 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 5794 # DW_AT_specification + .byte 22 # Abbrev [22] 0x1b60:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 1 # DW_AT_decl_file + .byte 121 # DW_AT_decl_line + .long 6930 # DW_AT_type + .byte 22 # Abbrev [22] 0x1b6a:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 121 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1b74:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 46 # Abbrev [46] 0x1b7e:0x31 DW_TAG_subprogram + .quad .Lfunc_begin88 # DW_AT_low_pc + .long 
.Lfunc_end88-.Lfunc_begin88 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 5825 # DW_AT_specification + .byte 22 # Abbrev [22] 0x1b91:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .byte 1 # DW_AT_decl_file + .byte 128 # DW_AT_decl_line + .long 6930 # DW_AT_type + .byte 22 # Abbrev [22] 0x1b9b:0xa DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .byte 1 # DW_AT_decl_file + .byte 128 # DW_AT_decl_line + .long 63 # DW_AT_type + .byte 14 # Abbrev [14] 0x1ba5:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 46 # Abbrev [46] 0x1baf:0x1d DW_TAG_subprogram + .quad .Lfunc_begin89 # DW_AT_low_pc + .long .Lfunc_end89-.Lfunc_begin89 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 5856 # DW_AT_specification + .byte 14 # Abbrev [14] 0x1bc2:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x1bcc:0x41 DW_TAG_subprogram + .quad .Lfunc_begin90 # DW_AT_low_pc + .long .Lfunc_end90-.Lfunc_begin90 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string282 # DW_AT_linkage_name + .long .Linfo_string283 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x1be5:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11779 # DW_AT_type + .byte 24 # Abbrev [24] 0x1bf3:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11800 # DW_AT_type + .byte 29 # Abbrev [29] 0x1c01:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1c06:0x5 DW_TAG_template_type_parameter + .long 1248 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1c0d:0x41 DW_TAG_subprogram + .quad .Lfunc_begin91 # DW_AT_low_pc + .long .Lfunc_end91-.Lfunc_begin91 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string284 # DW_AT_linkage_name + .long .Linfo_string285 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1c26:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11822 # DW_AT_type + .byte 24 # Abbrev [24] 0x1c34:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11843 # DW_AT_type + .byte 29 # Abbrev [29] 0x1c42:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1c47:0x5 DW_TAG_template_type_parameter + .long 9584 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1c4e:0x41 DW_TAG_subprogram + .quad .Lfunc_begin92 # DW_AT_low_pc + .long .Lfunc_end92-.Lfunc_begin92 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string286 # DW_AT_linkage_name + .long .Linfo_string287 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1c67:0xe DW_TAG_variable + 
.byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11865 # DW_AT_type + .byte 24 # Abbrev [24] 0x1c75:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11886 # DW_AT_type + .byte 29 # Abbrev [29] 0x1c83:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1c88:0x5 DW_TAG_template_type_parameter + .long 9601 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1c8f:0x23 DW_TAG_subprogram + .quad .Lfunc_begin93 # DW_AT_low_pc + .long .Lfunc_end93-.Lfunc_begin93 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string289 # DW_AT_linkage_name + .long .Linfo_string290 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 135 # DW_AT_decl_line + # DW_AT_external + .byte 11 # Abbrev [11] 0x1ca8:0x9 DW_TAG_GNU_template_template_param + .long .Linfo_string18 # DW_AT_name + .long .Linfo_string288 # DW_AT_GNU_template_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1cb2:0x2c DW_TAG_subprogram + .quad .Lfunc_begin94 # DW_AT_low_pc + .long .Lfunc_end94-.Lfunc_begin94 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string291 # DW_AT_linkage_name + .long .Linfo_string292 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 136 # DW_AT_decl_line + # DW_AT_external + .byte 11 # Abbrev [11] 0x1ccb:0x9 DW_TAG_GNU_template_template_param + .long .Linfo_string18 # DW_AT_name + .long .Linfo_string288 # DW_AT_GNU_template_name + .byte 14 # Abbrev [14] 0x1cd4:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string264 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1cde:0x46 DW_TAG_subprogram + .quad .Lfunc_begin96 # DW_AT_low_pc + .long .Lfunc_end96-.Lfunc_begin96 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string296 # DW_AT_linkage_name + .long .Linfo_string297 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1cf7:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 11908 # DW_AT_type + .byte 24 # Abbrev [24] 0x1d05:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11934 # DW_AT_type + .byte 29 # Abbrev [29] 0x1d13:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1d18:0x5 DW_TAG_template_type_parameter + .long 9381 # DW_AT_type + .byte 30 # Abbrev [30] 0x1d1d:0x5 DW_TAG_template_type_parameter + .long 9606 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1d24:0x41 DW_TAG_subprogram + .quad .Lfunc_begin97 # DW_AT_low_pc + .long .Lfunc_end97-.Lfunc_begin97 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string299 # DW_AT_linkage_name + .long .Linfo_string300 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1d3d:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # 
DW_AT_decl_line + .long 11961 # DW_AT_type + .byte 24 # Abbrev [24] 0x1d4b:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 11982 # DW_AT_type + .byte 29 # Abbrev [29] 0x1d59:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1d5e:0x5 DW_TAG_template_type_parameter + .long 9611 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1d65:0x23 DW_TAG_subprogram + .quad .Lfunc_begin98 # DW_AT_low_pc + .long .Lfunc_end98-.Lfunc_begin98 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string302 # DW_AT_linkage_name + .long .Linfo_string303 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 135 # DW_AT_decl_line + # DW_AT_external + .byte 11 # Abbrev [11] 0x1d7e:0x9 DW_TAG_GNU_template_template_param + .long .Linfo_string18 # DW_AT_name + .long .Linfo_string301 # DW_AT_GNU_template_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1d88:0x41 DW_TAG_subprogram + .quad .Lfunc_begin99 # DW_AT_low_pc + .long .Lfunc_end99-.Lfunc_begin99 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string304 # DW_AT_linkage_name + .long .Linfo_string305 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1da1:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12004 # DW_AT_type + .byte 24 # Abbrev [24] 0x1daf:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12025 # DW_AT_type + .byte 29 # Abbrev [29] 0x1dbd:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1dc2:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1dc9:0x4b DW_TAG_subprogram + .quad .Lfunc_begin100 # DW_AT_low_pc + .long .Lfunc_end100-.Lfunc_begin100 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string306 # DW_AT_linkage_name + .long .Linfo_string307 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1de2:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12047 # DW_AT_type + .byte 24 # Abbrev [24] 0x1df0:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12078 # DW_AT_type + .byte 29 # Abbrev [29] 0x1dfe:0x15 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1e03:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 30 # Abbrev [30] 0x1e08:0x5 DW_TAG_template_type_parameter + .long 630 # DW_AT_type + .byte 30 # Abbrev [30] 0x1e0d:0x5 DW_TAG_template_type_parameter + .long 9630 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1e14:0x41 DW_TAG_subprogram + .quad .Lfunc_begin101 # DW_AT_low_pc + .long .Lfunc_end101-.Lfunc_begin101 # DW_AT_high_pc + .byte 1 # 
DW_AT_frame_base + .byte 86 + .long .Linfo_string308 # DW_AT_linkage_name + .long .Linfo_string309 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1e2d:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12110 # DW_AT_type + .byte 24 # Abbrev [24] 0x1e3b:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12131 # DW_AT_type + .byte 29 # Abbrev [29] 0x1e49:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1e4e:0x5 DW_TAG_template_type_parameter + .long 9635 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1e55:0x41 DW_TAG_subprogram + .quad .Lfunc_begin102 # DW_AT_low_pc + .long .Lfunc_end102-.Lfunc_begin102 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string310 # DW_AT_linkage_name + .long .Linfo_string311 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1e6e:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12153 # DW_AT_type + .byte 24 # Abbrev [24] 0x1e7c:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12174 # DW_AT_type + .byte 29 # Abbrev [29] 0x1e8a:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1e8f:0x5 DW_TAG_template_type_parameter + .long 9647 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1e96:0x41 DW_TAG_subprogram + .quad .Lfunc_begin103 # DW_AT_low_pc + .long .Lfunc_end103-.Lfunc_begin103 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string312 # DW_AT_linkage_name + .long .Linfo_string313 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1eaf:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12196 # DW_AT_type + .byte 24 # Abbrev [24] 0x1ebd:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12217 # DW_AT_type + .byte 29 # Abbrev [29] 0x1ecb:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1ed0:0x5 DW_TAG_template_type_parameter + .long 9657 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x1ed7:0x41 DW_TAG_subprogram + .quad .Lfunc_begin104 # DW_AT_low_pc + .long .Lfunc_end104-.Lfunc_begin104 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string315 # DW_AT_linkage_name + .long .Linfo_string316 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x1ef0:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # 
DW_AT_decl_line + .long 12239 # DW_AT_type + .byte 24 # Abbrev [24] 0x1efe:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12260 # DW_AT_type + .byte 29 # Abbrev [29] 0x1f0c:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1f11:0x5 DW_TAG_template_type_parameter + .long 9663 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x1f18:0x5 DW_TAG_pointer_type + .long 286 # DW_AT_type + .byte 47 # Abbrev [47] 0x1f1d:0x2d DW_TAG_subprogram + .quad .Lfunc_begin105 # DW_AT_low_pc + .long .Lfunc_end105-.Lfunc_begin105 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 7992 # DW_AT_object_pointer + .long .Linfo_string317 # DW_AT_linkage_name + .long 295 # DW_AT_specification + .byte 44 # Abbrev [44] 0x1f38:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 12282 # DW_AT_type + # DW_AT_artificial + .byte 17 # Abbrev [17] 0x1f44:0x5 DW_TAG_template_type_parameter + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1f4a:0x41 DW_TAG_subprogram + .quad .Lfunc_begin106 # DW_AT_low_pc + .long .Lfunc_end106-.Lfunc_begin106 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string318 # DW_AT_linkage_name + .long .Linfo_string319 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1f63:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12287 # DW_AT_type + .byte 24 # Abbrev [24] 0x1f71:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12308 # DW_AT_type + .byte 29 # Abbrev [29] 0x1f7f:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1f84:0x5 DW_TAG_template_type_parameter + .long 9684 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1f8b:0x41 DW_TAG_subprogram + .quad .Lfunc_begin107 # DW_AT_low_pc + .long .Lfunc_end107-.Lfunc_begin107 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string320 # DW_AT_linkage_name + .long .Linfo_string321 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1fa4:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12330 # DW_AT_type + .byte 24 # Abbrev [24] 0x1fb2:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12351 # DW_AT_type + .byte 29 # Abbrev [29] 0x1fc0:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x1fc5:0x5 DW_TAG_template_type_parameter + .long 9710 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x1fcc:0x41 DW_TAG_subprogram + .quad .Lfunc_begin108 # DW_AT_low_pc + .long .Lfunc_end108-.Lfunc_begin108 # DW_AT_high_pc + .byte 1 # 
DW_AT_frame_base + .byte 86 + .long .Linfo_string322 # DW_AT_linkage_name + .long .Linfo_string323 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x1fe5:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12373 # DW_AT_type + .byte 24 # Abbrev [24] 0x1ff3:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12394 # DW_AT_type + .byte 29 # Abbrev [29] 0x2001:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2006:0x5 DW_TAG_template_type_parameter + .long 9736 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 48 # Abbrev [48] 0x200d:0x27 DW_TAG_subprogram + .quad .Lfunc_begin109 # DW_AT_low_pc + .long .Lfunc_end109-.Lfunc_begin109 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string324 # DW_AT_linkage_name + .long .Linfo_string325 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 154 # DW_AT_decl_line + .long 9525 # DW_AT_type + # DW_AT_external + .byte 14 # Abbrev [14] 0x202a:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x2034:0x41 DW_TAG_subprogram + .quad .Lfunc_begin110 # DW_AT_low_pc + .long .Lfunc_end110-.Lfunc_begin110 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string326 # DW_AT_linkage_name + .long .Linfo_string327 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x204d:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12416 # DW_AT_type + .byte 24 # Abbrev [24] 0x205b:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12437 # DW_AT_type + .byte 29 # Abbrev [29] 0x2069:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x206e:0x5 DW_TAG_template_type_parameter + .long 9762 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x2075:0x41 DW_TAG_subprogram + .quad .Lfunc_begin111 # DW_AT_low_pc + .long .Lfunc_end111-.Lfunc_begin111 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string328 # DW_AT_linkage_name + .long .Linfo_string329 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x208e:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12459 # DW_AT_type + .byte 24 # Abbrev [24] 0x209c:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12480 # DW_AT_type + .byte 29 # Abbrev [29] 0x20aa:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x20af:0x5 DW_TAG_template_type_parameter + .long 9767 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End 
Of Children Mark + .byte 10 # Abbrev [10] 0x20b6:0x41 DW_TAG_subprogram + .quad .Lfunc_begin112 # DW_AT_low_pc + .long .Lfunc_end112-.Lfunc_begin112 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string330 # DW_AT_linkage_name + .long .Linfo_string331 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x20cf:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12502 # DW_AT_type + .byte 24 # Abbrev [24] 0x20dd:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12523 # DW_AT_type + .byte 29 # Abbrev [29] 0x20eb:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x20f0:0x5 DW_TAG_template_type_parameter + .long 9789 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x20f7:0x41 DW_TAG_subprogram + .quad .Lfunc_begin113 # DW_AT_low_pc + .long .Lfunc_end113-.Lfunc_begin113 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string332 # DW_AT_linkage_name + .long .Linfo_string333 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x2110:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12545 # DW_AT_type + .byte 24 # Abbrev [24] 0x211e:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12566 # DW_AT_type + .byte 29 # Abbrev [29] 0x212c:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2131:0x5 DW_TAG_template_type_parameter + .long 9795 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x2138:0x41 DW_TAG_subprogram + .quad .Lfunc_begin114 # DW_AT_low_pc + .long .Lfunc_end114-.Lfunc_begin114 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string334 # DW_AT_linkage_name + .long .Linfo_string335 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x2151:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12588 # DW_AT_type + .byte 24 # Abbrev [24] 0x215f:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12609 # DW_AT_type + .byte 29 # Abbrev [29] 0x216d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2172:0x5 DW_TAG_template_type_parameter + .long 9801 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x2179:0x41 DW_TAG_subprogram + .quad .Lfunc_begin115 # DW_AT_low_pc + .long .Lfunc_end115-.Lfunc_begin115 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string336 # DW_AT_linkage_name + .long .Linfo_string337 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # 
DW_AT_external + .byte 24 # Abbrev [24] 0x2192:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12631 # DW_AT_type + .byte 24 # Abbrev [24] 0x21a0:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12652 # DW_AT_type + .byte 29 # Abbrev [29] 0x21ae:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x21b3:0x5 DW_TAG_template_type_parameter + .long 9811 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x21ba:0x41 DW_TAG_subprogram + .quad .Lfunc_begin116 # DW_AT_low_pc + .long .Lfunc_end116-.Lfunc_begin116 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string338 # DW_AT_linkage_name + .long .Linfo_string339 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x21d3:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12674 # DW_AT_type + .byte 24 # Abbrev [24] 0x21e1:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12695 # DW_AT_type + .byte 29 # Abbrev [29] 0x21ef:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x21f4:0x5 DW_TAG_template_type_parameter + .long 9828 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x21fb:0x41 DW_TAG_subprogram + .quad .Lfunc_begin117 # DW_AT_low_pc + .long .Lfunc_end117-.Lfunc_begin117 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string340 # DW_AT_linkage_name + .long .Linfo_string341 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x2214:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12717 # DW_AT_type + .byte 24 # Abbrev [24] 0x2222:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12738 # DW_AT_type + .byte 29 # Abbrev [29] 0x2230:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2235:0x5 DW_TAG_template_type_parameter + .long 9833 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x223c:0x41 DW_TAG_subprogram + .quad .Lfunc_begin118 # DW_AT_low_pc + .long .Lfunc_end118-.Lfunc_begin118 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string342 # DW_AT_linkage_name + .long .Linfo_string343 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x2255:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12760 # DW_AT_type + .byte 24 # Abbrev [24] 0x2263:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long 
.Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12781 # DW_AT_type + .byte 29 # Abbrev [29] 0x2271:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2276:0x5 DW_TAG_template_type_parameter + .long 9864 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x227d:0x41 DW_TAG_subprogram + .quad .Lfunc_begin119 # DW_AT_low_pc + .long .Lfunc_end119-.Lfunc_begin119 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string344 # DW_AT_linkage_name + .long .Linfo_string345 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x2296:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12803 # DW_AT_type + .byte 24 # Abbrev [24] 0x22a4:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12824 # DW_AT_type + .byte 29 # Abbrev [29] 0x22b2:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x22b7:0x5 DW_TAG_template_type_parameter + .long 9525 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x22be:0x41 DW_TAG_subprogram + .quad .Lfunc_begin120 # DW_AT_low_pc + .long .Lfunc_end120-.Lfunc_begin120 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string346 # DW_AT_linkage_name + .long .Linfo_string347 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x22d7:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12846 # DW_AT_type + .byte 24 # Abbrev [24] 0x22e5:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12867 # DW_AT_type + .byte 29 # Abbrev [29] 0x22f3:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x22f8:0x5 DW_TAG_template_type_parameter + .long 9887 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x22ff:0x41 DW_TAG_subprogram + .quad .Lfunc_begin121 # DW_AT_low_pc + .long .Lfunc_end121-.Lfunc_begin121 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string348 # DW_AT_linkage_name + .long .Linfo_string349 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x2318:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12889 # DW_AT_type + .byte 24 # Abbrev [24] 0x2326:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12910 # DW_AT_type + .byte 29 # Abbrev [29] 0x2334:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2339:0x5 DW_TAG_template_type_parameter + .long 9894 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 35 # 
Abbrev [35] 0x2340:0x41 DW_TAG_subprogram + .quad .Lfunc_begin122 # DW_AT_low_pc + .long .Lfunc_end122-.Lfunc_begin122 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string350 # DW_AT_linkage_name + .long .Linfo_string351 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x2359:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12932 # DW_AT_type + .byte 24 # Abbrev [24] 0x2367:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 12953 # DW_AT_type + .byte 29 # Abbrev [29] 0x2375:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x237a:0x5 DW_TAG_template_type_parameter + .long 9906 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x2381:0x23 DW_TAG_subprogram + .quad .Lfunc_begin123 # DW_AT_low_pc + .long .Lfunc_end123-.Lfunc_begin123 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string352 # DW_AT_linkage_name + .long .Linfo_string353 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 163 # DW_AT_decl_line + # DW_AT_external + .byte 14 # Abbrev [14] 0x239a:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x23a4:0x1b DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string131 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 145 # DW_AT_decl_line + .byte 39 # Abbrev [39] 0x23ad:0x11 DW_TAG_subprogram + .long .Linfo_string129 # DW_AT_linkage_name + .long .Linfo_string130 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 146 # DW_AT_decl_line + # DW_AT_declaration + # DW_AT_external + .byte 18 # Abbrev [18] 0x23b8:0x5 DW_TAG_formal_parameter + .long 9151 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x23bf:0x5 DW_TAG_pointer_type + .long 9124 # DW_AT_type + .byte 49 # Abbrev [49] 0x23c4:0x2b DW_TAG_subprogram + .quad .Lfunc_begin124 # DW_AT_low_pc + .long .Lfunc_end124-.Lfunc_begin124 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long 9181 # DW_AT_object_pointer + .short 302 # DW_AT_decl_line + .long 9133 # DW_AT_specification + .byte 44 # Abbrev [44] 0x23dd:0xc DW_TAG_formal_parameter + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string449 # DW_AT_name + .long 12975 # DW_AT_type + # DW_AT_artificial + .byte 12 # Abbrev [12] 0x23e9:0x5 DW_TAG_structure_type + .long .Linfo_string281 # DW_AT_name + # DW_AT_declaration + .byte 0 # End Of Children Mark + .byte 35 # Abbrev [35] 0x23ef:0x41 DW_TAG_subprogram + .quad .Lfunc_begin125 # DW_AT_low_pc + .long .Lfunc_end125-.Lfunc_begin125 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string354 # DW_AT_linkage_name + .long .Linfo_string283 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + .byte 24 # Abbrev [24] 0x2408:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 12980 # DW_AT_type + .byte 24 # Abbrev [24] 0x2416:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + 
.long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 13001 # DW_AT_type + .byte 29 # Abbrev [29] 0x2424:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2429:0x5 DW_TAG_template_type_parameter + .long 9193 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 10 # Abbrev [10] 0x2430:0x41 DW_TAG_subprogram + .quad .Lfunc_begin126 # DW_AT_low_pc + .long .Lfunc_end126-.Lfunc_begin126 # DW_AT_high_pc + .byte 1 # DW_AT_frame_base + .byte 86 + .long .Linfo_string355 # DW_AT_linkage_name + .long .Linfo_string356 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 26 # DW_AT_decl_line + # DW_AT_external + .byte 24 # Abbrev [24] 0x2449:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 120 + .long .Linfo_string361 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 27 # DW_AT_decl_line + .long 13023 # DW_AT_type + .byte 24 # Abbrev [24] 0x2457:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 112 + .long .Linfo_string358 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 28 # DW_AT_decl_line + .long 13044 # DW_AT_type + .byte 29 # Abbrev [29] 0x2465:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x246a:0x5 DW_TAG_template_type_parameter + .long 9913 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 3 # Abbrev [3] 0x2471:0x7 DW_TAG_base_type + .long .Linfo_string141 # DW_AT_name + .byte 4 # DW_AT_encoding + .byte 8 # DW_AT_byte_size + .byte 3 # Abbrev [3] 0x2478:0x7 DW_TAG_base_type + .long .Linfo_string150 # DW_AT_name + .byte 7 # DW_AT_encoding + .byte 8 # DW_AT_byte_size + .byte 3 # Abbrev [3] 0x247f:0x7 DW_TAG_base_type + .long .Linfo_string153 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 8 # DW_AT_byte_size + .byte 12 # Abbrev [12] 0x2486:0x5 DW_TAG_structure_type + .long .Linfo_string156 # DW_AT_name + # DW_AT_declaration + .byte 42 # Abbrev [42] 0x248b:0x5 DW_TAG_pointer_type + .long 219 # DW_AT_type + .byte 13 # Abbrev [13] 0x2490:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string166 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2499:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x249e:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x24a5:0x5 DW_TAG_pointer_type + .long 63 # DW_AT_type + .byte 50 # Abbrev [50] 0x24aa:0x5 DW_TAG_reference_type + .long 63 # DW_AT_type + .byte 51 # Abbrev [51] 0x24af:0x5 DW_TAG_rvalue_reference_type + .long 63 # DW_AT_type + .byte 52 # Abbrev [52] 0x24b4:0x5 DW_TAG_const_type + .long 63 # DW_AT_type + .byte 13 # Abbrev [13] 0x24b9:0xf DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string181 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 43 # DW_AT_decl_line + .byte 12 # Abbrev [12] 0x24c2:0x5 DW_TAG_structure_type + .long .Linfo_string182 # DW_AT_name + # DW_AT_declaration + .byte 0 # End Of Children Mark + .byte 3 # Abbrev [3] 0x24c8:0x7 DW_TAG_base_type + .long .Linfo_string218 # DW_AT_name + .byte 6 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 3 # Abbrev [3] 0x24cf:0x7 DW_TAG_base_type + .long .Linfo_string221 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 16 # DW_AT_byte_size + 
.byte 53 # Abbrev [53] 0x24d6:0x19 DW_TAG_structure_type + .long .Linfo_string228 # DW_AT_name + # DW_AT_declaration + .byte 14 # Abbrev [14] 0x24db:0x9 DW_TAG_template_type_parameter + .long 250 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 15 # Abbrev [15] 0x24e4:0xa DW_TAG_template_value_parameter + .long 279 # DW_AT_type + .long .Linfo_string20 # DW_AT_name + .byte 0 # DW_AT_const_value + .byte 0 # End Of Children Mark + .byte 54 # Abbrev [54] 0x24ef:0xb DW_TAG_subroutine_type + .long 63 # DW_AT_type + .byte 40 # Abbrev [40] 0x24f4:0x5 DW_TAG_formal_parameter + .long 6083 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 50 # Abbrev [50] 0x24fa:0x5 DW_TAG_reference_type + .long 9396 # DW_AT_type + .byte 50 # Abbrev [50] 0x24ff:0x5 DW_TAG_reference_type + .long 9476 # DW_AT_type + .byte 42 # Abbrev [42] 0x2504:0x5 DW_TAG_pointer_type + .long 9396 # DW_AT_type + .byte 55 # Abbrev [55] 0x2509:0x7 DW_TAG_namespace + .byte 12 # Abbrev [12] 0x250a:0x5 DW_TAG_structure_type + .long .Linfo_string239 # DW_AT_name + # DW_AT_declaration + .byte 0 # End Of Children Mark + .byte 56 # Abbrev [56] 0x2510:0x5 DW_TAG_unspecified_type + .long .Linfo_string242 # DW_AT_name + .byte 42 # Abbrev [42] 0x2515:0x5 DW_TAG_pointer_type + .long 630 # DW_AT_type + .byte 42 # Abbrev [42] 0x251a:0x5 DW_TAG_pointer_type + .long 9350 # DW_AT_type + .byte 52 # Abbrev [52] 0x251f:0x5 DW_TAG_const_type + .long 6930 # DW_AT_type + .byte 42 # Abbrev [42] 0x2524:0x5 DW_TAG_pointer_type + .long 9513 # DW_AT_type + .byte 52 # Abbrev [52] 0x2529:0x5 DW_TAG_const_type + .long 9518 # DW_AT_type + .byte 42 # Abbrev [42] 0x252e:0x5 DW_TAG_pointer_type + .long 9523 # DW_AT_type + .byte 57 # Abbrev [57] 0x2533:0x1 DW_TAG_const_type + .byte 58 # Abbrev [58] 0x2534:0x1 DW_TAG_subroutine_type + .byte 42 # Abbrev [42] 0x2535:0x5 DW_TAG_pointer_type + .long 9524 # DW_AT_type + .byte 42 # Abbrev [42] 0x253a:0x5 DW_TAG_pointer_type + .long 1238 # DW_AT_type + .byte 42 # Abbrev [42] 0x253f:0x5 DW_TAG_pointer_type + .long 1243 # DW_AT_type + .byte 42 # Abbrev [42] 0x2544:0x5 DW_TAG_pointer_type + .long 9545 # DW_AT_type + .byte 13 # Abbrev [13] 0x2549:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string275 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2552:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2557:0x5 DW_TAG_template_type_parameter + .long 9381 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 59 # Abbrev [59] 0x255e:0xb DW_TAG_array_type + .long 9381 # DW_AT_type + .byte 60 # Abbrev [60] 0x2563:0x5 DW_TAG_subrange_type + .long 9577 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 61 # Abbrev [61] 0x2569:0x7 DW_TAG_base_type + .long .Linfo_string278 # DW_AT_name + .byte 8 # DW_AT_byte_size + .byte 7 # DW_AT_encoding + .byte 50 # Abbrev [50] 0x2570:0x5 DW_TAG_reference_type + .long 9589 # DW_AT_type + .byte 59 # Abbrev [59] 0x2575:0xc DW_TAG_array_type + .long 63 # DW_AT_type + .byte 62 # Abbrev [62] 0x257a:0x6 DW_TAG_subrange_type + .long 9577 # DW_AT_type + .byte 3 # DW_AT_count + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2581:0x5 DW_TAG_pointer_type + .long 9589 # DW_AT_type + .byte 42 # Abbrev [42] 0x2586:0x5 DW_TAG_pointer_type + .long 9488 # DW_AT_type + .byte 13 # Abbrev [13] 0x258b:0x13 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string298 # DW_AT_name + 
.byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 143 # DW_AT_decl_line + .byte 14 # Abbrev [14] 0x2594:0x9 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 63 # Abbrev [63] 0x259e:0x5 DW_TAG_volatile_type + .long 9416 # DW_AT_type + .byte 64 # Abbrev [64] 0x25a3:0xc DW_TAG_array_type + # DW_AT_GNU_vector + .long 63 # DW_AT_type + .byte 62 # Abbrev [62] 0x25a8:0x6 DW_TAG_subrange_type + .long 9577 # DW_AT_type + .byte 2 # DW_AT_count + .byte 0 # End Of Children Mark + .byte 52 # Abbrev [52] 0x25af:0x5 DW_TAG_const_type + .long 9652 # DW_AT_type + .byte 63 # Abbrev [63] 0x25b4:0x5 DW_TAG_volatile_type + .long 9381 # DW_AT_type + .byte 52 # Abbrev [52] 0x25b9:0x5 DW_TAG_const_type + .long 9662 # DW_AT_type + .byte 65 # Abbrev [65] 0x25be:0x1 DW_TAG_volatile_type + .byte 13 # Abbrev [13] 0x25bf:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string314 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x25c8:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x25cd:0x5 DW_TAG_template_type_parameter + .long 1238 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 66 # Abbrev [66] 0x25d4:0x9 DW_TAG_ptr_to_member_type + .long 9693 # DW_AT_type + .long 9350 # DW_AT_containing_type + .byte 67 # Abbrev [67] 0x25dd:0x7 DW_TAG_subroutine_type + .byte 18 # Abbrev [18] 0x25de:0x5 DW_TAG_formal_parameter + .long 9700 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x25e4:0x5 DW_TAG_pointer_type + .long 9705 # DW_AT_type + .byte 52 # Abbrev [52] 0x25e9:0x5 DW_TAG_const_type + .long 9350 # DW_AT_type + .byte 66 # Abbrev [66] 0x25ee:0x9 DW_TAG_ptr_to_member_type + .long 9719 # DW_AT_type + .long 9350 # DW_AT_containing_type + .byte 68 # Abbrev [68] 0x25f7:0x7 DW_TAG_subroutine_type + # DW_AT_reference + .byte 18 # Abbrev [18] 0x25f8:0x5 DW_TAG_formal_parameter + .long 9726 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x25fe:0x5 DW_TAG_pointer_type + .long 9731 # DW_AT_type + .byte 63 # Abbrev [63] 0x2603:0x5 DW_TAG_volatile_type + .long 9350 # DW_AT_type + .byte 66 # Abbrev [66] 0x2608:0x9 DW_TAG_ptr_to_member_type + .long 9745 # DW_AT_type + .long 9350 # DW_AT_containing_type + .byte 69 # Abbrev [69] 0x2611:0x7 DW_TAG_subroutine_type + # DW_AT_rvalue_reference + .byte 18 # Abbrev [18] 0x2612:0x5 DW_TAG_formal_parameter + .long 9752 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2618:0x5 DW_TAG_pointer_type + .long 9757 # DW_AT_type + .byte 52 # Abbrev [52] 0x261d:0x5 DW_TAG_const_type + .long 9731 # DW_AT_type + .byte 52 # Abbrev [52] 0x2622:0x5 DW_TAG_const_type + .long 9525 # DW_AT_type + .byte 50 # Abbrev [50] 0x2627:0x5 DW_TAG_reference_type + .long 9772 # DW_AT_type + .byte 52 # Abbrev [52] 0x262c:0x5 DW_TAG_const_type + .long 9777 # DW_AT_type + .byte 59 # Abbrev [59] 0x2631:0xc DW_TAG_array_type + .long 9416 # DW_AT_type + .byte 62 # Abbrev [62] 0x2636:0x6 DW_TAG_subrange_type + .long 9577 # DW_AT_type + .byte 1 # DW_AT_count + .byte 0 # End Of Children Mark + .byte 52 # Abbrev [52] 0x263d:0x5 DW_TAG_const_type + .long 9794 # DW_AT_type + .byte 70 # Abbrev [70] 0x2642:0x1 DW_TAG_subroutine_type + # DW_AT_reference + .byte 63 # Abbrev [63] 0x2643:0x5 DW_TAG_volatile_type + .long 9800 # 
DW_AT_type + .byte 71 # Abbrev [71] 0x2648:0x1 DW_TAG_subroutine_type + # DW_AT_rvalue_reference + .byte 52 # Abbrev [52] 0x2649:0x5 DW_TAG_const_type + .long 9806 # DW_AT_type + .byte 63 # Abbrev [63] 0x264e:0x5 DW_TAG_volatile_type + .long 9524 # DW_AT_type + .byte 52 # Abbrev [52] 0x2653:0x5 DW_TAG_const_type + .long 9816 # DW_AT_type + .byte 59 # Abbrev [59] 0x2658:0xc DW_TAG_array_type + .long 9381 # DW_AT_type + .byte 62 # Abbrev [62] 0x265d:0x6 DW_TAG_subrange_type + .long 9577 # DW_AT_type + .byte 1 # DW_AT_count + .byte 0 # End Of Children Mark + .byte 50 # Abbrev [50] 0x2664:0x5 DW_TAG_reference_type + .long 9811 # DW_AT_type + .byte 50 # Abbrev [50] 0x2669:0x5 DW_TAG_reference_type + .long 9838 # DW_AT_type + .byte 52 # Abbrev [52] 0x266e:0x5 DW_TAG_const_type + .long 9843 # DW_AT_type + .byte 66 # Abbrev [66] 0x2673:0x9 DW_TAG_ptr_to_member_type + .long 9852 # DW_AT_type + .long 9350 # DW_AT_containing_type + .byte 67 # Abbrev [67] 0x267c:0x7 DW_TAG_subroutine_type + .byte 18 # Abbrev [18] 0x267d:0x5 DW_TAG_formal_parameter + .long 9859 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2683:0x5 DW_TAG_pointer_type + .long 9350 # DW_AT_type + .byte 54 # Abbrev [54] 0x2688:0xb DW_TAG_subroutine_type + .long 9875 # DW_AT_type + .byte 40 # Abbrev [40] 0x268d:0x5 DW_TAG_formal_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2693:0x5 DW_TAG_pointer_type + .long 9880 # DW_AT_type + .byte 67 # Abbrev [67] 0x2698:0x7 DW_TAG_subroutine_type + .byte 40 # Abbrev [40] 0x2699:0x5 DW_TAG_formal_parameter + .long 6083 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 67 # Abbrev [67] 0x269f:0x7 DW_TAG_subroutine_type + .byte 40 # Abbrev [40] 0x26a0:0x5 DW_TAG_formal_parameter + .long 1243 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 67 # Abbrev [67] 0x26a6:0xc DW_TAG_subroutine_type + .byte 40 # Abbrev [40] 0x26a7:0x5 DW_TAG_formal_parameter + .long 1253 # DW_AT_type + .byte 40 # Abbrev [40] 0x26ac:0x5 DW_TAG_formal_parameter + .long 1243 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 67 # Abbrev [67] 0x26b2:0x7 DW_TAG_subroutine_type + .byte 40 # Abbrev [40] 0x26b3:0x5 DW_TAG_formal_parameter + .long 1253 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 66 # Abbrev [66] 0x26b9:0x9 DW_TAG_ptr_to_member_type + .long 9922 # DW_AT_type + .long 9124 # DW_AT_containing_type + .byte 67 # Abbrev [67] 0x26c2:0x7 DW_TAG_subroutine_type + .byte 18 # Abbrev [18] 0x26c3:0x5 DW_TAG_formal_parameter + .long 9151 # DW_AT_type + # DW_AT_artificial + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x26c9:0x14 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string360 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 49 # DW_AT_decl_line + .byte 15 # Abbrev [15] 0x26d2:0xa DW_TAG_template_value_parameter + .long 236 # DW_AT_type + .long .Linfo_string359 # DW_AT_name + .byte 3 # DW_AT_const_value + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x26dd:0x1d DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string362 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 40 # DW_AT_decl_line + .byte 14 # Abbrev [14] 0x26e6:0x9 DW_TAG_template_type_parameter + .long 1238 # DW_AT_type + .long .Linfo_string18 # DW_AT_name + .byte 15 # Abbrev [15] 0x26ef:0xa DW_TAG_template_value_parameter + .long 279 # DW_AT_type + .long .Linfo_string20 # DW_AT_name + .byte 0 # DW_AT_const_value + .byte 0 # 
End Of Children Mark + .byte 21 # Abbrev [21] 0x26fa:0xb DW_TAG_typedef + .long 9611 # DW_AT_type + .long .Linfo_string365 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 139 # DW_AT_decl_line + .byte 42 # Abbrev [42] 0x2705:0x5 DW_TAG_pointer_type + .long 9994 # DW_AT_type + .byte 53 # Abbrev [53] 0x270a:0x11 DW_TAG_structure_type + .long .Linfo_string367 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x270f:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2714:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x271b:0x5 DW_TAG_pointer_type + .long 10016 # DW_AT_type + .byte 53 # Abbrev [53] 0x2720:0x11 DW_TAG_structure_type + .long .Linfo_string368 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2725:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x272a:0x5 DW_TAG_template_type_parameter + .long 6083 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2731:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string369 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x273a:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x273f:0x5 DW_TAG_template_type_parameter + .long 279 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2746:0x5 DW_TAG_pointer_type + .long 10059 # DW_AT_type + .byte 53 # Abbrev [53] 0x274b:0x11 DW_TAG_structure_type + .long .Linfo_string370 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2750:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2755:0x5 DW_TAG_template_type_parameter + .long 279 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x275c:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string371 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2765:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x276a:0x5 DW_TAG_template_type_parameter + .long 9329 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2771:0x5 DW_TAG_pointer_type + .long 10102 # DW_AT_type + .byte 53 # Abbrev [53] 0x2776:0x11 DW_TAG_structure_type + .long .Linfo_string372 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x277b:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2780:0x5 DW_TAG_template_type_parameter + .long 9329 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2787:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string373 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2790:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2795:0x5 DW_TAG_template_type_parameter + .long 630 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x279c:0x5 
DW_TAG_pointer_type + .long 10145 # DW_AT_type + .byte 53 # Abbrev [53] 0x27a1:0x11 DW_TAG_structure_type + .long .Linfo_string374 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x27a6:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x27ab:0x5 DW_TAG_template_type_parameter + .long 630 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x27b2:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string375 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x27bb:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x27c0:0x5 DW_TAG_template_type_parameter + .long 579 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x27c7:0x5 DW_TAG_pointer_type + .long 10188 # DW_AT_type + .byte 53 # Abbrev [53] 0x27cc:0x11 DW_TAG_structure_type + .long .Linfo_string376 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x27d1:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x27d6:0x5 DW_TAG_template_type_parameter + .long 579 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x27dd:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string377 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x27e6:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x27eb:0x5 DW_TAG_template_type_parameter + .long 236 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x27f2:0x5 DW_TAG_pointer_type + .long 10231 # DW_AT_type + .byte 53 # Abbrev [53] 0x27f7:0x11 DW_TAG_structure_type + .long .Linfo_string378 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x27fc:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2801:0x5 DW_TAG_template_type_parameter + .long 236 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2808:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string379 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2811:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2816:0x5 DW_TAG_template_type_parameter + .long 9336 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x281d:0x5 DW_TAG_pointer_type + .long 10274 # DW_AT_type + .byte 53 # Abbrev [53] 0x2822:0x11 DW_TAG_structure_type + .long .Linfo_string380 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2827:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x282c:0x5 DW_TAG_template_type_parameter + .long 9336 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2833:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string381 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x283c:0xb 
DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2841:0x5 DW_TAG_template_type_parameter + .long 9343 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2848:0x5 DW_TAG_pointer_type + .long 10317 # DW_AT_type + .byte 53 # Abbrev [53] 0x284d:0x11 DW_TAG_structure_type + .long .Linfo_string382 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2852:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2857:0x5 DW_TAG_template_type_parameter + .long 9343 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x285e:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string383 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2867:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x286c:0x5 DW_TAG_template_type_parameter + .long 9350 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2873:0x5 DW_TAG_pointer_type + .long 10360 # DW_AT_type + .byte 53 # Abbrev [53] 0x2878:0x11 DW_TAG_structure_type + .long .Linfo_string384 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x287d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2882:0x5 DW_TAG_template_type_parameter + .long 9350 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2889:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string385 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2892:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2897:0x5 DW_TAG_template_type_parameter + .long 219 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x289e:0x5 DW_TAG_pointer_type + .long 10403 # DW_AT_type + .byte 53 # Abbrev [53] 0x28a3:0x11 DW_TAG_structure_type + .long .Linfo_string386 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x28a8:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x28ad:0x5 DW_TAG_template_type_parameter + .long 219 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x28b4:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string387 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x28bd:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x28c2:0x5 DW_TAG_template_type_parameter + .long 9355 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x28c9:0x5 DW_TAG_pointer_type + .long 10446 # DW_AT_type + .byte 53 # Abbrev [53] 0x28ce:0x11 DW_TAG_structure_type + .long .Linfo_string388 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x28d3:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x28d8:0x5 DW_TAG_template_type_parameter + .long 9355 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children 
Mark + .byte 13 # Abbrev [13] 0x28df:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string389 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x28e8:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x28ed:0x5 DW_TAG_template_type_parameter + .long 229 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x28f4:0x5 DW_TAG_pointer_type + .long 10489 # DW_AT_type + .byte 53 # Abbrev [53] 0x28f9:0x11 DW_TAG_structure_type + .long .Linfo_string390 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x28fe:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2903:0x5 DW_TAG_template_type_parameter + .long 229 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x290a:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string391 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2913:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2918:0x5 DW_TAG_template_type_parameter + .long 9360 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x291f:0x5 DW_TAG_pointer_type + .long 10532 # DW_AT_type + .byte 53 # Abbrev [53] 0x2924:0x11 DW_TAG_structure_type + .long .Linfo_string392 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2929:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x292e:0x5 DW_TAG_template_type_parameter + .long 9360 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2935:0x1a DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string393 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x293e:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2943:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 30 # Abbrev [30] 0x2948:0x5 DW_TAG_template_type_parameter + .long 6083 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x294f:0x5 DW_TAG_pointer_type + .long 10580 # DW_AT_type + .byte 53 # Abbrev [53] 0x2954:0x16 DW_TAG_structure_type + .long .Linfo_string394 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2959:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x295e:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 30 # Abbrev [30] 0x2963:0x5 DW_TAG_template_type_parameter + .long 6083 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x296a:0x5 DW_TAG_pointer_type + .long 10607 # DW_AT_type + .byte 53 # Abbrev [53] 0x296f:0x11 DW_TAG_structure_type + .long .Linfo_string395 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2974:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2979:0x5 DW_TAG_template_type_parameter + .long 9381 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2980:0x15 
DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string396 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2989:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x298e:0x5 DW_TAG_template_type_parameter + .long 9386 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2995:0x5 DW_TAG_pointer_type + .long 10650 # DW_AT_type + .byte 53 # Abbrev [53] 0x299a:0x11 DW_TAG_structure_type + .long .Linfo_string397 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x299f:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x29a4:0x5 DW_TAG_template_type_parameter + .long 9386 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x29ab:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string398 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x29b4:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x29b9:0x5 DW_TAG_template_type_parameter + .long 9391 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x29c0:0x5 DW_TAG_pointer_type + .long 10693 # DW_AT_type + .byte 53 # Abbrev [53] 0x29c5:0x11 DW_TAG_structure_type + .long .Linfo_string399 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x29ca:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x29cf:0x5 DW_TAG_template_type_parameter + .long 9391 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x29d6:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string400 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x29df:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x29e4:0x5 DW_TAG_template_type_parameter + .long 9396 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x29eb:0x5 DW_TAG_pointer_type + .long 10736 # DW_AT_type + .byte 53 # Abbrev [53] 0x29f0:0x11 DW_TAG_structure_type + .long .Linfo_string401 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x29f5:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x29fa:0x5 DW_TAG_template_type_parameter + .long 9396 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2a01:0x11 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string402 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2a0a:0x7 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 31 # Abbrev [31] 0x2a0f:0x1 DW_TAG_template_type_parameter + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2a12:0x5 DW_TAG_pointer_type + .long 10775 # DW_AT_type + .byte 53 # Abbrev [53] 0x2a17:0xd DW_TAG_structure_type + .long .Linfo_string403 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2a1c:0x7 
DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 31 # Abbrev [31] 0x2a21:0x1 DW_TAG_template_type_parameter + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2a24:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string404 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2a2d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2a32:0x5 DW_TAG_template_type_parameter + .long 9410 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2a39:0x5 DW_TAG_pointer_type + .long 10814 # DW_AT_type + .byte 53 # Abbrev [53] 0x2a3e:0x11 DW_TAG_structure_type + .long .Linfo_string405 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2a43:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2a48:0x5 DW_TAG_template_type_parameter + .long 9410 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2a4f:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string406 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2a58:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2a5d:0x5 DW_TAG_template_type_parameter + .long 897 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2a64:0x5 DW_TAG_pointer_type + .long 10857 # DW_AT_type + .byte 53 # Abbrev [53] 0x2a69:0x11 DW_TAG_structure_type + .long .Linfo_string407 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2a6e:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2a73:0x5 DW_TAG_template_type_parameter + .long 897 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2a7a:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string408 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2a83:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2a88:0x5 DW_TAG_template_type_parameter + .long 250 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2a8f:0x5 DW_TAG_pointer_type + .long 10900 # DW_AT_type + .byte 53 # Abbrev [53] 0x2a94:0x11 DW_TAG_structure_type + .long .Linfo_string409 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2a99:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2a9e:0x5 DW_TAG_template_type_parameter + .long 250 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2aa5:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string410 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2aae:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ab3:0x5 DW_TAG_template_type_parameter + .long 9430 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + 
.byte 42 # Abbrev [42] 0x2aba:0x5 DW_TAG_pointer_type + .long 10943 # DW_AT_type + .byte 53 # Abbrev [53] 0x2abf:0x11 DW_TAG_structure_type + .long .Linfo_string411 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2ac4:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ac9:0x5 DW_TAG_template_type_parameter + .long 9430 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2ad0:0x5 DW_TAG_pointer_type + .long 10965 # DW_AT_type + .byte 53 # Abbrev [53] 0x2ad5:0x11 DW_TAG_structure_type + .long .Linfo_string412 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2ada:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2adf:0x5 DW_TAG_template_type_parameter + .long 1238 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2ae6:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string413 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2aef:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2af4:0x5 DW_TAG_template_type_parameter + .long 9455 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2afb:0x5 DW_TAG_pointer_type + .long 11008 # DW_AT_type + .byte 53 # Abbrev [53] 0x2b00:0x11 DW_TAG_structure_type + .long .Linfo_string414 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2b05:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2b0a:0x5 DW_TAG_template_type_parameter + .long 9455 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2b11:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string415 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2b1a:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2b1f:0x5 DW_TAG_template_type_parameter + .long 9466 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2b26:0x5 DW_TAG_pointer_type + .long 11051 # DW_AT_type + .byte 53 # Abbrev [53] 0x2b2b:0x11 DW_TAG_structure_type + .long .Linfo_string416 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2b30:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2b35:0x5 DW_TAG_template_type_parameter + .long 9466 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2b3c:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string417 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2b45:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2b4a:0x5 DW_TAG_template_type_parameter + .long 9471 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2b51:0x5 DW_TAG_pointer_type + .long 11094 # DW_AT_type + .byte 53 # Abbrev [53] 0x2b56:0x11 DW_TAG_structure_type + .long .Linfo_string418 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev 
[29] 0x2b5b:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2b60:0x5 DW_TAG_template_type_parameter + .long 9471 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2b67:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string419 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2b70:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2b75:0x5 DW_TAG_template_type_parameter + .long 9482 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2b7c:0x5 DW_TAG_pointer_type + .long 11137 # DW_AT_type + .byte 53 # Abbrev [53] 0x2b81:0x11 DW_TAG_structure_type + .long .Linfo_string420 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2b86:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2b8b:0x5 DW_TAG_template_type_parameter + .long 9482 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2b92:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string421 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2b9b:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ba0:0x5 DW_TAG_template_type_parameter + .long 9488 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2ba7:0x5 DW_TAG_pointer_type + .long 11180 # DW_AT_type + .byte 53 # Abbrev [53] 0x2bac:0x11 DW_TAG_structure_type + .long .Linfo_string422 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2bb1:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2bb6:0x5 DW_TAG_template_type_parameter + .long 9488 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2bbd:0x1a DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string423 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2bc6:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2bcb:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 30 # Abbrev [30] 0x2bd0:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2bd7:0x5 DW_TAG_pointer_type + .long 11228 # DW_AT_type + .byte 53 # Abbrev [53] 0x2bdc:0x16 DW_TAG_structure_type + .long .Linfo_string424 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2be1:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2be6:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 30 # Abbrev [30] 0x2beb:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2bf2:0x1a DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string425 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2bfb:0x10 
DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2c00:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 30 # Abbrev [30] 0x2c05:0x5 DW_TAG_template_type_parameter + .long 9498 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2c0c:0x5 DW_TAG_pointer_type + .long 11281 # DW_AT_type + .byte 53 # Abbrev [53] 0x2c11:0x16 DW_TAG_structure_type + .long .Linfo_string426 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2c16:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2c1b:0x5 DW_TAG_template_type_parameter + .long 9493 # DW_AT_type + .byte 30 # Abbrev [30] 0x2c20:0x5 DW_TAG_template_type_parameter + .long 9498 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2c27:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string427 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2c30:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2c35:0x5 DW_TAG_template_type_parameter + .long 9503 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2c3c:0x5 DW_TAG_pointer_type + .long 11329 # DW_AT_type + .byte 53 # Abbrev [53] 0x2c41:0x11 DW_TAG_structure_type + .long .Linfo_string428 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2c46:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2c4b:0x5 DW_TAG_template_type_parameter + .long 9503 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2c52:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string429 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2c5b:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2c60:0x5 DW_TAG_template_type_parameter + .long 9508 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2c67:0x5 DW_TAG_pointer_type + .long 11372 # DW_AT_type + .byte 53 # Abbrev [53] 0x2c6c:0x11 DW_TAG_structure_type + .long .Linfo_string430 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2c71:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2c76:0x5 DW_TAG_template_type_parameter + .long 9508 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2c7d:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string431 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2c86:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2c8b:0x5 DW_TAG_template_type_parameter + .long 9524 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2c92:0x5 DW_TAG_pointer_type + .long 11415 # DW_AT_type + .byte 53 # Abbrev [53] 0x2c97:0x11 DW_TAG_structure_type + .long .Linfo_string432 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2c9c:0xb DW_TAG_GNU_template_parameter_pack + 
.long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ca1:0x5 DW_TAG_template_type_parameter + .long 9524 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2ca8:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string433 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2cb1:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2cb6:0x5 DW_TAG_template_type_parameter + .long 9525 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2cbd:0x5 DW_TAG_pointer_type + .long 11458 # DW_AT_type + .byte 53 # Abbrev [53] 0x2cc2:0x11 DW_TAG_structure_type + .long .Linfo_string434 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2cc7:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ccc:0x5 DW_TAG_template_type_parameter + .long 9525 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2cd3:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string435 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2cdc:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ce1:0x5 DW_TAG_template_type_parameter + .long 9530 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2ce8:0x5 DW_TAG_pointer_type + .long 11501 # DW_AT_type + .byte 53 # Abbrev [53] 0x2ced:0x11 DW_TAG_structure_type + .long .Linfo_string436 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2cf2:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2cf7:0x5 DW_TAG_template_type_parameter + .long 9530 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2cfe:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string437 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2d07:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2d0c:0x5 DW_TAG_template_type_parameter + .long 1243 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2d13:0x5 DW_TAG_pointer_type + .long 11544 # DW_AT_type + .byte 53 # Abbrev [53] 0x2d18:0x11 DW_TAG_structure_type + .long .Linfo_string438 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2d1d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2d22:0x5 DW_TAG_template_type_parameter + .long 1243 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2d29:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string439 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2d32:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2d37:0x5 DW_TAG_template_type_parameter + .long 9535 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # 
Abbrev [42] 0x2d3e:0x5 DW_TAG_pointer_type + .long 11587 # DW_AT_type + .byte 53 # Abbrev [53] 0x2d43:0x11 DW_TAG_structure_type + .long .Linfo_string440 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2d48:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2d4d:0x5 DW_TAG_template_type_parameter + .long 9535 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2d54:0xf DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string441 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 38 # Abbrev [38] 0x2d5d:0x5 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2d63:0x5 DW_TAG_pointer_type + .long 11624 # DW_AT_type + .byte 53 # Abbrev [53] 0x2d68:0xb DW_TAG_structure_type + .long .Linfo_string442 # DW_AT_name + # DW_AT_declaration + .byte 38 # Abbrev [38] 0x2d6d:0x5 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2d73:0x1a DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string443 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2d7c:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2d81:0x5 DW_TAG_template_type_parameter + .long 9518 # DW_AT_type + .byte 30 # Abbrev [30] 0x2d86:0x5 DW_TAG_template_type_parameter + .long 9518 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2d8d:0x5 DW_TAG_pointer_type + .long 11666 # DW_AT_type + .byte 53 # Abbrev [53] 0x2d92:0x16 DW_TAG_structure_type + .long .Linfo_string444 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2d97:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2d9c:0x5 DW_TAG_template_type_parameter + .long 9518 # DW_AT_type + .byte 30 # Abbrev [30] 0x2da1:0x5 DW_TAG_template_type_parameter + .long 9518 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2da8:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string445 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2db1:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2db6:0x5 DW_TAG_template_type_parameter + .long 9540 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2dbd:0x5 DW_TAG_pointer_type + .long 11714 # DW_AT_type + .byte 53 # Abbrev [53] 0x2dc2:0x11 DW_TAG_structure_type + .long .Linfo_string446 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2dc7:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2dcc:0x5 DW_TAG_template_type_parameter + .long 9540 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2dd3:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string447 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2ddc:0xb DW_TAG_GNU_template_parameter_pack + .long 
.Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2de1:0x5 DW_TAG_template_type_parameter + .long 9566 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2de8:0x5 DW_TAG_pointer_type + .long 11757 # DW_AT_type + .byte 53 # Abbrev [53] 0x2ded:0x11 DW_TAG_structure_type + .long .Linfo_string448 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2df2:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2df7:0x5 DW_TAG_template_type_parameter + .long 9566 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2dfe:0x5 DW_TAG_pointer_type + .long 5142 # DW_AT_type + .byte 13 # Abbrev [13] 0x2e03:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string450 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2e0c:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2e11:0x5 DW_TAG_template_type_parameter + .long 1248 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2e18:0x5 DW_TAG_pointer_type + .long 11805 # DW_AT_type + .byte 53 # Abbrev [53] 0x2e1d:0x11 DW_TAG_structure_type + .long .Linfo_string451 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2e22:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2e27:0x5 DW_TAG_template_type_parameter + .long 1248 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2e2e:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string452 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2e37:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2e3c:0x5 DW_TAG_template_type_parameter + .long 9584 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2e43:0x5 DW_TAG_pointer_type + .long 11848 # DW_AT_type + .byte 53 # Abbrev [53] 0x2e48:0x11 DW_TAG_structure_type + .long .Linfo_string453 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2e4d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2e52:0x5 DW_TAG_template_type_parameter + .long 9584 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2e59:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string454 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2e62:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2e67:0x5 DW_TAG_template_type_parameter + .long 9601 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2e6e:0x5 DW_TAG_pointer_type + .long 11891 # DW_AT_type + .byte 53 # Abbrev [53] 0x2e73:0x11 DW_TAG_structure_type + .long .Linfo_string455 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2e78:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2e7d:0x5 DW_TAG_template_type_parameter + .long 9601 # DW_AT_type + .byte 0 # End Of 
Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2e84:0x1a DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string456 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2e8d:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2e92:0x5 DW_TAG_template_type_parameter + .long 9381 # DW_AT_type + .byte 30 # Abbrev [30] 0x2e97:0x5 DW_TAG_template_type_parameter + .long 9606 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2e9e:0x5 DW_TAG_pointer_type + .long 11939 # DW_AT_type + .byte 53 # Abbrev [53] 0x2ea3:0x16 DW_TAG_structure_type + .long .Linfo_string457 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2ea8:0x10 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ead:0x5 DW_TAG_template_type_parameter + .long 9381 # DW_AT_type + .byte 30 # Abbrev [30] 0x2eb2:0x5 DW_TAG_template_type_parameter + .long 9606 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2eb9:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string458 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2ec2:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ec7:0x5 DW_TAG_template_type_parameter + .long 9611 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2ece:0x5 DW_TAG_pointer_type + .long 11987 # DW_AT_type + .byte 53 # Abbrev [53] 0x2ed3:0x11 DW_TAG_structure_type + .long .Linfo_string459 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2ed8:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2edd:0x5 DW_TAG_template_type_parameter + .long 9611 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2ee4:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string460 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2eed:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ef2:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2ef9:0x5 DW_TAG_pointer_type + .long 12030 # DW_AT_type + .byte 53 # Abbrev [53] 0x2efe:0x11 DW_TAG_structure_type + .long .Linfo_string461 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2f03:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2f08:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2f0f:0x1f DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string462 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2f18:0x15 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2f1d:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 30 # Abbrev [30] 0x2f22:0x5 
DW_TAG_template_type_parameter + .long 630 # DW_AT_type + .byte 30 # Abbrev [30] 0x2f27:0x5 DW_TAG_template_type_parameter + .long 9630 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2f2e:0x5 DW_TAG_pointer_type + .long 12083 # DW_AT_type + .byte 53 # Abbrev [53] 0x2f33:0x1b DW_TAG_structure_type + .long .Linfo_string463 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2f38:0x15 DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2f3d:0x5 DW_TAG_template_type_parameter + .long 63 # DW_AT_type + .byte 30 # Abbrev [30] 0x2f42:0x5 DW_TAG_template_type_parameter + .long 630 # DW_AT_type + .byte 30 # Abbrev [30] 0x2f47:0x5 DW_TAG_template_type_parameter + .long 9630 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2f4e:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string464 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2f57:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2f5c:0x5 DW_TAG_template_type_parameter + .long 9635 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2f63:0x5 DW_TAG_pointer_type + .long 12136 # DW_AT_type + .byte 53 # Abbrev [53] 0x2f68:0x11 DW_TAG_structure_type + .long .Linfo_string465 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2f6d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2f72:0x5 DW_TAG_template_type_parameter + .long 9635 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2f79:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string466 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2f82:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2f87:0x5 DW_TAG_template_type_parameter + .long 9647 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2f8e:0x5 DW_TAG_pointer_type + .long 12179 # DW_AT_type + .byte 53 # Abbrev [53] 0x2f93:0x11 DW_TAG_structure_type + .long .Linfo_string467 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2f98:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2f9d:0x5 DW_TAG_template_type_parameter + .long 9647 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2fa4:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string468 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2fad:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2fb2:0x5 DW_TAG_template_type_parameter + .long 9657 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2fb9:0x5 DW_TAG_pointer_type + .long 12222 # DW_AT_type + .byte 53 # Abbrev [53] 0x2fbe:0x11 DW_TAG_structure_type + .long .Linfo_string469 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2fc3:0xb DW_TAG_GNU_template_parameter_pack + .long 
.Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2fc8:0x5 DW_TAG_template_type_parameter + .long 9657 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x2fcf:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string470 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x2fd8:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2fdd:0x5 DW_TAG_template_type_parameter + .long 9663 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2fe4:0x5 DW_TAG_pointer_type + .long 12265 # DW_AT_type + .byte 53 # Abbrev [53] 0x2fe9:0x11 DW_TAG_structure_type + .long .Linfo_string471 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x2fee:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x2ff3:0x5 DW_TAG_template_type_parameter + .long 9663 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x2ffa:0x5 DW_TAG_pointer_type + .long 286 # DW_AT_type + .byte 13 # Abbrev [13] 0x2fff:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string472 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x3008:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x300d:0x5 DW_TAG_template_type_parameter + .long 9684 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3014:0x5 DW_TAG_pointer_type + .long 12313 # DW_AT_type + .byte 53 # Abbrev [53] 0x3019:0x11 DW_TAG_structure_type + .long .Linfo_string473 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x301e:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3023:0x5 DW_TAG_template_type_parameter + .long 9684 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x302a:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string474 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x3033:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3038:0x5 DW_TAG_template_type_parameter + .long 9710 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x303f:0x5 DW_TAG_pointer_type + .long 12356 # DW_AT_type + .byte 53 # Abbrev [53] 0x3044:0x11 DW_TAG_structure_type + .long .Linfo_string475 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x3049:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x304e:0x5 DW_TAG_template_type_parameter + .long 9710 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3055:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string476 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x305e:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3063:0x5 DW_TAG_template_type_parameter + .long 9736 # DW_AT_type + 
.byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x306a:0x5 DW_TAG_pointer_type + .long 12399 # DW_AT_type + .byte 53 # Abbrev [53] 0x306f:0x11 DW_TAG_structure_type + .long .Linfo_string477 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x3074:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3079:0x5 DW_TAG_template_type_parameter + .long 9736 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3080:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string478 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x3089:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x308e:0x5 DW_TAG_template_type_parameter + .long 9762 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3095:0x5 DW_TAG_pointer_type + .long 12442 # DW_AT_type + .byte 53 # Abbrev [53] 0x309a:0x11 DW_TAG_structure_type + .long .Linfo_string479 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x309f:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x30a4:0x5 DW_TAG_template_type_parameter + .long 9762 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x30ab:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string480 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x30b4:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x30b9:0x5 DW_TAG_template_type_parameter + .long 9767 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x30c0:0x5 DW_TAG_pointer_type + .long 12485 # DW_AT_type + .byte 53 # Abbrev [53] 0x30c5:0x11 DW_TAG_structure_type + .long .Linfo_string481 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x30ca:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x30cf:0x5 DW_TAG_template_type_parameter + .long 9767 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x30d6:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string482 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x30df:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x30e4:0x5 DW_TAG_template_type_parameter + .long 9789 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x30eb:0x5 DW_TAG_pointer_type + .long 12528 # DW_AT_type + .byte 53 # Abbrev [53] 0x30f0:0x11 DW_TAG_structure_type + .long .Linfo_string483 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x30f5:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x30fa:0x5 DW_TAG_template_type_parameter + .long 9789 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3101:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string484 # DW_AT_name + .byte 1 # 
DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x310a:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x310f:0x5 DW_TAG_template_type_parameter + .long 9795 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3116:0x5 DW_TAG_pointer_type + .long 12571 # DW_AT_type + .byte 53 # Abbrev [53] 0x311b:0x11 DW_TAG_structure_type + .long .Linfo_string485 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x3120:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3125:0x5 DW_TAG_template_type_parameter + .long 9795 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x312c:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string486 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x3135:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x313a:0x5 DW_TAG_template_type_parameter + .long 9801 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3141:0x5 DW_TAG_pointer_type + .long 12614 # DW_AT_type + .byte 53 # Abbrev [53] 0x3146:0x11 DW_TAG_structure_type + .long .Linfo_string487 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x314b:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3150:0x5 DW_TAG_template_type_parameter + .long 9801 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3157:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string488 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x3160:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3165:0x5 DW_TAG_template_type_parameter + .long 9811 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x316c:0x5 DW_TAG_pointer_type + .long 12657 # DW_AT_type + .byte 53 # Abbrev [53] 0x3171:0x11 DW_TAG_structure_type + .long .Linfo_string489 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x3176:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x317b:0x5 DW_TAG_template_type_parameter + .long 9811 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3182:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string490 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x318b:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3190:0x5 DW_TAG_template_type_parameter + .long 9828 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3197:0x5 DW_TAG_pointer_type + .long 12700 # DW_AT_type + .byte 53 # Abbrev [53] 0x319c:0x11 DW_TAG_structure_type + .long .Linfo_string491 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x31a1:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x31a6:0x5 
DW_TAG_template_type_parameter + .long 9828 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x31ad:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string492 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x31b6:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x31bb:0x5 DW_TAG_template_type_parameter + .long 9833 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x31c2:0x5 DW_TAG_pointer_type + .long 12743 # DW_AT_type + .byte 53 # Abbrev [53] 0x31c7:0x11 DW_TAG_structure_type + .long .Linfo_string493 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x31cc:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x31d1:0x5 DW_TAG_template_type_parameter + .long 9833 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x31d8:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string494 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x31e1:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x31e6:0x5 DW_TAG_template_type_parameter + .long 9864 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x31ed:0x5 DW_TAG_pointer_type + .long 12786 # DW_AT_type + .byte 53 # Abbrev [53] 0x31f2:0x11 DW_TAG_structure_type + .long .Linfo_string495 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x31f7:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x31fc:0x5 DW_TAG_template_type_parameter + .long 9864 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3203:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string496 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x320c:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3211:0x5 DW_TAG_template_type_parameter + .long 9525 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3218:0x5 DW_TAG_pointer_type + .long 12829 # DW_AT_type + .byte 53 # Abbrev [53] 0x321d:0x11 DW_TAG_structure_type + .long .Linfo_string497 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x3222:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3227:0x5 DW_TAG_template_type_parameter + .long 9525 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x322e:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string498 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x3237:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x323c:0x5 DW_TAG_template_type_parameter + .long 9887 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3243:0x5 DW_TAG_pointer_type + .long 12872 # DW_AT_type + 
.byte 53 # Abbrev [53] 0x3248:0x11 DW_TAG_structure_type + .long .Linfo_string499 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x324d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3252:0x5 DW_TAG_template_type_parameter + .long 9887 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3259:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string500 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x3262:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3267:0x5 DW_TAG_template_type_parameter + .long 9894 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x326e:0x5 DW_TAG_pointer_type + .long 12915 # DW_AT_type + .byte 53 # Abbrev [53] 0x3273:0x11 DW_TAG_structure_type + .long .Linfo_string501 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x3278:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x327d:0x5 DW_TAG_template_type_parameter + .long 9894 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x3284:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string502 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x328d:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3292:0x5 DW_TAG_template_type_parameter + .long 9906 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x3299:0x5 DW_TAG_pointer_type + .long 12958 # DW_AT_type + .byte 53 # Abbrev [53] 0x329e:0x11 DW_TAG_structure_type + .long .Linfo_string503 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x32a3:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x32a8:0x5 DW_TAG_template_type_parameter + .long 9906 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x32af:0x5 DW_TAG_pointer_type + .long 9124 # DW_AT_type + .byte 13 # Abbrev [13] 0x32b4:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string450 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 29 # Abbrev [29] 0x32bd:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x32c2:0x5 DW_TAG_template_type_parameter + .long 9193 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x32c9:0x5 DW_TAG_pointer_type + .long 13006 # DW_AT_type + .byte 53 # Abbrev [53] 0x32ce:0x11 DW_TAG_structure_type + .long .Linfo_string451 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x32d3:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x32d8:0x5 DW_TAG_template_type_parameter + .long 9193 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 13 # Abbrev [13] 0x32df:0x15 DW_TAG_structure_type + .byte 5 # DW_AT_calling_convention + .long .Linfo_string504 # DW_AT_name + .byte 1 # DW_AT_byte_size + .byte 1 # DW_AT_decl_file + .byte 5 # DW_AT_decl_line + .byte 
29 # Abbrev [29] 0x32e8:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x32ed:0x5 DW_TAG_template_type_parameter + .long 9913 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 42 # Abbrev [42] 0x32f4:0x5 DW_TAG_pointer_type + .long 13049 # DW_AT_type + .byte 53 # Abbrev [53] 0x32f9:0x11 DW_TAG_structure_type + .long .Linfo_string505 # DW_AT_name + # DW_AT_declaration + .byte 29 # Abbrev [29] 0x32fe:0xb DW_TAG_GNU_template_parameter_pack + .long .Linfo_string84 # DW_AT_name + .byte 30 # Abbrev [30] 0x3303:0x5 DW_TAG_template_type_parameter + .long 9913 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end1 + .quad .Lfunc_begin28 + .quad .Lfunc_end28 + .quad .Lfunc_begin44 + .quad .Lfunc_end44 + .quad .Lfunc_begin48 + .quad .Lfunc_end48 + .quad .Lfunc_begin56 + .quad .Lfunc_end58 + .quad .Lfunc_begin90 + .quad .Lfunc_end90 + .quad .Lfunc_begin104 + .quad .Lfunc_end104 + .quad .Lfunc_begin120 + .quad .Lfunc_end122 + .quad .Lfunc_begin124 + .quad .Lfunc_end125 + .quad .Lfunc_begin2 + .quad .Lfunc_end2 + .quad .Lfunc_begin3 + .quad .Lfunc_end3 + .quad .Lfunc_begin4 + .quad .Lfunc_end4 + .quad .Lfunc_begin5 + .quad .Lfunc_end5 + .quad .Lfunc_begin6 + .quad .Lfunc_end6 + .quad .Lfunc_begin7 + .quad .Lfunc_end7 + .quad .Lfunc_begin8 + .quad .Lfunc_end8 + .quad .Lfunc_begin9 + .quad .Lfunc_end9 + .quad .Lfunc_begin10 + .quad .Lfunc_end10 + .quad .Lfunc_begin11 + .quad .Lfunc_end11 + .quad .Lfunc_begin12 + .quad .Lfunc_end12 + .quad .Lfunc_begin13 + .quad .Lfunc_end13 + .quad .Lfunc_begin14 + .quad .Lfunc_end14 + .quad .Lfunc_begin15 + .quad .Lfunc_end15 + .quad .Lfunc_begin16 + .quad .Lfunc_end16 + .quad .Lfunc_begin17 + .quad .Lfunc_end17 + .quad .Lfunc_begin18 + .quad .Lfunc_end18 + .quad .Lfunc_begin19 + .quad .Lfunc_end19 + .quad .Lfunc_begin20 + .quad .Lfunc_end20 + .quad .Lfunc_begin21 + .quad .Lfunc_end21 + .quad .Lfunc_begin22 + .quad .Lfunc_end22 + .quad .Lfunc_begin23 + .quad .Lfunc_end23 + .quad .Lfunc_begin24 + .quad .Lfunc_end24 + .quad .Lfunc_begin25 + .quad .Lfunc_end25 + .quad .Lfunc_begin26 + .quad .Lfunc_end26 + .quad .Lfunc_begin27 + .quad .Lfunc_end27 + .quad .Lfunc_begin29 + .quad .Lfunc_end29 + .quad .Lfunc_begin30 + .quad .Lfunc_end30 + .quad .Lfunc_begin31 + .quad .Lfunc_end31 + .quad .Lfunc_begin32 + .quad .Lfunc_end32 + .quad .Lfunc_begin33 + .quad .Lfunc_end33 + .quad .Lfunc_begin34 + .quad .Lfunc_end34 + .quad .Lfunc_begin35 + .quad .Lfunc_end35 + .quad .Lfunc_begin36 + .quad .Lfunc_end36 + .quad .Lfunc_begin37 + .quad .Lfunc_end37 + .quad .Lfunc_begin38 + .quad .Lfunc_end38 + .quad .Lfunc_begin39 + .quad .Lfunc_end39 + .quad .Lfunc_begin40 + .quad .Lfunc_end40 + .quad .Lfunc_begin41 + .quad .Lfunc_end41 + .quad .Lfunc_begin42 + .quad .Lfunc_end42 + .quad .Lfunc_begin43 + .quad .Lfunc_end43 + .quad .Lfunc_begin45 + .quad .Lfunc_end45 + .quad .Lfunc_begin46 + .quad .Lfunc_end46 + .quad .Lfunc_begin47 + .quad .Lfunc_end47 + .quad .Lfunc_begin49 + .quad .Lfunc_end49 + .quad .Lfunc_begin50 + .quad .Lfunc_end50 + .quad .Lfunc_begin51 + .quad .Lfunc_end51 + .quad .Lfunc_begin52 + .quad .Lfunc_end52 + .quad .Lfunc_begin53 + .quad .Lfunc_end53 + .quad .Lfunc_begin54 + .quad .Lfunc_end54 + .quad .Lfunc_begin55 + .quad .Lfunc_end55 + .quad .Lfunc_begin59 + .quad .Lfunc_end59 + .quad .Lfunc_begin60 + .quad .Lfunc_end60 + 
.quad .Lfunc_begin61 + .quad .Lfunc_end61 + .quad .Lfunc_begin62 + .quad .Lfunc_end62 + .quad .Lfunc_begin63 + .quad .Lfunc_end63 + .quad .Lfunc_begin64 + .quad .Lfunc_end64 + .quad .Lfunc_begin65 + .quad .Lfunc_end65 + .quad .Lfunc_begin66 + .quad .Lfunc_end66 + .quad .Lfunc_begin67 + .quad .Lfunc_end67 + .quad .Lfunc_begin68 + .quad .Lfunc_end68 + .quad .Lfunc_begin69 + .quad .Lfunc_end69 + .quad .Lfunc_begin70 + .quad .Lfunc_end70 + .quad .Lfunc_begin71 + .quad .Lfunc_end71 + .quad .Lfunc_begin72 + .quad .Lfunc_end72 + .quad .Lfunc_begin73 + .quad .Lfunc_end73 + .quad .Lfunc_begin74 + .quad .Lfunc_end74 + .quad .Lfunc_begin75 + .quad .Lfunc_end75 + .quad .Lfunc_begin76 + .quad .Lfunc_end76 + .quad .Lfunc_begin77 + .quad .Lfunc_end77 + .quad .Lfunc_begin78 + .quad .Lfunc_end78 + .quad .Lfunc_begin79 + .quad .Lfunc_end79 + .quad .Lfunc_begin80 + .quad .Lfunc_end80 + .quad .Lfunc_begin81 + .quad .Lfunc_end81 + .quad .Lfunc_begin82 + .quad .Lfunc_end82 + .quad .Lfunc_begin83 + .quad .Lfunc_end83 + .quad .Lfunc_begin84 + .quad .Lfunc_end84 + .quad .Lfunc_begin85 + .quad .Lfunc_end85 + .quad .Lfunc_begin86 + .quad .Lfunc_end86 + .quad .Lfunc_begin87 + .quad .Lfunc_end87 + .quad .Lfunc_begin88 + .quad .Lfunc_end88 + .quad .Lfunc_begin89 + .quad .Lfunc_end89 + .quad .Lfunc_begin91 + .quad .Lfunc_end91 + .quad .Lfunc_begin92 + .quad .Lfunc_end92 + .quad .Lfunc_begin93 + .quad .Lfunc_end93 + .quad .Lfunc_begin94 + .quad .Lfunc_end94 + .quad .Lfunc_begin95 + .quad .Lfunc_end95 + .quad .Lfunc_begin96 + .quad .Lfunc_end96 + .quad .Lfunc_begin97 + .quad .Lfunc_end97 + .quad .Lfunc_begin98 + .quad .Lfunc_end98 + .quad .Lfunc_begin99 + .quad .Lfunc_end99 + .quad .Lfunc_begin100 + .quad .Lfunc_end100 + .quad .Lfunc_begin101 + .quad .Lfunc_end101 + .quad .Lfunc_begin102 + .quad .Lfunc_end102 + .quad .Lfunc_begin103 + .quad .Lfunc_end103 + .quad .Lfunc_begin105 + .quad .Lfunc_end105 + .quad .Lfunc_begin106 + .quad .Lfunc_end106 + .quad .Lfunc_begin107 + .quad .Lfunc_end107 + .quad .Lfunc_begin108 + .quad .Lfunc_end108 + .quad .Lfunc_begin109 + .quad .Lfunc_end109 + .quad .Lfunc_begin110 + .quad .Lfunc_end110 + .quad .Lfunc_begin111 + .quad .Lfunc_end111 + .quad .Lfunc_begin112 + .quad .Lfunc_end112 + .quad .Lfunc_begin113 + .quad .Lfunc_end113 + .quad .Lfunc_begin114 + .quad .Lfunc_end114 + .quad .Lfunc_begin115 + .quad .Lfunc_end115 + .quad .Lfunc_begin116 + .quad .Lfunc_end116 + .quad .Lfunc_begin117 + .quad .Lfunc_end117 + .quad .Lfunc_begin118 + .quad .Lfunc_end118 + .quad .Lfunc_begin119 + .quad .Lfunc_end119 + .quad .Lfunc_begin123 + .quad .Lfunc_end123 + .quad .Lfunc_begin126 + .quad .Lfunc_end126 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 14.0.0 (git@github.com:llvm/llvm-project.git 6d48e2505c7a68a470e75b61ad504d51db0f8a36)" # string offset=0 +.Linfo_string1: + .asciz "cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp" # string offset=101 +.Linfo_string2: + .asciz "./" # string offset=188 +.Linfo_string3: + .asciz "i" # string offset=191 +.Linfo_string4: + .asciz "int" # string offset=193 +.Linfo_string5: + .asciz "ns" # string offset=197 +.Linfo_string6: + .asciz "unsigned int" # string offset=200 +.Linfo_string7: + .asciz "Enumerator1" # string offset=213 +.Linfo_string8: + .asciz "Enumerator2" # string offset=225 +.Linfo_string9: + .asciz "Enumerator3" # string offset=237 +.Linfo_string10: + .asciz "Enumeration" # string offset=249 +.Linfo_string11: + .asciz "EnumerationClass" # string 
offset=261 +.Linfo_string12: + .asciz "unsigned char" # string offset=278 +.Linfo_string13: + .asciz "kNeg" # string offset=292 +.Linfo_string14: + .asciz "EnumerationSmall" # string offset=297 +.Linfo_string15: + .asciz "AnonEnum1" # string offset=314 +.Linfo_string16: + .asciz "AnonEnum2" # string offset=324 +.Linfo_string17: + .asciz "AnonEnum3" # string offset=334 +.Linfo_string18: + .asciz "T" # string offset=344 +.Linfo_string19: + .asciz "bool" # string offset=346 +.Linfo_string20: + .asciz "b" # string offset=351 +.Linfo_string21: + .asciz "_STNt3|<int, false>" # string offset=353 +.Linfo_string22: + .asciz "t10" # string offset=373 +.Linfo_string23: + .asciz "std" # string offset=377 +.Linfo_string24: + .asciz "signed char" # string offset=381 +.Linfo_string25: + .asciz "__int8_t" # string offset=393 +.Linfo_string26: + .asciz "int8_t" # string offset=402 +.Linfo_string27: + .asciz "short" # string offset=409 +.Linfo_string28: + .asciz "__int16_t" # string offset=415 +.Linfo_string29: + .asciz "int16_t" # string offset=425 +.Linfo_string30: + .asciz "__int32_t" # string offset=433 +.Linfo_string31: + .asciz "int32_t" # string offset=443 +.Linfo_string32: + .asciz "long" # string offset=451 +.Linfo_string33: + .asciz "__int64_t" # string offset=456 +.Linfo_string34: + .asciz "int64_t" # string offset=466 +.Linfo_string35: + .asciz "int_fast8_t" # string offset=474 +.Linfo_string36: + .asciz "int_fast16_t" # string offset=486 +.Linfo_string37: + .asciz "int_fast32_t" # string offset=499 +.Linfo_string38: + .asciz "int_fast64_t" # string offset=512 +.Linfo_string39: + .asciz "__int_least8_t" # string offset=525 +.Linfo_string40: + .asciz "int_least8_t" # string offset=540 +.Linfo_string41: + .asciz "__int_least16_t" # string offset=553 +.Linfo_string42: + .asciz "int_least16_t" # string offset=569 +.Linfo_string43: + .asciz "__int_least32_t" # string offset=583 +.Linfo_string44: + .asciz "int_least32_t" # string offset=599 +.Linfo_string45: + .asciz "__int_least64_t" # string offset=613 +.Linfo_string46: + .asciz "int_least64_t" # string offset=629 +.Linfo_string47: + .asciz "__intmax_t" # string offset=643 +.Linfo_string48: + .asciz "intmax_t" # string offset=654 +.Linfo_string49: + .asciz "intptr_t" # string offset=663 +.Linfo_string50: + .asciz "__uint8_t" # string offset=672 +.Linfo_string51: + .asciz "uint8_t" # string offset=682 +.Linfo_string52: + .asciz "unsigned short" # string offset=690 +.Linfo_string53: + .asciz "__uint16_t" # string offset=705 +.Linfo_string54: + .asciz "uint16_t" # string offset=716 +.Linfo_string55: + .asciz "__uint32_t" # string offset=725 +.Linfo_string56: + .asciz "uint32_t" # string offset=736 +.Linfo_string57: + .asciz "unsigned long" # string offset=745 +.Linfo_string58: + .asciz "__uint64_t" # string offset=759 +.Linfo_string59: + .asciz "uint64_t" # string offset=770 +.Linfo_string60: + .asciz "uint_fast8_t" # string offset=779 +.Linfo_string61: + .asciz "uint_fast16_t" # string offset=792 +.Linfo_string62: + .asciz "uint_fast32_t" # string offset=806 +.Linfo_string63: + .asciz "uint_fast64_t" # string offset=820 +.Linfo_string64: + .asciz "__uint_least8_t" # string offset=834 +.Linfo_string65: + .asciz "uint_least8_t" # string offset=850 +.Linfo_string66: + .asciz "__uint_least16_t" # string offset=864 +.Linfo_string67: + .asciz "uint_least16_t" # string offset=881 +.Linfo_string68: + .asciz "__uint_least32_t" # string offset=896 +.Linfo_string69: + .asciz "uint_least32_t" # string offset=913 +.Linfo_string70: + .asciz "__uint_least64_t" # string
offset=928 +.Linfo_string71: + .asciz "uint_least64_t" # string offset=945 +.Linfo_string72: + .asciz "__uintmax_t" # string offset=960 +.Linfo_string73: + .asciz "uintmax_t" # string offset=972 +.Linfo_string74: + .asciz "uintptr_t" # string offset=982 +.Linfo_string75: + .asciz "t6" # string offset=992 +.Linfo_string76: + .asciz "_ZN2t6lsIiEEvi" # string offset=995 +.Linfo_string77: + .asciz "operator<<<int>" # string offset=1010 +.Linfo_string78: + .asciz "_ZN2t6ltIiEEvi" # string offset=1026 +.Linfo_string79: + .asciz "operator<<int>" # string offset=1041 +.Linfo_string80: + .asciz "_ZN2t6leIiEEvi" # string offset=1056 +.Linfo_string81: + .asciz "operator<=<int>" # string offset=1071 +.Linfo_string82: + .asciz "_ZN2t6cvP2t1IJfEEIiEEv" # string offset=1087 +.Linfo_string83: + .asciz "operator t1<float> *<int>" # string offset=1110 +.Linfo_string84: + .asciz "Ts" # string offset=1136 +.Linfo_string85: + .asciz "float" # string offset=1139 +.Linfo_string86: + .asciz "_STNt1|<float>" # string offset=1145 +.Linfo_string87: + .asciz "_ZN2t6miIiEEvi" # string offset=1160 +.Linfo_string88: + .asciz "operator-<int>" # string offset=1175 +.Linfo_string89: + .asciz "_ZN2t6mlIiEEvi" # string offset=1190 +.Linfo_string90: + .asciz "operator*<int>" # string offset=1205 +.Linfo_string91: + .asciz "_ZN2t6dvIiEEvi" # string offset=1220 +.Linfo_string92: + .asciz "operator/<int>" # string offset=1235 +.Linfo_string93: + .asciz "_ZN2t6rmIiEEvi" # string offset=1250 +.Linfo_string94: + .asciz "operator%<int>" # string offset=1265 +.Linfo_string95: + .asciz "_ZN2t6eoIiEEvi" # string offset=1280 +.Linfo_string96: + .asciz "operator^<int>" # string offset=1295 +.Linfo_string97: + .asciz "_ZN2t6anIiEEvi" # string offset=1310 +.Linfo_string98: + .asciz "operator&<int>" # string offset=1325 +.Linfo_string99: + .asciz "_ZN2t6orIiEEvi" # string offset=1340 +.Linfo_string100: + .asciz "operator|<int>" # string offset=1355 +.Linfo_string101: + .asciz "_ZN2t6coIiEEvv" # string offset=1370 +.Linfo_string102: + .asciz "operator~<int>" # string offset=1385 +.Linfo_string103: + .asciz "_ZN2t6ntIiEEvv" # string offset=1400 +.Linfo_string104: + .asciz "operator!<int>"
# string offset=1415 +.Linfo_string105: + .asciz "_ZN2t6aSIiEEvi" # string offset=1430 +.Linfo_string106: + .asciz "operator=<int>" # string offset=1445 +.Linfo_string107: + .asciz "_ZN2t6gtIiEEvi" # string offset=1460 +.Linfo_string108: + .asciz "operator><int>" # string offset=1475 +.Linfo_string109: + .asciz "_ZN2t6cmIiEEvi" # string offset=1490 +.Linfo_string110: + .asciz "operator,<int>" # string offset=1505 +.Linfo_string111: + .asciz "_ZN2t6clIiEEvv" # string offset=1520 +.Linfo_string112: + .asciz "operator()<int>" # string offset=1535 +.Linfo_string113: + .asciz "_ZN2t6ixIiEEvi" # string offset=1551 +.Linfo_string114: + .asciz "operator[]<int>" # string offset=1566 +.Linfo_string115: + .asciz "_ZN2t6ssIiEEvi" # string offset=1582 +.Linfo_string116: + .asciz "operator<=><int>" # string offset=1597 +.Linfo_string117: + .asciz "_ZN2t6nwIiEEPvmT_" # string offset=1614 +.Linfo_string118: + .asciz "operator new<int>" # string offset=1632 +.Linfo_string119: + .asciz "size_t" # string offset=1650 +.Linfo_string120: + .asciz "_ZN2t6naIiEEPvmT_" # string offset=1657 +.Linfo_string121: + .asciz "operator new[]<int>" # string offset=1675 +.Linfo_string122: + .asciz "_ZN2t6dlIiEEvPvT_" # string offset=1695 +.Linfo_string123: + .asciz "operator delete<int>" # string offset=1713 +.Linfo_string124: + .asciz "_ZN2t6daIiEEvPvT_" # string offset=1734 +.Linfo_string125: + .asciz "operator delete[]<int>" # string offset=1752 +.Linfo_string126: + .asciz "_ZN2t6awIiEEiv" # string offset=1775 +.Linfo_string127: + .asciz "operator co_await<int>" # string offset=1790 +.Linfo_string128: + .asciz "_STNt10|<void>" # string offset=1813 +.Linfo_string129: + .asciz "_ZN2t83memEv" # string offset=1828 +.Linfo_string130: + .asciz "mem" # string offset=1841 +.Linfo_string131: + .asciz "t8" # string offset=1845 +.Linfo_string132: + .asciz "_Zli5_suffy" # string offset=1848 +.Linfo_string133: + .asciz "operator\"\"_suff" # string offset=1860 +.Linfo_string134: + .asciz "main" # string offset=1876 +.Linfo_string135: + .asciz "_Z2f1IJiEEvv" # string offset=1881 +.Linfo_string136: + .asciz "_STNf1|<int>" # string offset=1894 +.Linfo_string137: + .asciz "_Z2f1IJfEEvv" # string offset=1907 +.Linfo_string138: + .asciz "_STNf1|<float>" # string offset=1920 +.Linfo_string139: + .asciz "_Z2f1IJbEEvv" # string offset=1935 +.Linfo_string140: + .asciz "_STNf1|<bool>" # string offset=1948 +.Linfo_string141: + .asciz "double" # string offset=1962 +.Linfo_string142: + .asciz "_Z2f1IJdEEvv" # string offset=1969 +.Linfo_string143: + .asciz "_STNf1|<double>" # string offset=1982 +.Linfo_string144: + .asciz "_Z2f1IJlEEvv" # string offset=1998 +.Linfo_string145: + .asciz "_STNf1|<long>" # string offset=2011 +.Linfo_string146: + .asciz "_Z2f1IJsEEvv" # string offset=2025 +.Linfo_string147: + .asciz "_STNf1|<short>" # string offset=2038 +.Linfo_string148: + .asciz "_Z2f1IJjEEvv" # string offset=2053 +.Linfo_string149: + .asciz "_STNf1|<unsigned int>" # string offset=2066 +.Linfo_string150: + .asciz "unsigned long long" # string offset=2088 +.Linfo_string151: + .asciz "_Z2f1IJyEEvv" # string offset=2107 +.Linfo_string152: + .asciz "_STNf1|<unsigned long long>" # string offset=2120 +.Linfo_string153: + .asciz "long long" # string offset=2148 +.Linfo_string154: + .asciz "_Z2f1IJxEEvv" # string offset=2158 +.Linfo_string155: + .asciz "_STNf1|<long long>" # string offset=2171 +.Linfo_string156: + .asciz "udt" # string offset=2190 +.Linfo_string157: + .asciz "_Z2f1IJ3udtEEvv" # string offset=2194 +.Linfo_string158: + .asciz "_STNf1|<udt>" # string offset=2210 +.Linfo_string159: + .asciz "_Z2f1IJN2ns3udtEEEvv" # string offset=2223 +.Linfo_string160: + .asciz "_STNf1|<ns::udt>" # string offset=2244
+.Linfo_string161: + .asciz "_Z2f1IJPN2ns3udtEEEvv" # string offset=2261 +.Linfo_string162: + .asciz "_STNf1|<ns::udt *>" # string offset=2283 +.Linfo_string163: + .asciz "inner" # string offset=2302 +.Linfo_string164: + .asciz "_Z2f1IJN2ns5inner3udtEEEvv" # string offset=2308 +.Linfo_string165: + .asciz "_STNf1|<ns::inner::udt>" # string offset=2335 +.Linfo_string166: + .asciz "_STNt1|<int>" # string offset=2359 +.Linfo_string167: + .asciz "_Z2f1IJ2t1IJiEEEEvv" # string offset=2372 +.Linfo_string168: + .asciz "_STNf1|<t1<int> >" # string offset=2392 +.Linfo_string169: + .asciz "_Z2f1IJifEEvv" # string offset=2410 +.Linfo_string170: + .asciz "_STNf1|<int, float>" # string offset=2424 +.Linfo_string171: + .asciz "_Z2f1IJPiEEvv" # string offset=2444 +.Linfo_string172: + .asciz "_STNf1|<int *>" # string offset=2458 +.Linfo_string173: + .asciz "_Z2f1IJRiEEvv" # string offset=2473 +.Linfo_string174: + .asciz "_STNf1|<int &>" # string offset=2487 +.Linfo_string175: + .asciz "_Z2f1IJOiEEvv" # string offset=2502 +.Linfo_string176: + .asciz "_STNf1|<int &&>" # string offset=2516 +.Linfo_string177: + .asciz "_Z2f1IJKiEEvv" # string offset=2532 +.Linfo_string178: + .asciz "_STNf1|<const int>" # string offset=2546 +.Linfo_string179: + .asciz "_Z2f1IJvEEvv" # string offset=2565 +.Linfo_string180: + .asciz "_STNf1|<void>" # string offset=2578 +.Linfo_string181: + .asciz "outer_class" # string offset=2592 +.Linfo_string182: + .asciz "inner_class" # string offset=2604 +.Linfo_string183: + .asciz "_Z2f1IJN11outer_class11inner_classEEEvv" # string offset=2616 +.Linfo_string184: + .asciz "_STNf1|<outer_class::inner_class>" # string offset=2656 +.Linfo_string185: + .asciz "_Z2f1IJmEEvv" # string offset=2690 +.Linfo_string186: + .asciz "_STNf1|<unsigned long>" # string offset=2703 +.Linfo_string187: + .asciz "_Z2f2ILb1ELi3EEvv" # string offset=2726 +.Linfo_string188: + .asciz "_STNf2|<true, 3>" # string offset=2744 +.Linfo_string189: + .asciz "A" # string offset=2761 +.Linfo_string190: + .asciz "_Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv" # string offset=2763 +.Linfo_string191: + .asciz "_STNf3|" # string offset=2805 +.Linfo_string192: + .asciz "_Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv" # string offset=2867 +.Linfo_string193: + .asciz "_STNf3|" # string offset=2914 +.Linfo_string194: + .asciz "_Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv" # string offset=3004 +.Linfo_string195: + .asciz "_STNf3|<ns::EnumerationSmall, ns::kNeg>" # string offset=3047 +.Linfo_string196: + .asciz "_Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv" # string offset=3087 +.Linfo_string197: + .asciz "f3" # string offset=3120 +.Linfo_string198: + .asciz "_Z2f3IPiJXadL_Z1iEEEEvv" # string offset=3371 +.Linfo_string199: + .asciz "f3<int *, &i>" # string offset=3395 +.Linfo_string200: + .asciz "_Z2f3IPiJLS0_0EEEvv" # string offset=3409 +.Linfo_string201: + .asciz "f3<int *, nullptr>" # string offset=3429 +.Linfo_string202: + .asciz "_Z2f3ImJLm1EEEvv" # string offset=3448 +.Linfo_string203: + .asciz "_STNf3|<unsigned long, 1UL>" # string offset=3465 +.Linfo_string204: + .asciz "_Z2f3IyJLy1EEEvv" # string offset=3493 +.Linfo_string205: + .asciz "_STNf3|<unsigned long long, 1ULL>" # string offset=3510 +.Linfo_string206: + .asciz "_Z2f3IlJLl1EEEvv" # string offset=3544 +.Linfo_string207: + .asciz "_STNf3|<long, 1L>" # string offset=3561 +.Linfo_string208: + .asciz "_Z2f3IjJLj1EEEvv" # string offset=3579 +.Linfo_string209: + .asciz "_STNf3|<unsigned int, 1U>" # string offset=3596 +.Linfo_string210: + .asciz "_Z2f3IsJLs1EEEvv" # string offset=3622 +.Linfo_string211: + .asciz "_STNf3|<short, (short)1>" # string offset=3639 +.Linfo_string212: + .asciz "_Z2f3IhJLh0EEEvv" # string offset=3664 +.Linfo_string213: + .asciz "_STNf3|<unsigned char, (unsigned char)'\x00'>" # string offset=3681 +.Linfo_string214: + .asciz "_Z2f3IaJLa0EEEvv" # string offset=3727 +.Linfo_string215: + .asciz
"_STNf3|" # string offset=3744 +.Linfo_string216: + .asciz "_Z2f3ItJLt1ELt2EEEvv" # string offset=3786 +.Linfo_string217: + .asciz "_STNf3|" # string offset=3807 +.Linfo_string218: + .asciz "char" # string offset=3869 +.Linfo_string219: + .asciz "_Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv" # string offset=3874 +.Linfo_string220: + .asciz "_STNf3|" # string offset=3941 +.Linfo_string221: + .asciz "__int128" # string offset=4033 +.Linfo_string222: + .asciz "_Z2f3InJLn18446744073709551614EEEvv" # string offset=4042 +.Linfo_string223: + .asciz "f3<__int128, (__int128)18446744073709551614>" # string offset=4078 +.Linfo_string224: + .asciz "_Z2f4IjLj3EEvv" # string offset=4123 +.Linfo_string225: + .asciz "_STNf4|" # string offset=4138 +.Linfo_string226: + .asciz "_Z2f1IJ2t3IiLb0EEEEvv" # string offset=4164 +.Linfo_string227: + .asciz "_STNf1| >" # string offset=4186 +.Linfo_string228: + .asciz "_STNt3|, false>" # string offset=4211 +.Linfo_string229: + .asciz "_Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv" # string offset=4242 +.Linfo_string230: + .asciz "_STNf1|, false> >" # string offset=4273 +.Linfo_string231: + .asciz "_Z2f1IJZ4mainE3$_1EEvv" # string offset=4309 +.Linfo_string232: + .asciz "f1<(lambda at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12)>" # string offset=4332 +.Linfo_string233: + .asciz "_Z2f1IJFifEEEvv" # string offset=4442 +.Linfo_string234: + .asciz "_STNf1|" # string offset=4458 +.Linfo_string235: + .asciz "_Z2f1IJRKiEEvv" # string offset=4479 +.Linfo_string236: + .asciz "_STNf1|" # string offset=4494 +.Linfo_string237: + .asciz "_Z2f1IJRPKiEEvv" # string offset=4515 +.Linfo_string238: + .asciz "_STNf1|" # string offset=4531 +.Linfo_string239: + .asciz "t5" # string offset=4553 +.Linfo_string240: + .asciz "_Z2f1IJN12_GLOBAL__N_12t5EEEvv" # string offset=4556 +.Linfo_string241: + .asciz "_STNf1|<(anonymous namespace)::t5>" # string offset=4587 +.Linfo_string242: + .asciz "decltype(nullptr)" # string offset=4622 +.Linfo_string243: + .asciz "_Z2f1IJDnEEvv" # string offset=4640 +.Linfo_string244: + .asciz "_STNf1|" # string offset=4654 +.Linfo_string245: + .asciz "_Z2f1IJPlS0_EEvv" # string offset=4678 +.Linfo_string246: + .asciz "_STNf1|" # string offset=4695 +.Linfo_string247: + .asciz "_Z2f1IJPlP3udtEEvv" # string offset=4719 +.Linfo_string248: + .asciz "_STNf1|" # string offset=4738 +.Linfo_string249: + .asciz "_Z2f1IJKPvEEvv" # string offset=4761 +.Linfo_string250: + .asciz "_STNf1|" # string offset=4776 +.Linfo_string251: + .asciz "_Z2f1IJPKPKvEEvv" # string offset=4797 +.Linfo_string252: + .asciz "_STNf1|" # string offset=4814 +.Linfo_string253: + .asciz "_Z2f1IJFvvEEEvv" # string offset=4843 +.Linfo_string254: + .asciz "_STNf1|" # string offset=4859 +.Linfo_string255: + .asciz "_Z2f1IJPFvvEEEvv" # string offset=4876 +.Linfo_string256: + .asciz "_STNf1|" # string offset=4893 +.Linfo_string257: + .asciz "_Z2f1IJPZ4mainE3$_1EEvv" # string offset=4913 +.Linfo_string258: + .asciz "f1<(lambda at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12) *>" # string offset=4937 +.Linfo_string259: + .asciz "_Z2f1IJZ4mainE3$_2EEvv" # string offset=5049 +.Linfo_string260: + .asciz "f1<(unnamed struct at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3)>" # string offset=5072 +.Linfo_string261: + .asciz "_Z2f1IJPZ4mainE3$_2EEvv" # string offset=5189 +.Linfo_string262: + .asciz "f1<(unnamed struct at 
cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3) *>" # string offset=5213 +.Linfo_string263: + .asciz "T1" # string offset=5332 +.Linfo_string264: + .asciz "T2" # string offset=5335 +.Linfo_string265: + .asciz "_Z2f5IJ2t1IJiEEEiEvv" # string offset=5338 +.Linfo_string266: + .asciz "_STNf5|<t1<int>, int>" # string offset=5359 +.Linfo_string267: + .asciz "_Z2f5IJEiEvv" # string offset=5381 +.Linfo_string268: + .asciz "_STNf5|<int>" # string offset=5394 +.Linfo_string269: + .asciz "_Z2f6I2t1IJiEEJEEvv" # string offset=5407 +.Linfo_string270: + .asciz "_STNf6|<t1<int> >" # string offset=5427 +.Linfo_string271: + .asciz "_Z2f1IJEEvv" # string offset=5445 +.Linfo_string272: + .asciz "_STNf1|<>" # string offset=5457 +.Linfo_string273: + .asciz "_Z2f1IJPKvS1_EEvv" # string offset=5467 +.Linfo_string274: + .asciz "_STNf1|<const void *, const void *>" # string offset=5485 +.Linfo_string275: + .asciz "_STNt1|<int *>" # string offset=5521 +.Linfo_string276: + .asciz "_Z2f1IJP2t1IJPiEEEEvv" # string offset=5536 +.Linfo_string277: + .asciz "_STNf1|<t1<int *> *>" # string offset=5558 +.Linfo_string278: + .asciz "__ARRAY_SIZE_TYPE__" # string offset=5579 +.Linfo_string279: + .asciz "_Z2f1IJA_PiEEvv" # string offset=5599 +.Linfo_string280: + .asciz "_STNf1|<int *[]>" # string offset=5615 +.Linfo_string281: + .asciz "t7" # string offset=5632 +.Linfo_string282: + .asciz "_Z2f1IJZ4mainE2t7EEvv" # string offset=5635 +.Linfo_string283: + .asciz "_STNf1|<t7>" # string offset=5657 +.Linfo_string284: + .asciz "_Z2f1IJRA3_iEEvv" # string offset=5669 +.Linfo_string285: + .asciz "_STNf1|<int (&)[3]>" # string offset=5686 +.Linfo_string286: + .asciz "_Z2f1IJPA3_iEEvv" # string offset=5706 +.Linfo_string287: + .asciz "_STNf1|<int (*)[3]>" # string offset=5723 +.Linfo_string288: + .asciz "t1" # string offset=5743 +.Linfo_string289: + .asciz "_Z2f7I2t1Evv" # string offset=5746 +.Linfo_string290: + .asciz "_STNf7|<t1>" # string offset=5759 +.Linfo_string291: + .asciz "_Z2f8I2t1iEvv" # string offset=5771 +.Linfo_string292: + .asciz "_STNf8|<t1, int>" # string offset=5785 +.Linfo_string293: + .asciz "ns::inner::ttp" # string offset=5802 +.Linfo_string294: + .asciz "_ZN2ns8ttp_userINS_5inner3ttpEEEvv" # string offset=5817 +.Linfo_string295: + .asciz "_STNttp_user|<ns::inner::ttp>" # string offset=5852 +.Linfo_string296: + .asciz "_Z2f1IJPiPDnEEvv" # string offset=5882 +.Linfo_string297: + .asciz "_STNf1|<int *, std::nullptr_t *>" # string offset=5899 +.Linfo_string298: + .asciz "_STNt7|<int>" # string offset=5932 +.Linfo_string299: + .asciz "_Z2f1IJ2t7IiEEEvv" # string offset=5945 +.Linfo_string300: + .asciz "_STNf1|<t7<int> >" # string offset=5963 +.Linfo_string301: + .asciz "ns::inl::t9" # string offset=5981 +.Linfo_string302: + .asciz "_Z2f7IN2ns3inl2t9EEvv" # string offset=5993 +.Linfo_string303: + .asciz "_STNf7|<ns::inl::t9>" # string offset=6015 +.Linfo_string304: + .asciz "_Z2f1IJU7_AtomiciEEvv" # string offset=6036 +.Linfo_string305: + .asciz "f1<_Atomic(int)>" # string offset=6058 +.Linfo_string306: + .asciz "_Z2f1IJilVcEEvv" # string offset=6075 +.Linfo_string307: + .asciz "_STNf1|<int, long, volatile char>" # string offset=6091 +.Linfo_string308: + .asciz "_Z2f1IJDv2_iEEvv" # string offset=6125 +.Linfo_string309: + .asciz "f1<__attribute__((__vector_size__(2 * sizeof(int)))) int>" # string offset=6142 +.Linfo_string310: + .asciz "_Z2f1IJVKPiEEvv" # string offset=6200 +.Linfo_string311: + .asciz "_STNf1|<int *const volatile>" # string offset=6216 +.Linfo_string312: + .asciz "_Z2f1IJVKvEEvv" # string offset=6245 +.Linfo_string313: + .asciz "_STNf1|<const volatile void>" # string offset=6260 +.Linfo_string314: + .asciz "t1<(lambda at
cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12)>" # string offset=6289 +.Linfo_string315: + .asciz "_Z2f1IJ2t1IJZ4mainE3$_1EEEEvv" # string offset=6399 +.Linfo_string316: + .asciz "_STNf1|<t1<(lambda at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12)> >" # string offset=6429 +.Linfo_string317: + .asciz "_ZN3t10C2IvEEv" # string offset=6549 +.Linfo_string318: + .asciz "_Z2f1IJM3udtKFvvEEEvv" # string offset=6564 +.Linfo_string319: + .asciz "_STNf1|<void (udt::*)() const>" # string offset=6586 +.Linfo_string320: + .asciz "_Z2f1IJM3udtVFvvREEEvv" # string offset=6617 +.Linfo_string321: + .asciz "_STNf1|<void (udt::*)() volatile &>" # string offset=6640 +.Linfo_string322: + .asciz "_Z2f1IJM3udtVKFvvOEEEvv" # string offset=6676 +.Linfo_string323: + .asciz "_STNf1|<void (udt::*)() const volatile &&>" # string offset=6700 +.Linfo_string324: + .asciz "_Z2f9IiEPFvvEv" # string offset=6743 +.Linfo_string325: + .asciz "_STNf9|<int>" # string offset=6758 +.Linfo_string326: + .asciz "_Z2f1IJKPFvvEEEvv" # string offset=6771 +.Linfo_string327: + .asciz "_STNf1|<void (*const)()>" # string offset=6789 +.Linfo_string328: + .asciz "_Z2f1IJRA1_KcEEvv" # string offset=6814 +.Linfo_string329: + .asciz "_STNf1|<const char (&)[1]>" # string offset=6832 +.Linfo_string330: + .asciz "_Z2f1IJKFvvREEEvv" # string offset=6859 +.Linfo_string331: + .asciz "_STNf1|<void () const &>" # string offset=6877 +.Linfo_string332: + .asciz "_Z2f1IJVFvvOEEEvv" # string offset=6902 +.Linfo_string333: + .asciz "_STNf1|<void () volatile &&>" # string offset=6920 +.Linfo_string334: + .asciz "_Z2f1IJVKFvvEEEvv" # string offset=6949 +.Linfo_string335: + .asciz "_STNf1|<void () const volatile>" # string offset=6967 +.Linfo_string336: + .asciz "_Z2f1IJA1_KPiEEvv" # string offset=6999 +.Linfo_string337: + .asciz "_STNf1|<int *const[1]>" # string offset=7017 +.Linfo_string338: + .asciz "_Z2f1IJRA1_KPiEEvv" # string offset=7040 +.Linfo_string339: + .asciz "_STNf1|<int *const (&)[1]>" # string offset=7059 +.Linfo_string340: + .asciz "_Z2f1IJRKM3udtFvvEEEvv" # string offset=7086 +.Linfo_string341: + .asciz "_STNf1|<void (udt::*const &)()>" # string offset=7109 +.Linfo_string342: + .asciz "_Z2f1IJFPFvfEiEEEvv" # string offset=7141 +.Linfo_string343: + .asciz "_STNf1|<void (*(int))(float)>" # string offset=7161 +.Linfo_string344: + .asciz "_Z2f1IJPDoFvvEEEvv" # string offset=7191 +.Linfo_string345: + .asciz "f1<void (*)() noexcept>" # string offset=7210 +.Linfo_string346: + .asciz "_Z2f1IJFvZ4mainE3$_2EEEvv" # string offset=7234 +.Linfo_string347: + .asciz "f1<void ((unnamed struct at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3))>" # string offset=7260 +.Linfo_string348: + .asciz "_Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv" # string offset=7384 +.Linfo_string349: + .asciz "f1<void (t8, (unnamed struct at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3))>" # string offset=7420 +.Linfo_string350: + .asciz "_Z2f1IJFvZ4mainE2t8EEEvv" # string offset=7548 +.Linfo_string351: + .asciz "_STNf1|<void (t8)>" # string offset=7573 +.Linfo_string352: + .asciz "_Z19operator_not_reallyIiEvv" # string offset=7592 +.Linfo_string353: + .asciz "_STNoperator_not_really|<int>" # string offset=7621 +.Linfo_string354: + .asciz "_Z2f1IJZN2t83memEvE2t7EEvv" # string offset=7651 +.Linfo_string355: + .asciz "_Z2f1IJM2t8FvvEEEvv" # string offset=7678 +.Linfo_string356: + .asciz "_STNf1|<void (t8::*)()>" # string offset=7698 +.Linfo_string357: + .asciz "L" # string offset=7722 +.Linfo_string358: + .asciz "v2" # string offset=7724 +.Linfo_string359: + .asciz "N" # string offset=7727 +.Linfo_string360: + .asciz "_STNt4|<3U>" # string offset=7729 +.Linfo_string361: + .asciz "v1" # string offset=7741 +.Linfo_string362: + .asciz "t3<(lambda at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12), false>" # string offset=7744 +.Linfo_string363: + .asciz "v6" # string offset=7861 +.Linfo_string364: + .asciz "x" # string offset=7864 +.Linfo_string365: + .asciz "t7i" # string offset=7866 +.Linfo_string366: + .asciz "v3" # string
offset=7870 +.Linfo_string367: + .asciz "_STNt2|<int>" # string offset=7873 +.Linfo_string368: + .asciz "_STNt2|<float>" # string offset=7886 +.Linfo_string369: + .asciz "_STNt1|<bool>" # string offset=7901 +.Linfo_string370: + .asciz "_STNt2|<bool>" # string offset=7915 +.Linfo_string371: + .asciz "_STNt1|<double>" # string offset=7929 +.Linfo_string372: + .asciz "_STNt2|<double>" # string offset=7945 +.Linfo_string373: + .asciz "_STNt1|<long>" # string offset=7961 +.Linfo_string374: + .asciz "_STNt2|<long>" # string offset=7975 +.Linfo_string375: + .asciz "_STNt1|<short>" # string offset=7989 +.Linfo_string376: + .asciz "_STNt2|<short>" # string offset=8004 +.Linfo_string377: + .asciz "_STNt1|<unsigned int>" # string offset=8019 +.Linfo_string378: + .asciz "_STNt2|<unsigned int>" # string offset=8041 +.Linfo_string379: + .asciz "_STNt1|<unsigned long long>" # string offset=8063 +.Linfo_string380: + .asciz "_STNt2|<unsigned long long>" # string offset=8091 +.Linfo_string381: + .asciz "_STNt1|<long long>" # string offset=8119 +.Linfo_string382: + .asciz "_STNt2|<long long>" # string offset=8138 +.Linfo_string383: + .asciz "_STNt1|<udt>" # string offset=8157 +.Linfo_string384: + .asciz "_STNt2|<udt>" # string offset=8170 +.Linfo_string385: + .asciz "_STNt1|<ns::udt>" # string offset=8183 +.Linfo_string386: + .asciz "_STNt2|<ns::udt>" # string offset=8200 +.Linfo_string387: + .asciz "_STNt1|<ns::udt *>" # string offset=8217 +.Linfo_string388: + .asciz "_STNt2|<ns::udt *>" # string offset=8236 +.Linfo_string389: + .asciz "_STNt1|<ns::inner::udt>" # string offset=8255 +.Linfo_string390: + .asciz "_STNt2|<ns::inner::udt>" # string offset=8279 +.Linfo_string391: + .asciz "_STNt1|<t1<int> >" # string offset=8303 +.Linfo_string392: + .asciz "_STNt2|<t1<int> >" # string offset=8321 +.Linfo_string393: + .asciz "_STNt1|<int, float>" # string offset=8339 +.Linfo_string394: + .asciz "_STNt2|<int, float>" # string offset=8359 +.Linfo_string395: + .asciz "_STNt2|<int *>" # string offset=8379 +.Linfo_string396: + .asciz "_STNt1|<int &>" # string offset=8394 +.Linfo_string397: + .asciz "_STNt2|<int &>" # string offset=8409 +.Linfo_string398: + .asciz "_STNt1|<int &&>" # string offset=8424 +.Linfo_string399: + .asciz "_STNt2|<int &&>" # string offset=8440 +.Linfo_string400: + .asciz "_STNt1|<const int>" # string offset=8456 +.Linfo_string401: + .asciz "_STNt2|<const int>" # string offset=8475 +.Linfo_string402: + .asciz "_STNt1|<void>" # string offset=8494 +.Linfo_string403: + .asciz "_STNt2|<void>" # string offset=8508 +.Linfo_string404: + .asciz "_STNt1|<outer_class::inner_class>" # string offset=8522 +.Linfo_string405: + .asciz "_STNt2|<outer_class::inner_class>" # string offset=8556 +.Linfo_string406: + .asciz "_STNt1|<unsigned long>" # string offset=8590 +.Linfo_string407: + .asciz "_STNt2|<unsigned long>" # string offset=8613 +.Linfo_string408: + .asciz "_STNt1|<t3<int, false> >" # string offset=8636 +.Linfo_string409: + .asciz "_STNt2|<t3<int, false> >" # string offset=8661 +.Linfo_string410: + .asciz "_STNt1|<t3<t3<int, false>, false> >" # string offset=8686 +.Linfo_string411: + .asciz "_STNt2|<t3<t3<int, false>, false> >" # string offset=8722 +.Linfo_string412: + .asciz "t2<(lambda at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12)>" # string offset=8758 +.Linfo_string413: + .asciz "_STNt1|<int (float)>" # string offset=8868 +.Linfo_string414: + .asciz "_STNt2|<int (float)>" # string offset=8889 +.Linfo_string415: + .asciz "_STNt1|<const int &>" # string offset=8910 +.Linfo_string416: + .asciz "_STNt2|<const int &>" # string offset=8931 +.Linfo_string417: + .asciz "_STNt1|<const int *&>" # string offset=8952 +.Linfo_string418: + .asciz "_STNt2|<const int *&>" # string offset=8974 +.Linfo_string419: + .asciz "_STNt1|<(anonymous namespace)::t5>" # string offset=8996 +.Linfo_string420: + .asciz "_STNt2|<(anonymous namespace)::t5>" # string offset=9031 +.Linfo_string421: + .asciz "_STNt1|<std::nullptr_t>" # string offset=9066 +.Linfo_string422: + .asciz "_STNt2|<std::nullptr_t>" # string offset=9090 +.Linfo_string423: + .asciz "_STNt1|<long *, long *>" # string
offset=9114 +.Linfo_string424: + .asciz "_STNt2|<long *, long *>" # string offset=9138 +.Linfo_string425: + .asciz "_STNt1|<long *, udt *>" # string offset=9162 +.Linfo_string426: + .asciz "_STNt2|<long *, udt *>" # string offset=9185 +.Linfo_string427: + .asciz "_STNt1|<void *const>" # string offset=9208 +.Linfo_string428: + .asciz "_STNt2|<void *const>" # string offset=9229 +.Linfo_string429: + .asciz "_STNt1|<const void *const *>" # string offset=9250 +.Linfo_string430: + .asciz "_STNt2|<const void *const *>" # string offset=9279 +.Linfo_string431: + .asciz "_STNt1|<void ()>" # string offset=9308 +.Linfo_string432: + .asciz "_STNt2|<void ()>" # string offset=9325 +.Linfo_string433: + .asciz "_STNt1|<void (*)()>" # string offset=9342 +.Linfo_string434: + .asciz "_STNt2|<void (*)()>" # string offset=9362 +.Linfo_string435: + .asciz "t1<(lambda at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12) *>" # string offset=9382 +.Linfo_string436: + .asciz "t2<(lambda at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:168:12) *>" # string offset=9494 +.Linfo_string437: + .asciz "t1<(unnamed struct at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3)>" # string offset=9606 +.Linfo_string438: + .asciz "t2<(unnamed struct at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3)>" # string offset=9723 +.Linfo_string439: + .asciz "t1<(unnamed struct at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3) *>" # string offset=9840 +.Linfo_string440: + .asciz "t2<(unnamed struct at cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names.cpp:167:3) *>" # string offset=9959 +.Linfo_string441: + .asciz "_STNt1|<>" # string offset=10078 +.Linfo_string442: + .asciz "_STNt2|<>" # string offset=10088 +.Linfo_string443: + .asciz "_STNt1|<const void *, const void *>" # string offset=10098 +.Linfo_string444: + .asciz "_STNt2|<const void *, const void *>" # string offset=10134 +.Linfo_string445: + .asciz "_STNt1|<t1<int *> *>" # string offset=10170 +.Linfo_string446: + .asciz "_STNt2|<t1<int *> *>" # string offset=10191 +.Linfo_string447: + .asciz "_STNt1|<int *[]>" # string offset=10212 +.Linfo_string448: + .asciz "_STNt2|<int *[]>" # string offset=10229 +.Linfo_string449: + .asciz "this" # string offset=10246 +.Linfo_string450: + .asciz "_STNt1|<t7>" # string offset=10251 +.Linfo_string451: + .asciz "_STNt2|<t7>" # string offset=10263 +.Linfo_string452: + .asciz "_STNt1|<int (&)[3]>" # string offset=10275 +.Linfo_string453: + .asciz "_STNt2|<int (&)[3]>" # string offset=10295 +.Linfo_string454: + .asciz "_STNt1|<int (*)[3]>" # string offset=10315 +.Linfo_string455: + .asciz "_STNt2|<int (*)[3]>" # string offset=10335 +.Linfo_string456: + .asciz "_STNt1|<int *, std::nullptr_t *>" # string offset=10355 +.Linfo_string457: + .asciz "_STNt2|<int *, std::nullptr_t *>" # string offset=10388 +.Linfo_string458: + .asciz "_STNt1|<t7<int> >" # string offset=10421 +.Linfo_string459: + .asciz "_STNt2|<t7<int> >" # string offset=10439 +.Linfo_string460: + .asciz "t1<_Atomic(int)>" # string offset=10457 +.Linfo_string461: + .asciz "t2<_Atomic(int)>" # string offset=10474 +.Linfo_string462: + .asciz "_STNt1|<int, long, volatile char>" # string offset=10491 +.Linfo_string463: + .asciz "_STNt2|<int, long, volatile char>" # string offset=10525 +.Linfo_string464: + .asciz "t1<__attribute__((__vector_size__(2 * sizeof(int)))) int>" # string offset=10559 +.Linfo_string465: + .asciz "t2<__attribute__((__vector_size__(2 * sizeof(int)))) int>" # string offset=10617 +.Linfo_string466: + .asciz "_STNt1|<int *const volatile>" # string offset=10675 +.Linfo_string467: + .asciz "_STNt2|<int *const volatile>" # string offset=10704 +.Linfo_string468: + .asciz "_STNt1|<const volatile void>" # string offset=10733 +.Linfo_string469: + .asciz "_STNt2|<const volatile void>" # string offset=10762 +.Linfo_string470: +
.asciz "_STNt1| >" # string offset=10791 +.Linfo_string471: + .asciz "_STNt2| >" # string offset=10911 +.Linfo_string472: + .asciz "_STNt1|" # string offset=11031 +.Linfo_string473: + .asciz "_STNt2|" # string offset=11062 +.Linfo_string474: + .asciz "_STNt1|" # string offset=11093 +.Linfo_string475: + .asciz "_STNt2|" # string offset=11129 +.Linfo_string476: + .asciz "_STNt1|" # string offset=11165 +.Linfo_string477: + .asciz "_STNt2|" # string offset=11208 +.Linfo_string478: + .asciz "_STNt1|" # string offset=11251 +.Linfo_string479: + .asciz "_STNt2|" # string offset=11276 +.Linfo_string480: + .asciz "_STNt1|" # string offset=11301 +.Linfo_string481: + .asciz "_STNt2|" # string offset=11328 +.Linfo_string482: + .asciz "_STNt1|" # string offset=11355 +.Linfo_string483: + .asciz "_STNt2|" # string offset=11380 +.Linfo_string484: + .asciz "_STNt1|" # string offset=11405 +.Linfo_string485: + .asciz "_STNt2|" # string offset=11434 +.Linfo_string486: + .asciz "_STNt1|" # string offset=11463 +.Linfo_string487: + .asciz "_STNt2|" # string offset=11495 +.Linfo_string488: + .asciz "_STNt1|" # string offset=11527 +.Linfo_string489: + .asciz "_STNt2|" # string offset=11550 +.Linfo_string490: + .asciz "_STNt1|" # string offset=11573 +.Linfo_string491: + .asciz "_STNt2|" # string offset=11600 +.Linfo_string492: + .asciz "_STNt1|" # string offset=11627 +.Linfo_string493: + .asciz "_STNt2|" # string offset=11659 +.Linfo_string494: + .asciz "_STNt1|" # string offset=11691 +.Linfo_string495: + .asciz "_STNt2|" # string offset=11721 +.Linfo_string496: + .asciz "t1" # string offset=11751 +.Linfo_string497: + .asciz "t2" # string offset=11775 +.Linfo_string498: + .asciz "t1" # string offset=11799 +.Linfo_string499: + .asciz "t2" # string offset=11923 +.Linfo_string500: + .asciz "t1" # string offset=12047 +.Linfo_string501: + .asciz "t2" # string offset=12175 +.Linfo_string502: + .asciz "_STNt1|" # string offset=12303 +.Linfo_string503: + .asciz "_STNt2|" # string offset=12322 +.Linfo_string504: + .asciz "_STNt1|" # string offset=12341 +.Linfo_string505: + .asciz "_STNt2|" # string offset=12365 + .ident "clang version 14.0.0 (git@github.com:llvm/llvm-project.git 6d48e2505c7a68a470e75b61ad504d51db0f8a36)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _Zli5_suffy + .addrsig_sym _Z2f1IJiEEvv + .addrsig_sym _Z2f1IJfEEvv + .addrsig_sym _Z2f1IJbEEvv + .addrsig_sym _Z2f1IJdEEvv + .addrsig_sym _Z2f1IJlEEvv + .addrsig_sym _Z2f1IJsEEvv + .addrsig_sym _Z2f1IJjEEvv + .addrsig_sym _Z2f1IJyEEvv + .addrsig_sym _Z2f1IJxEEvv + .addrsig_sym _Z2f1IJ3udtEEvv + .addrsig_sym _Z2f1IJN2ns3udtEEEvv + .addrsig_sym _Z2f1IJPN2ns3udtEEEvv + .addrsig_sym _Z2f1IJN2ns5inner3udtEEEvv + .addrsig_sym _Z2f1IJ2t1IJiEEEEvv + .addrsig_sym _Z2f1IJifEEvv + .addrsig_sym _Z2f1IJPiEEvv + .addrsig_sym _Z2f1IJRiEEvv + .addrsig_sym _Z2f1IJOiEEvv + .addrsig_sym _Z2f1IJKiEEvv + .addrsig_sym _Z2f1IJvEEvv + .addrsig_sym _Z2f1IJN11outer_class11inner_classEEEvv + .addrsig_sym _Z2f1IJmEEvv + .addrsig_sym _Z2f2ILb1ELi3EEvv + .addrsig_sym _Z2f3IN2ns11EnumerationEJLS1_1ELS1_2EEEvv + .addrsig_sym _Z2f3IN2ns16EnumerationClassEJLS1_1ELS1_2EEEvv + .addrsig_sym _Z2f3IN2ns16EnumerationSmallEJLS1_255EEEvv + .addrsig_sym _Z2f3IN2ns3$_0EJLS1_1ELS1_2EEEvv + .addrsig_sym _Z2f3IPiJXadL_Z1iEEEEvv + .addrsig_sym _Z2f3IPiJLS0_0EEEvv + .addrsig_sym _Z2f3ImJLm1EEEvv + .addrsig_sym _Z2f3IyJLy1EEEvv + .addrsig_sym _Z2f3IlJLl1EEEvv + .addrsig_sym _Z2f3IjJLj1EEEvv + .addrsig_sym _Z2f3IsJLs1EEEvv + .addrsig_sym _Z2f3IhJLh0EEEvv + .addrsig_sym 
_Z2f3IaJLa0EEEvv + .addrsig_sym _Z2f3ItJLt1ELt2EEEvv + .addrsig_sym _Z2f3IcJLc0ELc1ELc6ELc7ELc13ELc14ELc31ELc32ELc33ELc127ELcn128EEEvv + .addrsig_sym _Z2f3InJLn18446744073709551614EEEvv + .addrsig_sym _Z2f4IjLj3EEvv + .addrsig_sym _Z2f1IJ2t3IiLb0EEEEvv + .addrsig_sym _Z2f1IJ2t3IS0_IiLb0EELb0EEEEvv + .addrsig_sym _Z2f1IJZ4mainE3$_1EEvv + .addrsig_sym _Z2f1IJFifEEEvv + .addrsig_sym _Z2f1IJRKiEEvv + .addrsig_sym _Z2f1IJRPKiEEvv + .addrsig_sym _Z2f1IJN12_GLOBAL__N_12t5EEEvv + .addrsig_sym _Z2f1IJDnEEvv + .addrsig_sym _Z2f1IJPlS0_EEvv + .addrsig_sym _Z2f1IJPlP3udtEEvv + .addrsig_sym _Z2f1IJKPvEEvv + .addrsig_sym _Z2f1IJPKPKvEEvv + .addrsig_sym _Z2f1IJFvvEEEvv + .addrsig_sym _Z2f1IJPFvvEEEvv + .addrsig_sym _Z2f1IJPZ4mainE3$_1EEvv + .addrsig_sym _Z2f1IJZ4mainE3$_2EEvv + .addrsig_sym _Z2f1IJPZ4mainE3$_2EEvv + .addrsig_sym _Z2f5IJ2t1IJiEEEiEvv + .addrsig_sym _Z2f5IJEiEvv + .addrsig_sym _Z2f6I2t1IJiEEJEEvv + .addrsig_sym _Z2f1IJEEvv + .addrsig_sym _Z2f1IJPKvS1_EEvv + .addrsig_sym _Z2f1IJP2t1IJPiEEEEvv + .addrsig_sym _Z2f1IJA_PiEEvv + .addrsig_sym _ZN2t6lsIiEEvi + .addrsig_sym _ZN2t6ltIiEEvi + .addrsig_sym _ZN2t6leIiEEvi + .addrsig_sym _ZN2t6cvP2t1IJfEEIiEEv + .addrsig_sym _ZN2t6miIiEEvi + .addrsig_sym _ZN2t6mlIiEEvi + .addrsig_sym _ZN2t6dvIiEEvi + .addrsig_sym _ZN2t6rmIiEEvi + .addrsig_sym _ZN2t6eoIiEEvi + .addrsig_sym _ZN2t6anIiEEvi + .addrsig_sym _ZN2t6orIiEEvi + .addrsig_sym _ZN2t6coIiEEvv + .addrsig_sym _ZN2t6ntIiEEvv + .addrsig_sym _ZN2t6aSIiEEvi + .addrsig_sym _ZN2t6gtIiEEvi + .addrsig_sym _ZN2t6cmIiEEvi + .addrsig_sym _ZN2t6clIiEEvv + .addrsig_sym _ZN2t6ixIiEEvi + .addrsig_sym _ZN2t6ssIiEEvi + .addrsig_sym _ZN2t6nwIiEEPvmT_ + .addrsig_sym _ZN2t6naIiEEPvmT_ + .addrsig_sym _ZN2t6dlIiEEvPvT_ + .addrsig_sym _ZN2t6daIiEEvPvT_ + .addrsig_sym _ZN2t6awIiEEiv + .addrsig_sym _Z2f1IJZ4mainE2t7EEvv + .addrsig_sym _Z2f1IJRA3_iEEvv + .addrsig_sym _Z2f1IJPA3_iEEvv + .addrsig_sym _Z2f7I2t1Evv + .addrsig_sym _Z2f8I2t1iEvv + .addrsig_sym _ZN2ns8ttp_userINS_5inner3ttpEEEvv + .addrsig_sym _Z2f1IJPiPDnEEvv + .addrsig_sym _Z2f1IJ2t7IiEEEvv + .addrsig_sym _Z2f7IN2ns3inl2t9EEvv + .addrsig_sym _Z2f1IJU7_AtomiciEEvv + .addrsig_sym _Z2f1IJilVcEEvv + .addrsig_sym _Z2f1IJDv2_iEEvv + .addrsig_sym _Z2f1IJVKPiEEvv + .addrsig_sym _Z2f1IJVKvEEvv + .addrsig_sym _Z2f1IJ2t1IJZ4mainE3$_1EEEEvv + .addrsig_sym _Z2f1IJM3udtKFvvEEEvv + .addrsig_sym _Z2f1IJM3udtVFvvREEEvv + .addrsig_sym _Z2f1IJM3udtVKFvvOEEEvv + .addrsig_sym _Z2f9IiEPFvvEv + .addrsig_sym _Z2f1IJKPFvvEEEvv + .addrsig_sym _Z2f1IJRA1_KcEEvv + .addrsig_sym _Z2f1IJKFvvREEEvv + .addrsig_sym _Z2f1IJVFvvOEEEvv + .addrsig_sym _Z2f1IJVKFvvEEEvv + .addrsig_sym _Z2f1IJA1_KPiEEvv + .addrsig_sym _Z2f1IJRA1_KPiEEvv + .addrsig_sym _Z2f1IJRKM3udtFvvEEEvv + .addrsig_sym _Z2f1IJFPFvfEiEEEvv + .addrsig_sym _Z2f1IJPDoFvvEEEvv + .addrsig_sym _Z2f1IJFvZ4mainE3$_2EEEvv + .addrsig_sym _Z2f1IJFvZ4mainE2t8Z4mainE3$_2EEEvv + .addrsig_sym _Z2f1IJFvZ4mainE2t8EEEvv + .addrsig_sym _Z19operator_not_reallyIiEvv + .addrsig_sym _Z2f1IJZN2t83memEvE2t7EEvv + .addrsig_sym _Z2f1IJM2t8FvvEEEvv + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git a/llvm/test/tools/llvm-dwarfdump/XCOFF/basic.test b/llvm/test/tools/llvm-dwarfdump/XCOFF/basic.test index b9664c599b2ec..f1ff35a57df36 100644 --- a/llvm/test/tools/llvm-dwarfdump/XCOFF/basic.test +++ b/llvm/test/tools/llvm-dwarfdump/XCOFF/basic.test @@ -95,7 +95,7 @@ # DWARF32-NEXT: DW_AT_name ("__func__") # DWARF32-NEXT: DW_AT_decl_file ("/basic.c") # DWARF32-NEXT: DW_AT_decl_line (0) -# DWARF32-NEXT: DW_AT_type (0x0000005a "const char [5]") +# 
DWARF32-NEXT: DW_AT_type (0x0000005a "const char[5]") # DWARF32: 0x00000096: NULL # DWARF32: 0x00000097: NULL # DWARF32: .debug_line contents: @@ -225,7 +225,7 @@ # DWARF64-NEXT: DW_AT_name ("__func__") # DWARF64-NEXT: DW_AT_decl_file ("/basic.c") # DWARF64-NEXT: DW_AT_decl_line (0) -# DWARF64-NEXT: DW_AT_type (0x00000076 "const char [5]") +# DWARF64-NEXT: DW_AT_type (0x00000076 "const char[5]") # DWARF64: 0x000000ce: NULL # DWARF64: 0x000000cf: NULL # DWARF64: .debug_line contents: diff --git a/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s index 43a30aeaca6fb..9a8bc47b51774 100644 --- a/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s +++ b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s @@ -8,26 +8,23 @@ ## Generated with this compile command, with the source code in Inputs/debug.c: ## clang --target=arm--none-eabi -march=armv7-a -c debug.c -O1 -gdwarf-4 -S -o - -# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars | \ +# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj -o %t.o + +# RUN: llvm-objdump %t.o -d --debug-vars | \ # RUN: FileCheck %s --check-prefix=RAW --strict-whitespace ## Check that passing the default value for --debug-vars-indent (52) makes no ## change to the output. -# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars --debug-vars-indent=52 | \ +# RUN: llvm-objdump %t.o -d --debug-vars --debug-vars-indent=52 | \ # RUN: FileCheck %s --check-prefix=RAW --strict-whitespace -# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars --debug-vars-indent=30 | \ +# RUN: llvm-objdump %t.o -d --debug-vars --debug-vars-indent=30 | \ # RUN: FileCheck %s --check-prefix=INDENT --strict-whitespace -# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars --no-show-raw-insn | \ +# RUN: llvm-objdump %t.o -d --debug-vars --no-show-raw-insn | \ # RUN: FileCheck %s --check-prefix=NO-RAW --strict-whitespace -# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars --no-show-raw-insn --line-numbers | \ +# RUN: llvm-objdump %t.o -d --debug-vars --no-show-raw-insn --line-numbers | \ # RUN: FileCheck %s --check-prefix=LINE-NUMS --strict-whitespace # RUN: mkdir -p %t/a @@ -39,12 +36,12 @@ ## An optional argument to the --debug-vars= option can be used to switch ## between unicode and ascii output (with unicode being the default). -# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars=unicode | \ +# RUN: llvm-objdump %t.o -d --debug-vars=unicode | \ # RUN: FileCheck %s --check-prefix=RAW --strict-whitespace -# RUN: llvm-mc -triple armv8a--none-eabi < %s -filetype=obj | \ -# RUN: llvm-objdump - -d --debug-vars=ascii | \ +# RUN: llvm-objdump %t.o -d --debug-vars=ascii | \ # RUN: FileCheck %s --check-prefix=ASCII --strict-whitespace +# RUN: not llvm-objdump %t.o -d --debug-vars=bad_value 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERROR ## Note that llvm-objdump emits tab characters in the disassembly, assuming an ## 8-byte tab stop, so these might not look aligned in a text editor. 
@@ -146,6 +143,8 @@ # ASCII-NEXT: c: 01 00 80 e2 add r0, r0, #1 | # ASCII-NEXT: 10: 1e ff 2f e1 bx lr v +# ERROR: error: 'bad_value' is not a valid value for '--debug-vars=' + .text .syntax unified .eabi_attribute 67, "2.09" diff --git a/llvm/test/tools/llvm-objdump/dwarf_invalid.yaml b/llvm/test/tools/llvm-objdump/dwarf_invalid.yaml new file mode 100644 index 0000000000000..531a4c7716542 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/dwarf_invalid.yaml @@ -0,0 +1,12 @@ +## Test invalid use of the --dwarf option. + +# RUN: yaml2obj %s -o %t +# RUN: not llvm-objdump --dwarf=bad_value %t 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR: error: 'bad_value' is not a valid value for '--dwarf=' + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL diff --git a/llvm/test/tools/llvm-profdata/cs-sample-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-profile.test index 04c573ddece38..ce69a1ffd61a6 100644 --- a/llvm/test/tools/llvm-profdata/cs-sample-profile.test +++ b/llvm/test/tools/llvm-profdata/cs-sample-profile.test @@ -2,3 +2,5 @@ RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample RUN: diff -b %t.proftext %S/Inputs/cs-sample.proftext RUN: llvm-profdata merge --sample --extbinary %p/Inputs/cs-sample.proftext -o %t.prof && llvm-profdata merge --sample --text %t.prof -o %t1.proftext RUN: diff -b %t1.proftext %S/Inputs/cs-sample.proftext +RUN: llvm-profdata show --sample -show-sec-info-only %t.prof | FileCheck %s +CHECK: FunctionMetadata {{.*}} Flags: {attr} diff --git a/llvm/test/tools/llvm-profdata/merge-probe-profile.test b/llvm/test/tools/llvm-profdata/merge-probe-profile.test index 448755f89dd65..d05c950728747 100644 --- a/llvm/test/tools/llvm-profdata/merge-probe-profile.test +++ b/llvm/test/tools/llvm-profdata/merge-probe-profile.test @@ -22,3 +22,6 @@ MERGE2: 4: 26 MERGE2: 5: 14 _Z3foov:10 _Z3barv:4 MERGE2: 6: 12 _Z3barv:8 _Z3foov:4 MERGE2: !CFGChecksum: 563022570642068 + +RUN: llvm-profdata show --sample -show-sec-info-only %t | FileCheck %s +CHECK: FunctionMetadata {{.*}} Flags: {probe} \ No newline at end of file diff --git a/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof new file mode 100644 index 0000000000000..b7c477f2eee62 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/out-of-bounds.raw.prof @@ -0,0 +1,5 @@ +3 +0-0:1 +f-fff0:1 +ffff-ffff:1 +0 diff --git a/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test b/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test index 3eac05e65f8bf..fb327e53b953d 100644 --- a/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test +++ b/llvm/test/tools/llvm-profgen/cs-invalid-ret-addr.test @@ -1,4 +1,4 @@ ; REQUIRES: x86_64-linux -; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-invalid-ret-addr.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t 2>&1 | FileCheck %s +; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cs-invalid-ret-addr.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-detailed-warning 2>&1 | FileCheck %s ; CHECK: warning: Truncated stack sample due to invalid return address at 0x400686, likely caused by frame pointer omission diff --git a/llvm/test/tools/llvm-profgen/inline-noprobe.test b/llvm/test/tools/llvm-profgen/inline-noprobe.test index 5203a5968ff94..9d1473e097417 100644 --- a/llvm/test/tools/llvm-profgen/inline-noprobe.test +++ b/llvm/test/tools/llvm-profgen/inline-noprobe.test @@ -6,6 +6,11 @@ ; RUN: FileCheck %s 
--input-file %t1 --check-prefix=CHECK ; RUN: llvm-profgen --format=text --use-dwarf-correlation --perfscript=%S/Inputs/inline-noprobe.perfscript --binary=%S/Inputs/inline-noprobe.perfbin --output=%t ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK +; RUN: echo -e "0\n0" > %t +; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 --fill-zero-for-all-funcs +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-ALL-ZERO +; RUN: llvm-profgen --format=text --unsymbolized-profile=%S/Inputs/out-of-bounds.raw.prof --binary=%S/Inputs/inline-noprobe.perfbin --output=%t1 +; RUN: FileCheck %s --input-file %t1 --check-prefix=CHECK-OB CHECK: main:188:0 CHECK: 0: 0 @@ -20,6 +25,33 @@ CHECK: 1: 42 CHECK: 3.2: bar:21 CHECK: 1: 21 +CHECK-ALL-ZERO: bar:0:0 +CHECK-ALL-ZERO: 1: 0 +CHECK-ALL-ZERO: 5: 0 +CHECK-ALL-ZERO: foo:0:0 +CHECK-ALL-ZERO: 0: 0 +CHECK-ALL-ZERO: 2.1: 0 +CHECK-ALL-ZERO: 3: 0 +CHECK-ALL-ZERO: 3.2: 0 +CHECK-ALL-ZERO: 4: 0 +CHECK-ALL-ZERO: 3.1: bar:0 +CHECK-ALL-ZERO: 1: 0 +CHECK-ALL-ZERO: 3.2: bar:0 +CHECK-ALL-ZERO: 1: 0 +CHECK-ALL-ZERO: 7: 0 +CHECK-ALL-ZERO: main:0:0 +CHECK-ALL-ZERO: 0: 0 +CHECK-ALL-ZERO: 2: 0 +CHECK-ALL-ZERO: 1: foo:0 +CHECK-ALL-ZERO: 2.1: 0 +CHECK-ALL-ZERO: 3: 0 +CHECK-ALL-ZERO: 3.2: 0 +CHECK-ALL-ZERO: 4: 0 +CHECK-ALL-ZERO: 3.1: bar:0 +CHECK-ALL-ZERO: 1: 0 +CHECK-ALL-ZERO: 3.2: bar:0 +CHECK-ALL-ZERO: 1: 0 + CHECK-RAW-PROFILE: 3 CHECK-RAW-PROFILE-NEXT: 650-691:21 CHECK-RAW-PROFILE-NEXT: 669-677:20 @@ -28,6 +60,33 @@ CHECK-RAW-PROFILE-NEXT: 2 CHECK-RAW-PROFILE-NEXT: 677->650:21 CHECK-RAW-PROFILE-NEXT: 691->669:43 +;CHECK-OB: foo:8:0 +;CHECK-OB: 0: 1 +;CHECK-OB: 2.1: 1 +;CHECK-OB: 3: 1 +;CHECK-OB: 3.2: 1 +;CHECK-OB: 4: 1 +;CHECK-OB: 3.1: bar:1 +;CHECK-OB: 1: 1 +;CHECK-OB: 3.2: bar:2 +;CHECK-OB: 1: 1 +;CHECK-OB: 7: 1 +;CHECK-OB: main:8:0 +;CHECK-OB: 0: 1 +;CHECK-OB: 2: 1 +;CHECK-OB: 1: foo:6 +;CHECK-OB: 2.1: 1 +;CHECK-OB: 3: 1 +;CHECK-OB: 3.2: 1 +;CHECK-OB: 4: 1 +;CHECK-OB: 3.1: bar:1 +;CHECK-OB: 1: 1 +;CHECK-OB: 3.2: bar:1 +;CHECK-OB: 1: 1 +;CHECK-OB: bar:2:0 +;CHECK-OB: 1: 1 +;CHECK-OB: 5: 1 + ; original code: ; clang -O3 -g -fdebug-info-for-profiling test.c -o a.out #include diff --git a/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test b/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test index b830079129147..5610ed872df5c 100644 --- a/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test +++ b/llvm/test/tools/llvm-readobj/ELF/dynamic-tags.test @@ -44,9 +44,9 @@ # LLVM64-NEXT: 0x0000000000000020 PREINIT_ARRAY 0x1000 # LLVM64-NEXT: 0x0000000000000021 PREINIT_ARRAYSZ 16 (bytes) # LLVM64-NEXT: 0x0000000000000022 SYMTAB_SHNDX 0x1000 -# LLVM64-NEXT: 0x0000000000000023 RELRSZ 0x10 +# LLVM64-NEXT: 0x0000000000000023 RELRSZ 16 (bytes) # LLVM64-NEXT: 0x0000000000000024 RELR 0x1000 -# LLVM64-NEXT: 0x0000000000000025 RELRENT 0x4321 +# LLVM64-NEXT: 0x0000000000000025 RELRENT 17185 (bytes) # LLVM64-NEXT: 0x000000006000000F ANDROID_REL 0x1000 # LLVM64-NEXT: 0x0000000060000010 ANDROID_RELSZ 16 (bytes) # LLVM64-NEXT: 0x0000000060000011 ANDROID_RELA 0x1000 @@ -109,9 +109,9 @@ # GNU64-NEXT: 0x0000000000000020 (PREINIT_ARRAY) 0x1000 # GNU64-NEXT: 0x0000000000000021 (PREINIT_ARRAYSZ) 16 (bytes) # GNU64-NEXT: 0x0000000000000022 (SYMTAB_SHNDX) 0x1000 -# GNU64-NEXT: 0x0000000000000023 (RELRSZ) 0x10 +# GNU64-NEXT: 0x0000000000000023 (RELRSZ) 16 (bytes) # GNU64-NEXT: 0x0000000000000024 (RELR) 0x1000 -# GNU64-NEXT: 0x0000000000000025 (RELRENT) 0x4321 +# GNU64-NEXT: 0x0000000000000025 (RELRENT) 17185 (bytes) # GNU64-NEXT: 
0x000000006000000f (ANDROID_REL) 0x1000 # GNU64-NEXT: 0x0000000060000010 (ANDROID_RELSZ) 16 (bytes) # GNU64-NEXT: 0x0000000060000011 (ANDROID_RELA) 0x1000 @@ -335,9 +335,9 @@ ProgramHeaders: # LLVM32-NEXT: 0x00000020 PREINIT_ARRAY 0x1000 # LLVM32-NEXT: 0x00000021 PREINIT_ARRAYSZ 16 (bytes) # LLVM32-NEXT: 0x00000022 SYMTAB_SHNDX 0x1000 -# LLVM32-NEXT: 0x00000023 RELRSZ 0x10 +# LLVM32-NEXT: 0x00000023 RELRSZ 16 (bytes) # LLVM32-NEXT: 0x00000024 RELR 0x1000 -# LLVM32-NEXT: 0x00000025 RELRENT 0x4321 +# LLVM32-NEXT: 0x00000025 RELRENT 17185 (bytes) # LLVM32-NEXT: 0x6000000F ANDROID_REL 0x1000 # LLVM32-NEXT: 0x60000010 ANDROID_RELSZ 16 (bytes) # LLVM32-NEXT: 0x60000011 ANDROID_RELA 0x1000 @@ -400,9 +400,9 @@ ProgramHeaders: # GNU32-NEXT: 0x00000020 (PREINIT_ARRAY) 0x1000 # GNU32-NEXT: 0x00000021 (PREINIT_ARRAYSZ) 16 (bytes) # GNU32-NEXT: 0x00000022 (SYMTAB_SHNDX) 0x1000 -# GNU32-NEXT: 0x00000023 (RELRSZ) 0x10 +# GNU32-NEXT: 0x00000023 (RELRSZ) 16 (bytes) # GNU32-NEXT: 0x00000024 (RELR) 0x1000 -# GNU32-NEXT: 0x00000025 (RELRENT) 0x4321 +# GNU32-NEXT: 0x00000025 (RELRENT) 17185 (bytes) # GNU32-NEXT: 0x6000000f (ANDROID_REL) 0x1000 # GNU32-NEXT: 0x60000010 (ANDROID_RELSZ) 16 (bytes) # GNU32-NEXT: 0x60000011 (ANDROID_RELA) 0x1000 @@ -530,9 +530,9 @@ Sections: # PHENTSIZE-LLVM-NEXT: 0x0000000000000020 PREINIT_ARRAY 0x1000 # PHENTSIZE-LLVM-NEXT: 0x0000000000000021 PREINIT_ARRAYSZ 16 (bytes) # PHENTSIZE-LLVM-NEXT: 0x0000000000000022 SYMTAB_SHNDX 0x1000 -# PHENTSIZE-LLVM-NEXT: 0x0000000000000023 RELRSZ 0x10 +# PHENTSIZE-LLVM-NEXT: 0x0000000000000023 RELRSZ 16 (bytes) # PHENTSIZE-LLVM-NEXT: 0x0000000000000024 RELR 0x1000 -# PHENTSIZE-LLVM-NEXT: 0x0000000000000025 RELRENT 0x4321 +# PHENTSIZE-LLVM-NEXT: 0x0000000000000025 RELRENT 17185 (bytes) # PHENTSIZE-LLVM-NEXT: 0x000000006000000F ANDROID_REL 0x1000 # PHENTSIZE-LLVM-NEXT: 0x0000000060000010 ANDROID_RELSZ 16 (bytes) # PHENTSIZE-LLVM-NEXT: 0x0000000060000011 ANDROID_RELA 0x1000 @@ -596,9 +596,9 @@ Sections: # PHENTSIZE-GNU-NEXT: 0x0000000000000020 (PREINIT_ARRAY) 0x1000 # PHENTSIZE-GNU-NEXT: 0x0000000000000021 (PREINIT_ARRAYSZ) 16 (bytes) # PHENTSIZE-GNU-NEXT: 0x0000000000000022 (SYMTAB_SHNDX) 0x1000 -# PHENTSIZE-GNU-NEXT: 0x0000000000000023 (RELRSZ) 0x10 +# PHENTSIZE-GNU-NEXT: 0x0000000000000023 (RELRSZ) 16 (bytes) # PHENTSIZE-GNU-NEXT: 0x0000000000000024 (RELR) 0x1000 -# PHENTSIZE-GNU-NEXT: 0x0000000000000025 (RELRENT) 0x4321 +# PHENTSIZE-GNU-NEXT: 0x0000000000000025 (RELRENT) 17185 (bytes) # PHENTSIZE-GNU-NEXT: 0x000000006000000f (ANDROID_REL) 0x1000 # PHENTSIZE-GNU-NEXT: 0x0000000060000010 (ANDROID_RELSZ) 16 (bytes) # PHENTSIZE-GNU-NEXT: 0x0000000060000011 (ANDROID_RELA) 0x1000 diff --git a/llvm/test/tools/llvm-readobj/ELF/note-openbsd-core.test b/llvm/test/tools/llvm-readobj/ELF/note-openbsd-core.test new file mode 100644 index 0000000000000..0ccb55a8d6f33 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/note-openbsd-core.test @@ -0,0 +1,69 @@ +## Test that note values are interpreted correctly for OpenBSD core files. 
+# RUN: yaml2obj %s -o %t.o +# RUN: llvm-readelf --notes %t.o | FileCheck %s --check-prefix=GNU --strict-whitespace +# RUN: llvm-readobj --notes %t.o | FileCheck %s --check-prefix=LLVM --strict-whitespace + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_CORE +Sections: + - Name: .note.foo + Type: SHT_NOTE + Notes: + - Name: OpenBSD + Type: NT_OPENBSD_PROCINFO + - Name: OpenBSD + Type: NT_OPENBSD_AUXV + - Name: OpenBSD + Type: NT_OPENBSD_WCOOKIE + - Name: OpenBSD@31337 + Type: NT_OPENBSD_REGS + - Name: OpenBSD@31337 + Type: NT_OPENBSD_FPREGS +ProgramHeaders: + - Type: PT_NOTE + FirstSec: .note.foo + LastSec: .note.foo + +# GNU: Displaying notes found at file offset 0x00000078 with length 0x00000074: +# GNU-NEXT: Owner Data size Description +# GNU-NEXT: OpenBSD 0x00000000 NT_OPENBSD_PROCINFO (procinfo structure) +# GNU-NEXT: OpenBSD 0x00000000 NT_OPENBSD_AUXV (ELF auxiliary vector data) +# GNU-NEXT: OpenBSD 0x00000000 NT_OPENBSD_WCOOKIE (window cookie) +# GNU-NEXT: OpenBSD@31337 0x00000000 NT_OPENBSD_REGS (regular registers) +# GNU-NEXT: OpenBSD@31337 0x00000000 NT_OPENBSD_FPREGS (floating point registers) + +# LLVM: Notes [ +# LLVM-NEXT: NoteSection { +# LLVM-NEXT: Name: +# LLVM-NEXT: Offset: 0x78 +# LLVM-NEXT: Size: 0x74 +# LLVM-NEXT: Note { +# LLVM-NEXT: Owner: OpenBSD +# LLVM-NEXT: Data size: 0x0 +# LLVM-NEXT: Type: NT_OPENBSD_PROCINFO (procinfo structure) +# LLVM-NEXT: } +# LLVM-NEXT: Note { +# LLVM-NEXT: Owner: OpenBSD +# LLVM-NEXT: Data size: 0x0 +# LLVM-NEXT: Type: NT_OPENBSD_AUXV (ELF auxiliary vector data) +# LLVM-NEXT: } +# LLVM-NEXT: Note { +# LLVM-NEXT: Owner: OpenBSD +# LLVM-NEXT: Data size: 0x0 +# LLVM-NEXT: Type: NT_OPENBSD_WCOOKIE (window cookie) +# LLVM-NEXT: } +# LLVM-NEXT: Note { +# LLVM-NEXT: Owner: OpenBSD@31337 +# LLVM-NEXT: Data size: 0x0 +# LLVM-NEXT: Type: NT_OPENBSD_REGS (regular registers) +# LLVM-NEXT: } +# LLVM-NEXT: Note { +# LLVM-NEXT: Owner: OpenBSD@31337 +# LLVM-NEXT: Data size: 0x0 +# LLVM-NEXT: Type: NT_OPENBSD_FPREGS (floating point registers) +# LLVM-NEXT: } +# LLVM-NEXT: } +# LLVM-NEXT: ] diff --git a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic-neg-sym-count.o b/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic-neg-sym-count.o deleted file mode 100644 index e7f63664acd33..0000000000000 Binary files a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic-neg-sym-count.o and /dev/null differ diff --git a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic-neg-time.o b/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic-neg-time.o deleted file mode 100644 index f814965f4f0f6..0000000000000 Binary files a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic-neg-time.o and /dev/null differ diff --git a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic.o b/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic.o deleted file mode 100644 index c84056bdb00bd..0000000000000 Binary files a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic.o and /dev/null differ diff --git a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic64.o b/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic64.o deleted file mode 100644 index d6c79abd8cfa6..0000000000000 Binary files a/llvm/test/tools/llvm-readobj/XCOFF/Inputs/basic64.o and /dev/null differ diff --git a/llvm/test/tools/llvm-readobj/XCOFF/basic.test b/llvm/test/tools/llvm-readobj/XCOFF/basic.test deleted file mode 100644 index 9d9917274e84c..0000000000000 --- a/llvm/test/tools/llvm-readobj/XCOFF/basic.test +++ /dev/null @@ -1,160 +0,0 @@ -# RUN: llvm-readobj --file-header %p/Inputs/basic.o | \ -# RUN: FileCheck 
--check-prefix=FILEHEADER %s -# -# RUN: llvm-readobj --file-header %p/Inputs/basic64.o | \ -# RUN: FileCheck --check-prefix=FILEHEADER64 %s - -# RUN: llvm-readobj --file-header %p/Inputs/basic-neg-time.o | \ -# RUN: FileCheck --check-prefix=NEGTIME %s - -# RUN: llvm-readobj --file-header %p/Inputs/basic-neg-sym-count.o | \ -# RUN: FileCheck --check-prefix=NEGSYMCOUNT %s - -# RUN: llvm-readobj --relocs --expand-relocs %p/Inputs/basic.o | \ -# RUN: FileCheck --check-prefix=RELOCSEXP %s - -# FILEHEADER: File: {{.*}}basic.o -# FILEHEADER-NEXT: Format: aixcoff-rs6000 -# FILEHEADER-NEXT: Arch: powerpc -# FILEHEADER-NEXT: AddressSize: 32bit -# FILEHEADER-NEXT: FileHeader { -# FILEHEADER-NEXT: Magic: 0x1DF -# FILEHEADER-NEXT: NumberOfSections: 6 -# FILEHEADER-NEXT: TimeStamp: 2019-03-12T14:04:43Z (0x5C87BC7B) -# FILEHEADER-NEXT: SymbolTableOffset: 0x52E -# FILEHEADER-NEXT: SymbolTableEntries: 120 -# FILEHEADER-NEXT: OptionalHeaderSize: 0x1C -# FILEHEADER-NEXT: Flags: 0x0 -# FILEHEADER-NEXT: } - -# FILEHEADER64: File: {{.*}}basic64.o -# FILEHEADER64-NEXT: Format: aix5coff64-rs6000 -# FILEHEADER64-NEXT: Arch: powerpc64 -# FILEHEADER64-NEXT: AddressSize: 64bit -# FILEHEADER64-NEXT: FileHeader { -# FILEHEADER64-NEXT: Magic: 0x1F7 -# FILEHEADER64-NEXT: NumberOfSections: 5 -# FILEHEADER64-NEXT: TimeStamp: 2019-03-18T20:03:47Z (0x5C8FF9A3) -# FILEHEADER64-NEXT: SymbolTableOffset: 0x54C -# FILEHEADER64-NEXT: SymbolTableEntries: 58 -# FILEHEADER64-NEXT: OptionalHeaderSize: 0x0 -# FILEHEADER64-NEXT: Flags: 0x0 -# FILEHEADER64-NEXT: } - -# NEGTIME: File: {{.*}}basic-neg-time.o -# NEGTIME-NEXT: Format: aixcoff-rs6000 -# NEGTIME-NEXT: Arch: powerpc -# NEGTIME-NEXT: AddressSize: 32bit -# NEGTIME-NEXT: FileHeader { -# NEGTIME-NEXT: Magic: 0x1DF -# NEGTIME-NEXT: NumberOfSections: 6 -# NEGTIME-NEXT: TimeStamp: Reserved Value (0xDC87BC7B) -# NEGTIME-NEXT: SymbolTableOffset: 0x52E -# NEGTIME-NEXT: SymbolTableEntries: 120 -# NEGTIME-NEXT: OptionalHeaderSize: 0x1C -# NEGTIME-NEXT: Flags: 0x0 -# NEGTIME-NEXT: } - -# NEGSYMCOUNT: File: {{.*}}basic-neg-sym-count.o -# NEGSYMCOUNT-NEXT: Format: aixcoff-rs6000 -# NEGSYMCOUNT-NEXT: Arch: powerpc -# NEGSYMCOUNT-NEXT: AddressSize: 32bit -# NEGSYMCOUNT-NEXT: FileHeader { -# NEGSYMCOUNT-NEXT: Magic: 0x1DF -# NEGSYMCOUNT-NEXT: NumberOfSections: 5 -# NEGSYMCOUNT-NEXT: TimeStamp: 2019-03-12T14:04:43Z (0x5C87BC7B) -# NEGSYMCOUNT-NEXT: SymbolTableOffset: 0x0 -# NEGSYMCOUNT-NEXT: SymbolTableEntries: Reserved Value (0x80000000) -# NEGSYMCOUNT-NEXT: OptionalHeaderSize: 0x1C -# NEGSYMCOUNT-NEXT: Flags: 0xD -# NEGSYMCOUNT-NEXT: } - -# xcoff-basic.o was compiled with `xlc -qtls -O3 -g -c xcoff-basic.c` -# from the following source: -# int a = 55; -# int b; -# __thread int j = 55; -# __thread double d; -# int A() { return a; } -# int B() { return b; } -# int J() { return j; } -# double D() { return d; } -# -# xcoff-basic-neg-time.o was manually edited to include a negative time stamp. -# xcoff-basic-neg-sym-count.o was stripped using the 'strip' utility, and -# manually edited to have a negative symbol table entry count. 
- -# RELOCSEXP: File: {{.*}}basic.o -# RELOCSEXP-NEXT: Format: aixcoff-rs6000 -# RELOCSEXP-NEXT: Arch: powerpc -# RELOCSEXP-NEXT: AddressSize: 32bit -# RELOCSEXP-NEXT: Relocations [ -# RELOCSEXP-NEXT: Section (index: 1) .text { -# RELOCSEXP-NEXT: Relocation { -# RELOCSEXP-NEXT: Virtual Address: 0x2 -# RELOCSEXP-NEXT: Symbol: a (85) -# RELOCSEXP-NEXT: IsSigned: Yes -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 16 -# RELOCSEXP-NEXT: Type: R_TOC (0x3) -# RELOCSEXP-NEXT: } - -# RELOCSEXP: Virtual Address: 0x90 -# RELOCSEXP-NEXT: Symbol: .__tls_get_addr (118) -# RELOCSEXP-NEXT: IsSigned: Yes -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 26 -# RELOCSEXP-NEXT: Type: R_RBA (0x18) -# RELOCSEXP-NEXT: } -# RELOCSEXP-NEXT: } -# RELOCSEXP-NEXT: Section (index: 2) .data { -# RELOCSEXP-NEXT: Relocation { -# RELOCSEXP-NEXT: Virtual Address: 0x100 -# RELOCSEXP-NEXT: Symbol: A (78) -# RELOCSEXP-NEXT: IsSigned: No -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 32 -# RELOCSEXP-NEXT: Type: R_POS (0x0) -# RELOCSEXP-NEXT: } - -# RELOCSEXP: Virtual Address: 0x110 -# RELOCSEXP-NEXT: Symbol: J (96) -# RELOCSEXP-NEXT: IsSigned: No -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 32 -# RELOCSEXP-NEXT: Type: R_POS (0x0) -# RELOCSEXP-NEXT: } - -# RELOCSEXP: Virtual Address: 0x114 -# RELOCSEXP-NEXT: Symbol: j (100) -# RELOCSEXP-NEXT: IsSigned: No -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 32 -# RELOCSEXP-NEXT: Type: R_TLS (0x20) -# RELOCSEXP-NEXT: } - -# RELOCSEXP: Virtual Address: 0x124 -# RELOCSEXP-NEXT: Symbol: d (111) -# RELOCSEXP-NEXT: IsSigned: No -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 32 -# RELOCSEXP-NEXT: Type: R_TLSM (0x24) -# RELOCSEXP-NEXT: } - -# RELOCSEXP: Virtual Address: 0x128 -# RELOCSEXP-NEXT: Symbol: (76) -# RELOCSEXP-NEXT: IsSigned: No -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 32 -# RELOCSEXP-NEXT: Type: R_POS (0x0) -# RELOCSEXP-NEXT: } - -# RELOCSEXP: Virtual Address: 0x154 -# RELOCSEXP-NEXT: Symbol: TOC (72) -# RELOCSEXP-NEXT: IsSigned: No -# RELOCSEXP-NEXT: FixupBitValue: 0 -# RELOCSEXP-NEXT: Length: 32 -# RELOCSEXP-NEXT: Type: R_POS (0x0) -# RELOCSEXP-NEXT: } -# RELOCSEXP-NEXT: } -# RELOCSEXP-NEXT:] diff --git a/llvm/test/tools/llvm-readobj/XCOFF/file-header.test b/llvm/test/tools/llvm-readobj/XCOFF/file-header.test new file mode 100644 index 0000000000000..b7727dcf1d78a --- /dev/null +++ b/llvm/test/tools/llvm-readobj/XCOFF/file-header.test @@ -0,0 +1,65 @@ +## This is a general test for the --file-header option. 
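+##
+## A note on the encoding used below: yaml2obj macros of the form
+## [[NAME=default]] take their default value unless a RUN line overrides
+## them with -DNAME=<value>, so a single YAML description drives the
+## 32-bit, 64-bit, and reserved-value cases. A hypothetical standalone
+## invocation producing the 64-bit variant would look like:
+##   yaml2obj -DMAGIC=0x01F7 file-header.test -o file-header64.o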
+ +# RUN: yaml2obj %s -o %t1 +# RUN: llvm-readobj %t1 --file-header | \ +# RUN: FileCheck %s --strict-whitespace --match-full-lines --check-prefix=FILEHEADER32 + +# FILEHEADER32:Format: aixcoff-rs6000 +# FILEHEADER32-NEXT:Arch: powerpc +# FILEHEADER32-NEXT:AddressSize: 32bit +# FILEHEADER32-NEXT:FileHeader { +# FILEHEADER32-NEXT: Magic: 0x1DF +# FILEHEADER32-NEXT: NumberOfSections: 1 +# FILEHEADER32-NEXT: TimeStamp: 1970-01-01T00:00:01Z (0x1) +# FILEHEADER32-NEXT: SymbolTableOffset: 0x3C +# FILEHEADER32-NEXT: SymbolTableEntries: 1 +# FILEHEADER32-NEXT: OptionalHeaderSize: 0x0 +# FILEHEADER32-NEXT: Flags: 0x12 +# FILEHEADER32-NEXT:} + +--- !XCOFF +FileHeader: + MagicNumber: [[MAGIC=0x01DF]] + CreationTime: [[CREATTIME=1]] + EntriesInSymbolTable: [[SYMBOLCOUNT=1]] + NumberOfSections: 1 + OffsetToSymbolTable: 0x3C + AuxiliaryHeaderSize: 0 + Flags: 0x12 +Sections: + - Name: .text +Symbols: + - Name: foo + +# RUN: yaml2obj -DMAGIC=0x01F7 -DCREATTIME=0 %s -o %t2 +# RUN: llvm-readobj %t2 --file-header | \ +# RUN: FileCheck %s --strict-whitespace --match-full-lines --check-prefix=FILEHEADER64 + +# FILEHEADER64:Format: aix5coff64-rs6000 +# FILEHEADER64-NEXT:Arch: powerpc64 +# FILEHEADER64-NEXT:AddressSize: 64bit +# FILEHEADER64-NEXT:FileHeader { +# FILEHEADER64-NEXT: Magic: 0x1F7 +# FILEHEADER64-NEXT: NumberOfSections: 1 +# FILEHEADER64-NEXT: TimeStamp: None (0x0) +# FILEHEADER64-NEXT: SymbolTableOffset: 0x3C +# FILEHEADER64-NEXT: SymbolTableEntries: 1 +# FILEHEADER64-NEXT: OptionalHeaderSize: 0x0 +# FILEHEADER64-NEXT: Flags: 0x12 +# FILEHEADER64-NEXT:} + +# RUN: yaml2obj -DCREATTIME=-1 %s -o %t3 +# RUN: llvm-readobj %t3 --file-header | \ +# RUN: FileCheck %s --strict-whitespace --match-full-lines --check-prefix=NEGTIME + +# NEGTIME:FileHeader { +# NEGTIME: TimeStamp: Reserved Value (0xFFFFFFFF) +# NEGTIME:} + +# RUN: yaml2obj -DSYMBOLCOUNT=-1 %s -o %t4 +# RUN: llvm-readobj %t4 --file-header | \ +# RUN: FileCheck %s --strict-whitespace --match-full-lines --check-prefix=NEGSYMCOUNT + +# NEGSYMCOUNT:FileHeader { +# NEGSYMCOUNT: SymbolTableEntries: Reserved Value (0xFFFFFFFF) +# NEGSYMCOUNT:} diff --git a/llvm/test/tools/llvm-readobj/XCOFF/relocations.test b/llvm/test/tools/llvm-readobj/XCOFF/relocations.test new file mode 100644 index 0000000000000..9e327c4fbbdcc --- /dev/null +++ b/llvm/test/tools/llvm-readobj/XCOFF/relocations.test @@ -0,0 +1,78 @@ +## Test how relocations are dumped. 
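+##
+## The 'Info' fields in the YAML below feed the XCOFF relocation's
+## sign-and-size byte; assuming the usual XCOFF encoding, the 0x80 bit
+## marks a signed reference and the low six bits hold the reference
+## length minus one. That is why Info 0x15 (decimal 21) is expected to
+## print as Length: 22 with IsSigned: No, and shows up as the trailing
+## 0x15 in the brief RELOCS output.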
+ +# RUN: yaml2obj %s -o %t +# RUN: llvm-readobj --relocs --expand-relocs %t | \ +# RUN: FileCheck %s --strict-whitespace --match-full-lines --check-prefix=RELOCSEXP +# RUN: llvm-readobj --relocs %t | \ +# RUN: FileCheck %s --strict-whitespace --match-full-lines --check-prefix=RELOCS + +# RELOCSEXP:Relocations [ +# RELOCSEXP-NEXT: Section (index: 1) .text { +# RELOCSEXP-NEXT: Relocation { +# RELOCSEXP-NEXT: Virtual Address: 0x80 +# RELOCSEXP-NEXT: Symbol: foo (0) +# RELOCSEXP-NEXT: IsSigned: No +# RELOCSEXP-NEXT: FixupBitValue: 0 +# RELOCSEXP-NEXT: Length: 22 +# RELOCSEXP-NEXT: Type: R_POS (0x0) +# RELOCSEXP-NEXT: } +# RELOCSEXP-NEXT: Relocation { +# RELOCSEXP-NEXT: Virtual Address: 0x100 +# RELOCSEXP-NEXT: Symbol: foo (0) +# RELOCSEXP-NEXT: IsSigned: No +# RELOCSEXP-NEXT: FixupBitValue: 0 +# RELOCSEXP-NEXT: Length: 21 +# RELOCSEXP-NEXT: Type: R_REL (0x2) +# RELOCSEXP-NEXT: } +# RELOCSEXP-NEXT: } +# RELOCSEXP-NEXT: Section (index: 2) .data { +# RELOCSEXP-NEXT: Relocation { +# RELOCSEXP-NEXT: Virtual Address: 0x200 +# RELOCSEXP-NEXT: Symbol: bar (1) +# RELOCSEXP-NEXT: IsSigned: No +# RELOCSEXP-NEXT: FixupBitValue: 0 +# RELOCSEXP-NEXT: Length: 20 +# RELOCSEXP-NEXT: Type: R_TOC (0x3) +# RELOCSEXP-NEXT: } +# RELOCSEXP-NEXT: } +# RELOCSEXP-NEXT:] + +# RELOCS:Relocations [ +# RELOCS-NEXT: Section (index: 1) .text { +# RELOCS-NEXT: 0x80 R_POS foo(0) 0x15 +# RELOCS-NEXT: 0x100 R_REL foo(0) 0x14 +# RELOCS-NEXT: } +# RELOCS-NEXT: Section (index: 2) .data { +# RELOCS-NEXT: 0x200 R_TOC bar(1) 0x13 +# RELOCS-NEXT: } +# RELOCS-NEXT:] + +--- !XCOFF +FileHeader: + MagicNumber: 0x01DF +Sections: + - Name: .text + Flags: [ STYP_TEXT ] + Relocations: + - Address: 0x80 + Symbol: 0x0 + Info: 0x15 + Type: 0x0 + - Address: 0x100 + Symbol: 0x0 + Info: 0x14 + Type: 0x2 + - Name: .data + Flags: [ STYP_DATA ] + Relocations: + - Address: 0x200 + Symbol: 0x1 + Info: 0x13 + Type: 0x3 +Symbols: + - Name: foo + Value: 0x0 + Section: .text + - Name: bar + Value: 0x80 + Section: .data diff --git a/llvm/test/tools/llvm-readobj/XCOFF/sections.test b/llvm/test/tools/llvm-readobj/XCOFF/sections.test index b0d5436e0baba..be098939ce775 100644 --- a/llvm/test/tools/llvm-readobj/XCOFF/sections.test +++ b/llvm/test/tools/llvm-readobj/XCOFF/sections.test @@ -1,164 +1,89 @@ -# RUN: llvm-readobj --section-headers %p/Inputs/basic.o | \ -# RUN: FileCheck --check-prefix=SEC32 %s +## This is a general test for the --section-headers option. 
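+##
+## The RawDataOffset values checked below follow from the fixed XCOFF
+## header sizes (assuming the standard layout): a 32-bit object places
+## .text's raw data after a 20-byte file header and two 40-byte section
+## headers, i.e. at 20 + 2*40 = 100 (0x64), while a 64-bit object uses
+## 24 + 2*72 = 168 (0xA8); .data then starts 4 bytes later in each case.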
-# RUN: llvm-readobj --section-headers %p/Inputs/basic64.o | \ -# RUN: FileCheck --check-prefix=SEC64 %s +# RUN: yaml2obj %s -o %t1 +# RUN: llvm-readobj --section-headers %t1 | \ +# RUN: FileCheck --strict-whitespace --match-full-lines --check-prefix=SEC32 %s -# SEC32: File: {{.*}}basic.o -# SEC32-NEXT: Format: aixcoff-rs6000 -# SEC32-NEXT: Arch: powerpc -# SEC32-NEXT: AddressSize: 32bit -# SEC32-NEXT: Sections [ -# SEC32-NEXT: Section { -# SEC32-NEXT: Index: 1 -# SEC32-NEXT: Name: .text -# SEC32-NEXT: PhysicalAddress: 0x0 -# SEC32-NEXT: VirtualAddress: 0x0 -# SEC32-NEXT: Size: 0x100 -# SEC32-NEXT: RawDataOffset: 0x200 -# SEC32-NEXT: RelocationPointer: 0x3D8 -# SEC32-NEXT: LineNumberPointer: 0x4E6 -# SEC32-NEXT: NumberOfRelocations: 8 -# SEC32-NEXT: NumberOfLineNumbers: 12 -# SEC32-NEXT: Type: STYP_TEXT (0x20) -# SEC32-NEXT: } -# SEC32-NEXT: Section { -# SEC32-NEXT: Index: 2 -# SEC32-NEXT: Name: .data -# SEC32-NEXT: PhysicalAddress: 0x100 -# SEC32-NEXT: VirtualAddress: 0x100 -# SEC32-NEXT: Size: 0x68 -# SEC32-NEXT: RawDataOffset: 0x300 -# SEC32-NEXT: RelocationPointer: 0x428 -# SEC32-NEXT: LineNumberPointer: 0x0 -# SEC32-NEXT: NumberOfRelocations: 19 -# SEC32-NEXT: NumberOfLineNumbers: 0 -# SEC32-NEXT: Type: STYP_DATA (0x40) -# SEC32-NEXT: } -# SEC32-NEXT: Section { -# SEC32-NEXT: Index: 3 -# SEC32-NEXT: Name: .bss -# SEC32-NEXT: PhysicalAddress: 0x168 -# SEC32-NEXT: VirtualAddress: 0x168 -# SEC32-NEXT: Size: 0x4 -# SEC32-NEXT: RawDataOffset: 0x0 -# SEC32-NEXT: RelocationPointer: 0x0 -# SEC32-NEXT: LineNumberPointer: 0x0 -# SEC32-NEXT: NumberOfRelocations: 0 -# SEC32-NEXT: NumberOfLineNumbers: 0 -# SEC32-NEXT: Type: STYP_BSS (0x80) -# SEC32-NEXT: } -# SEC32-NEXT: Section { -# SEC32-NEXT: Index: 4 -# SEC32-NEXT: Name: .tdata -# SEC32-NEXT: PhysicalAddress: 0x0 -# SEC32-NEXT: VirtualAddress: 0x0 -# SEC32-NEXT: Size: 0x4 -# SEC32-NEXT: RawDataOffset: 0x368 -# SEC32-NEXT: RelocationPointer: 0x47A -# SEC32-NEXT: LineNumberPointer: 0x0 -# SEC32-NEXT: NumberOfRelocations: 0 -# SEC32-NEXT: NumberOfLineNumbers: 0 -# SEC32-NEXT: Type: STYP_TDATA (0x400) -# SEC32-NEXT: } -# SEC32-NEXT: Section { -# SEC32-NEXT: Index: 5 -# SEC32-NEXT: Name: .tbss -# SEC32-NEXT: PhysicalAddress: 0x4 -# SEC32-NEXT: VirtualAddress: 0x4 -# SEC32-NEXT: Size: 0x8 -# SEC32-NEXT: RawDataOffset: 0x0 -# SEC32-NEXT: RelocationPointer: 0x0 -# SEC32-NEXT: LineNumberPointer: 0x0 -# SEC32-NEXT: NumberOfRelocations: 0 -# SEC32-NEXT: NumberOfLineNumbers: 0 -# SEC32-NEXT: Type: STYP_TBSS (0x800) -# SEC32-NEXT: } -# SEC32-NEXT: Section { -# SEC32-NEXT: Index: 6 -# SEC32-NEXT: Name: .debug -# SEC32-NEXT: PhysicalAddress: 0x0 -# SEC32-NEXT: VirtualAddress: 0x0 -# SEC32-NEXT: Size: 0x6C -# SEC32-NEXT: RawDataOffset: 0x36C -# SEC32-NEXT: RelocationPointer: 0x0 -# SEC32-NEXT: LineNumberPointer: 0x0 -# SEC32-NEXT: NumberOfRelocations: 0 -# SEC32-NEXT: NumberOfLineNumbers: 0 -# SEC32-NEXT: Type: STYP_DEBUG (0x2000) -# SEC32-NEXT: } -# SEC32-NEXT: ] +# SEC32:Format: aixcoff-rs6000 +# SEC32-NEXT:Arch: powerpc +# SEC32-NEXT:AddressSize: 32bit +# SEC32-NEXT:Sections [ +# SEC32-NEXT: Section { +# SEC32-NEXT: Index: 1 +# SEC32-NEXT: Name: .text +# SEC32-NEXT: PhysicalAddress: 0x0 +# SEC32-NEXT: VirtualAddress: 0x0 +# SEC32-NEXT: Size: 0x4 +# SEC32-NEXT: RawDataOffset: 0x64 +# SEC32-NEXT: RelocationPointer: 0x0 +# SEC32-NEXT: LineNumberPointer: 0x0 +# SEC32-NEXT: NumberOfRelocations: 0 +# SEC32-NEXT: NumberOfLineNumbers: 0 +# SEC32-NEXT: Type: STYP_TEXT (0x20) +# SEC32-NEXT: } +# SEC32-NEXT: Section { +# SEC32-NEXT: Index: 2 +# SEC32-NEXT: Name: 
.data +# SEC32-NEXT: PhysicalAddress: 0x4 +# SEC32-NEXT: VirtualAddress: 0x4 +# SEC32-NEXT: Size: 0x4 +# SEC32-NEXT: RawDataOffset: 0x68 +# SEC32-NEXT: RelocationPointer: 0x6C +# SEC32-NEXT: LineNumberPointer: 0x0 +# SEC32-NEXT: NumberOfRelocations: 1 +# SEC32-NEXT: NumberOfLineNumbers: 0 +# SEC32-NEXT: Type: STYP_DATA (0x40) +# SEC32-NEXT: } +# SEC32-NEXT:] +--- !XCOFF +FileHeader: + MagicNumber: [[MAGIC=0x1DF]] +Sections: + - Name: .text + Flags: [ STYP_TEXT ] + SectionData: "1234" + - Name: .data + Flags: [ STYP_DATA ] + SectionData: "5678" + Relocations: + - Address: 0x80 + Symbol: 0x21 + Info: 0x1F + Type: 0x0 -# SEC64: File: {{.*}}basic64.o -# SEC64-NEXT: Format: aix5coff64-rs6000 -# SEC64-NEXT: Arch: powerpc64 -# SEC64-NEXT: AddressSize: 64bit -# SEC64-NEXT: Sections [ -# SEC64-NEXT: Section { -# SEC64-NEXT: Index: 1 -# SEC64-NEXT: Name: .text -# SEC64-NEXT: PhysicalAddress: 0x0 -# SEC64-NEXT: VirtualAddress: 0x0 -# SEC64-NEXT: Size: 0x100 -# SEC64-NEXT: RawDataOffset: 0x200 -# SEC64-NEXT: RelocationPointer: 0x3C4 -# SEC64-NEXT: LineNumberPointer: 0x0 -# SEC64-NEXT: NumberOfRelocations: 9 -# SEC64-NEXT: NumberOfLineNumbers: 0 -# SEC64-NEXT: Type: STYP_TEXT (0x20) -# SEC64-NEXT: } -# SEC64-NEXT: Section { -# SEC64-NEXT: Index: 2 -# SEC64-NEXT: Name: .data -# SEC64-NEXT: PhysicalAddress: 0x100 -# SEC64-NEXT: VirtualAddress: 0x100 -# SEC64-NEXT: Size: 0xC0 -# SEC64-NEXT: RawDataOffset: 0x300 -# SEC64-NEXT: RelocationPointer: 0x442 -# SEC64-NEXT: LineNumberPointer: 0x0 -# SEC64-NEXT: NumberOfRelocations: 19 -# SEC64-NEXT: NumberOfLineNumbers: 0 -# SEC64-NEXT: Type: STYP_DATA (0x40) -# SEC64-NEXT: } -# SEC64-NEXT: Section { -# SEC64-NEXT: Index: 3 -# SEC64-NEXT: Name: .bss -# SEC64-NEXT: PhysicalAddress: 0x1C0 -# SEC64-NEXT: VirtualAddress: 0x1C0 -# SEC64-NEXT: Size: 0x8 -# SEC64-NEXT: RawDataOffset: 0x0 -# SEC64-NEXT: RelocationPointer: 0x0 -# SEC64-NEXT: LineNumberPointer: 0x0 -# SEC64-NEXT: NumberOfRelocations: 0 -# SEC64-NEXT: NumberOfLineNumbers: 0 -# SEC64-NEXT: Type: STYP_BSS (0x80) -# SEC64-NEXT: } -# SEC64-NEXT: Section { -# SEC64-NEXT: Index: 4 -# SEC64-NEXT: Name: .tdata -# SEC64-NEXT: PhysicalAddress: 0x0 -# SEC64-NEXT: VirtualAddress: 0x0 -# SEC64-NEXT: Size: 0x4 -# SEC64-NEXT: RawDataOffset: 0x3C0 -# SEC64-NEXT: RelocationPointer: 0x54C -# SEC64-NEXT: LineNumberPointer: 0x0 -# SEC64-NEXT: NumberOfRelocations: 0 -# SEC64-NEXT: NumberOfLineNumbers: 0 -# SEC64-NEXT: Type: STYP_TDATA (0x400) -# SEC64-NEXT: } -# SEC64-NEXT: Section { -# SEC64-NEXT: Index: 5 -# SEC64-NEXT: Name: .tbss -# SEC64-NEXT: PhysicalAddress: 0x4 -# SEC64-NEXT: VirtualAddress: 0x4 -# SEC64-NEXT: Size: 0x8 -# SEC64-NEXT: RawDataOffset: 0x0 -# SEC64-NEXT: RelocationPointer: 0x0 -# SEC64-NEXT: LineNumberPointer: 0x0 -# SEC64-NEXT: NumberOfRelocations: 0 -# SEC64-NEXT: NumberOfLineNumbers: 0 -# SEC64-NEXT: Type: STYP_TBSS (0x800) -# SEC64-NEXT: } -# SEC64-NEXT: ] +# RUN: yaml2obj -DMAGIC=0x01F7 %s -o %t2 +# RUN: llvm-readobj --section-headers %t2 | \ +# RUN: FileCheck --strict-whitespace --match-full-lines --check-prefix=SEC64 %s +# SEC64:Format: aix5coff64-rs6000 +# SEC64-NEXT:Arch: powerpc64 +# SEC64-NEXT:AddressSize: 64bit +# SEC64-NEXT:Sections [ +# SEC64-NEXT: Section { +# SEC64-NEXT: Index: 1 +# SEC64-NEXT: Name: .text +# SEC64-NEXT: PhysicalAddress: 0x0 +# SEC64-NEXT: VirtualAddress: 0x0 +# SEC64-NEXT: Size: 0x4 +# SEC64-NEXT: RawDataOffset: 0xA8 +# SEC64-NEXT: RelocationPointer: 0x0 +# SEC64-NEXT: LineNumberPointer: 0x0 +# SEC64-NEXT: NumberOfRelocations: 0 +# SEC64-NEXT: NumberOfLineNumbers: 0 +# 
SEC64-NEXT: Type: STYP_TEXT (0x20) +# SEC64-NEXT: } +# SEC64-NEXT: Section { +# SEC64-NEXT: Index: 2 +# SEC64-NEXT: Name: .data +# SEC64-NEXT: PhysicalAddress: 0x4 +# SEC64-NEXT: VirtualAddress: 0x4 +# SEC64-NEXT: Size: 0x4 +# SEC64-NEXT: RawDataOffset: 0xAC +# SEC64-NEXT: RelocationPointer: 0xB0 +# SEC64-NEXT: LineNumberPointer: 0x0 +# SEC64-NEXT: NumberOfRelocations: 1 +# SEC64-NEXT: NumberOfLineNumbers: 0 +# SEC64-NEXT: Type: STYP_DATA (0x40) +# SEC64-NEXT: } +# SEC64-NEXT:] diff --git a/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir b/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir new file mode 100644 index 0000000000000..f252ff3d56998 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/mir/instr-reduce.mir @@ -0,0 +1,30 @@ +# RUN: llvm-reduce -mtriple=riscv32 --test %python --test-arg %p/instr-reduce.py %s -o %t +# RUN: cat %t | FileCheck --match-full-lines %s + +# REQUIRES: riscv-registered-target + +# Verify that after reduction the following instruction sequence remains. The +# interestingness-test 'instr-reduce.py' matches a '%[0-9]+:gpr = ADDI %[0-9]+, 5' +# pattern in the output and that combined with that the MIR has to be valid +# (pass verify) results in the given sequence. + +# CHECK: %0:gpr = COPY $x10 +# CHECK-NEXT: %2:gpr = ADDI %0, 5 +# CHECK-NEXT: PseudoRET implicit $x10 + +... +--- +name: f +tracksRegLiveness: true +body: | + bb.0: + liveins: $x10 + + %10:gpr = COPY $x10 + %20:gpr = ADDI %10, 1 + %30:gpr = ADDI %20, 5 + %40:gpr = ADDI %30, 9 + $x10 = COPY %40 + PseudoRET implicit $x10 +... +--- diff --git a/llvm/test/tools/llvm-reduce/mir/instr-reduce.py b/llvm/test/tools/llvm-reduce/mir/instr-reduce.py new file mode 100755 index 0000000000000..75e2cc54bbcfa --- /dev/null +++ b/llvm/test/tools/llvm-reduce/mir/instr-reduce.py @@ -0,0 +1,16 @@ +from subprocess import run, PIPE +import re +import sys + +llc = run( [ 'llc', '-disable-symbolication','-verify-machineinstrs', '-mtriple=riscv32', '-run-pass=none', '-o', '-', sys.argv[1]], stdout=PIPE, stderr=PIPE ) + +stdout = llc.stdout.decode() + +p = re.compile(r'^\s*%[0-9]+:gpr = ADDI %[0-9]+, 5$', flags=re.MULTILINE) + +if (llc.returncode == 0 and p.search(stdout)): + print('This is interesting!') + sys.exit(0) +else: + print('This is NOT interesting!') + sys.exit(1) diff --git a/llvm/test/tools/llvm-reduce/remove-dll.ll b/llvm/test/tools/llvm-reduce/remove-dll.ll new file mode 100644 index 0000000000000..b7370b305f0ff --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-dll.ll @@ -0,0 +1,10 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=global-values --test FileCheck --test-arg --check-prefixes=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=CHECK-FINAL %s --input-file=%t + +; CHECK-INTERESTINGNESS: @g = external {{.*}}global i32 +; CHECK-FINAL: @g = external global i32 +; CHECK-INTERESTINGNESS: @h = external {{.*}}global i32 +; CHECK-FINAL: @h = external global i32 + +@g = external dllimport global i32 +@h = external dllexport global i32 diff --git a/llvm/test/tools/llvm-reduce/remove-global-align.ll b/llvm/test/tools/llvm-reduce/remove-global-align.ll new file mode 100644 index 0000000000000..7f7bb466f9e8c --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-global-align.ll @@ -0,0 +1,17 @@ +; RUN: llvm-reduce --delta-passes=global-objects --abort-on-invalid-reduction --test FileCheck --test-arg --check-prefixes=INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=FINAL %s --input-file=%t + +; 
INTERESTINGNESS: @b = global i32 +; FINAL: @b = global i32 0{{$}} + +@b = global i32 0, align 4 + +; INTERESTINGNESS: define {{.*}} @f +; FINAL: define void @f() { +define void @f() align 4 { + ret void +} + +; INTERESTINGNESS: declare {{.*}} @h +; FINAL: declare void @h(){{$}} +declare void @h() align 4 diff --git a/llvm/test/tools/llvm-reduce/remove-section.ll b/llvm/test/tools/llvm-reduce/remove-section.ll new file mode 100644 index 0000000000000..462b450decb25 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-section.ll @@ -0,0 +1,17 @@ +; RUN: llvm-reduce --delta-passes=global-objects --abort-on-invalid-reduction --test FileCheck --test-arg --check-prefixes=INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=FINAL %s --input-file=%t + +; INTERESTINGNESS: @a = global i32 +; FINAL: @a = global i32 0{{$}} + +@a = global i32 0, section "hi" + +; INTERESTINGNESS: define {{.*}} @f +; FINAL: define void @f() { +define void @f() section "hello" { + ret void +} + +; INTERESTINGNESS: declare {{.*}} @g +; FINAL: declare void @g(){{$}} +declare void @g() section "hello" diff --git a/llvm/test/tools/llvm-reduce/remove-thread-local.ll b/llvm/test/tools/llvm-reduce/remove-thread-local.ll new file mode 100644 index 0000000000000..14067059e1221 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-thread-local.ll @@ -0,0 +1,7 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=global-values --test FileCheck --test-arg --check-prefixes=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=CHECK-FINAL %s --input-file=%t + +; CHECK-INTERESTINGNESS: @g = {{.*}}global i32 +; CHECK-FINAL: @g = global i32 + +@g = thread_local(initialexec) global i32 0 diff --git a/llvm/test/tools/llvm-reduce/remove-unnamed-addr.ll b/llvm/test/tools/llvm-reduce/remove-unnamed-addr.ll new file mode 100644 index 0000000000000..dc7338dd6ecd2 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-unnamed-addr.ll @@ -0,0 +1,7 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=global-values --test FileCheck --test-arg --check-prefixes=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=CHECK-FINAL %s --input-file=%t + +; CHECK-INTERESTINGNESS: @g = {{.*}}global i32 +; CHECK-FINAL: @g = global i32 + +@g = unnamed_addr global i32 0 diff --git a/llvm/test/tools/llvm-reduce/remove-visibility.ll b/llvm/test/tools/llvm-reduce/remove-visibility.ll new file mode 100644 index 0000000000000..ce7a7bf407ea3 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/remove-visibility.ll @@ -0,0 +1,7 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=global-values --test FileCheck --test-arg --check-prefixes=CHECK-INTERESTINGNESS --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefix=CHECK-FINAL %s --input-file=%t + +; CHECK-INTERESTINGNESS: @g = {{.*}}global i32 +; CHECK-FINAL: @g = global i32 + +@g = hidden global i32 0 diff --git a/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml b/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml new file mode 100644 index 0000000000000..b9d0cae8ce8a1 --- /dev/null +++ b/llvm/test/tools/obj2yaml/MachO/raw-linkedit.yaml @@ -0,0 +1,184 @@ +# Test that obj2yaml + yaml2obj can round-trip mach-o executables with +# raw __LINKEDIT segments. 
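+#
+# With --raw-segment=<name>, obj2yaml appears to skip decoding the named
+# segment and instead emits its file content as a single hex string (the
+# top-level __LINKEDIT key checked below), leaving the linkedit load
+# commands with only their dataoff/datasize fields. A sketch of the
+# round-trip this test performs, with illustrative file names:
+#   yaml2obj raw-linkedit.yaml -o libfixups.dylib
+#   obj2yaml --raw-segment=data --raw-segment=linkedit libfixups.dylib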
+# +# RUN: yaml2obj %s | obj2yaml --raw-segment=data --raw-segment=linkedit | FileCheck %s +# +# This file was produced using: +# echo "int ext;" > a.c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o a.o a.c -c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib a.o -o liba.dylib -install_name @executable_path/liba.dylib +# echo "extern int ext;" > b.c +# echo "int padding;" >> b.c +# echo "int *p = &ext + 4;" >> b.c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -o b.o b.c -c +# xcrun --sdk iphoneos clang -target arm64-apple-ios15.1 -dynamiclib b.o -o libfixups.dylib -install_name @executable_path/libfixups.dylib -L. -la +# +# CHECK: - sectname: __data +# CHECK: segname: __DATA +# CHECK: content: '0000001000000080' +# CHECK: __LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000 + +--- !mach-o +FileHeader: + magic: 0xFEEDFACF + cputype: 0x100000C + cpusubtype: 0x0 + filetype: 0x6 + ncmds: 16 + sizeofcmds: 816 + flags: 0x100085 + reserved: 0x0 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 0 + vmsize: 16384 + fileoff: 0 + filesize: 16384 + maxprot: 5 + initprot: 5 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x4000 + size: 0 + offset: 0x4000 + align: 0 + reloff: 0x0 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '' + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 16384 + vmsize: 16384 + fileoff: 16384 + filesize: 16384 + maxprot: 3 + initprot: 3 + nsects: 1 + flags: 0 + Sections: + - sectname: __data + segname: __DATA + addr: 0x4000 + size: 8 + offset: 0x4000 + align: 3 + reloff: 0x0 + nreloc: 0 + flags: 0x0 + reserved1: 0x0 + reserved2: 0x0 + reserved3: 0x0 + content: '0000001000000080' + - cmd: LC_SEGMENT_64 + cmdsize: 72 + segname: __LINKEDIT + vmaddr: 32768 + vmsize: 16384 + fileoff: 32768 + filesize: 160 + maxprot: 1 + initprot: 1 + nsects: 0 + flags: 0 + - cmd: LC_ID_DYLIB + cmdsize: 64 + dylib: + name: 24 + timestamp: 1 + current_version: 0 + compatibility_version: 0 + Content: '@executable_path/libfixups.dylib' + ZeroPadBytes: 8 + - cmd: LC_DYLD_CHAINED_FIXUPS + cmdsize: 16 + dataoff: 32768 + datasize: 88 + - cmd: LC_DYLD_EXPORTS_TRIE + cmdsize: 16 + dataoff: 32856 + datasize: 16 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 32880 + nsyms: 2 + stroff: 32912 + strsize: 16 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 0 + iextdefsym: 0 + nextdefsym: 1 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 0 + nindirectsyms: 0 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_UUID + cmdsize: 24 + uuid: 56F7BCE0-C1A7-38E3-A90D-742D8E3D5FA9 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 2 + minos: 983296 + sdk: 983552 + ntools: 1 + Tools: + - tool: 3 + version: 46596096 + - cmd: LC_SOURCE_VERSION + cmdsize: 16 + version: 0 + - cmd: LC_ENCRYPTION_INFO_64 + cmdsize: 24 + cryptoff: 16384 + cryptsize: 0 + cryptid: 0 + pad: 0 + - cmd: LC_LOAD_DYLIB + cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 0 + compatibility_version: 0 + Content: '@executable_path/liba.dylib' + ZeroPadBytes: 5 + - cmd: LC_LOAD_DYLIB + 
cmdsize: 56 + dylib: + name: 24 + timestamp: 2 + current_version: 85917696 + compatibility_version: 65536 + Content: '/usr/lib/libSystem.B.dylib' + ZeroPadBytes: 6 + - cmd: LC_FUNCTION_STARTS + cmdsize: 16 + dataoff: 32872 + datasize: 8 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 32880 + datasize: 0 +__LINKEDIT: 0000000020000000480000004C000000010000000100000000000000000000000300000000000000100000000000000018000000004006000040000000000000000000000100000001020000005F6578740000000000000000015F700006040080800100000000000000000000000000020000000F02000000400000000000000500000001000001000000000000000020005F70005F65787400000000000000 +... diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp index 5a4528e9fa868..451e1cd98ee8a 100644 --- a/llvm/tools/bugpoint/CrashDebugger.cpp +++ b/llvm/tools/bugpoint/CrashDebugger.cpp @@ -786,14 +786,13 @@ bool ReduceCrashingInstructions::TestInsts( for (Module::iterator MI = M->begin(), ME = M->end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) - for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) { - Instruction *Inst = &*I++; - if (!Instructions.count(Inst) && !Inst->isTerminator() && - !Inst->isEHPad() && !Inst->getType()->isTokenTy() && - !Inst->isSwiftError()) { - if (!Inst->getType()->isVoidTy()) - Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - Inst->eraseFromParent(); + for (Instruction &Inst : llvm::make_early_inc_range(*FI)) { + if (!Instructions.count(&Inst) && !Inst.isTerminator() && + !Inst.isEHPad() && !Inst.getType()->isTokenTy() && + !Inst.isSwiftError()) { + if (!Inst.getType()->isVoidTy()) + Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); + Inst.eraseFromParent(); } } diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index ef30c5b65130c..8d35dfe81b52e 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -1081,11 +1081,11 @@ static std::vector, bool>> runLTO() { size_t MaxTasks = Lto->getMaxTasks(); std::vector, bool>> Files(MaxTasks); - auto AddStream = [&](size_t Task) -> std::unique_ptr { + auto AddStream = [&](size_t Task) -> std::unique_ptr { Files[Task].second = !SaveTemps; int FD = getOutputFileName(Filename, /* TempOutFile */ !SaveTemps, Files[Task].first, Task); - return std::make_unique( + return std::make_unique( std::make_unique(FD, true)); }; @@ -1093,7 +1093,7 @@ static std::vector, bool>> runLTO() { *AddStream(Task)->OS << MB->getBuffer(); }; - NativeObjectCache Cache; + FileCache Cache; if (!options::cache_dir.empty()) Cache = check(localCache("ThinLTO", "Thin", options::cache_dir, AddBuffer)); diff --git a/llvm/tools/llvm-cfi-verify/llvm-cfi-verify.cpp b/llvm/tools/llvm-cfi-verify/llvm-cfi-verify.cpp index 3cb0e84f781f9..8c43ea839026a 100644 --- a/llvm/tools/llvm-cfi-verify/llvm-cfi-verify.cpp +++ b/llvm/tools/llvm-cfi-verify/llvm-cfi-verify.cpp @@ -36,10 +36,10 @@ static cl::OptionCategory CFIVerifyCategory("CFI Verify Options"); cl::opt InputFilename(cl::Positional, cl::desc(""), cl::Required, cl::cat(CFIVerifyCategory)); -cl::opt BlacklistFilename(cl::Positional, - cl::desc("[blacklist file]"), - cl::init("-"), - cl::cat(CFIVerifyCategory)); +cl::opt IgnorelistFilename(cl::Positional, + cl::desc("[ignorelist file]"), + cl::init("-"), + cl::cat(CFIVerifyCategory)); cl::opt PrintGraphs( "print-graphs", cl::desc("Print graphs around indirect CF instructions in DOT format."), @@ -103,7 +103,7 @@ static void printInstructionInformation(const 
FileAnalysis &Analysis, static void printInstructionStatus(unsigned BlameLine, bool CFIProtected, const DILineInfo &LineInfo) { if (BlameLine) { - outs() << "Blacklist Match: " << BlacklistFilename << ":" << BlameLine + outs() << "Ignorelist Match: " << IgnorelistFilename << ":" << BlameLine << "\n"; if (CFIProtected) outs() << "====> Unexpected Protected\n"; @@ -240,9 +240,9 @@ printIndirectCFInstructions(FileAnalysis &Analysis, if (!SpecialCaseList) return; - outs() << "\nBlacklist Results:\n"; + outs() << "\nIgnorelist Results:\n"; for (const auto &KV : BlameCounter) { - outs() << " " << BlacklistFilename << ":" << KV.first << " affects " + outs() << " " << IgnorelistFilename << ":" << KV.first << " affects " << KV.second << " indirect CF instructions.\n"; } } @@ -265,12 +265,12 @@ int main(int argc, char **argv) { PrintBlameContext.setValue(PrintBlameContextAll); std::unique_ptr SpecialCaseList; - if (BlacklistFilename != "-") { + if (IgnorelistFilename != "-") { std::string Error; - SpecialCaseList = SpecialCaseList::create({BlacklistFilename}, + SpecialCaseList = SpecialCaseList::create({IgnorelistFilename}, *vfs::getRealFileSystem(), Error); if (!SpecialCaseList) { - errs() << "Failed to get blacklist: " << Error << "\n"; + errs() << "Failed to get ignorelist: " << Error << "\n"; exit(EXIT_FAILURE); } } diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h index 5f67b396ad959..7a53c03547047 100644 --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h @@ -21,6 +21,7 @@ #include "LlvmState.h" #include "MCInstrDescView.h" #include "RegisterAliasing.h" +#include "llvm/ADT/CombinationGenerator.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Error.h" #include @@ -102,128 +103,6 @@ Error randomizeUnsetVariables(const LLVMState &State, const BitVector &ForbiddenRegs, InstructionTemplate &IT); -// Combination generator. -// -// Example: given input {{0, 1}, {2}, {3, 4}} it will produce the following -// combinations: {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4}. -// -// It is important to think of input as vector-of-vectors, where the -// outer vector is the variable space, and inner vector is choice space. -// The number of choices for each variable can be different. -// -// As for implementation, it is useful to think of this as a weird number, -// where each digit (==variable) may have different base (==number of choices). -// Thus modelling of 'produce next combination' is exactly analogous to the -// incrementing of an number - increment lowest digit (pick next choice for the -// variable), and if it wrapped to the beginning then increment next digit. -template -class CombinationGenerator { - template struct WrappingIterator { - using value_type = T; - - const ArrayRef Range; - typename decltype(Range)::const_iterator Position; - - // Rewind the tape, placing the position to again point at the beginning. - void rewind() { Position = Range.begin(); } - - // Advance position forward, possibly wrapping to the beginning. - // Returns whether the wrap happened. - bool operator++() { - ++Position; - bool Wrapped = Position == Range.end(); - if (Wrapped) - rewind(); - return Wrapped; - } - - // Get the value at which we are currently pointing. 
- operator const value_type &() const { return *Position; } - - WrappingIterator(ArrayRef Range_) : Range(Range_) { - assert(!Range.empty() && "The range must not be empty."); - rewind(); - } - }; - - const ArrayRef VariablesChoices; - - void performGeneration( - const function_ref)> Callback) const { - SmallVector, variable_smallsize> - VariablesState; - - // 'increment' of the the whole VariablesState is defined identically to the - // increment of a number: starting from the least significant element, - // increment it, and if it wrapped, then propagate that carry by also - // incrementing next (more significant) element. - auto IncrementState = - [](MutableArrayRef> VariablesState) - -> bool { - for (WrappingIterator &Variable : - llvm::reverse(VariablesState)) { - bool Wrapped = ++Variable; - if (!Wrapped) - return false; // There you go, next combination is ready. - // We have carry - increment more significant variable next.. - } - return true; // MSB variable wrapped, no more unique combinations. - }; - - // Initialize the per-variable state to refer to the possible choices for - // that variable. - VariablesState.reserve(VariablesChoices.size()); - for (ArrayRef VC : VariablesChoices) - VariablesState.emplace_back(VC); - - // Temporary buffer to store each combination before performing Callback. - SmallVector CurrentCombination; - CurrentCombination.resize(VariablesState.size()); - - while (true) { - // Gather the currently-selected variable choices into a vector. - for (auto I : llvm::zip(VariablesState, CurrentCombination)) - std::get<1>(I) = std::get<0>(I); - // And pass the new combination into callback, as intended. - if (/*Abort=*/Callback(CurrentCombination)) - return; - // And tick the state to next combination, which will be unique. - if (IncrementState(VariablesState)) - return; // All combinations produced. - } - }; - -public: - CombinationGenerator(ArrayRef VariablesChoices_) - : VariablesChoices(VariablesChoices_) { -#ifndef NDEBUG - assert(!VariablesChoices.empty() && "There should be some variables."); - llvm::for_each(VariablesChoices, [](ArrayRef VariableChoices) { - assert(!VariableChoices.empty() && - "There must always be some choice, at least a placeholder one."); - }); -#endif - } - - // How many combinations can we produce, max? - // This is at most how many times the callback will be called. - size_t numCombinations() const { - size_t NumVariants = 1; - for (ArrayRef VariableChoices : VariablesChoices) - NumVariants *= VariableChoices.size(); - assert(NumVariants >= 1 && - "We should always end up producing at least one combination"); - return NumVariants; - } - - // Actually perform exhaustive combination generation. - // Each result will be passed into the callback. 
- void generate(const function_ref)> Callback) { - performGeneration(Callback); - } -}; - } // namespace exegesis } // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index 1be119a508d54..9d3fe2dbfbd59 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -918,8 +918,9 @@ std::vector ExegesisX86Target::generateInstructionVariants( continue; case X86::OperandType::OPERAND_COND_CODE: { Exploration = true; - auto CondCodes = - seq_inclusive(X86::CondCode::COND_O, X86::CondCode::LAST_VALID_COND); + auto CondCodes = enum_seq_inclusive(X86::CondCode::COND_O, + X86::CondCode::LAST_VALID_COND, + force_iteration_on_noniterable_enum); Choices.reserve(CondCodes.size()); for (int CondCode : CondCodes) Choices.emplace_back(MCOperand::createImm(CondCode)); diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index 91908eaf4f349..b9faf3d249d45 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -21,6 +21,7 @@ #include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h" #include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h" #include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/MachOPlatform.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h" #include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h" @@ -28,6 +29,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -178,6 +180,11 @@ static cl::opt OrcRuntime("orc-runtime", cl::desc("Use ORC runtime from given path"), cl::init(""), cl::cat(JITLinkCategory)); +static cl::opt AddSelfRelocations( + "add-self-relocations", + cl::desc("Add relocations to function pointers to the current function"), + cl::init(false), cl::cat(JITLinkCategory)); + ExitOnError ExitOnErr; LLVM_ATTRIBUTE_USED void linkComponents() { @@ -194,6 +201,8 @@ extern "C" void llvm_jitlink_setTestResultOverride(int64_t Value) { UseTestResultOverride = true; } +static Error addSelfRelocations(LinkGraph &G); + namespace llvm { static raw_ostream & @@ -1086,6 +1095,9 @@ void Session::modifyPassConfig(const Triple &TT, dumpSectionContents(outs(), G); return Error::success(); }); + + if (AddSelfRelocations) + PassConfig.PostPrunePasses.push_back(addSelfRelocations); } Expected Session::findFileInfo(StringRef FileName) { @@ -1374,14 +1386,21 @@ static Error loadObjects(Session &S) { return Error::success(); } -static Error runChecks(Session &S) { - const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple(); - - if (CheckFiles.empty()) - return Error::success(); - - LLVM_DEBUG(dbgs() << "Running checks...\n"); +namespace { +struct TargetInfo { + const Target *TheTarget; + std::unique_ptr STI; + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr Ctx; + std::unique_ptr Disassembler; + std::unique_ptr MII; + std::unique_ptr MIA; + std::unique_ptr InstPrinter; +}; +} // anonymous namespace +static TargetInfo getTargetInfo(const Triple &TT) { auto TripleName = TT.str(); std::string ErrorStr; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, ErrorStr); @@ -1412,19 +1431,49 @@ static Error runChecks(Session &S) { 
TripleName, inconvertibleErrorCode())); - MCContext Ctx(Triple(TripleName), MAI.get(), MRI.get(), STI.get()); + auto Ctx = std::make_unique(Triple(TripleName), MAI.get(), + MRI.get(), STI.get()); std::unique_ptr Disassembler( - TheTarget->createMCDisassembler(*STI, Ctx)); + TheTarget->createMCDisassembler(*STI, *Ctx)); if (!Disassembler) ExitOnErr(make_error("Unable to create disassembler for " + TripleName, inconvertibleErrorCode())); std::unique_ptr MII(TheTarget->createMCInstrInfo()); + if (!MII) + ExitOnErr(make_error("Unable to create instruction info for" + + TripleName, + inconvertibleErrorCode())); + + std::unique_ptr MIA( + TheTarget->createMCInstrAnalysis(MII.get())); + if (!MIA) + ExitOnErr(make_error( + "Unable to create instruction analysis for" + TripleName, + inconvertibleErrorCode())); std::unique_ptr InstPrinter( TheTarget->createMCInstPrinter(Triple(TripleName), 0, *MAI, *MII, *MRI)); + if (!InstPrinter) + ExitOnErr(make_error( + "Unable to create instruction printer for" + TripleName, + inconvertibleErrorCode())); + return {TheTarget, std::move(STI), std::move(MRI), + std::move(MAI), std::move(Ctx), std::move(Disassembler), + std::move(MII), std::move(MIA), std::move(InstPrinter)}; +} + +static Error runChecks(Session &S) { + const auto &TT = S.ES.getExecutorProcessControl().getTargetTriple(); + + if (CheckFiles.empty()) + return Error::success(); + + LLVM_DEBUG(dbgs() << "Running checks...\n"); + + auto TI = getTargetInfo(TT); auto IsSymbolValid = [&S](StringRef Symbol) { return S.isSymbolRegistered(Symbol); @@ -1448,8 +1497,8 @@ static Error runChecks(Session &S) { RuntimeDyldChecker Checker( IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo, GetGOTInfo, - TT.isLittleEndian() ? support::little : support::big, Disassembler.get(), - InstPrinter.get(), dbgs()); + TT.isLittleEndian() ? 
support::little : support::big, + TI.Disassembler.get(), TI.InstPrinter.get(), dbgs()); std::string CheckLineStart = "# " + CheckName + ":"; for (auto &CheckFile : CheckFiles) { @@ -1463,6 +1512,16 @@ static Error runChecks(Session &S) { return Error::success(); } +static Error addSelfRelocations(LinkGraph &G) { + auto TI = getTargetInfo(G.getTargetTriple()); + for (auto *Sym : G.defined_symbols()) + if (Sym->isCallable()) + if (auto Err = addFunctionPointerRelocationsToCurrentSymbol( + *Sym, G, *TI.Disassembler, *TI.MIA)) + return Err; + return Error::success(); +} + static void dumpSessionStats(Session &S) { if (!ShowSizes) return; diff --git a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp index ef4aec58d2e67..fc7a35f9595f2 100644 --- a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp +++ b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp @@ -299,20 +299,18 @@ static Error verifyAndAddIRObject(MembersPerArchitectureMap &Members, static Error addChildMember(MembersPerArchitectureMap &Members, const object::Archive::Child &M, const Config &C) { - Expected NMOrErr = + Expected NewMemberOrErr = NewArchiveMember::getOldMember(M, C.Deterministic); - if (!NMOrErr) - return NMOrErr.takeError(); + if (!NewMemberOrErr) + return NewMemberOrErr.takeError(); + auto &NewMember = *NewMemberOrErr; - file_magic Magic = identify_magic(NMOrErr->Buf->getBuffer()); + file_magic Magic = identify_magic(NewMember.Buf->getBuffer()); if (Magic == file_magic::bitcode) - return verifyAndAddIRObject(Members, std::move(*NMOrErr), C); + return verifyAndAddIRObject(Members, std::move(NewMember), C); - if (Error E = verifyAndAddMachOObject(Members, std::move(*NMOrErr), C)) - return E; - - return Error::success(); + return verifyAndAddMachOObject(Members, std::move(NewMember), C); } static Error processArchive(MembersPerArchitectureMap &Members, @@ -331,9 +329,10 @@ static Error processArchive(MembersPerArchitectureMap &Members, static Error addArchiveMembers(MembersPerArchitectureMap &Members, std::vector> &ArchiveBuffers, - NewArchiveMember NM, StringRef FileName, const Config &C) { + NewArchiveMember NewMember, StringRef FileName, + const Config &C) { Expected> LibOrErr = - object::Archive::create(NM.Buf->getMemBufferRef()); + object::Archive::create(NewMember.Buf->getMemBufferRef()); if (!LibOrErr) return createFileError(FileName, LibOrErr.takeError()); @@ -342,16 +341,16 @@ addArchiveMembers(MembersPerArchitectureMap &Members, // Update vector ArchiveBuffers with the MemoryBuffers to transfer // ownership. - ArchiveBuffers.push_back(std::move(NM.Buf)); + ArchiveBuffers.push_back(std::move(NewMember.Buf)); return Error::success(); } static Error addUniversalMembers( MembersPerArchitectureMap &Members, std::vector> &UniversalBuffers, - NewArchiveMember NM, StringRef FileName, const Config &C) { + NewArchiveMember NewMember, StringRef FileName, const Config &C) { Expected> BinaryOrErr = - MachOUniversalBinary::create(NM.Buf->getMemBufferRef()); + MachOUniversalBinary::create(NewMember.Buf->getMemBufferRef()); if (!BinaryOrErr) return createFileError(FileName, BinaryOrErr.takeError()); @@ -410,40 +409,39 @@ static Error addUniversalMembers( // Update vector UniversalBuffers with the MemoryBuffers to transfer // ownership. 
- UniversalBuffers.push_back(std::move(NM.Buf)); + UniversalBuffers.push_back(std::move(NewMember.Buf)); return Error::success(); } static Error addMember(MembersPerArchitectureMap &Members, std::vector> &FileBuffers, StringRef FileName, const Config &C) { - Expected NMOrErr = + Expected NewMemberOrErr = NewArchiveMember::getFile(FileName, C.Deterministic); - if (!NMOrErr) - return createFileError(FileName, NMOrErr.takeError()); + if (!NewMemberOrErr) + return createFileError(FileName, NewMemberOrErr.takeError()); + auto &NewMember = *NewMemberOrErr; // For regular archives, use the basename of the object path for the member // name. - NMOrErr->MemberName = sys::path::filename(NMOrErr->MemberName); - file_magic Magic = identify_magic(NMOrErr->Buf->getBuffer()); + NewMember.MemberName = sys::path::filename(NewMember.MemberName); + file_magic Magic = identify_magic(NewMember.Buf->getBuffer()); // Flatten archives. if (Magic == file_magic::archive) - return addArchiveMembers(Members, FileBuffers, std::move(*NMOrErr), + return addArchiveMembers(Members, FileBuffers, std::move(NewMember), FileName, C); // Flatten universal files. if (Magic == file_magic::macho_universal_binary) - return addUniversalMembers(Members, FileBuffers, std::move(*NMOrErr), + return addUniversalMembers(Members, FileBuffers, std::move(NewMember), FileName, C); // Bitcode files. if (Magic == file_magic::bitcode) - return verifyAndAddIRObject(Members, std::move(*NMOrErr), C); + return verifyAndAddIRObject(Members, std::move(NewMember), C); - if (Error E = verifyAndAddMachOObject(Members, std::move(*NMOrErr), C)) - return E; - return Error::success(); + return verifyAndAddMachOObject(Members, std::move(NewMember), C); } static Expected> @@ -477,45 +475,41 @@ static Error createStaticLibrary(const Config &C) { } if (NewMembers.size() == 1) { - if (Error E = - writeArchive(OutputFile, NewMembers.begin()->second, - /*WriteSymtab=*/true, - /*Kind=*/object::Archive::K_DARWIN, C.Deterministic, - /*Thin=*/false)) - return E; - } else { - SmallVector, 2> OutputBinaries; - for (const std::pair> &M : - NewMembers) { - Expected> OutputBufferOrErr = - writeArchiveToBuffer(M.second, - /*WriteSymtab=*/true, - /*Kind=*/object::Archive::K_DARWIN, - C.Deterministic, - /*Thin=*/false); - if (!OutputBufferOrErr) - return OutputBufferOrErr.takeError(); - std::unique_ptr &OutputBuffer = OutputBufferOrErr.get(); - - Expected> ArchiveOrError = - Archive::create(OutputBuffer->getMemBufferRef()); - if (!ArchiveOrError) - return ArchiveOrError.takeError(); - std::unique_ptr &A = ArchiveOrError.get(); - - OutputBinaries.push_back( - OwningBinary(std::move(A), std::move(OutputBuffer))); - } - - Expected> Slices = buildSlices(OutputBinaries); - if (!Slices) - return Slices.takeError(); + return writeArchive(OutputFile, NewMembers.begin()->second, + /*WriteSymtab=*/true, + /*Kind=*/object::Archive::K_DARWIN, C.Deterministic, + /*Thin=*/false); + } - llvm::stable_sort(*Slices); - if (Error E = writeUniversalBinary(*Slices, OutputFile)) - return E; + SmallVector, 2> OutputBinaries; + for (const std::pair> &M : + NewMembers) { + Expected> OutputBufferOrErr = + writeArchiveToBuffer(M.second, + /*WriteSymtab=*/true, + /*Kind=*/object::Archive::K_DARWIN, + C.Deterministic, + /*Thin=*/false); + if (!OutputBufferOrErr) + return OutputBufferOrErr.takeError(); + std::unique_ptr &OutputBuffer = OutputBufferOrErr.get(); + + Expected> ArchiveOrError = + Archive::create(OutputBuffer->getMemBufferRef()); + if (!ArchiveOrError) + return ArchiveOrError.takeError(); + 
std::unique_ptr &A = ArchiveOrError.get(); + + OutputBinaries.push_back( + OwningBinary(std::move(A), std::move(OutputBuffer))); } - return Error::success(); + + Expected> Slices = buildSlices(OutputBinaries); + if (!Slices) + return Slices.takeError(); + + llvm::stable_sort(*Slices); + return writeUniversalBinary(*Slices, OutputFile); } static Expected parseCommandLine(int Argc, char **Argv) { diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp index 2e6705724c22a..995ebacacb870 100644 --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -1097,7 +1097,7 @@ int main(int argc, char **argv) { error("writing merged module failed."); } - auto AddStream = [&](size_t Task) -> std::unique_ptr { + auto AddStream = [&](size_t Task) -> std::unique_ptr { std::string PartFilename = OutputFilename; if (Parallelism != 1) PartFilename += "." + utostr(Task); @@ -1107,7 +1107,7 @@ int main(int argc, char **argv) { std::make_unique(PartFilename, EC, sys::fs::OF_None); if (EC) error("error opening the file '" + PartFilename + "': " + EC.message()); - return std::make_unique(std::move(S)); + return std::make_unique(std::move(S)); }; if (!CodeGen.compileOptimized(AddStream, Parallelism)) diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index b6f37fef9075e..6f6f6c1ed90fd 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -362,20 +362,20 @@ static int run(int argc, char **argv) { if (HasErrors) return 1; - auto AddStream = [&](size_t Task) -> std::unique_ptr { + auto AddStream = [&](size_t Task) -> std::unique_ptr { std::string Path = OutputFilename + "." + utostr(Task); std::error_code EC; auto S = std::make_unique(Path, EC, sys::fs::OF_None); check(EC, Path); - return std::make_unique(std::move(S)); + return std::make_unique(std::move(S)); }; auto AddBuffer = [&](size_t Task, std::unique_ptr MB) { *AddStream(Task)->OS << MB->getBuffer(); }; - NativeObjectCache Cache; + FileCache Cache; if (!CacheDir.empty()) Cache = check(localCache("ThinLTO", "Thin", CacheDir, AddBuffer), "failed to create cache"); diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp index 09a900182d248..54ae3cfb6c924 100644 --- a/llvm/tools/llvm-objdump/COFFDump.cpp +++ b/llvm/tools/llvm-objdump/COFFDump.cpp @@ -278,10 +278,7 @@ static void printTLSDirectory(const COFFObjectFile *Obj) { return; const data_directory *DataDir = Obj->getDataDirectory(COFF::TLS_TABLE); - if (!DataDir) - reportError("missing data dir for TLS table", Obj->getFileName()); - - if (DataDir->RelativeVirtualAddress == 0) + if (!DataDir || DataDir->RelativeVirtualAddress == 0) return; uintptr_t IntPtr = 0; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 163c1cffcefd9..ed605e65bbe83 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -2500,6 +2500,11 @@ static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID, } } +static void invalidArgValue(const opt::Arg *A) { + reportCmdLineError("'" + StringRef(A->getValue()) + + "' is not a valid value for '" + A->getSpelling() + "'"); +} + static std::vector commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) { std::vector Values; @@ -2573,8 +2578,11 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) { commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ); DisassembleZeroes = 
InputArgs.hasArg(OBJDUMP_disassemble_zeroes); if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_dwarf_EQ)) { - DwarfDumpType = - StringSwitch(A->getValue()).Case("frames", DIDT_DebugFrame); + DwarfDumpType = StringSwitch(A->getValue()) + .Case("frames", DIDT_DebugFrame) + .Default(DIDT_Null); + if (DwarfDumpType == DIDT_Null) + invalidArgValue(A); } DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc); FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section); @@ -2611,7 +2619,10 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) { if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) { DbgVariables = StringSwitch(A->getValue()) .Case("ascii", DVASCII) - .Case("unicode", DVUnicode); + .Case("unicode", DVUnicode) + .Default(DVInvalid); + if (DbgVariables == DVInvalid) + invalidArgValue(A); } parseIntArg(InputArgs, OBJDUMP_debug_vars_indent_EQ, DbgIndent); diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h index d9fc3bfe66a5d..864a9920efbef 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/llvm/tools/llvm-objdump/llvm-objdump.h @@ -31,11 +31,7 @@ struct VersionEntry; namespace objdump { -enum DebugVarsFormat { - DVDisabled, - DVUnicode, - DVASCII, -}; +enum DebugVarsFormat { DVDisabled, DVUnicode, DVASCII, DVInvalid }; extern bool ArchiveHeaders; extern int DbgIndent; diff --git a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp index 7cf7b1feb3b78..265c8ac47fb6d 100644 --- a/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp +++ b/llvm/tools/llvm-opt-fuzzer/llvm-opt-fuzzer.cpp @@ -144,7 +144,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { ModulePassManager MPM; ModuleAnalysisManager MAM; - FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); }); PB.registerModuleAnalyses(MAM); PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index e36c707c4fcd8..5798783d4b7ac 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -31,6 +31,10 @@ static cl::opt IgnoreStackSamples("ignore-stack-samples", cl::init(false), cl::ZeroOrMore, cl::desc("Ignore call stack samples for hybrid samples " "and produce context-insensitive profile.")); +static cl::opt + ShowDetailedWarning("show-detailed-warning", cl::init(false), + cl::ZeroOrMore, + cl::desc("Show detailed warning message.")); extern cl::opt PerfTraceFilename; extern cl::opt ShowDisassemblyOnly; @@ -125,7 +129,6 @@ std::shared_ptr FrameStack::getContextKey() { KeyStr->Context = Binary->getExpandedContext(Stack, KeyStr->WasLeafInlined); if (KeyStr->Context.empty()) return nullptr; - KeyStr->genHashCode(); return KeyStr; } @@ -139,8 +142,6 @@ std::shared_ptr ProbeStack::getContextKey() { ProbeBasedKey->Probes); CSProfileGenerator::trimContext( ProbeBasedKey->Probes); - - ProbeBasedKey->genHashCode(); return ProbeBasedKey; } @@ -433,10 +434,16 @@ void HybridPerfReader::unwindSamples() { } // Warn about untracked frames due to missing probes. 
- for (auto Address : AllUntrackedCallsites) - WithColor::warning() << "Profile context truncated due to missing probe " - << "for call instruction at " - << format("0x%" PRIx64, Address) << "\n"; + if (ShowDetailedWarning) { + for (auto Address : AllUntrackedCallsites) + WithColor::warning() << "Profile context truncated due to missing probe " + << "for call instruction at " + << format("0x%" PRIx64, Address) << "\n"; + } + + emitWarningSummary(AllUntrackedCallsites.size(), SampleCounters.size(), + "of profiled contexts are truncated due to missing probe " + "for call instruction."); } bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, @@ -792,7 +799,6 @@ void UnsymbolizedProfileReader::readUnsymbolizedProfile(StringRef FileName) { SampleContext::createCtxVectorFromStr(*I.first, Key->Context); TraceIt.advance(); } - Key->genHashCode(); auto Ret = SampleCounters.emplace(Hashable(Key), SampleCounter()); readSampleCounters(TraceIt, Ret.first->second); @@ -841,7 +847,6 @@ void PerfScriptReader::generateUnsymbolizedProfile() { "Sample counter map should be empty before raw profile generation"); std::shared_ptr Key = std::make_shared(); - Key->genHashCode(); SampleCounters.emplace(Hashable(Key), SampleCounter()); for (const auto &Item : AggregatedSamples) { const PerfSample *Sample = Item.first.getPtr(); @@ -1008,12 +1013,105 @@ void HybridPerfReader::generateUnsymbolizedProfile() { } void PerfScriptReader::warnTruncatedStack() { - for (auto Address : InvalidReturnAddresses) { - WithColor::warning() - << "Truncated stack sample due to invalid return address at " - << format("0x%" PRIx64, Address) - << ", likely caused by frame pointer omission\n"; + if (ShowDetailedWarning) { + for (auto Address : InvalidReturnAddresses) { + WithColor::warning() + << "Truncated stack sample due to invalid return address at " + << format("0x%" PRIx64, Address) + << ", likely caused by frame pointer omission\n"; + } + } + emitWarningSummary( + InvalidReturnAddresses.size(), AggregatedSamples.size(), + "of truncated stack samples due to invalid return address, " + "likely caused by frame pointer omission."); +} + +void PerfScriptReader::emitWarningSummary(uint64_t Num, uint64_t Total, + StringRef Msg) { + if (!Total || !Num) + return; + WithColor::warning() << format("%.2f", static_cast(Num) * 100 / Total) + << "%(" << Num << "/" << Total << ") " << Msg << "\n"; +} + +void PerfScriptReader::warnInvalidRange() { + std::unordered_map, uint64_t, + pair_hash> + Ranges; + + for (const auto &Item : AggregatedSamples) { + const PerfSample *Sample = Item.first.getPtr(); + uint64_t Count = Item.second; + uint64_t EndOffeset = 0; + for (const LBREntry &LBR : Sample->LBRStack) { + uint64_t SourceOffset = Binary->virtualAddrToOffset(LBR.Source); + uint64_t StartOffset = Binary->virtualAddrToOffset(LBR.Target); + if (EndOffeset != 0) + Ranges[{StartOffset, EndOffeset}] += Count; + EndOffeset = SourceOffset; + } + } + + if (Ranges.empty()) { + WithColor::warning() << "No samples in perf script!\n"; + return; + } + + auto WarnInvalidRange = + [&](uint64_t StartOffset, uint64_t EndOffset, StringRef Msg) { + if (!ShowDetailedWarning) + return; + WithColor::warning() + << "[" + << format("%8" PRIx64, Binary->offsetToVirtualAddr(StartOffset)) + << "," + << format("%8" PRIx64, Binary->offsetToVirtualAddr(EndOffset)) + << "]: " << Msg << "\n"; + }; + + const char *EndNotBoundaryMsg = "Range is not on instruction boundary, " + "likely due to profile and binary mismatch."; + const char *DanglingRangeMsg = "Range does not 
+  const char *RangeCrossFuncMsg =
+      "Fall through range should not cross function boundaries, likely due to "
+      "profile and binary mismatch.";
+
+  uint64_t InstNotBoundary = 0;
+  uint64_t UnmatchedRange = 0;
+  uint64_t RangeCrossFunc = 0;
+
+  for (auto &I : Ranges) {
+    uint64_t StartOffset = I.first.first;
+    uint64_t EndOffset = I.first.second;
+
+    if (!Binary->offsetIsCode(StartOffset) ||
+        !Binary->offsetIsTransfer(EndOffset)) {
+      InstNotBoundary++;
+      WarnInvalidRange(StartOffset, EndOffset, EndNotBoundaryMsg);
+    }
+
+    auto *FRange = Binary->findFuncRangeForOffset(StartOffset);
+    if (!FRange) {
+      UnmatchedRange++;
+      WarnInvalidRange(StartOffset, EndOffset, DanglingRangeMsg);
+      continue;
+    }
+
+    if (EndOffset >= FRange->EndOffset) {
+      RangeCrossFunc++;
+      WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
+    }
+  }
+
+  uint64_t TotalRangeNum = Ranges.size();
+  emitWarningSummary(InstNotBoundary, TotalRangeNum,
+                     "of profiled ranges are not on instruction boundary.");
+  emitWarningSummary(UnmatchedRange, TotalRangeNum,
+                     "of profiled ranges do not belong to any functions.");
+  emitWarningSummary(RangeCrossFunc, TotalRangeNum,
+                     "of profiled ranges cross function boundaries.");
 }
 
 void PerfScriptReader::parsePerfTraces() {
@@ -1022,6 +1120,7 @@ void PerfScriptReader::parsePerfTraces() {
 
   // Generate unsymbolized profile.
   warnTruncatedStack();
+  warnInvalidRange();
   generateUnsymbolizedProfile();
 
   if (SkipSymbolization)
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index c9f74313c166d..a4e2fccb43958 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -314,7 +314,12 @@ struct UnwindState {
 struct ContextKey {
   uint64_t HashCode = 0;
   virtual ~ContextKey() = default;
-  uint64_t getHashCode() const { return HashCode; }
+  uint64_t getHashCode() {
+    if (HashCode == 0)
+      genHashCode();
+    return HashCode;
+  }
+  virtual void genHashCode() = 0;
   virtual bool isEqual(const ContextKey *K) const {
     return HashCode == K->HashCode;
   };
@@ -341,7 +346,9 @@ struct StringBasedCtxKey : public ContextKey {
     return Context == Other->Context;
   }
 
-  void genHashCode() { HashCode = hash_value(SampleContextFrames(Context)); }
+  void genHashCode() override {
+    HashCode = hash_value(SampleContextFrames(Context));
+  }
 };
 
 // Probe based context key as the intermediate key of context
@@ -364,7 +371,7 @@ struct ProbeBasedCtxKey : public ContextKey {
                       O->Probes.end());
   }
 
-  void genHashCode() {
+  void genHashCode() override {
     for (const auto *P : Probes) {
       HashCode = hash_combine(HashCode, P);
     }
@@ -581,10 +588,13 @@ class PerfScriptReader : public PerfReaderBase {
   void parseAndAggregateTrace();
   // Parse either an MMAP event or a perf sample
   void parseEventOrSample(TraceStream &TraceIt);
+  void emitWarningSummary(uint64_t Num, uint64_t Total, StringRef Msg);
   // Warn if the relevant mmap event is missing.
   void warnIfMissingMMap();
   // Emit accumulated warnings.
   void warnTruncatedStack();
+  // Warn if range is invalid.
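+  // A range is invalid if it does not start on an instruction boundary, does
+  // not end at a control transfer, falls outside any known function, or
+  // crosses function boundaries. Summaries look like:
+  //   warning: 5.00%(1/20) of profiled ranges are not on instruction boundary.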
+  void warnInvalidRange();
 
   // Extract call stack from the perf trace lines
   bool extractCallstack(TraceStream &TraceIt,
                         SmallVectorImpl<uint64_t> &CallStack);
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 1e857a6d917de..d0c074355629b 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -36,6 +36,11 @@ static cl::opt<bool> PopulateProfileSymbolList(
     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
 
+static cl::opt<bool> FillZeroForAllFuncs(
+    "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
+    cl::desc("Attribute all functions' ranges with zero count "
+             "even if they are not hit by any samples."));
+
 static cl::opt<int32_t> RecursionCompression(
     "compress-recursion",
     cl::desc("Compressing recursion by deduplicating adjacent frame "
@@ -347,14 +352,22 @@ FunctionSamples &ProfileGenerator::getLeafFrameProfile(
 RangeSample
 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
-  // For each range, we search for all ranges of the function it belongs to and
-  // initialize it with zero count, so it remains zero if doesn't hit any
-  // samples. This is to be consistent with compiler that interpret zero count
-  // as unexecuted(cold).
-  for (auto I : RangeCounter) {
-    uint64_t StartOffset = I.first.first;
-    for (const auto &Range : Binary->getRangesForOffset(StartOffset))
-      Ranges[{Range.first, Range.second - 1}] += 0;
+  if (FillZeroForAllFuncs) {
+    for (auto &FuncI : Binary->getAllBinaryFunctions()) {
+      for (auto &R : FuncI.second.Ranges) {
+        Ranges[{R.first, R.second - 1}] += 0;
+      }
+    }
+  } else {
+    // For each range, we search for all ranges of the function it belongs to
+    // and initialize them with zero count, so they remain zero if they don't
+    // hit any samples. This is to be consistent with the compiler, which
+    // interprets a zero count as unexecuted (cold).
+    for (auto I : RangeCounter) {
+      uint64_t StartOffset = I.first.first;
+      for (const auto &Range : Binary->getRangesForOffset(StartOffset))
+        Ranges[{Range.first, Range.second - 1}] += 0;
+    }
   }
   RangeSample DisjointRanges;
   findDisjointRanges(DisjointRanges, Ranges);
@@ -372,7 +385,10 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
     // Disjoint ranges may have range in the middle of two instr,
     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
     // can be Addr1+1 to Addr2-1. We should ignore such range.
-    while (IP.Address <= RangeEnd) {
+    if (IP.Address > RangeEnd)
+      continue;
+
+    do {
       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
       const SampleContextFrameVector &FrameVec =
           Binary->getFrameLocationStack(Offset);
@@ -381,9 +397,7 @@ void ProfileGenerator::populateBodySamplesForAllFunctions(
         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
                                             Count);
       }
-      // Move to next IP within the range.
-      IP.advance();
-    }
+    } while (IP.advance() && IP.Address <= RangeEnd);
   }
 }
 
@@ -525,17 +539,17 @@ void CSProfileGenerator::populateBodySamplesForFunction(
     // Disjoint ranges may have range in the middle of two instr,
     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
    // can be Addr1+1 to Addr2-1. We should ignore such range.
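+    // The do-while below also terminates early once advance() runs off the
+    // end of the decoded code, which the old while loop could not detect.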
- while (IP.Address <= RangeEnd) { + if (IP.Address > RangeEnd) + continue; + + do { uint64_t Offset = Binary->virtualAddrToOffset(IP.Address); auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset); if (LeafLoc.hasValue()) { // Recording body sample for this specific context updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count); } - - // Move to next IP within the range - IP.advance(); - } + } while (IP.advance() && IP.Address <= RangeEnd); } } @@ -643,7 +657,7 @@ void CSProfileGenerator::postProcessProfiles() { CSProfMergeColdContext = false; } - // Trim and merge cold context profile using cold threshold above. + // Trim and merge cold context profile using cold threshold above. if (CSProfTrimColdContext || CSProfMergeColdContext) { SampleContextTrimmer(ProfileMap) .trimAndMergeColdContextProfiles( @@ -701,14 +715,13 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, continue; InstructionPointer IP(Binary, RangeBegin, true); - // Disjoint ranges may have range in the middle of two instr, // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range // can be Addr1+1 to Addr2-1. We should ignore such range. if (IP.Address > RangeEnd) continue; - while (IP.Address <= RangeEnd) { + do { const AddressProbesMap &Address2ProbesMap = Binary->getAddress2ProbesMap(); auto It = Address2ProbesMap.find(IP.Address); @@ -719,9 +732,7 @@ void CSProfileGenerator::extractProbesFromRange(const RangeSample &RangeCounter, ProbeCounter[&Probe] += Count; } } - - IP.advance(); - } + } while (IP.advance() && IP.Address <= RangeEnd); } } diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index bf5c914c5daa8..4fe737eb96c68 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -187,7 +187,7 @@ void ProfiledBinary::load() { // Use function start and return address to infer prolog and epilog ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap); - ProEpilogTracker.inferEpilogOffsets(RetAddrs); + ProEpilogTracker.inferEpilogOffsets(RetOffsets); // TODO: decode other sections. } @@ -397,9 +397,11 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef Bytes, // Populate address maps. 
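+  // Call, return and branch sites are recorded separately; offsetIsTransfer()
+  // later uses them to check that an LBR range ends at a control transfer.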
CodeAddrOffsets.push_back(Offset); if (MCDesc.isCall()) - CallAddrs.insert(Offset); + CallOffsets.insert(Offset); else if (MCDesc.isReturn()) - RetAddrs.insert(Offset); + RetOffsets.insert(Offset); + else if (MCDesc.isBranch()) + BranchOffsets.insert(Offset); if (InvalidInstLength) { WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1); @@ -655,13 +657,19 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP, void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset, uint64_t EndOffset) { - uint32_t Index = getIndexForOffset(StartOffset); - if (CodeAddrOffsets[Index] != StartOffset) + uint64_t RangeBegin = offsetToVirtualAddr(StartOffset); + uint64_t RangeEnd = offsetToVirtualAddr(EndOffset); + InstructionPointer IP(this, RangeBegin, true); + + if (IP.Address != RangeBegin) WithColor::warning() << "Invalid start instruction at " - << format("%8" PRIx64, StartOffset) << "\n"; + << format("%8" PRIx64, RangeBegin) << "\n"; + + if (IP.Address >= RangeEnd) + return; - uint64_t Offset = CodeAddrOffsets[Index]; - while (Offset < EndOffset) { + do { + uint64_t Offset = virtualAddrToOffset(IP.Address); const SampleContextFrameVector &SymbolizedCallStack = getFrameLocationStack(Offset, UsePseudoProbes); uint64_t Size = Offset2InstSizeMap[Offset]; @@ -669,8 +677,7 @@ void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset, // Record instruction size for the corresponding context FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size); - Offset = CodeAddrOffsets[++Index]; - } + } while (IP.advance() && IP.Address < RangeEnd); } InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, @@ -680,18 +687,31 @@ InstructionPointer::InstructionPointer(const ProfiledBinary *Binary, if (RoundToNext) { // we might get address which is not the code // it should round to the next valid address - this->Address = Binary->getAddressforIndex(Index); + if (Index >= Binary->getCodeOffsetsSize()) + this->Address = UINT64_MAX; + else + this->Address = Binary->getAddressforIndex(Index); } } -void InstructionPointer::advance() { +bool InstructionPointer::advance() { Index++; + if (Index >= Binary->getCodeOffsetsSize()) { + Address = UINT64_MAX; + return false; + } Address = Binary->getAddressforIndex(Index); + return true; } -void InstructionPointer::backward() { +bool InstructionPointer::backward() { + if (Index == 0) { + Address = 0; + return false; + } Index--; Address = Binary->getAddressforIndex(Index); + return true; } void InstructionPointer::update(uint64_t Addr) { diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index d6e7ba81fef19..11a8f9b343cc4 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -64,8 +64,8 @@ struct InstructionPointer { uint64_t Index = 0; InstructionPointer(const ProfiledBinary *Binary, uint64_t Address, bool RoundToNext = false); - void advance(); - void backward(); + bool advance(); + bool backward(); void update(uint64_t Addr); }; @@ -73,6 +73,7 @@ using RangesTy = std::vector>; struct BinaryFunction { StringRef FuncName; + // End of range is an exclusive bound. RangesTy Ranges; }; @@ -80,7 +81,7 @@ struct BinaryFunction { // non-continuous ranges, each range corresponds to one FuncRange. struct FuncRange { uint64_t StartOffset; - // EndOffset is a exclusive bound. + // EndOffset is an exclusive bound. 
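+  // i.e. the range covers [StartOffset, EndOffset).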
uint64_t EndOffset; // Function the range belongs to BinaryFunction *Func; @@ -105,7 +106,8 @@ struct PrologEpilogTracker { for (auto I : FuncStartOffsetMap) { PrologEpilogSet.insert(I.first); InstructionPointer IP(Binary, I.first); - IP.advance(); + if (!IP.advance()) + break; PrologEpilogSet.insert(IP.Offset); } } @@ -115,7 +117,8 @@ struct PrologEpilogTracker { for (auto Addr : RetAddrs) { PrologEpilogSet.insert(Addr); InstructionPointer IP(Binary, Addr); - IP.backward(); + if (!IP.backward()) + break; PrologEpilogSet.insert(IP.Offset); } } @@ -204,9 +207,11 @@ class ProfiledBinary { // sorting is needed to fast advance to the next forward/backward instruction. std::vector CodeAddrOffsets; // A set of call instruction offsets. Used by virtual unwinding. - std::unordered_set CallAddrs; + std::unordered_set CallOffsets; // A set of return instruction offsets. Used by virtual unwinding. - std::unordered_set RetAddrs; + std::unordered_set RetOffsets; + // A set of branch instruction offsets. + std::unordered_set BranchOffsets; // Estimate and track function prolog and epilog ranges. PrologEpilogTracker ProEpilogTracker; @@ -305,27 +310,37 @@ class ProfiledBinary { return TextSegmentOffsets; } + bool offsetIsCode(uint64_t Offset) const { + return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end(); + } bool addressIsCode(uint64_t Address) const { uint64_t Offset = virtualAddrToOffset(Address); - return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end(); + return offsetIsCode(Offset); } bool addressIsCall(uint64_t Address) const { uint64_t Offset = virtualAddrToOffset(Address); - return CallAddrs.count(Offset); + return CallOffsets.count(Offset); } bool addressIsReturn(uint64_t Address) const { uint64_t Offset = virtualAddrToOffset(Address); - return RetAddrs.count(Offset); + return RetOffsets.count(Offset); } bool addressInPrologEpilog(uint64_t Address) const { uint64_t Offset = virtualAddrToOffset(Address); return ProEpilogTracker.PrologEpilogSet.count(Offset); } + bool offsetIsTransfer(uint64_t Offset) { + return BranchOffsets.count(Offset) || RetOffsets.count(Offset) || + CallOffsets.count(Offset); + } + uint64_t getAddressforIndex(uint64_t Index) const { return offsetToVirtualAddr(CodeAddrOffsets[Index]); } + size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); } + bool usePseudoProbes() const { return UsePseudoProbes; } // Get the index in CodeAddrOffsets for the address // As we might get an address which is not the code @@ -378,6 +393,11 @@ class ProfiledBinary { return FRange->Func->Ranges; } + const std::unordered_map & + getAllBinaryFunctions() { + return BinaryFunctions; + } + uint32_t getFuncSizeForContext(SampleContext &Context) { return FuncSizeTracker.getFuncSizeForContext(Context); } diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index f47cdc841a7b6..4abea0b1d23d9 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -2298,6 +2298,8 @@ std::string ELFDumper::getDynamicEntry(uint64_t Type, case DT_INIT_ARRAYSZ: case DT_FINI_ARRAYSZ: case DT_PREINIT_ARRAYSZ: + case DT_RELRSZ: + case DT_RELRENT: case DT_ANDROID_RELSZ: case DT_ANDROID_RELASZ: return std::to_string(Value) + " (bytes)"; @@ -5331,6 +5333,14 @@ const NoteType FreeBSDNoteTypes[] = { "NT_FREEBSD_FEATURE_CTL (FreeBSD feature control)"}, }; +const NoteType OpenBSDCoreNoteTypes[] = { + {ELF::NT_OPENBSD_PROCINFO, "NT_OPENBSD_PROCINFO (procinfo structure)"}, + {ELF::NT_OPENBSD_AUXV, "NT_OPENBSD_AUXV (ELF 
auxiliary vector data)"}, + {ELF::NT_OPENBSD_REGS, "NT_OPENBSD_REGS (regular registers)"}, + {ELF::NT_OPENBSD_FPREGS, "NT_OPENBSD_FPREGS (floating point registers)"}, + {ELF::NT_OPENBSD_WCOOKIE, "NT_OPENBSD_WCOOKIE (window cookie)"}, +}; + const NoteType AMDNoteTypes[] = { {ELF::NT_AMD_HSA_CODE_OBJECT_VERSION, "NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"}, @@ -5443,6 +5453,13 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) { return FindNote(FreeBSDNoteTypes); } } + if (Name.startswith("OpenBSD") && ELFType == ELF::ET_CORE) { + // OpenBSD also places the generic core notes in the OpenBSD namespace. + StringRef Result = FindNote(OpenBSDCoreNoteTypes); + if (!Result.empty()) + return Result; + return FindNote(CoreNoteTypes); + } if (Name == "AMD") return FindNote(AMDNoteTypes); if (Name == "AMDGPU") @@ -6879,14 +6896,14 @@ template void LLVMELFDumper::printBBAddrMaps() { FunctionSec = unwrapOrError(this->FileName, this->Obj.getSection(Sec.sh_link)); ListScope L(W, "BBAddrMap"); - Expected> BBAddrMapOrErr = + Expected> BBAddrMapOrErr = this->Obj.decodeBBAddrMap(Sec); if (!BBAddrMapOrErr) { this->reportUniqueWarning("unable to dump " + this->describe(Sec) + ": " + toString(BBAddrMapOrErr.takeError())); continue; } - for (const Elf_BBAddrMap &AM : *BBAddrMapOrErr) { + for (const BBAddrMap &AM : *BBAddrMapOrErr) { DictScope D(W, "Function"); W.printHex("At", AM.Addr); SmallVector FuncSymIndex = @@ -6901,7 +6918,7 @@ template void LLVMELFDumper::printBBAddrMaps() { W.printString("Name", FuncName); ListScope L(W, "BB entries"); - for (const typename Elf_BBAddrMap::BBEntry &BBE : AM.BBEntries) { + for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) { DictScope L(W); W.printHex("Offset", BBE.Offset); W.printHex("Size", BBE.Size); diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp index c67edd6a98bce..38e459cd5425b 100644 --- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp +++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp @@ -48,6 +48,7 @@ class XCOFFDumper : public ObjDumper { void printCsectAuxEnt(XCOFFCsectAuxRef AuxEntRef); void printSectAuxEntForStat(const XCOFFSectAuxEntForStat *AuxEntPtr); void printSymbol(const SymbolRef &); + template void printRelocation(RelTy Reloc); template void printRelocations(ArrayRef Sections); void printAuxiliaryHeader(const XCOFFAuxiliaryHeader32 *AuxHeader); @@ -136,11 +137,33 @@ const EnumEntry RelocationTypeNameclass[] = { #undef ECase }; +template void XCOFFDumper::printRelocation(RelTy Reloc) { + Expected ErrOrSymbolName = + Obj.getSymbolNameByIndex(Reloc.SymbolIndex); + if (Error E = ErrOrSymbolName.takeError()) { + reportUniqueWarning(std::move(E)); + return; + } + StringRef SymbolName = *ErrOrSymbolName; + StringRef RelocName = XCOFF::getRelocationTypeString(Reloc.Type); + if (opts::ExpandRelocs) { + DictScope Group(W, "Relocation"); + W.printHex("Virtual Address", Reloc.VirtualAddress); + W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex); + W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No"); + W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 
1 : 0); + W.printNumber("Length", Reloc.getRelocatedLength()); + W.printEnum("Type", (uint8_t)Reloc.Type, + makeArrayRef(RelocationTypeNameclass)); + } else { + raw_ostream &OS = W.startLine(); + OS << W.hex(Reloc.VirtualAddress) << " " << RelocName << " " << SymbolName + << "(" << Reloc.SymbolIndex << ") " << W.hex(Reloc.Info) << "\n"; + } +} + template void XCOFFDumper::printRelocations(ArrayRef Sections) { - if (!opts::ExpandRelocs) - report_fatal_error("Unexpanded relocation output not implemented."); - ListScope LS(W, "Relocations"); uint16_t Index = 0; for (const Shdr &Sec : Sections) { @@ -161,24 +184,11 @@ void XCOFFDumper::printRelocations(ArrayRef Sections) { W.startLine() << "Section (index: " << Index << ") " << Sec.getName() << " {\n"; - for (const RelTy Reloc : Relocations) { - Expected ErrOrSymbolName = - Obj.getSymbolNameByIndex(Reloc.SymbolIndex); - if (Error E = ErrOrSymbolName.takeError()) { - reportUniqueWarning(std::move(E)); - continue; - } + W.indent(); + + for (const RelTy Reloc : Relocations) + printRelocation(Reloc); - StringRef SymbolName = *ErrOrSymbolName; - DictScope RelocScope(W, "Relocation"); - W.printHex("Virtual Address", Reloc.VirtualAddress); - W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex); - W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No"); - W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 1 : 0); - W.printNumber("Length", Reloc.getRelocatedLength()); - W.printEnum("Type", (uint8_t)Reloc.Type, - makeArrayRef(RelocationTypeNameclass)); - } W.unindent(); W.startLine() << "}\n"; } diff --git a/llvm/tools/llvm-reduce/CMakeLists.txt b/llvm/tools/llvm-reduce/CMakeLists.txt index eb842fccddaf2..911cb6a94eae4 100644 --- a/llvm/tools/llvm-reduce/CMakeLists.txt +++ b/llvm/tools/llvm-reduce/CMakeLists.txt @@ -3,8 +3,11 @@ set(LLVM_LINK_COMPONENTS AllTargetsCodeGens AllTargetsDescs AllTargetsInfos + CodeGen Core IRReader + MC + MIRParser Support Target TransformUtils @@ -12,6 +15,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_tool(llvm-reduce DeltaManager.cpp + ReducerWorkItem.cpp TestRunner.cpp deltas/Delta.cpp deltas/ReduceAliases.cpp @@ -20,6 +24,7 @@ add_llvm_tool(llvm-reduce deltas/ReduceBasicBlocks.cpp deltas/ReduceFunctionBodies.cpp deltas/ReduceFunctions.cpp + deltas/ReduceGlobalObjects.cpp deltas/ReduceGlobalValues.cpp deltas/ReduceGlobalVarInitializers.cpp deltas/ReduceGlobalVars.cpp @@ -30,6 +35,7 @@ add_llvm_tool(llvm-reduce deltas/ReduceSpecialGlobals.cpp deltas/ReduceOperands.cpp deltas/ReduceOperandsToArgs.cpp + deltas/ReduceInstructionsMIR.cpp llvm-reduce.cpp DEPENDS diff --git a/llvm/tools/llvm-reduce/DeltaManager.cpp b/llvm/tools/llvm-reduce/DeltaManager.cpp index dab404094629d..8c2ad91d7bcd7 100644 --- a/llvm/tools/llvm-reduce/DeltaManager.cpp +++ b/llvm/tools/llvm-reduce/DeltaManager.cpp @@ -20,10 +20,12 @@ #include "deltas/ReduceBasicBlocks.h" #include "deltas/ReduceFunctionBodies.h" #include "deltas/ReduceFunctions.h" +#include "deltas/ReduceGlobalObjects.h" #include "deltas/ReduceGlobalValues.h" #include "deltas/ReduceGlobalVarInitializers.h" #include "deltas/ReduceGlobalVars.h" #include "deltas/ReduceInstructions.h" +#include "deltas/ReduceInstructionsMIR.h" #include "deltas/ReduceMetadata.h" #include "deltas/ReduceModuleData.h" #include "deltas/ReduceOperandBundles.h" @@ -46,6 +48,7 @@ static cl::opt DELTA_PASS("functions", reduceFunctionsDeltaPass) \ DELTA_PASS("basic-blocks", reduceBasicBlocksDeltaPass) \ DELTA_PASS("global-values", reduceGlobalValuesDeltaPass) \ + DELTA_PASS("global-objects", 
reduceGlobalObjectsDeltaPass) \ DELTA_PASS("global-initializers", reduceGlobalsInitializersDeltaPass) \ DELTA_PASS("global-variables", reduceGlobalsDeltaPass) \ DELTA_PASS("metadata", reduceMetadataDeltaPass) \ @@ -59,9 +62,16 @@ static cl::opt DELTA_PASS("attributes", reduceAttributesDeltaPass) \ DELTA_PASS("module-data", reduceModuleDataDeltaPass) +#define DELTA_PASSES_MIR \ + DELTA_PASS("instructions", reduceInstructionsMIRDeltaPass) + static void runAllDeltaPasses(TestRunner &Tester) { #define DELTA_PASS(NAME, FUNC) FUNC(Tester); - DELTA_PASSES + if (Tester.getProgram().isMIR()) { + DELTA_PASSES_MIR + } else { + DELTA_PASSES + } #undef DELTA_PASS } @@ -71,7 +81,11 @@ static void runDeltaPassName(TestRunner &Tester, StringRef PassName) { FUNC(Tester); \ return; \ } - DELTA_PASSES + if (Tester.getProgram().isMIR()) { + DELTA_PASSES_MIR + } else { + DELTA_PASSES + } #undef DELTA_PASS errs() << "unknown pass \"" << PassName << "\""; exit(1); @@ -80,7 +94,10 @@ static void runDeltaPassName(TestRunner &Tester, StringRef PassName) { void llvm::printDeltaPasses(raw_ostream &OS) { OS << "Delta passes (pass to `--delta-passes=` as a comma separated list):\n"; #define DELTA_PASS(NAME, FUNC) OS << " " << NAME << "\n"; + OS << " IR:\n"; DELTA_PASSES + OS << " MIR:\n"; + DELTA_PASSES_MIR #undef DELTA_PASS } diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp new file mode 100644 index 0000000000000..e3845e8b897a7 --- /dev/null +++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp @@ -0,0 +1,174 @@ +//===- ReducerWorkItem.cpp - Wrapper for Module and MachineFunction -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ReducerWorkItem.h" +#include "llvm/CodeGen/MIRParser/MIRParser.h" +#include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Cloning.h" + +static std::unique_ptr cloneMF(MachineFunction *SrcMF) { + auto DstMF = std::make_unique( + SrcMF->getFunction(), SrcMF->getTarget(), SrcMF->getSubtarget(), + SrcMF->getFunctionNumber(), SrcMF->getMMI()); + DenseMap Src2DstMBB; + DenseMap Src2DstReg; + + auto *SrcMRI = &SrcMF->getRegInfo(); + auto *DstMRI = &DstMF->getRegInfo(); + + // Create vregs. + for (auto &SrcMBB : *SrcMF) { + for (auto &SrcMI : SrcMBB) { + for (unsigned I = 0, E = SrcMI.getNumOperands(); I < E; ++I) { + auto &DMO = SrcMI.getOperand(I); + if (!DMO.isReg() || !DMO.isDef()) + continue; + Register SrcReg = DMO.getReg(); + if (Register::isPhysicalRegister(SrcReg)) + continue; + auto SrcRC = SrcMRI->getRegClass(SrcReg); + auto DstReg = DstMRI->createVirtualRegister(SrcRC); + Src2DstReg[SrcReg] = DstReg; + } + } + } + + // Clone blocks. + for (auto &SrcMBB : *SrcMF) + Src2DstMBB[&SrcMBB] = DstMF->CreateMachineBasicBlock(); + // Link blocks. 
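+  // (All blocks exist before any instruction is cloned, so MBB operands can
+  // be remapped through Src2DstMBB in the instruction pass below.)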
+  for (auto &SrcMBB : *SrcMF) {
+    auto *DstMBB = Src2DstMBB[&SrcMBB];
+    DstMF->push_back(DstMBB);
+    for (auto It = SrcMBB.succ_begin(), IterEnd = SrcMBB.succ_end();
+         It != IterEnd; ++It) {
+      auto *SrcSuccMBB = *It;
+      auto *DstSuccMBB = Src2DstMBB[SrcSuccMBB];
+      DstMBB->addSuccessor(DstSuccMBB);
+    }
+    for (auto &LI : SrcMBB.liveins())
+      DstMBB->addLiveIn(LI);
+  }
+  // Clone instructions.
+  for (auto &SrcMBB : *SrcMF) {
+    auto *DstMBB = Src2DstMBB[&SrcMBB];
+    for (auto &SrcMI : SrcMBB) {
+      const auto &MCID =
+          DstMF->getSubtarget().getInstrInfo()->get(SrcMI.getOpcode());
+      auto *DstMI = DstMF->CreateMachineInstr(MCID, SrcMI.getDebugLoc(),
+                                              /*NoImplicit=*/true);
+      DstMBB->push_back(DstMI);
+      for (auto &SrcMO : SrcMI.operands()) {
+        MachineOperand DstMO(SrcMO);
+        DstMO.clearParent();
+        // Update vreg.
+        if (DstMO.isReg() && Src2DstReg.count(DstMO.getReg())) {
+          DstMO.setReg(Src2DstReg[DstMO.getReg()]);
+        }
+        // Update MBB.
+        if (DstMO.isMBB()) {
+          DstMO.setMBB(Src2DstMBB[DstMO.getMBB()]);
+        }
+        DstMI->addOperand(DstMO);
+      }
+      DstMI->setMemRefs(*DstMF, SrcMI.memoperands());
+    }
+  }
+
+  DstMF->verify(nullptr, "", /*AbortOnErrors=*/true);
+  return DstMF;
+}
+
+std::unique_ptr<ReducerWorkItem> parseReducerWorkItem(StringRef Filename,
+                                                      LLVMContext &Ctxt,
+                                                      MachineModuleInfo *MMI) {
+  auto MMM = std::make_unique<ReducerWorkItem>();
+  if (MMI) {
+    auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
+    std::unique_ptr<MIRParser> MParser =
+        createMIRParser(std::move(FileOrErr.get()), Ctxt);
+
+    auto SetDataLayout =
+        [&](StringRef DataLayoutTargetTriple) -> Optional<std::string> {
+      return MMI->getTarget().createDataLayout().getStringRepresentation();
+    };
+
+    std::unique_ptr<Module> M = MParser->parseIRModule(SetDataLayout);
+    MParser->parseMachineFunctions(*M, *MMI);
+    MachineFunction *MF = nullptr;
+    for (auto &F : *M) {
+      if (auto *MF4F = MMI->getMachineFunction(F)) {
+        // XXX: Maybe it would not be a lot of effort to handle multiple MFs by
+        // simply storing them in a ReducerWorkItem::SmallVector or similar. The
+        // single MF use-case seems a lot more common though so that will do for
+        // now.
+        assert(!MF && "Only single MF supported!");
+        MF = MF4F;
+      }
+    }
+    assert(MF && "No MF found!");
+
+    MMM->M = std::move(M);
+    MMM->MF = cloneMF(MF);
+  } else {
+    SMDiagnostic Err;
+    std::unique_ptr<Module> Result = parseIRFile(Filename, Err, Ctxt);
+    if (!Result) {
+      Err.print("llvm-reduce", errs());
+      return std::unique_ptr<ReducerWorkItem>();
+    }
+    MMM->M = std::move(Result);
+  }
+  if (verifyReducerWorkItem(*MMM, &errs())) {
+    errs() << "Error: " << Filename << " - input module is broken!\n";
+    return std::unique_ptr<ReducerWorkItem>();
+  }
+  return MMM;
+}
+
+std::unique_ptr<ReducerWorkItem>
+cloneReducerWorkItem(const ReducerWorkItem &MMM) {
+  auto CloneMMM = std::make_unique<ReducerWorkItem>();
+  if (MMM.MF) {
+    // Note that we cannot clone the Module as then we would need a way to
+    // update the cloned MachineFunction's IR references.
+    // XXX: Actually have a look at
+    // std::unique_ptr<Module> CloneModule(const Module &M, ValueToValueMapTy
+    // &VMap);
+    CloneMMM->M = MMM.M;
+    CloneMMM->MF = cloneMF(MMM.MF.get());
+  } else {
+    CloneMMM->M = CloneModule(*MMM.M);
+  }
+  return CloneMMM;
+}
+
+bool verifyReducerWorkItem(const ReducerWorkItem &MMM, raw_fd_ostream *OS) {
+  if (verifyModule(*MMM.M, OS))
+    return true;
+  if (MMM.MF && !MMM.MF->verify(nullptr, "", /*AbortOnErrors=*/false))
+    return true;
+  return false;
+}
+
+void ReducerWorkItem::print(raw_ostream &ROS, void *p) const {
+  if (MF) {
+    printMIR(ROS, *M);
+    printMIR(ROS, *MF);
+  } else {
+    M->print(ROS, nullptr);
+  }
+}
diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.h b/llvm/tools/llvm-reduce/ReducerWorkItem.h
new file mode 100644
index 0000000000000..a86e158d916fe
--- /dev/null
+++ b/llvm/tools/llvm-reduce/ReducerWorkItem.h
@@ -0,0 +1,37 @@
+//===- ReducerWorkItem.h - Wrapper for Module and MachineFunction ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_REDUCE_REDUCERWORKITEM_H
+#define LLVM_TOOLS_LLVM_REDUCE_REDUCERWORKITEM_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+class ReducerWorkItem {
+public:
+  std::shared_ptr<Module> M;
+  std::unique_ptr<MachineFunction> MF;
+  void print(raw_ostream &ROS, void *p = nullptr) const;
+  bool isMIR() { return MF != nullptr; }
+  operator Module &() const { return *M; }
+  operator MachineFunction &() const { return *MF; }
+};
+
+std::unique_ptr<ReducerWorkItem> parseReducerWorkItem(StringRef Filename,
+                                                      LLVMContext &Ctxt,
+                                                      MachineModuleInfo *MMI);
+
+std::unique_ptr<ReducerWorkItem>
+cloneReducerWorkItem(const ReducerWorkItem &MMM);
+
+bool verifyReducerWorkItem(const ReducerWorkItem &MMM, raw_fd_ostream *OS);
+
+#endif
diff --git a/llvm/tools/llvm-reduce/TestRunner.cpp b/llvm/tools/llvm-reduce/TestRunner.cpp
index a3cd717ccd986..e8c12138936fa 100644
--- a/llvm/tools/llvm-reduce/TestRunner.cpp
+++ b/llvm/tools/llvm-reduce/TestRunner.cpp
@@ -12,7 +12,7 @@ using namespace llvm;
 
 TestRunner::TestRunner(StringRef TestName,
                        const std::vector<std::string> &TestArgs,
-                       std::unique_ptr<Module> Program)
+                       std::unique_ptr<ReducerWorkItem> Program)
     : TestName(TestName), TestArgs(TestArgs), Program(std::move(Program)) {
   assert(this->Program && "Initialized with null program?");
 }
diff --git a/llvm/tools/llvm-reduce/TestRunner.h b/llvm/tools/llvm-reduce/TestRunner.h
index 6edc1a1192fa7..c14d0459a0fb4 100644
--- a/llvm/tools/llvm-reduce/TestRunner.h
+++ b/llvm/tools/llvm-reduce/TestRunner.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_TOOLS_LLVM_REDUCE_TESTRUNNER_H
 #define LLVM_TOOLS_LLVM_REDUCE_TESTRUNNER_H
 
+#include "ReducerWorkItem.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Error.h"
@@ -25,16 +26,16 @@ namespace llvm {
 class TestRunner {
 public:
   TestRunner(StringRef TestName, const std::vector<std::string> &TestArgs,
-             std::unique_ptr<Module> Program);
 
+             std::unique_ptr<ReducerWorkItem> Program);
   /// Runs the interesting-ness test for the specified file
   /// @returns 0 if test was successful, 1 if otherwise
   int run(StringRef Filename);
 
   /// Returns the most reduced version of the original testcase
-  Module &getProgram() const { return *Program; }
+  ReducerWorkItem &getProgram() const { return *Program; }
 
-  void setProgram(std::unique_ptr<Module> P) {
+  void setProgram(std::unique_ptr<ReducerWorkItem> P) {
     assert(P && "Setting null program?");
     Program = std::move(P);
   }
@@ -42,7 +43,7 @@ class TestRunner {
 private:
   StringRef TestName;
   const std::vector<std::string> &TestArgs;
-  std::unique_ptr<Module> Program;
+  std::unique_ptr<ReducerWorkItem> Program;
 };
 
 } // namespace llvm
diff --git a/llvm/tools/llvm-reduce/deltas/Delta.cpp b/llvm/tools/llvm-reduce/deltas/Delta.cpp
index f14b326c7e0bd..cabbe9513e5de 100644
--- a/llvm/tools/llvm-reduce/deltas/Delta.cpp
+++ b/llvm/tools/llvm-reduce/deltas/Delta.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "Delta.h"
+#include "ReducerWorkItem.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Transforms/Utils/Cloning.h"
 #include <fstream>
 #include <set>
 
@@ -27,13 +27,14 @@ static cl::opt<bool> AbortOnInvalidReduction(
     "abort-on-invalid-reduction",
     cl::desc("Abort if any reduction results in invalid IR"));
 
-void writeOutput(llvm::Module &M, llvm::StringRef Message);
+void writeOutput(ReducerWorkItem &M, llvm::StringRef Message);
 
-bool isReduced(Module &M, TestRunner &Test, SmallString<128> &CurrentFilepath) {
-  // Write Module to tmp file
+bool isReduced(ReducerWorkItem &M, TestRunner &Test,
+               SmallString<128> &CurrentFilepath) {
+  // Write ReducerWorkItem to tmp file
   int FD;
-  std::error_code EC =
-      sys::fs::createTemporaryFile("llvm-reduce", "ll", FD, CurrentFilepath);
+  std::error_code EC = sys::fs::createTemporaryFile(
+      "llvm-reduce", M.isMIR() ? "mir" : "ll", FD, CurrentFilepath);
   if (EC) {
     errs() << "Error making unique filename: " << EC.message() << "!\n";
     exit(1);
@@ -95,9 +96,10 @@ static bool increaseGranularity(std::vector<Chunk> &Chunks) {
 /// Runs the Delta Debugging algorithm, splits the code into chunks and
 /// reduces the amount of chunks that are considered interesting by the
 /// given test.
-void llvm::runDeltaPass(
+template <typename T>
+void runDeltaPassInt(
     TestRunner &Test, int Targets,
-    function_ref<void(Oracle &, Module &)> ExtractChunksFromModule) {
+    function_ref<void(Oracle &, T &)> ExtractChunksFromModule) {
   assert(Targets >= 0);
   if (!Targets) {
     errs() << "\nNothing to reduce\n";
@@ -110,11 +112,11 @@ void llvm::runDeltaPass(
     exit(1);
   }
 
-  assert(!verifyModule(Test.getProgram(), &errs()) &&
+  assert(!verifyReducerWorkItem(Test.getProgram(), &errs()) &&
          "input module is broken before making changes");
 
   std::vector<Chunk> ChunksStillConsideredInteresting = {{1, Targets}};
-  std::unique_ptr<Module> ReducedProgram;
+  std::unique_ptr<ReducerWorkItem> ReducedProgram;
 
   bool FoundAtLeastOneNewUninterestingChunkWithCurrentGranularity;
   do {
@@ -137,13 +139,14 @@
     });
 
    // Clone module before hacking it up.
-    std::unique_ptr<Module> Clone = CloneModule(Test.getProgram());
+    std::unique_ptr<ReducerWorkItem> Clone =
+        cloneReducerWorkItem(Test.getProgram());
 
    // Generate Module with only Targets inside Current Chunks
    Oracle O(CurrentChunks);
    ExtractChunksFromModule(O, *Clone);
 
    // Some reductions may result in invalid IR. Skip such reductions.
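+    // (For MIR work items, verifyReducerWorkItem also runs the machine
+    // verifier on the cloned MachineFunction.)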
-    if (verifyModule(*Clone, &errs())) {
+    if (verifyReducerWorkItem(*Clone, &errs())) {
       if (AbortOnInvalidReduction) {
         errs() << "Invalid reduction\n";
         exit(1);
@@ -185,3 +188,15 @@
   Test.setProgram(std::move(ReducedProgram));
   errs() << "Couldn't increase anymore.\n";
 }
+
+void llvm::runDeltaPass(
+    TestRunner &Test, int Targets,
+    function_ref<void(Oracle &, Module &)> ExtractChunksFromModule) {
+  runDeltaPassInt(Test, Targets, ExtractChunksFromModule);
+}
+
+void llvm::runDeltaPass(
+    TestRunner &Test, int Targets,
+    function_ref<void(Oracle &, MachineFunction &)> ExtractChunksFromModule) {
+  runDeltaPassInt(Test, Targets, ExtractChunksFromModule);
+}
diff --git a/llvm/tools/llvm-reduce/deltas/Delta.h b/llvm/tools/llvm-reduce/deltas/Delta.h
index 8ebfc9aca4d95..00829ea1ae810 100644
--- a/llvm/tools/llvm-reduce/deltas/Delta.h
+++ b/llvm/tools/llvm-reduce/deltas/Delta.h
@@ -104,6 +104,9 @@ class Oracle {
 void runDeltaPass(
     TestRunner &Test, int Targets,
     function_ref<void(Oracle &, Module &)> ExtractChunksFromModule);
+void runDeltaPass(
+    TestRunner &Test, int Targets,
+    function_ref<void(Oracle &, MachineFunction &)> ExtractChunksFromModule);
 } // namespace llvm
 
 #endif
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp
new file mode 100644
index 0000000000000..cd7627309b555
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp
@@ -0,0 +1,43 @@
+//===- ReduceGlobalObjects.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceGlobalObjects.h"
+#include "llvm/IR/GlobalObject.h"
+
+using namespace llvm;
+
+static bool shouldReduceSection(GlobalObject &GO) { return GO.hasSection(); }
+
+static bool shouldReduceAlign(GlobalObject &GO) {
+  return GO.getAlign().hasValue();
+}
+
+static void reduceGOs(Oracle &O, Module &Program) {
+  for (auto &GO : Program.global_objects()) {
+    if (shouldReduceSection(GO) && !O.shouldKeep())
+      GO.setSection("");
+    if (shouldReduceAlign(GO) && !O.shouldKeep())
+      GO.setAlignment(MaybeAlign());
+  }
+}
+
+static int countGOs(Module &Program) {
+  int SectionCount = count_if(Program.global_objects(), [](GlobalObject &GO) {
+    return shouldReduceSection(GO);
+  });
+  int AlignCount = count_if(Program.global_objects(), [](GlobalObject &GO) {
+    return shouldReduceAlign(GO);
+  });
+  return SectionCount + AlignCount;
+}
+
+void llvm::reduceGlobalObjectsDeltaPass(TestRunner &Test) {
+  outs() << "*** Reducing GlobalObjects...\n";
+  int GVCount = countGOs(Test.getProgram());
+  runDeltaPass(Test, GVCount, reduceGOs);
+}
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h
new file mode 100644
index 0000000000000..7224b9bfbd5f9
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h
@@ -0,0 +1,18 @@
+//===- ReduceGlobalObjects.h ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEGLOBALOBJECTS_H +#define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEGLOBALOBJECTS_H + +#include "Delta.h" + +namespace llvm { +void reduceGlobalObjectsDeltaPass(TestRunner &Test); +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp b/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp index 53509400c8aa6..0110251d26a67 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp @@ -12,40 +12,73 @@ //===----------------------------------------------------------------------===// #include "ReduceGlobalValues.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/GlobalValue.h" using namespace llvm; -static bool isValidDSOLocalReductionGV(GlobalValue &GV) { +static bool shouldReduceDSOLocal(GlobalValue &GV) { return GV.isDSOLocal() && !GV.isImplicitDSOLocal(); } -/// Sets dso_local to false for all global values. -static void extractGVsFromModule(Oracle &O, Module &Program) { - // remove dso_local from global values - for (auto &GV : Program.global_values()) - if (isValidDSOLocalReductionGV(GV) && !O.shouldKeep()) { +static bool shouldReduceVisibility(GlobalValue &GV) { + return GV.getVisibility() != GlobalValue::VisibilityTypes::DefaultVisibility; +} + +static bool shouldReduceUnnamedAddress(GlobalValue &GV) { + return GV.getUnnamedAddr() != GlobalValue::UnnamedAddr::None; +} + +static bool shouldReduceDLLStorageClass(GlobalValue &GV) { + return GV.getDLLStorageClass() != + GlobalValue::DLLStorageClassTypes::DefaultStorageClass; +} + +static bool shouldReduceThreadLocal(GlobalValue &GV) { + return GV.isThreadLocal(); +} + +static void reduceGVs(Oracle &O, Module &Program) { + for (auto &GV : Program.global_values()) { + if (shouldReduceDSOLocal(GV) && !O.shouldKeep()) GV.setDSOLocal(false); + if (shouldReduceVisibility(GV) && !O.shouldKeep()) { + bool IsImplicitDSOLocal = GV.isImplicitDSOLocal(); + GV.setVisibility(GlobalValue::VisibilityTypes::DefaultVisibility); + if (IsImplicitDSOLocal) + GV.setDSOLocal(false); } + if (shouldReduceUnnamedAddress(GV) && !O.shouldKeep()) + GV.setUnnamedAddr(GlobalValue::UnnamedAddr::None); + if (shouldReduceDLLStorageClass(GV) && !O.shouldKeep()) + GV.setDLLStorageClass( + GlobalValue::DLLStorageClassTypes::DefaultStorageClass); + if (shouldReduceThreadLocal(GV) && !O.shouldKeep()) + GV.setThreadLocal(false); + } } -/// Counts the amount of global values with dso_local and displays their -/// respective name & index static int countGVs(Module &Program) { - // TODO: Silence index with --quiet flag - outs() << "----------------------------\n"; - outs() << "GlobalValue Index Reference:\n"; - int GVCount = 0; - for (auto &GV : Program.global_values()) - if (isValidDSOLocalReductionGV(GV)) - outs() << "\t" << ++GVCount << ": " << GV.getName() << "\n"; - outs() << "----------------------------\n"; - return GVCount; + int DSOLocalCount = count_if(Program.global_values(), [](GlobalValue &GV) { + return shouldReduceDSOLocal(GV); + }); + int VisibilityCount = count_if(Program.global_values(), [](GlobalValue &GV) { + return shouldReduceVisibility(GV); + }); + int UnnamedAddrCount = count_if(Program.global_values(), [](GlobalValue &GV) { + return shouldReduceUnnamedAddress(GV); + }); + int DLLStorageClassCount = + count_if(Program.global_values(), + [](GlobalValue &GV) { return 
shouldReduceDLLStorageClass(GV); }); + int ThreadLocalCount = count_if(Program.global_values(), [](GlobalValue &GV) { + return shouldReduceThreadLocal(GV); + }); + return DSOLocalCount + VisibilityCount + UnnamedAddrCount + + DLLStorageClassCount + ThreadLocalCount; } void llvm::reduceGlobalValuesDeltaPass(TestRunner &Test) { outs() << "*** Reducing GlobalValues...\n"; int GVCount = countGVs(Test.getProgram()); - runDeltaPass(Test, GVCount, extractGVsFromModule); + runDeltaPass(Test, GVCount, reduceGVs); } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp new file mode 100644 index 0000000000000..4998de02811ff --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp @@ -0,0 +1,143 @@ +//===- ReduceInstructionsMIR.cpp - Specialized Delta Pass -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting MachineInstr from the MachineFunction. +// +//===----------------------------------------------------------------------===// + +#include "ReduceInstructionsMIR.h" + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" + +using namespace llvm; + +static Register getPrevDefOfRCInMBB(MachineBasicBlock &MBB, + MachineBasicBlock::reverse_iterator &RI, + const TargetRegisterClass *RC, + SetVector &ExcludeMIs) { + auto MRI = &MBB.getParent()->getRegInfo(); + for (MachineBasicBlock::reverse_instr_iterator E = MBB.instr_rend(); RI != E; + ++RI) { + auto &MI = *RI; + // All Def operands explicit and implicit. + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + auto Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) + continue; + + if (MRI->getRegClass(Reg) == RC && !ExcludeMIs.count(MO.getParent())) + return Reg; + } + } + return 0; +} + +static unsigned countInstructions(MachineFunction &MF) { + unsigned Count = 0; + MachineInstr *TopMI = nullptr; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.isTerminator()) + continue; + if (MBB.isEntryBlock() && !TopMI) { + TopMI = &MI; + continue; + } + Count++; + } + } + return Count; +} + +static void extractInstrFromModule(Oracle &O, MachineFunction &MF) { + MachineDominatorTree MDT; + MDT.runOnMachineFunction(MF); + + auto MRI = &MF.getRegInfo(); + SetVector ToDelete; + + MachineInstr *TopMI = nullptr; + + // Mark MIs for deletion according to some criteria. + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (MI.isTerminator()) + continue; + if (MBB.isEntryBlock() && !TopMI) { + TopMI = &MI; + continue; + } + if (!O.shouldKeep()) + ToDelete.insert(&MI); + } + } + + // For each MI to be deleted update users of regs defined by that MI to use + // some other dominating definition (that is not to be deleted). 
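+  // The replacement search walks backwards from the deleted instruction and
+  // then up the dominator tree; if no def of the right register class is
+  // found, a fresh implicit-def is attached to the first instruction of the
+  // entry block.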
+ for (auto *MI : ToDelete) { + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + auto Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg)) + continue; + auto UI = MRI->use_begin(Reg); + auto UE = MRI->use_end(); + + auto RegRC = MRI->getRegClass(Reg); + Register NewReg = 0; + // If this is not a physical register and there are some uses. + if (UI != UE) { + MachineBasicBlock::reverse_iterator RI(*MI); + MachineBasicBlock *BB = MI->getParent(); + ++RI; + while (NewReg == 0 && BB) { + NewReg = getPrevDefOfRCInMBB(*BB, RI, RegRC, ToDelete); + // Prepare for idom(BB). + if (auto *IDM = MDT.getNode(BB)->getIDom()) { + BB = IDM->getBlock(); + RI = BB->rbegin(); + } else { + BB = nullptr; + } + } + } + + // If no dominating definition was found then add an implicit one to the + // first instruction in the entry block. + if (!NewReg && TopMI) { + NewReg = MRI->createVirtualRegister(RegRC); + TopMI->addOperand(MachineOperand::CreateReg( + NewReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/)); + } + + // Update all uses. + while (UI != UE) { + auto &UMO = *UI++; + UMO.setReg(NewReg); + } + } + } + + // Finally delete the MIs. + for (auto *MI : ToDelete) + MI->eraseFromParent(); +} + +void llvm::reduceInstructionsMIRDeltaPass(TestRunner &Test) { + outs() << "*** Reducing Instructions...\n"; + unsigned InstCount = countInstructions(Test.getProgram()); + runDeltaPass(Test, InstCount, extractInstrFromModule); +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h new file mode 100644 index 0000000000000..564cf63a0a323 --- /dev/null +++ b/llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h @@ -0,0 +1,23 @@ +//===- ReduceInstructionsMIR.h - Specialized Delta Pass ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a function which calls the Generic Delta pass in order +// to reduce uninteresting MachineInstr from the MachineFunction. 
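+// It is the MIR counterpart of the IR-level ReduceInstructions pass.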
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINSTRUCTIONS_MIR_H +#define LLVM_TOOLS_LLVM_REDUCE_DELTAS_REDUCEINSTRUCTIONS_MIR_H + +#include "Delta.h" + +namespace llvm { +void reduceInstructionsMIRDeltaPass(TestRunner &Test); +} // namespace llvm + +#endif diff --git a/llvm/tools/llvm-reduce/llvm-reduce.cpp b/llvm/tools/llvm-reduce/llvm-reduce.cpp index 90b7ed5f8272e..11e3dd0394992 100644 --- a/llvm/tools/llvm-reduce/llvm-reduce.cpp +++ b/llvm/tools/llvm-reduce/llvm-reduce.cpp @@ -15,15 +15,21 @@ //===----------------------------------------------------------------------===// #include "DeltaManager.h" +#include "ReducerWorkItem.h" #include "TestRunner.h" #include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/CommandFlags.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Verifier.h" #include "llvm/IRReader/IRReader.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Host.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include #include @@ -39,7 +45,8 @@ static cl::opt Version("v", cl::desc("Alias for -version"), cl::Hidden, static cl::opt PrintDeltaPasses("print-delta-passes", cl::desc("Print list of delta passes, passable to " - "--delta-passes as a comma separated list")); + "--delta-passes as a comma separated list"), + cl::cat(Options)); static cl::opt InputFilename(cl::Positional, cl::Required, cl::desc(""), @@ -55,9 +62,8 @@ static cl::list cl::desc("Arguments passed onto the interesting-ness test"), cl::cat(Options)); -static cl::opt - OutputFilename("output", - cl::desc("Specify the output file. default: reduced.ll")); +static cl::opt OutputFilename( + "output", cl::desc("Specify the output file. default: reduced.ll|mir")); static cl::alias OutputFileAlias("o", cl::desc("Alias for -output"), cl::aliasopt(OutputFilename), cl::cat(Options)); @@ -68,30 +74,27 @@ static cl::opt "with the reduced version!"), cl::cat(Options)); -// Parses IR into a Module and verifies it -static std::unique_ptr parseInputFile(StringRef Filename, - LLVMContext &Ctxt) { - SMDiagnostic Err; - std::unique_ptr Result = parseIRFile(Filename, Err, Ctxt); - if (!Result) { - Err.print("llvm-reduce", errs()); - return Result; - } +enum class InputLanguages { None, IR, MIR }; - if (verifyModule(*Result, &errs())) { - errs() << "Error: " << Filename << " - input module is broken!\n"; - return std::unique_ptr(); - } +static cl::opt + InputLanguage("x", cl::ValueOptional, + cl::desc("Input language ('ir' or 'mir')"), + cl::init(InputLanguages::None), + cl::values(clEnumValN(InputLanguages::IR, "ir", ""), + clEnumValN(InputLanguages::MIR, "mir", "")), + cl::cat(Options)); - return Result; -} +static cl::opt TargetTriple("mtriple", + cl::desc("Set the target triple"), + cl::cat(Options)); + +static codegen::RegisterCodeGenFlags CGF; -void writeOutput(Module &M, StringRef Message) { +void writeOutput(ReducerWorkItem &M, StringRef Message) { if (ReplaceInput) // In-place OutputFilename = InputFilename.c_str(); else if (OutputFilename.empty() || OutputFilename == "-") - OutputFilename = "reduced.ll"; - + OutputFilename = M.isMIR() ? 
"reduced.mir" : "reduced.ll"; std::error_code EC; raw_fd_ostream Out(OutputFilename, EC); if (EC) { @@ -102,21 +105,54 @@ void writeOutput(Module &M, StringRef Message) { errs() << Message << OutputFilename << "\n"; } +static std::unique_ptr createTargetMachine() { + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + + if (TargetTriple == "") + TargetTriple = sys::getDefaultTargetTriple(); + auto TT(Triple::normalize(TargetTriple)); + std::string CPU(codegen::getCPUStr()); + std::string FS(codegen::getFeaturesStr()); + + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); + + return std::unique_ptr( + static_cast(TheTarget->createTargetMachine( + TT, CPU, FS, TargetOptions(), None, None, CodeGenOpt::Default))); +} + int main(int Argc, char **Argv) { InitLLVM X(Argc, Argv); cl::HideUnrelatedOptions({&Options, &getColorCategory()}); cl::ParseCommandLineOptions(Argc, Argv, "LLVM automatic testcase reducer.\n"); + bool ReduceModeMIR = false; + if (InputLanguage != InputLanguages::None) { + if (InputLanguage == InputLanguages::MIR) + ReduceModeMIR = true; + } else if (StringRef(InputFilename).endswith(".mir")) { + ReduceModeMIR = true; + } + if (PrintDeltaPasses) { printDeltaPasses(errs()); return 0; } LLVMContext Context; - std::unique_ptr OriginalProgram = - parseInputFile(InputFilename, Context); - + std::unique_ptr TM; + std::unique_ptr MMI; + std::unique_ptr OriginalProgram; + if (ReduceModeMIR) { + TM = createTargetMachine(); + MMI = std::make_unique(TM.get()); + } + OriginalProgram = parseReducerWorkItem(InputFilename, Context, MMI.get()); if (!OriginalProgram) { return 1; } diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp index b7289bff67ed5..d3b4bf1bf8cc1 100644 --- a/llvm/tools/obj2yaml/macho2yaml.cpp +++ b/llvm/tools/obj2yaml/macho2yaml.cpp @@ -29,6 +29,7 @@ class MachODumper { const object::MachOObjectFile &Obj; std::unique_ptr DWARFCtx; + unsigned RawSegments; void dumpHeader(std::unique_ptr &Y); Error dumpLoadCommands(std::unique_ptr &Y); void dumpLinkEdit(std::unique_ptr &Y); @@ -52,8 +53,8 @@ class MachODumper { public: MachODumper(const object::MachOObjectFile &O, - std::unique_ptr DCtx) - : Obj(O), DWARFCtx(std::move(DCtx)) {} + std::unique_ptr DCtx, unsigned RawSegments) + : Obj(O), DWARFCtx(std::move(DCtx)), RawSegments(RawSegments) {} Expected> dump(); }; @@ -176,6 +177,13 @@ Expected MachODumper::extractSections( if (Expected S = constructSection(Sec, Sections.size() + 1)) { StringRef SecName(S->sectname); + + // Copy data sections if requested. + if ((RawSegments & RawSegments::data) && + StringRef(S->segname).startswith("__DATA")) + S->content = + yaml::BinaryRef(Obj.getSectionContents(Sec.offset, Sec.size)); + if (SecName.startswith("__debug_")) { // If the DWARF section cannot be successfully parsed, emit raw content // instead of an entry in the DWARF section of the YAML. 
@@ -282,7 +290,11 @@ Expected> MachODumper::dump() { dumpHeader(Y); if (Error Err = dumpLoadCommands(Y)) return std::move(Err); - dumpLinkEdit(Y); + if (RawSegments & RawSegments::linkedit) + Y->RawLinkEditSegment = + yaml::BinaryRef(Obj.getSegmentContents("__LINKEDIT")); + else + dumpLinkEdit(Y); return std::move(Y); } @@ -587,9 +599,10 @@ void MachODumper::dumpSymbols(std::unique_ptr &Y) { } } -Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { +Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj, + unsigned RawSegments) { std::unique_ptr DCtx = DWARFContext::create(Obj); - MachODumper Dumper(Obj, std::move(DCtx)); + MachODumper Dumper(Obj, std::move(DCtx), RawSegments); Expected> YAML = Dumper.dump(); if (!YAML) return YAML.takeError(); @@ -602,7 +615,8 @@ Error macho2yaml(raw_ostream &Out, const object::MachOObjectFile &Obj) { return Error::success(); } -Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { +Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj, + unsigned RawSegments) { yaml::YamlObjectFile YAMLFile; YAMLFile.FatMachO.reset(new MachOYAML::UniversalBinary()); MachOYAML::UniversalBinary &YAML = *YAMLFile.FatMachO; @@ -624,7 +638,7 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { return SliceObj.takeError(); std::unique_ptr DCtx = DWARFContext::create(*SliceObj.get()); - MachODumper Dumper(*SliceObj.get(), std::move(DCtx)); + MachODumper Dumper(*SliceObj.get(), std::move(DCtx), RawSegments); Expected> YAMLObj = Dumper.dump(); if (!YAMLObj) return YAMLObj.takeError(); @@ -636,12 +650,13 @@ Error macho2yaml(raw_ostream &Out, const object::MachOUniversalBinary &Obj) { return Error::success(); } -Error macho2yaml(raw_ostream &Out, const object::Binary &Binary) { +Error macho2yaml(raw_ostream &Out, const object::Binary &Binary, + unsigned RawSegments) { if (const auto *MachOObj = dyn_cast(&Binary)) - return macho2yaml(Out, *MachOObj); + return macho2yaml(Out, *MachOObj, RawSegments); if (const auto *MachOObj = dyn_cast(&Binary)) - return macho2yaml(Out, *MachOObj); + return macho2yaml(Out, *MachOObj, RawSegments); llvm_unreachable("unexpected Mach-O file format"); } diff --git a/llvm/tools/obj2yaml/obj2yaml.cpp b/llvm/tools/obj2yaml/obj2yaml.cpp index e9e47d1a2b186..9c7a3385850db 100644 --- a/llvm/tools/obj2yaml/obj2yaml.cpp +++ b/llvm/tools/obj2yaml/obj2yaml.cpp @@ -1,4 +1,4 @@ -//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -------*- C++ -*-===// +//===------ utils/obj2yaml.cpp - obj2yaml conversion tool -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -18,6 +18,14 @@ using namespace llvm; using namespace llvm::object; +static cl::opt + InputFilename(cl::Positional, cl::desc(""), cl::init("-")); +static cl::bits RawSegment( + "raw-segment", + cl::desc("Mach-O: dump the raw contents of the listed segments instead of " + "parsing them:"), + cl::values(clEnumVal(data, "__DATA"), clEnumVal(linkedit, "__LINKEDIT"))); + static Error dumpObject(const ObjectFile &Obj) { if (Obj.isCOFF()) return errorCodeToError(coff2yaml(outs(), cast(Obj))); @@ -54,7 +62,7 @@ static Error dumpInput(StringRef File) { // Universal MachO is not a subclass of ObjectFile, so it needs to be handled // here with the other binary types. 
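  // The --raw-segment selection is forwarded so that plain and universal
  // Mach-O inputs honor it the same way.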
if (Binary.isMachO() || Binary.isMachOUniversalBinary()) - return macho2yaml(outs(), Binary); + return macho2yaml(outs(), Binary, RawSegment.getBits()); if (ObjectFile *Obj = dyn_cast<ObjectFile>(&Binary)) return dumpObject(*Obj); if (MinidumpFile *Minidump = dyn_cast<MinidumpFile>(&Binary)) @@ -74,9 +82,6 @@ static void reportError(StringRef Input, Error Err) { errs().flush(); } -cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input file>"), - cl::init("-")); - int main(int argc, char *argv[]) { InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv); diff --git a/llvm/tools/obj2yaml/obj2yaml.h b/llvm/tools/obj2yaml/obj2yaml.h index fdd9b2a00185c..c026482eaf0cf 100644 --- a/llvm/tools/obj2yaml/obj2yaml.h +++ b/llvm/tools/obj2yaml/obj2yaml.h @@ -20,12 +20,13 @@ #include "llvm/Support/MemoryBufferRef.h" #include <system_error> +enum RawSegments : unsigned { none = 0, data = 1, linkedit = 1 << 1 }; std::error_code coff2yaml(llvm::raw_ostream &Out, const llvm::object::COFFObjectFile &Obj); llvm::Error elf2yaml(llvm::raw_ostream &Out, const llvm::object::ObjectFile &Obj); -llvm::Error macho2yaml(llvm::raw_ostream &Out, - const llvm::object::Binary &Obj); +llvm::Error macho2yaml(llvm::raw_ostream &Out, const llvm::object::Binary &Obj, + unsigned RawSegments); llvm::Error minidump2yaml(llvm::raw_ostream &Out, const llvm::object::MinidumpFile &Obj); llvm::Error xcoff2yaml(llvm::raw_ostream &Out, diff --git a/llvm/tools/opt-viewer/optrecord.py b/llvm/tools/opt-viewer/optrecord.py index bd5ef6c7393e9..6a53e13f4c2b8 100644 --- a/llvm/tools/opt-viewer/optrecord.py +++ b/llvm/tools/opt-viewer/optrecord.py @@ -274,7 +274,7 @@ def get_remarks(input_file, filter_=None): file_remarks = defaultdict(functools.partial(defaultdict, list)) with io.open(input_file, encoding = 'utf-8') as f: - docs = yaml.safe_load_all(f, Loader=Loader) + docs = yaml.load_all(f, Loader=Loader) filter_e = None if filter_: diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 794c01f31c11f..631d8eed5d7a8 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -344,9 +344,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, if (Name == "asan-pipeline") { MPM.addPass( RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>()); - MPM.addPass( - createModuleToFunctionPassAdaptor(AddressSanitizerPass(Opts))); - MPM.addPass(ModuleAddressSanitizerPass()); + MPM.addPass(ModuleAddressSanitizerPass(Opts)); return true; } else if (Name == "asan-function-pipeline") { MPM.addPass( diff --git a/llvm/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp b/llvm/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp index d19629858a429..b0b80131bf48f 100644 --- a/llvm/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp +++ b/llvm/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp @@ -34,7 +34,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { const auto Info = VFABI::tryDemangleForVFABI(MangledName, *M); // Do not optimize away the return value. 
Inspired by - // https://github.com/google/benchmark/blob/master/include/benchmark/benchmark.h#L307-L345 + // https://github.com/google/benchmark/blob/main/include/benchmark/benchmark.h#L307-L345 asm volatile("" : : "r,m"(Info) : "memory"); return 0; diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index e64b1a0c7b3e6..f98624f60f9c0 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_unittest(ADTTests BreadthFirstIteratorTest.cpp BumpPtrListTest.cpp CoalescingBitVectorTest.cpp + CombinationGeneratorTest.cpp DAGDeltaAlgorithmTest.cpp DeltaAlgorithmTest.cpp DenseMapTest.cpp diff --git a/llvm/unittests/tools/llvm-exegesis/SnippetGeneratorTest.cpp b/llvm/unittests/ADT/CombinationGeneratorTest.cpp similarity index 91% rename from llvm/unittests/tools/llvm-exegesis/SnippetGeneratorTest.cpp rename to llvm/unittests/ADT/CombinationGeneratorTest.cpp index 760caa8ce325a..8d25457e67a3b 100644 --- a/llvm/unittests/tools/llvm-exegesis/SnippetGeneratorTest.cpp +++ b/llvm/unittests/ADT/CombinationGeneratorTest.cpp @@ -1,4 +1,4 @@ -//===-- SnippetGeneratorTest.cpp --------------------------------*- C++ -*-===// +//===- llvm/unittest/ADT/CombinationGeneratorTest.cpp ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,13 +6,21 @@ // //===----------------------------------------------------------------------===// -#include "SnippetGenerator.h" +#include "llvm/ADT/CombinationGenerator.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLForwardCompat.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/ErrorHandling.h" #include "gmock/gmock.h" #include "gtest/gtest.h" -#include <unordered_set> +#include <algorithm> +#include <cstddef> +#include <iterator> +#include <string> +#include <vector> -namespace llvm { -namespace exegesis { +using namespace llvm; namespace { @@ -170,6 +178,4 @@ TEST(CombinationGenerator, Singleton) { ASSERT_THAT(Variants, ::testing::ContainerEq(ExpectedVariants)); } -} // namespace -} // namespace exegesis -} // namespace llvm +} // end anonymous namespace diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index de2c8968aac17..c286cd5263a4d 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -307,6 +307,13 @@ TEST(STLExtrasTest, ToVector) { EXPECT_EQ(I, Enumerated[I].index()); EXPECT_EQ(v[I], Enumerated[I].value()); } + + auto EnumeratedImplicitSize = to_vector(enumerate(v)); + ASSERT_EQ(3u, EnumeratedImplicitSize.size()); + for (size_t I = 0; I < v.size(); ++I) { + EXPECT_EQ(I, EnumeratedImplicitSize[I].index()); + EXPECT_EQ(v[I], EnumeratedImplicitSize[I].value()); + } } TEST(STLExtrasTest, ConcatRange) { diff --git a/llvm/unittests/ADT/SequenceTest.cpp b/llvm/unittests/ADT/SequenceTest.cpp index 8ac1e208a946c..71fae0b56ffda 100644 --- a/llvm/unittests/ADT/SequenceTest.cpp +++ b/llvm/unittests/ADT/SequenceTest.cpp @@ -16,6 +16,7 @@ using namespace llvm; using testing::ElementsAre; +using testing::IsEmpty; namespace { @@ -68,17 +69,6 @@ TYPED_TEST(StrongIntTest, Operations) { EXPECT_EQ(Actual - (Actual + 2), -2); } -TEST(StrongIntTest, Enums) { - enum UntypedEnum { A = 3 }; - EXPECT_EQ(CheckedInt::from(A).to<UntypedEnum>(), A); - - enum TypedEnum : uint32_t { B = 3 }; - EXPECT_EQ(CheckedInt::from(B).to<TypedEnum>(), B); - - enum class ScopedEnum : uint16_t { C = 3 }; - 
EXPECT_EQ(CheckedInt::from(ScopedEnum::C).to<ScopedEnum>(), ScopedEnum::C); -} - #if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) TEST(StrongIntDeathTest, OutOfBounds) { // Values above 'INTMAX_MAX' are not representable. @@ -215,4 +205,94 @@ TEST(SequenceTest, Dereference) { EXPECT_EQ(Backward[2], 7); } -} // anonymous namespace +enum UntypedEnum { A = 3 }; +enum TypedEnum : uint32_t { B = 3 }; + +namespace X { +enum class ScopedEnum : uint16_t { C = 3 }; +} // namespace X + +struct S { + enum NestedEnum { D = 4 }; + enum NestedEnum2 { E = 5 }; + +private: + enum NestedEnum3 { F = 6 }; + friend struct llvm::enum_iteration_traits<NestedEnum3>; + +public: + static auto getNestedEnum3() { return NestedEnum3::F; } +}; + +} // namespace + +namespace llvm { + +template <> struct enum_iteration_traits<UntypedEnum> { + static constexpr bool is_iterable = true; +}; + +template <> struct enum_iteration_traits<TypedEnum> { + static constexpr bool is_iterable = true; +}; + +template <> struct enum_iteration_traits<X::ScopedEnum> { + static constexpr bool is_iterable = true; +}; + +template <> struct enum_iteration_traits<S::NestedEnum> { + static constexpr bool is_iterable = true; +}; + +template <> struct enum_iteration_traits<S::NestedEnum3> { + static constexpr bool is_iterable = true; +}; + +} // namespace llvm + +namespace { + +TEST(StrongIntTest, Enums) { + EXPECT_EQ(CheckedInt::from(A).to<UntypedEnum>(), A); + EXPECT_EQ(CheckedInt::from(B).to<TypedEnum>(), B); + EXPECT_EQ(CheckedInt::from(X::ScopedEnum::C).to<X::ScopedEnum>(), + X::ScopedEnum::C); +} + +TEST(SequenceTest, IterableEnums) { + EXPECT_THAT(enum_seq(UntypedEnum::A, UntypedEnum::A), IsEmpty()); + EXPECT_THAT(enum_seq_inclusive(UntypedEnum::A, UntypedEnum::A), + ElementsAre(UntypedEnum::A)); + + EXPECT_THAT(enum_seq(TypedEnum::B, TypedEnum::B), IsEmpty()); + EXPECT_THAT(enum_seq_inclusive(TypedEnum::B, TypedEnum::B), + ElementsAre(TypedEnum::B)); + + EXPECT_THAT(enum_seq(X::ScopedEnum::C, X::ScopedEnum::C), IsEmpty()); + EXPECT_THAT(enum_seq_inclusive(X::ScopedEnum::C, X::ScopedEnum::C), + ElementsAre(X::ScopedEnum::C)); + + EXPECT_THAT(enum_seq_inclusive(S::NestedEnum::D, S::NestedEnum::D), + ElementsAre(S::NestedEnum::D)); + EXPECT_THAT(enum_seq_inclusive(S::getNestedEnum3(), S::getNestedEnum3()), + ElementsAre(S::getNestedEnum3())); +} + +TEST(SequenceTest, NonIterableEnums) { + EXPECT_THAT(enum_seq(S::NestedEnum2::E, S::NestedEnum2::E, + force_iteration_on_noniterable_enum), + IsEmpty()); + EXPECT_THAT(enum_seq_inclusive(S::NestedEnum2::E, S::NestedEnum2::E, + force_iteration_on_noniterable_enum), + ElementsAre(S::NestedEnum2::E)); + + // Check that this also works with enums marked as iterable. 
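The enum_iteration_traits specializations above are the opt-in that makes enum_seq and enum_seq_inclusive legal for a user-defined enum. A self-contained sketch of the client-side pattern; Channel is a hypothetical example enum, not part of this patch:

    #include "llvm/ADT/Sequence.h"

    enum class Channel : unsigned { R = 0, G = 1, B = 2 };

    namespace llvm {
    // Same opt-in as the specializations above.
    template <> struct enum_iteration_traits<Channel> {
      static constexpr bool is_iterable = true;
    };
    } // namespace llvm

    inline void forEachChannel() {
      // Visits R, G, B; the non-inclusive enum_seq(R, B) would stop before B.
      for (Channel C : llvm::enum_seq_inclusive(Channel::R, Channel::B))
        (void)C;
    }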
+ EXPECT_THAT(enum_seq(UntypedEnum::A, UntypedEnum::A, + force_iteration_on_noniterable_enum), + IsEmpty()); + EXPECT_THAT(enum_seq_inclusive(UntypedEnum::A, UntypedEnum::A, + force_iteration_on_noniterable_enum), + ElementsAre(UntypedEnum::A)); +} + +} // namespace diff --git a/llvm/unittests/ADT/StringExtrasTest.cpp b/llvm/unittests/ADT/StringExtrasTest.cpp index 20437f9fbbb39..49a9bcd79db99 100644 --- a/llvm/unittests/ADT/StringExtrasTest.cpp +++ b/llvm/unittests/ADT/StringExtrasTest.cpp @@ -91,7 +91,7 @@ TEST(StringExtrasTest, ToAndFromHex) { EXPECT_EQ(EvenData, fromHex(EvenStr)); EXPECT_EQ(StringRef(EvenStr).lower(), toHex(EvenData, true)); - std::string InvalidStr = "A5ZX"; + std::string InvalidStr = "A50\xFF"; std::string IgnoredOutput; EXPECT_FALSE(tryGetFromHex(InvalidStr, IgnoredOutput)); } diff --git a/llvm/unittests/ADT/TripleTest.cpp b/llvm/unittests/ADT/TripleTest.cpp index a275732ed6eb6..a6a79ed5a39e9 100644 --- a/llvm/unittests/ADT/TripleTest.cpp +++ b/llvm/unittests/ADT/TripleTest.cpp @@ -224,6 +224,16 @@ TEST(TripleTest, ParsedIDs) { EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); EXPECT_EQ(Triple::UnknownOS, T.getOS()); + T = Triple("spirv32-unknown-unknown"); + EXPECT_EQ(Triple::spirv32, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + + T = Triple("spirv64-unknown-unknown"); + EXPECT_EQ(Triple::spirv64, T.getArch()); + EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); + EXPECT_EQ(Triple::UnknownOS, T.getOS()); + T = Triple("x86_64-unknown-ananas"); EXPECT_EQ(Triple::x86_64, T.getArch()); EXPECT_EQ(Triple::UnknownVendor, T.getVendor()); @@ -865,6 +875,16 @@ TEST(TripleTest, BitWidthPredicates) { EXPECT_FALSE(T.isArch32Bit()); EXPECT_TRUE(T.isArch64Bit()); + T.setArch(Triple::spirv32); + EXPECT_FALSE(T.isArch16Bit()); + EXPECT_TRUE(T.isArch32Bit()); + EXPECT_FALSE(T.isArch64Bit()); + + T.setArch(Triple::spirv64); + EXPECT_FALSE(T.isArch16Bit()); + EXPECT_FALSE(T.isArch32Bit()); + EXPECT_TRUE(T.isArch64Bit()); + T.setArch(Triple::sparc); EXPECT_FALSE(T.isArch16Bit()); EXPECT_TRUE(T.isArch32Bit()); @@ -1032,6 +1052,14 @@ TEST(TripleTest, BitWidthArchVariants) { EXPECT_EQ(Triple::spir, T.get32BitArchVariant().getArch()); EXPECT_EQ(Triple::spir64, T.get64BitArchVariant().getArch()); + T.setArch(Triple::spirv32); + EXPECT_EQ(Triple::spirv32, T.get32BitArchVariant().getArch()); + EXPECT_EQ(Triple::spirv64, T.get64BitArchVariant().getArch()); + + T.setArch(Triple::spirv64); + EXPECT_EQ(Triple::spirv32, T.get32BitArchVariant().getArch()); + EXPECT_EQ(Triple::spirv64, T.get64BitArchVariant().getArch()); + T.setArch(Triple::wasm32); EXPECT_EQ(Triple::wasm32, T.get32BitArchVariant().getArch()); EXPECT_EQ(Triple::wasm64, T.get64BitArchVariant().getArch()); diff --git a/llvm/unittests/Analysis/LazyCallGraphTest.cpp b/llvm/unittests/Analysis/LazyCallGraphTest.cpp index b154c6f290889..d6e73f3a95f2a 100644 --- a/llvm/unittests/Analysis/LazyCallGraphTest.cpp +++ b/llvm/unittests/Analysis/LazyCallGraphTest.cpp @@ -1978,7 +1978,8 @@ TEST(LazyCallGraphTest, HandleBlockAddress) { LazyCallGraph::Node &G = *CG.lookup(lookupFunction(*M, "g")); EXPECT_EQ(&FRC, CG.lookupRefSCC(F)); EXPECT_EQ(&GRC, CG.lookupRefSCC(G)); - EXPECT_TRUE(GRC.isParentOf(FRC)); + EXPECT_FALSE(GRC.isParentOf(FRC)); + EXPECT_FALSE(FRC.isParentOf(GRC)); } // Test that a blockaddress that refers to itself creates no new RefSCC diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index 
2664ffa22faaa..2101039abc15e 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1538,4 +1538,212 @@ TEST_F(ScalarEvolutionsTest, SCEVUDivFloorCeiling) { }); } +TEST_F(ScalarEvolutionsTest, ComputeMaxTripCountFromArrayNormal) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "define void @foo(i32 signext %len) { " + "entry: " + " %a = alloca [7 x i32], align 4 " + " %cmp4 = icmp sgt i32 %len, 0 " + " br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup " + "for.body.preheader: " + " br label %for.body " + "for.cond.cleanup.loopexit: " + " br label %for.cond.cleanup " + "for.cond.cleanup: " + " ret void " + "for.body: " + " %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] " + " %idxprom = zext i32 %iv to i64 " + " %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* %a, i64 0, \ + i64 %idxprom " + " store i32 0, i32* %arrayidx, align 4 " + " %inc = add nuw nsw i32 %iv, 1 " + " %cmp = icmp slt i32 %inc, %len " + " br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); + const Loop *L = cast<SCEVAddRecExpr>(ScevIV)->getLoop(); + + const SCEV *ITC = SE.getConstantMaxTripCountFromArray(L); + EXPECT_FALSE(isa<SCEVCouldNotCompute>(ITC)); + EXPECT_TRUE(isa<SCEVConstant>(ITC)); + EXPECT_EQ(cast<SCEVConstant>(ITC)->getAPInt().getSExtValue(), 8); + }); +} + +TEST_F(ScalarEvolutionsTest, ComputeMaxTripCountFromZeroArray) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "define void @foo(i32 signext %len) { " + "entry: " + " %a = alloca [0 x i32], align 4 " + " %cmp4 = icmp sgt i32 %len, 0 " + " br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup " + "for.body.preheader: " + " br label %for.body " + "for.cond.cleanup.loopexit: " + " br label %for.cond.cleanup " + "for.cond.cleanup: " + " ret void " + "for.body: " + " %iv = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] " + " %idxprom = zext i32 %iv to i64 " + " %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* %a, i64 0, \ + i64 %idxprom " + " store i32 0, i32* %arrayidx, align 4 " + " %inc = add nuw nsw i32 %iv, 1 " + " %cmp = icmp slt i32 %inc, %len " + " br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); + const Loop *L = cast<SCEVAddRecExpr>(ScevIV)->getLoop(); + + const SCEV *ITC = SE.getConstantMaxTripCountFromArray(L); + EXPECT_FALSE(isa<SCEVCouldNotCompute>(ITC)); + EXPECT_TRUE(isa<SCEVConstant>(ITC)); + EXPECT_EQ(cast<SCEVConstant>(ITC)->getAPInt().getSExtValue(), 1); + }); +} + +TEST_F(ScalarEvolutionsTest, ComputeMaxTripCountFromExtremArray) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "define void @foo(i32 signext %len) { " + "entry: " + " %a = alloca [4294967295 x i1], align 4 " + " %cmp4 = icmp sgt i32 %len, 0 " + " br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup " + "for.body.preheader: " + " br label %for.body " + "for.cond.cleanup.loopexit: " + " br label %for.cond.cleanup " + "for.cond.cleanup: " + " ret void " + "for.body: " + " %iv = phi i32 [ %inc, %for.body 
], [ 0, %for.body.preheader ] " + " %idxprom = zext i32 %iv to i64 " + " %arrayidx = getelementptr inbounds [4294967295 x i1], \ + [4294967295 x i1]* %a, i64 0, i64 %idxprom " + " store i1 0, i1* %arrayidx, align 4 " + " %inc = add nuw nsw i32 %iv, 1 " + " %cmp = icmp slt i32 %inc, %len " + " br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); + const Loop *L = cast<SCEVAddRecExpr>(ScevIV)->getLoop(); + + const SCEV *ITC = SE.getConstantMaxTripCountFromArray(L); + EXPECT_TRUE(isa<SCEVCouldNotCompute>(ITC)); + }); +} + +TEST_F(ScalarEvolutionsTest, ComputeMaxTripCountFromArrayInBranch) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "define void @foo(i32 signext %len) { " + "entry: " + " %a = alloca [8 x i32], align 4 " + " br label %for.cond " + "for.cond: " + " %iv = phi i32 [ %inc, %for.inc ], [ 0, %entry ] " + " %cmp = icmp slt i32 %iv, %len " + " br i1 %cmp, label %for.body, label %for.cond.cleanup " + "for.cond.cleanup: " + " br label %for.end " + "for.body: " + " %cmp1 = icmp slt i32 %iv, 8 " + " br i1 %cmp1, label %if.then, label %if.end " + "if.then: " + " %idxprom = sext i32 %iv to i64 " + " %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %a, i64 0, \ + i64 %idxprom " + " store i32 0, i32* %arrayidx, align 4 " + " br label %if.end " + "if.end: " + " br label %for.inc " + "for.inc: " + " %inc = add nsw i32 %iv, 1 " + " br label %for.cond " + "for.end: " + " ret void " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); + const Loop *L = cast<SCEVAddRecExpr>(ScevIV)->getLoop(); + + const SCEV *ITC = SE.getConstantMaxTripCountFromArray(L); + EXPECT_TRUE(isa<SCEVCouldNotCompute>(ITC)); + }); +} + +TEST_F(ScalarEvolutionsTest, ComputeMaxTripCountFromMultiDemArray) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr<Module> M = parseAssemblyString( + "define void @foo(i32 signext %len) { " + "entry: " + " %a = alloca [3 x [5 x i32]], align 4 " + " br label %for.cond " + "for.cond: " + " %iv = phi i32 [ %inc, %for.inc ], [ 0, %entry ] " + " %cmp = icmp slt i32 %iv, %len " + " br i1 %cmp, label %for.body, label %for.cond.cleanup " + "for.cond.cleanup: " + " br label %for.end " + "for.body: " + " %arrayidx = getelementptr inbounds [3 x [5 x i32]], \ + [3 x [5 x i32]]* %a, i64 0, i64 3 " + " %idxprom = sext i32 %iv to i64 " + " %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %arrayidx, \ + i64 0, i64 %idxprom " + " store i32 0, i32* %arrayidx1, align 4" + " br label %for.inc " + "for.inc: " + " %inc = add nsw i32 %iv, 1 " + " br label %for.cond " + "for.end: " + " ret void " + "} ", + Err, C); + + ASSERT_TRUE(M && "Could not parse module?"); + ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!"); + + runWithSE(*M, "foo", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) { + auto *ScevIV = SE.getSCEV(getInstructionByName(F, "iv")); + const Loop *L = cast<SCEVAddRecExpr>(ScevIV)->getLoop(); + + const SCEV *ITC = SE.getConstantMaxTripCountFromArray(L); + EXPECT_TRUE(isa<SCEVCouldNotCompute>(ITC)); + }); +} + } // end namespace llvm diff --git a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp index 
2900c59a65d58..866eef8f824ed 100644 --- a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp +++ b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp @@ -487,7 +487,7 @@ body: | TEST_F(InstrRefLDVTest, MTransferDefs) { MachineFunction *MF = readMIRBlock( " $rax = MOV64ri 0\n" - " RETQ $rax\n"); + " RET64 $rax\n"); setupLDVObj(MF); // We should start with only SP tracked. @@ -519,7 +519,7 @@ TEST_F(InstrRefLDVTest, MTransferDefs) { MF = readMIRBlock( " $rax = MOV64ri 0\n" " $al = MOV8ri 0\n" - " RETQ $rax\n"); + " RET64 $rax\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -556,7 +556,7 @@ TEST_F(InstrRefLDVTest, MTransferDefs) { " $rdi = MOV64ri 0\n" // instr 4 " $rsi = MOV64ri 0\n" // instr 5 " CALL64r $rax, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $rsi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax, implicit-def $esp, implicit-def $sp\n\n\n\n" // instr 6 - " RETQ $rax\n"); // instr 7 + " RET64 $rax\n"); // instr 7 setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -590,7 +590,7 @@ TEST_F(InstrRefLDVTest, MTransferDefs) { // When we DBG_PHI something, we should track all its subregs. MF = readMIRBlock( " DBG_PHI $rdi, 0\n" - " RETQ\n"); + " RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -613,7 +613,7 @@ TEST_F(InstrRefLDVTest, MTransferMeta) { " $rax = MOV64ri 0\n" " $rax = IMPLICIT_DEF\n" " $rax = KILL killed $rax\n" - " RETQ $rax\n"); + " RET64 $rax\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -632,7 +632,7 @@ TEST_F(InstrRefLDVTest, MTransferCopies) { MachineFunction *MF = readMIRBlock( " $rax = MOV64ri 0\n" " MOV64mr $rsp, 1, $noreg, 16, $noreg, $rax :: (store 8 into %stack.0)\n" - " RETQ $rax\n"); + " RET64 $rax\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -657,7 +657,7 @@ TEST_F(InstrRefLDVTest, MTransferCopies) { " $rax = MOV64ri 0\n" " MOV64mr $rsp, 1, $noreg, 16, $noreg, $rax :: (store 8 into %stack.0)\n" " $rbx = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)\n" - " RETQ\n"); + " RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -681,7 +681,7 @@ TEST_F(InstrRefLDVTest, MTransferCopies) { " $rax = MOV64ri 0\n" " $rcx = COPY $rax\n" " $rbx = MOV64rr $rcx\n" - " RETQ\n"); + " RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -711,7 +711,7 @@ TEST_F(InstrRefLDVTest, MTransferCopies) { MF = readMIRBlock( " $rax = MOV64ri 0\n" " $ecx = COPY $eax\n" - " RETQ\n"); + " RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -741,7 +741,7 @@ TEST_F(InstrRefLDVTest, MTransferSubregSpills) { " $rax = MOV64ri 0\n" " MOV64mr $rsp, 1, $noreg, 16, $noreg, $rax :: (store 8 into %stack.0)\n" " $rbx = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)\n" - " RETQ\n"); + " RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -788,7 +788,7 @@ TEST_F(InstrRefLDVTest, MTransferSubregSpills) { " MOV64mr $rsp, 1, $noreg, 16, $noreg, $rax :: (store 8 into %stack.0)\n" " MOV32mr $rsp, 1, $noreg, 16, $noreg, $eax :: (store 4 into %stack.0)\n" " $rbx = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)\n" - " RETQ\n"); + " RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -839,7 +839,7 @@ TEST_F(InstrRefLDVTest, MTransferSubregSpills) { " $xmm0 = IMPLICIT_DEF\n" " MOVUPDmr $rsp, 1, $noreg, 16, $noreg, killed $xmm0 :: (store (s128) into %stack.0)\n" " $rbx = MOV64rm $rsp, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)\n" - " RETQ\n"); + " 
RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); @@ -874,7 +874,7 @@ TEST_F(InstrRefLDVTest, MTransferSubregSpills) { " $rax = MOV64ri 0\n" " MOV8mr $rsp, 1, $noreg, 16, $noreg, $ah :: (store 1 into %stack.0)\n" " $al = MOV8rm $rsp, 1, $noreg, 0, $noreg :: (load 1 from %stack.0)\n" - " RETQ\n"); + " RET64\n"); setupLDVObj(MF); TransferMap.clear(); TransferMap.resize(1); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp index d40bf6445f40d..82aaa458515a5 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp @@ -183,11 +183,9 @@ TEST(DWARFDie, getDeclFile) { std::string DeclFile = MainDie.getDeclFile( DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); -#if defined(_WIN32) - EXPECT_EQ(DeclFile, "/tmp\\main.cpp"); -#else - EXPECT_EQ(DeclFile, "/tmp/main.cpp"); -#endif + std::string Ref = + ("/tmp" + llvm::sys::path::get_separator() + "main.cpp").str(); + EXPECT_EQ(DeclFile, Ref); } TEST(DWARFDie, getDeclFileAbstractOrigin) { @@ -291,11 +289,9 @@ TEST(DWARFDie, getDeclFileAbstractOrigin) { std::string DeclFile = MainDie.getDeclFile( DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); -#if defined(_WIN32) - EXPECT_EQ(DeclFile, "/tmp\\main.cpp"); -#else - EXPECT_EQ(DeclFile, "/tmp/main.cpp"); -#endif + std::string Ref = + ("/tmp" + llvm::sys::path::get_separator() + "main.cpp").str(); + EXPECT_EQ(DeclFile, Ref); } TEST(DWARFDie, getDeclFileSpecification) { @@ -398,11 +394,9 @@ TEST(DWARFDie, getDeclFileSpecification) { std::string DeclFile = MainDie.getDeclFile( DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); -#if defined(_WIN32) - EXPECT_EQ(DeclFile, "/tmp\\main.cpp"); -#else - EXPECT_EQ(DeclFile, "/tmp/main.cpp"); -#endif + std::string Ref = + ("/tmp" + llvm::sys::path::get_separator() + "main.cpp").str(); + EXPECT_EQ(DeclFile, Ref); } TEST(DWARFDie, getDeclFileAbstractOriginAcrossCUBoundary) { @@ -522,11 +516,9 @@ TEST(DWARFDie, getDeclFileAbstractOriginAcrossCUBoundary) { std::string DeclFile = MainDie.getDeclFile( DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); -#if defined(_WIN32) - EXPECT_EQ(DeclFile, "/tmp\\main.cpp"); -#else - EXPECT_EQ(DeclFile, "/tmp/main.cpp"); -#endif + std::string Ref = + ("/tmp" + llvm::sys::path::get_separator() + "main.cpp").str(); + EXPECT_EQ(DeclFile, Ref); } TEST(DWARFDie, getDeclFileSpecificationAcrossCUBoundary) { @@ -646,11 +638,9 @@ TEST(DWARFDie, getDeclFileSpecificationAcrossCUBoundary) { std::string DeclFile = MainDie.getDeclFile( DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); -#if defined(_WIN32) - EXPECT_EQ(DeclFile, "/tmp\\main.cpp"); -#else - EXPECT_EQ(DeclFile, "/tmp/main.cpp"); -#endif + std::string Ref = + ("/tmp" + llvm::sys::path::get_separator() + "main.cpp").str(); + EXPECT_EQ(DeclFile, Ref); } } // end anonymous namespace diff --git a/llvm/unittests/IR/ConstantRangeTest.cpp b/llvm/unittests/IR/ConstantRangeTest.cpp index 7bf7e0b4c9278..17cc29b0b268f 100644 --- a/llvm/unittests/IR/ConstantRangeTest.cpp +++ b/llvm/unittests/IR/ConstantRangeTest.cpp @@ -558,8 +558,8 @@ TEST_F(ConstantRangeTest, IntersectWith) { EXPECT_EQ(LHS.intersectWith(RHS), ConstantRange(APInt(32, 15), APInt(32, 0))); } -template -void testBinarySetOperationExhaustive(Fn1 OpFn, Fn2 InResultFn) { +template +void testBinarySetOperationExhaustive(Fn1 OpFn, Fn2 ExactOpFn, Fn3 InResultFn) { unsigned Bits = 4; EnumerateTwoConstantRanges(Bits, [=](const ConstantRange &CR1, const ConstantRange 
&CR2) { @@ -577,6 +577,13 @@ void testBinarySetOperationExhaustive(Fn1 OpFn, Fn2 InResultFn) { ConstantRange SignedCR = OpFn(CR1, CR2, ConstantRange::Signed); TestRange(SignedCR, Elems, PreferSmallestNonFullSigned, {CR1, CR2}); + + Optional<ConstantRange> ExactCR = ExactOpFn(CR1, CR2); + if (SmallestCR.isSizeLargerThan(Elems.count())) { + EXPECT_TRUE(!ExactCR.hasValue()); + } else { + EXPECT_EQ(SmallestCR, *ExactCR); + } }); } @@ -586,6 +593,9 @@ TEST_F(ConstantRangeTest, IntersectWithExhaustive) { ConstantRange::PreferredRangeType Type) { return CR1.intersectWith(CR2, Type); }, + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.exactIntersectWith(CR2); + }, [](const ConstantRange &CR1, const ConstantRange &CR2, const APInt &N) { return CR1.contains(N) && CR2.contains(N); }); @@ -597,6 +607,9 @@ TEST_F(ConstantRangeTest, UnionWithExhaustive) { ConstantRange::PreferredRangeType Type) { return CR1.unionWith(CR2, Type); }, + [](const ConstantRange &CR1, const ConstantRange &CR2) { + return CR1.exactUnionWith(CR2); + }, [](const ConstantRange &CR1, const ConstantRange &CR2, const APInt &N) { return CR1.contains(N) || CR2.contains(N); }); @@ -1572,8 +1585,7 @@ void ICmpTestImpl(CmpInst::Predicate Pred) { } TEST(ConstantRange, ICmp) { - for (auto Pred : seq_inclusive(CmpInst::Predicate::FIRST_ICMP_PREDICATE, - CmpInst::Predicate::LAST_ICMP_PREDICATE)) + for (auto Pred : ICmpInst::predicates()) ICmpTestImpl(Pred); } @@ -1973,6 +1985,23 @@ TEST(ConstantRange, GetEquivalentICmp) { ConstantRange(APInt(32, -1)).inverse().getEquivalentICmp(Pred, RHS)); EXPECT_EQ(Pred, CmpInst::ICMP_NE); EXPECT_EQ(RHS, APInt(32, -1)); + + EnumerateConstantRanges(4, [](const ConstantRange &CR) { + CmpInst::Predicate Pred; + APInt RHS, Offset; + CR.getEquivalentICmp(Pred, RHS, Offset); + ForeachNumInConstantRange(ConstantRange::getFull(4), [&](const APInt &N) { + bool Result = ICmpInst::compare(N + Offset, RHS, Pred); + EXPECT_EQ(CR.contains(N), Result); + }); + + if (CR.getEquivalentICmp(Pred, RHS)) { + ForeachNumInConstantRange(ConstantRange::getFull(4), [&](const APInt &N) { + bool Result = ICmpInst::compare(N, RHS, Pred); + EXPECT_EQ(CR.contains(N), Result); + }); + } + }); } #define EXPECT_MAY_OVERFLOW(op) \ @@ -2531,8 +2560,7 @@ void testConstantRangeICmpPredEquivalence(ICmpInst::Predicate SrcPred, T Func) { } TEST_F(ConstantRangeTest, areInsensitiveToSignednessOfICmpPredicate) { - for (auto Pred : seq_inclusive(ICmpInst::Predicate::FIRST_ICMP_PREDICATE, - ICmpInst::Predicate::LAST_ICMP_PREDICATE)) { + for (auto Pred : ICmpInst::predicates()) { if (ICmpInst::isEquality(Pred)) continue; ICmpInst::Predicate FlippedSignednessPred = @@ -2548,8 +2576,7 @@ TEST_F(ConstantRangeTest, areInsensitiveToSignednessOfICmpPredicate) { } TEST_F(ConstantRangeTest, areInsensitiveToSignednessOfInvertedICmpPredicate) { - for (auto Pred : seq_inclusive(ICmpInst::Predicate::FIRST_ICMP_PREDICATE, - ICmpInst::Predicate::LAST_ICMP_PREDICATE)) { + for (auto Pred : ICmpInst::predicates()) { if (ICmpInst::isEquality(Pred)) continue; ICmpInst::Predicate InvertedFlippedSignednessPred = @@ -2567,8 +2594,7 @@ TEST_F(ConstantRangeTest, areInsensitiveToSignednessOfInvertedICmpPredicate) { } TEST_F(ConstantRangeTest, getEquivalentPredWithFlippedSignedness) { - for (auto Pred : seq_inclusive(ICmpInst::Predicate::FIRST_ICMP_PREDICATE, - ICmpInst::Predicate::LAST_ICMP_PREDICATE)) { + for (auto Pred : ICmpInst::predicates()) { if (ICmpInst::isEquality(Pred)) continue; testConstantRangeICmpPredEquivalence( @@ -2581,4 +2607,15 @@ 
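As the comparison against SmallestCR above suggests, the new exact variants succeed only when the true set result is itself a single contiguous range. A short sketch with assumed 8-bit values (not taken from the test):

    ConstantRange A(APInt(8, 0), APInt(8, 2)); // [0, 2) == {0, 1}
    ConstantRange B(APInt(8, 5), APInt(8, 7)); // [5, 7) == {5, 6}
    // The true union {0, 1, 5, 6} has a hole, so it is not exactly one
    // ConstantRange and exactUnionWith() returns None.
    Optional<ConstantRange> U = A.exactUnionWith(B);
    // The true intersection is the empty set, which *is* representable, so
    // exactIntersectWith() returns the empty range rather than None.
    Optional<ConstantRange> I = A.exactIntersectWith(B);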
TEST_F(ConstantRangeTest, getEquivalentPredWithFlippedSignedness) { } } +TEST_F(ConstantRangeTest, isSizeLargerThan) { + EXPECT_FALSE(Empty.isSizeLargerThan(0)); + + EXPECT_TRUE(Full.isSizeLargerThan(0)); + EXPECT_TRUE(Full.isSizeLargerThan(65535)); + EXPECT_FALSE(Full.isSizeLargerThan(65536)); + + EXPECT_TRUE(One.isSizeLargerThan(0)); + EXPECT_FALSE(One.isSizeLargerThan(1)); +} + } // anonymous namespace diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index f3fc08ca583c6..5b977ef65b89a 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/AsmParser/Parser.h" #include "llvm/IR/Instructions.h" +#include "llvm/ADT/CombinationGenerator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -1115,6 +1117,103 @@ TEST(InstructionsTest, ShuffleMaskQueries) { delete Id15; } +TEST(InstructionsTest, ShuffleMaskIsReplicationMask) { + for (int ReplicationFactor : seq_inclusive(1, 8)) { + for (int VF : seq_inclusive(1, 8)) { + const auto ReplicatedMask = createReplicatedMask(ReplicationFactor, VF); + int GuessedReplicationFactor = -1, GuessedVF = -1; + EXPECT_TRUE(ShuffleVectorInst::isReplicationMask( + ReplicatedMask, GuessedReplicationFactor, GuessedVF)); + EXPECT_EQ(GuessedReplicationFactor, ReplicationFactor); + EXPECT_EQ(GuessedVF, VF); + + for (int OpVF : seq_inclusive(VF, 2 * VF + 1)) { + LLVMContext Ctx; + Type *OpVFTy = FixedVectorType::get(IntegerType::getInt1Ty(Ctx), OpVF); + Value *Op = ConstantVector::getNullValue(OpVFTy); + ShuffleVectorInst *SVI = new ShuffleVectorInst(Op, Op, ReplicatedMask); + EXPECT_EQ(SVI->isReplicationMask(GuessedReplicationFactor, GuessedVF), + OpVF == VF); + delete SVI; + } + } + } +} + +TEST(InstructionsTest, ShuffleMaskIsReplicationMask_undef) { + for (int ReplicationFactor : seq_inclusive(1, 4)) { + for (int VF : seq_inclusive(1, 4)) { + const auto ReplicatedMask = createReplicatedMask(ReplicationFactor, VF); + int GuessedReplicationFactor = -1, GuessedVF = -1; + + // If we change some mask elements to undef, we should still match. + + SmallVector<SmallVector<bool>> ElementChoices(ReplicatedMask.size(), + {false, true}); + + CombinationGenerator<bool, SmallVector<bool>, 4> + G(ElementChoices); + + G.generate([&](ArrayRef<bool> UndefOverrides) -> bool { + SmallVector<int> AdjustedMask; + AdjustedMask.reserve(ReplicatedMask.size()); + for (auto I : zip(ReplicatedMask, UndefOverrides)) + AdjustedMask.emplace_back(std::get<1>(I) ? -1 : std::get<0>(I)); + assert(AdjustedMask.size() == ReplicatedMask.size() && + "Size misprediction"); + + EXPECT_TRUE(ShuffleVectorInst::isReplicationMask( + AdjustedMask, GuessedReplicationFactor, GuessedVF)); + // Do not check GuessedReplicationFactor and GuessedVF; + // with enough undefs we may deduce a different tuple. 
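For readers new to replication masks: createReplicatedMask(ReplicationFactor, VF) repeats each of the VF source lane indices ReplicationFactor times, and isReplicationMask() recovers that pair. A hedged sketch of the round trip these tests exercise (variable names are illustrative):

    // createReplicatedMask(2, 3) yields <0,0,1,1,2,2>.
    SmallVector<int, 16> Mask =
        createReplicatedMask(/*ReplicationFactor=*/2, /*VF=*/3);
    int Factor = -1, Lanes = -1;
    bool IsRep = ShuffleVectorInst::isReplicationMask(Mask, Factor, Lanes);
    // IsRep == true, Factor == 2, Lanes == 3.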
+ + return /*Abort=*/false; + }); + } + } +} + +TEST(InstructionsTest, ShuffleMaskIsReplicationMask_Exhaustive_Correctness) { + for (int ShufMaskNumElts : seq_inclusive(1, 6)) { + SmallVector<int> PossibleShufMaskElts; + PossibleShufMaskElts.reserve(ShufMaskNumElts + 2); + for (int PossibleShufMaskElt : seq_inclusive(-1, ShufMaskNumElts)) + PossibleShufMaskElts.emplace_back(PossibleShufMaskElt); + assert(PossibleShufMaskElts.size() == ShufMaskNumElts + 2U && + "Size misprediction"); + + SmallVector<SmallVector<int>> ElementChoices(ShufMaskNumElts, + PossibleShufMaskElts); + + CombinationGenerator<int, SmallVector<int>, 4> + G(ElementChoices); + + G.generate([&](ArrayRef<int> Mask) -> bool { + int GuessedReplicationFactor = -1, GuessedVF = -1; + bool Match = ShuffleVectorInst::isReplicationMask( + Mask, GuessedReplicationFactor, GuessedVF); + if (!Match) + return /*Abort=*/false; + + const auto ActualMask = + createReplicatedMask(GuessedReplicationFactor, GuessedVF); + EXPECT_EQ(Mask.size(), ActualMask.size()); + for (auto I : zip(Mask, ActualMask)) { + int Elt = std::get<0>(I); + int ActualElt = std::get<1>(I); + + if (Elt != -1) { + EXPECT_EQ(Elt, ActualElt); + } + } + + return /*Abort=*/false; + }); + } +} + TEST(InstructionsTest, GetSplat) { // Create the elements for various constant vectors. LLVMContext Ctx; diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp index d8fd6f6516cdd..db7255e5569a4 100644 --- a/llvm/unittests/Support/CommandLineTest.cpp +++ b/llvm/unittests/Support/CommandLineTest.cpp @@ -1112,6 +1112,11 @@ TEST(CommandLineTest, PositionalEatArgsError) { } #ifdef _WIN32 +void checkSeparators(StringRef Path) { + char UndesiredSeparator = sys::path::get_separator()[0] == '/' ? '\\' : '/'; + ASSERT_EQ(Path.find(UndesiredSeparator), StringRef::npos); +} + TEST(CommandLineTest, GetCommandLineArguments) { int argc = __argc; char **argv = __argv; @@ -1121,6 +1126,7 @@ TEST(CommandLineTest, GetCommandLineArguments) { EXPECT_EQ(llvm::sys::path::is_absolute(argv[0]), llvm::sys::path::is_absolute(__argv[0])); + checkSeparators(argv[0]); EXPECT_TRUE( llvm::sys::path::filename(argv[0]).equals_insensitive("supporttests.exe")) diff --git a/llvm/unittests/Support/LinearPolyBaseTest.cpp b/llvm/unittests/Support/LinearPolyBaseTest.cpp index 6942ea2a14ea1..f9a2d4e7521ee 100644 --- a/llvm/unittests/Support/LinearPolyBaseTest.cpp +++ b/llvm/unittests/Support/LinearPolyBaseTest.cpp @@ -124,9 +124,6 @@ TEST(UnivariateLinearPolyBase, Univariate3D_GetValue) { EXPECT_EQ(Univariate3D(42, 1).getValue(0), 0); EXPECT_EQ(Univariate3D(42, 1).getValue(1), 42); EXPECT_EQ(Univariate3D(42, 1).getValue(2), 0); - - EXPECT_EQ(Univariate3D(42, 0).getValue(), 42); - EXPECT_EQ(Univariate3D(42, 1).getValue(), 42); } TEST(UnivariateLinearPolyBase, Univariate3D_Add) { diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp index 927b7eb35e9cb..35c1de7202796 100644 --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -64,6 +64,13 @@ using namespace llvm::sys; namespace { +void checkSeparators(StringRef Path) { +#ifdef _WIN32 + char UndesiredSeparator = sys::path::get_separator()[0] == '/' ? '\\' : '/'; + ASSERT_EQ(Path.find(UndesiredSeparator), StringRef::npos); +#endif +} + struct FileDescriptorCloser { explicit FileDescriptorCloser(int FD) : FD(FD) {} ~FileDescriptorCloser() { ::close(FD); } @@ -75,7 +82,9 @@ TEST(is_style_Style, Works) { // Check platform-independent results. 
EXPECT_TRUE(is_style_posix(Style::posix)); EXPECT_TRUE(is_style_windows(Style::windows)); + EXPECT_TRUE(is_style_windows(Style::windows_slash)); EXPECT_FALSE(is_style_posix(Style::windows)); + EXPECT_FALSE(is_style_posix(Style::windows_slash)); EXPECT_FALSE(is_style_windows(Style::posix)); // Check platform-dependent results. @@ -95,12 +104,19 @@ TEST(is_separator, Works) { EXPECT_FALSE(path::is_separator(' ')); EXPECT_TRUE(path::is_separator('\\', path::Style::windows)); + EXPECT_TRUE(path::is_separator('\\', path::Style::windows_slash)); EXPECT_FALSE(path::is_separator('\\', path::Style::posix)); EXPECT_EQ(path::is_style_windows(path::Style::native), path::is_separator('\\')); } +TEST(get_separator, Works) { + EXPECT_EQ(path::get_separator(path::Style::posix), "/"); + EXPECT_EQ(path::get_separator(path::Style::windows_backslash), "\\"); + EXPECT_EQ(path::get_separator(path::Style::windows_slash), "/"); +} + TEST(is_absolute_gnu, Works) { // Test tuple <Path, ExpectedPosixValue, ExpectedWindowsValue>. const std::tuple<StringRef, bool, bool> Paths[] = { @@ -383,6 +399,8 @@ TEST(Support, PathIterator) { testing::ElementsAre("/", ".c", ".d", "..", ".")); EXPECT_THAT(GetComponents("c:\\c\\e\\foo.txt", path::Style::windows), testing::ElementsAre("c:", "\\", "c", "e", "foo.txt")); + EXPECT_THAT(GetComponents("c:\\c\\e\\foo.txt", path::Style::windows_slash), + testing::ElementsAre("c:", "\\", "c", "e", "foo.txt")); EXPECT_THAT(GetComponents("//net/"), testing::ElementsAre("//net", "/")); EXPECT_THAT(GetComponents("//net/c/foo.txt"), testing::ElementsAre("//net", "/", "c", "foo.txt")); @@ -425,6 +443,9 @@ std::string getEnvWin(const wchar_t *Var) { ArrayRef<char> ref{reinterpret_cast<const char *>(path), pathLen * sizeof(wchar_t)}; convertUTF16ToUTF8String(ref, expected); + SmallString<32> Buf(expected); + path::make_preferred(Buf); + expected.assign(Buf.begin(), Buf.end()); } return expected; } @@ -572,9 +593,15 @@ TEST(Support, TempDirectory) { #ifdef _WIN32 static std::string path2regex(std::string Path) { size_t Pos = 0; + bool Forward = path::get_separator()[0] == '/'; while ((Pos = Path.find('\\', Pos)) != std::string::npos) { - Path.replace(Pos, 1, "\\\\"); - Pos += 2; + if (Forward) { + Path.replace(Pos, 1, "/"); + Pos += 1; + } else { + Path.replace(Pos, 1, "\\\\"); + Pos += 2; + } } return Path; } @@ -721,10 +748,12 @@ TEST_F(FileSystemTest, RealPath) { // how we specified it. Make sure to compare against the real_path of the // TestDirectory, and not just the value of TestDirectory. ASSERT_NO_ERROR(fs::real_path(TestDirectory, RealBase)); + checkSeparators(RealBase); path::native(Twine(RealBase) + "/test1/test2", Expected); ASSERT_NO_ERROR(fs::real_path( Twine(TestDirectory) + "/././test1/../test1/test2/./test3/..", Actual)); + checkSeparators(Actual); EXPECT_EQ(Expected, Actual); @@ -733,7 +762,9 @@ // This can fail if $HOME is not set and getpwuid fails. 
bool Result = llvm::sys::path::home_directory(HomeDir); if (Result) { + checkSeparators(HomeDir); ASSERT_NO_ERROR(fs::real_path(HomeDir, Expected)); + checkSeparators(Expected); ASSERT_NO_ERROR(fs::real_path("~", Actual, true)); EXPECT_EQ(Expected, Actual); ASSERT_NO_ERROR(fs::real_path("~/", Actual, true)); @@ -1425,10 +1456,25 @@ TEST(Support, NormalizePath) { for (auto &T : Tests) { SmallString<64> Win(std::get<0>(T)); SmallString<64> Posix(Win); + SmallString<64> WinSlash(Win); path::native(Win, path::Style::windows); path::native(Posix, path::Style::posix); + path::native(WinSlash, path::Style::windows_slash); EXPECT_EQ(std::get<1>(T), Win); EXPECT_EQ(std::get<2>(T), Posix); + EXPECT_EQ(std::get<2>(T), WinSlash); + } + + for (auto &T : Tests) { + SmallString<64> WinBackslash(std::get<0>(T)); + SmallString<64> Posix(WinBackslash); + SmallString<64> WinSlash(WinBackslash); + path::make_preferred(WinBackslash, path::Style::windows_backslash); + path::make_preferred(Posix, path::Style::posix); + path::make_preferred(WinSlash, path::Style::windows_slash); + EXPECT_EQ(std::get<1>(T), WinBackslash); + EXPECT_EQ(std::get<0>(T), Posix); // Posix remains unchanged here + EXPECT_EQ(std::get<2>(T), WinSlash); } #if defined(_WIN32) @@ -1437,10 +1483,15 @@ const char *Path7a = "~/aaa"; SmallString<64> Path7(Path7a); - path::native(Path7); + path::native(Path7, path::Style::windows_backslash); EXPECT_TRUE(Path7.endswith("\\aaa")); EXPECT_TRUE(Path7.startswith(PathHome)); EXPECT_EQ(Path7.size(), PathHome.size() + strlen(Path7a + 1)); + Path7 = Path7a; + path::native(Path7, path::Style::windows_slash); + EXPECT_TRUE(Path7.endswith("/aaa")); + EXPECT_TRUE(Path7.startswith(PathHome)); + EXPECT_EQ(Path7.size(), PathHome.size() + strlen(Path7a + 1)); const char *Path8a = "~"; SmallString<64> Path8(Path8a); @@ -1454,7 +1505,7 @@ const char *Path10a = "aaa/~/b"; SmallString<64> Path10(Path10a); - path::native(Path10); + path::native(Path10, path::Style::windows_backslash); EXPECT_EQ(Path10, "aaa\\~\\b"); #endif } @@ -2235,6 +2286,30 @@ TEST_F(FileSystemTest, widenPath) { ASSERT_NO_ERROR(windows::widenPath(Input, Result)); EXPECT_EQ(Result, Expected); + // Pass a path with forward slashes, check that it ends up with + // backslashes when widened with the long path prefix. + SmallString<MAX_PATH> InputForward(Input); + path::make_preferred(InputForward, path::Style::windows_slash); + ASSERT_NO_ERROR(windows::widenPath(InputForward, Result)); + EXPECT_EQ(Result, Expected); + + // Pass a path which has the long path prefix prepended originally, but + // which is short enough to not require the long path prefix. If such a + // path is passed with forward slashes, make sure it gets normalized to + // backslashes. + SmallString<MAX_PATH> PrefixedPath("\\\\?\\C:\\foldername"); + ASSERT_NO_ERROR(windows::UTF8ToUTF16(PrefixedPath, Expected)); + // Mangle the input to forward slashes. + path::make_preferred(PrefixedPath, path::Style::windows_slash); + ASSERT_NO_ERROR(windows::widenPath(PrefixedPath, Result)); + EXPECT_EQ(Result, Expected); + + // A short path with an inconsistent prefix is passed through as-is; this + // is a degenerate case that we currently don't care about handling. + PrefixedPath.assign("/\\?/C:/foldername"); + ASSERT_NO_ERROR(windows::UTF8ToUTF16(PrefixedPath, Expected)); + ASSERT_NO_ERROR(windows::widenPath(PrefixedPath, Result)); + EXPECT_EQ(Result, Expected); // Test that UNC paths are handled correctly. 
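The windows_slash style exercised above understands both separators on input but prefers '/', while windows_backslash keeps the traditional preference; with posix, '\\' is an ordinary character. A small sketch of the conversions the NormalizePath additions check (values assumed):

    SmallString<64> P("a\\b\\c");
    sys::path::make_preferred(P, sys::path::Style::windows_slash);     // "a/b/c"
    sys::path::make_preferred(P, sys::path::Style::windows_backslash); // "a\\b\\c"
    // With Style::posix, make_preferred leaves the string untouched.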
const std::string ShareName("\\\\sharename\\"); diff --git a/llvm/unittests/Support/ProgramTest.cpp b/llvm/unittests/Support/ProgramTest.cpp index 98eb81c0abf58..d899026a358a0 100644 --- a/llvm/unittests/Support/ProgramTest.cpp +++ b/llvm/unittests/Support/ProgramTest.cpp @@ -110,17 +110,26 @@ class ProgramEnvTest : public testing::Test { }; #ifdef _WIN32 +void checkSeparators(StringRef Path) { + char UndesiredSeparator = sys::path::get_separator()[0] == '/' ? '\\' : '/'; + ASSERT_EQ(Path.find(UndesiredSeparator), StringRef::npos); +} + TEST_F(ProgramEnvTest, CreateProcessLongPath) { if (getenv("LLVM_PROGRAM_TEST_LONG_PATH")) exit(0); // getMainExecutable returns an absolute path; prepend the long-path prefix. - std::string MyAbsExe = - sys::fs::getMainExecutable(TestMainArgv0, &ProgramTestStringArg1); + SmallString<128> MyAbsExe( + sys::fs::getMainExecutable(TestMainArgv0, &ProgramTestStringArg1)); + checkSeparators(MyAbsExe); + // Force a path with backslashes, when we are going to prepend the \\?\ + // prefix. + sys::path::native(MyAbsExe, sys::path::Style::windows_backslash); std::string MyExe; if (!StringRef(MyAbsExe).startswith("\\\\?\\")) MyExe.append("\\\\?\\"); - MyExe.append(MyAbsExe); + MyExe.append(std::string(MyAbsExe.begin(), MyAbsExe.end())); StringRef ArgV[] = {MyExe, "--gtest_filter=ProgramEnvTest.CreateProcessLongPath"}; diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp index ba0e7dc4d444e..8dc803a974943 100644 --- a/llvm/unittests/Support/TargetParserTest.cpp +++ b/llvm/unittests/Support/TargetParserTest.cpp @@ -1008,6 +1017,17 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS, "8.2-A"), + ARMCPUTestParams("cortex-x2", "armv9-a", "neon-fp-armv8", + AArch64::AEK_CRC | AArch64::AEK_FP | + AArch64::AEK_SIMD | AArch64::AEK_RAS | + AArch64::AEK_LSE | AArch64::AEK_RDM | + AArch64::AEK_RCPC | AArch64::AEK_SVE2 | + AArch64::AEK_DOTPROD | AArch64::AEK_MTE | + AArch64::AEK_PAUTH | AArch64::AEK_I8MM | + AArch64::AEK_BF16 | AArch64::AEK_SVE2BITPERM | + AArch64::AEK_SSBS | AArch64::AEK_SB | + AArch64::AEK_FP16FML, + "9-A"), ARMCPUTestParams("cyclone", "armv8-a", "crypto-neon-fp-armv8", AArch64::AEK_NONE | AArch64::AEK_CRYPTO | AArch64::AEK_FP | AArch64::AEK_SIMD, @@ -1197,7 +1208,7 @@ AArch64::AEK_LSE | AArch64::AEK_RDM, "8.2-A"))); -static constexpr unsigned NumAArch64CPUArchs = 50; +static constexpr unsigned NumAArch64CPUArchs = 51; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector<StringRef> List; diff --git a/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt b/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt index f010cf7f6cd69..6b08dc4d139d5 100644 --- a/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt +++ b/llvm/unittests/tools/llvm-exegesis/CMakeLists.txt @@ -15,7 +15,6 @@ set(exegesis_sources ClusteringTest.cpp PerfHelperTest.cpp RegisterValueTest.cpp - SnippetGeneratorTest.cpp ) set(exegesis_link_libraries LLVMExegesis) diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp index b5aca1eb35366..91a271c5c1ad5 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp @@ -71,7 +71,7 @@ TEST_F(X86SnippetRepetitorTest, Duplicate) { ASSERT_EQ(MF->getNumBlockIDs(), 1u); EXPECT_THAT(MF->getBlockNumbered(0)->instrs(), ElementsAre(HasOpcode(X86::NOOP), HasOpcode(X86::NOOP), - 
HasOpcode(X86::NOOP), HasOpcode(X86::RETQ))); + HasOpcode(X86::NOOP), HasOpcode(X86::RET64))); } TEST_F(X86SnippetRepetitorTest, Loop) { @@ -90,7 +90,7 @@ TEST_F(X86SnippetRepetitorTest, Loop) { LiveReg(State.getExegesisTarget().getLoopCounterRegister( State.getTargetMachine().getTargetTriple())))); EXPECT_THAT(MF->getBlockNumbered(2)->instrs(), - ElementsAre(HasOpcode(X86::RETQ))); + ElementsAre(HasOpcode(X86::RET64))); } } // namespace diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 9c1d1a4b64aad..be17d5c718c25 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -3924,8 +3924,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { if (HasDeprecation) { OS << " std::string Info;\n"; - OS << " if (!getParser().getTargetParser().\n"; - OS << " getTargetOptions().MCNoDeprecatedWarn &&\n"; + OS << " if (!getParser().getTargetParser().getTargetOptions().MCNoDeprecatedWarn &&\n"; OS << " MII.getDeprecatedInfo(Inst, getSTI(), Info)) {\n"; OS << " SMLoc Loc = ((" << Target.getName() << "Operand &)*Operands[0]).getStartLoc();\n"; diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 3d1d258e342e9..437b5f002027c 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -249,7 +249,8 @@ enum IIT_Info { IIT_BF16 = 48, IIT_STRUCT9 = 49, IIT_V256 = 50, - IIT_AMX = 51 + IIT_AMX = 51, + IIT_PPCF128 = 52 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -274,6 +275,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT, case MVT::f32: return Sig.push_back(IIT_F32); case MVT::f64: return Sig.push_back(IIT_F64); case MVT::f128: return Sig.push_back(IIT_F128); + case MVT::ppcf128: return Sig.push_back(IIT_PPCF128); case MVT::token: return Sig.push_back(IIT_TOKEN); case MVT::Metadata: return Sig.push_back(IIT_METADATA); case MVT::x86mmx: return Sig.push_back(IIT_MMX); diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py index 6f01cd12fcd81..d9d89093f3d3b 100755 --- a/llvm/utils/extract_symbols.py +++ b/llvm/utils/extract_symbols.py @@ -126,6 +126,11 @@ def readobj_is_32bit_windows(lib): return (match.group(1) == 'COFF-i386') return False +# On AIX, there isn't an easy way to detect 32-bit windows objects with the system toolchain, +# so just assume false. +def aix_is_32bit_windows(lib): + return False + # MSVC mangles names to ?@. By examining the # identifier/type mangling we can decide which symbols could possibly be # required and which we can discard. 
@@ -357,7 +362,7 @@ def extract_symbols(arg): 'objdump' : (None, objdump_is_32bit_windows), 'llvm-readobj' : (readobj_get_symbols, readobj_is_32bit_windows) } get_symbols = None - is_32bit_windows = None + is_32bit_windows = aix_is_32bit_windows if sys.platform.startswith('aix') else None # If we have a tools argument then use that for the list of tools to check if args.tools: tool_exes = args.tools diff --git a/llvm/utils/gdb-scripts/prettyprinters.py b/llvm/utils/gdb-scripts/prettyprinters.py index fccde53531694..8d5f2d404434d 100644 --- a/llvm/utils/gdb-scripts/prettyprinters.py +++ b/llvm/utils/gdb-scripts/prettyprinters.py @@ -16,9 +16,6 @@ def next(self): def children(self): return self -def escape_bytes(val, l): - return '"' + val.string(encoding='Latin-1', length=l).encode('unicode_escape').decode() + '"' - class SmallStringPrinter: """Print an llvm::SmallString object.""" @@ -26,8 +23,12 @@ def __init__(self, val): self.val = val def to_string(self): - begin = self.val['BeginX'] - return escape_bytes(begin.cast(gdb.lookup_type('char').pointer()), self.val['Size']) + data = self.val['BeginX'].cast(gdb.lookup_type('char').pointer()) + length = self.val['Size'] + return data.lazy_string(length=length) + + def display_hint (self): + return 'string' class StringRefPrinter: """Print an llvm::StringRef object.""" @@ -36,7 +37,12 @@ def __init__(self, val): self.val = val def to_string(self): - return escape_bytes(self.val['Data'], self.val['Length']) + data = self.val['Data'] + length = self.val['Length'] + return data.lazy_string(length=length) + + def display_hint(self): + return 'string' class SmallVectorPrinter(Iterator): """Print an llvm::SmallVector object.""" @@ -265,7 +271,7 @@ def string_from_pretty_printer_lookup(self, val): # register the LazyString type, so we can't check # "type(s) == gdb.LazyString". if 'LazyString' in type(s).__name__: - s = s.value().address.string() + s = s.value().string() else: print(('No pretty printer for {} found. The resulting Twine ' + @@ -300,12 +306,9 @@ def string_from_child(self, child, kind): if self.is_twine_kind(kind, 'PtrAndLengthKind'): val = child['ptrAndLength'] - return val['ptr'].string(encoding='Latin-1', length=val['length']).encode('unicode_escape').decode() - - if self.is_twine_kind(kind, 'SmallStringKind'): - val = child['smallString'].dereference() - pp = SmallStringPrinter(val) - return pp.to_string() + data = val['ptr'] + length = val['length'] + return data.string(length=length) if self.is_twine_kind(kind, 'CharKind'): return chr(child['character']) @@ -340,11 +343,9 @@ def string_from_child(self, child, kind): def string_from_twine_object(self, twine): '''Return the string representation of the Twine object twine.''' - lhs_str = '' - rhs_str = '' - lhs = twine['LHS'] rhs = twine['RHS'] + lhs_kind = str(twine['LHSKind']) rhs_kind = str(twine['RHSKind']) @@ -356,6 +357,9 @@ def string_from_twine_object(self, twine): def to_string(self): return self.string_from_twine_object(self._val) + def display_hint(self): + return 'string' + def get_pointer_int_pair(val): """Get tuple from llvm::PointerIntPair.""" info_name = val.type.template_argument(4).strip_typedefs().name diff --git a/llvm/utils/gn/README.rst b/llvm/utils/gn/README.rst index 74b4e88da4c7a..9ca545061099d 100644 --- a/llvm/utils/gn/README.rst +++ b/llvm/utils/gn/README.rst @@ -63,7 +63,7 @@ before starting the build. GN has extensive built-in help; try e.g. ``llvm/utils/gn/gn.py help gen`` to see the help for the ``gen`` command. 
The full GN reference is also `available -online <https://gn.googlesource.com/gn/+/master/docs/reference.md>`_. +online <https://gn.googlesource.com/gn/+/main/docs/reference.md>`_. GN has an autoformatter: ``git ls-files '*.gn' '*.gni' | xargs llvm/utils/gn/gn.py format`` diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn index 03f6d8e74fe34..3b22f6f302214 100644 --- a/llvm/utils/gn/build/BUILD.gn +++ b/llvm/utils/gn/build/BUILD.gn @@ -7,7 +7,8 @@ import("//llvm/utils/gn/build/toolchain/target_flags.gni") declare_args() { # Whether to build everything with coverage information. # After building with this, run tests and then run - # llvm/utils/prepare-code-coverage-artifact.py \ + # llvm/utils/prepare-code-coverage-artifact.py \ + # --compilation-dir=out/gn \ # .../llvm-profdata .../llvm-cov out/gn/profiles/ report/ \ # out/gn/bin/llvm-undname ... # to generate a HTML report for the binaries passed in the last line. @@ -226,6 +227,10 @@ config("compiler_defaults") { cflags += [ "-fcoverage-mapping", + # For build determinism. Using this requires passing --compilation-dir to + # llvm/utils/prepare-code-coverage-artifact.py. + "-fcoverage-compilation-dir=.", + # Using an absolute path here is lame, but it's used at test execution # time to generate the profiles, and lit doesn't specify a fixed folder # for test execution -- so this is the only way to get all profiles into @@ -234,14 +239,6 @@ config("compiler_defaults") { "-fprofile-instr-generate=" + rebase_path("$root_build_dir/profiles/%4m.profraw"), ] - if (use_goma) { - # goma has a bug where it writes the server-side absolute path. - # Work around that. - # FIXME: Instead, set this to `.` for deterministic builds and pass - # the build dir to prepare-code-coverage-artifact.py instead. - cflags += [ "-fcoverage-compilation-dir=" + rebase_path(root_build_dir) ] - } - if (host_os != "win") { ldflags += [ "-fprofile-instr-generate" ] } @@ -265,10 +262,7 @@ config("compiler_defaults") { } } if (use_relative_paths_in_debug_info) { - cflags += [ - "-fdebug-compilation-dir", - ".", - ] + cflags += [ "-fdebug-compilation-dir=." ] } } if (sysroot != "") { @@ -373,7 +367,8 @@ config("no_rtti") { config("zdefs") { # -Wl,-z,defs doesn't work with sanitizers. 
# https://clang.llvm.org/docs/AddressSanitizer.html - if (current_os != "ios" && current_os != "mac" && current_os != "win" && !(use_asan || use_tsan || use_ubsan)) { + if (current_os != "ios" && current_os != "mac" && current_os != "win" && + !(use_asan || use_tsan || use_ubsan)) { ldflags = [ "-Wl,-z,defs" ] } } diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/abseil/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/abseil/BUILD.gn index 732dd0d45ead4..671eabaa67ceb 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/abseil/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/abseil/BUILD.gn @@ -13,6 +13,7 @@ static_library("abseil") { ] sources = [ "AbseilTidyModule.cpp", + "CleanupCtadCheck.cpp", "DurationAdditionCheck.cpp", "DurationComparisonCheck.cpp", "DurationConversionCastCheck.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/Interpreter/ExceptionTests/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Interpreter/ExceptionTests/BUILD.gn index 5b0173badd658..ea52221b73dd8 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Interpreter/ExceptionTests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Interpreter/ExceptionTests/BUILD.gn @@ -9,10 +9,10 @@ unittest("ClangReplInterpreterExceptionTests") { deps = [ "//clang/lib/AST", "//clang/lib/Basic", - "//clang/lib/Interpreter", "//clang/lib/Frontend", - "//llvm/lib/IR", + "//clang/lib/Interpreter", "//llvm/lib/ExecutionEngine/Orc", + "//llvm/lib/IR", "//llvm/lib/Support", "//llvm/lib/Target:TargetsToBuild", ] diff --git a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn index ae3c5a59ddba9..592a91bc309b0 100644 --- a/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/StaticAnalyzer/BUILD.gn @@ -15,6 +15,7 @@ unittest("StaticAnalysisTests") { "BugReportInterestingnessTest.cpp", "CallDescriptionTest.cpp", "CallEventTest.cpp", + "ConflictingEvalCallsTest.cpp", "FalsePositiveRefutationBRVisitorTest.cpp", "NoStateChangeFuncVisitorTest.cpp", "ParamRegionTest.cpp", diff --git a/llvm/utils/gn/secondary/compiler-rt/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/BUILD.gn index 7a0f40be3f925..a234a3ce1f847 100644 --- a/llvm/utils/gn/secondary/compiler-rt/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/BUILD.gn @@ -18,9 +18,7 @@ if (android_ndk_path != "") { ] } group("compiler-rt") { - deps = [ - "//compiler-rt/include($host_toolchain)", - ] + deps = [ "//compiler-rt/include($host_toolchain)" ] foreach(toolchain, supported_toolchains) { deps += [ "//compiler-rt/lib($toolchain)" ] } diff --git a/llvm/utils/gn/secondary/lld/COFF/BUILD.gn b/llvm/utils/gn/secondary/lld/COFF/BUILD.gn index 78c79844bb2a7..8c1e6ca36273d 100644 --- a/llvm/utils/gn/secondary/lld/COFF/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/COFF/BUILD.gn @@ -27,9 +27,9 @@ static_library("COFF") { "//llvm/lib/WindowsManifest", ] sources = [ + "COFFLinkerContext.cpp", "CallGraphSort.cpp", "Chunks.cpp", - "COFFLinkerContext.cpp", "DLL.cpp", "DebugTypes.cpp", "Driver.cpp", diff --git a/llvm/utils/gn/secondary/lldb/include/lldb/Host/BUILD.gn b/llvm/utils/gn/secondary/lldb/include/lldb/Host/BUILD.gn index f51980c1ca872..e74cbab85d701 100644 --- a/llvm/utils/gn/secondary/lldb/include/lldb/Host/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/include/lldb/Host/BUILD.gn @@ -52,7 +52,7 @@ write_cmake_config("Config") { "LLDB_ENABLE_TERMIOS=1", ] } - + if (current_os == "win" || 
current_os == "linux" || current_os == "android") { values += [ "HAVE_SYS_EVENT_H=" ] } else { diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/BUILD.gn index 9ec2a2e914379..8de3a6c0bb8d8 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/BUILD.gn @@ -215,7 +215,8 @@ write_cmake_config("Plugins.def") { # These are in separate variables to make sure ProcessWindowsCommon is # initalized after all plugins, but before ProcessGDBRemote. if (current_os == "win") { - values += [ "LLDB_PROCESS_WINDOWS_PLUGIN=LLDB_PLUGIN(ProcessWindowsCommon)" ] + values += + [ "LLDB_PROCESS_WINDOWS_PLUGIN=LLDB_PLUGIN(ProcessWindowsCommon)" ] } else { values += [ "LLDB_PROCESS_WINDOWS_PLUGIN=" ] } diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/ObjectFile/JIT/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/ObjectFile/JIT/BUILD.gn index 735d15d82dc1a..b031967e7d62e 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/ObjectFile/JIT/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/ObjectFile/JIT/BUILD.gn @@ -9,6 +9,7 @@ static_library("JIT") { #"//lldb/source/Target", # 2-hop dependency cycle. "//llvm/lib/Support", ] + # For Utility/UuidCompatibility.h. include_dirs = [ "//lldb/source" ] sources = [ "ObjectFileJIT.cpp" ] diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn index a115e58453e1b..2d65a06fdc445 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn @@ -9,9 +9,10 @@ static_library("Linux") { "//lldb/source/Core", "//lldb/source/Host", "//lldb/source/Interpreter", - "//lldb/source/Target", "//lldb/source/Plugins/Platform/POSIX", + "//lldb/source/Target", ] + # Reaches into Plugins/Platform/POSIX. include_dirs = [ "//lldb/source" ] sources = [ "PlatformLinux.cpp" ] diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn index c80449916f8da..8ad09074646c3 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Linux/BUILD.gn @@ -4,13 +4,14 @@ static_library("Linux") { deps = [ "//lldb/source/Core", "//lldb/source/Host", + "//lldb/source/Plugins/Process/POSIX", + "//lldb/source/Plugins/Process/Utility", "//lldb/source/Symbol", "//lldb/source/Target", "//lldb/source/Utility", - "//lldb/source/Plugins/Process/POSIX", - "//lldb/source/Plugins/Process/Utility", "//llvm/lib/Support", ] + # Uses source-relative includes for own headers. include_dirs = [ "//lldb/source" ] sources = [ @@ -26,4 +27,3 @@ static_library("Linux") { "SingleStepCheck.cpp", ] } - diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/POSIX/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/POSIX/BUILD.gn index 2be26de4fae2c..dad3128ce224a 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/POSIX/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/POSIX/BUILD.gn @@ -2,10 +2,11 @@ static_library("POSIX") { output_name = "lldbPluginProcessPOSIX" configs += [ "//llvm/utils/gn/build:lldb_code" ] deps = [ - "//lldb/source/Utility", "//lldb/source/Plugins/Process/Utility", + "//lldb/source/Utility", "//llvm/lib/Support", ] + # Reaches into Plugins/Platform/Process/Utility. 
include_dirs = [ "//lldb/source" ] sources = [ diff --git a/llvm/utils/gn/secondary/lldb/tools/driver/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/driver/BUILD.gn index 9514ef229e01c..7f69adc2cd7e0 100644 --- a/llvm/utils/gn/secondary/lldb/tools/driver/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/tools/driver/BUILD.gn @@ -43,7 +43,7 @@ executable("lldb") { if (current_os == "linux") { deps += [ "//lldb/tools/lldb-server" ] } - foreach (toolchain, supported_toolchains) { + foreach(toolchain, supported_toolchains) { deps += [ "//lldb/tools/lldb-server($toolchain)" ] } diff --git a/llvm/utils/gn/secondary/lldb/tools/lldb-server/BUILD.gn b/llvm/utils/gn/secondary/lldb/tools/lldb-server/BUILD.gn index 178fb05bdd394..b5b3f65515d19 100644 --- a/llvm/utils/gn/secondary/lldb/tools/lldb-server/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/tools/lldb-server/BUILD.gn @@ -18,8 +18,8 @@ executable("lldb-server") { "//lldb/source:lldbBase", "//lldb/source/Host", "//lldb/source/Initialization", - "//lldb/source/Plugins/Instruction/ARM", + #"//lldb/source/Plugins/Instruction/MIPS", # XXX #"//lldb/source/Plugins/Instruction/MIPS64", # XXX "//llvm/lib/Option", diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn index 9a991bf833b31..a1ce9dcab38e2 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn @@ -119,6 +119,7 @@ write_cmake_config("config") { "LLVM_TARGET_TRIPLE_ENV=", "LLVM_VERSION_INFO=", "LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO=1", + "LLVM_WINDOWS_PREFER_FORWARD_SLASH=", "PACKAGE_BUGREPORT=https://bugs.llvm.org/", "PACKAGE_NAME=LLVM", "PACKAGE_STRING=LLVM ${llvm_version}git", diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn index b76e0311d7fe7..e87bff3ea66c9 100644 --- a/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/tools/llvm-reduce/BUILD.gn @@ -1,5 +1,6 @@ executable("llvm-reduce") { deps = [ + "//llvm/lib/CodeGen/MIRParser", "//llvm/lib/IR", "//llvm/lib/IRReader", "//llvm/lib/Support", @@ -10,6 +11,7 @@ executable("llvm-reduce") { include_dirs = [ "." 
] sources = [ "DeltaManager.cpp", + "ReducerWorkItem.cpp", "TestRunner.cpp", "deltas/Delta.cpp", "deltas/ReduceAliases.cpp", @@ -18,10 +20,12 @@ executable("llvm-reduce") { "deltas/ReduceBasicBlocks.cpp", "deltas/ReduceFunctionBodies.cpp", "deltas/ReduceFunctions.cpp", + "deltas/ReduceGlobalObjects.cpp", "deltas/ReduceGlobalValues.cpp", "deltas/ReduceGlobalVarInitializers.cpp", "deltas/ReduceGlobalVars.cpp", "deltas/ReduceInstructions.cpp", + "deltas/ReduceInstructionsMIR.cpp", "deltas/ReduceMetadata.cpp", "deltas/ReduceModuleData.cpp", "deltas/ReduceOperandBundles.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn index 8755f1e98b474..d9f41b90bbe45 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn @@ -22,6 +22,7 @@ unittest("ADTTests") { "BreadthFirstIteratorTest.cpp", "BumpPtrListTest.cpp", "CoalescingBitVectorTest.cpp", + "CombinationGeneratorTest.cpp", "DAGDeltaAlgorithmTest.cpp", "DeltaAlgorithmTest.cpp", "DenseMapTest.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/tools/llvm-exegesis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/tools/llvm-exegesis/BUILD.gn index 85fdf8f38dc91..9eb3c9d3037ec 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/tools/llvm-exegesis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/tools/llvm-exegesis/BUILD.gn @@ -15,6 +15,5 @@ unittest("LLVMExegesisTests") { "ClusteringTest.cpp", "PerfHelperTest.cpp", "RegisterValueTest.cpp", - "SnippetGeneratorTest.cpp", ] } diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 7bc4b445b75e0..5231271f8fef9 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -38,7 +38,7 @@ def __init__(self, lit_config, config): # Many tools behave strangely if these environment variables aren't # set. self.with_system_environment( - ['SystemDrive', 'SystemRoot', 'TEMP', 'TMP']) + ['SystemDrive', 'SystemRoot', 'TEMP', 'TMP', 'PLATFORM']) self.use_lit_shell = True global lit_path_displayed diff --git a/llvm/utils/reduce_pipeline.py b/llvm/utils/reduce_pipeline.py index 7779bb805e132..baf6b2f6930d2 100755 --- a/llvm/utils/reduce_pipeline.py +++ b/llvm/utils/reduce_pipeline.py @@ -57,7 +57,6 @@ extra_opt_args)) lst = pipeline.fromStr(args.passes) -passes = '-passes={}'.format(pipeline.toStr(lst)) ll_input = args.input # Step #-1 @@ -67,7 +66,8 @@ if not args.dont_expand_passes: run_args = [ args.opt_binary, '-disable-symbolication', '-disable-output', - '-print-pipeline-passes', passes, ll_input + '-print-pipeline-passes', '-passes={}'.format(pipeline.toStr(lst)), + ll_input ] run_args.extend(extra_opt_args) opt = subprocess.run(run_args, @@ -81,15 +81,15 @@ exit(1) stdout = opt.stdout.decode() stdout = stdout[:stdout.rfind('\n')] - print('Expanded pass sequence: {}'.format(stdout)) - passes = '-passes={}'.format(stdout) + lst = pipeline.fromStr(stdout) + print('Expanded pass sequence: {}'.format(pipeline.toStr(lst))) # Step #0 # Confirm that the given input, passes and options result in failure. 
print('---Starting step #0---') run_args = [ - args.opt_binary, '-disable-symbolication', '-disable-output', passes, - ll_input + args.opt_binary, '-disable-symbolication', '-disable-output', + '-passes={}'.format(pipeline.toStr(lst)), ll_input ] run_args.extend(extra_opt_args) opt = subprocess.run(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -121,22 +121,20 @@ if not args.dont_remove_empty_pm: lstA = pipeline.prune(lstA) lstB = pipeline.prune(lstB) - passesA = '-passes=' + pipeline.toStr(lstA) - passesB = '-passes=' + pipeline.toStr(lstB) intermediate = 'intermediate-0.ll' if idx % 2 else 'intermediate-1.ll' intermediate = tmpd.name + '/' + intermediate run_args = [ args.opt_binary, '-disable-symbolication', '-S', '-o', intermediate, - passesA, ll_input + '-passes={}'.format(pipeline.toStr(lstA)), ll_input ] run_args.extend(extra_opt_args) optA = subprocess.run(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) run_args = [ - args.opt_binary, '-disable-symbolication', '-disable-output', passesB, - intermediate + args.opt_binary, '-disable-symbolication', '-disable-output', + '-passes={}'.format(pipeline.toStr(lstB)), intermediate ] run_args.extend(extra_opt_args) optB = subprocess.run(run_args, @@ -161,10 +159,9 @@ [lstA, lstB] = pipeline.split(lst, idx) if not args.dont_remove_empty_pm: lstA = pipeline.prune(lstA) - passesA = '-passes=' + pipeline.toStr(lstA) run_args = [ - args.opt_binary, '-disable-symbolication', '-disable-output', passesA, - ll_input + args.opt_binary, '-disable-symbolication', '-disable-output', + '-passes={}'.format(pipeline.toStr(lstA)), ll_input ] run_args.extend(extra_opt_args) optA = subprocess.run(run_args, @@ -188,10 +185,9 @@ candLst = pipeline.remove(lst, idx) if not args.dont_remove_empty_pm: candLst = pipeline.prune(candLst) - passes = '-passes=' + pipeline.toStr(candLst) run_args = [ args.opt_binary, '-disable-symbolication', '-disable-output', - passes, ll_input + '-passes={}'.format(pipeline.toStr(candLst)), ll_input ] run_args.extend(extra_opt_args) opt = subprocess.run(run_args, diff --git a/llvm/utils/reduce_pipeline_test/fake_opt.py b/llvm/utils/reduce_pipeline_test/fake_opt.py index 225b6ee814329..ca661995c17fb 100755 --- a/llvm/utils/reduce_pipeline_test/fake_opt.py +++ b/llvm/utils/reduce_pipeline_test/fake_opt.py @@ -23,9 +23,12 @@ parser.add_argument('input') [args, unknown_args] = parser.parse_known_args() -# Echo pipeline if '-print-pipeline-passes'. +# Expand pipeline if '-print-pipeline-passes'. if args.print_pipeline_passes: - print(args.passes) + if args.passes == 'EXPAND_a_to_f': + print('a,b,c,d,e,f') + else: + print(args.passes) exit(0) # Parse '-crash-seq'. 
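[Editor's note] The reduce_pipeline.py hunks above stop carrying a pre-rendered `-passes=...` string between steps and instead keep the pipeline as a parsed list (`lst`), re-serializing it with `pipeline.toStr` at every `opt` invocation, so expanded pipelines stay structured. For readers who have not seen the script, below is a minimal editorial sketch of its core greedy pass-removal loop; `reducePasses`, `stillFails`, and the toy crash predicate are invented names, not part of the patch, and the real script shells out to `opt` and treats a crash as "still failing".

```c++
// Minimal sketch (not part of the patch) of the greedy pass-removal loop in
// llvm/utils/reduce_pipeline.py. The real script also bisects the pipeline
// and prunes empty pass managers; this shows only single-pass removal.
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

static std::vector<std::string> reducePasses(
    std::vector<std::string> passes,
    const std::function<bool(const std::vector<std::string> &)> &stillFails) {
  bool changed = true;
  while (changed) {
    changed = false;
    for (size_t i = 0; i < passes.size(); ++i) {
      std::vector<std::string> candidate = passes;
      candidate.erase(candidate.begin() + i); // Try dropping pass i.
      if (stillFails(candidate)) {            // Failure still reproduces?
        passes = std::move(candidate);        // Keep the smaller pipeline.
        changed = true;
        break;                                // Restart from the new list.
      }
    }
  }
  return passes;
}

int main() {
  // Toy stand-in for "opt crashes": fails iff both "b" and "e" survive,
  // mirroring the '-crash-seq=b,e' case in the test diff that follows.
  auto stillFails = [](const std::vector<std::string> &p) {
    bool hasB = false, hasE = false;
    for (const std::string &s : p) {
      if (s == "b") hasB = true;
      if (s == "e") hasE = true;
    }
    return hasB && hasE;
  };
  for (const std::string &s :
       reducePasses({"a", "b", "c", "d", "e", "f"}, stillFails))
    std::printf("%s ", s.c_str()); // Prints: b e
  std::printf("\n");
  return 0;
}
```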
diff --git a/llvm/utils/reduce_pipeline_test/test.py b/llvm/utils/reduce_pipeline_test/test.py index 03616fd17bdbe..170b2d4e8c6a3 100755 --- a/llvm/utils/reduce_pipeline_test/test.py +++ b/llvm/utils/reduce_pipeline_test/test.py @@ -45,19 +45,32 @@ def test_1(self): self.assertEqual(run.returncode, 0) self.assertEqual(getFinalPasses(run), '-passes="a,i"') - def test_2(self): - """Test the '--dont-expand-passes' option.""" + def test_2_0(self): + """Test expansion of EXPAND_a_to_f (expands into 'a,b,c,d,e,f').""" run_args = [ './utils/reduce_pipeline.py', '--opt-binary=./utils/reduce_pipeline_test/fake_opt.py', - '--input=/dev/null', '--passes=a,b,c,A(d,B(e,f),g),h,i', - '-crash-seq=b,d,f', '--dont-expand-passes' + '--input=/dev/null', '--passes=EXPAND_a_to_f', '-crash-seq=b,e' ] run = subprocess.run(run_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) self.assertEqual(run.returncode, 0) - self.assertEqual(getFinalPasses(run), '-passes="b,A(d,B(f))"') + self.assertEqual(getFinalPasses(run), '-passes="b,e"') + + def test_2_1(self): + """Test EXPAND_a_to_f and the '--dont-expand-passes' option.""" + run_args = [ + './utils/reduce_pipeline.py', + '--opt-binary=./utils/reduce_pipeline_test/fake_opt.py', + '--input=/dev/null', '--passes=EXPAND_a_to_f', + '-crash-seq=EXPAND_a_to_f', '--dont-expand-passes' + ] + run = subprocess.run(run_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + self.assertEqual(run.returncode, 0) + self.assertEqual(getFinalPasses(run), '-passes="EXPAND_a_to_f"') def test_3(self): """Test that empty pass-managers get removed by default.""" diff --git a/mlir/docs/Dialects/SPIR-V.md b/mlir/docs/Dialects/SPIR-V.md index 9e9106b3f5382..b79d9e8bdd31f 100644 --- a/mlir/docs/Dialects/SPIR-V.md +++ b/mlir/docs/Dialects/SPIR-V.md @@ -1422,8 +1422,8 @@ dialect. 
[GitHubLoweringTracking]: https://github.com/tensorflow/mlir/issues/303 [GenSpirvUtilsPy]: https://github.com/llvm/llvm-project/blob/main/mlir/utils/spirv/gen_spirv_dialect.py [CustomTypeAttrTutorial]: ../Tutorials/DefiningAttributesAndTypes.md -[VulkanExtensionPhysicalStorageBuffer]: https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_physical_storage_buffer.html -[VulkanExtensionVariablePointers]: https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/KHR/SPV_KHR_variable_pointers.html +[VulkanExtensionPhysicalStorageBuffer]: https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_physical_storage_buffer.html +[VulkanExtensionVariablePointers]: https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/KHR/SPV_KHR_variable_pointers.html [VulkanSpirv]: https://renderdoc.org/vkspec_chunked/chap40.html#spirvenv [VulkanShaderInterface]: https://renderdoc.org/vkspec_chunked/chap14.html#interfaces-resources [VulkanShaderInterfaceStorageClass]: https://renderdoc.org/vkspec_chunked/chap15.html#interfaces diff --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md index c3818d33fff24..357778a54f087 100644 --- a/mlir/docs/LangRef.md +++ b/mlir/docs/LangRef.md @@ -3,7 +3,7 @@ MLIR (Multi-Level IR) is a compiler intermediate representation with similarities to traditional three-address SSA representations (like [LLVM IR](http://llvm.org/docs/LangRef.html) or -[SIL](https://github.com/apple/swift/blob/master/docs/SIL.rst)), but which +[SIL](https://github.com/apple/swift/blob/main/docs/SIL.rst)), but which introduces notions from polyhedral loop optimization as first-class concepts. This hybrid design is optimized to represent, analyze, and transform high level dataflow graphs as well as target-specific code generated for high performance diff --git a/mlir/docs/Rationale/Rationale.md b/mlir/docs/Rationale/Rationale.md index c64e6f69f9130..192ec7ef7e2e8 100644 --- a/mlir/docs/Rationale/Rationale.md +++ b/mlir/docs/Rationale/Rationale.md @@ -13,7 +13,7 @@ about their consistency or readability. MLIR is a compiler intermediate representation with similarities to traditional three-address SSA representations (like [LLVM IR](http://llvm.org/docs/LangRef.html) or -[SIL](https://github.com/apple/swift/blob/master/docs/SIL.rst)), but which +[SIL](https://github.com/apple/swift/blob/main/docs/SIL.rst)), but which introduces notions from the polyhedral loop optimization works as first class concepts. This hybrid design is optimized to represent, analyze, and transform high level dataflow graphs as well as target-specific code generated for high @@ -195,10 +195,10 @@ represented in either form) but block arguments have several advantages: [landingpad instruction](http://llvm.org/docs/LangRef.html#landingpad-instruction) is a hack used to represent this. MLIR doesn't make use of this capability, but SIL uses it extensively, e.g. in the - [switch_enum instruction](https://github.com/apple/swift/blob/master/docs/SIL.rst#switch-enum). + [switch_enum instruction](https://github.com/apple/swift/blob/main/docs/SIL.rst#switch-enum). 
For more context, block arguments were previously used in the Swift -[SIL Intermediate Representation](https://github.com/apple/swift/blob/master/docs/SIL.rst), +[SIL Intermediate Representation](https://github.com/apple/swift/blob/main/docs/SIL.rst), and described in [a talk on YouTube](https://www.youtube.com/watch?v=Ntj8ab-5cvE). The section of interest diff --git a/mlir/docs/Rationale/RationaleGenericDAGRewriter.md b/mlir/docs/Rationale/RationaleGenericDAGRewriter.md index b0a1c163ee71c..d586b1f7719c0 100644 --- a/mlir/docs/Rationale/RationaleGenericDAGRewriter.md +++ b/mlir/docs/Rationale/RationaleGenericDAGRewriter.md @@ -102,7 +102,7 @@ GCC LLVM's [DAG Combiner](https://github.com/llvm-mirror/llvm/blob/master/lib/CodeGen/SelectionDAG/DAGCombiner.cpp), the Swift compiler's -[SIL Combiner](https://github.com/apple/swift/tree/master/lib/SILOptimizer/SILCombiner), +[SIL Combiner](https://github.com/apple/swift/tree/main/lib/SILOptimizer/SILCombiner), etc. These generally match one or more operations and produce zero or more operations as a result. The LLVM [Legalization](https://github.com/llvm/llvm-project/tree/main/llvm/lib/CodeGen/SelectionDAG) diff --git a/mlir/docs/Tutorials/DefiningAttributesAndTypes.md b/mlir/docs/Tutorials/DefiningAttributesAndTypes.md index 30d6a6e9412e8..0f8edc5bf1ae7 100644 --- a/mlir/docs/Tutorials/DefiningAttributesAndTypes.md +++ b/mlir/docs/Tutorials/DefiningAttributesAndTypes.md @@ -382,3 +382,172 @@ the things named `*Type` are generally now named `*Attr`. Aside from that, all of the interfaces for uniquing and storage construction are all the same. + +## Defining Custom Parsers and Printers using Assembly Formats + +Attributes and types defined in ODS with a mnemonic can define an +`assemblyFormat` to declaratively describe custom parsers and printers. The +assembly format consists of literals, variables, and directives. + +* A literal is a keyword or valid punctuation enclosed in backticks, e.g. + `` `keyword` `` or `` `<` ``. +* A variable is a parameter name preceded by a dollar sign, e.g. `$param0`, + which captures one attribute or type parameter. +* A directive is a keyword followed by an optional argument list that defines + special parser and printer behaviour. + +```tablegen +// An example type with an assembly format. +def MyType : TypeDef { + // Define a mnemonic to allow the dialect's parser hook to call into the + // generated parser. + let mnemonic = "my_type"; + + // Define two parameters whose C++ types are indicated in string literals. + let parameters = (ins "int":$count, "AffineMap":$map); + + // Define the assembly format. Surround the format with less `<` and greater + // `>` so that MLIR's printers use the pretty format. + let assemblyFormat = "`<` $count `,` `map` `=` $map `>`"; +} +``` + +The declarative assembly format for `MyType` results in the following format +in the IR: + +```mlir +!my_dialect.my_type<42, map = affine_map<(i, j) -> (j, i)>> +``` + +### Parameter Parsing and Printing + +For many basic parameter types, no additional work is needed to define how +these parameters are parsed or printed. + +* The default printer for any parameter is `$_printer << $_self`, + where `$_self` is the C++ value of the parameter and `$_printer` is a + `DialectAsmPrinter`.
+* The default parser for a parameter is + `FieldParser<$cppClass>::parse($_parser)`, where `$cppClass` is the C++ type + of the parameter and `$_parser` is a `DialectAsmParser`. + +Printing and parsing behaviour can be added to additional C++ types by +overloading these functions or by defining a `parser` and `printer` in an ODS +parameter class. + +Example of overloading: + +```c++ +using MyParameter = std::pair<int, int>; + +DialectAsmPrinter &operator<<(DialectAsmPrinter &printer, MyParameter param) { + printer << param.first << " * " << param.second; + return printer; +} + +template <> struct FieldParser<MyParameter> { + static FailureOr<MyParameter> parse(DialectAsmParser &parser) { + int a, b; + if (parser.parseInteger(a) || parser.parseStar() || + parser.parseInteger(b)) + return failure(); + return MyParameter(a, b); + } +}; +``` + +Example of using ODS parameter classes: + +```tablegen +def MyParameter : TypeParameter<"std::pair<int, int>", "pair of ints"> { + let printer = [{ $_printer << $_self.first << " * " << $_self.second }]; + let parser = [{ [&]() -> FailureOr<std::pair<int, int>> { + int a, b; + if ($_parser.parseInteger(a) || $_parser.parseStar() || + $_parser.parseInteger(b)) + return failure(); + return std::make_pair(a, b); + }() }]; +} +``` + +A type using this parameter with the assembly format `` `<` $myParam `>` `` +will look as follows in the IR: + +```mlir +!my_dialect.my_type<42 * 24> +``` + +#### Non-POD Parameters + +Parameters that aren't plain-old-data (e.g. references) may need to define a +`cppStorageType` to contain the data until it is copied into the allocator. +For example, `StringRefParameter` uses `std::string` as its storage type, +whereas `ArrayRefParameter` uses `SmallVector` as its storage type. The parsers +for these parameters are expected to return `FailureOr<$cppStorageType>`. + +### Assembly Format Directives + +Attribute and type assembly formats have the following directives: + +* `params`: capture all parameters of an attribute or type. +* `struct`: generate a "struct-like" parser and printer for a list of key-value + pairs. + +#### `params` Directive + +This directive is used to refer to all parameters of an attribute or type. +When used as a top-level directive, `params` generates a parser and printer for +a comma-separated list of the parameters. For example: + +```tablegen +def MyPairType : TypeDef { + let parameters = (ins "int":$a, "int":$b); + let mnemonic = "pair"; + let assemblyFormat = "`<` params `>`"; +} +``` + +In the IR, this type will appear as: + +```mlir +!my_dialect.pair<42, 24> +``` + +The `params` directive can also be passed to other directives, such as `struct`, +as an argument that refers to all parameters in place of explicitly listing all +parameters as variables. + +#### `struct` Directive + +The `struct` directive accepts a list of variables to capture and will generate +a parser and printer for a comma-separated list of key-value pairs. The +variables are printed in the order they are specified in the argument list **but +can be parsed in any order**. For example: + +```tablegen +def MyStructType : TypeDef { + let parameters = (ins StringRefParameter<>:$sym_name, + "int":$a, "int":$b, "int":$c); + let mnemonic = "struct"; + let assemblyFormat = "`<` $sym_name `->` struct($a, $b, $c) `>`"; +} +``` + +In the IR, this type can appear with any permutation of the order of the +parameters captured in the directive.
+ +```mlir +!my_dialect.struct<"foo" -> a = 1, b = 2, c = 3> +!my_dialect.struct<"foo" -> b = 2, c = 3, a = 1> +``` + +Passing `params` as the only argument to `struct` makes the directive capture +all the parameters of the attribute or type. For the same type above, an +assembly format of `` `<` struct(params) `>` `` will result in: + +```mlir +!my_dialect.struct<sym_name = "foo", a = 1, b = 2, c = 3> +``` + +The order in which the parameters are printed is the order in which they are +declared in the attribute's or type's `parameters` list. diff --git a/mlir/docs/doxygen.cfg.in b/mlir/docs/doxygen.cfg.in index 307981eed5f21..a0e4465c7dec0 100644 --- a/mlir/docs/doxygen.cfg.in +++ b/mlir/docs/doxygen.cfg.in @@ -1220,7 +1220,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# YES) or that it should be included in the main .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. diff --git a/mlir/include/mlir-c/AffineExpr.h b/mlir/include/mlir-c/AffineExpr.h index 5516f29088e43..14e951ddee9ad 100644 --- a/mlir/include/mlir-c/AffineExpr.h +++ b/mlir/include/mlir-c/AffineExpr.h @@ -39,6 +39,8 @@ DEFINE_C_API_STRUCT(MlirAffineExpr, const void); #undef DEFINE_C_API_STRUCT +struct MlirAffineMap; + /// Gets the context that owns the affine expression. MLIR_CAPI_EXPORTED MlirContext mlirAffineExprGetContext(MlirAffineExpr affineExpr); @@ -86,6 +88,10 @@ MLIR_CAPI_EXPORTED bool mlirAffineExprIsMultipleOf(MlirAffineExpr affineExpr, MLIR_CAPI_EXPORTED bool mlirAffineExprIsFunctionOfDim(MlirAffineExpr affineExpr, intptr_t position); +/// Composes the given map with the given expression. +MLIR_CAPI_EXPORTED MlirAffineExpr mlirAffineExprCompose( + MlirAffineExpr affineExpr, struct MlirAffineMap affineMap); + //===----------------------------------------------------------------------===// // Affine Dimension Expression. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir-c/IR.h b/mlir/include/mlir-c/IR.h index ca0c45224f3a5..1610191256eea 100644 --- a/mlir/include/mlir-c/IR.h +++ b/mlir/include/mlir-c/IR.h @@ -54,6 +54,7 @@ DEFINE_C_API_STRUCT(MlirOperation, void); DEFINE_C_API_STRUCT(MlirOpPrintingFlags, void); DEFINE_C_API_STRUCT(MlirBlock, void); DEFINE_C_API_STRUCT(MlirRegion, void); +DEFINE_C_API_STRUCT(MlirSymbolTable, void); DEFINE_C_API_STRUCT(MlirAttribute, const void); DEFINE_C_API_STRUCT(MlirIdentifier, const void); @@ -738,6 +739,47 @@ MLIR_CAPI_EXPORTED bool mlirTypeIDEqual(MlirTypeID typeID1, MlirTypeID typeID2); /// Returns the hash value of the type id. MLIR_CAPI_EXPORTED size_t mlirTypeIDHashValue(MlirTypeID typeID); +//===----------------------------------------------------------------------===// +// Symbol and SymbolTable API. +//===----------------------------------------------------------------------===// + +/// Returns the name of the attribute used to store symbol names compatible with +/// symbol tables. +MLIR_CAPI_EXPORTED MlirStringRef mlirSymbolTableGetSymbolAttributeName(); + +/// Creates a symbol table for the given operation. If the operation does not +/// have the SymbolTable trait, returns a null symbol table. +MLIR_CAPI_EXPORTED MlirSymbolTable +mlirSymbolTableCreate(MlirOperation operation); + +/// Returns true if the symbol table is null.
+static inline bool mlirSymbolTableIsNull(MlirSymbolTable symbolTable) { + return !symbolTable.ptr; +} + +/// Destroys the symbol table created with mlirSymbolTableCreate. This does not +/// affect the operations in the table. +MLIR_CAPI_EXPORTED void mlirSymbolTableDestroy(MlirSymbolTable symbolTable); + +/// Looks up a symbol with the given name in the given symbol table and returns +/// the operation that corresponds to the symbol. If the symbol cannot be found, +/// returns a null operation. +MLIR_CAPI_EXPORTED MlirOperation +mlirSymbolTableLookup(MlirSymbolTable symbolTable, MlirStringRef name); + +/// Inserts the given operation into the given symbol table. The operation must +/// have the symbol trait. If the symbol table already has a symbol with the +/// same name, renames the symbol being inserted to ensure name uniqueness. Note +/// that this does not move the operation itself into the block of the symbol +/// table operation; this should be done separately. Returns the name of the +/// symbol after insertion. +MLIR_CAPI_EXPORTED MlirAttribute +mlirSymbolTableInsert(MlirSymbolTable symbolTable, MlirOperation operation); + +/// Removes the given operation from the symbol table and erases it. +MLIR_CAPI_EXPORTED void mlirSymbolTableErase(MlirSymbolTable symbolTable, + MlirOperation operation); + #ifdef __cplusplus } #endif diff --git a/mlir/include/mlir-c/Interfaces.h b/mlir/include/mlir-c/Interfaces.h index f03dd6ea5c83b..87862834201d4 100644 --- a/mlir/include/mlir-c/Interfaces.h +++ b/mlir/include/mlir-c/Interfaces.h @@ -45,7 +45,7 @@ mlirOperationImplementsInterfaceStatic(MlirStringRef operationName, MLIR_CAPI_EXPORTED MlirTypeID mlirInferTypeOpInterfaceTypeID(); /// These callbacks are used to return multiple types from functions while -/// transferring ownerhsip to the caller. The first argument is the number of +/// transferring ownership to the caller. The first argument is the number of /// consecutive elements pointed to by the second argument. The third argument /// is an opaque pointer forwarded to the callback by the caller. typedef void (*MlirTypesCallback)(intptr_t, MlirType *, void *); diff --git a/mlir/include/mlir-c/Support.h b/mlir/include/mlir-c/Support.h index 315f6c4564eba..f20e58fe62317 100644 --- a/mlir/include/mlir-c/Support.h +++ b/mlir/include/mlir-c/Support.h @@ -79,6 +79,10 @@ inline static MlirStringRef mlirStringRefCreate(const char *str, MLIR_CAPI_EXPORTED MlirStringRef mlirStringRefCreateFromCString(const char *str); +/// Returns true if two string references are equal, false otherwise. +MLIR_CAPI_EXPORTED bool mlirStringRefEqual(MlirStringRef string, + MlirStringRef other); +
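[Editor's note] To make the new symbol-table C API above concrete, here is a hedged usage sketch. It uses only the declarations shown in the hunks above; the helper name `lookupSymbol` and the assumption that `module` is an operation carrying the SymbolTable trait (e.g. a `builtin.module`) are the editor's, not the patch's.

```c++
#include "mlir-c/IR.h"
#include "mlir-c/Support.h"

// Returns the operation that defines symbol `name` inside `module`, or a
// null MlirOperation if `module` has no symbol table or no such symbol.
static MlirOperation lookupSymbol(MlirOperation module, const char *name) {
  MlirSymbolTable table = mlirSymbolTableCreate(module);
  if (mlirSymbolTableIsNull(table)) // `module` lacks the SymbolTable trait.
    return MlirOperation{nullptr};
  MlirOperation found =
      mlirSymbolTableLookup(table, mlirStringRefCreateFromCString(name));
  mlirSymbolTableDestroy(table); // Frees the table, not the operations in it.
  return found;
}
```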
/// A callback for returning string references. /// /// This function is called back by the functions that need to return a diff --git a/mlir/include/mlir/CAPI/IR.h b/mlir/include/mlir/CAPI/IR.h index d5e961367e79a..8366b0bce6d70 100644 --- a/mlir/include/mlir/CAPI/IR.h +++ b/mlir/include/mlir/CAPI/IR.h @@ -27,6 +27,7 @@ DEFINE_C_API_PTR_METHODS(MlirOperation, mlir::Operation) DEFINE_C_API_PTR_METHODS(MlirBlock, mlir::Block) DEFINE_C_API_PTR_METHODS(MlirOpPrintingFlags, mlir::OpPrintingFlags) DEFINE_C_API_PTR_METHODS(MlirRegion, mlir::Region) +DEFINE_C_API_PTR_METHODS(MlirSymbolTable, mlir::SymbolTable) DEFINE_C_API_METHODS(MlirAttribute, mlir::Attribute) DEFINE_C_API_METHODS(MlirIdentifier, mlir::Identifier) diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h index 267a362b372ce..229ccd0e95da8 100644 --- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h +++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h @@ -9,6 +9,7 @@ #define MLIR_CONVERSION_GPUTONVVM_GPUTONVVMPASS_H_ #include "mlir/Conversion/LLVMCommon/LoweringOptions.h" +#include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include namespace mlir { @@ -22,8 +23,11 @@ class OperationPass; namespace gpu { class GPUModuleOp; +class MMAMatrixType; } +LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type); + /// Configure target to convert from the GPU dialect to NVVM. void configureGpuToNVVMConversionLegality(ConversionTarget &target); diff --git a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td index b3ef7ed02b038..4ec51079322ae 100644 --- a/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td +++ b/mlir/include/mlir/Dialect/Arithmetic/IR/ArithmeticOps.td @@ -92,16 +92,16 @@ def SignlessFixedWidthIntegerLike : TypeConstraint traits = []> : Arith_CastOp; + SignlessFixedWidthIntegerLike, traits>; // Cast from an integer type to a floating point type. class Arith_IToFCastOp traits = []> : - Arith_CastOp; + Arith_CastOp; // Cast from a floating point type to an integer type. class Arith_FToICastOp traits = []> : - Arith_CastOp; + Arith_CastOp; // Cast from a floating point type to another floating point type. class Arith_FToFCastOp traits = []> : - Arith_CastOp; + Arith_CastOp; // Base class for compare operations. Requires two operands of the same type // and returns a single `BoolLike` result.
If the operand type is a vector or diff --git a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt index 73aa1d92ffc1e..4808ec53e4e75 100644 --- a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt @@ -22,4 +22,9 @@ mlir_tablegen(Passes.capi.h.inc -gen-pass-capi-header --prefix GPU) mlir_tablegen(Passes.capi.cpp.inc -gen-pass-capi-impl --prefix GPU) add_public_tablegen_target(MLIRGPUPassIncGen) +set(LLVM_TARGET_DEFINITIONS GPUOps.td) +mlir_tablegen(GPUOpsEnums.h.inc -gen-enum-decls) +mlir_tablegen(GPUOpsEnums.cpp.inc -gen-enum-defs) +add_public_tablegen_target(MLIRGPUOpsEnumsGen) + add_mlir_doc(Passes GPUPasses ./ -gen-pass-doc) diff --git a/mlir/include/mlir/Dialect/GPU/GPUBase.td b/mlir/include/mlir/Dialect/GPU/GPUBase.td index a7bd8ece6a1c7..6c2fa43679d23 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUBase.td +++ b/mlir/include/mlir/Dialect/GPU/GPUBase.td @@ -115,18 +115,4 @@ def GPU_AsyncOpInterface : OpInterface<"AsyncOpInterface"> { ]; } -// Cases of the String enum Attribute for SubgroupMmaOpLayout, representing -// the layouts of the operands supported by the ops that use this attribute. -def RowMajor: StrEnumAttrCase<"RowMajor", 0>; -def ColMajor: StrEnumAttrCase<"ColMajor", 1>; - -// Specifies a String enum Attribute for Warp wide matrix operations, -// representing the layout of respective operands. The layout later governs -// the lowerings to appropriate intrinsics. -def SubgroupMmaOpLayout: StrEnumAttr<"Layout", "Specifies whether op is row/col major", - [RowMajor, ColMajor]> { - let stringToSymbolFnName = "LayoutStrToEnum"; - let symbolToStringFnName = "EnumToLayoutStr"; -} - #endif // GPU_BASE diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h index 79e8dca5af9c1..5c1b9db33c563 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h @@ -166,6 +166,8 @@ void addAsyncDependency(Operation *op, Value token); } // end namespace gpu } // end namespace mlir +#include "mlir/Dialect/GPU/GPUOpsEnums.h.inc" + #include "mlir/Dialect/GPU/GPUOpsDialect.h.inc" #include "mlir/Dialect/GPU/GPUOpInterfaces.h.inc" diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td index b92d315b19ffb..3f1ad84278cb0 100644 --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -591,13 +591,13 @@ def GPU_YieldOp : GPU_Op<"yield", [NoSideEffect, Terminator]>, } // add, mul mirror the XLA ComparisonDirection enum. 
-def GPU_AllReduceOpAdd : StrEnumAttrCase<"add">; -def GPU_AllReduceOpAnd : StrEnumAttrCase<"and">; -def GPU_AllReduceOpMax : StrEnumAttrCase<"max">; -def GPU_AllReduceOpMin : StrEnumAttrCase<"min">; -def GPU_AllReduceOpMul : StrEnumAttrCase<"mul">; -def GPU_AllReduceOpOr : StrEnumAttrCase<"or">; -def GPU_AllReduceOpXor : StrEnumAttrCase<"xor">; +def GPU_AllReduceOpAdd : StrEnumAttrCase<"ADD", -1, "add">; +def GPU_AllReduceOpAnd : StrEnumAttrCase<"AND", -1, "and">; +def GPU_AllReduceOpMax : StrEnumAttrCase<"MAX", -1, "max">; +def GPU_AllReduceOpMin : StrEnumAttrCase<"MIN", -1, "min">; +def GPU_AllReduceOpMul : StrEnumAttrCase<"MUL", -1, "mul">; +def GPU_AllReduceOpOr : StrEnumAttrCase<"OR", -1, "or">; +def GPU_AllReduceOpXor : StrEnumAttrCase<"XOR", -1, "xor">; def GPU_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr", "built-in reduction operations supported by gpu.allreduce.", @@ -609,7 +609,9 @@ def GPU_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr", GPU_AllReduceOpMul, GPU_AllReduceOpOr, GPU_AllReduceOpXor - ]>; + ]>{ + let cppNamespace = "::mlir::gpu"; +} def GPU_AllReduceOp : GPU_Op<"all_reduce", [SameOperandsAndResultType, IsolatedFromAbove]>, @@ -644,13 +646,16 @@ def GPU_AllReduceOp : GPU_Op<"all_reduce", let verifier = [{ return ::verifyAllReduce(*this); }]; } -def GPU_ShuffleOpXor : StrEnumAttrCase<"xor">; +def GPU_ShuffleOpXor : StrEnumAttrCase<"XOR", -1, "xor">; def GPU_ShuffleModeAttr : StrEnumAttr<"ShuffleModeAttr", "Indexing modes supported by gpu.shuffle.", [ GPU_ShuffleOpXor, - ]>; + ]>{ + let cppNamespace = "::mlir::gpu"; +} + def GPU_ShuffleOp : GPU_Op<"shuffle", [NoSideEffect]>, Arguments<(ins AnyType:$value, I32:$offset, I32:$width, @@ -1063,8 +1068,8 @@ def GPU_SubgroupMmaComputeOp : GPU_Op<"subgroup_mma_compute", ``` }]; - let arguments = (ins Arg>:$opA, - Arg>:$opB, + let arguments = (ins Arg>:$opA, + Arg>:$opB, Arg>:$opC); let results = (outs GPU_MMAMatrix:$res); @@ -1121,4 +1126,60 @@ def GPU_SubgroupMmaConstantMatrixOp : GPU_Op<"subgroup_mma_constant_matrix", }]; } +def GPU_ELEMENTWISE_OP_ADD : StrEnumAttrCase<"ADDF">; +def GPU_ELEMENTWISE_OP_MUL : StrEnumAttrCase<"MULF">; +def GPU_ELEMENTWISE_OP_MAXF : StrEnumAttrCase<"MAXF">; +def GPU_ELEMENTWISE_OP_MINF : StrEnumAttrCase<"MINF">; + +def MMAElementWiseAttr : StrEnumAttr<"MMAElementwiseOp", + "elementwise operation to apply to mma matrix", + [GPU_ELEMENTWISE_OP_ADD, GPU_ELEMENTWISE_OP_MUL, + GPU_ELEMENTWISE_OP_MAXF, GPU_ELEMENTWISE_OP_MINF]> { + let cppNamespace = "::mlir::gpu"; + let storageType = "::mlir::StringAttr"; + let returnType = "::mlir::gpu::MMAElementwiseOp"; + let convertFromStorage = "*symbolizeMMAElementwiseOp($_self.getValue())"; + let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; +} + +def GPU_SubgroupMmaElementwiseOp : GPU_Op<"subgroup_mma_elementwise", + [NoSideEffect, + AllTypesMatch<["args"]>]>{ + + let summary = "GPU warp elementwise operation on a matrix"; + + let description = [{ + The `gpu.subgroup_mma_elementwise` takes `!gpu.mma_matrix` inputs and + computes a new `!gpu.mma_matrix` by applying an elementwise operation to each + element. + + Since the operation is elementwise and the matrix type must match, the + matrix elements are processed independently of the matrix layout. + + This op is meant to be used along with `gpu.subgroup_mma_compute`.
+ + Example: + + ```mlir + %0 = gpu.subgroup_mma_elementwise %A, %B { operation = "ADD" } : + (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) + -> !gpu.mma_matrix<16x16xf16, "COp"> + ``` + }]; + + let arguments = (ins Variadic:$args, MMAElementWiseAttr:$operation); + + let results = (outs GPU_MMAMatrix:$res); + + let extraClassDeclaration = [{ + gpu::MMAMatrixType getType() { + return res().getType().cast<gpu::MMAMatrixType>(); + } + }]; + + let assemblyFormat = [{ + $args attr-dict `:` functional-type($args, $res) + }]; +} + #endif // GPU_OPS diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt index f7ca6dd7624bb..822e60da7c20d 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt @@ -33,6 +33,8 @@ add_mlir_dialect(NVVMOps nvvm) add_mlir_doc(NVVMOps NVVMDialect Dialects/ -gen-dialect-doc) set(LLVM_TARGET_DEFINITIONS NVVMOps.td) mlir_tablegen(NVVMConversions.inc -gen-llvmir-conversions) +mlir_tablegen(NVVMOpsEnums.h.inc -gen-enum-decls) +mlir_tablegen(NVVMOpsEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRNVVMConversionsIncGen) add_mlir_dialect(ROCDLOps rocdl) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h index fff82e3b9f4f4..4c5d786f119a8 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h @@ -18,6 +18,16 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/OpDefinition.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "llvm/IR/IntrinsicsNVPTX.h" + +#include "mlir/Dialect/LLVMIR/NVVMOpsEnums.h.inc" + +/// Return the element type and number of elements associated with a wmma matrix +/// of given characteristics. This matches the logic in IntrinsicsNVVM.td +/// WMMA_REGS structure. +std::pair<mlir::Type, unsigned> inferMMAType(mlir::NVVM::MMATypes type, + mlir::NVVM::MMAFrag frag, + mlir::MLIRContext *context); ///// Ops ///// #define GET_OP_CLASSES diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index a3f5a84dad59f..db76cc1a93c32 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -151,124 +151,355 @@ def NVVM_MmaOp : let verifier = [{ return ::verify(*this); }]; } -// Base class for all the variants of WMMA loadOps that may be defined. -class NVVM_WMMALoadOp : NVVM_Op, - Results<(outs LLVM_AnyStruct:$res)>, - Arguments<(ins Variadic:$args)> { +/// Helpers to instantiate different versions of wmma intrinsics. +/// This matches the hierarchy used in IntrinsicsNVVM.td to define all the +/// combinations of the intrinsics. +class GEOM { + int m = M; + int n = N; + int k = K; +} - let summary = "Warp synchronous matrix load"; +/// Class containing information about valid mma matrix types. +class WMMA_REGS { + int m = Geom.m; + int n = Geom.n; + int k = Geom.k; + string geom = "m"#Geom.m#"n"#Geom.n#"k"#Geom.k; + string frag = Frag; + string ptx_elt_type = PtxEltType; + string gft = geom#":"#Frag#":"#ptx_elt_type; +} - string baseDescription = [{"The `nvvm.wmma.m*n*k*.load.[a, b, c]` operation" "loads a matrix collectively using all the threads in a warp." +/// Generate enum value of the mma.load/mma.store intrinsic.
+class WMMA_NAME_LDST { + string id = "llvm::Intrinsic::nvvm_wmma" + # "_" # Frag.geom + # "_" # Op + # "_" # Frag.frag + # "_" # Frag.ptx_elt_type + # "_" # Layout + # !if(WithStride, "_stride", ""); +} - "The operation takes two arguments, the address from where the matrix" - "elements are to be loaded from and a stride. The stride argument" - "represents the leading dimension of the source matrix. The address and" - "the stride are required to be the same across all threads in the warp." - "Each thread in a warp holds a certain number of elements. The Op returns" - "a LLVMStruct which holds the elements of the matrix held by this thread." +/// Generate the signature part of the mma intrinsic name. +class MMA_SIGNATURE { + list id_frags = !cond( + // FP16 ops are identified by accumulator & result type. + !eq(A.ptx_elt_type, "f16") : [D, C], + // other ops are identified by input types. + !ne(A.ptx_elt_type, B.ptx_elt_type): [A, B], + true: [A] + ); + string ret = !foldl("", id_frags, a, b, !strconcat(a, "_", b.ptx_elt_type)); +} - "This op is meant to be used along with `nvvm.wmma.m*n*k*.store` and" - "`nvvm.wmma.m*n*k*.mma`."}]; +/// Generate enum value of the wmma.mma intrinsic. +class WMMA_NAME { + string signature = MMA_SIGNATURE.ret; + string id = "llvm::Intrinsic::nvvm_wmma" + # "_" # A.geom + # "_" # Op + # "_" # ALayout + # "_" # BLayout + # signature; +} - let assemblyFormat = "$args attr-dict `:` functional-type($args, $res)"; +// Generates list of 4-tuples of WMMA_REGS representing a valid MMA op. +// Geom: list of supported geometries. +// TypeN: PTX type of the corresponding fragment's element. +// TypeB and TypeD may be empty if it must match that of TypeA or TypeC. +class MMA_OPS Geom, list TypeA, list TypeB, + list TypeC, list TypeD> { + list> ret = + !foldl([]>, Geom, t1, geom, !listconcat(t1, + !foldl([]>, TypeA, t2, type_a, !listconcat(t2, + !foldl([]>, !if(!size(TypeB), TypeB, [type_a]), t3, type_b, !listconcat(t3, + !foldl([]>, TypeC, t4, type_c, !listconcat(t4, + !foldl([]>, !if(!size(TypeD), TypeD, [type_c]), t5, type_d, !listconcat(t5, + [[WMMA_REGS, + WMMA_REGS, + WMMA_REGS, + WMMA_REGS]])))))))))); + // Debugging aid for readable representation of the list above. + list> ops = !foreach(x, ret, [x[0].gft, x[1].gft, x[2].gft, x[3].gft]); } -def NVVM_WMMALoadAM16N16K16Op : - NVVM_WMMALoadOp<"wmma.m16n16k16.load.a.f16.row.stride">{ +/// Creates a list of combinations of load/store operations supported. +class MMA_LDST_OPS Geom, list Frags, list Types> { + list ret = + !foldl([], Geom, t1, geom, !listconcat(t1, + !foldl([], Frags, t2, frag, !listconcat(t2, + !foldl([], Types, t3, type, !listconcat(t3, + [WMMA_REGS])))))); + // Debugging aid for readable representation of the list above. + list ops = !foreach(x, ret, x.gft); +} - string llvmBuilder = [{ - $res = createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride, $args); - }]; +// Creates list of valid combinations of fragments. This is a subset of what +// llvm supports and can be extended as needed. 
+class NVVM_MMA_OPS { + list> tf32_wmma_ops = MMA_OPS< + [GEOM<16, 16, 8>], + ["tf32"], [], ["f32"], []>.ret; + list> fp_wmma_ops = MMA_OPS< + [GEOM<16, 16, 16>, GEOM<32, 8, 16>, GEOM<8, 32, 16>], + ["f16"], [], ["f16", "f32"], []>.ret; + list> all_wmma_ops = !listconcat( + tf32_wmma_ops, + fp_wmma_ops); + + list ldst_ab_ops = MMA_LDST_OPS< + [GEOM<16, 16, 16>, GEOM<32, 8, 16>, GEOM<8, 32, 16>], + ["a", "b"], ["f16"]>.ret; + list ldst_cd_ops = MMA_LDST_OPS< + [GEOM<16, 16, 16>, GEOM<32, 8, 16>, GEOM<8, 32, 16>], + ["c", "d"], ["f16", "f32"]>.ret; + list ldst_tf32_ab_ops = MMA_LDST_OPS< + [GEOM<16, 16, 8>], + ["a", "b"], ["tf32"]>.ret; + list ldst_tf32_cd_ops = MMA_LDST_OPS< + [GEOM<16, 16, 8>], + ["c", "d"], ["f32"]>.ret; + list all_ldst_ops = !listconcat(ldst_ab_ops, ldst_cd_ops, + ldst_tf32_ab_ops, + ldst_tf32_cd_ops); + // Separate A/B/C fragments (loads) from D (stores). + list all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d")); + list all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d")); +} - string opDescription = [{ - Example: +def NVVM_MMA_OPS : NVVM_MMA_OPS; + +/// Helper to create the mapping between the configuration and the store +/// intrinsic enum value. +class MMA_ST_INTR { + list> cond0 = !foreach(frag, NVVM_MMA_OPS.all_st_ops, + !foreach(layout, ["row", "col"], + "if (layout == \"" # layout # "\" && m == " # frag.m # " &&" + " n == " #frag.n # " && k == " # frag.k # " && \"" # + frag.ptx_elt_type # "\" == eltype)" + " return " #WMMA_NAME_LDST.id #";")); + string id = !foldl("", + !foldl([""], cond0, acc, el, !listconcat(acc, el)), + acc1, el1, acc1 # "\n" # el1); +} - ```mlir - %2 = nvvm.wmma.m16n16k16.load.a %0, %1 : !llvm.ptr, !llvm.i32 -> - !llvm.struct<(vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>, - vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>)> - ``` - }]; +/// Helper to map a mxk shape to a supported mxnxk matrix type. This will return +/// the n value of the supported configuration. +class MMA_ST_INFER_N ldst> { + list cond = !foreach(frag, ldst, + "if (m == " # frag.m # " && k == " #frag.k # " && \"" # + frag.ptx_elt_type # "\" == eltype)" + " return "# frag.n #";"); + string id = !foldl("", cond, acc, el, acc # "\n" # el); +} - let description = !strconcat(baseDescription, opDescription); +/// Helper to map a kxn shape to a supported mxnxk matrix type. This will return +/// the m value of the supported configuration. +class MMA_ST_INFER_M ldst> { + list cond = !foreach(frag, ldst, + "if (n == " # frag.n # " && k == " #frag.k # " && \"" # + frag.ptx_elt_type # "\" == eltype)" + " return "# frag.m #";"); + string id = !foldl("", cond, acc, el, acc # "\n" # el); +} - let verifier = [{ return ::verify(*this); }]; +/// Helper to map a mxn shape to a supported mxnxk matrix type. This will return +/// the k value of the supported configuration. +class MMA_ST_INFER_K ldst> { + list cond = !foreach(frag, ldst, + "if (m == " # frag.m # " && n == " #frag.n # " && \"" # + frag.ptx_elt_type # "\" == eltype)" + " return "# frag.k #";"); + string id = !foldl("", cond, acc, el, acc # "\n" # el); } -def NVVM_WMMALoadBM16N16K16Op : - NVVM_WMMALoadOp<"wmma.m16n16k16.load.b.f16.row.stride">{ +/// Helper to create the mapping between the configuration and the load +/// intrinsic enum value. 
+class MMA_LD_INTR { + list> cond0 = !foreach(frag, NVVM_MMA_OPS.all_ld_ops, + !foreach(layout, ["row", "col"], + "if (layout == \"" # layout # "\" && m == " # frag.m # " &&" + " n == " #frag.n # " && k == " # frag.k # " && \"" # + frag.ptx_elt_type # "\" == eltype && frag == \""#frag.frag#"\")" + " return "# WMMA_NAME_LDST.id #";")); + string id = !foldl("", + !foldl([""], cond0, acc, el, !listconcat(acc, el)), + acc1, el1, acc1 # "\n" # el1); +} - string llvmBuilder = [{ - $res = createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride, $args); - }]; +/// Helper to create the mapping between the configuration and the wmma.mma +/// intrinsic enum value. +class MMA_MMA_INTR { + list>> cond0 = + !foreach(op, NVVM_MMA_OPS.all_wmma_ops, + !foreach(layoutA, ["row", "col"], + !foreach(layoutB, ["row", "col"], + "if (layoutA == \"" # layoutA # "\" && layoutB == \"" # layoutB # "\" && " + " m == " # op[0].m # " && n == " #op[0].n # " && k == " # op[0].k # + " && \"" # op[0].ptx_elt_type # "\" == eltypeA && \"" + # op[3].ptx_elt_type # "\" == eltypeB)" + " return " # + WMMA_NAME.id # ";"))); + list f = !foldl([""], + !foldl([[""]], cond0, acc, el, !listconcat(acc, el)), + acc1, el1, !listconcat(acc1, el1)); + string id = !foldl("", f, acc, el, acc # "\n" # el); +} - string opDescription = [{ - Example: +def MMALayoutRow : StrEnumAttrCase<"row">; +def MMALayoutCol : StrEnumAttrCase<"col">; - ```mlir - %2 = nvvm.wmma.m16n16k16.load.b %0, %1 : !llvm.ptr, !llvm.i32 -> - !llvm.struct<(vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>, - vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>)> - ``` - }]; +/// Enum attribute of the different matrix layout. +def MMALayout : StrEnumAttr<"MMALayout", "NVVM MMA layout", + [MMALayoutRow, MMALayoutCol]> { + let cppNamespace = "::mlir::NVVM"; + let storageType = "mlir::StringAttr"; + let returnType = "NVVM::MMALayout"; + let convertFromStorage = "*symbolizeEnum($_self.getValue())"; + let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; +} - let description = !strconcat(baseDescription, opDescription); +def MMATypeF16 : StrEnumAttrCase<"f16">; +def MMATypeF32 : StrEnumAttrCase<"f32">; +def MMATypeTF32 : StrEnumAttrCase<"tf32">; - let verifier = [{ return ::verify(*this); }]; +/// Enum attribute of the different matrix types. +def MMATypes : StrEnumAttr<"MMATypes", "NVVM MMA types", + [MMATypeF16, MMATypeF32, MMATypeTF32]> { + let cppNamespace = "::mlir::NVVM"; + let storageType = "mlir::StringAttr"; + let returnType = "NVVM::MMATypes"; + let convertFromStorage = "*symbolizeEnum($_self.getValue())"; + let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; } -def NVVM_WMMALoadCF16M16N16K16Op : - NVVM_WMMALoadOp<"wmma.m16n16k16.load.c.f16.row.stride">{ - string llvmBuilder = [{ - $res = createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride, $args); - }]; +def MMAFragA : StrEnumAttrCase<"a">; +def MMAFragB : StrEnumAttrCase<"b">; +def MMAFragC : StrEnumAttrCase<"c">; - string opDescription = [{ - Example: +/// Enum attribute of the different frag types. 
+def MMAFragAttr : StrEnumAttr<"MMAFrag", "NVVM MMA frag type", + [MMAFragA, MMAFragB, MMAFragC]> { + let cppNamespace = "::mlir::NVVM"; + let storageType = "mlir::StringAttr"; + let returnType = "NVVM::MMAFrag"; + let convertFromStorage = "*symbolizeEnum($_self.getValue())"; + let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; +} - ```mlir - %2 = nvvm.wmma.m16n16k16.load.c.f16.row.stride %0, %1 : !llvm.ptr, !llvm.i32 -> - !llvm.struct<(vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>)> - ``` - }]; +def NVVM_WMMALoadOp: NVVM_Op<"wmma.load">, + Results<(outs LLVM_AnyStruct:$res)>, + Arguments<(ins LLVM_AnyPointer: $ptr, I32: $stride, I32Attr:$m, + I32Attr:$n, I32Attr:$k, MMALayout:$layout, MMATypes:$eltype, + MMAFragAttr:$frag)> { - let description = !strconcat(baseDescription, opDescription); + let summary = "Warp synchronous matrix load"; + + // Since LLVM intrinsic IDs are enums that cannot be dynamically generated in + // C++, we instantiate a function in tablegen to map the valid configuration + // to the corresponding intrinsic ID. + // Because we want a single source of truth, this means the source of truth + // about valid combinations needs to be in tablegen; therefore we generate + // extra helpers to query valid configurations based on the shapes of + // load/store operations. + let extraClassDeclaration = + "static llvm::Intrinsic::ID getIntrinsicID(" + "int m, int n, int k, mlir::NVVM::MMALayout layoutEnum," + "mlir::NVVM::MMATypes eltypeEnum,mlir::NVVM::MMAFrag fragEnum) {" + "llvm::StringRef layout = stringifyEnum(layoutEnum);" + "llvm::StringRef eltype = stringifyEnum(eltypeEnum);" + "llvm::StringRef frag = stringifyEnum(fragEnum);" + #MMA_LD_INTR<"load">.id# "\n" + "return 0;" + "}\n" + "/// Helpers to find valid n dimension based on mxk load shape.\n" + "static int inferNDimension(int m, int k, mlir::NVVM::MMATypes eltypeEnum) {" + " llvm::StringRef eltype = stringifyEnum(eltypeEnum);" + #MMA_ST_INFER_N.id# "\n" + "return 0;" + "}\n" + "/// Helpers to find valid m dimension based on kxn load shape.\n" + "static int inferMDimension(int k, int n, mlir::NVVM::MMATypes eltypeEnum) {" + " llvm::StringRef eltype = stringifyEnum(eltypeEnum);" + #MMA_ST_INFER_M.id# "\n" + "return 0;" + "}\n" + "/// Helpers to find valid k dimension based on mxn load shape.\n" + "static int inferKDimension(int m, int n, mlir::NVVM::MMATypes eltypeEnum) {" + " llvm::StringRef eltype = stringifyEnum(eltypeEnum);" + #MMA_ST_INFER_K.id# "\n" + "return 0;" + "}\n"; - let verifier = [{ return ::verify(*this); }]; -} -def NVVM_WMMALoadCF32M16N16K16Op : - NVVM_WMMALoadOp<"wmma.m16n16k16.load.c.f32.row.stride">{ string llvmBuilder = [{ - $res = createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride, $args); + auto operands = moduleTranslation.lookupValues(opInst.getOperands()); + auto intId = mlir::NVVM::WMMALoadOp::getIntrinsicID( + $m, $n, $k, $layout, $eltype, $frag); + $res = createIntrinsicCall(builder, intId, operands, {operands[0]->getType()}); }]; - string opDescription = [{ + string baseDescription = [{ + The `nvvm.wmma.load` operation loads a matrix collectively using all the + threads in a warp. + + The operation takes two arguments: the address from which the matrix + elements are to be loaded, and a stride. The stride argument + represents the leading dimension of the source matrix. The address and + the stride are required to be the same across all threads in the warp.
+ Each thread in a warp holds a certain number of elements. The Op returns + an LLVMStruct which holds the elements of the matrix held by this thread. + + This op is meant to be used along with `nvvm.wmma.store` and + `nvvm.wmma.mma`. + Example: ```mlir - %2 = nvvm.wmma.m16n16k16.load.c.f32.row.stride %0, %1 : !llvm.ptr, !llvm.i32 -> - !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %2 = nvvm.wmma.load %0, %1 + {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> ``` - }]; - - let description = !strconcat(baseDescription, opDescription); + }]; + let assemblyFormat = "$ptr `,` $stride attr-dict `:` functional-type($ptr, $res)"; let verifier = [{ return ::verify(*this); }]; } -// Base class for all the variants of WMMA storeOps that may be defined. -class NVVM_WMMAStoreOp : NVVM_Op, - Arguments<(ins Variadic:$args)>{ +def NVVM_WMMAStoreOp : NVVM_Op<"wmma.store">, + Arguments<(ins LLVM_AnyPointer: $ptr, + I32Attr:$m, I32Attr:$n, I32Attr:$k, MMALayout:$layout, + MMATypes:$eltype, Variadic:$args, I32: $stride)>{ let summary = "Warp synchronous matrix store"; + let extraClassDeclaration = + "static llvm::Intrinsic::ID getIntrinsicID(" + "int m, int n, int k, mlir::NVVM::MMALayout layoutEnum," + "mlir::NVVM::MMATypes eltypeEnum) {" + " llvm::StringRef layout = stringifyEnum(layoutEnum);" + " llvm::StringRef eltype = stringifyEnum(eltypeEnum);" + #MMA_ST_INTR<"store">.id# "\n" + "return 0;" + "}\n" + "/// Helpers to find valid k dimension based on mxn store shape.\n" + "static int inferKDimension(int m, int n, mlir::NVVM::MMATypes eltypeEnum) {" + " llvm::StringRef eltype = stringifyEnum(eltypeEnum);" + #MMA_ST_INFER_K.id# "\n" + "return 0;" + "}"; + + string llvmBuilder = [{ + auto operands = moduleTranslation.lookupValues(opInst.getOperands()); + auto intId = + mlir::NVVM::WMMAStoreOp::getIntrinsicID($m, $n, $k, $layout, $eltype); + createIntrinsicCall(builder, intId, operands, {operands[0]->getType()}); + }]; + string baseDescription = [{ - The `nvvm.wmma.m*n*k*.store` operation stores a matrix collectively using + The `nvvm.wmma.store` operation stores a matrix collectively using all the threads in a warp. The operation takes as arguments the address to where the matrix elements are @@ -279,60 +510,50 @@ class NVVM_WMMAStoreOp : NVVM_Op, This op is meant to be used along with `nvvm.wmma.m16n16k16.load` and `nvvm.wmma.m16n16k16.mma`.
- }]; - - let assemblyFormat = "$args attr-dict `:` type($args)"; -} - -def NVVM_WMMAStoreF16M16N16K16Op : NVVM_WMMAStoreOp<"wmma.m16n16k16.store.d.f16.row.stride"> { - string llvmBuilder = [{ - createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride, $args); - }]; - - string opDescription = [{ - Example: - - ```mlir - nvvm.wmma.m16n16k16.stored.f16.row.stride %0, %1, %2, %3, %4, %5, %6 : !llvm.ptr, - !llvm.struct<(vec<2 x half>, vec<2 x half>, vec<2 x half>, vec<2 x half>)>, !llvm.i32 - ``` - }]; - - let description = !strconcat(baseDescription, opDescription); - - let verifier = [{ return ::verify(*this); }]; -} -def NVVM_WMMAStoreF32M16N16K16Op : NVVM_WMMAStoreOp<"wmma.m16n16k16.store.d.f32.row.stride"> { - string llvmBuilder = [{ - createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride, $args); - }]; - - string opDescription = [{ Example: ```mlir - nvvm.wmma.m16n16k16.store.d.f32.row.stride %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, - %10 : !llvm.ptr, !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>, - !llvm.i32 + nvvm.wmma.store %0, %1, %2, %3, %4, %5 + {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> ``` }]; - let description = !strconcat(baseDescription, opDescription); - + let assemblyFormat = "$ptr `,` $stride `,` $args attr-dict `:` type($ptr) `,` type($args)"; let verifier = [{ return ::verify(*this); }]; } // Base class for all the variants of WMMA mmaOps that may be defined. -class NVVM_WMMAMmaOp : NVVM_Op, +def NVVM_WMMAMmaOp : NVVM_Op<"wmma.mma">, Results<(outs LLVM_AnyStruct:$res)>, - Arguments<(ins Variadic:$args)>{ + Arguments<(ins I32Attr:$m, I32Attr:$n, I32Attr:$k, MMALayout:$layoutA, + MMALayout:$layoutB, MMATypes:$eltypeA, MMATypes:$eltypeB, + Variadic:$args)>{ let summary = "Warp synchronous matrix-multiply accumulate using tensor cores."; + let extraClassDeclaration = + "static llvm::Intrinsic::ID getIntrinsicID(" + "int m, int n, int k, mlir::NVVM::MMALayout layoutAEnum," + "mlir::NVVM::MMALayout layoutBEnum, mlir::NVVM::MMATypes eltypeAEnum," + "mlir::NVVM::MMATypes eltypeBEnum) {" + "llvm::StringRef layoutA = stringifyEnum(layoutAEnum);" + "llvm::StringRef layoutB = stringifyEnum(layoutBEnum);" + "llvm::StringRef eltypeA = stringifyEnum(eltypeAEnum);" + "llvm::StringRef eltypeB = stringifyEnum(eltypeBEnum);" + #MMA_MMA_INTR<"mma">.id# "\n" + "return 0;" + "}"; + + string llvmBuilder = [{ + auto operands = moduleTranslation.lookupValues(opInst.getOperands()); + auto intId = mlir::NVVM::WMMAMmaOp::getIntrinsicID( + $m, $n, $k, $layoutA, $layoutB, $eltypeA, $eltypeB); + $res = createIntrinsicCall(builder, intId, operands); + }]; + string baseDescription = [{ - The `nvvm.wmma.m*n*k*.mma` operation performs a matrix-multiply accumulate + The `nvvm.wmma.mma` operation performs a matrix-multiply accumulate (mma) operation using all the threads in a warp. The operation performed is represented as `D = A * B + C`. The operation takes @@ -340,64 +561,20 @@ class NVVM_WMMAMmaOp : NVVM_Op, current thread. The op returns a LLVM struct which holds a part of the result held by the current thread. - This op is meant to be used along with `nvvm.wmma.m16n16k16.load` and `nvvm.wmma. - m16n16k16.store`. 
- }]; -} - -def NVVM_WMMAMmaF16F16M16N16K16Op : NVVM_WMMAMmaOp<"wmma.m16n16k16.mma.row.row.f16.f16">{ - string llvmBuilder = [{ - $res = createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f16_f16, $args); - }]; - - string opDescription = [{ - Example: - - ```mlir - %20 = nvvm.wmma.m16n16k16.mma.row.row.f16.f16 %0, %1, %2, %3, %4, %5, %6, %7, %8, - %9, %10, %11, %12, %13, %14, %15, %16, %17, %18, %19 : vector<2xf16> -> !llvm.struct - <(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - ``` - }]; - - let parser = [{ - return parseWMMAMmaF16F16M16N16K16Op(parser, result); - }]; - - let printer = [{ - printWMMAMmaF16F16M16N16K16Op(p, *this); - }]; + This op is meant to be used along with `nvvm.wmma.load` and + `nvvm.wmma.store`. - let description = !strconcat(baseDescription, opDescription); - - let verifier = [{ return ::verify(*this); }]; -} - -def NVVM_WMMAMmaF32F32M16N16K16Op : NVVM_WMMAMmaOp<"wmma.m16n16k16.mma.row.row.f32.f32">{ - string llvmBuilder = [{ - $res = createNvvmIntrinsicCall( - builder, llvm::Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f32_f32, $args); - }]; - - string opDescription = [{ Example: ```mlir - %24 = nvvm.wmma.m16n16k16.mma.row.row.f32.f32 %0, %1, %2, %3, %4, %5, %6, %7, %8 - %9, %10, %11, %12, %13, %14, %15, %16, %17, %18, %19, %20, %21, %22, %23 : - (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, - vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, - vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, - vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, - f32, f32, f32, f32, f32, f32, f32)> + %16 = nvvm.wmma.mma %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 + {eltypeA = "tf32", eltypeB = "f32", k = 8 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, f32, f32, f32, f32, f32, f32) + -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> ``` }]; let assemblyFormat = "$args attr-dict `:` functional-type($args, $res)"; - - let description = !strconcat(baseDescription, opDescription); - let verifier = [{ return ::verify(*this); }]; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h index b00b8ac0b125a..478b31f3a6d8a 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h @@ -13,8 +13,8 @@ // pointed to here. 
However the following links contain more information about // ROCDL (ROCm-Device-Library) // -// https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/master/doc/OCML.md -// https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/master/doc/OCKL.md +// https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/amd-stg-open/doc/OCML.md +// https://github.com/RadeonOpenCompute/ROCm-Device-Libs/blob/amd-stg-open/doc/OCKL.md // https://llvm.org/docs/AMDGPUUsage.html // //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt index d8a082f10db3f..63a9fa7aee7c3 100644 --- a/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/Linalg/CMakeLists.txt @@ -1,3 +1,4 @@ +add_subdirectory(ComprehensiveBufferize) add_subdirectory(IR) set(LLVM_TARGET_DEFINITIONS Passes.td) diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h new file mode 100644 index 0000000000000..a2494d1fb59d6 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h @@ -0,0 +1,36 @@ +//===- BufferizableOpInterface.h - Comprehensive Bufferize ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_BUFFERIZABLEOPINTERFACE_H_ +#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_BUFFERIZABLEOPINTERFACE_H_ + +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/Operation.h" +#include "mlir/Support/LLVM.h" + +namespace mlir { +class BlockAndValueMapping; + +namespace linalg { +struct AllocationCallbacks; +class BufferizationAliasInfo; + +/// Specify fine-grain relationship between buffers to enable more analysis. +enum class BufferRelation { + None, + // TODO: ResultContainsOperand, + // TODO: OperandContainsResult, + Equivalent +}; +} // namespace linalg +} // namespace mlir + +#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h.inc" + +#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_BUFFERIZABLEOPINTERFACE_H_ diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td new file mode 100644 index 0000000000000..cff45a2fdbfa2 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td @@ -0,0 +1,170 @@ +//===-- BufferizableOpInterface.td - Compreh. Bufferize ----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BUFFERIZABLE_OP_INTERFACE
+#define BUFFERIZABLE_OP_INTERFACE
+
+include "mlir/IR/OpBase.td"
+
+def BufferizableOpInterface : OpInterface<"BufferizableOpInterface"> {
+  let description = [{
+    An op interface for Comprehensive Bufferization. Ops that implement this
+    interface can be bufferized using Comprehensive Bufferization.
+  }];
+  let cppNamespace = "::mlir::linalg";
+  let methods = [
+      InterfaceMethod<
+        /*desc=*/[{
+          Return `true` if the given OpOperand bufferizes to a memory read. This
+          method will never be called on OpOperands that do not have a tensor
+          type.
+
+          Note: It is always safe to consider an OpOperand as a memory read,
+          even if it does not actually read; however, this can introduce
+          unnecessary out-of-place bufferization decisions. The analysis of
+          Comprehensive Bufferize considers OpOperands of unknown ops (that do
+          not implement this interface) as reading OpOperands.
+        }],
+        /*retType=*/"bool",
+        /*methodName=*/"bufferizesToMemoryRead",
+        /*args=*/(ins "OpOperand &":$opOperand),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          // Does not have to be implemented for ops without tensor OpOperands.
+          llvm_unreachable("bufferizesToMemoryRead not implemented");
+        }]
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Return `true` if the given OpOperand bufferizes to a memory write.
+          This method will never be called on OpOperands that do not have a
+          tensor type.
+
+          Note: It is always safe to consider an OpOperand as a memory write,
+          even if it does not actually write; however, this can introduce
+          unnecessary out-of-place bufferization decisions. The analysis of
+          Comprehensive Bufferize considers OpOperands of unknown ops (that do
+          not implement this interface) as writing OpOperands.
+        }],
+        /*retType=*/"bool",
+        /*methodName=*/"bufferizesToMemoryWrite",
+        /*args=*/(ins "OpOperand &":$opOperand),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          // Does not have to be implemented for ops without tensor OpOperands.
+          llvm_unreachable("bufferizesToMemoryWrite not implemented");
+        }]
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Return the OpResult that aliases with a given OpOperand when
+          bufferized in-place. This method will never be called on OpOperands
+          that do not have a tensor type.
+        }],
+        /*retType=*/"OpResult",
+        /*methodName=*/"getAliasingOpResult",
+        /*args=*/(ins "OpOperand &":$opOperand),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          // Does not have to be implemented for ops without tensor OpOperands.
+          llvm_unreachable("getAliasingOpResult not implemented");
+        }]
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Return the OpOperands that alias with a given OpResult when
+          bufferized in-place. This method will never be called on OpResults
+          that do not have a tensor type.
+
+          Note: This method can return multiple OpOperands, indicating that the
+          given OpResult may at runtime alias with any of the OpOperands. This
+          is useful for branches and for ops such as `std.select`.
+        }],
+        /*retType=*/"SmallVector",
+        /*methodName=*/"getAliasingOpOperand",
+        /*args=*/(ins "OpResult":$opResult),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          // Does not have to be implemented for ops without tensor OpResults.
+          llvm_unreachable("getAliasingOpOperand not implemented");
+        }]
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Return the buffer relation between the given OpOperand and its
+          aliasing OpResult when bufferized in-place. Most OpOperands have an
+          "equivalence" relation.
+
+          TODO: Support other relations such as "OpOperand is included in
+          OpResult".
+        }],
+        /*retType=*/"BufferRelation",
+        /*methodName=*/"bufferRelation",
+        /*args=*/(ins "OpOperand &":$opOperand),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          // Does not have to be implemented for ops without tensor OpOperands.
+          llvm_unreachable("bufferRelation not implemented");
+        }]
+      >,
+      // TODO: Simplify method signature: Pass an OpBuilder and a
+      // BufferizationState object.
+      InterfaceMethod<
+        /*desc=*/[{
+          Bufferize this op, i.e., rewrite it into a memref-based equivalent.
+          `bvm` maps tensor values to memref values and this method should map
+          tensor results to memref results after creating/modifying ops.
+        }],
+        /*retType=*/"LogicalResult",
+        /*methodName=*/"bufferize",
+        /*args=*/(ins "OpBuilder &":$b,
+                      "BlockAndValueMapping &":$bvm,
+                      "BufferizationAliasInfo &":$aliasInfo,
+                      "AllocationCallbacks &":$allocationFn),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          llvm_unreachable("bufferize not implemented");
+          return failure();
+        }]
+      >,
+      InterfaceMethod<
+        /*desc=*/[{
+          Return `true` if the given OpResult can be written to in-place. This
+          is the case for most ops, but some ops such as ConstantOp may
+          bufferize to non-writable (read-only) memory locations. This method
+          will never be called on OpResults that do not have a tensor type.
+        }],
+        /*retType=*/"bool",
+        /*methodName=*/"isWritable",
+        /*args=*/(ins "OpResult":$opResult),
+        /*methodBody=*/"",
+        /*defaultImplementation=*/[{
+          return true;
+        }]
+      >
+  ];
+
+  let extraClassDeclaration = [{
+    /// Return `true` if the given OpOperand creates an alias but does neither
+    /// read nor write. This implies that `bufferizesToMemoryRead` and
+    /// `bufferizesToMemoryWrite` must return `false`. This method will never
+    /// be called on OpOperands that do not have a tensor type.
+    ///
+    /// Examples of such ops are `tensor.extract_slice` and `tensor.cast`.
+    bool bufferizesToAliasOnly(OpOperand &opOperand) {
+      auto bufferizableOp =
+          cast(getOperation());
+      return !bufferizableOp.bufferizesToMemoryRead(opOperand)
+          && !bufferizableOp.bufferizesToMemoryWrite(opOperand)
+          && static_cast(
+              bufferizableOp.getAliasingOpResult(opOperand));
+    }
+  }];
+}
+
+#endif // BUFFERIZABLE_OP_INTERFACE
diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
new file mode 100644
index 0000000000000..c2a58e98061b1
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_TARGET_DEFINITIONS BufferizableOpInterface.td)
+mlir_tablegen(BufferizableOpInterface.h.inc -gen-op-interface-decls)
+mlir_tablegen(BufferizableOpInterface.cpp.inc -gen-op-interface-defs)
+add_public_tablegen_target(MLIRBufferizableOpInterfaceIncGen)
+add_dependencies(mlir-headers MLIRBufferizableOpInterfaceIncGen)
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/ComprehensiveBufferize.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h
similarity index 57%
rename from mlir/include/mlir/Dialect/Linalg/Transforms/ComprehensiveBufferize.h
rename to mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h
index 5baec15446df6..8976f69757268 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/ComprehensiveBufferize.h
+++ b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//

-#ifndef MLIR_DIALECT_LINALG_TRANSFORMS_COMPREHENSIVE_BUFFERIZE_H
-#define MLIR_DIALECT_LINALG_TRANSFORMS_COMPREHENSIVE_BUFFERIZE_H
+#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_COMPREHENSIVE_BUFFERIZE_H
+#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_COMPREHENSIVE_BUFFERIZE_H

 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/Value.h"
@@ -19,9 +19,13 @@ namespace mlir {
 class DominanceInfo;
 class FuncOp;
 class GlobalCreator;
+class ModuleOp;

 namespace linalg {

+// TODO: from some HW description.
+static constexpr int64_t kBufferAlignments = 128;
+
 /// The BufferizationAliasInfo class maintains a list of buffer aliases and
 /// equivalence classes to support bufferization.
 /// ExtractSliceOps have special behavior: they act as a level of indirection
@@ -34,14 +38,6 @@ namespace linalg {
 /// uses BufferizationAliasInfo.
 class BufferizationAliasInfo {
 public:
-  /// Specify fine-grain relationship between buffers to enable more analysis.
-  enum class BufferRelation {
-    None,
-    // TODO: ResultContainsOperand,
-    // TODO: OperandContainsResult,
-    Equivalent
-  };
-
   explicit BufferizationAliasInfo(Operation *rootOp);

   /// Add a new entry for `v` in the `aliasInfo` and `equivalentInfo`. In the
@@ -56,14 +52,6 @@ class BufferizationAliasInfo {
   /// `alias`. Additionally, merge their equivalence classes.
   void insertNewBufferEquivalence(Value newValue, Value alias);

-  /// Return true if, under current bufferization decisions, the buffer of
-  /// `value` is not writable.
-  bool aliasesNonWritableBuffer(Value value) const;
-
-  /// Return true if the buffer to which `operand` would bufferize is equivalent
-  /// to some buffer write.
-  bool aliasesInPlaceWrite(Value v) const;
-
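Before the alias-tracking details that follow, a quick illustration of the interface defined above in use. This is a hedged, hypothetical sketch (no such op exists in this patch) of how an op that reads and then overwrites its single tensor operand in place might answer the interface queries:

```c++
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h"

using namespace mlir;

// Hypothetical op model: one tensor operand that is read and overwritten,
// aliasing result 0 when bufferized in place.
struct MyInPlaceOpModel {
  bool bufferizesToMemoryRead(OpOperand &opOperand) const {
    return true; // The operand's contents are read.
  }
  bool bufferizesToMemoryWrite(OpOperand &opOperand) const {
    return true; // The same buffer is overwritten.
  }
  OpResult getAliasingOpResult(OpOperand &opOperand) const {
    // Operand 0 and result 0 share one buffer when bufferized in place.
    return opOperand.getOwner()->getResult(0);
  }
  linalg::BufferRelation bufferRelation(OpOperand &opOperand) const {
    return linalg::BufferRelation::Equivalent;
  }
};
```

With answers like these, `bufferizesToAliasOnly` would return false for the operand, since it both reads and writes.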
   /// Set the inPlace bufferization spec to true.
   /// Merge result's and operand's aliasing sets and iterate to a fixed point.
   void bufferizeInPlace(OpResult result, OpOperand &operand);
@@ -71,23 +59,6 @@ class BufferizationAliasInfo {
   /// Set the inPlace bufferization spec to false.
   void bufferizeOutOfPlace(OpResult result);

-  /// Return true if `value` has an ExtractSliceOp matching the given
-  /// InsertSliceOp in its reverse SSA use-def chain.
-  bool hasMatchingExtractSliceOp(Value value,
-                                 tensor::InsertSliceOp insertOp) const;
-
-  /// Return true if bufferizing `opOperand` inplace with `opResult` would
-  /// create a write to a non-writable buffer.
-  bool wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand,
-                                           OpResult opResult) const;
-
-  /// Assume that result bufferizes in-place with one of the operation's
-  /// operands. Return true if it is possible to find an inplace write W that
-  /// creates a conflict.
-  bool
-  wouldCreateReadAfterWriteInterference(OpOperand &operand, OpResult result,
-                                        const DominanceInfo &domInfo) const;
-
   /// Return true if `v1` and `v2` bufferize to equivalent buffers.
   bool areEquivalentBufferizedValues(Value v1, Value v2) const {
     // Return `false` if we have no information about `v1` or `v2`.
@@ -99,14 +70,13 @@ class BufferizationAliasInfo {
            equivalentInfo.getLeaderValue(v2);
   }

-  /// Return true if the source of an `insertSliceOp` bufferizes to an
-  /// equivalent ExtractSliceOp.
-  bool isSourceEquivalentToAMatchingInplaceExtractSliceOp(
-      tensor::InsertSliceOp insertSliceOp) const;
-
   /// Apply `fun` to all the members of the equivalence class of `v`.
   void applyOnEquivalenceClass(Value v, function_ref fun) const;

+  /// Apply `fun` to all aliases of `v`.
+  void applyOnAliases(Value v, function_ref fun) const;
+
+  // TODO: Move these out of BufferizationAliasInfo.
   /// Return true if the value is known to bufferize to writable memory.
   bool bufferizesToWritableMemory(Value v) const;

@@ -122,52 +92,30 @@ class BufferizationAliasInfo {
   void dumpEquivalences() const;

 private:
-  /// llvm::EquivalenceClasses wants comparable elements because it uses
-  /// std::set as the underlying impl.
-  /// ValueWrapper wraps Value and uses pointer comparison on the defining op.
-  /// This is a poor man's comparison but it's not like UnionFind needs ordering
-  /// anyway ..
-  struct ValueWrapper {
-    ValueWrapper(Value val) : v(val) {}
-    operator Value() const { return v; }
-    bool operator<(const ValueWrapper &wrap) const {
-      return v.getImpl() < wrap.v.getImpl();
+  /// llvm::EquivalenceClasses wants comparable elements. This comparator uses
+  /// pointer comparison on the defining op. This is a poor man's
+  /// comparison but it's not like UnionFind needs ordering anyway.
+  struct ValueComparator {
+    bool operator()(const Value &lhs, const Value &rhs) const {
+      return lhs.getImpl() < rhs.getImpl();
     }
-    bool operator==(const ValueWrapper &wrap) const { return v == wrap.v; }
-    Value v;
   };

   using EquivalenceClassRangeType = llvm::iterator_range<
-      llvm::EquivalenceClasses::member_iterator>;
+      llvm::EquivalenceClasses::member_iterator>;
   /// Check that aliasInfo for `v` exists and return a reference to it.
   EquivalenceClassRangeType getAliases(Value v) const;

-  /// Return true if the (ExtractSliceOp, InsertSliceOp) pair match (i.e.
-  /// equivalent operand / result and same offset/sizes/strides specification).
-  ///
-  /// This is one particular type of relationship between ops on tensors that
-  /// reduce to an equivalence on buffers. This should be generalized and
-  /// exposed as interfaces on the proper types.
-  bool areEquivalentExtractSliceOps(tensor::ExtractSliceOp st,
-                                    tensor::InsertSliceOp sti) const;
-
-  /// Given sets of uses and writes, return true if there is a RaW conflict
-  /// under the assumption that all given reads/writes alias the same buffer and
-  /// that all given writes bufferize inplace.
-  bool hasReadAfterWriteInterference(const DenseSet &usesRead,
-                                     const DenseSet &usesWrite,
-                                     const DominanceInfo &domInfo) const;
-
   /// Set of tensors that are known to bufferize to writable memory.
   llvm::DenseSet bufferizeToWritableMemory;

   /// Auxiliary structure to store all the values a given value aliases with.
   /// These are the conservative cases that can further decompose into
   /// "equivalent" buffer relationships.
-  llvm::EquivalenceClasses aliasInfo;
+  llvm::EquivalenceClasses aliasInfo;

   /// Auxiliary structure to store all the equivalent buffer classes.
-  llvm::EquivalenceClasses equivalentInfo;
+  llvm::EquivalenceClasses equivalentInfo;
 };

 /// Analyze the `ops` to determine which OpResults are inplaceable.
@@ -176,26 +124,43 @@ LogicalResult inPlaceAnalysis(SmallVector &ops,
                               const DominanceInfo &domInfo,
                               unsigned analysisFuzzerSeed = 0);

+// TODO: Do not expose those functions in the header file.
 /// Default allocation function that is used by the comprehensive bufferization
 /// pass. The default currently creates a ranked memref using `memref.alloc`.
-Optional defaultAllocationFn(OpBuilder &b, Location loc,
-                             Value shapedValue);
+Optional defaultAllocationFn(OpBuilder &b, Location loc, MemRefType type,
+                             const SmallVector &dynShape);

 /// Default deallocation function that is used by the comprehensive
 /// bufferization pass. It expects to receive back the value returned by the
 /// `defaultAllocationFn`.
 void defaultDeallocationFn(OpBuilder &b, Location loc, Value allocatedBuffer);

+/// Default memory copy function that is used by the comprehensive bufferization
+/// pass. Creates a `linalg.copy` op.
+void defaultMemCpyFn(OpBuilder &b, Location loc, Value from, Value to);
+
 /// Callback functions that are used by the comprehensive bufferization pass to
 /// allocate/deallocate memory. These default to use the
 /// `defaultAllocationFn`/`defaultDeallocationFn`, but can be overridden by the
 /// caller. The `deallocationFn` is guaranteed to receive the `Value` returned
 /// by the `allocationFn`.
 struct AllocationCallbacks {
-  std::function(OpBuilder &b, Location loc, Value shapedValue)>
-      allocationFn = defaultAllocationFn;
-  std::function deallocationFn =
-      defaultDeallocationFn;
+  using AllocationFn = std::function(
+      OpBuilder &, Location, MemRefType, const SmallVector &)>;
+  using DeallocationFn = std::function;
+  using MemCpyFn = std::function;
+
+  AllocationCallbacks(AllocationFn allocFn, DeallocationFn deallocFn,
+                      MemCpyFn copyFn)
+      : allocationFn(allocFn), deallocationFn(deallocFn), memCpyFn(copyFn) {}
+
+  AllocationCallbacks()
+      : allocationFn(defaultAllocationFn),
+        deallocationFn(defaultDeallocationFn), memCpyFn(defaultMemCpyFn) {}
+
+  AllocationFn allocationFn;
+  DeallocationFn deallocationFn;
+  MemCpyFn memCpyFn;
 };

 /// Bufferize one particular op.
@@ -207,7 +172,45 @@ bufferizeOp(Operation *op, BlockAndValueMapping &bvm,
             AllocationCallbacks allocationFns,
             DenseMap *bufferizedFunctionTypes = nullptr);

+/// Register external models implemented for the `BufferizableOpInterface`.
+void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
+
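To make the callback plumbing concrete, here is a hedged sketch of wiring a custom allocator into the module-level entry point declared just below (`BufferizationOptions` and `runComprehensiveBufferize`). The stack-allocation choice is illustrative only; any allocation strategy matching the `AllocationFn` signature works.

```c++
#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"

using namespace mlir;
using namespace mlir::linalg;

// Illustrative driver: bufferize `module` with stack allocations instead of
// the default memref.alloc, keeping the default copy function.
LogicalResult bufferizeWithAllocas(ModuleOp module) {
  BufferizationOptions options;
  options.allocationFns = std::make_unique<AllocationCallbacks>(
      /*allocFn=*/[](OpBuilder &b, Location loc, MemRefType type,
                     const SmallVector<Value> &dynShape) -> Optional<Value> {
        // Stack allocation; fine for small, function-local buffers.
        return b.create<memref::AllocaOp>(loc, type, dynShape).getResult();
      },
      /*deallocFn=*/[](OpBuilder &b, Location loc, Value buffer) {
        // Nothing to free: allocas die with the function frame.
      },
      /*copyFn=*/defaultMemCpyFn);
  return runComprehensiveBufferize(module, options);
}
```

Because the deallocation callback is guaranteed to receive exactly the value the allocation callback returned, the no-op dealloc above is safe for the alloca strategy.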
+/// Try to eliminate InitTensorOps inside `funcOp`.
+///
+/// * `rewriteFunc` generates the replacement for the InitTensorOp.
+/// * Only InitTensorOps that are anchored on a matching OpOperand as per
+///   `anchorMatchFunc` are considered. "Anchored" means that there is a path on
+///   the reverse SSA use-def chain, starting from the OpOperand and always
+///   following the aliasing OpOperand, that eventually ends at a single
+///   InitTensorOp.
+/// * The result of `rewriteFunc` must usually be analyzed for inplaceability.
+///   This analysis can be skipped with `skipAnalysis`.
+LogicalResult initTensorElimination(
+    FuncOp funcOp, BufferizationAliasInfo &aliasInfo, DominanceInfo &domInfo,
+    std::function anchorMatchFunc,
+    std::function rewriteFunc,
+    bool skipAnalysis = false);
+
+/// Try to eliminate InitTensorOps inside funcOp that are anchored on an
+/// InsertSliceOp, i.e., if it is eventually inserted into another tensor
+/// (and some other conditions are met).
+LogicalResult eliminateInsertSliceAnchoredInitTensorOps(
+    FuncOp funcOp, BufferizationAliasInfo &aliasInfo, DominanceInfo &domInfo);
+
+struct BufferizationOptions {
+  BufferizationOptions()
+      : allocationFns(std::make_unique()) {}
+
+  std::unique_ptr allocationFns;
+  bool allowReturnMemref = false;
+  unsigned analysisFuzzerSeed = 0;
+  bool testAnalysisOnly = false;
+};
+
+LogicalResult runComprehensiveBufferize(ModuleOp moduleOp,
+                                        const BufferizationOptions &options);
+
 } // namespace linalg
 } // namespace mlir

-#endif // define  MLIR_DIALECT_LINALG_TRANSFORMS_COMPREHENSIVE_BUFFERIZE_H
+#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_COMPREHENSIVE_BUFFERIZE_H
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index bbaabac10cdb0..5e38ae3acebf0 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -46,7 +46,15 @@ void populateConvVectorizationPatterns(
     MLIRContext *context, SmallVectorImpl &patterns,
     ArrayRef tileSizes);

-/// Populates patterns for vectorizing convolution ops.
+/// Populates patterns to decompose high-D convolution ops into low-D ones. This
+/// is a step in progressive lowering for convolution ops; afterwards we can
+/// vectorize the low-D convolution ops.
+void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
+                                          PatternBenefit benefit = 1);
+
+/// Populates patterns for vectorizing low-D convolution ops. This is a step in
+/// progressive lowering for convolution ops; it assumes high-D convolution ops
+/// were decomposed previously.
 void populateConvolutionVectorizationPatterns(RewritePatternSet &patterns,
                                               PatternBenefit benefit = 1);

@@ -440,11 +448,18 @@ struct LinalgTransformationFilter {
     return addFilter(
         [](Operation *op) { return success(isa(op)); });
   }
+  LinalgTransformationFilter &setMatchByDefault() {
+    matchByDefault = true;
+    return *this;
+  }

 private:
   SmallVector filters;
   SmallVector matchDisjunction;
   Optional replacement;
+  /// When set to true, if the attribute is not set, it will be treated as
+  /// a match. Default is false.
+  bool matchByDefault;
 };

 using TileSizeComputationFunction =
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index b32d8e1c12b00..0924a5c59dcc7 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -212,6 +212,7 @@ class TileLoopNest {
   bool isEmpty();

   /// Returns true if the tile loop nest invariants are satisfied:
+  /// - The `rootOp` has been tiled at least once.
   /// - The number of tile loop operations and dimensions match.
   /// - The innermost tile loop is the parent of `tiledOp`.
   /// - The tile loops are directly nested.
@@ -233,8 +234,8 @@ class TileLoopNest {
   bool hasOtherUses(BlockArgument bbArg, tensor::ExtractSliceOp sliceOp);

   LinalgOp rootOp;
-  SmallVector loopOps;
-  SmallVector loopDims;
+  SmallVector tileLoopOps;
+  DenseMap> tiledRootAndFusedOpsLoops;
 };

 /// Tiles `consumerOp` and fuses its dependencies if possible. Uses the
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index ed04dc4ca395d..d552132b4d49e 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -148,6 +148,68 @@ def ScheduleModifier : StrEnumAttr<"ScheduleModifier", "OpenMP Schedule Modifier
   let cppNamespace = "::mlir::omp";
 }

+//===----------------------------------------------------------------------===//
+// 2.8.1 Sections Construct
+//===----------------------------------------------------------------------===//
+
+def SectionOp : OpenMP_Op<"section", [HasParent<"SectionsOp">]> {
+  let summary = "section directive";
+  let description = [{
+    A section operation encloses a region which represents one section in a
+    sections construct. A section op should always be surrounded by an
+    `omp.sections` operation.
+  }];
+  let regions = (region AnyRegion:$region);
+  let assemblyFormat = "$region attr-dict";
+}
+
+def SectionsOp : OpenMP_Op<"sections", [AttrSizedOperandSegments]> {
+  let summary = "sections construct";
+  let description = [{
+    The sections construct is a non-iterative worksharing construct that
+    contains `omp.section` operations. The `omp.section` operations are to be
+    distributed among and executed by the threads in a team. Each `omp.section`
+    is executed once by one of the threads in the team in the context of its
+    implicit task.
+
+    The `private_vars`, `firstprivate_vars` and `lastprivate_vars` arguments
+    are variadic lists of operands that specify the data sharing attributes of
+    the list of values. They are optional.
+
+    Reductions can be performed in a sections construct by specifying reduction
+    accumulator variables in `reduction_vars` and symbols referring to reduction
+    declarations in the `reductions` attribute. Each reduction is identified
+    by the accumulator it uses and accumulators must not be repeated in the same
+    reduction. The `omp.reduction` operation accepts the accumulator and a
+    partial value which is considered to be produced by the section for the
+    given reduction. If multiple values are produced for the same accumulator,
+    i.e. there are multiple `omp.reduction`s, the last value is taken. The
+    reduction declaration specifies how to combine the values from each section
+    into the final value, which is available in the accumulator after all the
+    sections complete.
+
+    The $allocators_vars and $allocate_vars parameters are variadic lists of
+    values that specify the memory allocator to be used to obtain storage for
+    private values.
+
+    The `nowait` attribute, when present, signifies that there should be no
+    implicit barrier at the end of the construct.
+  }];
+  let arguments = (ins Variadic:$private_vars,
+                       Variadic:$firstprivate_vars,
+                       Variadic:$lastprivate_vars,
+                       Variadic:$reduction_vars,
+                       OptionalAttr:$reductions,
+                       Variadic:$allocate_vars,
+                       Variadic:$allocators_vars,
+                       UnitAttr:$nowait);
+
+  let regions = (region SizedRegion<1>:$region);
+
+  let parser = [{ return parseSectionsOp(parser, result); }];
+  let printer = [{ return printSectionsOp(p, *this); }];
+  let verifier = [{ return verifySectionsOp(*this); }];
+}
+
 //===----------------------------------------------------------------------===//
 // 2.9.2 Workshare Loop Construct
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAtomicOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAtomicOps.td
index 122987a212d0e..406f6647a13d0 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAtomicOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAtomicOps.td
@@ -102,6 +102,77 @@ def SPV_AtomicAndOp : SPV_AtomicUpdateWithValueOp<"AtomicAnd", []> {

 // -----

+def SPV_AtomicCompareExchangeOp : SPV_Op<"AtomicCompareExchange", []> {
+  let summary = [{
+    Perform the following steps atomically with respect to any other atomic
+    accesses within Scope to the same location:
+  }];
+
+  let description = [{
+    1) load through Pointer to get an Original Value,
+
+    2) get a New Value from Value only if Original Value equals Comparator,
+    and
+
+    3) store the New Value back through Pointer only if Original Value
+    equaled Comparator.
+
+    The instruction's result is the Original Value.
+
+    Result Type must be an integer type scalar.
+
+    Use Equal for the memory semantics of this instruction when Value and
+    Original Value compare equal.
+
+    Use Unequal for the memory semantics of this instruction when Value and
+    Original Value compare unequal. Unequal must not be set to Release or
+    Acquire and Release. In addition, Unequal cannot be set to a stronger
+    memory-order than Equal.
+
+    The type of Value must be the same as Result Type. The type of the
+    value pointed to by Pointer must be the same as Result Type. This type
+    must also match the type of Comparator.
+
+    Memory is a memory Scope.
+
+
+
+    ```
+    atomic-compare-exchange-op ::=
+      `spv.AtomicCompareExchange` scope memory-semantics memory-semantics
+                                  ssa-use `,` ssa-use `,` ssa-use
+                                  `:` spv-pointer-type
+    ```
+
+    #### Example:
+
+    ```mlir
+    %0 = spv.AtomicCompareExchange "Workgroup" "Acquire" "None"
+                                   %pointer, %value, %comparator
+                                   : !spv.ptr
+    ```
+  }];
+
+  let arguments = (ins
+    SPV_AnyPtr:$pointer,
+    SPV_ScopeAttr:$memory_scope,
+    SPV_MemorySemanticsAttr:$equal_semantics,
+    SPV_MemorySemanticsAttr:$unequal_semantics,
+    SPV_Integer:$value,
+    SPV_Integer:$comparator
+  );
+
+  let results = (outs
+    SPV_Integer:$result
+  );
+
+  let parser = [{ return ::parseAtomicCompareExchangeImpl(parser, result); }];
+  let printer = [{ return ::printAtomicCompareExchangeImpl(*this, p); }];
+  let verifier = [{ return ::verifyAtomicCompareExchangeImpl(*this); }];
+}
+
+// -----
+
 def SPV_AtomicCompareExchangeWeakOp : SPV_Op<"AtomicCompareExchangeWeak", []> {
   let summary = "Deprecated (use OpAtomicCompareExchange).";

@@ -147,6 +218,62 @@ def SPV_AtomicCompareExchangeWeakOp : SPV_Op<"AtomicCompareExchangeWeak", []> {
   let results = (outs
     SPV_Integer:$result
   );
+
+  let parser = [{ return ::parseAtomicCompareExchangeImpl(parser, result); }];
+  let printer = [{ return ::printAtomicCompareExchangeImpl(*this, p); }];
+  let verifier = [{ return ::verifyAtomicCompareExchangeImpl(*this); }];
+}
+
+// -----
+
+def SPV_AtomicExchangeOp : SPV_Op<"AtomicExchange", []> {
+  let summary = [{
+    Perform the following steps atomically with respect to any other atomic
+    accesses within Scope to the same location:
+  }];
+
+  let description = [{
+    1) load through Pointer to get an Original Value,
+
+    2) get a New Value from copying Value, and
+
+    3) store the New Value back through Pointer.
+
+    The instruction's result is the Original Value.
+
+    Result Type must be a scalar of integer type or floating-point type.
+
+    The type of Value must be the same as Result Type. The type of the
+    value pointed to by Pointer must be the same as Result Type.
+
+    Memory is a memory Scope.
+
+
+
+    ```
+    atomic-exchange-op ::=
+      `spv.AtomicExchange` scope memory-semantics
+      ssa-use `,` ssa-use `:` spv-pointer-type
+    ```
+
+    #### Example:
+
+    ```mlir
+    %0 = spv.AtomicExchange "Workgroup" "Acquire" %pointer, %value
+         : !spv.ptr
+    ```
+  }];
+
+  let arguments = (ins
+    SPV_AnyPtr:$pointer,
+    SPV_ScopeAttr:$memory_scope,
+    SPV_MemorySemanticsAttr:$semantics,
+    SPV_Numerical:$value
+  );
+
+  let results = (outs
+    SPV_Numerical:$result
+  );
+}

 // -----
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
index 1bbe01a5e8a1b..3b54c80ba0da6 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
@@ -3352,6 +3352,8 @@ def SPV_OC_OpBitReverse : I32EnumAttrCase<"OpBitReverse", 204>;
 def SPV_OC_OpBitCount : I32EnumAttrCase<"OpBitCount", 205>;
 def SPV_OC_OpControlBarrier : I32EnumAttrCase<"OpControlBarrier", 224>;
 def SPV_OC_OpMemoryBarrier : I32EnumAttrCase<"OpMemoryBarrier", 225>;
+def SPV_OC_OpAtomicExchange : I32EnumAttrCase<"OpAtomicExchange", 229>;
+def SPV_OC_OpAtomicCompareExchange : I32EnumAttrCase<"OpAtomicCompareExchange", 230>;
 def SPV_OC_OpAtomicCompareExchangeWeak : I32EnumAttrCase<"OpAtomicCompareExchangeWeak", 231>;
 def SPV_OC_OpAtomicIIncrement : I32EnumAttrCase<"OpAtomicIIncrement", 232>;
 def SPV_OC_OpAtomicIDecrement : I32EnumAttrCase<"OpAtomicIDecrement", 233>;
@@ -3442,6 +3444,7 @@ def SPV_OpcodeAttr :
       SPV_OC_OpBitwiseAnd, SPV_OC_OpNot, SPV_OC_OpBitFieldInsert,
       SPV_OC_OpBitFieldSExtract, SPV_OC_OpBitFieldUExtract, SPV_OC_OpBitReverse,
       SPV_OC_OpBitCount, SPV_OC_OpControlBarrier, SPV_OC_OpMemoryBarrier,
+      SPV_OC_OpAtomicExchange, SPV_OC_OpAtomicCompareExchange,
       SPV_OC_OpAtomicCompareExchangeWeak, SPV_OC_OpAtomicIIncrement,
       SPV_OC_OpAtomicIDecrement, SPV_OC_OpAtomicIAdd, SPV_OC_OpAtomicISub,
       SPV_OC_OpAtomicSMin, SPV_OC_OpAtomicUMin, SPV_OC_OpAtomicSMax,
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
index b1ee907e9d850..6b35941ba5fbe 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td
@@ -174,8 +174,8 @@ def Tosa_UnaryOpQuantInfoBuilder : OpBuilder<
     buildUnaryOpWithQuantInfo($_builder, $_state, outputType, input);
   }]>;

-// This builder is called on the TOSA pad operator that needs to create its own
-// OptionalAttr quantization_attr parameter to scale the padding values
+// These builders are called on the TOSA pad operator that needs to create its
+// own OptionalAttr quantization_attr parameter to scale the padding values
 // correctly.
 def Tosa_PadOpQuantInfoBuilder : OpBuilder<
     (ins "Type":$outputType, "Value":$input, "Value":$paddings),
@@ -184,6 +184,14 @@ def Tosa_PadOpQuantInfoBuilder : OpBuilder<
                             input, paddings);
   }]>;

+def Tosa_ExplicitValuePadOpQuantInfoBuilder : OpBuilder<
+    (ins "Type":$outputType, "Value":$input, "Value":$paddings,
+         "Value":$pad_value),
+    [{
+      buildExplicitValuePadOpWithQuantInfo($_builder, $_state, outputType,
+                                           input, paddings, pad_value);
+    }]>;
+
 //===----------------------------------------------------------------------===//
 // TOSA Operator.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index b57e8b2fb8cbb..bdc7ac13e675a 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -321,9 +321,11 @@ def Tosa_ClampOp : Tosa_Op<"clamp", [
   let summary = "Computes clamp(features, min, max).";

   let description = [{
-    Clamp to an arbitrary minimum and maximum value. Note that the maximum and
-    minimum values are specified as signed quantized values, no scaling happens
-    before or after this operation.
+    Clamp to an arbitrary minimum and maximum value.
+    Maximum and minimum values are specified as values in the range of the
+    input type.
+    No zero point subtraction is done to the values; thus, to clamp to the zero
+    point value, the zero point itself should be supplied as the minimum value.
   }];

   let arguments = (ins
@@ -442,6 +444,8 @@ def Tosa_AddOp : Tosa_Op<"add", [
   let results = (outs
     Tosa_Tensor:$output
   );
+
+  let hasCanonicalizer = 1;
 }

 //===----------------------------------------------------------------------===//
@@ -1392,15 +1396,16 @@ def Tosa_PadOp : Tosa_Op<"pad", [
       DeclareOpInterfaceMethods,
       NoSideEffect]> {

-  let summary = "Pads a tensor with zeros.";
+  let summary = "Pads a tensor with the specified value.";

   let description = [{
-    Zero-pads a tensor along borders of each dimension.
+    Pads a tensor along the borders of each dimension with pad_value.
   }];

   let arguments = (ins
     Tosa_RankedTensor:$input1,
     Tosa_Int32Or64Tensor:$padding,
+    Optional:$pad_const,
     OptionalAttr:$quantization_info
   );

@@ -1408,7 +1413,8 @@ def Tosa_PadOp : Tosa_Op<"pad", [
     Tosa_RankedTensor:$output
   );

-  let builders = [Tosa_PadOpQuantInfoBuilder];
+  let builders = [Tosa_PadOpQuantInfoBuilder,
+                  Tosa_ExplicitValuePadOpQuantInfoBuilder];
 }

 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td
index c2888ea56dd80..46a4cbc0623cd 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td
@@ -117,6 +117,9 @@ class Tosa_TensorOfOrNone allowedTypes, string description = ""> :
 // Tensor types with constrained ranks.
 //===----------------------------------------------------------------------===//

+// Rank-0 (scalar) tensor
+def Tosa_ScalarTensor : TensorRankOf<[Tosa_AnyNumber], [0]>;
+
 // We include unranked tensors as a supported type for all possible tosa
 // Tensors as unranked does not guarantee invalid. If unranked tensors exist
 // they should be shape propagated using Tosa's shape inference pass and verified
diff --git a/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h b/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h
index 587f334bc0473..433ab8df0571e 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h
+++ b/mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h
@@ -29,6 +29,8 @@ enum class VectorTransposeLowering {
   /// Lower 2-D transpose to `vector.flat_transpose`, maps 1-1 to LLVM matrix
   /// intrinsics.
   Flat = 1,
+  /// Lower 2-D transpose to `vector.shuffle`.
+  Shuffle = 2,
 };
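A brief usage note on the new enum case: selecting the shuffle-based lowering is a matter of setting the corresponding option before populating the lowering patterns. The sketch below assumes the usual `VectorTransformsOptions` struct that carries this enum (declared elsewhere in this header, not shown in the hunk):

```c++
#include "mlir/Dialect/Vector/VectorRewritePatterns.h"

// Opt into lowering 2-D vector.transpose through vector.shuffle rather than
// extract/insert chains or vector.flat_transpose.
void chooseShuffleLowering(mlir::vector::VectorTransformsOptions &options) {
  options.vectorTransposeLowering =
      mlir::vector::VectorTransposeLowering::Shuffle;
}
```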
 /// Enum to control the lowering of `vector.multi_reduction` operations.
 enum class VectorMultiReductionLowering {
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
new file mode 100644
index 0000000000000..4361fc7d43e75
--- /dev/null
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
@@ -0,0 +1,55 @@
+//===- SparseTensorUtils.h - Enums shared with the runtime ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines several enums shared between
+// Transforms/SparseTensorConversion.cpp and ExecutionEngine/SparseUtils.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_
+#define MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_
+
+#include
+
+extern "C" {
+
+/// Encoding of the overhead type, for "overloading" @newSparseTensor.
+enum class OverheadType : uint32_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 };
+
+/// Encoding of the elemental type, for "overloading" @newSparseTensor.
+enum class PrimaryType : uint32_t {
+  kF64 = 1,
+  kF32 = 2,
+  kI64 = 3,
+  kI32 = 4,
+  kI16 = 5,
+  kI8 = 6
+};
+
+/// The actions performed by @newSparseTensor.
+enum class Action : uint32_t {
+  kEmpty = 0,
+  kFromFile = 1,
+  kFromCOO = 2,
+  kEmptyCOO = 3,
+  kToCOO = 4,
+  kToIterator = 5
+};
+
+/// This enum mimics `SparseTensorEncodingAttr::DimLevelType` for
+/// breaking dependency cycles. `SparseTensorEncodingAttr::DimLevelType`
+/// is the source of truth and this enum should be kept consistent with it.
+enum class DimLevelType : uint8_t {
+  kDense = 0,
+  kCompressed = 1,
+  kSingleton = 2
+};
+
+} // extern "C"
+
+#endif // MLIR_EXECUTIONENGINE_SPARSETENSORUTILS_H_
diff --git a/mlir/include/mlir/IR/BuiltinAttributes.td b/mlir/include/mlir/IR/BuiltinAttributes.td
index 51ac32d9a5649..01af84c421e97 100644
--- a/mlir/include/mlir/IR/BuiltinAttributes.td
+++ b/mlir/include/mlir/IR/BuiltinAttributes.td
@@ -389,6 +389,10 @@ def Builtin_DictionaryAttr : Builtin_Attr<"Dictionary", [
     Optional getNamed(StringRef name) const;
     Optional getNamed(Identifier name) const;

+    /// Return whether the specified attribute is present.
+    bool contains(StringRef name) const;
+    bool contains(Identifier name) const;
+
     /// Support range iteration.
     using iterator = llvm::ArrayRef::iterator;
     iterator begin() const;
diff --git a/mlir/include/mlir/IR/DialectImplementation.h b/mlir/include/mlir/IR/DialectImplementation.h
index 728e24605c29f..9302eb9146d4b 100644
--- a/mlir/include/mlir/IR/DialectImplementation.h
+++ b/mlir/include/mlir/IR/DialectImplementation.h
@@ -47,6 +47,74 @@ class DialectAsmParser : public AsmParser {
   virtual StringRef getFullSymbolSpec() const = 0;
 };

+//===----------------------------------------------------------------------===//
+// Parse Fields
+//===----------------------------------------------------------------------===//
+
+/// Provide a template class that can be specialized by users to dispatch to
+/// parsers. Auto-generated parsers generate calls to `FieldParser::parse`,
+/// where `T` is the parameter storage type, to parse custom types.
+template
+struct FieldParser;
+
+/// Parse an attribute.
+template
+struct FieldParser<
+    AttributeT, std::enable_if_t::value,
+                                 AttributeT>> {
+  static FailureOr parse(DialectAsmParser &parser) {
+    AttributeT value;
+    if (parser.parseAttribute(value))
+      return failure();
+    return value;
+  }
+};
+
+/// Parse any integer.
+template
+struct FieldParser::value, IntT>> {
+  static FailureOr parse(DialectAsmParser &parser) {
+    IntT value;
+    if (parser.parseInteger(value))
+      return failure();
+    return value;
+  }
+};
+
+/// Parse a string.
+template <>
+struct FieldParser {
+  static FailureOr parse(DialectAsmParser &parser) {
+    std::string value;
+    if (parser.parseString(&value))
+      return failure();
+    return value;
+  }
+};
+
+/// Parse any container that supports back insertion as a list.
+template
+struct FieldParser<
+    ContainerT, std::enable_if_t::value,
+                                 ContainerT>> {
+  using ElementT = typename ContainerT::value_type;
+  static FailureOr parse(DialectAsmParser &parser) {
+    ContainerT elements;
+    auto elementParser = [&]() {
+      auto element = FieldParser::parse(parser);
+      if (failed(element))
+        return failure();
+      elements.push_back(element.getValue());
+      return success();
+    };
+    if (parser.parseCommaSeparatedList(elementParser))
+      return failure();
+    return elements;
+  }
+};
+
 } // end namespace mlir

-#endif
+#endif // MLIR_IR_DIALECTIMPLEMENTATION_H
diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index ec0d5355dcf18..37bf1d233c2b3 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -1187,11 +1187,11 @@ class EnumAttrCaseInfo {
 }

 // An enum attribute case stored with StringAttr.
-class StrEnumAttrCase :
-  EnumAttrCaseInfo,
+class StrEnumAttrCase :
+  EnumAttrCaseInfo,
   StringBasedAttr<
-    CPred<"$_self.cast<::mlir::StringAttr>().getValue() == \"" # sym # "\"">,
-    "case " # sym>;
+    CPred<"$_self.cast<::mlir::StringAttr>().getValue() == \"" # str # "\"">,
+    "case " # str>;

 // An enum attribute case stored with IntegerAttr, which has an integer value,
 // its representation as a string and a C++ symbol name which may be different.
@@ -2886,6 +2886,11 @@ class AttrOrTypeDef defTraits,
   code printer = ?;
   code parser = ?;

+  // Custom assembly format. Requires 'mnemonic' to be specified. Cannot be
+  // specified at the same time as either 'printer' or 'parser'. The generated
+  // printer requires 'genAccessors' to be true.
+  string assemblyFormat = ?;
+
   // If set, generate accessors for each parameter.
   bit genAccessors = 1;

@@ -2964,10 +2969,22 @@ class AttrOrTypeParameter {
   string cppType = type;
   // The C++ type of the accessor for this parameter.
   string cppAccessorType = !if(!empty(accessorType), type, accessorType);
+  // The C++ storage type of this parameter if it is a reference, e.g.
+  // `std::string` for `StringRef` or `SmallVector` for `ArrayRef`.
+  string cppStorageType = ?;
   // One-line human-readable description of the argument.
   string summary = desc;
   // The format string for the asm syntax (documentation only).
   string syntax = ?;
+  // The default parameter parser is `::mlir::parseField($_parser)`, which
+  // returns `FailureOr`. Overload `parseField` to support parsing for your
+  // type. Or you can provide a custom parser. For attributes, "$_type" will
+  // be replaced with the required attribute type.
+  string parser = ?;
+  // The default parameter printer is `$_printer << $_self`. Overload the stream
+  // operator of `DialectAsmPrinter` as necessary to print your type. Or you can
+  // provide a custom printer.
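An aside on the C++ side of this mechanism: user code can extend the `FieldParser` dispatch introduced above by specializing it for its own types. The sketch below is hypothetical (the enum and keyword spellings are invented), but it follows the pattern of the built-in specializations:

```c++
#include "mlir/IR/DialectImplementation.h"

// A user-defined parameter type, purely illustrative.
enum class MyWrapMode { Clamp, Wrap };

namespace mlir {
// Teach generated parsers how to read a MyWrapMode from the assembly.
template <>
struct FieldParser<MyWrapMode> {
  static FailureOr<MyWrapMode> parse(DialectAsmParser &parser) {
    StringRef keyword;
    if (parser.parseKeyword(&keyword))
      return failure();
    if (keyword == "clamp")
      return MyWrapMode::Clamp;
    if (keyword == "wrap")
      return MyWrapMode::Wrap;
    return failure(); // Unknown keyword.
  }
};
} // namespace mlir
```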
+ string printer = ?; } class AttrParameter : AttrOrTypeParameter; @@ -2978,6 +2995,8 @@ class TypeParameter class StringRefParameter : AttrOrTypeParameter<"::llvm::StringRef", desc> { let allocator = [{$_dst = $_allocator.copyInto($_self);}]; + let printer = [{$_printer << '"' << $_self << '"';}]; + let cppStorageType = "std::string"; } // For APFloats, which require comparison. @@ -2990,6 +3009,7 @@ class APFloatParameter : class ArrayRefParameter : AttrOrTypeParameter<"::llvm::ArrayRef<" # arrayOf # ">", desc> { let allocator = [{$_dst = $_allocator.copyInto($_self);}]; + let cppStorageType = "::llvm::SmallVector<" # arrayOf # ">"; } // For classes which require allocation and have their own allocateInto method. diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h index db1f7a3c071d2..34e6cd08ea3c7 100644 --- a/mlir/include/mlir/IR/OpImplementation.h +++ b/mlir/include/mlir/IR/OpImplementation.h @@ -182,10 +182,10 @@ operator<<(AsmPrinterT &p, const TypeRange &types) { llvm::interleaveComma(types, p); return p; } -template +template inline std::enable_if_t::value, AsmPrinterT &> -operator<<(AsmPrinterT &p, ArrayRef types) { +operator<<(AsmPrinterT &p, ArrayRef types) { llvm::interleaveComma(types, p); return p; } diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h index 0f74021184b3e..15f8e6b061468 100644 --- a/mlir/include/mlir/IR/Operation.h +++ b/mlir/include/mlir/IR/Operation.h @@ -27,8 +27,8 @@ namespace mlir { /// 'Block' class. class alignas(8) Operation final : public llvm::ilist_node_with_parent, - private llvm::TrailingObjects { + private llvm::TrailingObjects { public: /// Create a new Operation with the specific fields. static Operation *create(Location location, OperationName name, @@ -244,7 +244,10 @@ class alignas(8) Operation final operand_iterator operand_end() { return getOperands().end(); } /// Returns an iterator on the underlying Value's. - operand_range getOperands() { return operand_range(this); } + operand_range getOperands() { + MutableArrayRef operands = getOpOperands(); + return OperandRange(operands.data(), operands.size()); + } MutableArrayRef getOpOperands() { return LLVM_LIKELY(hasOperandStorage) ? getOperandStorage().getOperands() @@ -329,8 +332,8 @@ class alignas(8) Operation final /// Return true if the operation has an attribute with the provided name, /// false otherwise. - bool hasAttr(Identifier name) { return static_cast(getAttr(name)); } - bool hasAttr(StringRef name) { return static_cast(getAttr(name)); } + bool hasAttr(Identifier name) { return attrs.contains(name); } + bool hasAttr(StringRef name) { return attrs.contains(name); } template bool hasAttrOfType(NameT &&name) { return static_cast( @@ -698,8 +701,11 @@ class alignas(8) Operation final friend class llvm::ilist_node_with_parent; // This stuff is used by the TrailingObjects template. - friend llvm::TrailingObjects; + friend llvm::TrailingObjects; + size_t numTrailingObjects(OverloadToken) const { + return hasOperandStorage ? 
1 : 0; + } size_t numTrailingObjects(OverloadToken) const { return numSuccs; } diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index 5c41123878982..ca3a1a61f85f1 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -245,6 +245,63 @@ class AbstractOperation { ArrayRef attributeNames; }; +//===----------------------------------------------------------------------===// +// Attribute Dictionary-Like Interface +//===----------------------------------------------------------------------===// + +/// Attribute collections provide a dictionary-like interface. Define common +/// lookup functions. +namespace impl { + +/// Unsorted string search or identifier lookups are linear scans. +template +std::pair findAttrUnsorted(IteratorT first, IteratorT last, + NameT name) { + for (auto it = first; it != last; ++it) + if (it->first == name) + return {it, true}; + return {last, false}; +} + +/// Using llvm::lower_bound requires an extra string comparison to check whether +/// the returned iterator points to the found element or whether it indicates +/// the lower bound. Skip this redundant comparison by checking if `compare == +/// 0` during the binary search. +template +std::pair findAttrSorted(IteratorT first, IteratorT last, + StringRef name) { + ptrdiff_t length = std::distance(first, last); + + while (length > 0) { + ptrdiff_t half = length / 2; + IteratorT mid = first + half; + int compare = mid->first.strref().compare(name); + if (compare < 0) { + first = mid + 1; + length = length - half - 1; + } else if (compare > 0) { + length = half; + } else { + return {mid, true}; + } + } + return {first, false}; +} + +/// Identifier lookups on large attribute lists will switch to string binary +/// search. String binary searches become significantly faster than linear scans +/// with the identifier when the attribute list becomes very large. +template +std::pair findAttrSorted(IteratorT first, IteratorT last, + Identifier name) { + constexpr unsigned kSmallAttributeList = 16; + if (std::distance(first, last) > kSmallAttributeList) + return findAttrSorted(first, last, name.strref()); + return findAttrUnsorted(first, last, name); +} + +} // end namespace impl + //===----------------------------------------------------------------------===// // NamedAttrList //===----------------------------------------------------------------------===// @@ -253,9 +310,10 @@ class AbstractOperation { /// and does some basic work to remain sorted. class NamedAttrList { public: + using iterator = SmallVectorImpl::iterator; using const_iterator = SmallVectorImpl::const_iterator; - using const_reference = const NamedAttribute &; using reference = NamedAttribute &; + using const_reference = const NamedAttribute &; using size_type = size_t; NamedAttrList() : dictionarySorted({}, true) {} @@ -346,6 +404,8 @@ class NamedAttrList { Attribute erase(Identifier name); Attribute erase(StringRef name); + iterator begin() { return attrs.begin(); } + iterator end() { return attrs.end(); } const_iterator begin() const { return attrs.begin(); } const_iterator end() const { return attrs.end(); } @@ -359,6 +419,14 @@ class NamedAttrList { /// Erase the attribute at the given iterator position. Attribute eraseImpl(SmallVectorImpl::iterator it); + /// Lookup an attribute in the list. + template + static auto findAttr(AttrListT &attrs, NameT name) { + return attrs.isSorted() + ? 
impl::findAttrSorted(attrs.begin(), attrs.end(), name) + : impl::findAttrUnsorted(attrs.begin(), attrs.end(), name); + } + // These are marked mutable as they may be modified (e.g., sorted) mutable SmallVector attrs; // Pair with cached DictionaryAttr and status of whether attrs is sorted. @@ -520,47 +588,12 @@ struct OperationState { //===----------------------------------------------------------------------===// namespace detail { -/// This class contains the information for a trailing operand storage. -struct TrailingOperandStorage final - : public llvm::TrailingObjects { -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) - TrailingOperandStorage() : numOperands(0), capacity(0), reserved(0) {} -#else - TrailingOperandStorage() : reserved(0), capacity(0), numOperands(0) {} -#endif - ~TrailingOperandStorage() { - for (auto &operand : getOperands()) - operand.~OpOperand(); - } - - /// Return the operands held by this storage. - MutableArrayRef getOperands() { - return {getTrailingObjects(), numOperands}; - } - -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) - /// The number of operands within the storage. - unsigned numOperands; - /// The total capacity number of operands that the storage can hold. - unsigned capacity : 31; - /// We reserve a range of bits for use by the operand storage. - unsigned reserved : 1; -#else - /// We reserve a range of bits for use by the operand storage. - unsigned reserved : 1; - /// The total capacity number of operands that the storage can hold. - unsigned capacity : 31; - /// The number of operands within the storage. - unsigned numOperands; -#endif -}; - /// This class handles the management of operation operands. Operands are /// stored either in a trailing array, or a dynamically resizable vector. -class OperandStorage final - : private llvm::TrailingObjects { +class alignas(8) OperandStorage { public: - OperandStorage(Operation *owner, ValueRange values); + OperandStorage(Operation *owner, OpOperand *trailingOperands, + ValueRange values); ~OperandStorage(); /// Replace the operands contained in the storage with the ones provided in @@ -581,62 +614,25 @@ class OperandStorage final void eraseOperands(const llvm::BitVector &eraseIndices); /// Get the operation operands held by the storage. - MutableArrayRef getOperands() { - return getStorage().getOperands(); - } + MutableArrayRef getOperands() { return {operandStorage, size()}; } /// Return the number of operands held in the storage. - unsigned size() { return getStorage().numOperands; } - - /// Returns the additional size necessary for allocating this object. - static size_t additionalAllocSize(unsigned numOperands) { - return additionalSizeToAlloc(numOperands); - } + unsigned size() { return numOperands; } private: - /// Pointer type traits for the storage pointer that ensures that we use the - /// lowest bit for the storage pointer. - struct StoragePointerLikeTypeTraits - : llvm::PointerLikeTypeTraits { - static constexpr int NumLowBitsAvailable = 1; - }; - /// Resize the storage to the given size. Returns the array containing the new /// operands. MutableArrayRef resize(Operation *owner, unsigned newSize); - /// Returns the current internal storage instance. - TrailingOperandStorage &getStorage() { - return LLVM_UNLIKELY(isDynamicStorage()) ? getDynamicStorage() - : getInlineStorage(); - } - - /// Returns the storage container if the storage is inline. 
- TrailingOperandStorage &getInlineStorage() { - assert(!isDynamicStorage() && "expected storage to be inline"); - return inlineStorage; - } - - /// Returns the storage container if this storage is dynamic. - TrailingOperandStorage &getDynamicStorage() { - assert(isDynamicStorage() && "expected dynamic storage"); - return *dynamicStorage.getPointer(); - } - - /// Returns true if the storage is currently dynamic. - bool isDynamicStorage() const { return dynamicStorage.getInt(); } - - /// The current representation of the storage. This is either a - /// InlineOperandStorage, or a pointer to a InlineOperandStorage. - union { - TrailingOperandStorage inlineStorage; - llvm::PointerIntPair - dynamicStorage; - }; - - /// This stuff is used by the TrailingObjects template. - friend llvm::TrailingObjects; + /// The total capacity number of operands that the storage can hold. + unsigned capacity : 31; + /// A flag indicating if the operand storage was dynamically allocated, as + /// opposed to inlined into the owning operation. + unsigned isStorageDynamic : 1; + /// The number of operands within the storage. + unsigned numOperands; + /// A pointer to the operand storage. + OpOperand *operandStorage; }; } // end namespace detail @@ -718,7 +714,6 @@ class OperandRange final : public llvm::detail::indexed_accessor_range_base< OperandRange, OpOperand *, Value, Value, Value> { public: using RangeBaseT::RangeBaseT; - OperandRange(Operation *op); /// Returns the types of the values within this range. using type_iterator = ValueTypeIterator; diff --git a/mlir/include/mlir/IR/SymbolTable.h b/mlir/include/mlir/IR/SymbolTable.h index 07a8f3fbb2dbf..3950fee156abd 100644 --- a/mlir/include/mlir/IR/SymbolTable.h +++ b/mlir/include/mlir/IR/SymbolTable.h @@ -49,8 +49,9 @@ class SymbolTable { /// Insert a new symbol into the table, and rename it as necessary to avoid /// collisions. Also insert at the specified location in the body of the /// associated operation if it is not already there. It is asserted that the - /// symbol is not inside another operation. - void insert(Operation *symbol, Block::iterator insertPt = {}); + /// symbol is not inside another operation. Return the name of the symbol + /// after insertion as attribute. + StringAttr insert(Operation *symbol, Block::iterator insertPt = {}); /// Return the name of the attribute used for symbol names. 
static StringRef getSymbolAttrName() { return "sym_name"; } diff --git a/mlir/include/mlir/Pass/AnalysisManager.h b/mlir/include/mlir/Pass/AnalysisManager.h index 21318b0097a49..d5840818dffc4 100644 --- a/mlir/include/mlir/Pass/AnalysisManager.h +++ b/mlir/include/mlir/Pass/AnalysisManager.h @@ -212,7 +212,7 @@ class AnalysisMap { return static_cast &>(*it->second).analysis; } - /// Construct analysis using two arguments contructor (OpT, AnalysisManager) + /// Construct analysis using two arguments constructor (OpT, AnalysisManager) template ::value> * = nullptr> @@ -220,7 +220,7 @@ class AnalysisMap { return std::make_unique>(op, am); } - /// Construct analysis using single argument contructor (OpT) + /// Construct analysis using single argument constructor (OpT) template ::value> * = nullptr> diff --git a/mlir/include/mlir/Reducer/ReductionNode.h b/mlir/include/mlir/Reducer/ReductionNode.h index a43b2f1008561..6cdbd2354810a 100644 --- a/mlir/include/mlir/Reducer/ReductionNode.h +++ b/mlir/include/mlir/Reducer/ReductionNode.h @@ -160,7 +160,7 @@ class ReductionNode { Tester::Interestingness interesting; /// `ranges` represents the selected subset of operations in the region. We - /// implictly number each operation in the region and ReductionTreePass will + /// implicitly number each operation in the region and ReductionTreePass will /// apply reducer patterns on the operation falls into the `ranges`. We will /// generate new ReductionNode with subset of `ranges` to see if we can do /// further reduction. we may split the element in the `ranges` so that we can diff --git a/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h b/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h index ea09505a488e0..00c62163c23bc 100644 --- a/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h +++ b/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h @@ -42,7 +42,7 @@ class FrozenRewritePatternSet { /// `disabledPatternLabels` is a set of labels used to filter out input /// patterns with a label in this set. `enabledPatternLabels` is a set of /// labels used to filter out input patterns that do not have one of the - /// lables in this set. + /// labels in this set. FrozenRewritePatternSet( RewritePatternSet &&patterns, ArrayRef disabledPatternLabels = llvm::None, diff --git a/mlir/include/mlir/Support/DebugAction.h b/mlir/include/mlir/Support/DebugAction.h index 8a2f88a1f63c1..74f0f4c47b22e 100644 --- a/mlir/include/mlir/Support/DebugAction.h +++ b/mlir/include/mlir/Support/DebugAction.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains defintions for the debug action framework. This framework -// allows for external entites to control certain actions taken by the compiler +// This file contains definitions for the debug action framework. This framework +// allows for external entities to control certain actions taken by the compiler // by registering handler functions. A debug action handler provides the // internal implementation for the various queries on a debug action, such as // whether it should execute or not. @@ -64,7 +64,7 @@ class DebugActionManager { /// This class represents a generic action handler. A generic handler allows /// for handling any action type. 
Handlers of this type are useful for - /// implementing general functionality that doesn’t necessarily need to + /// implementing general functionality that doesn't necessarily need to /// interpret the exact action parameters, or can rely on an external /// interpreter (such as the user). Given that these handlers are generic, /// they take a set of opaque parameters that try to map the context of the @@ -90,7 +90,7 @@ class DebugActionManager { /// Register the given action handler with the manager. void registerActionHandler(std::unique_ptr handler) { // The manager is always disabled if built without debug. -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS actionHandlers.emplace_back(std::move(handler)); #endif } @@ -109,7 +109,7 @@ class DebugActionManager { template bool shouldExecute(Args &&... args) { // The manager is always disabled if built without debug. -#ifdef NDEBUG +#if !LLVM_ENABLE_ABI_BREAKING_CHECKS return true; #else // Invoke the `shouldExecute` method on the provided handler. @@ -127,7 +127,7 @@ class DebugActionManager { private: // The manager is always disabled if built without debug. -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS //===--------------------------------------------------------------------===// // Query to Handler Dispatch //===--------------------------------------------------------------------===// @@ -175,7 +175,7 @@ class DebugActionManager { /// A debug action is a specific action that is to be taken by the compiler, /// that can be toggled and controlled by an external user. There are no -/// constraints on the granulity of an action, it could be as simple as +/// constraints on the granularity of an action, it could be as simple as /// "perform this fold" and as complex as "run this pass pipeline". Via template /// parameters `ParameterTs`, a user may provide the set of argument types that /// are provided when handling a query on this action. Derived classes are diff --git a/mlir/include/mlir/Support/MlirOptMain.h b/mlir/include/mlir/Support/MlirOptMain.h index 4ae3535d4c408..51a26d08341a4 100644 --- a/mlir/include/mlir/Support/MlirOptMain.h +++ b/mlir/include/mlir/Support/MlirOptMain.h @@ -27,6 +27,12 @@ class MemoryBuffer; namespace mlir { class DialectRegistry; class PassPipelineCLParser; +class PassManager; + +/// This defines the function type used to set up the pass manager. This can be +/// used to pass in a callback to set up a default pass pipeline to be applied on +/// the loaded IR. +using PassPipelineFn = llvm::function_ref<LogicalResult(PassManager &pm)>; /// Perform the core processing behind `mlir-opt`: /// - outputStream is the stream where the resulting IR is printed. @@ -52,6 +58,17 @@ LogicalResult MlirOptMain(llvm::raw_ostream &outputStream, bool allowUnregisteredDialects, bool preloadDialectsInContext = false); +/// Support a callback to set up the pass manager. +/// - passManagerSetupFn is the callback invoked to set up the pass manager to +/// apply on the loaded IR. +LogicalResult MlirOptMain(llvm::raw_ostream &outputStream, + std::unique_ptr<llvm::MemoryBuffer> buffer, + PassPipelineFn passManagerSetupFn, + DialectRegistry &registry, bool splitInputFile, + bool verifyDiagnostics, bool verifyPasses, + bool allowUnregisteredDialects, + bool preloadDialectsInContext = false); + /// Implementation for tools like `mlir-opt`. /// - toolName is used for the header displayed by `--help`. /// - registry should contain all the dialects that can be parsed in the source. 
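A minimal sketch of driving the new overload from a host tool (illustrative only, not part of this patch): it assumes PassPipelineFn is the LogicalResult(PassManager &) callback declared above and uses the stock createCanonicalizerPass as a stand-in pipeline; buildDefaultPipeline, runTool, and the flag values are hypothetical.

static LogicalResult buildDefaultPipeline(PassManager &pm) {
  // Populate the manager however the host tool wants; here we only run the
  // canonicalizer.
  pm.addPass(createCanonicalizerPass());
  return success();
}

static LogicalResult runTool(llvm::raw_ostream &os,
                             std::unique_ptr<llvm::MemoryBuffer> buffer,
                             DialectRegistry &registry) {
  // Same knobs as the PassPipelineCLParser overload, but the pipeline is
  // chosen programmatically rather than parsed from the command line.
  return MlirOptMain(os, std::move(buffer), buildDefaultPipeline, registry,
                     /*splitInputFile=*/false, /*verifyDiagnostics=*/false,
                     /*verifyPasses=*/true,
                     /*allowUnregisteredDialects=*/false);
}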
diff --git a/mlir/include/mlir/TableGen/AttrOrTypeDef.h b/mlir/include/mlir/TableGen/AttrOrTypeDef.h index 2029c0e624cd3..09294c2fa8081 100644 --- a/mlir/include/mlir/TableGen/AttrOrTypeDef.h +++ b/mlir/include/mlir/TableGen/AttrOrTypeDef.h @@ -101,6 +101,9 @@ class AttrOrTypeDef { // None. Otherwise, returns the contents of that code block. Optional getParserCode() const; + // Returns the custom assembly format, if one was specified. + Optional getAssemblyFormat() const; + // Returns true if the accessors based on the parameters should be generated. bool genAccessors() const; @@ -199,6 +202,15 @@ class AttrOrTypeParameter { // Get the C++ accessor type of this parameter. StringRef getCppAccessorType() const; + // Get the C++ storage type of this parameter. + StringRef getCppStorageType() const; + + // Get an optional C++ parameter parser. + Optional getParser() const; + + // Get an optional C++ parameter printer. + Optional getPrinter() const; + // Get a description of this parameter for documentation purposes. Optional getSummary() const; diff --git a/mlir/include/mlir/TableGen/Dialect.h b/mlir/include/mlir/TableGen/Dialect.h index 2de0d9b0406eb..3030d6556b5bd 100644 --- a/mlir/include/mlir/TableGen/Dialect.h +++ b/mlir/include/mlir/TableGen/Dialect.h @@ -1,3 +1,4 @@ +//===- Dialect.h - Dialect class --------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index c95488d14a49b..cdba5a8094931 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -359,14 +359,6 @@ llvm::Value *createIntrinsicCall(llvm::IRBuilderBase &builder, llvm::Intrinsic::ID intrinsic, ArrayRef args = {}, ArrayRef tys = {}); - -/// Creates a call to an LLVM IR intrinsic function with the given arguments -/// for NVVM WMMA ops. Handles cases where the intrinsic name is overloaded -/// using the types of arguments supplied. Selects the correct intrinsic -/// by inspecting the argument types. 
-llvm::Value *createNvvmIntrinsicCall(llvm::IRBuilderBase &builder, - llvm::Intrinsic::ID intrinsic, - ArrayRef args = {}); } // namespace detail } // namespace LLVM diff --git a/mlir/lib/Bindings/Python/IRAffine.cpp b/mlir/lib/Bindings/Python/IRAffine.cpp index 50a96c8c8cede..da80cda9c5823 100644 --- a/mlir/lib/Bindings/Python/IRAffine.cpp +++ b/mlir/lib/Bindings/Python/IRAffine.cpp @@ -205,6 +205,18 @@ class PyAffineAddExpr return PyAffineAddExpr(lhs.getContext(), expr); } + static PyAffineAddExpr getRHSConstant(PyAffineExpr lhs, intptr_t rhs) { + MlirAffineExpr expr = mlirAffineAddExprGet( + lhs, mlirAffineConstantExprGet(mlirAffineExprGetContext(lhs), rhs)); + return PyAffineAddExpr(lhs.getContext(), expr); + } + + static PyAffineAddExpr getLHSConstant(intptr_t lhs, PyAffineExpr rhs) { + MlirAffineExpr expr = mlirAffineAddExprGet( + mlirAffineConstantExprGet(mlirAffineExprGetContext(rhs), lhs), rhs); + return PyAffineAddExpr(rhs.getContext(), expr); + } + static void bindDerived(ClassTy &c) { c.def_static("get", &PyAffineAddExpr::get); } @@ -222,6 +234,18 @@ class PyAffineMulExpr return PyAffineMulExpr(lhs.getContext(), expr); } + static PyAffineMulExpr getRHSConstant(PyAffineExpr lhs, intptr_t rhs) { + MlirAffineExpr expr = mlirAffineMulExprGet( + lhs, mlirAffineConstantExprGet(mlirAffineExprGetContext(lhs), rhs)); + return PyAffineMulExpr(lhs.getContext(), expr); + } + + static PyAffineMulExpr getLHSConstant(intptr_t lhs, PyAffineExpr rhs) { + MlirAffineExpr expr = mlirAffineMulExprGet( + mlirAffineConstantExprGet(mlirAffineExprGetContext(rhs), lhs), rhs); + return PyAffineMulExpr(rhs.getContext(), expr); + } + static void bindDerived(ClassTy &c) { c.def_static("get", &PyAffineMulExpr::get); } @@ -239,6 +263,18 @@ class PyAffineModExpr return PyAffineModExpr(lhs.getContext(), expr); } + static PyAffineModExpr getRHSConstant(PyAffineExpr lhs, intptr_t rhs) { + MlirAffineExpr expr = mlirAffineModExprGet( + lhs, mlirAffineConstantExprGet(mlirAffineExprGetContext(lhs), rhs)); + return PyAffineModExpr(lhs.getContext(), expr); + } + + static PyAffineModExpr getLHSConstant(intptr_t lhs, PyAffineExpr rhs) { + MlirAffineExpr expr = mlirAffineModExprGet( + mlirAffineConstantExprGet(mlirAffineExprGetContext(rhs), lhs), rhs); + return PyAffineModExpr(rhs.getContext(), expr); + } + static void bindDerived(ClassTy &c) { c.def_static("get", &PyAffineModExpr::get); } @@ -256,6 +292,18 @@ class PyAffineFloorDivExpr return PyAffineFloorDivExpr(lhs.getContext(), expr); } + static PyAffineFloorDivExpr getRHSConstant(PyAffineExpr lhs, intptr_t rhs) { + MlirAffineExpr expr = mlirAffineFloorDivExprGet( + lhs, mlirAffineConstantExprGet(mlirAffineExprGetContext(lhs), rhs)); + return PyAffineFloorDivExpr(lhs.getContext(), expr); + } + + static PyAffineFloorDivExpr getLHSConstant(intptr_t lhs, PyAffineExpr rhs) { + MlirAffineExpr expr = mlirAffineFloorDivExprGet( + mlirAffineConstantExprGet(mlirAffineExprGetContext(rhs), lhs), rhs); + return PyAffineFloorDivExpr(rhs.getContext(), expr); + } + static void bindDerived(ClassTy &c) { c.def_static("get", &PyAffineFloorDivExpr::get); } @@ -273,6 +321,18 @@ class PyAffineCeilDivExpr return PyAffineCeilDivExpr(lhs.getContext(), expr); } + static PyAffineCeilDivExpr getRHSConstant(PyAffineExpr lhs, intptr_t rhs) { + MlirAffineExpr expr = mlirAffineCeilDivExprGet( + lhs, mlirAffineConstantExprGet(mlirAffineExprGetContext(lhs), rhs)); + return PyAffineCeilDivExpr(lhs.getContext(), expr); + } + + static PyAffineCeilDivExpr getLHSConstant(intptr_t lhs, PyAffineExpr rhs) { 
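+ // Wrap the integer `lhs` as an affine constant built in `rhs`'s context,
+ // then form the ceildiv; mirrors getRHSConstant above with operands swapped.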
+ MlirAffineExpr expr = mlirAffineCeilDivExprGet( + mlirAffineConstantExprGet(mlirAffineExprGetContext(rhs), lhs), rhs); + return PyAffineCeilDivExpr(rhs.getContext(), expr); + } + static void bindDerived(ClassTy &c) { c.def_static("get", &PyAffineCeilDivExpr::get); } @@ -435,17 +495,19 @@ void mlir::python::populateIRAffine(py::module &m) { .def_property_readonly(MLIR_PYTHON_CAPI_PTR_ATTR, &PyAffineExpr::getCapsule) .def(MLIR_PYTHON_CAPI_FACTORY_ATTR, &PyAffineExpr::createFromCapsule) - .def("__add__", - [](PyAffineExpr &self, PyAffineExpr &other) { - return PyAffineAddExpr::get(self, other); - }) - .def("__mul__", - [](PyAffineExpr &self, PyAffineExpr &other) { - return PyAffineMulExpr::get(self, other); - }) - .def("__mod__", - [](PyAffineExpr &self, PyAffineExpr &other) { - return PyAffineModExpr::get(self, other); + .def("__add__", &PyAffineAddExpr::get) + .def("__add__", &PyAffineAddExpr::getRHSConstant) + .def("__radd__", &PyAffineAddExpr::getRHSConstant) + .def("__mul__", &PyAffineMulExpr::get) + .def("__mul__", &PyAffineMulExpr::getRHSConstant) + .def("__rmul__", &PyAffineMulExpr::getRHSConstant) + .def("__mod__", &PyAffineModExpr::get) + .def("__mod__", &PyAffineModExpr::getRHSConstant) + .def("__rmod__", + [](PyAffineExpr &self, intptr_t other) { + return PyAffineModExpr::get( + PyAffineConstantExpr::get(other, *self.getContext().get()), + self); }) .def("__sub__", [](PyAffineExpr &self, PyAffineExpr &other) { @@ -454,6 +516,17 @@ void mlir::python::populateIRAffine(py::module &m) { return PyAffineAddExpr::get(self, PyAffineMulExpr::get(negOne, other)); }) + .def("__sub__", + [](PyAffineExpr &self, intptr_t other) { + return PyAffineAddExpr::get( + self, + PyAffineConstantExpr::get(-other, *self.getContext().get())); + }) + .def("__rsub__", + [](PyAffineExpr &self, intptr_t other) { + return PyAffineAddExpr::getLHSConstant( + other, PyAffineMulExpr::getLHSConstant(-1, self)); + }) .def("__eq__", [](PyAffineExpr &self, PyAffineExpr &other) { return self == other; }) .def("__eq__", @@ -474,24 +547,63 @@ void mlir::python::populateIRAffine(py::module &m) { printAccum.parts.append(")"); return printAccum.join(); }) + .def("__hash__", + [](PyAffineExpr &self) { + return static_cast(llvm::hash_value(self.get().ptr)); + }) .def_property_readonly( "context", [](PyAffineExpr &self) { return self.getContext().getObject(); }) + .def("compose", + [](PyAffineExpr &self, PyAffineMap &other) { + return PyAffineExpr(self.getContext(), + mlirAffineExprCompose(self, other)); + }) .def_static( "get_add", &PyAffineAddExpr::get, "Gets an affine expression containing a sum of two expressions.") + .def_static("get_add", &PyAffineAddExpr::getLHSConstant, + "Gets an affine expression containing a sum of a constant " + "and another expression.") + .def_static("get_add", &PyAffineAddExpr::getRHSConstant, + "Gets an affine expression containing a sum of an expression " + "and a constant.") .def_static( "get_mul", &PyAffineMulExpr::get, "Gets an affine expression containing a product of two expressions.") + .def_static("get_mul", &PyAffineMulExpr::getLHSConstant, + "Gets an affine expression containing a product of a " + "constant and another expression.") + .def_static("get_mul", &PyAffineMulExpr::getRHSConstant, + "Gets an affine expression containing a product of an " + "expression and a constant.") .def_static("get_mod", &PyAffineModExpr::get, "Gets an affine expression containing the modulo of dividing " "one expression by another.") + .def_static("get_mod", &PyAffineModExpr::getLHSConstant, + "Gets a 
semi-affine expression containing the modulo of " "dividing a constant by an expression.") .def_static("get_mod", &PyAffineModExpr::getRHSConstant, "Gets an affine expression containing the modulo of dividing " "an expression by a constant.") .def_static("get_floor_div", &PyAffineFloorDivExpr::get, "Gets an affine expression containing the rounded-down " "result of dividing one expression by another.") + .def_static("get_floor_div", &PyAffineFloorDivExpr::getLHSConstant, + "Gets a semi-affine expression containing the rounded-down " + "result of dividing a constant by an expression.") + .def_static("get_floor_div", &PyAffineFloorDivExpr::getRHSConstant, + "Gets an affine expression containing the rounded-down " + "result of dividing an expression by a constant.") .def_static("get_ceil_div", &PyAffineCeilDivExpr::get, "Gets an affine expression containing the rounded-up result " "of dividing one expression by another.") + .def_static("get_ceil_div", &PyAffineCeilDivExpr::getLHSConstant, + "Gets a semi-affine expression containing the rounded-up " + "result of dividing a constant by an expression.") + .def_static("get_ceil_div", &PyAffineCeilDivExpr::getRHSConstant, + "Gets an affine expression containing the rounded-up result " + "of dividing an expression by a constant.") .def_static("get_constant", &PyAffineConstantExpr::get, py::arg("value"), py::arg("context") = py::none(), "Gets a constant affine expression with the given value.") @@ -542,6 +654,10 @@ void mlir::python::populateIRAffine(py::module &m) { printAccum.parts.append(")"); return printAccum.join(); }) + .def("__hash__", + [](PyAffineMap &self) { + return static_cast<size_t>(llvm::hash_value(self.get().ptr)); + }) .def_static("compress_unused_symbols", [](py::list affineMaps, DefaultingPyMlirContext context) { SmallVector maps; @@ -714,6 +830,10 @@ void mlir::python::populateIRAffine(py::module &m) { printAccum.parts.append(")"); return printAccum.join(); }) + .def("__hash__", + [](PyIntegerSet &self) { + return static_cast<size_t>(llvm::hash_value(self.get().ptr)); + }) .def_property_readonly( "context", [](PyIntegerSet &self) { return self.getContext().getObject(); }) diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index d47d06a3aa75e..cf59a67f9c8f0 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -1153,7 +1153,7 @@ PyOpView::buildGeneric(py::object cls, py::list resultTypeList, throw py::value_error((llvm::Twine("Operation \"") + name + "\" requires " + llvm::Twine(resultSegmentSpec.size()) + - "result segments but was provided " + + " result segments but was provided " + llvm::Twine(resultTypeList.size())) .str()); } @@ -1164,7 +1164,7 @@ PyOpView::buildGeneric(py::object cls, py::list resultTypeList, if (segmentSpec == 1 || segmentSpec == 0) { // Unpack unary element. try { - auto resultType = py::cast(std::get<0>(it.value())); + auto *resultType = py::cast<PyType *>(std::get<0>(it.value())); if (resultType) { resultTypes.push_back(resultType); resultSegmentLengths.push_back(1); @@ -1530,6 +1530,57 @@ PyValue PyValue::createFromCapsule(pybind11::object capsule) { return PyValue(ownerRef, value); } +//------------------------------------------------------------------------------ +// PySymbolTable. 
+//------------------------------------------------------------------------------ + +PySymbolTable::PySymbolTable(PyOperationBase &operation) + : operation(operation.getOperation().getRef()) { + symbolTable = mlirSymbolTableCreate(operation.getOperation().get()); + if (mlirSymbolTableIsNull(symbolTable)) { + throw py::cast_error("Operation is not a Symbol Table."); + } +} + +py::object PySymbolTable::dunderGetItem(const std::string &name) { + operation->checkValid(); + MlirOperation symbol = mlirSymbolTableLookup( + symbolTable, mlirStringRefCreate(name.data(), name.length())); + if (mlirOperationIsNull(symbol)) + throw py::key_error("Symbol '" + name + "' not in the symbol table."); + + return PyOperation::forOperation(operation->getContext(), symbol, + operation.getObject()) + ->createOpView(); +} + +void PySymbolTable::erase(PyOperationBase &symbol) { + operation->checkValid(); + symbol.getOperation().checkValid(); + mlirSymbolTableErase(symbolTable, symbol.getOperation().get()); + // The operation is also erased, so we must invalidate it. There may be Python + // references to this operation so we don't want to delete it from the list of + // live operations here. + symbol.getOperation().valid = false; +} + +void PySymbolTable::dunderDel(const std::string &name) { + py::object operation = dunderGetItem(name); + erase(py::cast(operation)); +} + +PyAttribute PySymbolTable::insert(PyOperationBase &symbol) { + operation->checkValid(); + symbol.getOperation().checkValid(); + MlirAttribute symbolAttr = mlirOperationGetAttributeByName( + symbol.getOperation().get(), mlirSymbolTableGetSymbolAttributeName()); + if (mlirAttributeIsNull(symbolAttr)) + throw py::value_error("Expected operation to have a symbol name."); + return PyAttribute( + symbol.getOperation().getContext(), + mlirSymbolTableInsert(symbolTable, symbol.getOperation().get())); +} + namespace { /// CRTP base class for Python MLIR values that subclass Value and should be /// castable from it. 
The value hierarchy is one level deep and is not supposed @@ -2120,6 +2171,10 @@ void mlir::python::populateIRCore(py::module &m) { }) .def("__eq__", [](PyOperationBase &self, py::object other) { return false; }) + .def("__hash__", + [](PyOperationBase &self) { + return static_cast(llvm::hash_value(&self.getOperation())); + }) .def_property_readonly("attributes", [](PyOperationBase &self) { return PyOpAttributeMap( @@ -2507,7 +2562,10 @@ void mlir::python::populateIRCore(py::module &m) { .def("__eq__", [](PyAttribute &self, PyAttribute &other) { return self == other; }) .def("__eq__", [](PyAttribute &self, py::object &other) { return false; }) - .def("__hash__", [](PyAttribute &self) { return (size_t)self.get().ptr; }) + .def("__hash__", + [](PyAttribute &self) { + return static_cast(llvm::hash_value(self.get().ptr)); + }) .def( "dump", [](PyAttribute &self) { mlirAttributeDump(self); }, kDumpDocstring) @@ -2601,7 +2659,10 @@ void mlir::python::populateIRCore(py::module &m) { "Context that owns the Type") .def("__eq__", [](PyType &self, PyType &other) { return self == other; }) .def("__eq__", [](PyType &self, py::object &other) { return false; }) - .def("__hash__", [](PyType &self) { return (size_t)self.get().ptr; }) + .def("__hash__", + [](PyType &self) { + return static_cast(llvm::hash_value(self.get().ptr)); + }) .def( "dump", [](PyType &self) { mlirTypeDump(self); }, kDumpDocstring) .def( @@ -2652,6 +2713,10 @@ void mlir::python::populateIRCore(py::module &m) { return self.get().ptr == other.get().ptr; }) .def("__eq__", [](PyValue &self, py::object other) { return false; }) + .def("__hash__", + [](PyValue &self) { + return static_cast(llvm::hash_value(self.get().ptr)); + }) .def( "__str__", [](PyValue &self) { @@ -2670,6 +2735,20 @@ void mlir::python::populateIRCore(py::module &m) { PyBlockArgument::bind(m); PyOpResult::bind(m); + //---------------------------------------------------------------------------- + // Mapping of SymbolTable. + //---------------------------------------------------------------------------- + py::class_(m, "SymbolTable", py::module_local()) + .def(py::init()) + .def("__getitem__", &PySymbolTable::dunderGetItem) + .def("insert", &PySymbolTable::insert) + .def("erase", &PySymbolTable::erase) + .def("__delitem__", &PySymbolTable::dunderDel) + .def("__contains__", [](PySymbolTable &table, const std::string &name) { + return !mlirOperationIsNull(mlirSymbolTableLookup( + table, mlirStringRefCreate(name.data(), name.length()))); + }); + // Container bindings. PyBlockArgumentList::bind(m); PyBlockIterator::bind(m); diff --git a/mlir/lib/Bindings/Python/IRModule.h b/mlir/lib/Bindings/Python/IRModule.h index 73924fc74bdbf..eb5c2385a165d 100644 --- a/mlir/lib/Bindings/Python/IRModule.h +++ b/mlir/lib/Bindings/Python/IRModule.h @@ -32,6 +32,7 @@ class DefaultingPyMlirContext; class PyModule; class PyOperation; class PyType; +class PySymbolTable; class PyValue; /// Template for a reference to a concrete type which captures a python @@ -513,6 +514,7 @@ class PyOperation : public PyOperationBase, public BaseContextObject { bool valid = true; friend class PyOperationBase; + friend class PySymbolTable; }; /// A PyOpView is equivalent to the C++ "Op" wrappers: these are the basis for @@ -876,6 +878,38 @@ class PyIntegerSet : public BaseContextObject { MlirIntegerSet integerSet; }; +/// Bindings for MLIR symbol tables. +class PySymbolTable { +public: + /// Constructs a symbol table for the given operation. 
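+ /// Throws a cast_error if the operation does not define a symbol table;
+ /// mlirSymbolTableCreate returns a null table for such operations.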
+ explicit PySymbolTable(PyOperationBase &operation); + + /// Destroys the symbol table. + ~PySymbolTable() { mlirSymbolTableDestroy(symbolTable); } + + /// Returns the symbol (opview) with the given name; throws if there is no + /// such symbol in the table. + pybind11::object dunderGetItem(const std::string &name); + + /// Removes the given operation from the symbol table and erases it. + void erase(PyOperationBase &symbol); + + /// Removes the operation with the given name from the symbol table and erases + /// it; throws if there is no such symbol in the table. + void dunderDel(const std::string &name); + + /// Inserts the given operation into the symbol table. The operation must have + /// the symbol trait. + PyAttribute insert(PyOperationBase &symbol); + + /// Casts the bindings class into the C API structure. + operator MlirSymbolTable() { return symbolTable; } + +private: + PyOperationRef operation; + MlirSymbolTable symbolTable; +}; + void populateIRAffine(pybind11::module &m); void populateIRAttributes(pybind11::module &m); void populateIRCore(pybind11::module &m); diff --git a/mlir/lib/Bindings/Python/IRTypes.cpp b/mlir/lib/Bindings/Python/IRTypes.cpp index 1cfd799bf6934..89fdb1f06a91a 100644 --- a/mlir/lib/Bindings/Python/IRTypes.cpp +++ b/mlir/lib/Bindings/Python/IRTypes.cpp @@ -284,6 +284,19 @@ class PyShapedType : public PyConcreteType { }, "Returns whether the given value is used as a placeholder for dynamic " "strides and offsets in shaped types."); + c.def_property_readonly( + "shape", + [](PyShapedType &self) { + self.requireHasRank(); + + std::vector<int64_t> shape; + int64_t rank = mlirShapedTypeGetRank(self); + shape.reserve(rank); + for (int64_t i = 0; i < rank; ++i) + shape.push_back(mlirShapedTypeGetDimSize(self, i)); + return shape; + }, + "Returns the shape of the ranked shaped type as a list of integers."); } private: diff --git a/mlir/lib/CAPI/IR/AffineExpr.cpp b/mlir/lib/CAPI/IR/AffineExpr.cpp index 2d8bc3ce569af..5b25ab5337e2f 100644 --- a/mlir/lib/CAPI/IR/AffineExpr.cpp +++ b/mlir/lib/CAPI/IR/AffineExpr.cpp @@ -56,6 +56,11 @@ bool mlirAffineExprIsFunctionOfDim(MlirAffineExpr affineExpr, return unwrap(affineExpr).isFunctionOfDim(position); } +MlirAffineExpr mlirAffineExprCompose(MlirAffineExpr affineExpr, + MlirAffineMap affineMap) { + return wrap(unwrap(affineExpr).compose(unwrap(affineMap))); +} + //===----------------------------------------------------------------------===// // Affine Dimension Expression. //===----------------------------------------------------------------------===// diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp index 6f617dc19269d..13490b342d9f7 100644 --- a/mlir/lib/CAPI/IR/IR.cpp +++ b/mlir/lib/CAPI/IR/IR.cpp @@ -763,3 +763,36 @@ bool mlirTypeIDEqual(MlirTypeID typeID1, MlirTypeID typeID2) { size_t mlirTypeIDHashValue(MlirTypeID typeID) { return hash_value(unwrap(typeID)); } + +//===----------------------------------------------------------------------===// +// Symbol and SymbolTable API. 
+//===----------------------------------------------------------------------===// + +MlirStringRef mlirSymbolTableGetSymbolAttributeName() { + return wrap(SymbolTable::getSymbolAttrName()); +} + +MlirSymbolTable mlirSymbolTableCreate(MlirOperation operation) { + if (!unwrap(operation)->hasTrait()) + return wrap(static_cast(nullptr)); + return wrap(new SymbolTable(unwrap(operation))); +} + +void mlirSymbolTableDestroy(MlirSymbolTable symbolTable) { + delete unwrap(symbolTable); +} + +MlirOperation mlirSymbolTableLookup(MlirSymbolTable symbolTable, + MlirStringRef name) { + return wrap(unwrap(symbolTable)->lookup(StringRef(name.data, name.length))); +} + +MlirAttribute mlirSymbolTableInsert(MlirSymbolTable symbolTable, + MlirOperation operation) { + return wrap(unwrap(symbolTable)->insert(unwrap(operation))); +} + +void mlirSymbolTableErase(MlirSymbolTable symbolTable, + MlirOperation operation) { + unwrap(symbolTable)->erase(unwrap(operation)); +} diff --git a/mlir/lib/CAPI/IR/Support.cpp b/mlir/lib/CAPI/IR/Support.cpp index e4b409906297d..b6e1f9180c771 100644 --- a/mlir/lib/CAPI/IR/Support.cpp +++ b/mlir/lib/CAPI/IR/Support.cpp @@ -7,9 +7,15 @@ //===----------------------------------------------------------------------===// #include "mlir-c/Support.h" +#include "llvm/ADT/StringRef.h" #include MlirStringRef mlirStringRefCreateFromCString(const char *str) { return mlirStringRefCreate(str, strlen(str)); } + +bool mlirStringRefEqual(MlirStringRef string, MlirStringRef other) { + return llvm::StringRef(string.data, string.length) == + llvm::StringRef(other.data, other.length); +} diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 3ac7ee4e2d204..d0589f1fc35bb 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -134,34 +134,8 @@ struct LowerGpuOpsToNVVMOpsPass // Lowering for MMAMatrixType. converter.addConversion([&](gpu::MMAMatrixType type) -> Type { - // The number of items in structToReturn are dependent on the the dataType - // and the MMA operand that this operation is associated with. - llvm::DenseMap numElemsPerThreadF16, - numElemsPerThreadF32; - numElemsPerThreadF16["AOp"] = 8; - numElemsPerThreadF16["BOp"] = 8; - numElemsPerThreadF16["COp"] = 4; - numElemsPerThreadF32["AOp"] = 8; - numElemsPerThreadF32["BOp"] = 8; - numElemsPerThreadF32["COp"] = 8; - Type structToReturn; - if (type.getElementType().isF16()) { - // Number of f16's in 32-bit. 
- unsigned vecSize = 2; - Type vec = VectorType::get(vecSize, FloatType::getF16(&getContext())); - unsigned size = numElemsPerThreadF16[type.getOperand()]; - SmallVector elements(size, vec); - structToReturn = - LLVM::LLVMStructType::getLiteral(&getContext(), elements); - } else if (type.getElementType().isF32()) { - unsigned size = numElemsPerThreadF32[type.getOperand()]; - SmallVector elements(size, FloatType::getF32(&getContext())); - structToReturn = - LLVM::LLVMStructType::getLiteral(&getContext(), elements); - } - return structToReturn; + return convertMMAToLLVMType(type); }); - RewritePatternSet patterns(m.getContext()); RewritePatternSet llvmPatterns(m.getContext()); diff --git a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp index 681725441539f..b0bf94b7f8066 100644 --- a/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp @@ -11,10 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" #include "mlir/Conversion/LLVMCommon/Pattern.h" #include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/NVVMDialect.h" +#include "mlir/IR/TypeUtilities.h" using namespace mlir; @@ -36,26 +38,26 @@ static LogicalResult areAllLLVMTypes(Operation *op, ValueRange operands, return success(); } -/// Error string to emit when unimplemented WMMA variant is encountered. -static constexpr StringRef kInvalidCaseStr = - "Unimplemented WMMA variant, Only M16N16K16 version implemented."; +/// Error string to emit when an unimplemented WMMA variant is encountered. +static constexpr StringRef kInvalidCaseStr = "Unsupported WMMA variant."; + +static NVVM::MMAFrag convertOperand(StringRef operandName) { + if (operandName.equals("AOp")) + return NVVM::MMAFrag::a; + if (operandName.equals("BOp")) + return NVVM::MMAFrag::b; + if (operandName.equals("COp")) + return NVVM::MMAFrag::c; + llvm_unreachable("Unknown operand name"); +} -/// Return the LLVMStructureType corresponding to the MMAMatrixType `type`. -static LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type) { - StringRef operandStr = type.getOperand(); - assert(type.getElementType().isa()); - Type baseType = type.getElementType().isF16() - ? VectorType::get(2, type.getElementType()) - : type.getElementType(); - auto getLLVMType = [&](int64_t numElements) { - return LLVM::LLVMStructType::getLiteral( - type.getContext(), SmallVector(numElements, baseType)); - }; - if (operandStr.equals("AOp") || operandStr.equals("BOp")) - return getLLVMType(8); +static NVVM::MMATypes getElementType(gpu::MMAMatrixType type) { if (type.getElementType().isF16()) - return getLLVMType(4); - return getLLVMType(8); + return NVVM::MMATypes::f16; + if (type.getElementType().isF32()) + return type.getOperand().equals("COp") ? NVVM::MMATypes::f32 + : NVVM::MMATypes::tf32; + llvm_unreachable("Unsupported type"); } /// This class implements the conversion of GPU MMA loadOp to wmma.load op @@ -118,41 +120,41 @@ struct WmmaLoadOpToNVVMLowering gpu::MMAMatrixType retType = subgroupMmaLoadMatrixOp.res().getType().cast(); ArrayRef retTypeShape = retType.getShape(); + int64_t m = 0; + int64_t n = 0; + int64_t k = 0; + NVVM::MMATypes eltype = getElementType(retType); + // NVVM intrinsics require explicit m, n, and k dimensions; infer the + // missing dimension from the set of valid intrinsics available. 
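+ // For example, a 16x16 f16 "AOp" gives m = k = 16, and
+ // inferNDimension(16, 16, f16) recovers the n of a matching intrinsic.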
+ if (retType.getOperand().equals("AOp")) { + m = retTypeShape[0]; + k = retTypeShape[1]; + n = NVVM::WMMALoadOp::inferNDimension(m, k, eltype); + } else if (retType.getOperand().equals("BOp")) { + k = retTypeShape[0]; + n = retTypeShape[1]; + m = NVVM::WMMALoadOp::inferMDimension(k, n, eltype); + } else if (retType.getOperand().equals("COp")) { + m = retTypeShape[0]; + n = retTypeShape[1]; + k = NVVM::WMMALoadOp::inferKDimension(m, n, eltype); + } + NVVM::MMALayout layout = NVVM::MMALayout::row; + NVVM::MMAFrag frag = convertOperand(retType.getOperand()); + // Check that there is an existing instruction for the combination we need. + if (NVVM::WMMALoadOp::getIntrinsicID(m, n, k, layout, eltype, frag) == 0) + return rewriter.notifyMatchFailure(op, kInvalidCaseStr); Type resType = convertMMAToLLVMType(retType); - StringRef operandStr = retType.getOperand(); // Create nvvm.mma_load op according to the operand types. Value leadingDim32 = rewriter.create( loc, rewriter.getI32Type(), leadDimension); - SmallVector loadOpOperands({loadAddressCasted, leadingDim32}); - if (operandStr.equals("AOp")) { - if (retTypeShape[0] == 16 && retTypeShape[1] == 16) { - rewriter.replaceOpWithNewOp(op, resType, - loadOpOperands); - } else { - return rewriter.notifyMatchFailure(op, kInvalidCaseStr); - } - } else if (operandStr.equals("BOp")) { - if (retTypeShape[0] == 16 && retTypeShape[1] == 16) { - rewriter.replaceOpWithNewOp(op, resType, - loadOpOperands); - } else { - return rewriter.notifyMatchFailure(op, kInvalidCaseStr); - } - } else { - if (retTypeShape[0] == 16 && retTypeShape[1] == 16) { - if (retType.getElementType().isF16()) { - rewriter.replaceOpWithNewOp( - op, resType, loadOpOperands); - } else if (retType.getElementType().isF32()) { - rewriter.replaceOpWithNewOp( - op, resType, loadOpOperands); - } - } else { - return rewriter.notifyMatchFailure(op, kInvalidCaseStr); - } - } + + rewriter.replaceOpWithNewOp<NVVM::WMMALoadOp>( + op, resType, loadAddressCasted, leadingDim32, m, n, k, layout, eltype, + frag); + return success(); } }; @@ -212,13 +214,18 @@ struct WmmaStoreOpToNVVMLowering storeAddress); SmallVector storeOpOperands; - storeOpOperands.push_back(storeAddressCasted); - // Get the shape of the MMAMatrix type being stored. The shape will // choose which intrinsic this op will be lowered to. gpu::MMAMatrixType srcType = subgroupMmaStoreMatrixOp.src().getType().cast(); ArrayRef srcTypeShape = srcType.getShape(); + NVVM::MMALayout layout = NVVM::MMALayout::row; + NVVM::MMATypes eltype = getElementType(srcType); + int64_t m = srcTypeShape[0]; + int64_t n = srcTypeShape[1]; + int64_t k = NVVM::WMMAStoreOp::inferKDimension(m, n, eltype); + if (NVVM::WMMAStoreOp::getIntrinsicID(m, n, k, layout, eltype) == 0) + return rewriter.notifyMatchFailure(op, kInvalidCaseStr); auto matrixType = adaptor.src().getType().cast(); for (unsigned i = 0, e = matrixType.getBody().size(); i < e; ++i) { @@ -229,29 +236,11 @@ struct WmmaStoreOpToNVVMLowering } Value leadingDim32 = rewriter.create( loc, rewriter.getI32Type(), leadDimension); - storeOpOperands.push_back(leadingDim32); - // Unpack the results from the source. - if (srcType.getElementType().isF16()) { - // Create nvvm.mma_store op. - if (srcTypeShape[0] == 16 && srcTypeShape[1] == 16) { - rewriter.create(loc, storeOpOperands); - } else { - return rewriter.notifyMatchFailure(op, kInvalidCaseStr); - } - rewriter.eraseOp(op); - return success(); - } - if (srcType.getElementType().isF32()) { - // Create nvvm.mma_store op. 
- if (srcTypeShape[0] == 16 && srcTypeShape[1] == 16) - rewriter.create(loc, storeOpOperands); - else { - return rewriter.notifyMatchFailure(op, kInvalidCaseStr); - } - rewriter.eraseOp(op); - return success(); - } - return failure(); + rewriter.create(loc, storeAddressCasted, m, n, k, layout, + eltype, storeOpOperands, leadingDim32); + + rewriter.eraseOp(op); + return success(); } }; @@ -292,40 +281,27 @@ struct WmmaMmaOpToNVVMLowering gpu::MMAMatrixType aType = subgroupMmaComputeOp.opA().getType().cast(); ArrayRef aTypeShape = aType.getShape(); - gpu::MMAMatrixType bType = - subgroupMmaComputeOp.opB().getType().cast(); - ArrayRef bTypeShape = bType.getShape(); gpu::MMAMatrixType cType = subgroupMmaComputeOp.opC().getType().cast(); ArrayRef cTypeShape = cType.getShape(); + int64_t m = cTypeShape[0]; + int64_t n = cTypeShape[1]; + int64_t k = aTypeShape[1]; + NVVM::MMALayout layout = NVVM::MMALayout::row; + NVVM::MMATypes sourceType = getElementType(aType); + NVVM::MMATypes destType = getElementType(cType); + if (NVVM::WMMAMmaOp::getIntrinsicID(m, n, k, layout, layout, sourceType, + destType) == 0) + return rewriter.notifyMatchFailure(op, kInvalidCaseStr); unpackOp(adaptor.opA()); unpackOp(adaptor.opB()); unpackOp(adaptor.opC()); - if (cType.getElementType().isF16()) { - if (aTypeShape[0] == 16 && aTypeShape[1] == 16 && bTypeShape[0] == 16 && - bTypeShape[1] == 16 && cTypeShape[0] == 16 && cTypeShape[1] == 16) { - // Create nvvm.wmma.mma op. - rewriter.replaceOpWithNewOp( - op, adaptor.opC().getType(), unpackedOps); - - return success(); - } - return rewriter.notifyMatchFailure(op, kInvalidCaseStr); - } - if (cType.getElementType().isF32()) { - if (aTypeShape[0] == 16 && aTypeShape[1] == 16 && bTypeShape[0] == 16 && - bTypeShape[1] == 16 && cTypeShape[0] == 16 && cTypeShape[1] == 16) { - // Create nvvm.wmma.mma op. - rewriter.replaceOpWithNewOp( - op, adaptor.opC().getType(), unpackedOps); - - return success(); - } - return rewriter.notifyMatchFailure(op, kInvalidCaseStr); - } - return failure(); + rewriter.replaceOpWithNewOp( + op, adaptor.opC().getType(), m, n, k, layout, layout, sourceType, + destType, unpackedOps); + return success(); } }; @@ -368,13 +344,101 @@ struct WmmaConstantOpToNVVMLowering } }; +static Value createMinMaxF(OpBuilder &builder, Location loc, Value lhs, + Value rhs, bool isMin) { + auto floatType = getElementTypeOrSelf(lhs.getType()).cast(); + Type i1Type = builder.getI1Type(); + if (auto vecType = lhs.getType().dyn_cast()) + i1Type = VectorType::get(vecType.getShape(), i1Type); + Value cmp = builder.create( + loc, i1Type, isMin ? 
LLVM::FCmpPredicate::olt : LLVM::FCmpPredicate::ogt, + lhs, rhs); + Value sel = builder.create<LLVM::SelectOp>(loc, cmp, lhs, rhs); + Value isNan = builder.create<LLVM::FCmpOp>( + loc, i1Type, LLVM::FCmpPredicate::uno, lhs, rhs); + Value nan = builder.create<LLVM::ConstantOp>( + loc, lhs.getType(), + builder.getFloatAttr(floatType, + APFloat::getQNaN(floatType.getFloatSemantics()))); + return builder.create<LLVM::SelectOp>(loc, isNan, sel, nan); +} + +static Value createScalarOp(OpBuilder &builder, Location loc, + gpu::MMAElementwiseOp op, + ArrayRef<Value> operands) { + switch (op) { + case gpu::MMAElementwiseOp::ADDF: + return builder.create<LLVM::FAddOp>(loc, operands[0].getType(), operands); + case gpu::MMAElementwiseOp::MULF: + return builder.create<LLVM::FMulOp>(loc, operands[0].getType(), operands); + case gpu::MMAElementwiseOp::MAXF: + return createMinMaxF(builder, loc, operands[0], operands[1], + /*isMin=*/false); + case gpu::MMAElementwiseOp::MINF: + return createMinMaxF(builder, loc, operands[0], operands[1], + /*isMin=*/true); + } + llvm_unreachable("unknown op"); +} + +/// Convert GPU MMA elementwise ops to extract + op + insert. +struct WmmaElementwiseOpToNVVMLowering + : public ConvertOpToLLVMPattern<gpu::SubgroupMmaElementwiseOp> { + using ConvertOpToLLVMPattern< + gpu::SubgroupMmaElementwiseOp>::ConvertOpToLLVMPattern; + + LogicalResult + matchAndRewrite(gpu::SubgroupMmaElementwiseOp subgroupMmaElementwiseOp, + OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (failed(areAllLLVMTypes(subgroupMmaElementwiseOp.getOperation(), + adaptor.getOperands(), rewriter))) + return failure(); + Location loc = subgroupMmaElementwiseOp.getLoc(); + size_t numOperands = adaptor.getOperands().size(); + LLVM::LLVMStructType destType = convertMMAToLLVMType( + subgroupMmaElementwiseOp.getType().cast<gpu::MMAMatrixType>()); + Value matrixStruct = rewriter.create<LLVM::UndefOp>(loc, destType); + for (size_t i = 0, e = destType.getBody().size(); i < e; ++i) { + SmallVector<Value> extractedOperands; + for (size_t opIdx = 0; opIdx < numOperands; opIdx++) { + Type elementType = adaptor.getOperands()[opIdx] + .getType() + .cast<LLVM::LLVMStructType>() + .getBody()[i]; + extractedOperands.push_back(rewriter.create<LLVM::ExtractValueOp>( + loc, elementType, adaptor.getOperands()[opIdx], + rewriter.getI32ArrayAttr(i))); + } + Value element = + createScalarOp(rewriter, loc, subgroupMmaElementwiseOp.operation(), + extractedOperands); + matrixStruct = rewriter.create<LLVM::InsertValueOp>( + loc, matrixStruct, element, rewriter.getI32ArrayAttr(i)); + } + rewriter.replaceOp(subgroupMmaElementwiseOp, matrixStruct); + return success(); + } +}; + } // anonymous namespace namespace mlir { + +/// Return the LLVMStructType corresponding to the MMAMatrixType `type`. 
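+/// For example, an f16 "COp" fragment maps to a struct of four vector<2xf16>
+/// values, matching inferMMAType(f16, MMAFrag::c).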
+LLVM::LLVMStructType convertMMAToLLVMType(gpu::MMAMatrixType type) { + NVVM::MMAFrag frag = convertOperand(type.getOperand()); + NVVM::MMATypes eltType = getElementType(type); + std::pair typeInfo = + inferMMAType(eltType, frag, type.getContext()); + return LLVM::LLVMStructType::getLiteral( + type.getContext(), SmallVector(typeInfo.second, typeInfo.first)); +} + void populateGpuWMMAToNVVMConversionPatterns(LLVMTypeConverter &converter, RewritePatternSet &patterns) { patterns.insert( - converter); + WmmaStoreOpToNVVMLowering, WmmaConstantOpToNVVMLowering, + WmmaElementwiseOpToNVVMLowering>(converter); } } // namespace mlir diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp index 51df6c763c3fe..b416c303ad51a 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp @@ -1364,6 +1364,65 @@ class ModuleConversionPattern : public SPIRVToLLVMConversion { } }; +//===----------------------------------------------------------------------===// +// VectorShuffleOp conversion +//===----------------------------------------------------------------------===// + +class VectorShufflePattern + : public SPIRVToLLVMConversion { +public: + using SPIRVToLLVMConversion::SPIRVToLLVMConversion; + LogicalResult + matchAndRewrite(spirv::VectorShuffleOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + auto components = adaptor.components(); + auto vector1 = adaptor.vector1(); + auto vector2 = adaptor.vector2(); + int vector1Size = vector1.getType().cast().getNumElements(); + int vector2Size = vector2.getType().cast().getNumElements(); + if (vector1Size == vector2Size) { + rewriter.replaceOpWithNewOp(op, vector1, vector2, + components); + return success(); + } + + auto dstType = typeConverter.convertType(op.getType()); + auto scalarType = dstType.cast().getElementType(); + auto componentsArray = components.getValue(); + auto context = rewriter.getContext(); + auto llvmI32Type = IntegerType::get(context, 32); + Value targetOp = rewriter.create(loc, dstType); + for (unsigned i = 0; i < componentsArray.size(); i++) { + if (componentsArray[i].isa()) + op.emitError("unable to support non-constant component"); + + int indexVal = componentsArray[i].cast().getInt(); + if (indexVal == -1) + continue; + + int offsetVal = 0; + Value baseVector = vector1; + if (indexVal >= vector1Size) { + offsetVal = vector1Size; + baseVector = vector2; + } + + Value dstIndex = rewriter.create( + loc, llvmI32Type, rewriter.getIntegerAttr(rewriter.getI32Type(), i)); + Value index = rewriter.create( + loc, llvmI32Type, + rewriter.getIntegerAttr(rewriter.getI32Type(), indexVal - offsetVal)); + + auto extractOp = rewriter.create( + loc, scalarType, baseVector, index); + targetOp = rewriter.create(loc, dstType, targetOp, + extractOp, dstIndex); + } + rewriter.replaceOp(op, targetOp); + return success(); + } +}; } // namespace //===----------------------------------------------------------------------===// @@ -1489,6 +1548,7 @@ void mlir::populateSPIRVToLLVMConversionPatterns( CompositeExtractPattern, CompositeInsertPattern, DirectConversionPattern, DirectConversionPattern, + VectorShufflePattern, // Shift ops ShiftPattern, diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 8e037ecf5c852..b97a04638a653 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ 
b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -130,6 +130,26 @@ static bool broadcastSupportsMMAMatrixType(vector::BroadcastOp broadcastOp) { broadcastOp.source().getType().isa(); } +/// Return the MMA elementwise enum associated with `op` if it is supported. +/// Return `llvm::None` otherwise. +static llvm::Optional +convertElementwiseOpToMMA(Operation *op) { + if (isa(op)) + return gpu::MMAElementwiseOp::ADDF; + if (isa(op)) + return gpu::MMAElementwiseOp::MULF; + if (isa(op)) + return gpu::MMAElementwiseOp::MAXF; + if (isa(op)) + return gpu::MMAElementwiseOp::MINF; + return llvm::None; +} + +/// Return true if the op is supported as elementwise op on MMAMatrix type. +static bool elementwiseSupportsMMAMatrixType(Operation *op) { + return convertElementwiseOpToMMA(op).hasValue(); +} + static bool supportsMMaMatrixType(Operation *op) { if (isa(op)) return true; @@ -143,7 +163,7 @@ static bool supportsMMaMatrixType(Operation *op) { return constantSupportsMMAMatrixType(constant); if (auto broadcast = dyn_cast(op)) return broadcastSupportsMMAMatrixType(broadcast); - return false; + return elementwiseSupportsMMAMatrixType(op); } // Analyze slice of operations based on convert op to figure out if the whole @@ -423,6 +443,18 @@ static void convertYieldOp(scf::YieldOp op, op.erase(); } +/// Convert an elementwise op to the equivalent elementwise op on MMA matrix. +static void convertElementwiseOp(Operation *op, gpu::MMAElementwiseOp opType, + llvm::DenseMap &valueMapping) { + OpBuilder b(op); + SmallVector matrixOperands; + for (Value operand : op->getOperands()) + matrixOperands.push_back(valueMapping.find(operand)->second); + Value newOp = b.create( + op->getLoc(), matrixOperands[0].getType(), matrixOperands, opType); + valueMapping[op->getResult(0)] = newOp; +} + namespace mlir { void populatePrepareVectorToMMAPatterns(RewritePatternSet &patterns) { @@ -448,6 +480,8 @@ void convertVectorToMMAOps(FuncOp funcOp) { convertForOp(forOp, valueMapping); } else if (auto yiledOp = dyn_cast(op)) { convertYieldOp(yiledOp, valueMapping); + } else if (auto elementwiseType = convertElementwiseOpToMMA(op)) { + convertElementwiseOp(op, *elementwiseType, valueMapping); } } } diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 2beb7ea7bc882..14520ce6767d8 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -24,6 +24,7 @@ add_mlir_dialect_library(MLIRGPUOps DEPENDS MLIRGPUOpsIncGen + MLIRGPUOpsEnumsGen MLIRGPUOpInterfacesIncGen LINK_LIBS PUBLIC diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index ba1710b57a919..9baff7f53ca8f 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -1185,6 +1185,7 @@ void AllocOp::getCanonicalizationPatterns(RewritePatternSet &results, } #include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc" +#include "mlir/Dialect/GPU/GPUOpsEnums.cpp.inc" #define GET_OP_CLASSES #include "mlir/Dialect/GPU/GPUOps.cpp.inc" diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 6cf166f41cd18..ff82f9e1eaa66 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -31,6 +31,7 @@ using namespace mlir; using namespace NVVM; #include "mlir/Dialect/LLVMIR/NVVMOpsDialect.cpp.inc" +#include "mlir/Dialect/LLVMIR/NVVMOpsEnums.cpp.inc" //===----------------------------------------------------------------------===// // Printing/parsing 
for NVVM ops @@ -132,201 +133,100 @@ static LogicalResult verify(MmaOp op) { return op.emitOpError("unimplemented mma.sync variant"); } -template -static LogicalResult verifyWMMALoadOp(T op, StringRef operand) { - MLIRContext *context = op.getContext(); - auto i32Ty = IntegerType::get(context, 32); - auto i32Ptr1Ty = LLVM::LLVMPointerType::get(i32Ty, 1); - auto i32Ptr3Ty = LLVM::LLVMPointerType::get(i32Ty, 3); - auto i32Ptr0Ty = LLVM::LLVMPointerType::get(i32Ty, 0); - auto f16Ty = FloatType::getF16(context); - auto f32Ty = FloatType::getF32(context); - auto f16x2Ty = VectorType::get(2, f16Ty); - auto f16x2x4StructTy = LLVM::LLVMStructType::getLiteral( - context, {f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty}); - auto f16x2x8StructTy = LLVM::LLVMStructType::getLiteral( - context, - {f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty}); - auto f32x8StructTy = LLVM::LLVMStructType::getLiteral( - context, {f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty}); - - SmallVector operandTypes(op.getOperandTypes().begin(), - op.getOperandTypes().end()); - if (operandTypes != SmallVector{i32Ptr1Ty, i32Ty} && - operandTypes != SmallVector{i32Ptr3Ty, i32Ty} && - operandTypes != SmallVector{i32Ptr0Ty, i32Ty}) { - return op.emitOpError("expected operands to be a source pointer in memory " - "space 0, 1, 3 followed by ldm of the source"); +std::pair +inferMMAType(NVVM::MMATypes type, NVVM::MMAFrag frag, MLIRContext *context) { + unsigned numberElements = 0; + Type elementType; + OpBuilder builder(context); + Type f16x2 = VectorType::get(2, builder.getF16Type()); + if (type == NVVM::MMATypes::f16) { + elementType = f16x2; + if (frag == NVVM::MMAFrag::a || frag == NVVM::MMAFrag::b) + numberElements = 8; + else + numberElements = 4; + } else if (type == NVVM::MMATypes::f32) { + elementType = builder.getF32Type(); + numberElements = 8; + } else if (type == NVVM::MMATypes::tf32) { + elementType = builder.getI32Type(); + numberElements = 4; } - - if (operand.equals("AOp") || operand.equals("BOp")) { - if (op.getType() != f16x2x8StructTy) { - return op.emitOpError("expected result type of loadAOp and loadBOp to be " - "a struct of 8 s"); - } - } else if (operand.equals("COp")) { - if (op.getType() != f16x2x4StructTy && op.getType() != f32x8StructTy) { - return op.emitOpError("expected result type of loadCOp to be a struct of " - "4 s or 8 f32s"); - } - } - - return success(); -} - -static LogicalResult verify(WMMALoadAM16N16K16Op op) { - return verifyWMMALoadOp(op, "AOp"); -} - -static LogicalResult verify(WMMALoadBM16N16K16Op op) { - return verifyWMMALoadOp(op, "BOp"); -} - -static LogicalResult verify(WMMALoadCF16M16N16K16Op op) { - return verifyWMMALoadOp(op, "COp"); -} - -static LogicalResult verify(WMMALoadCF32M16N16K16Op op) { - return verifyWMMALoadOp(op, "COp"); -} - -template -static bool verifyWMMAStoreOp(T op, SmallVector &containedElems) { - SmallVector operandTypes(op.getOperandTypes().begin(), - op.getOperandTypes().end()); - if (operandTypes == containedElems) - return true; - - return false; -} - -static LogicalResult verify(WMMAStoreF16M16N16K16Op op) { - MLIRContext *context = op.getContext(); - auto i32Ty = IntegerType::get(context, 32); - auto i32Ptr1Ty = LLVM::LLVMPointerType::get(i32Ty, 1); - auto i32Ptr3Ty = LLVM::LLVMPointerType::get(i32Ty, 3); - auto i32Ptr0Ty = LLVM::LLVMPointerType::get(i32Ty, 0); - auto f16Ty = FloatType::getF16(context); - auto f16x2Ty = VectorType::get(2, f16Ty); - SmallVector type1{i32Ptr1Ty, f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty, i32Ty}; - SmallVector 
type0{i32Ptr0Ty, f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty, i32Ty}; - SmallVector type3{i32Ptr3Ty, f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty, i32Ty}; - if (verifyWMMAStoreOp(op, type1) || verifyWMMAStoreOp(op, type0) || - verifyWMMAStoreOp(op, type3)) - return success(); - - return op.emitOpError("expected operands to be a source pointer in memory" - "space 0, 1, 3 followed by ldm of the source"); -} - -static LogicalResult verify(WMMAStoreF32M16N16K16Op op) { - MLIRContext *context = op.getContext(); - auto i32Ty = IntegerType::get(context, 32); - auto i32Ptr1Ty = LLVM::LLVMPointerType::get(i32Ty, 1); - auto i32Ptr3Ty = LLVM::LLVMPointerType::get(i32Ty, 3); - auto i32Ptr0Ty = LLVM::LLVMPointerType::get(i32Ty, 0); - auto f32Ty = FloatType::getF32(context); - - SmallVector type1{i32Ptr1Ty, f32Ty, f32Ty, f32Ty, f32Ty, - f32Ty, f32Ty, f32Ty, f32Ty, i32Ty}; - SmallVector type0{i32Ptr0Ty, f32Ty, f32Ty, f32Ty, f32Ty, - f32Ty, f32Ty, f32Ty, f32Ty, i32Ty}; - SmallVector type3{i32Ptr3Ty, f32Ty, f32Ty, f32Ty, f32Ty, - f32Ty, f32Ty, f32Ty, f32Ty, i32Ty}; - if (verifyWMMAStoreOp(op, type0) || verifyWMMAStoreOp(op, type1) || - verifyWMMAStoreOp(op, type3)) - return success(); - - return op.emitOpError("expected operands to be a source pointer in memory" - "space 0, 1, 3 followed by ldm of the source"); + assert(numberElements != 0 && elementType != nullptr); + return std::make_pair(elementType, numberElements); } -static LogicalResult verify(WMMAMmaF16F16M16N16K16Op op) { - MLIRContext *context = op.getContext(); - auto f16Ty = FloatType::getF16(context); - auto f16x2Ty = VectorType::get(2, f16Ty); - auto f16x2x4StructTy = LLVM::LLVMStructType::getLiteral( - context, {f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty}); - - SmallVector operandTypes(op.getOperandTypes().begin(), - op.getOperandTypes().end()); - if (operandTypes != SmallVector(20, f16x2Ty)) - return op.emitOpError("expected 20 s as operands"); - - if (op.getResult().getType() != f16x2x4StructTy) - return op.emitOpError("expected result type to be a struct of 4 s"); - +static LogicalResult verify(NVVM::WMMALoadOp op) { + unsigned addressSpace = + op.ptr().getType().cast().getAddressSpace(); + if (addressSpace != 0 && addressSpace != 1 && addressSpace != 3) + return op.emitOpError("expected source pointer in memory " + "space 0, 1, 3"); + + if (NVVM::WMMALoadOp::getIntrinsicID(op.m(), op.n(), op.k(), op.layout(), + op.eltype(), op.frag()) == 0) + return op.emitOpError() << "invalid attribute combination"; + std::pair typeInfo = + inferMMAType(op.eltype(), op.frag(), op.getContext()); + Type dstType = LLVM::LLVMStructType::getLiteral( + op.getContext(), SmallVector(typeInfo.second, typeInfo.first)); + if (op.getType() != dstType) + return op.emitOpError("expected destination type is a structure of ") + << typeInfo.second << " elements of type " << typeInfo.first; return success(); } -static LogicalResult parseWMMAMmaF16F16M16N16K16Op(OpAsmParser &parser, - OperationState &result) { - SmallVector operands; - ::llvm::SMLoc operandsLoc; - Type operandType; - Type resType; - - operandsLoc = parser.getCurrentLocation(); - if (parser.parseOperandList(operands) || - parser.parseOptionalAttrDict(result.attributes) || parser.parseColon() || - parser.parseType(operandType) || parser.parseArrow()) - return failure(); - - unsigned numOperands = operands.size(); - SmallVector operandTypes(numOperands, operandType); - if (parser.parseType(resType)) - return failure(); - result.addTypes(resType); - if (parser.resolveOperands(operands, operandTypes, operandsLoc, - result.operands)) - 
return failure(); +static LogicalResult verify(NVVM::WMMAStoreOp op) { + unsigned addressSpace = + op.ptr().getType().cast<LLVM::LLVMPointerType>().getAddressSpace(); + if (addressSpace != 0 && addressSpace != 1 && addressSpace != 3) + return op.emitOpError("expected operands to be a source pointer in memory " + "space 0, 1, 3"); + + if (NVVM::WMMAStoreOp::getIntrinsicID(op.m(), op.n(), op.k(), op.layout(), + op.eltype()) == 0) + return op.emitOpError() << "invalid attribute combination"; + std::pair<Type, unsigned> typeInfo = + inferMMAType(op.eltype(), NVVM::MMAFrag::c, op.getContext()); + if (op.args().size() != typeInfo.second) + return op.emitOpError() + << "expected " << typeInfo.second << " data operands"; + if (llvm::any_of(op.args(), [&typeInfo](Value operands) { + return operands.getType() != typeInfo.first; + })) + return op.emitOpError() + << "expected data operands of type " << typeInfo.first; return success(); } -static void printWMMAMmaF16F16M16N16K16Op(OpAsmPrinter &p, - WMMAMmaF16F16M16N16K16Op &op) { - p << ' '; - p << op.args(); - p.printOptionalAttrDict(op->getAttrs(), {}); - p << " : "; - p << op->getOperand(0).getType(); - p << ' ' << "->"; - p << ' '; - p << ::llvm::ArrayRef<::mlir::Type>(op.res().getType()); -} - -static LogicalResult verify(WMMAMmaF32F32M16N16K16Op op) { - unsigned numABOperands = 16; - unsigned numCOperands = 8; - MLIRContext *context = op.getContext(); - auto f16Ty = FloatType::getF16(context); - auto f32Ty = FloatType::getF32(context); - auto f16x2Ty = VectorType::get(2, f16Ty); - auto f32x8StructTy = LLVM::LLVMStructType::getLiteral( - context, {f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty}); - - SmallVector<Type, 8> abOpTypes; - SmallVector<Type, 8> bOpTypes; - SmallVector<Type, 8> cOpTypes; - - for (auto operand : op->getOperands().take_front(numABOperands)) { - abOpTypes.push_back(operand.getType()); - } - - for (auto operand : - op->getOperands().drop_front(numABOperands).take_front(numCOperands)) { - cOpTypes.push_back(operand.getType()); +static LogicalResult verify(NVVM::WMMAMmaOp op) { + if (NVVM::WMMAMmaOp::getIntrinsicID(op.m(), op.n(), op.k(), op.layoutA(), + op.layoutB(), op.eltypeA(), + op.eltypeB()) == 0) + return op.emitOpError() << "invalid attribute combination"; + std::pair<Type, unsigned> typeInfoA = + inferMMAType(op.eltypeA(), NVVM::MMAFrag::a, op.getContext()); + std::pair<Type, unsigned> typeInfoB = + inferMMAType(op.eltypeA(), NVVM::MMAFrag::b, op.getContext()); + std::pair<Type, unsigned> typeInfoC = + inferMMAType(op.eltypeB(), NVVM::MMAFrag::c, op.getContext()); + SmallVector<Type, 8> arguments; + arguments.append(typeInfoA.second, typeInfoA.first); + arguments.append(typeInfoB.second, typeInfoB.first); + arguments.append(typeInfoC.second, typeInfoC.first); + unsigned numArgs = arguments.size(); + if (op.args().size() != numArgs) + return op.emitOpError() << "expected " << numArgs << " arguments"; + for (unsigned i = 0; i < numArgs; i++) { + if (op.args()[i].getType() != arguments[i]) + return op.emitOpError() + << "expected argument " << i << " to be of type " << arguments[i]; } - - if (abOpTypes != SmallVector<Type, 8>(16, f16x2Ty)) - return op.emitOpError("expected 16 <2 x half>s for `a` and `b` operand"); - - if (cOpTypes != SmallVector<Type, 8>(8, f32Ty)) - return op.emitOpError("expected 8 f32s for `c` operand"); - - if (op.getResult().getType() != f32x8StructTy) - return op.emitOpError("expected result type to be a struct of 8 f32s"); - + Type dstType = LLVM::LLVMStructType::getLiteral( + op.getContext(), SmallVector<Type, 8>(typeInfoC.second, typeInfoC.first)); + if (op.getType() != dstType) + return op.emitOpError("expected destination type is a structure
of ") + << typeInfoC.second << " elements of type " << typeInfoC.first; return success(); } diff --git a/mlir/lib/Dialect/Linalg/CMakeLists.txt b/mlir/lib/Dialect/Linalg/CMakeLists.txt index 35c4201f21454..d661b0bf363b7 100644 --- a/mlir/lib/Dialect/Linalg/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(Analysis) +add_subdirectory(ComprehensiveBufferize) add_subdirectory(IR) add_subdirectory(Transforms) add_subdirectory(Utils) diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp new file mode 100644 index 0000000000000..33fdb6fdbd1be --- /dev/null +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp @@ -0,0 +1,17 @@ +//===- BufferizableOpInterface.cpp - Comprehensive Bufferize --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h" + +namespace mlir { +namespace linalg { + +#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp.inc" + +} // namespace linalg +} // namespace mlir diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt new file mode 100644 index 0000000000000..95f613937b9be --- /dev/null +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt @@ -0,0 +1,30 @@ +set(LLVM_OPTIONAL_SOURCES + BufferizableOpInterface.cpp + ComprehensiveBufferize.cpp +) + +add_mlir_dialect_library(MLIRBufferizableOpInterface + BufferizableOpInterface.cpp + + DEPENDS + MLIRBufferizableOpInterfaceIncGen + + LINK_LIBS PUBLIC + MLIRIR +) + +add_mlir_dialect_library(MLIRComprehensiveBufferize + ComprehensiveBufferize.cpp + + LINK_LIBS PUBLIC + MLIRBufferizableOpInterface + MLIRInferTypeOpInterface + MLIRIR + MLIRMemRef + MLIRLinalg + MLIRSCF + MLIRStandard + MLIRStandardOpsTransforms + MLIRTensor + MLIRVector +) diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp similarity index 62% rename from mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp rename to mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp index 0373867ee1b6e..cf9ad6b5418ac 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp +++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp @@ -105,15 +105,12 @@ // expected layouts after transformations. Combinations of memref.cast + // canonicalization are responsible for clean ups. 
-#include "mlir/Dialect/Linalg/Transforms/ComprehensiveBufferize.h" +#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h" #include -#include "PassDetail.h" +#include "mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Passes.h" -#include "mlir/Dialect/Linalg/Transforms/Transforms.h" -#include "mlir/Dialect/Linalg/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/Dialect/Utils/StaticValueUtils.h" @@ -124,8 +121,6 @@ #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/BufferUtils.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/Passes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" @@ -138,14 +133,9 @@ using namespace mlir; using namespace linalg; using namespace tensor; -using BufferRelation = BufferizationAliasInfo::BufferRelation; - #define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ") #define LDBG(X) LLVM_DEBUG(DBGS() << X) -// TODO: from some HW description. -static constexpr int64_t kBufferAlignments = 128; - // Forward declarations. static std::string printOperationInfo(Operation *, bool prefix = true); static std::string printValueInfo(Value, bool prefix = true); @@ -297,7 +287,7 @@ static void setInPlaceOpResult(OpResult opResult, /// result can be buferized inPlace. /// If no InPlaceSpec attribute has been set for `opResult`, return /// InPlaceSpec::None. -static InPlaceSpec getInPlace(OpResult opResult) { +LLVM_ATTRIBUTE_UNUSED static InPlaceSpec getInPlace(OpResult opResult) { if (!opResult) return InPlaceSpec::None; @@ -356,7 +346,7 @@ static void removeBufferizationFuncArguments(BlockArgument bbArg) { LinalgDialect::kInplaceableAttrName); } -LLVM_ATTRIBUTE_UNUSED static InPlaceSpec getInPlace(Value v) { +static InPlaceSpec getInPlace(Value v) { if (auto bbArg = v.dyn_cast()) return getInPlace(bbArg); return getInPlace(v.cast()); @@ -415,231 +405,62 @@ static std::string printValueInfo(Value value, bool prefix) { } //===----------------------------------------------------------------------===// -// Op-specific semantics helper to retrieve matching inplaceable result. -// These should become proper interfaces interfaces when the time is right. -// Modulo better naming, these helpers / interfaces comprise information on: -// 1. Whether an op has a known bufferization behavior (i.e. an instance of -// BufferizableOpInterface). -// 2. Whether an op, when bufferized inplace, can guarantee an -// (OpOperand, OpResult) pair bufferizes to equivalent (i.e. the same) -// buffers in memory. -// 3. Whether an op operand, when bufferized inplace, aliases a return value. -// 4. Whether an op return value, when bufferized inplace, aliases an operand. -// 5. Whether an op bufferizes to a memory read. -// 6. Whether an op bufferizes to a memory write. -// 7. The buffer relationship between an operand and it corresponding result -// (in case of in-place bufferization). -// These interfaces are necessary to distinguish between various cases and allow -// special inplace behavior for (ExtractSliceOp, InsertSliceOp) pairs. +// Helper functions for BufferizableOpInterface //===----------------------------------------------------------------------===// -/// Return `true` if the op is explicitly supported by bufferization or if it -/// has no result tensors. -/// Other cases must be conservative. 
-static bool hasKnownBufferizationAliasingBehavior(Operation *op) { - return - // clang-format off - isa(op) - // clang-format on - || (none_of(op->getResultTypes(), isaTensor) && - none_of(op->getOperandTypes(), isaTensor)); -} - -/// Return the OpResult that may bufferize into the same buffer as `opOperand` -/// when the op is bufferized inplace. -/// Return null if no such result exists. -static OpResult getInplaceableOpResult(TiledLoopOp op, OpOperand &opOperand) { - return op.getTiedOpResult(opOperand); -} - -/// Return the OpResult that may bufferize into the same buffer as `opOperand` -/// when the op is bufferized inplace. -/// Return null if no such result exists. -static OpResult getInplaceableOpResult(scf::ForOp forOp, OpOperand &opOperand) { - if (!opOperand.get().getType().isa<RankedTensorType>()) - return OpResult(); - return forOp.getResultForOpOperand(opOperand); +/// Determine which OpOperand* will alias with `result` if the op is bufferized +/// in place. Return an empty vector if the op is not bufferizable. +static SmallVector<OpOperand *> getAliasingOpOperand(OpResult result) { + if (Operation *op = result.getDefiningOp()) + if (auto bufferizableOp = dyn_cast<BufferizableOpInterface>(op)) + return bufferizableOp.getAliasingOpOperand(result); + return {}; } -/// Return the OpResult that may bufferize into the same buffer as `opOperand` -/// when the op is bufferized inplace. -/// Return null if no such result exists. -static OpResult getInplaceableOpResult(LinalgOp linalgOp, - OpOperand &opOperand) { - if (!opOperand.get().getType().isa<RankedTensorType>()) - return OpResult(); - // For now assume inputs are never inplaceable. - // TODO: refine this. - if (opOperand.getOperandNumber() < linalgOp.getNumInputs()) - return OpResult(); - int64_t outputOperandIndex = - opOperand.getOperandNumber() - linalgOp.getNumInputs(); - int64_t numOutputBuffers = 0; - for (unsigned idx = 0; idx < outputOperandIndex; ++idx) - if (!linalgOp.getOutputOperand(idx)->get().getType().isa<TensorType>()) - ++numOutputBuffers; - return linalgOp->getResult(outputOperandIndex - numOutputBuffers); -} - -/// Return the OpResult that may bufferize into the same buffer as `opOperand` -/// when the op is bufferized inplace. -/// Return null if no such result exists. -static OpResult getInplaceableOpResult(VectorTransferOpInterface op, - OpOperand &opOperand) { - if (opOperand.get() != op.source() || - !op.source().getType().isa<TensorType>() || - isa<vector::TransferReadOp>(op)) - return OpResult(); - return op->getResult(0); +/// Determine which OpResult will alias with `opOperand` if the op is bufferized +/// in place. Return an empty OpResult if the op is not bufferizable. +static OpResult getAliasingOpResult(OpOperand &opOperand) { + if (auto bufferizableOp = + dyn_cast<BufferizableOpInterface>(opOperand.getOwner())) + return bufferizableOp.getAliasingOpResult(opOperand); + return OpResult(); }
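// Aside: all of the new helpers reduce to the same move: try to cast the op
// to BufferizableOpInterface, delegate on success, and fall back to a
// conservative default otherwise. A standalone sketch of that dispatch
// pattern (hypothetical Op/BufferizableOp classes, not the MLIR types):
#include <iostream>

struct Op {
  virtual ~Op() = default;
};

// Stand-in for an op that implements BufferizableOpInterface.
struct BufferizableOp : Op {
  virtual bool bufferizesToMemoryRead() const { return false; }
};

bool bufferizesToMemoryRead(const Op &op) {
  // Delegate to the interface when the op implements it...
  if (const auto *bufferizable = dynamic_cast<const BufferizableOp *>(&op))
    return bufferizable->bufferizesToMemoryRead();
  // ...and stay conservative for unknown ops: assume they read.
  return true;
}

int main() {
  Op unknown;
  BufferizableOp known;
  std::cout << bufferizesToMemoryRead(unknown) << ' '  // 1 (conservative)
            << bufferizesToMemoryRead(known) << '\n';  // 0 (delegated)
}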
-static OpResult getInplaceableOpResult(OpOperand &opOperand) { - return TypeSwitch<Operation *, OpResult>(opOperand.getOwner()) - // clang-format off - // Ops that perform destructive updates on operand(s) to produce - // result(s). - .Case( - [&](auto op) { return getInplaceableOpResult(op, opOperand); }) - // Some ops just return an alias to an operand when bufferized inplace. - // Such OpResults are never inplaceable on an OpOperand. - .Case( - [] (auto op) { return OpResult(); }) - // CallOpInterface is special, it needs to wait for the callee to be - // bufferized and needs to inspect the BufferAliasInfo object. It can't - // make a proper determination by itself and needs to be conservative. - .Case([&](CallOpInterface op) { return OpResult(); }) - // Other ops. - .Default([&](Operation *op) { return OpResult(); }); - // clang-format on -} - -/// Either one of the corresponding yield values from the then/else branches -/// may alias with the result. -static void populateAliasingOpOperands(scf::IfOp op, OpResult result, - SmallVector<OpOperand *> &operands) { - size_t resultNum = std::distance(op->getOpResults().begin(), - llvm::find(op->getOpResults(), result)); - operands.push_back(&op.thenYield()->getOpOperand(resultNum)); - operands.push_back(&op.elseYield()->getOpOperand(resultNum)); -} +/// Return true if `opOperand` bufferizes to a memory read. Return `true` if the +/// op is not bufferizable. +static bool bufferizesToMemoryRead(OpOperand &opOperand) { + if (auto bufferizableOp = + dyn_cast<BufferizableOpInterface>(opOperand.getOwner())) + return bufferizableOp.bufferizesToMemoryRead(opOperand); -/// Determine which OpOperand* will alias with `result` if the op is bufferized -/// in place. Note that multiple OpOperands can may potentially alias with an -/// OpResult. E.g.: std.select in the future. -static SmallVector<OpOperand *> getAliasingOpOperand(OpResult result) { - SmallVector<OpOperand *> r; - // Unknown ops are handled conservatively and never bufferize in-place. - if (!hasKnownBufferizationAliasingBehavior(result.getDefiningOp())) - return SmallVector<OpOperand *>(); - TypeSwitch<Operation *>(result.getDefiningOp()) - .Case([&](scf::IfOp op) { populateAliasingOpOperands(op, result, r); }) - .Case( - [&](auto op) { r.push_back(&op->getOpOperand(0)); }) - // In the case of scf::ForOp, this currently assumes the iter_args / yield - // are 1-1. This may fail and is verified at the end. - // TODO: update this. - .Case([&](scf::ForOp op) { - r.push_back(&op.getIterOpOperands()[result.getResultNumber()]); - }) - .Case([&](InsertSliceOp op) { r.push_back(&op->getOpOperand(1)); }) - .Case([&](LinalgOp op) { - r.push_back(op.getOutputTensorOperands()[result.getResultNumber()]); - }) - .Case([&](TiledLoopOp op) { - // TODO: TiledLoopOp helper method to avoid leaking impl details. - r.push_back(&op->getOpOperand(op.getNumControlOperands() + - op.getNumInputs() + - result.getResultNumber())); - }) - .Case([&](vector::TransferWriteOp op) { - r.push_back(&op->getOpOperand(1)); - }) - .Case( - [&](auto op) {}) - .Default([&](Operation *op) { - op->dump(); - llvm_unreachable("unexpected defining op"); - }); - return r; + // Unknown op that returns a tensor. The inplace analysis does not support it. + // Conservatively return true. + return true; } -/// If the an ExtractSliceOp is bufferized in-place, the source operand will -/// alias with the result. -static OpResult getAliasingOpResult(ExtractSliceOp op, OpOperand &opOperand) { - if (&op->getOpOperand(0) == &opOperand) - return op->getResult(0); - return OpResult(); -} +/// Return true if `opOperand` bufferizes to a memory write.
Return +/// `true` if the op is not bufferizable. +static bool bufferizesToMemoryWrite(OpOperand &opOperand) { + if (auto bufferizableOp = + dyn_cast<BufferizableOpInterface>(opOperand.getOwner())) + return bufferizableOp.bufferizesToMemoryWrite(opOperand); -/// If the a tensor::CastOp is bufferized in-place, the source operand will -/// alias with the result. -static OpResult getAliasingOpResult(tensor::CastOp op, OpOperand &opOperand) { - if (&op->getOpOperand(0) == &opOperand) - return op->getResult(0); - return OpResult(); + // Unknown op that returns a tensor. The inplace analysis does not support it. + // Conservatively return true. + return true; } -/// Determine which OpResult will alias with `opOperand` if the op is bufferized -/// in place. This is a superset of `getInplaceableOpResult`. -/// TODO: in the future this may need to evolve towards a list of OpResult. -static OpResult getAliasingOpResult(OpOperand &opOperand) { - return TypeSwitch<Operation *, OpResult>(opOperand.getOwner()) - // Some ops are different: Their result is not inplaceable on an OpOperand - // but when bufferized inplace, their result is aliasing (a subregion of) - // an OpOperand. - .Case( - [&](auto op) { return getAliasingOpResult(op, opOperand); }) - // All other ops, return the result of `getInplaceableOpResult`. - .Default( - [&](Operation *op) { return getInplaceableOpResult(opOperand); }); -} -/// Return `true` if the given OpOperand does not bufferize to a memory read or -/// write, but creates an alias when bufferized inplace. +/// Return true if `opOperand` neither reads nor writes but bufferizes to an +/// alias. Return false if the op is not bufferizable. static bool bufferizesToAliasOnly(OpOperand &opOperand) { - Operation *owner = opOperand.getOwner(); - // TODO: In the future this may need to evolve into a TypeSwitch. For all - // currently supported ops, the aliasing-only OpOperand is always the first - // one. - return isa(owner) && - &opOperand == &owner->getOpOperand(0); -} + if (auto bufferizableOp = + dyn_cast<BufferizableOpInterface>(opOperand.getOwner())) + return bufferizableOp.bufferizesToAliasOnly(opOperand); -// Predeclaration of function. -static bool bufferizesToMemoryRead(OpOperand &opOperand); + // Unknown op that returns a tensor. The inplace analysis does not support it. + // Conservatively return false. + return false; +} /// Return true if the given value is read by an op that bufferizes to a memory /// read. Also takes into account ops that create an alias but do not read by @@ -651,7 +472,7 @@ static bool isValueRead(Value value) { while (!workingSet.empty()) { OpOperand *uMaybeReading = workingSet.pop_back_val(); - // Skip over all ops that create an alias but do not read. + // Skip over all ops that neither read nor write (but create an alias). if (bufferizesToAliasOnly(*uMaybeReading)) for (OpOperand &use : getAliasingOpResult(*uMaybeReading).getUses()) workingSet.push_back(&use); @@ -662,78 +483,40 @@ static bool isValueRead(Value value) { return false; } -/// Return true if `opOperand` bufferizes to a memory read. -static bool bufferizesToMemoryRead(OpOperand &opOperand) { - // Unknown op that returns a tensor. The inplace analysis does not support - // it. Conservatively return true. - if (!hasKnownBufferizationAliasingBehavior(opOperand.getOwner())) - return true; - // Some ops alone do not bufferize to a memory read, but one of their uses - // may. - if (bufferizesToAliasOnly(opOperand)) - return false; - // scf::ForOp alone doesn't bufferize to a memory read, one of the uses of its - // matching bbArg may.
- if (auto forOp = dyn_cast<scf::ForOp>(opOperand.getOwner())) - return isValueRead(forOp.getRegionIterArgForOpOperand(opOperand)); - // TiledLoop alone doesn't bufferize to a memory read, one of the uses of its - // matching bbArg may. - if (auto tiledLoopOp = dyn_cast<TiledLoopOp>(opOperand.getOwner())) - return isValueRead(tiledLoopOp.getTiedBlockArgument(opOperand)); - // CallOpInterface alone doesn't bufferize to a memory read, one of the uses - // of the matching bbArg may. It is the responsibility of the caller to - // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be - // conservative. - if (auto callOp = dyn_cast<CallOpInterface>(opOperand.getOwner())) - return true; - if (auto linalgOp = dyn_cast<LinalgOp>(opOperand.getOwner())) - return linalgOp.isInputTensor(&opOperand) || - linalgOp.isInitTensor(&opOperand); - // All other cases are considered to bufferize to memory reads. - // In particular, terminators are often the last use and need to be considered - // as reads to return the proper value and avoid WAW clobbers. - return true; -} - -/// Return true if `opOperand` bufferizes to a memory write. -static bool bufferizesToMemoryWrite(OpOperand &opOperand) { - // These terminators are not writes. - if (isa(opOperand.getOwner())) - return false; - // Some ops alone do not bufferize to a memory write, but one of their uses - // may. - if (bufferizesToAliasOnly(opOperand)) - return false; - // CallOpInterface alone doesn't bufferize to a memory write, one of the uses - // of the matching bbArg may. It is the responsibility of the caller to - // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be - // conservative. - if (auto callOp = dyn_cast<CallOpInterface>(opOperand.getOwner())) - return true; - // Unknown op that returns a tensor. The inplace analysis does not support - // it. Conservatively return true. - if (!hasKnownBufferizationAliasingBehavior(opOperand.getOwner())) - return true; - OpResult opResult = getAliasingOpResult(opOperand); - // Only supported op with a matching result for opOperand bufferize to a - // write. E.g., ReturnOp does not bufferize to a write. - return static_cast<bool>(opResult); -} +/// Return the relationship between the operand and its corresponding +/// OpResult that it may alias with. Return None if the op is not bufferizable. +static BufferRelation bufferRelation(OpOperand &opOperand) { + if (auto bufferizableOp = + dyn_cast<BufferizableOpInterface>(opOperand.getOwner())) + return bufferizableOp.bufferRelation(opOperand); -/// Returns the relationship between the operand and the its corresponding -/// OpResult that it may alias with. -static BufferRelation bufferRelation(OpOperand &operand) { - return TypeSwitch<Operation *, BufferRelation>(operand.getOwner()) - // ExtractSliceOp returns a subview of the original tensor. - .Case([&](ExtractSliceOp op) { return BufferRelation::None; }) - // All other ops: Buffers are equivalent. - .Default([&](Operation *op) { return BufferRelation::Equivalent; }); + // Unknown op that returns a tensor. The inplace analysis does not support it. + // Conservatively return None. + return BufferRelation::None; } //===----------------------------------------------------------------------===// // Bufferization-specific alias analysis. //===----------------------------------------------------------------------===// +/// Return true if the (ExtractSliceOp, InsertSliceOp) pair matches (i.e. +/// equivalent operand / result and same offset/sizes/strides specification). +/// +/// This is one particular type of relationship between ops on tensors that +/// reduces to an equivalence on buffers.
This should be generalized and +/// exposed as interfaces on the proper types. +static bool +areEquivalentExtractSliceOps(const BufferizationAliasInfo &aliasInfo, + ExtractSliceOp st, InsertSliceOp sti) { + if (!st || !sti) + return false; + if (!aliasInfo.areEquivalentBufferizedValues(st.source(), sti.dest())) + return false; + if (!sameOffsetsSizesAndStrides(st, sti, isEqualConstantIntOrValue)) + return false; + return true; +} + /// Return true if opOperand has been decided to bufferize in-place. static bool isInplaceMemoryWrite(OpOperand &opOperand) { // Ops that do not bufferize to a memory write, cannot be write in-place. @@ -793,36 +576,43 @@ void BufferizationAliasInfo::insertNewBufferEquivalence(Value newValue, /// Return true if, under current bufferization decisions, the buffer of `value` /// is not writable. -bool BufferizationAliasInfo::aliasesNonWritableBuffer(Value value) const { +static bool aliasesNonWritableBuffer(Value value, + const BufferizationAliasInfo &aliasInfo) { LDBG("----Start aliasesNonWritableBuffer\n"); - for (Value v : getAliases(value)) { + bool foundNonWritableBuffer = false; + aliasInfo.applyOnAliases(value, [&](Value v) { LDBG("-----------examine: " << printValueInfo(v) << '\n'); - if (bufferizesToWritableMemory(v)) { + if (aliasInfo.bufferizesToWritableMemory(v)) { LDBG("-----------Value is known to be writable -> skip: " << printValueInfo(v) << '\n'); - continue; + return; } if (auto bbArg = v.dyn_cast<BlockArgument>()) { if (getInPlace(bbArg) == InPlaceSpec::True) { LDBG("-----------bbArg is writable -> skip: " << printValueInfo(bbArg) << '\n'); - continue; + return; } LDBG("-----------notWritable bbArg\n"); - return true; + foundNonWritableBuffer = true; + return; } - if (Operation *op = v.getDefiningOp()) { - if (isa<arith::ConstantOp>(op) || - !hasKnownBufferizationAliasingBehavior(op)) { - LDBG("-----------notWritable op\n"); - return true; - } + auto bufferizableOp = dyn_cast<BufferizableOpInterface>(v.getDefiningOp()); + if (!bufferizableOp || !bufferizableOp.isWritable(v.cast<OpResult>())) { + // Unknown ops are treated conservatively: Assume that it is illegal to + // write to their OpResults in-place. + LDBG("-----------notWritable op\n"); + foundNonWritableBuffer = true; + return; } - } - LDBG("---->value is writable\n"); - return false; + }); + + if (!foundNonWritableBuffer) + LDBG("---->value is writable\n"); + + return foundNonWritableBuffer; } bool BufferizationAliasInfo::bufferizesToWritableMemory(Value v) const { @@ -836,20 +626,26 @@ void BufferizationAliasInfo::setBufferizesToWritableMemory(Value v) { /// Return true if the buffer to which `operand` would bufferize is equivalent /// to some buffer write.
@@ -935,13 +731,14 @@ static Value findLastPrecedingWrite(Value value) { Operation *op = value.getDefiningOp(); if (!op) return true; - if (!hasKnownBufferizationAliasingBehavior(op)) + auto bufferizableOp = dyn_cast(op); + if (!bufferizableOp) return true; if (isa(op)) return true; SmallVector opOperands = - getAliasingOpOperand(value.cast()); + bufferizableOp.getAliasingOpOperand(value.cast()); assert(opOperands.size() <= 1 && "op with multiple aliasing OpOperands not expected"); @@ -956,11 +753,11 @@ static Value findLastPrecedingWrite(Value value) { /// Return true if `value` is originating from an ExtractSliceOp that matches /// the given InsertSliceOp. -bool BufferizationAliasInfo::hasMatchingExtractSliceOp( - Value value, InsertSliceOp insertOp) const { +static bool hasMatchingExtractSliceOp(const BufferizationAliasInfo &aliasInfo, + Value value, InsertSliceOp insertOp) { auto condition = [&](Value val) { if (auto extractOp = val.getDefiningOp()) - if (areEquivalentExtractSliceOps(extractOp, insertOp)) + if (areEquivalentExtractSliceOps(aliasInfo, extractOp, insertOp)) return true; return false; }; @@ -991,10 +788,11 @@ static bool happensBefore(Operation *a, Operation *b, /// A conflict is: According to SSA use-def chains, a read R is supposed to read /// the result of a write W1. But because of bufferization decisions, R actually /// reads another write W2. -bool BufferizationAliasInfo::hasReadAfterWriteInterference( - const DenseSet &usesRead, - const DenseSet &usesWrite, - const DominanceInfo &domInfo) const { +static bool +hasReadAfterWriteInterference(const DenseSet &usesRead, + const DenseSet &usesWrite, + const DominanceInfo &domInfo, + const BufferizationAliasInfo &aliasInfo) { for (OpOperand *uRead : usesRead) { Operation *readingOp = uRead->getOwner(); @@ -1075,7 +873,8 @@ bool BufferizationAliasInfo::hasReadAfterWriteInterference( // TODO: Use insertSliceOp.getDestOpOperand etc. when available. if (uRead == &insertSliceOp->getOpOperand(1) /*dest*/ && - hasMatchingExtractSliceOp(uConflictingWrite->get(), insertSliceOp)) + hasMatchingExtractSliceOp(aliasInfo, uConflictingWrite->get(), + insertSliceOp)) // Case 1: The main insight is that InsertSliceOp reads only part of // the destination tensor. The overwritten area is not read. If // uConflictingWrite writes into exactly the memory location that is @@ -1092,7 +891,7 @@ bool BufferizationAliasInfo::hasReadAfterWriteInterference( if (uRead == &insertSliceOp->getOpOperand(0) /*source*/ && uConflictingWrite == &insertSliceOp->getOpOperand(1) /*dest*/ && - hasMatchingExtractSliceOp(uRead->get(), insertSliceOp)) + hasMatchingExtractSliceOp(aliasInfo, uRead->get(), insertSliceOp)) // Case 2: The read of the source tensor and the write to the dest // tensor via an InsertSliceOp is not a conflict if the read is // reading exactly that part of an equivalent tensor that the @@ -1135,8 +934,9 @@ bool BufferizationAliasInfo::hasReadAfterWriteInterference( /// * However, adding an alias {%0, %t} would mean that the second /// TransferWriteOp overwrites the first one. Therefore, the TransferReadOp /// would no longer be reading the result of %1. 
-bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference( - OpOperand &operand, OpResult result, const DominanceInfo &domInfo) const { +bool wouldCreateReadAfterWriteInterference( + OpOperand &operand, OpResult result, const DominanceInfo &domInfo, + const BufferizationAliasInfo &aliasInfo) { #ifndef NDEBUG SmallVector opOperands = getAliasingOpOperand(result); assert(llvm::find(opOperands, &operand) != opOperands.end() && @@ -1145,20 +945,22 @@ bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference( // Helper function to iterate on aliases of `root` and capture the reads. auto getAliasingReads = [&](DenseSet &res, Value root) { - for (Value alias : getAliases(root)) + aliasInfo.applyOnAliases(root, [&](Value alias) { for (auto &use : alias.getUses()) // Read to a value that aliases root. if (bufferizesToMemoryRead(use)) res.insert(&use); + }); }; // Helper function to iterate on aliases of `root` and capture the writes. auto getAliasingInplaceWrites = [&](DenseSet &res, Value root) { - for (Value alias : getAliases(root)) + aliasInfo.applyOnAliases(root, [&](Value alias) { for (auto &use : alias.getUses()) // Inplace write to a value that aliases root. if (isInplaceMemoryWrite(use)) res.insert(&use); + }); }; // Collect reads and writes of all aliases of OpOperand and OpResult. @@ -1170,13 +972,14 @@ bool BufferizationAliasInfo::wouldCreateReadAfterWriteInterference( if (bufferizesToMemoryWrite(operand)) usesWrite.insert(&operand); - return hasReadAfterWriteInterference(usesRead, usesWrite, domInfo); + return hasReadAfterWriteInterference(usesRead, usesWrite, domInfo, aliasInfo); } /// Return true if bufferizing `opOperand` inplace with `opResult` would create /// a write to a non-writable buffer. -bool BufferizationAliasInfo::wouldCreateWriteToNonWritableBuffer( - OpOperand &opOperand, OpResult opResult) const { +static bool +wouldCreateWriteToNonWritableBuffer(OpOperand &opOperand, OpResult opResult, + const BufferizationAliasInfo &aliasInfo) { #ifndef NDEBUG SmallVector opOperands = getAliasingOpOperand(opResult); assert(llvm::find(opOperands, &opOperand) != opOperands.end() && @@ -1186,15 +989,15 @@ bool BufferizationAliasInfo::wouldCreateWriteToNonWritableBuffer( // Certain buffers are not writeable: // 1. A function bbArg that is not inplaceable or // 2. A constant op. - assert(!aliasesNonWritableBuffer(opResult) && + assert(!aliasesNonWritableBuffer(opResult, aliasInfo) && "expected that opResult does not alias non-writable buffer"); - bool nonWritable = aliasesNonWritableBuffer(opOperand.get()); + bool nonWritable = aliasesNonWritableBuffer(opOperand.get(), aliasInfo); if (!nonWritable) return false; // This is a problem only if the buffer is written to via some alias. - bool hasWrite = aliasesInPlaceWrite(opResult) || - aliasesInPlaceWrite(opOperand.get()) || + bool hasWrite = aliasesInPlaceWrite(opResult, aliasInfo) || + aliasesInPlaceWrite(opOperand.get(), aliasInfo) || bufferizesToMemoryWrite(opOperand); if (!hasWrite) return false; @@ -1203,35 +1006,22 @@ bool BufferizationAliasInfo::wouldCreateWriteToNonWritableBuffer( return true; } -/// Return true if the source of a `insertSliceOp` bufferizes to an -/// equivalent ExtractSliceOp that bufferizes inplace. 
-bool BufferizationAliasInfo::isSourceEquivalentToAMatchingInplaceExtractSliceOp( - InsertSliceOp insertSliceOp) const { - LDBG("isSourceEquivalentToAMatchingInplaceExtractSliceOp: " << *insertSliceOp - << '\n'); - auto leaderIt = equivalentInfo.findLeader(insertSliceOp.source()); - for (auto mit = leaderIt, meit = equivalentInfo.member_end(); mit != meit; - ++mit) { - auto extractSliceOp = - dyn_cast_or_null<ExtractSliceOp>(mit->v.getDefiningOp()); - if (extractSliceOp && - areEquivalentExtractSliceOps(extractSliceOp, insertSliceOp) && - getInPlace(extractSliceOp.result()) == InPlaceSpec::True) { - LDBG("\tfound: " << *mit->v.getDefiningOp() << '\n'); - return true; - } - } - LDBG("\tnot equivalent\n"); - return false; -} - /// Apply `fun` to all the members of the equivalence class of `v`. void BufferizationAliasInfo::applyOnEquivalenceClass( Value v, function_ref<void(Value)> fun) const { auto leaderIt = equivalentInfo.findLeader(v); for (auto mit = leaderIt, meit = equivalentInfo.member_end(); mit != meit; ++mit) { - fun(mit->v); + fun(*mit); + } +} + +/// Apply `fun` to all aliases of `v`. +void BufferizationAliasInfo::applyOnAliases( + Value v, function_ref<void(Value)> fun) const { + auto leaderIt = aliasInfo.findLeader(v); + for (auto mit = leaderIt, meit = aliasInfo.member_end(); mit != meit; ++mit) { + fun(*mit); } } @@ -1291,20 +1081,6 @@ void BufferizationAliasInfo::dumpEquivalences() const { printEquivalences(llvm::errs()); } -/// This is one particular type of relationship between ops on tensors that -/// reduce to an equivalence on buffers. This should be generalized and exposed -/// as interfaces on the proper types. -bool BufferizationAliasInfo::areEquivalentExtractSliceOps( - ExtractSliceOp st, InsertSliceOp sti) const { - if (!st || !sti) - return false; - if (!equivalentInfo.isEquivalent(st.source(), sti.dest())) - return false; - if (!sameOffsetsSizesAndStrides(st, sti, isEqualConstantIntOrValue)) - return false; - return true; -} - //===----------------------------------------------------------------------===// // Forward declarations. //===----------------------------------------------------------------------===// @@ -1423,6 +1199,72 @@ Operation *getFirstParentOfType(Value v) { return nullptr; } +/// Helper function that creates a memref::DimOp or tensor::DimOp depending on +/// the type of `source`. +static Value createOrFoldDimOp(OpBuilder &b, Location loc, Value source, + int64_t dim) { + if (source.getType().isa<MemRefType>()) + return b.createOrFold<memref::DimOp>(loc, source, dim); + if (source.getType().isa<RankedTensorType>()) + return b.createOrFold<tensor::DimOp>(loc, source, dim); + llvm_unreachable("Expected MemRefType or TensorType"); +} + +/// Compute the type of the `memref` to use for allocating the buffer for +/// `shapedValue`. Also returns (by reference in `dynShape`) the values for the +/// dynamic dimensions in the returned `memref` type. The function also sets the +/// insertion point of the builder `b` to the position where the allocation is +/// to be inserted. +static MemRefType getAllocationTypeAndShape(OpBuilder &b, Location loc, + Value shapedValue, + SmallVectorImpl<Value> &dynShape) { + MemRefType allocMemRefType = + getContiguousMemRefType(shapedValue.getType().cast<ShapedType>()); + if (auto bbArg = shapedValue.dyn_cast<BlockArgument>()) { + b.setInsertionPointToStart(bbArg.getOwner()); + loc = bbArg.getOwner()->getParentOp()->getLoc(); + } else { + b.setInsertionPoint(shapedValue.getDefiningOp()); + loc = shapedValue.getDefiningOp()->getLoc(); + } + + // Compute the dynamic part of the shape.
+ bool foundDynamicShapes = false; + if (auto rankedOp = dyn_cast_or_null<ReifyRankedShapedTypeOpInterface>( + shapedValue.getDefiningOp())) { + ReifiedRankedShapedTypeDims resultDims; + if (succeeded(rankedOp.reifyResultShapes(b, resultDims))) { + foundDynamicShapes = true; + OpResult resultValue = shapedValue.dyn_cast<OpResult>(); + auto &shape = resultDims[resultValue.getResultNumber()]; + for (auto dim : enumerate(allocMemRefType.getShape())) + if (dim.value() == ShapedType::kDynamicSize) + dynShape.push_back(shape[dim.index()]); + } + } + if (!foundDynamicShapes) { + for (auto dim : enumerate(allocMemRefType.getShape())) + if (dim.value() == ShapedType::kDynamicSize) + dynShape.push_back(createOrFoldDimOp(b, loc, shapedValue, dim.index())); + } + + // If the buffer is statically shaped, try to hoist it to the first enclosing + // parallel region. + // TODO: this concept of parallel region and threadlocal needs interfaces. + // TODO: also hoist in the dynamic case. For now this relies on subsequent + // calls to LICM and buffer hoisting which will most likely not succeed. + // TODO: when packing, allocate a static bounding box which will enable more + // hoisting. + if (dynShape.empty()) { + Operation *parent = + getFirstParentOfType(shapedValue); + if (parent) + b.setInsertionPointToStart(&(parent->getRegion(0).front())); + } + return allocMemRefType; +} + /// Create an AllocOp/DeallocOp pair, where the AllocOp is after /// `shapedValue.getDefiningOp` (or at the top of the block in case of a /// bbArg) and the DeallocOp is at the end of the block. @@ -1432,20 +1274,26 @@ static Value createNewAllocDeallocPairForShapedValue( // Take a guard before anything else. OpBuilder::InsertionGuard g(b); + // 1. Create memory allocation. assert(shapedValue.getType().isa<ShapedType>()); MemRefType memRefType = shapedValue.getType().dyn_cast<MemRefType>(); - - Optional<Value> allocated = allocationFns.allocationFn(b, loc, shapedValue); + SmallVector<Value> dynShape; + // Note: getAllocationTypeAndShape also sets the insertion point. + MemRefType allocMemRefType = + getAllocationTypeAndShape(b, loc, shapedValue, dynShape); + Optional<Value> allocated = + allocationFns.allocationFn(b, loc, allocMemRefType, dynShape); // TODO: For now just assert the value is returned. Eventually need to // error-propagate. assert(allocated && "allocation failed"); Value casted = allocated.getValue(); - MemRefType allocMemRefType = allocated->getType().cast<MemRefType>(); if (memRefType && memRefType != allocMemRefType) { casted = b.create<memref::CastOp>(loc, memRefType, allocated.getValue()); aliasInfo.insertNewBufferEquivalence(casted, allocated.getValue()); } + // 2. Create memory deallocation. + b.setInsertionPoint(allocated.getValue().getParentBlock()->getTerminator()); allocationFns.deallocationFn(b, loc, allocated.getValue()); return casted; } @@ -1499,7 +1347,7 @@ static Value getResultBuffer(OpBuilder &b, OpResult result, if (!skipCopy) { // Set insertion point now that potential alloc/dealloc are introduced. b.setInsertionPoint(op); - b.create<linalg::CopyOp>(loc, operandBuffer, resultBuffer); + allocationFns.memCpyFn(b, loc, operandBuffer, resultBuffer); } return resultBuffer; } @@ -1508,83 +1356,6 @@ static Value getResultBuffer(OpBuilder &b, OpResult result, return operandBuffer; } -/// Helper function for LinalgOp bufferization. -/// When allocating a new buffer, analyze whether `op` wants to read form that -/// buffer. Only in that case, a copy of the result buffer may be needed.
-static LogicalResult allocateBuffersForResults( - OpBuilder &b, Location loc, LinalgOp op, - SmallVectorImpl<Value> &resultBuffers, BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, AllocationCallbacks &allocationFns) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(op); - - // TODO: provide the proper interface to iterate on OpResults and get the - // matching OpOperands. - for (OpOperand *opOperand : op.getOutputOperands()) { - OpResult opResult = getInplaceableOpResult(*opOperand); - assert(opResult && "could not find correspond OpResult"); - bool skipCopy = !op.payloadUsesValueFromOperand(opOperand); - Value resultBuffer = - getResultBuffer(b, opResult, bvm, aliasInfo, allocationFns, skipCopy); - if (!resultBuffer) - return failure(); - resultBuffers.push_back(resultBuffer); - } - - if (op->getNumResults()) - map(bvm, op->getResults(), resultBuffers); - - return success(); -} - -/// Generic conversion for any LinalgOp on tensors. -static LogicalResult bufferize(OpBuilder &b, LinalgOp op, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFns) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - - // Ensure op has only tensors. Allow mixed tensor-buffer mode on a per-need - // basis. - if (!op.hasTensorSemantics()) - return op->emitError() << "op does not have tensor semantics"; - - Location loc = op.getLoc(); - SmallVector<Value> newInputBuffers; - newInputBuffers.reserve(op.getNumInputs()); - for (OpOperand *opOperand : op.getInputOperands()) { - if (op.isScalar(opOperand)) { - newInputBuffers.push_back(opOperand->get()); - continue; - } - newInputBuffers.push_back(lookup(bvm, opOperand->get())); - assert(newInputBuffers.back() && "missing buffer"); - } - SmallVector<Value> newOutputBuffers; - // Try to allocate new buffers depending on op's inplace semantics. - if (failed(allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm, - aliasInfo, allocationFns))) - return failure(); - - // Clone the newly bufferized op. - SmallVector<Value> newOperands = newInputBuffers; - newOperands.append(newOutputBuffers.begin(), newOutputBuffers.end()); - - // Set insertion point now that potential alloc/dealloc are introduced. - b.setInsertionPoint(op); - op.clone(b, loc, /*resultTypes=*/TypeRange{}, newOperands); - - // Replace the results of the old op with the new output buffers. - if (op->getNumResults()) - map(bvm, op->getResults(), newOutputBuffers); - - // The original op will be DCE'd away later. - - return success(); -} - /// In a first approximation, all the function arguments of a FuncOp are marked /// inplaceable. For now, it is the responsibility of the `callOp` bufferization /// to allow FuncOp that are inplaceable to write inPlace. @@ -1726,144 +1497,8 @@ bufferize(OpBuilder &b, CallOpInterface callOp, BlockAndValueMapping &bvm, return success(); } -/// tensor::CastOp bufferizes to memref::CastOp. -static LogicalResult bufferize(OpBuilder &b, tensor::CastOp castOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // Take a guard before anything else.
- OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(castOp); - - Value resultBuffer = - getResultBuffer(b, castOp->getResult(0), bvm, aliasInfo, allocationFn); - if (!resultBuffer) - return failure(); - Type sourceType = resultBuffer.getType(); - auto rankedMemRefType = sourceType.dyn_cast<MemRefType>(); - auto unrankedMemRefType = sourceType.dyn_cast<UnrankedMemRefType>(); - assert(rankedMemRefType || unrankedMemRefType); - Attribute memorySpace = rankedMemRefType - ? rankedMemRefType.getMemorySpace() - : unrankedMemRefType.getMemorySpace(); - TensorType tensorType = castOp.getResult().getType().cast<TensorType>(); - MemRefLayoutAttrInterface layout = - rankedMemRefType && tensorType.isa<RankedTensorType>() - ? rankedMemRefType.getLayout() - : MemRefLayoutAttrInterface(); - Type memRefType = getContiguousOrUnrankedMemRefType( - castOp.getResult().getType(), layout, memorySpace); - Value res = - b.create<memref::CastOp>(castOp.getLoc(), memRefType, resultBuffer); - aliasInfo.insertNewBufferEquivalence(res, castOp.getResult()); - map(bvm, castOp.getResult(), res); - return success(); -} - -static LogicalResult bufferize(OpBuilder &b, arith::ConstantOp constantOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo) { - assert(constantOp.getType().dyn_cast<RankedTensorType>() && - "not a constant ranked tensor"); - auto moduleOp = constantOp->getParentOfType<ModuleOp>(); - if (!moduleOp) { - return constantOp.emitError( - "cannot bufferize constants not within builtin.module op"); - } - GlobalCreator globalCreator(moduleOp); - - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(constantOp); - - auto globalMemref = globalCreator.getGlobalFor(constantOp); - Value memref = b.create<memref::GetGlobalOp>( - constantOp.getLoc(), globalMemref.type(), globalMemref.getName()); - aliasInfo.insertNewBufferEquivalence(memref, constantOp.getResult()); - map(bvm, constantOp, memref); - - return success(); -} - -/// DimOp tensor operand is modified inplace. This allows leaving dead -/// tensors behind that will get DCE'd. -static LogicalResult bufferize(OpBuilder &b, tensor::DimOp dimOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(dimOp); - - if (dimOp.source().getType().isa<TensorType>()) { - Value v = lookup(bvm, dimOp.source()); - assert(v && "missing buffer"); - dimOp.result().replaceAllUsesWith( - b.create<memref::DimOp>(dimOp.getLoc(), v, dimOp.index())); - } - return success(); -} - -static LogicalResult bufferize(OpBuilder &b, scf::ForOp forOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - - for (OpResult opResult : forOp->getResults()) { - if (!opResult.getType().isa<TensorType>()) - continue; - // TODO: Atm we bail on unranked TensorType because we don't know how to - // alloc an UnrankedMemRefType + its underlying ranked MemRefType. - assert(opResult.getType().isa<RankedTensorType>() && - "unsupported unranked tensor"); - - // TODO: More general: Matching bbArg does not bufferize to a read.
- Value resultBuffer = - getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn); - if (!resultBuffer) - return failure(); - - OpOperand &opOperand = forOp.getOpOperandForResult(opResult); - BlockArgument bbArg = forOp.getRegionIterArgForOpOperand(opOperand); - aliasInfo.createAliasInfoEntry(resultBuffer); - aliasInfo.insertNewBufferEquivalence(bbArg, resultBuffer); - map(bvm, bbArg, resultBuffer); - map(bvm, opResult, resultBuffer); - } - - return success(); -} - -static LogicalResult bufferize(OpBuilder &b, scf::IfOp ifOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - - for (OpResult opResult : ifOp->getResults()) { - if (!opResult.getType().isa<TensorType>()) - continue; - // TODO: Atm we bail on unranked TensorType because we don't know how to - // alloc an UnrankedMemRefType + its underlying ranked MemRefType. - assert(opResult.getType().isa<RankedTensorType>() && - "unsupported unranked tensor"); - - Value resultBuffer = - getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn); - if (!resultBuffer) - return failure(); - - aliasInfo.createAliasInfoEntry(resultBuffer); - map(bvm, opResult, resultBuffer); - } - - return success(); -} - -/// FuncOp always creates TensorToMemRef ops. -static LogicalResult bufferize(OpBuilder &b, FuncOp funcOp, +/// FuncOp always creates TensorToMemRef ops. +static LogicalResult bufferize(OpBuilder &b, FuncOp funcOp, BlockAndValueMapping &bvm, BufferizationAliasInfo &aliasInfo, AllocationCallbacks &allocationFn) { @@ -1888,410 +1523,6 @@ static LogicalResult bufferize(OpBuilder &b, FuncOp funcOp, return success(); } -/// InitTensor always allocates (unless it was eliminated). -/// TODO: consider hoisting across function boundaries prior to bufferization. -static LogicalResult bufferize(OpBuilder &b, InitTensorOp initTensorOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // The InitTensorOp may have been eliminated. - if (initTensorOp->getUses().empty()) - return success(); - - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(initTensorOp); - - Value alloc = createNewAllocDeallocPairForShapedValue( - b, initTensorOp->getLoc(), initTensorOp.result(), aliasInfo, - allocationFn); - map(bvm, initTensorOp.result(), alloc); - return success(); -} - -/// ReturnOp always creates memref::TensorLoadOp. -static LogicalResult bufferize(OpBuilder &b, ReturnOp returnOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - // Cannot insert after returnOp. - b.setInsertionPoint(returnOp); - - assert(isa<FuncOp>(returnOp->getParentOp()) && - "only support FuncOp parent for ReturnOp"); - for (OpOperand &operand : returnOp->getOpOperands()) { - auto tensorType = operand.get().getType().dyn_cast<TensorType>(); - if (!tensorType) - continue; - Value v = lookup(bvm, operand.get()); - assert(v && "missing buffer for result"); - Value returnTensor = b.create<memref::TensorLoadOp>(returnOp.getLoc(), v); - operand.set(returnTensor); - aliasInfo.insertNewBufferEquivalence(returnTensor, v); - map(bvm, returnTensor, v); - } - return success(); -} - -/// Bufferization for TiledLoopOp.. -static LogicalResult bufferize(OpBuilder &b, TiledLoopOp tiledLoopOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // Take a guard before anything else.
- OpBuilder::InsertionGuard g(b); - - // Allocate output buffers if needed, forward output tensor args to the - // terminator. - Operation *yieldOp = tiledLoopOp.getBody()->getTerminator(); - Block *body = tiledLoopOp.getBody(); - - // Take copies of the old input and output operands, so we can insert inplace - // easily. - auto oldInputs = llvm::to_vector<4>(tiledLoopOp.inputs()); - auto oldOutputs = llvm::to_vector<4>(tiledLoopOp.outputs()); - - int numLoops = tiledLoopOp.getNumLoops(); - int numControlOperands = tiledLoopOp.getNumControlOperands(); - - // Add buffers for outputs and the corresponding block arguments. - // Keep separate iterators to increment without further leaking impl. details. - // Start with outputs to avoid interference from new input buffers. - int numNewOutputBuffers = 0; - int resultIndex = 0; - int oldOutputBBArgIndex = numLoops + oldInputs.size(); - int nextOutputBBArgIndex = numLoops + oldInputs.size() + oldOutputs.size(); - int nextOutputOperandIndex = - numControlOperands + oldInputs.size() + oldOutputs.size(); - for (Value oldOutputTensor : oldOutputs) { - if (!oldOutputTensor.getType().isa<TensorType>()) { - // Skip and increment the old bbarg index only. - ++oldOutputBBArgIndex; - // Do not increment resultIndex as only tensors are returned. - // TODO: better interface to avoid leaking such impl details. - continue; - } - - assert(oldOutputTensor.getType().isa<RankedTensorType>() && - "bufferizable output must be a ranked tensor"); - - const OpResult &opResult = tiledLoopOp->getResult(resultIndex); - OpOperand &yieldOperand = yieldOp->getOpOperand(resultIndex); - Value resultBuffer = - getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn); - if (!resultBuffer) - return failure(); - - // Insert mapping and aliasing info. - aliasInfo.createAliasInfoEntry(resultBuffer); - aliasInfo.insertNewBufferEquivalence(opResult, resultBuffer); - map(bvm, opResult, resultBuffer); - - // Insert new operand and bbArg. - tiledLoopOp->insertOperands(nextOutputOperandIndex, resultBuffer); - BlockArgument newBufferBBArg = - body->insertArgument(nextOutputBBArgIndex, resultBuffer.getType()); - BlockArgument oldTensorBBArg = body->getArgument(oldOutputBBArgIndex); - // Insert mapping and aliasing info. - aliasInfo.createAliasInfoEntry(newBufferBBArg); - aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg); - map(bvm, oldTensorBBArg, newBufferBBArg); - - // Set operand of `linalg.yield` to the bbArg so it just canonicalizes away - // later. - yieldOperand.set(oldTensorBBArg); - - // Increment indices. - ++numNewOutputBuffers; - ++resultIndex; - ++oldOutputBBArgIndex; - ++nextOutputBBArgIndex; - ++nextOutputOperandIndex; - } - - // Add buffers for inputs and the corresponding block arguments. - // Keep separate iterators to increment without further leaking impl. details. - int numNewInputBuffers = 0; - int oldInputBBArgIndex = numLoops; - int nextInputBBArgIndex = numLoops + oldInputs.size(); - int nextInputOperandIndex = numControlOperands + oldInputs.size(); - for (Value oldInputTensor : oldInputs) { - if (!oldInputTensor.getType().isa<TensorType>()) { - // Skip and increment the old bbarg index only. - ++oldInputBBArgIndex; - continue; - } - - Value inputBuffer = lookup(bvm, oldInputTensor); - assert(inputBuffer && " missing buffer for operand"); - - // Insert new operand and bbArg.
- tiledLoopOp->insertOperands(nextInputOperandIndex, inputBuffer); - BlockArgument newBufferBBArg = - body->insertArgument(nextInputBBArgIndex, inputBuffer.getType()); - BlockArgument oldTensorBBArg = body->getArgument(oldInputBBArgIndex); - - // Insert mapping and aliasing info. - aliasInfo.createAliasInfoEntry(newBufferBBArg); - aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg); - map(bvm, oldTensorBBArg, newBufferBBArg); - - // Increment indices. - ++numNewInputBuffers; - ++oldInputBBArgIndex; - ++nextInputBBArgIndex; - ++nextInputOperandIndex; - } - - // Update segment sizes. - // TODO: Helper method to avoid leaking impl details. - tiledLoopOp->setAttr( - TiledLoopOp::getOperandSegmentSizeAttr(), - b.getI32VectorAttr( - {numLoops, numLoops, numLoops, - static_cast<int>(oldInputs.size()) + numNewInputBuffers, - static_cast<int>(oldOutputs.size()) + numNewOutputBuffers})); - - return success(); -} - -/// Bufferize ExtractSliceOp to subview with optional alloc + copy depending on -/// whether or not it is marked inplaceable. -/// Note that `getInplaceableOpResult` on a ExtractSliceOp always returns null. -/// As consequence a ExtractSliceOp always alloc + copy when taken in -/// isolation. -static LogicalResult bufferize(OpBuilder &b, ExtractSliceOp extractSliceOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - - LDBG("bufferize: " << *extractSliceOp << '\n'); - - Location loc = extractSliceOp.getLoc(); - // Bail if source was not bufferized. - Value srcMemref = lookup(bvm, extractSliceOp.source()); - if (!srcMemref) - return failure(); - auto srcMemrefType = srcMemref.getType().cast<MemRefType>(); - auto dstTensorType = - extractSliceOp.result().getType().cast<RankedTensorType>(); - - // If not inplaceable, alloc. - Value alloc; - auto inPlace = getInPlace(extractSliceOp->getResult(0)); - if (inPlace != InPlaceSpec::True) - alloc = createNewAllocDeallocPairForShapedValue( - b, loc, extractSliceOp.result(), aliasInfo, allocationFn); - - // Set insertion point now that potential alloc/dealloc are introduced. - b.setInsertionPoint(extractSliceOp); - - // Bufferize to subview. - auto subviewMemRefType = - memref::SubViewOp::inferRankReducedResultType( - dstTensorType.getRank(), srcMemrefType, - extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(), - extractSliceOp.getMixedStrides()) - .cast<MemRefType>(); - Value subView = b.create<memref::SubViewOp>( - loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(), - extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides()); - // Insert new alias. - aliasInfo.insertNewBufferAlias(subView, srcMemref); - - /// If not inplaceable, copy. - if (alloc) { - // Do not copy if the copied data is never read. - if (isValueRead(extractSliceOp.result())) - b.create<linalg::CopyOp>(extractSliceOp.getLoc(), subView, alloc); - subView = alloc; - } - - map(bvm, extractSliceOp.result(), subView); - return success(); -} - -static LogicalResult bufferize(OpBuilder &b, InsertSliceOp insertSliceOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(insertSliceOp); - - LDBG("bufferize: " << *insertSliceOp << '\n'); - - Location loc = insertSliceOp.getLoc(); - // Since insert_slice arise from tiling and introducing loops, this - // case is generally a deal breaker.
When used with loops, this ends up - // cloning the whole tensor on every single iteration and is a symptom - // of a catastrophically bad scheduling decision. - // TODO: be very loud about it or even consider failing the pass. - // Alloc a copy for `insertSliceOp.dest()`, it will become the result - // buffer. - Value dstMemref = getResultBuffer(b, insertSliceOp->getResult(0), bvm, - aliasInfo, allocationFn); - if (!dstMemref) - return failure(); - auto dstMemrefType = dstMemref.getType().cast(); - - Value srcMemref = lookup(bvm, insertSliceOp.source()); - if (!srcMemref) - return failure(); - auto subviewMemRefType = - memref::SubViewOp::inferRankReducedResultType( - insertSliceOp.getSourceType().getRank(), dstMemrefType, - insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(), - insertSliceOp.getMixedStrides()) - .cast(); - - // A copy of the source buffer is needed if either: - // - The producer of `source` is not inplace. This is the case where a - // slice is computed out of place into the inplace full tensor. - // - The result is not inplace. This is the case where the whole tensor is - // cloned and the clone needs to be updated. - auto inPlace = getInPlace(insertSliceOp->getResult(0)); - // TODO: Is this necessary? - if (!aliasInfo.isSourceEquivalentToAMatchingInplaceExtractSliceOp( - insertSliceOp) || - inPlace != InPlaceSpec::True) { - LDBG("insert_slice needs extra source copy: " << insertSliceOp.source() - << " -> copy\n"); - // Take a subview of the dst. - Value subView = b.create( - loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(), - insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); - // Insert new alias. - aliasInfo.insertNewBufferAlias(subView, dstMemref); - b.create(insertSliceOp.getLoc(), srcMemref, subView); - } - - map(bvm, insertSliceOp.result(), dstMemref); - - return success(); -} - -static LogicalResult bufferize(OpBuilder &b, VectorTransferOpInterface op, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo, - AllocationCallbacks &allocationFn) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(op); - - if (op.getShapedType().isa()) - return failure(); - - /// transfer_read from buffer always reads from the bufferized - /// op.source(). - if (auto readOp = dyn_cast(op.getOperation())) { - Value v = lookup(bvm, op.source()); - assert(v && "missing buffer"); - readOp.sourceMutable().assign(v); - return success(); - } - - // Create a new transfer_write on buffer that doesn't have a return value. - // Leave the previous transfer_write to dead code as it still has uses at - // this point. - auto writeOp = cast(op.getOperation()); - Value resultBuffer = - getResultBuffer(b, op->getResult(0), bvm, aliasInfo, allocationFn); - if (!resultBuffer) - return failure(); - b.create( - op.getLoc(), writeOp.vector(), resultBuffer, writeOp.indices(), - writeOp.permutation_map(), - writeOp.in_bounds() ? *writeOp.in_bounds() : ArrayAttr()); - map(bvm, op->getResult(0), resultBuffer); - - return success(); -} - -static LogicalResult bufferize(OpBuilder &b, scf::YieldOp yieldOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - // Cannot create IR past a yieldOp. 
- b.setInsertionPoint(yieldOp); - - if (auto execOp = dyn_cast(yieldOp->getParentOp())) { - if (execOp->getNumResults() != 0) - return execOp->emitError( - "expected result-less scf.execute_region containing op"); - return success(); - } - - if (auto ifOp = dyn_cast(yieldOp->getParentOp())) - return success(); - - scf::ForOp forOp = dyn_cast(yieldOp->getParentOp()); - if (!forOp) - return yieldOp->emitError("expected scf::ForOp parent for scf::YieldOp"); - for (OpOperand &operand : yieldOp->getOpOperands()) { - auto tensorType = operand.get().getType().dyn_cast(); - if (!tensorType) - continue; - - OpOperand &forOperand = forOp.getOpOperandForResult( - forOp->getResult(operand.getOperandNumber())); - auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand); - Value yieldedBuffer = lookup(bvm, operand.get()); - Value bbArgBuffer = lookup(bvm, bbArg); - if (!aliasInfo.areEquivalentBufferizedValues(yieldedBuffer, bbArgBuffer)) { - // TODO: this could get resolved with copies but it can also turn into - // swaps so we need to be careful about order of copies. - return yieldOp->emitError() - << "Yield operand #" << operand.getOperandNumber() - << " does not bufferize to an equivalent buffer to the matching" - << " enclosing scf::for operand"; - } - - // Buffers are equivalent so the work is already done and we just yield the - // bbArg so that it later canonicalizes away. - operand.set(bbArg); - } - return success(); -} - -/// Bufferization for linalg::YieldOp either does not involve tensors or just -/// results in later canonicalization. In either case it does nothing. -static LogicalResult bufferize(OpBuilder &b, linalg::YieldOp yieldOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - // Cannot create IR past a yieldOp. - b.setInsertionPoint(yieldOp); - - // No tensors -> success. - if (!llvm::any_of(yieldOp.getOperandTypes(), isaTensor)) - return success(); - // linalg::YieldOp nested under TiledLoop must just canonicalize. - if (yieldOp->getParentOfType()) - return success(); - llvm_unreachable("unexpected yieldOp"); -} - -/// Bufferization for tensor::ExtractOp just translate to memref.load, it only -/// reads the tensor. -static LogicalResult bufferize(OpBuilder &b, tensor::ExtractOp extractOp, - BlockAndValueMapping &bvm, - BufferizationAliasInfo &aliasInfo) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(extractOp); - - Location loc = extractOp.getLoc(); - Value srcMemref = lookup(bvm, extractOp.tensor()); - Value l = b.create(loc, srcMemref, extractOp.indices()); - extractOp.replaceAllUsesWith(l); - return success(); -} //===----------------------------------------------------------------------===// // Bufferization analyses. 
//===----------------------------------------------------------------------===// @@ -2317,8 +1548,9 @@ bufferizableInPlaceAnalysisImpl(OpOperand &operand, OpResult result, << printValueInfo(result) << '\n'); bool foundInterference = - aliasInfo.wouldCreateWriteToNonWritableBuffer(operand, result) || - aliasInfo.wouldCreateReadAfterWriteInterference(operand, result, domInfo); + wouldCreateWriteToNonWritableBuffer(operand, result, aliasInfo) || + wouldCreateReadAfterWriteInterference(operand, result, domInfo, + aliasInfo); if (foundInterference) aliasInfo.bufferizeOutOfPlace(result); @@ -2330,8 +1562,12 @@ bufferizableInPlaceAnalysisImpl(OpOperand &operand, OpResult result, return success(); } -/// This analysis function is used for OpOperands that alias with an OpResult -/// but are not inplaceable on it. E.g., ExtractSliceOp. +/// Determine if `operand` can be bufferized in-place with one of the op's +/// results. If so, set InPlaceSpec::True on the result. Otherwise, set +/// InPlaceSpec::False on the result. +/// +/// Even if an op does not read or write, it may still create an alias when +/// bufferized in-place. An example of such ops is tensor.extract_slice. /// /// Rationale for bufferizing `%1 = tensor.extract_slice %0[...]` inplace: /// @@ -2346,25 +1582,15 @@ bufferizableInPlaceAnalysisImpl(OpOperand &operand, OpResult result, /// An analysis is required to ensure inplace bufferization would not result in /// RaW dependence violations. static LogicalResult -bufferizableInPlaceAnalysisAliasOnlyOp(OpOperand &operand, - BufferizationAliasInfo &aliasInfo, - const DominanceInfo &domInfo) { - OpResult result = getAliasingOpResult(operand); - assert(result && "expected that the OpOperand has an aliasing OpResult"); - return bufferizableInPlaceAnalysisImpl(operand, result, aliasInfo, domInfo); -} - -/// Determine if `operand` can be bufferized in-place with one of the op's -/// results. If so, set InPlaceSpec::True on the result. Otherwise, set -/// InPlaceSpec::False on the result. -static LogicalResult bufferizableInPlaceAnalysis(OpOperand &operand, BufferizationAliasInfo &aliasInfo, const DominanceInfo &domInfo) { - OpResult result = getInplaceableOpResult(operand); - if (!result) + auto bufferizableOp = dyn_cast(operand.getOwner()); + if (!bufferizableOp) return success(); - return bufferizableInPlaceAnalysisImpl(operand, result, aliasInfo, domInfo); + if (OpResult result = bufferizableOp.getAliasingOpResult(operand)) + return bufferizableInPlaceAnalysisImpl(operand, result, aliasInfo, domInfo); + return success(); } /// Analyze the `ops` to determine which OpResults are inplaceable. Walk ops in @@ -2383,19 +1609,11 @@ LogicalResult mlir::linalg::inPlaceAnalysis(SmallVector &ops, } // Walk ops in reverse for better interference analysis. - for (Operation *op : reverse(ops)) { - for (OpOperand &opOperand : op->getOpOperands()) { - if (failed(bufferizableInPlaceAnalysis(opOperand, aliasInfo, domInfo))) - return failure(); - - // Special logic to analyze OpOperands that are not inplaceable on an - // OpResult but may create an alias. 
- if (bufferizesToAliasOnly(opOperand)) - if (failed(bufferizableInPlaceAnalysisAliasOnlyOp(opOperand, aliasInfo, - domInfo))) + for (Operation *op : reverse(ops)) + for (OpOperand &opOperand : op->getOpOperands()) + if (opOperand.get().getType().isa()) + if (failed(bufferizableInPlaceAnalysis(opOperand, aliasInfo, domInfo))) return failure(); - } - } return success(); } @@ -2440,132 +1658,54 @@ inPlaceAnalysisFuncOpBody(FuncOp funcOp, BufferizationAliasInfo &aliasInfo, // Bufferization entry-point for functions. //===----------------------------------------------------------------------===// -/// Compute the type of the `memref` to use for allocating the buffer for -/// `shapedValue`. Also returns (by reference in `dynShape`), the value for the -/// dynamic dimensions in the returned `memref` type. The function also sets the -/// insertion point of the builder `b` to the position where the allocation is -/// to be inserted. -static MemRefType getAllocationTypeAndShape(OpBuilder &b, Location loc, - Value shapedValue, - SmallVectorImpl &dynShape) { - MemRefType allocMemRefType = - getContiguousMemRefType(shapedValue.getType().cast()); - if (auto bbArg = shapedValue.dyn_cast()) { - b.setInsertionPointToStart(bbArg.getOwner()); - loc = bbArg.getOwner()->getParentOp()->getLoc(); - } else { - b.setInsertionPoint(shapedValue.getDefiningOp()); - loc = shapedValue.getDefiningOp()->getLoc(); - } +Optional +mlir::linalg::defaultAllocationFn(OpBuilder &b, Location loc, MemRefType type, + const SmallVector &dynShape) { + Value allocated = b.create( + loc, type, dynShape, b.getI64IntegerAttr(kBufferAlignments)); + return allocated; +} - // Compute the dynamic part of the shape. - bool foundDynamicShapes = false; - if (auto rankedOp = dyn_cast_or_null( - shapedValue.getDefiningOp())) { - ReifiedRankedShapedTypeDims resultDims; - if (succeeded(rankedOp.reifyResultShapes(b, resultDims))) { - foundDynamicShapes = true; - OpResult resultValue = shapedValue.dyn_cast(); - auto &shape = resultDims[resultValue.getResultNumber()]; - for (auto dim : enumerate(allocMemRefType.getShape())) - if (dim.value() == ShapedType::kDynamicSize) - dynShape.push_back(shape[dim.index()]); - } - } - if (!foundDynamicShapes) { - for (auto dim : enumerate(allocMemRefType.getShape())) - if (dim.value() == ShapedType::kDynamicSize) - dynShape.push_back(createOrFoldDimOp(b, loc, shapedValue, dim.index())); +void mlir::linalg::defaultDeallocationFn(OpBuilder &b, Location loc, + Value allocatedBuffer) { + b.create(loc, allocatedBuffer); +} + +void mlir::linalg::defaultMemCpyFn(OpBuilder &b, Location loc, Value from, + Value to) { + b.create(loc, from, to); +} + +LogicalResult mlir::linalg::bufferizeOp( + Operation *op, BlockAndValueMapping &bvm, BufferizationAliasInfo &aliasInfo, + AllocationCallbacks allocationFns, + DenseMap *bufferizedFunctionTypes) { + OpBuilder b(op->getContext()); + + // CallOps are handled separately. + if (auto callOp = dyn_cast(op)) { + LDBG("Begin bufferize:\n" << callOp << '\n'); + if (!bufferizedFunctionTypes) + llvm_unreachable( + "null bufferizedFunctionTypes when bufferizing CallOpInterface"); + return bufferize(b, callOp, bvm, aliasInfo, allocationFns, + *bufferizedFunctionTypes); } - // If the buffer is statically shaped, try to hoist it to the first enclosing - // parallel region. - // TODO: this concept of parallel region and threadlocal needs interfaces. - // TODO: also hoist in the dynamic case. 
For now this relies on subsequent - // calls to LICM and buffer hoisting which will most likely not succeed. - // TODO: when packing, allocate a static bounding box which will enable more - // hoisting. - if (dynShape.empty()) { - Operation *parent = - getFirstParentOfType(shapedValue); - if (parent) - b.setInsertionPointToStart(&(parent->getRegion(0).front())); - } - return allocMemRefType; -} - -Optional mlir::linalg::defaultAllocationFn(OpBuilder &b, Location loc, - Value shapedValue) { - // Take a guard before anything else. - OpBuilder::InsertionGuard g(b); - SmallVector dynShape; - MemRefType allocMemRefType = - getAllocationTypeAndShape(b, loc, shapedValue, dynShape); - Value allocated = b.create( - loc, allocMemRefType, dynShape, b.getI64IntegerAttr(kBufferAlignments)); - return allocated; -} - -static Optional allocationFnUsingAlloca(OpBuilder &b, Location loc, - Value shapedValue) { - OpBuilder::InsertionGuard g(b); - SmallVector dynShape; - MemRefType allocMemRefType = - getAllocationTypeAndShape(b, loc, shapedValue, dynShape); - Value allocated = b.create( - loc, allocMemRefType, dynShape, b.getI64IntegerAttr(kBufferAlignments)); - return allocated; -} + // Skip BufferCast and TensorLoad ops. + if (isa(op)) + return success(); -void mlir::linalg::defaultDeallocationFn(OpBuilder &b, Location loc, - Value allocatedBuffer) { - OpBuilder::InsertionGuard g(b); - b.setInsertionPoint(allocatedBuffer.getParentBlock()->getTerminator()); - b.create(loc, allocatedBuffer); -} + // Bufferize using `BufferizableOpInterface`. + if (auto bufferizableOp = dyn_cast(op)) + return bufferizableOp.bufferize(b, bvm, aliasInfo, allocationFns); -LogicalResult mlir::linalg::bufferizeOp( - Operation *op, BlockAndValueMapping &bvm, BufferizationAliasInfo &aliasInfo, - AllocationCallbacks allocationFns, - DenseMap *bufferizedFunctionTypes) { - OpBuilder b(op->getContext()); - return TypeSwitch(op) - // Skip BufferCast and TensorLoad ops. - .Case( - [&](auto) { return success(); }) - .Case( - [&](auto op) { - LDBG("Begin bufferize:\n" << op << '\n'); - return bufferize(b, op, bvm, aliasInfo, allocationFns); - }) - .Case([&](auto op) { - LDBG("Begin bufferize:\n" << op << '\n'); - return bufferize(b, op, bvm, aliasInfo); - }) - .Case([&](CallOpInterface op) { - LDBG("Begin bufferize:\n" << op << '\n'); - if (!bufferizedFunctionTypes) - llvm_unreachable( - "null bufferizedFunctionTypes when bufferizing CallOpInterface"); - return bufferize(b, op, bvm, aliasInfo, allocationFns, - *bufferizedFunctionTypes); - }) - .Case([&](arith::ConstantOp op) { - if (!isaTensor(op.getResult().getType())) - return success(); - LDBG("Begin bufferize:\n" << op << '\n'); - return bufferize(b, op, bvm, aliasInfo); - }) - .Default([&](Operation *op) -> LogicalResult { - auto isaTensor = [](Type t) { return t.isa(); }; - if (any_of(op->getOperandTypes(), isaTensor) || - any_of(op->getResultTypes(), isaTensor)) - return op->emitError() << "unsupported op with tensors"; - return success(); - }); + // Other op with tensors. No bufferization method specified. 
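+  // Illustrative example (not from the original patch): an op such as
+  // tensor.generate has tensor results but no BufferizableOpInterface model
+  // registered for it below, so it falls through to this default path and is
+  // rejected with the "unsupported op with tensors" error.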
+  auto isaTensor = [](Type t) { return t.isa(); }; + if (any_of(op->getOperandTypes(), isaTensor) || + any_of(op->getResultTypes(), isaTensor)) + return op->emitError() << "unsupported op with tensors"; + return success(); } static LogicalResult bufferizeFuncOpInternals( @@ -2872,32 +2012,6 @@ getFuncOpsOrderedByCalls(ModuleOp moduleOp, return success(); } -namespace { -struct LinalgComprehensiveModuleBufferize : public LinalgComprehensiveModuleBufferizeBase< - LinalgComprehensiveModuleBufferize> { - LinalgComprehensiveModuleBufferize() {} - - LinalgComprehensiveModuleBufferize( - const LinalgComprehensiveModuleBufferize &p) {} - - void runOnOperation() override; - - void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); - } - -private: - std::unique_ptr allocationFns; -}; -} // end namespace - -static void applyEnablingTransformations(ModuleOp moduleOp) { - RewritePatternSet patterns(moduleOp.getContext()); - patterns.add(moduleOp.getContext()); - (void)applyPatternsAndFoldGreedily(moduleOp, std::move(patterns)); -} - static void foreachCaller(const DenseMap> &callerMap, FuncOp callee, llvm::function_ref doit) { @@ -2989,6 +2103,78 @@ static void layoutPostProcessing(ModuleOp moduleOp) { } } +/// Try to eliminate InitTensorOps inside funcOp. An InitTensorOp is replaced +/// with the result of `rewriteFunc` if it is anchored on a matching +/// OpOperand. "Anchored" means that there is a path on the reverse SSA use-def +/// chain, starting from the OpOperand and always following the aliasing +/// OpOperand, that eventually ends at a single InitTensorOp. +LogicalResult mlir::linalg::initTensorElimination( + FuncOp funcOp, BufferizationAliasInfo &aliasInfo, DominanceInfo &domInfo, + std::function anchorMatchFunc, + std::function rewriteFunc, + bool skipAnalysis) { + OpBuilder b(funcOp->getContext()); + + WalkResult status = funcOp->walk([&](Operation *op) { + for (OpOperand &operand : op->getOpOperands()) { + // Is this a matching OpOperand? + if (!anchorMatchFunc(operand)) + continue; + + SetVector maybeInitTensor = + findValueInReverseUseDefChain(operand.get(), [](Value val) { + // Continue traversal until this function returns true. + OpResult opResult = val.dyn_cast(); + if (!opResult) + return true; + if (getInPlace(opResult) != InPlaceSpec::True) + return true; + // Only equivalent tensors are supported at the moment. + // TODO: Support cases such as extract_slice(init_tensor). + SmallVector opOperands = + getAliasingOpOperand(opResult); + if (!llvm::all_of(opOperands, [](OpOperand *operand) { + return bufferRelation(*operand) == BufferRelation::Equivalent; + })) + return true; + return false; + }); + + // Replace only if the reverse use-def chain ends at exactly one + // InitTensorOp. + if (maybeInitTensor.size() != 1 || + !maybeInitTensor.front().getDefiningOp()) + return WalkResult::skip(); + Value initTensor = maybeInitTensor.front(); + + // Create a replacement for the InitTensorOp. + b.setInsertionPoint(initTensor.getDefiningOp()); + Value replacement = rewriteFunc(b, initTensor.getLoc(), operand); + if (!replacement) + continue; + + // Uses of the InitTensorOp are replaced here, but the op is not deleted. + // InitTensorOps without uses are ignored by the bufferization. + initTensor.replaceAllUsesWith(replacement); + aliasInfo.createAliasInfoEntry(replacement); + + // Run analysis on the newly created op.
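+      // Illustrative example: when `rewriteFunc` creates a new
+      // tensor.extract_slice (as in the insert_slice-anchored rewrite below),
+      // that op has no in-place/out-of-place decision yet, so the analysis
+      // must be re-run for it here.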
+ if (auto opResult = replacement.dyn_cast()) { + if (!skipAnalysis) { + SmallVector ops(1, replacement.getDefiningOp()); + if (failed(inPlaceAnalysis(ops, aliasInfo, domInfo))) + return WalkResult::interrupt(); + } + } + } + + // Advance to the next operation. + return WalkResult::advance(); + }); + + return failure(status.wasInterrupted()); +} + /// Try to eliminate InitTensorOps inside funcOp. An InitTensorOp can be /// eliminated if it is eventually inserted into another tensor (and some other /// conditions are met). @@ -3017,87 +2203,36 @@ static void layoutPostProcessing(ModuleOp moduleOp) { /// /// Note that the newly inserted ExtractSliceOp may have to bufferize /// out-of-place due to RaW conflicts. -static LogicalResult runInitTensorElimination(FuncOp funcOp, - BufferizationAliasInfo &aliasInfo, - DominanceInfo &domInfo) { - OpBuilder b(funcOp->getContext()); - - WalkResult status = funcOp->walk([&](tensor::InsertSliceOp insertOp) { - // Only inplace bufferized InsertSliceOps are eligible. - if (getInPlace(insertOp->getOpResult(0)) != InPlaceSpec::True) - return WalkResult::skip(); - - SetVector maybeInitTensor = - findValueInReverseUseDefChain(insertOp.source(), [](Value val) { - // Continue traversal until this function returns true. - OpResult opResult = val.dyn_cast(); - if (!opResult) - return true; - if (getInPlace(opResult) != InPlaceSpec::True) - return true; - // Only equivalent tensors are supported at the moment. E.g., when - // taking a tensor.extract_slice of an init_tensor, we can currently - // not eliminate the init_tensor. - SmallVector opOperands = getAliasingOpOperand(opResult); - if (!llvm::all_of(opOperands, [](OpOperand *operand) { - return bufferRelation(*operand) == BufferRelation::Equivalent; - })) - return true; +LogicalResult mlir::linalg::eliminateInsertSliceAnchoredInitTensorOps( + FuncOp funcOp, BufferizationAliasInfo &aliasInfo, DominanceInfo &domInfo) { + return initTensorElimination( + funcOp, aliasInfo, domInfo, + [](OpOperand &operand) { + auto insertSliceOp = dyn_cast(operand.getOwner()); + if (!insertSliceOp) return false; - }); - // Replace only if the InsertSliceOp source originates from exactly one - // InitTensorOp. - if (maybeInitTensor.size() != 1 || - !maybeInitTensor.front().getDefiningOp()) - return WalkResult::skip(); - Value initTensor = maybeInitTensor.front(); - - b.setInsertionPoint(initTensor.getDefiningOp()); - auto extractOp = b.create( - initTensor.getLoc(), insertOp.dest(), insertOp.getMixedOffsets(), - insertOp.getMixedSizes(), insertOp.getMixedStrides()); - // Uses of the InitTensorOp are replaced here, but the op is not deleted. - // InitTensorOps without uses are ignored by the bufferization. - initTensor.replaceAllUsesWith(extractOp.result()); - aliasInfo.createAliasInfoEntry(extractOp.result()); - - // Run analysis on the ExtractSliceOp. - if (failed(bufferizableInPlaceAnalysisAliasOnlyOp( - extractOp->getOpOperand(0), aliasInfo, domInfo))) - return WalkResult::interrupt(); - - // Advance to the next operation. - return WalkResult::advance(); - }); - - return failure(status.wasInterrupted()); + // Only inplace bufferized InsertSliceOps are eligible. 
+ if (getInPlace(insertSliceOp->getOpResult(0)) != InPlaceSpec::True) + return false; + return &operand == &insertSliceOp->getOpOperand(0) /*source*/; + }, + [](OpBuilder &b, Location loc, OpOperand &operand) { + auto insertSliceOp = cast(operand.getOwner()); + auto extractOp = b.create( + loc, insertSliceOp.dest(), insertSliceOp.getMixedOffsets(), + insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); + return extractOp.result(); + }); } -void LinalgComprehensiveModuleBufferize::runOnOperation() { - if (!allocationFns) { - // The allocation functions to use needs to be set here. The flag for the - // pass and flag for the use of alloca map to LLVM command line - // options. These being static global objects have no set order in which - // they are defined. So ideally this should be in the constructor, but the - // constructor might be called before the flag is initialized using the - // command line option. So this is set up at the start of the pass. - if (useAlloca) { - AllocationCallbacks allocaAllocationFns = { - allocationFnUsingAlloca, [](OpBuilder &b, Location loc, Value v) {}}; - allocationFns = - std::make_unique(std::move(allocaAllocationFns)); - } else { - allocationFns = std::make_unique(); - } - } - ModuleOp moduleOp = getOperation(); - applyEnablingTransformations(moduleOp); - +LogicalResult +mlir::linalg::runComprehensiveBufferize(ModuleOp moduleOp, + const BufferizationOptions &options) { SmallVector orderedFuncOps; DenseMap> callerMap; DenseMap bufferizedFunctionTypes; if (failed(getFuncOpsOrderedByCalls(moduleOp, orderedFuncOps, callerMap))) - return signalPassFailure(); + return failure(); DominanceInfo domInfo(moduleOp); BufferizationAliasInfo aliasInfo(moduleOp); @@ -3123,48 +2258,41 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { // If the analysis fails, just return. if (failed(inPlaceAnalysisFuncOpBody(funcOp, aliasInfo, domInfo, - analysisFuzzerSeed))) { - signalPassFailure(); - return; - } + options.analysisFuzzerSeed))) + return failure(); // Try to eliminate InitTensorOps to avoid new allocations during the // bufferization phase. - if (failed(runInitTensorElimination(funcOp, aliasInfo, domInfo))) { - signalPassFailure(); - return; - } + if (failed(eliminateInsertSliceAnchoredInitTensorOps(funcOp, aliasInfo, + domInfo))) + return failure(); // Bufferization phase. - if (!testAnalysisOnly) { + if (!options.testAnalysisOnly) { BlockAndValueMapping tensorToBufferMap; if (failed(bufferizeFuncOpInternals(funcOp, tensorToBufferMap, aliasInfo, - *allocationFns, - bufferizedFunctionTypes))) { - signalPassFailure(); - return; - } + *options.allocationFns, + bufferizedFunctionTypes))) + return failure(); } } // Don't drop the attributes if we only want to report the analysis. - if (testAnalysisOnly) - return; + if (options.testAnalysisOnly) + return success(); for (FuncOp funcOp : orderedFuncOps) { // Note: It would be good to apply cleanups here but we cannot as aliasInfo // would be invalidated. 
if (failed(bufferizeFuncOpBoundary(funcOp, aliasInfo, - bufferizedFunctionTypes))) { - signalPassFailure(); - return; - } - if (!allowReturnMemref && + bufferizedFunctionTypes))) + return failure(); + + if (!options.allowReturnMemref && llvm::any_of(funcOp.getType().getResults(), [](Type t) { return t.isa(); })) { funcOp->emitError("memref return type is unsupported"); - signalPassFailure(); - return; + return failure(); } } @@ -3180,13 +2308,1182 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() { removeBufferizationFuncArguments(bbArg); }); - OpPassManager cleanupPipeline("builtin.module"); - cleanupPipeline.addPass(createCanonicalizerPass()); - cleanupPipeline.addPass(createCSEPass()); - cleanupPipeline.addPass(createLoopInvariantCodeMotionPass()); - (void)runPipeline(cleanupPipeline, moduleOp); + return success(); +} + +//===----------------------------------------------------------------------===// +// BufferizableOpInterface Implementations +//===----------------------------------------------------------------------===// + +// TODO: Move these to a different file and BUILD target, so that they are +// decoupled from ComprehensiveBufferize. + +namespace mlir { +namespace linalg { +namespace arith_ext { + +struct ConstantOpInterface + : public BufferizableOpInterface::ExternalModel { + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + return {}; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto constantOp = cast(op); + if (!isaTensor(constantOp.getResult().getType())) + return success(); + assert(constantOp.getType().dyn_cast() && + "not a constant ranked tensor"); + auto moduleOp = constantOp->getParentOfType(); + if (!moduleOp) { + return constantOp.emitError( + "cannot bufferize constants not within builtin.module op"); + } + GlobalCreator globalCreator(moduleOp); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(constantOp); + + auto globalMemref = globalCreator.getGlobalFor(constantOp); + Value memref = b.create( + constantOp.getLoc(), globalMemref.type(), globalMemref.getName()); + aliasInfo.insertNewBufferEquivalence(memref, constantOp.getResult()); + map(bvm, constantOp, memref); + + return success(); + } + + bool isWritable(Operation *op, OpResult opResult) const { + // Memory locations returned by memref::GetGlobalOp may not be written to. + return false; + } +}; + +} // namespace arith_ext + +// TODO: Ops in the linalg dialect can directly implement this interface. +namespace linalg_ext { + +/// Helper function for LinalgOp bufferization. +/// When allocating a new buffer, analyze whether `op` wants to read from that +/// buffer. Only in that case, a copy of the result buffer may be needed. +static LogicalResult allocateBuffersForResults( + OpBuilder &b, Location loc, LinalgOp op, + SmallVectorImpl &resultBuffers, BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, AllocationCallbacks &allocationFns) { + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(op); + + // TODO: provide the proper interface to iterate on OpResults and get the + // matching OpOperands.
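+  // Illustrative note on the loop below: when the payload never reads an
+  // output operand's value (e.g., a fill-like op that overwrites the whole
+  // tensor), `skipCopy` is set and the prior contents of the result buffer
+  // are not copied, since they would be dead anyway.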
+  for (OpOperand *opOperand : op.getOutputOperands()) { + OpResult opResult = cast(op.getOperation()) + .getAliasingOpResult(*opOperand); + assert(opResult && "could not find corresponding OpResult"); + bool skipCopy = !op.payloadUsesValueFromOperand(opOperand); + Value resultBuffer = + getResultBuffer(b, opResult, bvm, aliasInfo, allocationFns, skipCopy); + if (!resultBuffer) + return failure(); + resultBuffers.push_back(resultBuffer); + } + + if (op->getNumResults()) + map(bvm, op->getResults(), resultBuffers); + + return success(); } -std::unique_ptr mlir::createLinalgComprehensiveModuleBufferizePass() { - return std::make_unique(); +/// Generic conversion for any LinalgOp on tensors. +static LogicalResult bufferizeLinalgOp(OpBuilder &b, LinalgOp op, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFns) { + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + + // Ensure op has only tensors. Allow mixed tensor-buffer mode on a per-need + // basis. + if (!op.hasTensorSemantics()) + return op->emitError() << "op does not have tensor semantics"; + + Location loc = op.getLoc(); + SmallVector newInputBuffers; + newInputBuffers.reserve(op.getNumInputs()); + for (OpOperand *opOperand : op.getInputOperands()) { + if (op.isScalar(opOperand)) { + newInputBuffers.push_back(opOperand->get()); + continue; + } + newInputBuffers.push_back(lookup(bvm, opOperand->get())); + assert(newInputBuffers.back() && "missing buffer"); + } + SmallVector newOutputBuffers; + // Try to allocate new buffers depending on op's inplace semantics. + if (failed(allocateBuffersForResults(b, loc, op, newOutputBuffers, bvm, + aliasInfo, allocationFns))) + return failure(); + + // Clone the newly bufferized op. + SmallVector newOperands = newInputBuffers; + newOperands.append(newOutputBuffers.begin(), newOutputBuffers.end()); + + // Set insertion point now that potential alloc/dealloc are introduced. + b.setInsertionPoint(op); + op.clone(b, loc, /*resultTypes=*/TypeRange{}, newOperands); + + // Replace the results of the old op with the new output buffers. + if (op->getNumResults()) + map(bvm, op->getResults(), newOutputBuffers); + + // The original op will be DCE'd away later. + + return success(); } + +template +struct LinalgOpInterface + : public BufferizableOpInterface::ExternalModel, + OpTy> { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + auto genericOp = cast(op); + return genericOp.isInputTensor(&opOperand) || + genericOp.isInitTensor(&opOperand); + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + auto genericOp = cast(op); + return genericOp.isOutputTensor(&opOperand); + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + auto genericOp = cast(op); + return {genericOp.getOutputTensorOperands()[opResult.getResultNumber()]}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + auto genericOp = cast(op); + if (!opOperand.get().getType().isa()) + return OpResult(); + // For now assume inputs are never inplaceable. + // TODO: refine this.
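+    // Illustrative example of the index mapping computed below: with inputs
+    // (%a, %b) and outputs (%t : tensor, %m : memref, %u : tensor), operand
+    // %u has output index 2; one interleaved memref output precedes it, so
+    // %u ties to tensor result #1.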
+ if (opOperand.getOperandNumber() < genericOp.getNumInputs()) + return OpResult(); + int64_t outputOperandIndex = + opOperand.getOperandNumber() - genericOp.getNumInputs(); + int64_t numOutputBuffers = 0; + for (unsigned idx = 0; idx < outputOperandIndex; ++idx) + if (!genericOp.getOutputOperand(idx)->get().getType().isa()) + ++numOutputBuffers; + return genericOp->getResult(outputOperandIndex - numOutputBuffers); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::Equivalent; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + return bufferizeLinalgOp(b, cast(op), bvm, aliasInfo, + allocationFn); + } +}; + +struct InitTensorOpInterface + : public BufferizableOpInterface::ExternalModel { + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + return {}; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto initTensorOp = cast(op); + + // The InitTensorOp may have been eliminated. + if (initTensorOp->getUses().empty()) + return success(); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(initTensorOp); + + Value alloc = createNewAllocDeallocPairForShapedValue( + b, initTensorOp->getLoc(), initTensorOp.result(), aliasInfo, + allocationFn); + map(bvm, initTensorOp.result(), alloc); + return success(); + } +}; + +struct TiledLoopOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + // TiledLoop alone doesn't bufferize to a memory read, one of the uses of + // its matching bbArg may. + auto tiledLoopOp = cast(op); + return isValueRead(tiledLoopOp.getTiedBlockArgument(opOperand)); + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + // TiledLoop alone doesn't bufferize to a memory write, one of the uses of + // its matching bbArg may. + auto bufferizableOp = cast(op); + return static_cast(bufferizableOp.getAliasingOpResult(opOperand)); + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + // TODO: TiledLoopOp helper method to avoid leaking impl details. + auto tiledLoopOp = cast(op); + return {&op->getOpOperand(tiledLoopOp.getNumControlOperands() + + tiledLoopOp.getNumInputs() + + opResult.getResultNumber())}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + auto tiledLoopOp = cast(op); + return tiledLoopOp.getTiedOpResult(opOperand); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::Equivalent; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto tiledLoopOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + + // Allocate output buffers if needed, forward output tensor args to the + // terminator. + Operation *yieldOp = tiledLoopOp.getBody()->getTerminator(); + Block *body = tiledLoopOp.getBody(); + + // Take copies of the old input and output operands, so we can insert + // inplace easily. 
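+    // For reference (illustrative summary, inferred from the segment-size
+    // attribute set at the end of this method), the operand layout of
+    // linalg.tiled_loop is:
+    //   [lowerBounds, upperBounds, steps, inputs, outputs]
+    // with three numLoops-sized control segments; the bbArg and operand
+    // indices below are therefore offset by numLoops / numControlOperands.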
+ auto oldInputs = llvm::to_vector<4>(tiledLoopOp.inputs()); + auto oldOutputs = llvm::to_vector<4>(tiledLoopOp.outputs()); + + int numLoops = tiledLoopOp.getNumLoops(); + int numControlOperands = tiledLoopOp.getNumControlOperands(); + + // Add buffers for outputs and the corresponding block arguments. + // Keep separate iterators to increment without further leaking impl. + // details. Start with outputs to avoid interference from new input buffers. + int numNewOutputBuffers = 0; + int resultIndex = 0; + int oldOutputBBArgIndex = numLoops + oldInputs.size(); + int nextOutputBBArgIndex = numLoops + oldInputs.size() + oldOutputs.size(); + int nextOutputOperandIndex = + numControlOperands + oldInputs.size() + oldOutputs.size(); + for (Value oldOutputTensor : oldOutputs) { + if (!oldOutputTensor.getType().isa()) { + // Skip and increment the old bbarg index only. + ++oldOutputBBArgIndex; + // Do not increment resultIndex as only tensors are returned. + // TODO: better interface to avoid leaking such impl details. + continue; + } + + assert(oldOutputTensor.getType().isa() && + "bufferizable output must be a ranked tensor"); + + const OpResult &opResult = tiledLoopOp->getResult(resultIndex); + OpOperand &yieldOperand = yieldOp->getOpOperand(resultIndex); + Value resultBuffer = + getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn); + if (!resultBuffer) + return failure(); + + // Insert mapping and aliasing info. + aliasInfo.createAliasInfoEntry(resultBuffer); + aliasInfo.insertNewBufferEquivalence(opResult, resultBuffer); + map(bvm, opResult, resultBuffer); + + // Insert new operand and bbArg. + tiledLoopOp->insertOperands(nextOutputOperandIndex, resultBuffer); + BlockArgument newBufferBBArg = + body->insertArgument(nextOutputBBArgIndex, resultBuffer.getType()); + BlockArgument oldTensorBBArg = body->getArgument(oldOutputBBArgIndex); + // Insert mapping and aliasing info. + aliasInfo.createAliasInfoEntry(newBufferBBArg); + aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg); + map(bvm, oldTensorBBArg, newBufferBBArg); + + // Set operand of `linalg.yield` to the bbArg so it just canonicalizes + // away later. + yieldOperand.set(oldTensorBBArg); + + // Increment indices. + ++numNewOutputBuffers; + ++resultIndex; + ++oldOutputBBArgIndex; + ++nextOutputBBArgIndex; + ++nextOutputOperandIndex; + } + + // Add buffers for inputs and the corresponding block arguments. + // Keep separate iterators to increment without further leaking impl. + // details. + int numNewInputBuffers = 0; + int oldInputBBArgIndex = numLoops; + int nextInputBBArgIndex = numLoops + oldInputs.size(); + int nextInputOperandIndex = numControlOperands + oldInputs.size(); + for (Value oldInputTensor : oldInputs) { + if (!oldInputTensor.getType().isa()) { + // Skip and increment the old bbarg index only. + ++oldInputBBArgIndex; + continue; + } + + Value inputBuffer = lookup(bvm, oldInputTensor); + assert(inputBuffer && " missing buffer for operand"); + + // Insert new operand and bbArg. + tiledLoopOp->insertOperands(nextInputOperandIndex, inputBuffer); + BlockArgument newBufferBBArg = + body->insertArgument(nextInputBBArgIndex, inputBuffer.getType()); + BlockArgument oldTensorBBArg = body->getArgument(oldInputBBArgIndex); + + // Insert mapping and aliasing info. + aliasInfo.createAliasInfoEntry(newBufferBBArg); + aliasInfo.insertNewBufferEquivalence(oldTensorBBArg, newBufferBBArg); + map(bvm, oldTensorBBArg, newBufferBBArg); + + // Increment indices. 
+ ++numNewInputBuffers; + ++oldInputBBArgIndex; + ++nextInputBBArgIndex; + ++nextInputOperandIndex; + } + + // Update segment sizes. + // TODO: Helper method to avoid leaking impl details. + tiledLoopOp->setAttr( + TiledLoopOp::getOperandSegmentSizeAttr(), + b.getI32VectorAttr( + {numLoops, numLoops, numLoops, + static_cast(oldInputs.size()) + numNewInputBuffers, + static_cast(oldOutputs.size()) + numNewOutputBuffers})); + + return success(); + } +}; + +struct YieldOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return false; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return OpResult(); + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto yieldOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + // Cannot create IR past a yieldOp. + b.setInsertionPoint(yieldOp); + + // No tensors -> success. + if (!llvm::any_of(yieldOp.getOperandTypes(), isaTensor)) + return success(); + // linalg::YieldOp nested under TiledLoop must just canonicalize. + if (yieldOp->getParentOfType()) + return success(); + llvm_unreachable("unexpected yieldOp"); + } +}; + +} // namespace linalg_ext + +namespace scf_ext { + +struct IfOpInterface + : public BufferizableOpInterface::ExternalModel { + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + auto ifOp = cast(op); + // Either one of the corresponding yield values from the then/else branches + // may alias with the result. + size_t resultNum = std::distance(op->getOpResults().begin(), + llvm::find(op->getOpResults(), opResult)); + return {&ifOp.thenYield()->getOpOperand(resultNum), + &ifOp.elseYield()->getOpOperand(resultNum)}; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto ifOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + + for (OpResult opResult : ifOp->getResults()) { + if (!opResult.getType().isa()) + continue; + // TODO: Atm we bail on unranked TensorType because we don't know how to + // alloc an UnrankedMemRefType + its underlying ranked MemRefType. + assert(opResult.getType().isa() && + "unsupported unranked tensor"); + + Value resultBuffer = + getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn); + if (!resultBuffer) + return failure(); + + aliasInfo.createAliasInfoEntry(resultBuffer); + map(bvm, opResult, resultBuffer); + } + + return success(); + } +}; + +struct ForOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + // scf::ForOp alone doesn't bufferize to a memory read, one of the uses of + // its matching bbArg may. + auto forOp = cast(op); + return isValueRead(forOp.getRegionIterArgForOpOperand(opOperand)); + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + // Tensor iter_args of scf::ForOps are always considered as a write. This is + // to simplify the analysis. + // TODO: Consider doing sth. like isValueWritten. 
+ return true; + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + auto forOp = cast(op); + return {&forOp.getIterOpOperands()[opResult.getResultNumber()]}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + auto forOp = cast(op); + if (!opOperand.get().getType().isa()) + return OpResult(); + return forOp.getResultForOpOperand(opOperand); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::Equivalent; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto forOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + + for (OpResult opResult : forOp->getResults()) { + if (!opResult.getType().isa()) + continue; + // TODO: Atm we bail on unranked TensorType because we don't know how to + // alloc an UnrankedMemRefType + its underlying ranked MemRefType. + assert(opResult.getType().isa() && + "unsupported unranked tensor"); + + // TODO: More general: Matching bbArg does not bufferize to a read. + Value resultBuffer = + getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn); + if (!resultBuffer) + return failure(); + + OpOperand &opOperand = forOp.getOpOperandForResult(opResult); + BlockArgument bbArg = forOp.getRegionIterArgForOpOperand(opOperand); + aliasInfo.createAliasInfoEntry(resultBuffer); + aliasInfo.insertNewBufferEquivalence(bbArg, resultBuffer); + map(bvm, bbArg, resultBuffer); + map(bvm, opResult, resultBuffer); + } + + return success(); + } +}; + +struct YieldOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return false; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return OpResult(); + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto yieldOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + // Cannot create IR past a yieldOp. + b.setInsertionPoint(yieldOp); + + if (auto execOp = dyn_cast(yieldOp->getParentOp())) { + if (execOp->getNumResults() != 0) + return execOp->emitError( + "expected result-less scf.execute_region containing op"); + return success(); + } + + if (auto ifOp = dyn_cast(yieldOp->getParentOp())) + return success(); + + scf::ForOp forOp = dyn_cast(yieldOp->getParentOp()); + if (!forOp) + return yieldOp->emitError("expected scf::ForOp parent for scf::YieldOp"); + for (OpOperand &operand : yieldOp->getOpOperands()) { + auto tensorType = operand.get().getType().dyn_cast(); + if (!tensorType) + continue; + + OpOperand &forOperand = forOp.getOpOperandForResult( + forOp->getResult(operand.getOperandNumber())); + auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand); + Value yieldedBuffer = lookup(bvm, operand.get()); + Value bbArgBuffer = lookup(bvm, bbArg); + if (!aliasInfo.areEquivalentBufferizedValues(yieldedBuffer, + bbArgBuffer)) { + // TODO: this could get resolved with copies but it can also turn into + // swaps so we need to be careful about order of copies. 
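+        // Illustrative example: with iter_args (%arg1, %arg2), an
+        // `scf.yield %arg2, %arg1` swaps the two buffers; copying one operand
+        // first would clobber a value the other copy still needs.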
+ return yieldOp->emitError() + << "Yield operand #" << operand.getOperandNumber() + << " does not bufferize to an equivalent buffer to the matching" + << " enclosing scf::for operand"; + } + + // Buffers are equivalent so the work is already done and we just yield + // the bbArg so that it later canonicalizes away. + operand.set(bbArg); + } + return success(); + } +}; + +} // namespace scf_ext + +namespace std_ext { + +struct CallOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + // CallOpInterface alone doesn't bufferize to a memory read, one of the uses + // of the matching bbArg may. It is the responsibility of the caller to + // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be + // conservative. + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + // CallOpInterface alone doesn't bufferize to a memory write, one of the + // uses of the matching bbArg may. It is the responsibility of the caller to + // inspect bbArgs. In the absence of a BufferizationAliasInfo, we need to be + // conservative. + return true; + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + // TODO: Can we do better? + return {}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + // CallOpInterface is special, it needs to wait for the callee to be + // bufferized and needs to inspect the BufferAliasInfo object. It can't + // make a proper determination by itself and needs to be conservative. + return OpResult(); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::Equivalent; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + llvm_unreachable("CallOps are handled separately"); + return failure(); + } +}; + +struct ReturnOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return false; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return OpResult(); + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto returnOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + // Cannot insert after returnOp. 
+ b.setInsertionPoint(returnOp); + + assert(isa(returnOp->getParentOp()) && + "only support FuncOp parent for ReturnOp"); + for (OpOperand &operand : returnOp->getOpOperands()) { + auto tensorType = operand.get().getType().dyn_cast(); + if (!tensorType) + continue; + Value v = lookup(bvm, operand.get()); + assert(v && "missing buffer for result"); + Value returnTensor = b.create(returnOp.getLoc(), v); + operand.set(returnTensor); + aliasInfo.insertNewBufferEquivalence(returnTensor, v); + map(bvm, returnTensor, v); + } + return success(); + } +}; + +} // namespace std_ext + +namespace tensor_ext { + +struct CastOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return false; + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + return {&op->getOpOperand(0)}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return op->getResult(0); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::Equivalent; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto castOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(castOp); + + Value resultBuffer = + getResultBuffer(b, castOp->getResult(0), bvm, aliasInfo, allocationFn); + if (!resultBuffer) + return failure(); + Type sourceType = resultBuffer.getType(); + auto rankedMemRefType = sourceType.dyn_cast(); + auto unrankedMemRefType = sourceType.dyn_cast(); + assert(rankedMemRefType || unrankedMemRefType); + Attribute memorySpace = rankedMemRefType + ? rankedMemRefType.getMemorySpace() + : unrankedMemRefType.getMemorySpace(); + TensorType tensorType = castOp.getResult().getType().cast(); + MemRefLayoutAttrInterface layout = + rankedMemRefType && tensorType.isa() + ? rankedMemRefType.getLayout() + : MemRefLayoutAttrInterface(); + Type memRefType = getContiguousOrUnrankedMemRefType( + castOp.getResult().getType(), layout, memorySpace); + Value res = + b.create(castOp.getLoc(), memRefType, resultBuffer); + aliasInfo.insertNewBufferEquivalence(res, castOp.getResult()); + map(bvm, castOp.getResult(), res); + return success(); + } +}; + +struct DimOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return false; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return OpResult(); + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto dimOp = cast(op); + + // Take a guard before anything else. 
+ OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(dimOp); + + if (dimOp.source().getType().isa()) { + Value v = lookup(bvm, dimOp.source()); + assert(v && "missing buffer"); + dimOp.result().replaceAllUsesWith( + b.create(dimOp.getLoc(), v, dimOp.index())); + } + return success(); + } +}; + +struct ExtractSliceOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return false; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return false; + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + return {&op->getOpOperand(0) /*source*/}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return &opOperand == &op->getOpOperand(0) /*source*/ + ? op->getResult(0) + : OpResult(); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::None; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto extractSliceOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + + LDBG("bufferize: " << *extractSliceOp << '\n'); + + Location loc = extractSliceOp.getLoc(); + // Bail if source was not bufferized. + Value srcMemref = lookup(bvm, extractSliceOp.source()); + if (!srcMemref) + return failure(); + auto srcMemrefType = srcMemref.getType().cast(); + auto dstTensorType = + extractSliceOp.result().getType().cast(); + + // If not inplaceable, alloc. + Value alloc; + auto inPlace = getInPlace(extractSliceOp->getResult(0)); + if (inPlace != InPlaceSpec::True) + alloc = createNewAllocDeallocPairForShapedValue( + b, loc, extractSliceOp.result(), aliasInfo, allocationFn); + + // Set insertion point now that potential alloc/dealloc are introduced. + b.setInsertionPoint(extractSliceOp); + + // Bufferize to subview. + auto subviewMemRefType = + memref::SubViewOp::inferRankReducedResultType( + dstTensorType.getRank(), srcMemrefType, + extractSliceOp.getMixedOffsets(), extractSliceOp.getMixedSizes(), + extractSliceOp.getMixedStrides()) + .cast(); + Value subView = b.create( + loc, subviewMemRefType, srcMemref, extractSliceOp.getMixedOffsets(), + extractSliceOp.getMixedSizes(), extractSliceOp.getMixedStrides()); + // Insert new alias. + aliasInfo.insertNewBufferAlias(subView, srcMemref); + + /// If not inplaceable, copy. + if (alloc) { + // Do not copy if the copied data is never read. + if (isValueRead(extractSliceOp.result())) + allocationFn.memCpyFn(b, extractSliceOp.getLoc(), subView, alloc); + subView = alloc; + } + + map(bvm, extractSliceOp.result(), subView); + return success(); + } +}; + +struct ExtractOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return false; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return OpResult(); + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto extractOp = cast(op); + + // Take a guard before anything else. 
+    OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(extractOp); + + Location loc = extractOp.getLoc(); + Value srcMemref = lookup(bvm, extractOp.tensor()); + Value l = b.create(loc, srcMemref, extractOp.indices()); + extractOp.replaceAllUsesWith(l); + return success(); + } +}; + +/// Return true if the source of an `insertSliceOp` bufferizes to an +/// equivalent ExtractSliceOp that bufferizes inplace. +static bool isSourceEquivalentToAMatchingInplaceExtractSliceOp( + const BufferizationAliasInfo &aliasInfo, InsertSliceOp insertSliceOp) { + LDBG("isSourceEquivalentToAMatchingInplaceExtractSliceOp: " << *insertSliceOp + << '\n'); + bool foundOp = false; + aliasInfo.applyOnEquivalenceClass(insertSliceOp.source(), [&](Value value) { + auto extractSliceOp = value.getDefiningOp(); + if (extractSliceOp && + areEquivalentExtractSliceOps(aliasInfo, extractSliceOp, + insertSliceOp) && + getInPlace(extractSliceOp.result()) == InPlaceSpec::True) { + LDBG("\tfound: " << extractSliceOp.getOperation() << '\n'); + foundOp = true; + } + }); + + if (!foundOp) + LDBG("\tnot equivalent\n"); + + return foundOp; +} + +struct InsertSliceOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + return &opOperand == &op->getOpOperand(1) /*dest*/; + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + return {&op->getOpOperand(1) /*dest*/}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return &opOperand == &op->getOpOperand(1) /*dest*/ + ? op->getResult(0) + : OpResult(); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::Equivalent; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto insertSliceOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(insertSliceOp); + + LDBG("bufferize: " << *insertSliceOp << '\n'); + + Location loc = insertSliceOp.getLoc(); + // Since insert_slice ops arise from tiling and introducing loops, this + // case is generally a deal breaker. When used with loops, this ends up + // cloning the whole tensor on every single iteration and is a symptom + // of a catastrophically bad scheduling decision. + // TODO: be very loud about it or even consider failing the pass. + // Alloc a copy for `insertSliceOp.dest()`; it will become the result + // buffer. + Value dstMemref = getResultBuffer(b, insertSliceOp->getResult(0), bvm, + aliasInfo, allocationFn); + if (!dstMemref) + return failure(); + auto dstMemrefType = dstMemref.getType().cast(); + + Value srcMemref = lookup(bvm, insertSliceOp.source()); + if (!srcMemref) + return failure(); + auto subviewMemRefType = + memref::SubViewOp::inferRankReducedResultType( + insertSliceOp.getSourceType().getRank(), dstMemrefType, + insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(), + insertSliceOp.getMixedStrides()) + .cast(); + + // A copy of the source buffer is needed if either: + // - The producer of `source` is not inplace. This is the case where a + // slice is computed out of place into the inplace full tensor. + // - The result is not inplace. This is the case where the whole tensor is + // cloned and the clone needs to be updated.
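+    // Illustrative example of the first case: if the source comes from an
+    // out-of-place extract_slice of the destination, its buffer is a fresh
+    // allocation rather than a view into the destination buffer, so it must
+    // be copied into the matching subview of the destination below.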
+ auto inPlace = getInPlace(insertSliceOp->getResult(0)); + // TODO: Is this necessary? + if (!isSourceEquivalentToAMatchingInplaceExtractSliceOp(aliasInfo, + insertSliceOp) || + inPlace != InPlaceSpec::True) { + LDBG("insert_slice needs extra source copy: " << insertSliceOp.source() + << " -> copy\n"); + // Take a subview of the dst. + Value subView = b.create( + loc, subviewMemRefType, dstMemref, insertSliceOp.getMixedOffsets(), + insertSliceOp.getMixedSizes(), insertSliceOp.getMixedStrides()); + // Insert new alias. + aliasInfo.insertNewBufferAlias(subView, dstMemref); + allocationFn.memCpyFn(b, insertSliceOp.getLoc(), srcMemref, subView); + } + + map(bvm, insertSliceOp.result(), dstMemref); + + return success(); + } +}; + +} // namespace tensor_ext + +namespace vector_ext { + +struct TransferReadOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + assert(opOperand.get().getType().isa() && + "only tensor types expected"); + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + assert(opOperand.get().getType().isa() && + "only tensor types expected"); + return false; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + return OpResult(); + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto transferReadOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(op); + + if (transferReadOp.getShapedType().isa()) + return failure(); + + // TransferReadOp always reads from the bufferized op.source(). + Value v = lookup(bvm, transferReadOp.source()); + assert(v && "missing buffer"); + transferReadOp.sourceMutable().assign(v); + return success(); + } +}; + +struct TransferWriteOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand) const { + assert(opOperand.get().getType().isa() && + "only tensor types expected"); + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand) const { + assert(opOperand.get().getType().isa() && + "only tensor types expected"); + return true; + } + + SmallVector getAliasingOpOperand(Operation *op, + OpResult opResult) const { + return {&op->getOpOperand(1)}; + } + + OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand) const { + assert(opOperand.get().getType().isa() && + "only tensor types expected"); + return op->getOpResult(0); + } + + BufferRelation bufferRelation(Operation *op, OpOperand &opOperand) const { + return BufferRelation::Equivalent; + } + + LogicalResult bufferize(Operation *op, OpBuilder &b, + BlockAndValueMapping &bvm, + BufferizationAliasInfo &aliasInfo, + AllocationCallbacks &allocationFn) const { + auto writeOp = cast(op); + + // Take a guard before anything else. + OpBuilder::InsertionGuard g(b); + b.setInsertionPoint(op); + + if (writeOp.getShapedType().isa()) + return failure(); + + // Create a new transfer_write on buffer that doesn't have a return value. + // Leave the previous transfer_write to dead code as it still has uses at + // this point. 
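    // Hedged before/after sketch of this rewrite (types abbreviated, value
    // names assumed). The tensor-typed transfer_write
    //
    //   %r = vector.transfer_write %v, %t[%c0] : vector<4xf32>, tensor<?xf32>
    //
    // becomes a write into the buffer associated with its result:
    //
    //   vector.transfer_write %v, %buf[%c0] : vector<4xf32>, memref<?xf32>
    //
    // while the old tensor op is left behind for dead-code cleanup.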
+ Value resultBuffer = + getResultBuffer(b, op->getResult(0), bvm, aliasInfo, allocationFn); + if (!resultBuffer) + return failure(); + b.create( + writeOp.getLoc(), writeOp.vector(), resultBuffer, writeOp.indices(), + writeOp.permutation_map(), + writeOp.in_bounds() ? *writeOp.in_bounds() : ArrayAttr()); + map(bvm, op->getResult(0), resultBuffer); + + return success(); + } +}; + +} // namespace vector_ext + +namespace { + +/// Helper structure that iterates over all LinalgOps in `OpTys` and registers +/// the `BufferizableOpInterface` with each of them. +template struct LinalgOpInterfaceHelper; + +template +struct LinalgOpInterfaceHelper { + static void registerOpInterface(DialectRegistry ®istry) { + registry.addOpInterface>(); + LinalgOpInterfaceHelper::registerOpInterface(registry); + } +}; + +template <> struct LinalgOpInterfaceHelper<> { + static void registerOpInterface(DialectRegistry ®istry) {} +}; + +} // namespace + +void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry) { + registry.addOpInterface(); + registry.addOpInterface(); + registry + .addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + registry.addOpInterface(); + + // Register all Linalg structured ops. `LinalgOp` is an interface and it is + // not possible to attach an external interface to an existing interface. + // Therefore, attach the `BufferizableOpInterface` to all ops one-by-one. + LinalgOpInterfaceHelper< +#define GET_OP_LIST +#include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc" + >::registerOpInterface(registry); +} + +} // namespace linalg +} // namespace mlir diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 840978bf56484..d75b5742b38d3 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -1,7 +1,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms Bufferize.cpp CodegenStrategy.cpp - ComprehensiveBufferize.cpp + ComprehensiveBufferizePass.cpp Detensorize.cpp Distribution.cpp DropUnitDims.cpp @@ -32,7 +32,9 @@ add_mlir_dialect_library(MLIRLinalgTransforms MLIRAffineUtils MLIRAnalysis MLIRArithmetic + MLIRBufferizableOpInterface MLIRComplex + MLIRComprehensiveBufferize MLIRInferTypeOpInterface MLIRIR MLIRMemRef diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp new file mode 100644 index 0000000000000..d5d1743ef48ce --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -0,0 +1,86 @@ +//===- ComprehensiveBufferize.cpp - Single pass bufferization -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h" +#include "mlir/Dialect/Linalg/Passes.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" + +using namespace mlir; +using namespace mlir::linalg; + +namespace { +struct LinalgComprehensiveModuleBufferize + : public LinalgComprehensiveModuleBufferizeBase< + LinalgComprehensiveModuleBufferize> { + LinalgComprehensiveModuleBufferize() {} + + LinalgComprehensiveModuleBufferize( + const LinalgComprehensiveModuleBufferize &p) {} + + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry + .insert(); + registerBufferizableOpInterfaceExternalModels(registry); + } +}; +} // end namespace + +static void applyEnablingTransformations(ModuleOp moduleOp) { + RewritePatternSet patterns(moduleOp.getContext()); + patterns.add(moduleOp.getContext()); + (void)applyPatternsAndFoldGreedily(moduleOp, std::move(patterns)); +} + +static Optional +allocationFnUsingAlloca(OpBuilder &b, Location loc, MemRefType type, + const SmallVector &dynShape) { + Value allocated = b.create( + loc, type, dynShape, b.getI64IntegerAttr(kBufferAlignments)); + return allocated; +} + +void LinalgComprehensiveModuleBufferize::runOnOperation() { + BufferizationOptions options; + if (useAlloca) { + options.allocationFns->allocationFn = allocationFnUsingAlloca; + options.allocationFns->deallocationFn = [](OpBuilder &b, Location loc, + Value v) {}; + } + options.allowReturnMemref = allowReturnMemref; + options.analysisFuzzerSeed = analysisFuzzerSeed; + options.testAnalysisOnly = testAnalysisOnly; + + ModuleOp moduleOp = getOperation(); + applyEnablingTransformations(moduleOp); + + if (failed(runComprehensiveBufferize(moduleOp, options))) { + signalPassFailure(); + return; + } + + if (options.testAnalysisOnly) + return; + + OpPassManager cleanupPipeline("builtin.module"); + cleanupPipeline.addPass(createCanonicalizerPass()); + cleanupPipeline.addPass(createCSEPass()); + cleanupPipeline.addPass(createLoopInvariantCodeMotionPass()); + (void)runPipeline(cleanupPipeline, moduleOp); +} + +std::unique_ptr mlir::createLinalgComprehensiveModuleBufferizePass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp index bfac63b305860..7156515cedae0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -42,19 +42,62 @@ static SmallVector getTiledSliceDims(OpOperand *consumerOperand, AffineMap indexingMap = consumerOp.getTiedIndexingMap(consumerOperand); // Search the slice dimensions tiled by a tile loop dimension. 
-  DenseSet<int64_t> tiledSliceDims;
+  DenseSet<int64_t> tiledSliceDimIndices;
   for (auto en : enumerate(indexingMap.getResults())) {
     for (auto tiledLoopDim : tiledLoopDims) {
       if (en.value().isFunctionOfDim(tiledLoopDim))
-        tiledSliceDims.insert(en.index());
+        tiledSliceDimIndices.insert(en.index());
     }
   }
-  return {tiledSliceDims.begin(), tiledSliceDims.end()};
+  return {tiledSliceDimIndices.begin(), tiledSliceDimIndices.end()};
+}
+
+/// Given the `tiledSliceDimIndices` that represent the tiled dimensions of the
+/// producer result slice, returns the tiled producer loop dimensions.
+/// Example:
+/// ```
+/// %res = linalg.fill(%cst, %input)
+/// scf.for %i
+///   scf.for %j
+///     %slice = tensor.extract_slice %res[%i, %j]
+/// ```
+/// getTiledProducerLoops(%res, [0, 1]) returns the loop indices [0, 1].
+static SmallVector<int64_t>
+getTiledProducerLoops(OpResult producerResult,
+                      ArrayRef<int64_t> tiledSliceDimIndices) {
+  LinalgOp producerOp = producerResult.getOwner();
+
+  // Get the indexing map of the `producerOp` output operand that matches
+  // `producerResult`.
+  AffineMap producerIndexingMap = producerOp.getTiedIndexingMap(
+      producerOp.getOutputOperand(producerResult.getResultNumber()));
+
+  // Keep only the tiled result slice dimensions of `producerIndexingMap`.
+  AffineMap tiledProducerIndexingSubMap =
+      producerIndexingMap.getSubMap(SmallVector<unsigned>(
+          tiledSliceDimIndices.begin(), tiledSliceDimIndices.end()));
+
+  // Compute the producer loop indices mapped to the tiled result slice
+  // dimensions. As the output indexing maps of structured operations are
+  // projected permutations, `tiledProducerIndexingSubMap` has to be a
+  // projected permutation as well. We can thus obtain the producer loop
+  // indices by getting the positions of the result dimensions.
+  // Example:
+  // (d0, d1, d2) -> (d0, d2) has the result positions [0, 2].
+  assert(tiledProducerIndexingSubMap.isProjectedPermutation() &&
+         "expect slice and producer loop dimensions map one-to-one");
+  SmallVector<int64_t> tiledProducerLoopIndices;
+  transform(llvm::seq<unsigned>(0, tiledProducerIndexingSubMap.getNumResults()),
+            std::back_inserter(tiledProducerLoopIndices), [&](unsigned idx) {
+              return tiledProducerIndexingSubMap.getDimPosition(idx);
+            });
+
+  return tiledProducerLoopIndices;
 }
 
 /// Returns the producer fused in place of `sliceOp`. Tile the producer operands
-/// along the `tiledSliceDims` and clone the producer. Consider the case of
-/// fusion of an output tensor:
+/// along the `tiledSliceDimIndices` and clone the producer. Consider the case
+/// of fusion of an output tensor:
 /// ```
 /// %1 = producer ins(...) outs(%0)
 /// %2 = consumer ins(...) outs(%1)
@@ -84,7 +127,8 @@ static SmallVector<int64_t> getTiledSliceDims(OpOperand *consumerOperand,
 /// producer is fused into a consumer and fold away unused iter_args.
 static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
                                  tensor::ExtractSliceOp sliceOp,
-                                 ArrayRef<int64_t> tiledSliceDims,
+                                 ArrayRef<int64_t> tiledSliceDimIndices,
+                                 ArrayRef<int64_t> tiledProducerLoopIndices,
                                  OpOperand *iterArg) {
   // Clone the producer after `sliceOp` since the slice may be reused to pass in
   // the producer result.
@@ -102,23 +146,16 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult,
                    [](Range range) { return range.size; });
   SmallVector<Range> sliceOpRanges = sliceOp.getOrCreateRanges(b, loc);
 
-  // Get the producer result indexing map.
-  AffineMap producerIndexingMap = producerOp.getTiedIndexingMap(
-      producerOp.getOutputOperand(producerResult.getResultNumber()));
-  // Tile the producer operands given the `sliceOp` ranges. 
Iterate the - // `tiledSliceDims` and store the tile offset and size for the tiled slice - // dimension. Assumes the mapping from slice dimensions to producer loops is a - // permutation. + // `tiledSliceDimIndices` and store the tile offset and size for the tiled + // slice dimension. auto zero = b.create(loc, 0); SmallVector tileIvs(producerOp.getNumLoops(), nullptr); SmallVector tileSizes(producerOp.getNumLoops(), zero); SmallVector allIvs(producerOp.getNumLoops(), nullptr); - for (int64_t tiledSliceDim : tiledSliceDims) { - AffineExpr result = producerIndexingMap.getResults()[tiledSliceDim]; - assert(result.isa() && - "expect producer indexing map is a projected permutation"); - int64_t tiledProducerLoop = result.cast().getPosition(); + for (auto it : zip(tiledSliceDimIndices, tiledProducerLoopIndices)) { + int64_t tiledSliceDim = std::get<0>(it); + int64_t tiledProducerLoop = std::get<1>(it); tileIvs[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].offset; tileSizes[tiledProducerLoop] = sliceOpRanges[tiledSliceDim].size; allIvs[tiledProducerLoop] = tileIvs[tiledProducerLoop]; @@ -156,22 +193,26 @@ static LinalgOp getTiledProducer(OpBuilder &b, OpResult producerResult, // TileLoopNest specific helpers. //===----------------------------------------------------------------------===// -bool TileLoopNest::isEmpty() { return loopOps.empty(); } +bool TileLoopNest::isEmpty() { return tileLoopOps.empty(); } bool TileLoopNest::isValid() { - // Check if the number of `tileLoopOps` and `tileLoopDims` match. - if (loopOps.size() != loopDims.size()) + // Check if `rootOp` has been tiled at least once. + if (isEmpty() || tiledRootAndFusedOpsLoops.count(rootOp) == 0) + return false; + + // Check if the number of loop operations and dimensions match. + if (tileLoopOps.size() != tiledRootAndFusedOpsLoops[rootOp].size()) return false; // Check if the innermost tile loop is the parent of `tiledOp`. - if (rootOp->getParentOp() != loopOps.back()) + if (rootOp->getParentOp() != tileLoopOps.back()) return false; // Check if the tile loops are directly nested. - return std::adjacent_find(loopOps.begin(), loopOps.end(), + return std::adjacent_find(tileLoopOps.begin(), tileLoopOps.end(), [](Operation *op1, Operation *op2) { return op1 != op2->getParentOp(); - }) == loopOps.end(); + }) == tileLoopOps.end(); } SmallVector TileLoopNest::getTiedBBArgs(BlockArgument bbArg) { @@ -179,7 +220,7 @@ SmallVector TileLoopNest::getTiedBBArgs(BlockArgument bbArg) { SmallVector bbArgs; // Search all tile loop block arguments from inner to outer. - for (auto tileLoop : reverse(loopOps)) { + for (auto tileLoop : reverse(tileLoopOps)) { if (bbArg.getOwner()->getParentOp() != tileLoop) return {}; bbArgs.push_back(bbArg); @@ -194,9 +235,9 @@ SmallVector TileLoopNest::getTiedBBArgs(BlockArgument bbArg) { OpOperand *TileLoopNest::getTiedIterArg(BlockArgument bbArg) { // Search all block arguments and return the matching iteration argument. SmallVector bbArgs = getTiedBBArgs(bbArg); - if (bbArgs.size() != loopOps.size()) + if (bbArgs.size() != tileLoopOps.size()) return nullptr; - return &loopOps.front().getOpOperandForRegionIterArg(bbArgs.front()); + return &tileLoopOps.front().getOpOperandForRegionIterArg(bbArgs.front()); } bool TileLoopNest::hasOtherUses(BlockArgument bbArg, @@ -255,24 +296,29 @@ LogicalResult TileLoopNest::tileRootOp(OpBuilder &b, if (!isEmpty()) rootOp->replaceAllUsesWith(tiledRootOp->tensorResults); + // Transfer the stored `rootOp` loop dimensions if it has been tiled before. 
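  // Worked example of the bookkeeping (a sketch; the tile sizes are made up):
  // tiling the root first with sizes {4, 0} and then with {0, 8} under an
  // identity interchange appends one loop per non-zero tile size, so afterwards
  //   tileLoopOps                       == [scf.for %i, scf.for %j]
  //   tiledRootAndFusedOpsLoops[rootOp] == [0, 1]
  // and isValid() checks exactly this size correspondence.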
+ if (tiledRootAndFusedOpsLoops.count(rootOp) != 0) { + tiledRootAndFusedOpsLoops[tiledRootOp->op] = + tiledRootAndFusedOpsLoops[rootOp]; + } + // Update the root operation and append the loops and tile loop dimensions. rootOp = tiledRootOp->op; - loopOps.append(tiledRootOp->loops.begin(), tiledRootOp->loops.end()); + tileLoopOps.append(tiledRootOp->loops.begin(), tiledRootOp->loops.end()); for (auto en : enumerate(tileSizes)) { // Copy only the tiled loop dimensions with non-zero tile size. if (en.value() == 0) continue; - loopDims.push_back(tileInterchange[en.index()]); + tiledRootAndFusedOpsLoops[rootOp].push_back(tileInterchange[en.index()]); } assert(isValid() && "expect tile loop nest to be valid after tiling"); - return success(); } FailureOr TileLoopNest::fuseProducer(OpBuilder &b, - OpOperand *rootOpOperand) { - assert(rootOpOperand->getOwner() == rootOp && - "expect the root op to be the owner of the operand to fuse"); + OpOperand *consumerOpOperand) { + assert(tiledRootAndFusedOpsLoops.count(consumerOpOperand->getOwner()) != 0 && + "expect the operand owner is the root operation or a fused producer"); assert(this->isValid() && "expect the tile loop nest to satisfy all invariants"); @@ -280,13 +326,16 @@ FailureOr TileLoopNest::fuseProducer(OpBuilder &b, if (isEmpty()) return failure(); - // Check `rootOpOperand` is defined by an ExtractSliceOp. - auto sliceOp = rootOpOperand->get().getDefiningOp(); + // Check `consumerOpOperand` is defined by an ExtractSliceOp. + auto sliceOp = + consumerOpOperand->get().getDefiningOp(); if (!sliceOp) return failure(); - // Check `sliceOp` is tiled by the tile loop nest. - if (sliceOp->getParentOp() != rootOp->getParentOp()) + // Check `sliceOp` and `consumerOp` are in the same block. + LinalgOp consumerOp = consumerOpOperand->getOwner(); + if (sliceOp->getBlock() != rootOp->getBlock() || + consumerOp->getBlock() != rootOp->getBlock()) return failure(); // Check if the producer is a LinalgOp possibly passed by iteration argument. @@ -302,19 +351,24 @@ FailureOr TileLoopNest::fuseProducer(OpBuilder &b, if (!producerResult || !isa(producerResult.getOwner())) return failure(); - // Compute the tiled producer slice dimensions given the tiled root operation - // loop dimensions `loopDims`. - SmallVector tiledSliceDims = - getTiledSliceDims(rootOpOperand, loopDims); - if (tiledSliceDims.empty()) + // Compute the tiled producer slice dimensions given the tiled consumer loops. + SmallVector tiledSliceDimIndices = getTiledSliceDims( + consumerOpOperand, tiledRootAndFusedOpsLoops[consumerOp]); + if (tiledSliceDimIndices.empty()) return failure(); + // Compute the tiled producer loop indices. + SmallVector tiledProducerLoopIndices = + getTiledProducerLoops(producerResult, tiledSliceDimIndices); + // Tile the producer operands and clone the producer in place of `sliceOp`. LinalgOp clonedOp = - getTiledProducer(b, producerResult, sliceOp, tiledSliceDims, iterArg); + getTiledProducer(b, producerResult, sliceOp, tiledSliceDimIndices, + tiledProducerLoopIndices, iterArg); + tiledRootAndFusedOpsLoops[clonedOp] = tiledProducerLoopIndices; // Cast the `clonedOp` result to gap type mismatches before canonicalization. 
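  // Hedged IR sketch of a successful fusion step (schematic; names assumed):
  //
  //   %0 = linalg.fill(%cst, %init) : tensor<?x?xf32> -> tensor<?x?xf32>
  //   scf.for %i ... {
  //     %s = tensor.extract_slice %0[%i, 0] ...
  //     ... consumer uses %s ...
  //   }
  //
  // fuseProducer replaces %s by a tiled clone of the fill computed inside the
  // loop and records the clone's tiled loop dimensions, so that the clone's
  // own operands become fusion candidates in turn.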
- Type consumerOperandType = rootOpOperand->get().getType(); + Type consumerOperandType = consumerOpOperand->get().getType(); Value newResult = clonedOp->getResult(producerResult.getResultNumber()); if (newResult.getType() != consumerOperandType) { OpBuilder::InsertionGuard guard(b); @@ -330,7 +384,7 @@ FailureOr TileLoopNest::fuseProducer(OpBuilder &b, ValueRange TileLoopNest::getRootOpReplacementResults() { assert(!isEmpty() && "expect tile loop nest to be non-empty"); - return loopOps.front()->getOpResults(); + return tileLoopOps.front()->getOpResults(); } //===----------------------------------------------------------------------===// @@ -359,14 +413,25 @@ mlir::linalg::tileConsumerAndFuseProducers(OpBuilder &b, LinalgOp consumerOp, }); int64_t split = std::distance(iterTypes.begin(), it); + // Helper to fuse the producers greedily using a queue of fusion candidates. + auto fuseProducersGreedily = [&](ArrayRef operands) { + SmallVector candidates(operands.begin(), operands.end()); + while (!candidates.empty()) { + FailureOr fusedProducer = + tileLoopNest.fuseProducer(b, candidates.pop_back_val()); + if (failed(fusedProducer)) + continue; + candidates.append(fusedProducer->getInputAndOutputOperands()); + } + }; + // Tile the outer parallel loops and fuse the output operands. SmallVector outerTileSizes; outerTileSizes.append(tileSizes.begin(), tileSizes.begin() + split); outerTileSizes.append(tileSizes.size() - split, 0); if (failed(tileLoopNest.tileRootOp(b, outerTileSizes, tileInterchange))) return failure(); - for (OpOperand *opOperand : tileLoopNest.getRootOp().getOutputOperands()) - (void)tileLoopNest.fuseProducer(b, opOperand); + fuseProducersGreedily(tileLoopNest.getRootOp().getOutputOperands()); // Tile the remaining loops and fuse the input operands. SmallVector innerTileSizes; @@ -374,10 +439,7 @@ mlir::linalg::tileConsumerAndFuseProducers(OpBuilder &b, LinalgOp consumerOp, innerTileSizes.append(tileSizes.begin() + split, tileSizes.end()); if (failed(tileLoopNest.tileRootOp(b, innerTileSizes, tileInterchange))) return failure(); - SmallVector inputOperands = - tileLoopNest.getRootOp().getInputOperands(); - for (OpOperand *opOperand : tileLoopNest.getRootOp().getInputOperands()) - (void)tileLoopNest.fuseProducer(b, opOperand); + fuseProducersGreedily(tileLoopNest.getRootOp().getInputOperands()); return tileLoopNest; } diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp index 5346e236b1672..30a93e55c0eee 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -48,6 +48,10 @@ using namespace mlir::linalg; /// contains an unknown op with a region. /// 4. The backward slice from the pad op to the scf::ForOp to hoist above is /// empty. +/// 5. The source tensor of pad op is not defined by an extract slice op. +/// 6. The source tensor of the extract slice op is not defined outside of +/// the outermost enclosing scf::ForOp. +/// 7. There is no enclosing scf::ForOp that indexes the padded data. /// Other cases succeed and will trigger hoisting of the pad op. struct HoistingAnalysis { HoistingAnalysis(PadTensorOp padTensorOp, int nLevels); @@ -82,6 +86,26 @@ struct HoistingAnalysis { packingLoops; private: + /// Returns the loops in `backwardSlice` used to index the padded data. 
The + /// method starts from `padTensorOp` and `sliceOp`, follows the use-def + /// chains of their index operands, and stores any enclosing loop whose + /// induction variable is part of the walked index computation. + /// + /// Example: + /// ``` + /// %source = linalg.fill(%cst, %arg0) + /// scf.for %i + /// scf.for %j + /// scf.for %k // not used to index %source! + /// %ubi = affine.min #map(%i) + /// %ubj = affine.min #map(%j) + /// %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj] + /// %padded_slice = linalg.pad_tensor %slice + /// ``` + /// getIndexingLoops(%padded_slice, %slice) returns [scf.for %i, scf.for %j] + SetVector getIndexingLoops(PadTensorOp padTensorOp, + tensor::ExtractSliceOp sliceOp); + /// Encodes whether the analysis is valid and hoisting can proceed. bool valid; }; @@ -166,28 +190,115 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int nLevels) if (analysisFailure || backwardSlice.empty()) return; - // Backward slice is a topologically sorted list of ops starting at - // `outermostEnclosingForOp`. - assert(outermostEnclosingForOp == backwardSlice.front()); + // Get the `sliceOp` that defines the source tensor of `padTensorOp` and + // check its source is defined outside of the outermost loop. This check + // ensures the padded data is available for packing before entering the + // outermost enclosing loop. + // + // Example: + // ``` + // %source = linalg.fill(%cst, %arg0) + // // %source is available for packing here! + // scf.for %i + // scf.for %j + // scf.for %k + // %slice = tensor.extract_slice %source [%i, %j] + // %padded_slice = linalg.pad_tensor %slice + // ``` + auto sliceOp = padTensorOp.source().getDefiningOp(); + if (!sliceOp) { + LLVM_DEBUG(DBGS() << "Cannot find the extract slice op -> skip\n"); + return; + } + if (!outermostEnclosingForOp.isDefinedOutsideOfLoop(sliceOp.source())) { + LLVM_DEBUG(DBGS() << "Source not defined outside of loops -> skip\n"); + return; + } + + // Search the loops found in `backwardSlice` used to index the padded data. + SetVector indexingLoops = getIndexingLoops(padTensorOp, sliceOp); - // Filter out the loops whose induction variable is not used to compute the - // padded result. As a first approximation, just look for IVs that have no use - // in the backwardSlice. - // These are the dimensions of reuse that we can exploit to reduce the amount - // of copy / memory. + // Add only the loops part of `indexingLoops` to the packing loops. All other + // loops are not used to index the padded data and consequently access the + // same data in every loop iteration. Adding them to the packing loops would + // increase the cache footprint of the packed data by storing the same data + // multiple times. for (scf::ForOp forOp : llvm::reverse(reverseEnclosingLoops)) { - for (Operation *user : forOp.getInductionVar().getUsers()) { - if (backwardSlice.contains(user)) { - packingLoops.insert(forOp); - break; - } - } + if (indexingLoops.contains(forOp)) + packingLoops.insert(forOp); + } + assert(indexingLoops.size() == packingLoops.size() && + "expect the all indexing loops are enclosing loops"); + if (packingLoops.empty()) { + LLVM_DEBUG(DBGS() << "Cannot find a packing loop -> skip\n"); + return; } // The analysis is valid and hoisting can occur. valid = true; } +/// Add all index operands of `operation` to `indexEdges`. An index operand is +/// an operand of type index. 
+static void addIndexOperandsToIndexEdges(Operation *operation, + SetVector &indexEdges) { + for (Value operand : operation->getOperands()) + if (operand.getType().isIndex()) + indexEdges.insert(operand); +} + +SetVector +HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp, + tensor::ExtractSliceOp sliceOp) { + // Set of all values used for index computation. + SetVector indexEdges; + + // Starting from `padTensorOp` and `sliceOp` walk the use-def edges of index + // type in `backwardSlice`. Add the index operands of an operation to + // `indexEdges` if one of its results is an index edge found so far and store + // all loops part of the index computation to `indexingLoops`. + // + // Example: + // ``` + // %source = linalg.fill(%cst, %arg0) + // scf.for %i + // scf.for %j + // scf.for %k // not used to index %source! + // %ubi = affine.min #map(%i) + // %ubj = affine.min #map(%j) + // %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj] + // %padded_slice = linalg.pad_tensor %slice + // ``` + // After iterating `backwardSlice` we obtain: + // indexEdges = [%i, %j, %ubi, %ubj] + // indexingLoops = [scf.for %i, scf.for %j] + SetVector indexingLoops; + for (Operation *op : llvm::reverse(backwardSlice)) { + // Add the index operands of `padTensorOp` and `sliceOp` to start the + // exploration of the index computation. + if (op == padTensorOp || op == sliceOp) { + addIndexOperandsToIndexEdges(op, indexEdges); + continue; + } + // Add the index operands of the loop if its induction variable is + // used for index computation. Additionally, insert the loop into + // `indexingLoops` + if (auto forOp = dyn_cast(op)) { + if (indexEdges.contains(forOp.getInductionVar())) { + addIndexOperandsToIndexEdges(op, indexEdges); + indexingLoops.insert(forOp); + continue; + } + } + // Add the index operands of all other operations if at least one result is + // used for index computation. + if (llvm::any_of(op->getResults(), + [&](Value result) { return indexEdges.contains(result); })) + addIndexOperandsToIndexEdges(op, indexEdges); + } + return indexingLoops; +} + static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) { return outer.isDefinedOutsideOfLoop(v) || v.getDefiningOp(); } @@ -204,6 +315,8 @@ static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) { /// - scf::ForOp are simply skipped. /// - AffineApplyOp are composed to replace the result by an equality. /// - AffineMinOp are composed by adding each entry as an upper bound. +/// Additionally, the following terminal operations are handled: +/// - DimOp and ConstantOp are skipped. /// If any other operation is met, return failure. // TODO: extend on a per-need basis. static LogicalResult @@ -213,23 +326,60 @@ foldUpperBoundsIntoConstraintsSet(FlatAffineValueConstraints &constraints, SetVector toProjectOut; for (scf::ForOp loop : loops) { auto ub = loop.upperBound(); - if (isDefinedOutsideOrConstant(outerLimit, ub)) - continue; - // Compute a backward slice up to, but not including, `outerLimit`. - SetVector backwardSlice; - getBackwardSlice(ub, &backwardSlice, [&](Operation *op) { - return outerLimit->isProperAncestor(op); + // Set of all values used for index computation. + SetVector indexEdges; + indexEdges.insert(ub); + + // Compute the backward slice `indexSlice` containing the index computation + // performed to obtain the upper bound `ub`. Starting from `ub` add the + // index operands of an operation to `indexEdges` if one of its results is + // an index edge. Otherwise, stop the slice computation. 
For a loop, check + // if its induction variable is an index edge. + // + // Example: + // ``` + // %c0 = arith.constant 0 + // scf.for %i = %c0 to ... + // scf.for %j = %c0 to ... + // %ub = affine.min #map(%i) + // scf.for %k = %c0 to %ub + // ``` + // After computing the backward slice we obtain: + // indexEdges = [%ub, %i, %c0] + // indexSlice = [arith.constant 0, scf.for %i, affine.min #map(%i)] + SetVector indexSlice; + getBackwardSlice(ub, &indexSlice, [&](Operation *op) { + // Continue only along the index operands of the ForOp. + if (auto forOp = dyn_cast(op)) { + // Consider only loops part of the enclosing loops. + if (!outerLimit->isAncestor(op)) + return false; + if (!indexEdges.contains(forOp.getInductionVar())) + return false; + addIndexOperandsToIndexEdges(op, indexEdges); + return true; + } + // All supported index operations have one result. + assert(op->getNumResults() == 1 && + "expect operations to have one result"); + if (!indexEdges.contains(op->getResult(0))) + return false; + addIndexOperandsToIndexEdges(op, indexEdges); + return true; }); - backwardSlice.insert(ub.getDefiningOp()); + indexSlice.insert(ub.getDefiningOp()); // Iterate over all ops in the slice and compose them in the constraints. - for (Operation *op : llvm::reverse(backwardSlice)) { - if (!isa(op)) - return failure(); - if (isa(op)) + for (Operation *op : llvm::reverse(indexSlice)) { + // All ForOps have previously been added to the constraints and ConstantOp + // and DimOp are terminals of the index computation. + if (isa(op)) continue; - // Ensure there is a + // Check all index computation operations are supported. + if (!isa(op)) + return failure(); + // Ensure there is an id. auto ensureIdFailed = [&](Value v) { if (constraints.containsId(v)) { unsigned pos; @@ -247,6 +397,8 @@ foldUpperBoundsIntoConstraintsSet(FlatAffineValueConstraints &constraints, // All supported ops have 1 result. // TODO: extend when needed. + assert(op->getNumResults() == 1 && + "expect operations to have one result"); toProjectOut.insert(op->getResult(0)); // Compose supported ops. diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index f81ce919a4faf..758aeecf380d3 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -51,14 +51,14 @@ const StringLiteral mlir::linalg::LinalgTransforms::kLinalgTransformMarker = mlir::linalg::LinalgTransformationFilter::LinalgTransformationFilter( ArrayRef matchDisjunction, Optional replacement) : matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()), - replacement(replacement) {} + replacement(replacement), matchByDefault(false) {} mlir::linalg::LinalgTransformationFilter::LinalgTransformationFilter( FilterFunction f, ArrayRef matchDisjunction, Optional replacement) : filters(), matchDisjunction(matchDisjunction.begin(), matchDisjunction.end()), - replacement(replacement) { + replacement(replacement), matchByDefault(false) { if (f) filters.push_back(f); } @@ -74,7 +74,7 @@ LogicalResult mlir::linalg::LinalgTransformationFilter::checkAndNotify( if (!attr) { // 1. Has no filter case and matchDisjunction is empty. - if (matchDisjunction.empty()) + if (matchDisjunction.empty() || matchByDefault) return success(); // 2. Has no filter but was expecting a filter. 
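// Hedged usage sketch of the filter logic above (the rewriter variable is an
// assumption, and matchByDefault is taken to be controlled by the pattern
// that owns the filter):
//
//   LinalgTransformationFilter filter(
//       /*matchDisjunction=*/{}, /*replacement=*/llvm::None);
//   // With no kLinalgTransformMarker attribute on `op` and an empty
//   // matchDisjunction (or matchByDefault set), checkAndNotify(rewriter, op)
//   // returns success(); a non-empty matchDisjunction would reject `op`.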
@@ -840,3 +840,98 @@ LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite( rewriter.replaceOp(sliceOp, tiledPadOp->getResults()); return success(); } + +namespace { +// The following are patterns for downscaling convolution ops with size-1 +// window dimensions. +// +// Note that we'd eventually want to write such transformations in a generic +// way, e.g., converting to linalg.generic, removing the size-1 dimensions, +// and then turning back to named ops. But for now it's fine to have a few +// patterns matching special ops to get started. + +/// Rewrites 2-D convolution ops with size-1 window dimensions into 1-D +/// convolution ops. +struct DownscaleSizeOneWindowed2DConvolution final + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(linalg::Conv2DNhwcHwcfOp convOp, + PatternRewriter &rewriter) const override { + auto linalgOp = cast(*convOp); + if (linalgOp.hasBufferSemantics()) + return failure(); // To be implemented + + Value input = convOp.inputs().front(); + Value filter = convOp.inputs().back(); + Value output = convOp.outputs().front(); + + auto inputType = input.getType().dyn_cast(); + auto filterType = filter.getType().dyn_cast(); + auto outputType = output.getType().dyn_cast(); + + auto inputShape = inputType.getShape(); + auto filterShape = filterType.getShape(); + auto outputShape = outputType.getShape(); + + // Only handle the case where at least one of the window dimensions is + // of size 1. Other cases can rely on tiling to reduce to such cases. + int64_t fhSize = filterShape[0], fwSize = filterShape[1]; + int64_t ohSize = outputShape[1], owSize = outputShape[2]; + if (!(fhSize == 1 && ohSize == 1) && !(fwSize == 1 && owSize == 1)) + return failure(); + bool removeH = ohSize == 1; + + // Get new shapes and types for all operands by removing the size-1 + // dimension. + + SmallVector newInputShape{ + inputShape[0], inputShape[removeH ? 2 : 1], inputShape[3]}; + auto newInputType = RankedTensorType::get( + newInputShape, inputType.getElementType(), inputType.getEncoding()); + + SmallVector newFilterShape{filterShape[removeH ? 1 : 0], + filterShape[2], filterShape[3]}; + auto newFilterType = RankedTensorType::get( + newFilterShape, filterType.getElementType(), filterType.getEncoding()); + + SmallVector newOutputShape{ + outputShape[0], outputShape[removeH ? 2 : 1], outputShape[3]}; + auto newOutputType = RankedTensorType::get( + newOutputShape, outputType.getElementType(), outputType.getEncoding()); + + SmallVector ioReshapeIndices = {{0}, {1, 2}, {3}}; + SmallVector fReshapeIndices = {{0, 1}, {2}, {3}}; + + // Reshape all operands for 1-D convolution. + Location loc = convOp.getLoc(); + Value newInput = rewriter.create( + loc, newInputType, input, ioReshapeIndices); + Value newFilter = rewriter.create( + loc, newFilterType, filter, fReshapeIndices); + Value newOutput = rewriter.create( + loc, newOutputType, output, ioReshapeIndices); + + // We need to shrink the strides and dilations too. + auto stride = convOp.strides().getFlatValue(removeH ? 1 : 0); + auto stridesAttr = rewriter.getI64VectorAttr(stride); + auto dilation = convOp.dilations().getFlatValue(removeH ? 
1 : 0);
+    auto dilationsAttr = rewriter.getI64VectorAttr(dilation);
+
+    auto conv1DOp = rewriter.create<linalg::Conv1DNwcWcfOp>(
+        loc, newOutputType, ValueRange{newInput, newFilter},
+        ValueRange{newOutput}, stridesAttr, dilationsAttr);
+
+    rewriter.replaceOpWithNewOp<linalg::TensorExpandShapeOp>(
+        convOp, outputType, conv1DOp.getResult(0), ioReshapeIndices);
+    return success();
+  };
+};
+
+} // namespace
+
+void linalg::populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
+                                                  PatternBenefit benefit) {
+  patterns.add<DownscaleSizeOneWindowed2DConvolution>(patterns.getContext(),
+                                                      benefit);
+}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index b7520f1a62fa3..c65d2a1de869a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1396,8 +1396,7 @@ namespace {
 /// Iters: ({Par(), Par(), Par(), Red(), Red()})
 /// Layout: {{n, strideW * w + dilationW * kw, c}, {kw, c, f}, {n, w, f}}
 /// ```
-/// w and kw are unrolled.
-/// TODO: do not unroll w (resp. kw) when the strideW ( resp. dilationW) is > 1.
+/// kw is unrolled, w is unrolled iff strideW > 1.
 struct Conv1D_NWC_WCF_Generator : public StructuredGenerator<LinalgOp> {
   Conv1D_NWC_WCF_Generator(OpBuilder &builder, LinalgOp linalgOp, int strideW,
                            int dilationW)
@@ -1455,58 +1454,103 @@ struct Conv1D_NWC_WCF_Generator : public StructuredGenerator<LinalgOp> {
     vector::TransferWriteOp write;
     Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
 
+    // w is unrolled (i.e. wSizeStep == 1) iff strideW > 1.
+    // When strideW == 1, we can batch the contiguous loads and avoid unrolling.
     int64_t wSizeStep = strideW == 1 ? wSize : 1;
+
+    Type lhsEltType = lhsShapedType.getElementType();
+    Type rhsEltType = rhsShapedType.getElementType();
+    Type resEltType = resShapedType.getElementType();
+    VectorType lhsType = VectorType::get(
+        {nSize, (wSize - 1) * strideW + 1 + (kwSize - 1) * dilationW + 1,
+         cSize},
+        lhsEltType);
+    VectorType rhsType = VectorType::get({kwSize, cSize, fSize}, rhsEltType);
+    VectorType resType = VectorType::get({nSize, wSize, fSize}, resEltType);
+
+    // Read lhs slice of size {w * strideW + kw * dilationW, c, f} @ [0, 0, 0].
+    Value lhs = builder.create<vector::TransferReadOp>(
+        loc, lhsType, lhsShaped, ValueRange{zero, zero, zero});
+    // Read rhs slice of size {kw, c, f} @ [0, 0, 0].
+    Value rhs = builder.create<vector::TransferReadOp>(
+        loc, rhsType, rhsShaped, ValueRange{zero, zero, zero});
+    // Read res slice of size {n, w, f} @ [0, 0, 0].
+    Value res = builder.create<vector::TransferReadOp>(
+        loc, resType, resShaped, ValueRange{zero, zero, zero});
+
+    //===------------------------------------------------------------------===//
+    // Begin vector-only rewrite part
+    //===------------------------------------------------------------------===//
     // Unroll along kw and read slices of lhs and rhs.
-    // Alternatively we could preload both 3-d slices and extract smaller slices
-    // iteratively without touching memory. But this will quickly spill.
+    SmallVector<Value> lhsVals, rhsVals, resVals;
     for (int64_t kw = 0; kw < kwSize; ++kw) {
-      // Read rhs slice of size {c, f} @ [kw, 0, 0].
-      Value kwVal = builder.create<arith::ConstantIndexOp>(loc, kw);
-      VectorType rhsType =
-          VectorType::get({cSize, fSize}, rhsShapedType.getElementType());
-      Value rhs = builder.create<vector::TransferReadOp>(
-          loc, rhsType, rhsShaped, ValueRange{kwVal, zero, zero});
-
-      for (int64_t w_iv = 0; w_iv < wSize; w_iv += wSizeStep) {
-        // Read lhs slice of size {n, wSizeStep, c}
+      // Extract rhs slice of size {c, f} @ [kw].
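      // Hedged sketch of the slices produced here for kwSize == 2 and
      // strideW == 1 (so wSizeStep == wSize and w runs once; shapes schematic):
      //
      //   %rhs0 = vector.extract %rhs[0] : vector<2x4x8xf32>
      //   %rhs1 = vector.extract %rhs[1] : vector<2x4x8xf32>
      //   %lhs0 = vector.extract_strided_slice %lhs
      //       {offsets = [0, 0, 0], sizes = [n, w, c], strides = [1, 1, 1]}
      //   %lhs1 = vector.extract_strided_slice %lhs
      //       {offsets = [0, dilationW, 0], sizes = [n, w, c], strides = [1, 1, 1]}
      //
      // All slices come from the three vectors preloaded above, so the
      // unrolled loops perform no additional memory accesses.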
+ rhsVals.push_back(builder.create( + loc, rhs, /*offsets=*/ArrayRef{kw})); + + for (int64_t w = 0; w < wSize; w += wSizeStep) { + // Extract lhs slice of size {n, wSizeStep, c} // @ [0, sw * w + dw * kw, 0]. - Value lhsStridedIdx = builder.create( - loc, strideW * w_iv + dilationW * kw); - VectorType lhsType = VectorType::get({nSize, wSizeStep, cSize}, - lhsShapedType.getElementType()); - Value lhs = builder.create( - loc, lhsType, lhsShaped, ValueRange{zero, lhsStridedIdx, zero}); - - // Read res slice: {n, wSizeStep, f} @ [0, w, 0]. - Value wVal = builder.create(loc, w_iv); - VectorType resType = VectorType::get({nSize, wSizeStep, fSize}, - resShapedType.getElementType()); - // When operating on tensors, reading from the updated value is required - // for vector.transfer_read/write hoisting to function as expected. - Value res = builder.create( - loc, resType, resShaped, ValueRange{zero, wVal, zero}); - - // Compute contraction: I{n, w, c} * F{c, f} -> O{n, w, f} - StringRef par = Par().strRef, red = Red().strRef; - AffineExpr n, w, f, c; - bindDims(ctx, n, w, f, c); - // clang-format off - res = builder.create( - loc, lhs, rhs, res, - /*indexingMaps=*/MapList{{n, w, c}, {c, f}, {n, w, f}}, - /*iteratorTypes=*/ArrayRef{par, par, par, red}); - // clang-format on - - // Write back res slice: {n, wSizeStep, f} @ [0, w, 0]. - write = builder.create( - loc, res, resShaped, ValueRange{zero, wVal, zero}); - if (write.getNumResults() == 1) - resShaped = write->getResult(0); + lhsVals.push_back(builder.create( + loc, lhs, + /*offsets=*/ArrayRef{0, w * strideW + kw * dilationW, 0}, + /*sizes=*/ArrayRef{nSize, wSizeStep, cSize}, + /*strides=*/ArrayRef{1, 1, 1})); + + // This does not depend on kw. + if (kw == 0) { + // Extract res slice: {n, wSizeStep, f} @ [0, w, 0]. + resVals.push_back(builder.create( + loc, res, + /*offsets=*/ArrayRef{0, w, 0}, + /*sizes=*/ArrayRef{nSize, wSizeStep, fSize}, + /*strides=*/ArrayRef{1, 1, 1})); + } } } - return write.getOperation(); + auto linearIndex = [&](int64_t kw, int64_t w) { + return kw * (wSize / wSizeStep) + w; + }; + + // Compute contraction: O{n, w, f} += I{n, sw * w + dw * kw, c} * F{c, f} + for (int64_t kw = 0; kw < kwSize; ++kw) { + for (int64_t w = 0; w < wSize; w += wSizeStep) { + resVals[w] = conv1dSliceAsContraction( + builder, loc, lhsVals[linearIndex(kw, w)], rhsVals[kw], resVals[w]); + } + } + + // Write back res slice: {n, wSizeStep, f} @ [0, w, 0]. + // This does not depend on kw. + for (int64_t w = 0; w < wSize; w += wSizeStep) { + res = builder.create( + loc, resVals[w], res, + /*offsets=*/ArrayRef{0, w, 0}, + /*strides=*/ArrayRef{1, 1, 1}); + } + //===------------------------------------------------------------------===// + // End vector-only rewrite part + //===------------------------------------------------------------------===// + + // Write back res slice of size {n, w, f} @ [0, 0, 0]. 
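    // For reference, a hedged example of one contraction created by the
    // conv1dSliceAsContraction helper defined below (shapes schematic):
    //
    //   %acc = vector.contract {
    //       indexing_maps = [affine_map<(n, w, f, c) -> (n, w, c)>,
    //                        affine_map<(n, w, f, c) -> (c, f)>,
    //                        affine_map<(n, w, f, c) -> (n, w, f)>],
    //       iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
    //     %lhs0, %rhs0, %acc0
    //     : vector<1x3x4xf32>, vector<4x8xf32> into vector<1x3x8xf32>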
+ return builder + .create(loc, res, resShaped, + ValueRange{zero, zero, zero}) + .getOperation(); + } + + // Create a contraction: lhs{n, w, c} * rhs{c, f} -> res{n, w, f} + vector::ContractionOp conv1dSliceAsContraction(OpBuilder &b, Location loc, + Value lhs, Value rhs, + Value res) { + StringRef par = Par().strRef, red = Red().strRef; + AffineExpr n, w, f, c; + bindDims(ctx, n, w, f, c); + return builder.create( + loc, lhs, rhs, res, + /*indexingMaps=*/MapList{{n, w, c}, {c, f}, {n, w, f}}, + /*iteratorTypes=*/ArrayRef{par, par, par, red}); } /// Entry point that transposes into the common form: diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index e85a4b722aced..b24fb868dcd4b 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -152,9 +152,6 @@ static ParseResult parseAllocateAndAllocator( static void printAllocateAndAllocator(OpAsmPrinter &p, OperandRange varsAllocate, OperandRange varsAllocator) { - if (varsAllocate.empty()) - return; - p << "allocate("; for (unsigned i = 0; i < varsAllocate.size(); ++i) { std::string separator = i == varsAllocate.size() - 1 ? ") " : ", "; @@ -182,7 +179,9 @@ static void printParallelOp(OpAsmPrinter &p, ParallelOp op) { printDataVars(p, op.firstprivate_vars(), "firstprivate"); printDataVars(p, op.shared_vars(), "shared"); printDataVars(p, op.copyin_vars(), "copyin"); - printAllocateAndAllocator(p, op.allocate_vars(), op.allocators_vars()); + + if (!op.allocate_vars().empty()) + printAllocateAndAllocator(p, op.allocate_vars(), op.allocators_vars()); if (auto def = op.default_val()) p << "default(" << def->drop_front(3) << ") "; @@ -231,7 +230,7 @@ parseLinearClause(OpAsmParser &parser, static void printLinearClause(OpAsmPrinter &p, OperandRange linearVars, OperandRange linearStepVars) { size_t linearVarsSize = linearVars.size(); - p << "("; + p << "linear("; for (unsigned i = 0; i < linearVarsSize; ++i) { std::string separator = i == linearVarsSize - 1 ? 
") " : ", "; p << linearVars[i]; @@ -296,7 +295,7 @@ static void printScheduleClause(OpAsmPrinter &p, StringRef &sched, llvm::Optional modifier, Value scheduleChunkVar) { std::string schedLower = sched.lower(); - p << "(" << schedLower; + p << "schedule(" << schedLower; if (scheduleChunkVar) p << " = " << scheduleChunkVar; if (modifier && modifier.getValue() != "none") @@ -333,6 +332,7 @@ parseReductionVarList(OpAsmParser &parser, static void printReductionVarList(OpAsmPrinter &p, Optional reductions, OperandRange reduction_vars) { + p << "reduction("; for (unsigned i = 0, e = reductions->size(); i < e; ++i) { if (i != 0) p << ", "; @@ -864,6 +864,83 @@ static ParseResult parseParallelOp(OpAsmParser &parser, return success(); } +//===----------------------------------------------------------------------===// +// Parser, printer and verifier for SectionsOp +//===----------------------------------------------------------------------===// + +/// Parses an OpenMP Sections operation +/// +/// sections ::= `omp.sections` clause-list +/// clause-list ::= clause clause-list | empty +/// clause ::= private | firstprivate | lastprivate | reduction | allocate | +/// nowait +static ParseResult parseSectionsOp(OpAsmParser &parser, + OperationState &result) { + + SmallVector clauses = {privateClause, firstprivateClause, + lastprivateClause, reductionClause, + allocateClause, nowaitClause}; + + SmallVector segments; + + if (failed(parseClauses(parser, result, clauses, segments))) + return failure(); + + result.addAttribute("operand_segment_sizes", + parser.getBuilder().getI32VectorAttr(segments)); + + // Now parse the body. + Region *body = result.addRegion(); + if (parser.parseRegion(*body)) + return failure(); + return success(); +} + +static void printSectionsOp(OpAsmPrinter &p, SectionsOp op) { + p << " "; + printDataVars(p, op.private_vars(), "private"); + printDataVars(p, op.firstprivate_vars(), "firstprivate"); + printDataVars(p, op.lastprivate_vars(), "lastprivate"); + + if (!op.reduction_vars().empty()) + printReductionVarList(p, op.reductions(), op.reduction_vars()); + + if (!op.allocate_vars().empty()) + printAllocateAndAllocator(p, op.allocate_vars(), op.allocators_vars()); + + if (op.nowait()) + p << "nowait "; + + p.printRegion(op.region()); +} + +static LogicalResult verifySectionsOp(SectionsOp op) { + + // A list item may not appear in more than one clause on the same directive, + // except that it may be specified in both firstprivate and lastprivate + // clauses. 
+  for (auto var : op.private_vars()) {
+    if (llvm::is_contained(op.firstprivate_vars(), var))
+      return op.emitOpError()
+             << "operand used in both private and firstprivate clauses";
+    if (llvm::is_contained(op.lastprivate_vars(), var))
+      return op.emitOpError()
+             << "operand used in both private and lastprivate clauses";
+  }
+
+  if (op.allocate_vars().size() != op.allocators_vars().size())
+    return op.emitError(
+        "expected equal sizes for allocate and allocator variables");
+
+  for (auto &inst : *op.region().begin()) {
+    if (!(isa<SectionOp>(inst) || isa<TerminatorOp>(inst)))
+      return op.emitOpError()
+             << "expected omp.section op or terminator op inside region";
+  }
+
+  return verifyReductionVarList(op, op.reductions(), op.reduction_vars());
+}
+
 /// Parses an OpenMP Workshare Loop operation
 ///
 /// wsloop ::= `omp.wsloop` loop-control clause-list
@@ -944,16 +1021,12 @@ static void printWsLoopOp(OpAsmPrinter &p, WsLoopOp op) {
   printDataVars(p, op.firstprivate_vars(), "firstprivate");
   printDataVars(p, op.lastprivate_vars(), "lastprivate");
 
-  if (op.linear_vars().size()) {
-    p << "linear";
+  if (op.linear_vars().size())
     printLinearClause(p, op.linear_vars(), op.linear_step_vars());
-  }
 
-  if (auto sched = op.schedule_val()) {
-    p << "schedule";
+  if (auto sched = op.schedule_val())
     printScheduleClause(p, sched.getValue(), op.schedule_modifier(),
                         op.schedule_chunk_var());
-  }
 
   if (auto collapse = op.collapse_val())
     p << "collapse(" << collapse << ") ";
@@ -967,10 +1040,8 @@ static void printWsLoopOp(OpAsmPrinter &p, WsLoopOp op) {
   if (auto order = op.order_val())
     p << "order(" << order << ") ";
 
-  if (!op.reduction_vars().empty()) {
-    p << "reduction(";
+  if (!op.reduction_vars().empty())
     printReductionVarList(p, op.reductions(), op.reduction_vars());
-  }
 
   p.printRegion(op.region(), /*printEntryBlockArgs=*/false);
 }
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp
index 14d58ef107684..554248f9c5c19 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp
@@ -1138,12 +1138,16 @@ static LogicalResult verify(spirv::AddressOfOp addressOfOp) {
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// spv.AtomicCompareExchangeWeak
-//===----------------------------------------------------------------------===//
+template <typename T>
+static void printAtomicCompareExchangeImpl(T atomOp, OpAsmPrinter &printer) {
+  printer << " \"" << stringifyScope(atomOp.memory_scope()) << "\" \""
+          << stringifyMemorySemantics(atomOp.equal_semantics()) << "\" \""
+          << stringifyMemorySemantics(atomOp.unequal_semantics()) << "\" "
+          << atomOp.getOperands() << " : " << atomOp.pointer().getType();
+}
 
-static ParseResult parseAtomicCompareExchangeWeakOp(OpAsmParser &parser,
-                                                    OperationState &state) {
+static ParseResult parseAtomicCompareExchangeImpl(OpAsmParser &parser,
+                                                  OperationState &state) {
   spirv::Scope memoryScope;
   spirv::MemorySemantics equalSemantics, unequalSemantics;
   SmallVector<OpAsmParser::OperandType> operandInfo;
@@ -1173,15 +1177,8 @@ static ParseResult parseAtomicCompareExchangeWeakOp(OpAsmParser &parser,
   return parser.addTypeToList(ptrType.getPointeeType(), state.types);
 }
 
-static void print(spirv::AtomicCompareExchangeWeakOp atomOp,
-                  OpAsmPrinter &printer) {
-  printer << " \"" << stringifyScope(atomOp.memory_scope()) << "\" \""
-          << stringifyMemorySemantics(atomOp.equal_semantics()) << "\" \""
-          << stringifyMemorySemantics(atomOp.unequal_semantics()) << "\" "
-          << atomOp.getOperands() << " : " << atomOp.pointer().getType();
-}
-
-static 
LogicalResult verify(spirv::AtomicCompareExchangeWeakOp atomOp) { +template +static LogicalResult verifyAtomicCompareExchangeImpl(T atomOp) { // According to the spec: // "The type of Value must be the same as Result Type. The type of the value // pointed to by Pointer must be the same as Result Type. This type must also @@ -1197,8 +1194,10 @@ static LogicalResult verify(spirv::AtomicCompareExchangeWeakOp atomOp) { "result, but found ") << atomOp.comparator().getType() << " vs " << atomOp.getType(); - Type pointeeType = - atomOp.pointer().getType().cast().getPointeeType(); + Type pointeeType = atomOp.pointer() + .getType() + .template cast() + .getPointeeType(); if (atomOp.getType() != pointeeType) return atomOp.emitOpError( "pointer operand's pointee type must have the same " @@ -1211,6 +1210,59 @@ static LogicalResult verify(spirv::AtomicCompareExchangeWeakOp atomOp) { return success(); } +//===----------------------------------------------------------------------===// +// spv.AtomicExchange +//===----------------------------------------------------------------------===// + +static void print(spirv::AtomicExchangeOp atomOp, OpAsmPrinter &printer) { + printer << " \"" << stringifyScope(atomOp.memory_scope()) << "\" \"" + << stringifyMemorySemantics(atomOp.semantics()) << "\" " + << atomOp.getOperands() << " : " << atomOp.pointer().getType(); +} + +static ParseResult parseAtomicExchangeOp(OpAsmParser &parser, + OperationState &state) { + spirv::Scope memoryScope; + spirv::MemorySemantics semantics; + SmallVector operandInfo; + Type type; + if (parseEnumStrAttr(memoryScope, parser, state, kMemoryScopeAttrName) || + parseEnumStrAttr(semantics, parser, state, kSemanticsAttrName) || + parser.parseOperandList(operandInfo, 2)) + return failure(); + + auto loc = parser.getCurrentLocation(); + if (parser.parseColonType(type)) + return failure(); + + auto ptrType = type.dyn_cast(); + if (!ptrType) + return parser.emitError(loc, "expected pointer type"); + + if (parser.resolveOperands(operandInfo, {ptrType, ptrType.getPointeeType()}, + parser.getNameLoc(), state.operands)) + return failure(); + + return parser.addTypeToList(ptrType.getPointeeType(), state.types); +} + +static LogicalResult verify(spirv::AtomicExchangeOp atomOp) { + if (atomOp.getType() != atomOp.value().getType()) + return atomOp.emitOpError("value operand must have the same type as the op " + "result, but found ") + << atomOp.value().getType() << " vs " << atomOp.getType(); + + Type pointeeType = + atomOp.pointer().getType().cast().getPointeeType(); + if (atomOp.getType() != pointeeType) + return atomOp.emitOpError( + "pointer operand's pointee type must have the same " + "as the op result type, but found ") + << pointeeType << " vs " << atomOp.getType(); + + return success(); +} + //===----------------------------------------------------------------------===// // spv.BitcastOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 1ee943cd7adbd..2349ce516c97c 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -171,6 +171,8 @@ LogicalResult SparseTensorEncodingAttr::verifyEncoding( // Check integrity with tensor type specifics. Dimension ordering is optional, // but we always should have dimension level types for the full rank. 
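  // Hedged example of an encoding this verifier accepts for a rank-2 tensor
  // (attribute syntax of this patch's vintage); a rank-0 tensor with any
  // sparse encoding is now rejected by the size check below:
  //
  //   #CSR = #sparse_tensor.encoding<{
  //     dimLevelType = [ "dense", "compressed" ]
  //   }>
  //   ... tensor<8x8xf64, #CSR> ...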
unsigned size = shape.size(); + if (size == 0) + return emitError() << "expected non-scalar sparse tensor"; if (getDimOrdering() && getDimOrdering().getNumResults() != size) return emitError() << "expected an affine map of size " << size << " for dimension ordering"; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp index ed2c5401e7211..77e0ff16ba28a 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp @@ -22,6 +22,7 @@ #include "mlir/Dialect/SparseTensor/Transforms/Passes.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/ExecutionEngine/SparseTensorUtils.h" #include "mlir/Transforms/DialectConversion.h" using namespace mlir; @@ -29,69 +30,10 @@ using namespace mlir::sparse_tensor; namespace { -/// New tensor storage action. Keep these values consistent with -/// the sparse runtime support library. -enum Action : uint32_t { - kEmpty = 0, - kFromFile = 1, - kFromCOO = 2, - kEmptyCOO = 3, - kToCOO = 4, - kToIter = 5 -}; - //===----------------------------------------------------------------------===// // Helper methods. //===----------------------------------------------------------------------===// -/// Returns internal type encoding for primary storage. Keep these -/// values consistent with the sparse runtime support library. -static uint32_t getPrimaryTypeEncoding(Type tp) { - if (tp.isF64()) - return 1; - if (tp.isF32()) - return 2; - if (tp.isInteger(64)) - return 3; - if (tp.isInteger(32)) - return 4; - if (tp.isInteger(16)) - return 5; - if (tp.isInteger(8)) - return 6; - return 0; -} - -/// Returns internal type encoding for overhead storage. Keep these -/// values consistent with the sparse runtime support library. -static uint32_t getOverheadTypeEncoding(unsigned width) { - switch (width) { - default: - return 1; - case 32: - return 2; - case 16: - return 3; - case 8: - return 4; - } -} - -/// Returns internal dimension level type encoding. Keep these -/// values consistent with the sparse runtime support library. -static uint32_t -getDimLevelTypeEncoding(SparseTensorEncodingAttr::DimLevelType dlt) { - switch (dlt) { - case SparseTensorEncodingAttr::DimLevelType::Dense: - return 0; - case SparseTensorEncodingAttr::DimLevelType::Compressed: - return 1; - case SparseTensorEncodingAttr::DimLevelType::Singleton: - return 2; - } - llvm_unreachable("Unknown SparseTensorEncodingAttr::DimLevelType"); -} - /// Generates a constant zero of the given type. inline static Value constantZero(ConversionPatternRewriter &rewriter, Location loc, Type t) { @@ -116,6 +58,73 @@ inline static Value constantI8(ConversionPatternRewriter &rewriter, return rewriter.create(loc, i, 8); } +/// Generates a constant of the given `Action`. +static Value constantAction(ConversionPatternRewriter &rewriter, Location loc, + Action action) { + return constantI32(rewriter, loc, static_cast(action)); +} + +/// Generates a constant of the internal type encoding for overhead storage. 
+static Value constantOverheadTypeEncoding(ConversionPatternRewriter &rewriter, + Location loc, unsigned width) { + OverheadType sec; + switch (width) { + default: + sec = OverheadType::kU64; + break; + case 32: + sec = OverheadType::kU32; + break; + case 16: + sec = OverheadType::kU16; + break; + case 8: + sec = OverheadType::kU8; + break; + } + return constantI32(rewriter, loc, static_cast(sec)); +} + +/// Generates a constant of the internal type encoding for primary storage. +static Value constantPrimaryTypeEncoding(ConversionPatternRewriter &rewriter, + Location loc, Type tp) { + PrimaryType primary; + if (tp.isF64()) + primary = PrimaryType::kF64; + else if (tp.isF32()) + primary = PrimaryType::kF32; + else if (tp.isInteger(64)) + primary = PrimaryType::kI64; + else if (tp.isInteger(32)) + primary = PrimaryType::kI32; + else if (tp.isInteger(16)) + primary = PrimaryType::kI16; + else if (tp.isInteger(8)) + primary = PrimaryType::kI8; + else + llvm_unreachable("Unknown element type"); + return constantI32(rewriter, loc, static_cast(primary)); +} + +/// Generates a constant of the internal dimension level type encoding. +static Value +constantDimLevelTypeEncoding(ConversionPatternRewriter &rewriter, Location loc, + SparseTensorEncodingAttr::DimLevelType dlt) { + DimLevelType dlt2; + switch (dlt) { + case SparseTensorEncodingAttr::DimLevelType::Dense: + dlt2 = DimLevelType::kDense; + break; + case SparseTensorEncodingAttr::DimLevelType::Compressed: + dlt2 = DimLevelType::kCompressed; + break; + case SparseTensorEncodingAttr::DimLevelType::Singleton: + dlt2 = DimLevelType::kSingleton; + break; + } + return constantI8(rewriter, loc, static_cast(dlt2)); +} + /// Returns a function reference (first hit also inserts into module). Sets /// the "_emit_c_interface" on the function declaration when requested, /// so that LLVM lowering generates a wrapper function that takes care @@ -238,7 +247,7 @@ static Value genBuffer(ConversionPatternRewriter &rewriter, Location loc, /// computation. static void newParams(ConversionPatternRewriter &rewriter, SmallVector ¶ms, Operation *op, - SparseTensorEncodingAttr &enc, uint32_t action, + SparseTensorEncodingAttr &enc, Action action, ValueRange szs, Value ptr = Value()) { Location loc = op->getLoc(); ArrayRef dlt = enc.getDimLevelType(); @@ -246,7 +255,7 @@ static void newParams(ConversionPatternRewriter &rewriter, // Sparsity annotations. SmallVector attrs; for (unsigned i = 0; i < sz; i++) - attrs.push_back(constantI8(rewriter, loc, getDimLevelTypeEncoding(dlt[i]))); + attrs.push_back(constantDimLevelTypeEncoding(rewriter, loc, dlt[i])); params.push_back(genBuffer(rewriter, loc, attrs)); // Dimension sizes array of the enveloping tensor. Useful for either // verification of external data, or for construction of internal data. @@ -268,18 +277,17 @@ static void newParams(ConversionPatternRewriter &rewriter, params.push_back(genBuffer(rewriter, loc, rev)); // Secondary and primary types encoding. 
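+ // The pushes below complete a fixed parameter layout that later patterns + // rely on when patching entries in place: params[0] annotations, params[1] + // sizes, params[2] permutation, then params[3] ptrTp, params[4] indTp, + // params[5] valTp, params[6] action, params[7] ptr.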
ShapedType resType = op->getResult(0).getType().cast(); - uint32_t secPtr = getOverheadTypeEncoding(enc.getPointerBitWidth()); - uint32_t secInd = getOverheadTypeEncoding(enc.getIndexBitWidth()); - uint32_t primary = getPrimaryTypeEncoding(resType.getElementType()); - assert(primary); - params.push_back(constantI32(rewriter, loc, secPtr)); - params.push_back(constantI32(rewriter, loc, secInd)); - params.push_back(constantI32(rewriter, loc, primary)); + params.push_back( + constantOverheadTypeEncoding(rewriter, loc, enc.getPointerBitWidth())); + params.push_back( + constantOverheadTypeEncoding(rewriter, loc, enc.getIndexBitWidth())); + params.push_back( + constantPrimaryTypeEncoding(rewriter, loc, resType.getElementType())); // User action and pointer. Type pTp = LLVM::LLVMPointerType::get(rewriter.getI8Type()); if (!ptr) ptr = rewriter.create(loc, pTp); - params.push_back(constantI32(rewriter, loc, action)); + params.push_back(constantAction(rewriter, loc, action)); params.push_back(ptr); } @@ -530,7 +538,7 @@ class SparseTensorNewConverter : public OpConversionPattern { SmallVector params; sizesFromType(rewriter, sizes, op.getLoc(), resType.cast()); Value ptr = adaptor.getOperands()[0]; - newParams(rewriter, params, op, enc, kFromFile, sizes, ptr); + newParams(rewriter, params, op, enc, Action::kFromFile, sizes, ptr); rewriter.replaceOp(op, genNewCall(rewriter, op, params)); return success(); } @@ -549,7 +557,7 @@ class SparseTensorInitConverter : public OpConversionPattern { // Generate the call to construct empty tensor. The sizes are // explicitly defined by the arguments to the init operator. SmallVector params; - newParams(rewriter, params, op, enc, kEmpty, adaptor.getOperands()); + newParams(rewriter, params, op, enc, Action::kEmpty, adaptor.getOperands()); rewriter.replaceOp(op, genNewCall(rewriter, op, params)); return success(); } @@ -588,13 +596,13 @@ class SparseTensorConvertConverter : public OpConversionPattern { auto enc = SparseTensorEncodingAttr::get( op->getContext(), encDst.getDimLevelType(), encDst.getDimOrdering(), encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth()); - newParams(rewriter, params, op, enc, kToCOO, sizes, src); + newParams(rewriter, params, op, enc, Action::kToCOO, sizes, src); Value coo = genNewCall(rewriter, op, params); - params[3] = constantI32( - rewriter, loc, getOverheadTypeEncoding(encDst.getPointerBitWidth())); - params[4] = constantI32( - rewriter, loc, getOverheadTypeEncoding(encDst.getIndexBitWidth())); - params[6] = constantI32(rewriter, loc, kFromCOO); + params[3] = constantOverheadTypeEncoding(rewriter, loc, + encDst.getPointerBitWidth()); + params[4] = constantOverheadTypeEncoding(rewriter, loc, + encDst.getIndexBitWidth()); + params[6] = constantAction(rewriter, loc, Action::kFromCOO); params[7] = coo; rewriter.replaceOp(op, genNewCall(rewriter, op, params)); return success(); @@ -613,7 +621,7 @@ class SparseTensorConvertConverter : public OpConversionPattern { Type elemTp = dstTensorTp.getElementType(); // Fabricate a no-permutation encoding for newParams(). // The pointer/index types must be those of `src`. - // The dimLevelTypes aren't actually used by kToIter. + // The dimLevelTypes aren't actually used by Action::kToIterator. 
encDst = SparseTensorEncodingAttr::get( op->getContext(), SmallVector( @@ -622,7 +630,7 @@ class SparseTensorConvertConverter : public OpConversionPattern { SmallVector sizes; SmallVector params; sizesFromPtr(rewriter, sizes, op, encSrc, srcTensorTp, src); - newParams(rewriter, params, op, encDst, kToIter, sizes, src); + newParams(rewriter, params, op, encDst, Action::kToIterator, sizes, src); Value iter = genNewCall(rewriter, op, params); Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); Value elemPtr = genAllocaScalar(rewriter, loc, elemTp); @@ -677,7 +685,7 @@ class SparseTensorConvertConverter : public OpConversionPattern { SmallVector sizes; SmallVector params; sizesFromSrc(rewriter, sizes, loc, src); - newParams(rewriter, params, op, encDst, kEmptyCOO, sizes); + newParams(rewriter, params, op, encDst, Action::kEmptyCOO, sizes); Value ptr = genNewCall(rewriter, op, params); Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType()); Value perm = params[2]; @@ -718,7 +726,7 @@ class SparseTensorConvertConverter : public OpConversionPattern { return {}; }); // Final call to construct sparse tensor storage. - params[6] = constantI32(rewriter, loc, kFromCOO); + params[6] = constantAction(rewriter, loc, Action::kFromCOO); params[7] = ptr; rewriter.replaceOp(op, genNewCall(rewriter, op, params)); return success(); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index cfab38616d55f..f8db7eb00319a 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -39,7 +39,7 @@ namespace { enum SortMask { kSparseOnly = 0x0, kIncludeDense = 0x1, kIncludeUndef = 0x2 }; // Reduction kinds. -enum Reduction { kSum, kProduct, kAnd, kOr, kXor }; +enum Reduction { kNoReduc, kSum, kProduct, kAnd, kOr, kXor }; // Code generation. struct CodeGen { @@ -50,7 +50,7 @@ struct CodeGen { highs(numTensors, std::vector(numLoops)), pidxs(numTensors, std::vector(numLoops)), idxs(numTensors, std::vector(numLoops)), redExp(-1u), redVal(), - curVecLength(1), curVecMask() {} + redKind(kNoReduc), curVecLength(1), curVecMask() {} /// Sparsification options. SparsificationOptions options; /// Universal dense indices and upper bounds (by index). The loops array @@ -71,9 +71,7 @@ struct CodeGen { std::vector> pidxs; std::vector> idxs; /// Current reduction, updated during code generation. When indices of a - /// reduction are exhausted, all inner loops can "scalarize" the reduction. - // TODO: currently only done for (a chain of) innermost for-loops, where it - // is most effective; we could generalize to more outer and while-loops. + /// reduction are exhausted, all inner loops can use a scalarized reduction. unsigned redExp; Value redVal; Reduction redKind; @@ -314,12 +312,14 @@ static bool isAdmissableTensorExp(Merger &merger, linalg::GenericOp op, } //===----------------------------------------------------------------------===// -// Sparse compiler synthesis methods (statements and expressions). +// Sparse compiler synthesis methods (reductions). //===----------------------------------------------------------------------===// /// Maps reduction kind to name encoding. 
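+/// The returned name ("add", etc.) becomes the combining-kind attribute of +/// the final horizontal reduction emitted by genVectorReducEnd.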
static StringRef getReductionName(Reduction kind) { switch (kind) { + case kNoReduc: + break; case kSum: return "add"; case kProduct: @@ -356,13 +356,16 @@ static Reduction getReduction(Kind kind) { } } -/// Generates an initial value for a vector reductions, following the scheme +/// Generates an initial value for a vector reduction, following the scheme /// given in Chapter 5 of "The Software Vectorization Handbook", where the /// initial scalar value is correctly embedded in the vector reduction value, /// and a straightforward horizontal reduction will complete the operation. -static Value genReductionInit(PatternRewriter &rewriter, Location loc, - Reduction kind, VectorType vtp, Value r) { - switch (kind) { +static Value genVectorReducInit(CodeGen &codegen, PatternRewriter &rewriter, + Location loc, VectorType vtp) { + Value r = codegen.redVal; + switch (codegen.redKind) { + case kNoReduc: + break; case kSum: case kXor: { // Initialize reduction vector to: | 0 | .. | 0 | r | @@ -390,6 +393,25 @@ static Value genReductionInit(PatternRewriter &rewriter, Location loc, llvm_unreachable("unknown reduction kind"); } +/// Generates final value for a vector reduction. +static Value genVectorReducEnd(CodeGen &codegen, PatternRewriter &rewriter, + Location loc, VectorType vtp) { + StringRef name = getReductionName(codegen.redKind); + StringAttr kind = rewriter.getStringAttr(name); + return rewriter.create(loc, vtp.getElementType(), kind, + codegen.redVal, ValueRange{}); +} + +/// Updates scalarized reduction value. +static void updateReduc(Merger &merger, CodeGen &codegen, Value reduc) { + assert(codegen.redKind != kNoReduc); + codegen.redVal = merger.exp(codegen.redExp).val = reduc; +} + +//===----------------------------------------------------------------------===// +// Sparse compiler synthesis methods (statements and expressions). +//===----------------------------------------------------------------------===// + /// Maps sparse integer option to actual integral storage type. static Type genIntType(PatternRewriter &rewriter, unsigned width) { if (width == 0) @@ -516,7 +538,7 @@ static VectorType vectorType(CodeGen &codegen, Value ptr) { static Value genVectorMask(CodeGen &codegen, PatternRewriter &rewriter, Value iv, Value lo, Value hi, Value step) { Location loc = iv.getLoc(); - VectorType mtp = vectorType(codegen, rewriter.getIntegerType(1)); + VectorType mtp = vectorType(codegen, genIntType(rewriter, 1)); // Special case if the vector length evenly divides the trip count (for // example, "for i = 0, 128, 16"). A constant all-true mask is generated // so that all subsequent masked memory operations are immediately folded @@ -671,7 +693,7 @@ static void genTensorStore(Merger &merger, CodeGen &codegen, if (codegen.curVecLength > 1) rhs = rewriter.create(op.getLoc(), codegen.curVecMask, rhs, codegen.redVal); - codegen.redVal = rhs; + updateReduc(merger, codegen, rhs); return; } // Actual store. 
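The seeding scheme above can be illustrated outside of MLIR. In the following plain C++ sketch (illustrative only: the vector length of 4 and the helper names initSum/initProduct are made up, not part of the patch), the identity element fills every lane except the one carrying the running scalar r, so one horizontal reduction at the end of the loop sequence recovers the correct scalar:

#include <array>
#include <cstdio>

// Sum (and xor) reductions are seeded as | 0 | .. | 0 | r |: the additive
// identity occupies every lane except the one holding the running scalar r.
static std::array<double, 4> initSum(double r) { return {0.0, 0.0, 0.0, r}; }

// A product reduction must seed the remaining lanes with the multiplicative
// identity instead: | 1 | .. | 1 | r |.
static std::array<double, 4> initProduct(double r) {
  return {1.0, 1.0, 1.0, r};
}

int main() {
  // The horizontal reductions below play the role of genVectorReducEnd:
  // both print 5, the scalar value the reduction started with.
  double sum = 0.0;
  for (double lane : initSum(5.0))
    sum += lane;
  double prod = 1.0;
  for (double lane : initProduct(5.0))
    prod *= lane;
  std::printf("sum = %g, prod = %g\n", sum, prod);
  return 0;
}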
@@ -708,11 +730,11 @@ static Value genLoad(CodeGen &codegen, PatternRewriter &rewriter, Location loc, if (!etp.isa()) { if (etp.getIntOrFloatBitWidth() < 32) vload = rewriter.create( - loc, vload, vectorType(codegen, rewriter.getIntegerType(32))); + loc, vload, vectorType(codegen, genIntType(rewriter, 32))); else if (etp.getIntOrFloatBitWidth() < 64 && !codegen.options.enableSIMDIndex32) vload = rewriter.create( - loc, vload, vectorType(codegen, rewriter.getIntegerType(64))); + loc, vload, vectorType(codegen, genIntType(rewriter, 64))); } return vload; } @@ -723,8 +745,8 @@ static Value genLoad(CodeGen &codegen, PatternRewriter &rewriter, Location loc, Value load = rewriter.create(loc, ptr, s); if (!load.getType().isa()) { if (load.getType().getIntOrFloatBitWidth() < 64) - load = rewriter.create(loc, load, - rewriter.getIntegerType(64)); + load = + rewriter.create(loc, load, genIntType(rewriter, 64)); load = rewriter.create(loc, load, rewriter.getIndexType()); } @@ -752,43 +774,6 @@ static Value genAddress(CodeGen &codegen, PatternRewriter &rewriter, return rewriter.create(loc, mul, i); } -/// Generates start of a reduction. -static Value genReductionStart(Merger &merger, CodeGen &codegen, - PatternRewriter &rewriter, - linalg::GenericOp op) { - if (codegen.redVal) - return codegen.redVal; // chained with previous for-loop - // Generate vector or scalar start of a reduction. - unsigned vl = codegen.curVecLength; - if (vl > 1) { - VectorType vtp = vectorType(codegen, codegen.buffers[codegen.redExp]); - assert(!merger.exp(codegen.redExp).val); - codegen.curVecLength = 1; - Value load = genTensorLoad(merger, codegen, rewriter, op, codegen.redExp); - codegen.curVecLength = vl; - return genReductionInit(rewriter, op.getLoc(), codegen.redKind, vtp, load); - } - return genTensorLoad(merger, codegen, rewriter, op, codegen.redExp); -} - -/// Generates end of a reduction. -static void genReductionEnd(Merger &merger, CodeGen &codegen, - PatternRewriter &rewriter, linalg::GenericOp op) { - Value red = codegen.redVal; - if (!red) - return; - assert(codegen.curVecLength == 1); - codegen.redVal = merger.exp(codegen.redExp).val = Value(); // end chain - // Generate vector or scalar end of a reduction. - if (auto vtp = red.getType().dyn_cast()) { - StringRef name = getReductionName(codegen.redKind); - StringAttr kind = rewriter.getStringAttr(name); - red = rewriter.create( - op.getLoc(), vtp.getElementType(), kind, red, ValueRange{}); - } - genTensorStore(merger, codegen, rewriter, op, red); -} - /// Recursively generates tensor expression. static Value genExp(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op, unsigned exp) { @@ -828,7 +813,7 @@ static bool isInvariantAffine(const CodeGen &codegen, AffineExpr a, /// Hoists loop invariant tensor loads for which indices have been exhausted. static void genInvariants(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op, - unsigned exp, unsigned ldx, bool hoist, + unsigned exp, unsigned ldx, bool atStart, Kind last = Kind::kTensor) { if (exp == -1u) return; @@ -844,14 +829,27 @@ static void genInvariants(Merger &merger, CodeGen &codegen, return; // still in play } // All exhausted at this level (atLevel denotes exactly at this level). + if (!atLevel) + return; OpOperand *lhs = op.getOutputOperand(0); if (lhs == t) { - codegen.redExp = hoist ? 
exp : -1u; - codegen.redKind = getReduction(last); - assert(!codegen.redVal); - } else if (atLevel) { + // Start or end a scalarized reduction + if (atStart) { + Value load = genTensorLoad(merger, codegen, rewriter, op, exp); + codegen.redKind = getReduction(last); + codegen.redExp = exp; + updateReduc(merger, codegen, load); + } else { + Value redVal = codegen.redVal; + updateReduc(merger, codegen, Value()); + codegen.redExp = -1u; + codegen.redKind = kNoReduc; + genTensorStore(merger, codegen, rewriter, op, redVal); + } + } else { + // Start or end loop invariant hoisting of a tensor load. merger.exp(exp).val = - hoist ? genTensorLoad(merger, codegen, rewriter, op, exp) : Value(); + atStart ? genTensorLoad(merger, codegen, rewriter, op, exp) : Value(); } } else if (merger.exp(exp).kind != Kind::kInvariant) { // Traverse into the binary operations. Note that we only hoist @@ -860,8 +858,8 @@ static void genInvariants(Merger &merger, CodeGen &codegen, Kind last = merger.exp(exp).kind; unsigned e0 = merger.exp(exp).children.e0; unsigned e1 = merger.exp(exp).children.e1; - genInvariants(merger, codegen, rewriter, op, e0, ldx, hoist, last); - genInvariants(merger, codegen, rewriter, op, e1, ldx, hoist, last); + genInvariants(merger, codegen, rewriter, op, e0, ldx, atStart, last); + genInvariants(merger, codegen, rewriter, op, e1, ldx, atStart, last); } } @@ -1005,18 +1003,20 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, return parOp; } - // Emit a sequential loop, potentially with a scalarized reduction. - bool scalarRed = isInner && codegen.redExp != -1u; + // Emit a sequential or vector loop. SmallVector operands; - if (scalarRed) { - Value load = genReductionStart(merger, codegen, rewriter, op); - operands.push_back(load); + if (codegen.redVal) { + // In a vector loop, bring reduction into SIMD form, if not already. + if (isVector && !codegen.redVal.getType().isa()) { + VectorType vtp = vectorType(codegen, codegen.redVal.getType()); + Value vred = genVectorReducInit(codegen, rewriter, loc, vtp); + updateReduc(merger, codegen, vred); + } + operands.push_back(codegen.redVal); } scf::ForOp forOp = rewriter.create(loc, lo, hi, step, operands); - if (scalarRed) { - codegen.redVal = merger.exp(codegen.redExp).val = - forOp.getRegionIterArgs().front(); - } + if (codegen.redVal) + updateReduc(merger, codegen, forOp.getRegionIterArgs().front()); // Assign induction variable to sparse or dense index. 
Value iv = forOp.getInductionVar(); if (isSparse) @@ -1044,17 +1044,18 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, unsigned tensor = merger.tensor(b); assert(idx == merger.index(b)); types.push_back(indexType); - assert(codegen.pidxs[tensor][idx].getType().isa() && - "type mismatch for sparse index"); operands.push_back(codegen.pidxs[tensor][idx]); } } + if (codegen.redVal) { + types.push_back(codegen.redVal.getType()); + operands.push_back(codegen.redVal); + } if (needsUniv) { types.push_back(indexType); - assert(codegen.loops[idx].getType().isa() && - "type mismatch for universal index"); operands.push_back(codegen.loops[idx]); } + assert(types.size() == operands.size()); Location loc = op.getLoc(); scf::WhileOp whileOp = rewriter.create(loc, types, operands); Block *before = rewriter.createBlock(&whileOp.before(), {}, types); @@ -1077,6 +1078,8 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen, codegen.pidxs[tensor][idx] = after->getArgument(o++); } } + if (codegen.redVal) + updateReduc(merger, codegen, after->getArgument(o++)); if (needsUniv) codegen.loops[idx] = after->getArgument(o++); assert(o == operands.size()); @@ -1098,7 +1101,6 @@ static Operation *genLoop(Merger &merger, CodeGen &codegen, return genFor(merger, codegen, rewriter, op, isOuter, isInner, idx, indices); } - genReductionEnd(merger, codegen, rewriter, op); // cannot chain return genWhile(merger, codegen, rewriter, op, idx, needsUniv, indices); } @@ -1163,8 +1165,24 @@ static void genLocals(Merger &merger, CodeGen &codegen, static void genWhileInduction(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op, unsigned idx, bool needsUniv, - llvm::BitVector &induction, ResultRange results) { + llvm::BitVector &induction, + scf::WhileOp whileOp) { Location loc = op.getLoc(); + // Finalize each else branch of all if statements. + if (codegen.redVal) { + while (auto ifOp = dyn_cast_or_null( + rewriter.getInsertionBlock()->getParentOp())) { + rewriter.create(loc, codegen.redVal); + updateReduc(merger, codegen, ifOp.getResult(0)); + rewriter.setInsertionPointAfter(ifOp); + } + } + rewriter.setInsertionPointToEnd(&whileOp.after().front()); + // Finalize the induction. Note that the induction could be performed + // in the individual if-branches to avoid re-evaluating the conditions. + // However, that would result in a rather elaborate forest of yield + // instructions during code generation. Moreover, performing the induction + // after the if-statements more closely resembles code generated by TACO. unsigned o = 0; SmallVector operands; Value one = rewriter.create(loc, 1); @@ -1179,16 +1197,38 @@ static void genWhileInduction(Merger &merger, CodeGen &codegen, op1, op2); Value add = rewriter.create(loc, op3, one); operands.push_back(rewriter.create(loc, cmp, add, op3)); - codegen.pidxs[tensor][idx] = results[o++]; + codegen.pidxs[tensor][idx] = whileOp->getResult(o++); } } + if (codegen.redVal) { + operands.push_back(codegen.redVal); + updateReduc(merger, codegen, whileOp->getResult(o++)); + } if (needsUniv) { operands.push_back( rewriter.create(loc, codegen.loops[idx], one)); - codegen.loops[idx] = results[o++]; + codegen.loops[idx] = whileOp->getResult(o++); } assert(o == operands.size()); rewriter.create(loc, operands); + rewriter.setInsertionPointAfter(whileOp); +} + +/// Generates the induction structure for a for-loop. 
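+/// If a scalarized reduction is in flight, its current value is yielded +/// from the loop body and the reduction is rebound to the matching loop +/// result; otherwise the builder simply moves past the loop.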
+static void genForInduction(Merger &merger, CodeGen &codegen, + PatternRewriter &rewriter, linalg::GenericOp op, + Operation *loop) { + Location loc = op.getLoc(); + unsigned o = 0; + SmallVector operands; + if (codegen.redVal) { + operands.push_back(codegen.redVal); + updateReduc(merger, codegen, loop->getResult(o++)); + } + assert(o == operands.size()); + if (o > 0) + rewriter.create(loc, operands); + rewriter.setInsertionPointAfter(loop); } /// Generates a single if-statement within a while-loop. @@ -1196,6 +1236,7 @@ static scf::IfOp genIf(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op, unsigned idx, llvm::BitVector &conditions) { Location loc = op.getLoc(); + SmallVector types; Value cond; for (unsigned b = 0, be = conditions.size(); b < be; b++) { if (conditions[b]) { @@ -1213,11 +1254,23 @@ static scf::IfOp genIf(Merger &merger, CodeGen &codegen, cond = cond ? rewriter.create(loc, cond, clause) : clause; } } - scf::IfOp ifOp = rewriter.create(loc, cond, /*else*/ true); + if (codegen.redVal) + types.push_back(codegen.redVal.getType()); + scf::IfOp ifOp = rewriter.create(loc, types, cond, /*else=*/true); rewriter.setInsertionPointToStart(&ifOp.thenRegion().front()); return ifOp; } +/// Generates the end of the true branch of an if-statement within a +/// while-loop. +static void endIf(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, + linalg::GenericOp op, scf::IfOp ifOp, Value ifInput) { + if (codegen.redVal) { + rewriter.create(op.getLoc(), codegen.redVal); + updateReduc(merger, codegen, ifInput); + } + rewriter.setInsertionPointToStart(&ifOp.elseRegion().front()); +} + //===----------------------------------------------------------------------===// // Sparse compiler synthesis methods (loop sequence). //===----------------------------------------------------------------------===// @@ -1230,14 +1283,16 @@ static bool startLoopSeq(Merger &merger, CodeGen &codegen, unsigned at, unsigned idx, unsigned ldx, unsigned lts) { assert(codegen.curVecLength == 1); + assert(!codegen.loops[idx]); // Emit invariants at this loop sequence level. - genInvariants(merger, codegen, rewriter, op, exp, ldx, /*hoist=*/true); + genInvariants(merger, codegen, rewriter, op, exp, ldx, /*atStart=*/true); // Emit further initialization at this loop sequence level. unsigned l0 = merger.set(lts)[0]; - if (genInit(merger, codegen, rewriter, op, topSort, at, - merger.lat(l0).bits)) { - // Maintain the universal index only if it is actually - // consumed by a subsequent lattice point. + bool needsUniv = + genInit(merger, codegen, rewriter, op, topSort, at, merger.lat(l0).bits); + // Maintain the universal index only if it is actually + // consumed by a subsequent lattice point. + if (needsUniv) { unsigned lsize = merger.set(lts).size(); for (unsigned i = 1; i < lsize; i++) { unsigned li = merger.set(lts)[i]; @@ -1270,16 +1325,12 @@ static bool endLoop(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, codegen.curVecLength = 1; // End a while-loop. if (auto whileOp = dyn_cast(loop)) { - rewriter.setInsertionPointToEnd(&whileOp.after().front()); genWhileInduction(merger, codegen, rewriter, op, idx, needsUniv, - merger.lat(li).bits, whileOp.results()); + merger.lat(li).bits, whileOp); return needsUniv; } // End a for-loop.
- if (codegen.redVal) { - rewriter.create(op.getLoc(), codegen.redVal); - codegen.redVal = loop->getResult(0); - } + genForInduction(merger, codegen, rewriter, op, loop); return false; } @@ -1288,11 +1339,14 @@ static void endLoopSeq(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, linalg::GenericOp op, unsigned exp, unsigned idx, unsigned ldx) { assert(codegen.curVecLength == 1); - // Finalize any pending reduction. - genReductionEnd(merger, codegen, rewriter, op); - // Unmark bookkeeping of invariants and loop index. - genInvariants(merger, codegen, rewriter, op, exp, ldx, /*hoist=*/false); codegen.loops[idx] = Value(); + // Bring a pending reduction back from SIMD form when sequence ends. + if (codegen.redVal) + if (auto vtp = codegen.redVal.getType().dyn_cast()) + updateReduc(merger, codegen, + genVectorReducEnd(codegen, rewriter, op.getLoc(), vtp)); + // Unmark bookkeeping of invariants and loop index. + genInvariants(merger, codegen, rewriter, op, exp, ldx, /*atStart=*/false); } /// Recursively generates code while computing iteration lattices in order @@ -1327,6 +1381,7 @@ static void genStmt(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, // Visit all lattice points with Li >= Lj to generate the // loop-body, possibly with if statements for coiteration. + Value ifInput = codegen.redVal; bool isWhile = dyn_cast(loop) != nullptr; for (unsigned j = 0; j < lsize; j++) { unsigned lj = merger.set(lts)[j]; @@ -1337,7 +1392,7 @@ static void genStmt(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, scf::IfOp ifOp = genIf(merger, codegen, rewriter, op, idx, merger.lat(lj).simple); genStmt(merger, codegen, rewriter, op, topSort, ej, at + 1); - rewriter.setInsertionPointToStart(&ifOp.elseRegion().front()); + endIf(merger, codegen, rewriter, op, ifOp, ifInput); } else { genStmt(merger, codegen, rewriter, op, topSort, ej, at + 1); } @@ -1347,7 +1402,6 @@ static void genStmt(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter, // End a loop. needsUniv = endLoop(merger, codegen, rewriter, op, loop, idx, li, needsUniv); - rewriter.setInsertionPointAfter(loop); } // End a loop sequence. @@ -1426,18 +1480,19 @@ struct GenericOpSparsifier : public OpRewritePattern { return failure(); // Builds the tensor expression for the Linalg operation in SSA form. - Optional exp = merger.buildTensorExpFromLinalg(op); - if (!exp.hasValue()) + Optional optExp = merger.buildTensorExpFromLinalg(op); + if (!optExp.hasValue()) return failure(); + unsigned exp = optExp.getValue(); // Rejects an inadmissible tensor expression. - if (!isAdmissableTensorExp(merger, op, exp.getValue())) + if (!isAdmissableTensorExp(merger, op, exp)) return failure(); // Recursively generates code.
CodeGen codegen(options, numTensors, numLoops); genBuffers(merger, codegen, rewriter, op); - genStmt(merger, codegen, rewriter, op, topSort, exp.getValue(), 0); + genStmt(merger, codegen, rewriter, op, topSort, exp, 0); genResult(merger, codegen, rewriter, op); return success(); } diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 90146f5bc29b0..85415d92bd1b6 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -289,6 +289,55 @@ void TransposeOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +struct AddZeroOptimization : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::AddOp op, + PatternRewriter &rewriter) const override { + auto input1 = op.input1(); + auto input2 = op.input2(); + + DenseElementsAttr input1Attr; + if (matchPattern(input1, m_Constant(&input1Attr)) && input1Attr.isSplat() && + input2.getType() == op.getType()) { + if (input1Attr.getType().getElementType().isa() && + input1Attr.getSplatValue().isZero()) { + rewriter.replaceOp(op, op.input2()); + return success(); + } + + if (input1Attr.getType().getElementType().isa() && + input1Attr.getSplatValue().isZero()) { + rewriter.replaceOp(op, op.input2()); + return success(); + } + } + + DenseElementsAttr input2Attr; + if (matchPattern(input2, m_Constant(&input2Attr)) && input2Attr.isSplat() && + input1.getType() == op.getType()) { + if (input2Attr.getType().getElementType().isa() && + input2Attr.getSplatValue().isZero()) { + rewriter.replaceOp(op, op.input1()); + return success(); + } + + if (input2Attr.getType().getElementType().isa() && + input2Attr.getSplatValue().isZero()) { + rewriter.replaceOp(op, op.input1()); + return success(); + } + } + + return failure(); + } +}; + +void AddOp::getCanonicalizationPatterns(OwningRewritePatternList &results, + MLIRContext *context) { + results.insert(context); +} + //===----------------------------------------------------------------------===// // Operator Folders. //===----------------------------------------------------------------------===// @@ -560,7 +609,7 @@ static void buildUnaryOpWithQuantInfo(OpBuilder &builder, /// This builder is called on the TOSA pad operator that needs to create its own /// OptionalAttr quantization_attr parameter to scale the padding values -/// correctly. +/// correctly. The absence of pad_const is interpreted as zero padding. static void buildPadOpWithQuantInfo(OpBuilder &builder, OperationState &result, Type outputType, Value input, Value paddings) { @@ -571,6 +620,20 @@ static void buildPadOpWithQuantInfo(OpBuilder &builder, OperationState &result, result.types.push_back(outputType); } +/// This builder is called on the TOSA pad operator when an explicit pad_const +/// value is passed in. It also optionally constructs quantization_attr. +static void buildExplicitValuePadOpWithQuantInfo(OpBuilder &builder, + OperationState &result, + Type outputType, Value input, + Value paddings, + Value pad_const) { + result.addOperands({input, paddings, pad_const}); + auto quantAttr = buildPadOpQuantizationAttr(builder, input); + if (quantAttr) + result.addAttribute("quantization_info", quantAttr); + result.types.push_back(outputType); +} + //===----------------------------------------------------------------------===// // TOSA Operator Return Type Inference.
//===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp index c6f29e0a641a2..6528789810bfa 100644 --- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp @@ -686,6 +686,12 @@ class TransposeOpLowering : public OpRewritePattern { for (auto attr : op.transp()) transp.push_back(attr.cast().getInt()); + if (vectorTransformOptions.vectorTransposeLowering == + vector::VectorTransposeLowering::Shuffle && + resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) + return rewriter.notifyMatchFailure( + op, "Options specify lowering to shuffle"); + // Handle a true 2-D matrix transpose differently when requested. if (vectorTransformOptions.vectorTransposeLowering == vector::VectorTransposeLowering::Flat && @@ -740,6 +746,61 @@ class TransposeOpLowering : public OpRewritePattern { vector::VectorTransformsOptions vectorTransformOptions; }; +/// Rewrite a 2-D vector.transpose as a sequence of: +/// vector.shape_cast 2D -> 1D +/// vector.shuffle +/// vector.shape_cast 1D -> 2D +class TransposeOp2DToShuffleLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + TransposeOp2DToShuffleLowering( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context) + : OpRewritePattern(context), + vectorTransformOptions(vectorTransformOptions) {} + + LogicalResult matchAndRewrite(vector::TransposeOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + + VectorType srcType = op.getVectorType(); + if (srcType.getRank() != 2) + return rewriter.notifyMatchFailure(op, "Not a 2D transpose"); + + SmallVector transp; + for (auto attr : op.transp()) + transp.push_back(attr.cast().getInt()); + if (transp[0] != 1 || transp[1] != 0) + return rewriter.notifyMatchFailure(op, "Not a 2D transpose permutation"); + + if (vectorTransformOptions.vectorTransposeLowering != + VectorTransposeLowering::Shuffle) + return rewriter.notifyMatchFailure(op, "Options do not ask for Shuffle"); + + int64_t m = srcType.getShape().front(), n = srcType.getShape().back(); + Value casted = rewriter.create( + loc, VectorType::get({m * n}, srcType.getElementType()), op.vector()); + SmallVector mask; + mask.reserve(m * n); + for (int64_t j = 0; j < n; ++j) + for (int64_t i = 0; i < m; ++i) + mask.push_back(i * n + j); + + Value shuffled = + rewriter.create(loc, casted, casted, mask); + rewriter.replaceOpWithNewOp(op, op.getResultType(), + shuffled); + + return success(); + } + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; +}; + /// Progressive lowering of OuterProductOp.
/// One: /// %x = vector.outerproduct %lhs, %rhs, %acc @@ -3518,7 +3579,7 @@ class DropInnerMostUnitDims : public OpRewritePattern { LogicalResult matchAndRewrite(vector::TransferReadOp readOp, PatternRewriter &rewriter) const override { - auto srcType = readOp.source().getType().cast(); + auto srcType = readOp.source().getType().dyn_cast(); if (!srcType || !srcType.hasStaticShape()) return failure(); @@ -3656,7 +3717,8 @@ void mlir::vector::populateVectorContractLoweringPatterns( void mlir::vector::populateVectorTransposeLoweringPatterns( RewritePatternSet &patterns, VectorTransformsOptions options) { - patterns.add(options, patterns.getContext()); + patterns.add( + options, patterns.getContext()); } void mlir::vector::populateVectorReductionToContractPatterns( diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index 97e354cdba299..d630a3cb17956 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -5,7 +5,7 @@ set(LLVM_OPTIONAL_SOURCES AsyncRuntime.cpp CRunnerUtils.cpp CudaRuntimeWrappers.cpp - SparseUtils.cpp + SparseTensorUtils.cpp ExecutionEngine.cpp RocmRuntimeWrappers.cpp RunnerUtils.cpp @@ -79,7 +79,7 @@ add_mlir_library(MLIRJitRunner add_mlir_library(mlir_c_runner_utils SHARED CRunnerUtils.cpp - SparseUtils.cpp + SparseTensorUtils.cpp EXCLUDE_FROM_LIBMLIR ) diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp similarity index 84% rename from mlir/lib/ExecutionEngine/SparseUtils.cpp rename to mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 24b60300a760f..52396d4ce6fcd 100644 --- a/mlir/lib/ExecutionEngine/SparseUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -1,4 +1,4 @@ -//===- SparseUtils.cpp - Sparse Utils for MLIR execution ------------------===// +//===- SparseTensorUtils.cpp - Sparse Tensor Utils for MLIR execution -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,6 +14,7 @@ // //===----------------------------------------------------------------------===// +#include "mlir/ExecutionEngine/SparseTensorUtils.h" #include "mlir/ExecutionEngine/CRunnerUtils.h" #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS @@ -162,8 +163,6 @@ struct SparseTensorCOO { /// function overloading to implement "partial" method specialization. class SparseTensorStorageBase { public: - enum DimLevelType : uint8_t { kDense = 0, kCompressed = 1, kSingleton = 2 }; - virtual uint64_t getDimSize(uint64_t) = 0; // Overhead storage. @@ -206,7 +205,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { /// permutation, and per-dimension dense/sparse annotations, using /// the coordinate scheme tensor for the initial contents if provided. SparseTensorStorage(const std::vector &szs, const uint64_t *perm, - const uint8_t *sparsity, SparseTensorCOO *tensor) + const DimLevelType *sparsity, SparseTensorCOO *tensor) : sizes(szs), rev(getRank()), pointers(getRank()), indices(getRank()) { uint64_t rank = getRank(); // Store "reverse" permutation. 
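As background for the per-dimension pointers/indices vectors that this class maintains, here is a standalone sketch of one compressed dimension in plain C++ (hypothetical 3x4 matrix; it assumes the conventional reading in which the row dimension is dense and the column dimension is compressed):

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Nonzeros of a 3x4 matrix:
  //   | 1 0 0 2 |
  //   | 0 0 0 0 |
  //   | 0 3 0 0 |
  // pointers[r]..pointers[r+1] brackets the entries of row r, mirroring how
  // fromCOO below appends indices[d].size() to pointers[d] each time a
  // segment of a compressed dimension is finalized.
  std::vector<uint64_t> pointers = {0, 2, 2, 3};
  std::vector<uint64_t> indices = {0, 3, 1}; // column index of each nonzero
  std::vector<double> values = {1.0, 2.0, 3.0};
  for (uint64_t r = 0; r + 1 < pointers.size(); ++r)
    for (uint64_t k = pointers[r]; k < pointers[r + 1]; ++k)
      std::printf("A(%llu,%llu) = %g\n", (unsigned long long)r,
                  (unsigned long long)indices[k], values[k]);
  return 0;
}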
@@ -216,17 +215,18 @@ class SparseTensorStorage : public SparseTensorStorageBase { // TODO: needs fine-tuning based on sparsity for (uint64_t r = 0, s = 1; r < rank; r++) { s *= sizes[r]; - if (sparsity[r] == kCompressed) { + if (sparsity[r] == DimLevelType::kCompressed) { pointers[r].reserve(s + 1); indices[r].reserve(s); s = 1; } else { - assert(sparsity[r] == kDense && "singleton not yet supported"); + assert(sparsity[r] == DimLevelType::kDense && + "singleton not yet supported"); } } // Prepare sparse pointer structures for all dimensions. for (uint64_t r = 0; r < rank; r++) - if (sparsity[r] == kCompressed) + if (sparsity[r] == DimLevelType::kCompressed) pointers[r].push_back(0); // Then assign contents from coordinate scheme tensor if provided. if (tensor) { @@ -288,7 +288,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { /// permutation as is desired for the new sparse tensor storage. static SparseTensorStorage * newSparseTensor(uint64_t rank, const uint64_t *sizes, const uint64_t *perm, - const uint8_t *sparsity, SparseTensorCOO *tensor) { + const DimLevelType *sparsity, SparseTensorCOO *tensor) { SparseTensorStorage *n = nullptr; if (tensor) { assert(tensor->getRank() == rank); @@ -311,8 +311,8 @@ class SparseTensorStorage : public SparseTensorStorageBase { /// Initializes sparse tensor storage scheme from a memory-resident sparse /// tensor in coordinate scheme. This method prepares the pointers and /// indices arrays under the given per-dimension dense/sparse annotations. - void fromCOO(SparseTensorCOO *tensor, const uint8_t *sparsity, uint64_t lo, - uint64_t hi, uint64_t d) { + void fromCOO(SparseTensorCOO *tensor, const DimLevelType *sparsity, + uint64_t lo, uint64_t hi, uint64_t d) { const std::vector> &elements = tensor->getElements(); // Once dimensions are exhausted, insert the numerical values. if (d == getRank()) { @@ -331,7 +331,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { while (seg < hi && elements[seg].indices[d] == idx) seg++; // Handle segment in interval for sparse or dense dimension. - if (sparsity[d] == kCompressed) { + if (sparsity[d] == DimLevelType::kCompressed) { indices[d].push_back(idx); } else { // For dense storage we must fill in all the zero values between @@ -346,7 +346,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { lo = seg; } // Finalize the sparse pointer structure at this dimension. 
- if (sparsity[d] == kCompressed) { + if (sparsity[d] == DimLevelType::kCompressed) { pointers[d].push_back(indices[d].size()); } else { // For dense storage we must fill in all the zero values after @@ -543,53 +543,35 @@ typedef uint64_t index_t; // //===----------------------------------------------------------------------===// -enum OverheadTypeEnum : uint32_t { kU64 = 1, kU32 = 2, kU16 = 3, kU8 = 4 }; - -enum PrimaryTypeEnum : uint32_t { - kF64 = 1, - kF32 = 2, - kI64 = 3, - kI32 = 4, - kI16 = 5, - kI8 = 6 -}; - -enum Action : uint32_t { - kEmpty = 0, - kFromFile = 1, - kFromCOO = 2, - kEmptyCOO = 3, - kToCOO = 4, - kToIter = 5 -}; - #define CASE(p, i, v, P, I, V) \ if (ptrTp == (p) && indTp == (i) && valTp == (v)) { \ SparseTensorCOO *tensor = nullptr; \ - if (action <= kFromCOO) { \ - if (action == kFromFile) { \ + if (action <= Action::kFromCOO) { \ + if (action == Action::kFromFile) { \ char *filename = static_cast(ptr); \ tensor = openSparseTensorCOO(filename, rank, sizes, perm); \ - } else if (action == kFromCOO) { \ + } else if (action == Action::kFromCOO) { \ tensor = static_cast *>(ptr); \ } else { \ - assert(action == kEmpty); \ + assert(action == Action::kEmpty); \ } \ return SparseTensorStorage::newSparseTensor(rank, sizes, perm, \ sparsity, tensor); \ - } else if (action == kEmptyCOO) { \ + } else if (action == Action::kEmptyCOO) { \ return SparseTensorCOO::newSparseTensorCOO(rank, sizes, perm); \ } else { \ tensor = static_cast *>(ptr)->toCOO(perm); \ - if (action == kToIter) { \ + if (action == Action::kToIterator) { \ tensor->startIterator(); \ } else { \ - assert(action == kToCOO); \ + assert(action == Action::kToCOO); \ } \ return tensor; \ } \ } +#define CASE_SECSAME(p, v, P, V) CASE(p, p, v, P, P, V) + #define IMPL_SPARSEVALUES(NAME, TYPE, LIB) \ void _mlir_ciface_##NAME(StridedMemRefType *ref, void *tensor) { \ assert(ref); \ @@ -656,78 +638,110 @@ enum Action : uint32_t { /// Constructs a new sparse tensor. This is the "swiss army knife" /// method for materializing sparse tensors into the computation. /// -/// action: +/// Action: /// kEmpty = returns empty storage to fill later /// kFromFile = returns storage, where ptr contains filename to read /// kFromCOO = returns storage, where ptr contains coordinate scheme to assign /// kEmptyCOO = returns empty coordinate scheme to fill and use with kFromCOO /// kToCOO = returns coordinate scheme from storage in ptr to use with kFromCOO -/// kToIter = returns iterator from storage in ptr (call getNext() to use) +/// kToIterator = returns iterator from storage in ptr (call getNext() to use) void * -_mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT +_mlir_ciface_newSparseTensor(StridedMemRefType *aref, // NOLINT StridedMemRefType *sref, StridedMemRefType *pref, - uint32_t ptrTp, uint32_t indTp, uint32_t valTp, - uint32_t action, void *ptr) { + OverheadType ptrTp, OverheadType indTp, + PrimaryType valTp, Action action, void *ptr) { assert(aref && sref && pref); assert(aref->strides[0] == 1 && sref->strides[0] == 1 && pref->strides[0] == 1); assert(aref->sizes[0] == sref->sizes[0] && sref->sizes[0] == pref->sizes[0]); - const uint8_t *sparsity = aref->data + aref->offset; + const DimLevelType *sparsity = aref->data + aref->offset; const index_t *sizes = sref->data + sref->offset; const index_t *perm = pref->data + pref->offset; uint64_t rank = aref->sizes[0]; // Double matrices with all combinations of overhead storage. 
- CASE(kU64, kU64, kF64, uint64_t, uint64_t, double); - CASE(kU64, kU32, kF64, uint64_t, uint32_t, double); - CASE(kU64, kU16, kF64, uint64_t, uint16_t, double); - CASE(kU64, kU8, kF64, uint64_t, uint8_t, double); - CASE(kU32, kU64, kF64, uint32_t, uint64_t, double); - CASE(kU32, kU32, kF64, uint32_t, uint32_t, double); - CASE(kU32, kU16, kF64, uint32_t, uint16_t, double); - CASE(kU32, kU8, kF64, uint32_t, uint8_t, double); - CASE(kU16, kU64, kF64, uint16_t, uint64_t, double); - CASE(kU16, kU32, kF64, uint16_t, uint32_t, double); - CASE(kU16, kU16, kF64, uint16_t, uint16_t, double); - CASE(kU16, kU8, kF64, uint16_t, uint8_t, double); - CASE(kU8, kU64, kF64, uint8_t, uint64_t, double); - CASE(kU8, kU32, kF64, uint8_t, uint32_t, double); - CASE(kU8, kU16, kF64, uint8_t, uint16_t, double); - CASE(kU8, kU8, kF64, uint8_t, uint8_t, double); + CASE(OverheadType::kU64, OverheadType::kU64, PrimaryType::kF64, uint64_t, + uint64_t, double); + CASE(OverheadType::kU64, OverheadType::kU32, PrimaryType::kF64, uint64_t, + uint32_t, double); + CASE(OverheadType::kU64, OverheadType::kU16, PrimaryType::kF64, uint64_t, + uint16_t, double); + CASE(OverheadType::kU64, OverheadType::kU8, PrimaryType::kF64, uint64_t, + uint8_t, double); + CASE(OverheadType::kU32, OverheadType::kU64, PrimaryType::kF64, uint32_t, + uint64_t, double); + CASE(OverheadType::kU32, OverheadType::kU32, PrimaryType::kF64, uint32_t, + uint32_t, double); + CASE(OverheadType::kU32, OverheadType::kU16, PrimaryType::kF64, uint32_t, + uint16_t, double); + CASE(OverheadType::kU32, OverheadType::kU8, PrimaryType::kF64, uint32_t, + uint8_t, double); + CASE(OverheadType::kU16, OverheadType::kU64, PrimaryType::kF64, uint16_t, + uint64_t, double); + CASE(OverheadType::kU16, OverheadType::kU32, PrimaryType::kF64, uint16_t, + uint32_t, double); + CASE(OverheadType::kU16, OverheadType::kU16, PrimaryType::kF64, uint16_t, + uint16_t, double); + CASE(OverheadType::kU16, OverheadType::kU8, PrimaryType::kF64, uint16_t, + uint8_t, double); + CASE(OverheadType::kU8, OverheadType::kU64, PrimaryType::kF64, uint8_t, + uint64_t, double); + CASE(OverheadType::kU8, OverheadType::kU32, PrimaryType::kF64, uint8_t, + uint32_t, double); + CASE(OverheadType::kU8, OverheadType::kU16, PrimaryType::kF64, uint8_t, + uint16_t, double); + CASE(OverheadType::kU8, OverheadType::kU8, PrimaryType::kF64, uint8_t, + uint8_t, double); // Float matrices with all combinations of overhead storage. - CASE(kU64, kU64, kF32, uint64_t, uint64_t, float); - CASE(kU64, kU32, kF32, uint64_t, uint32_t, float); - CASE(kU64, kU16, kF32, uint64_t, uint16_t, float); - CASE(kU64, kU8, kF32, uint64_t, uint8_t, float); - CASE(kU32, kU64, kF32, uint32_t, uint64_t, float); - CASE(kU32, kU32, kF32, uint32_t, uint32_t, float); - CASE(kU32, kU16, kF32, uint32_t, uint16_t, float); - CASE(kU32, kU8, kF32, uint32_t, uint8_t, float); - CASE(kU16, kU64, kF32, uint16_t, uint64_t, float); - CASE(kU16, kU32, kF32, uint16_t, uint32_t, float); - CASE(kU16, kU16, kF32, uint16_t, uint16_t, float); - CASE(kU16, kU8, kF32, uint16_t, uint8_t, float); - CASE(kU8, kU64, kF32, uint8_t, uint64_t, float); - CASE(kU8, kU32, kF32, uint8_t, uint32_t, float); - CASE(kU8, kU16, kF32, uint8_t, uint16_t, float); - CASE(kU8, kU8, kF32, uint8_t, uint8_t, float); - - // Integral matrices with same overhead storage. 
- CASE(kU64, kU64, kI64, uint64_t, uint64_t, int64_t); - CASE(kU64, kU64, kI32, uint64_t, uint64_t, int32_t); - CASE(kU64, kU64, kI16, uint64_t, uint64_t, int16_t); - CASE(kU64, kU64, kI8, uint64_t, uint64_t, int8_t); - CASE(kU32, kU32, kI32, uint32_t, uint32_t, int32_t); - CASE(kU32, kU32, kI16, uint32_t, uint32_t, int16_t); - CASE(kU32, kU32, kI8, uint32_t, uint32_t, int8_t); - CASE(kU16, kU16, kI32, uint16_t, uint16_t, int32_t); - CASE(kU16, kU16, kI16, uint16_t, uint16_t, int16_t); - CASE(kU16, kU16, kI8, uint16_t, uint16_t, int8_t); - CASE(kU8, kU8, kI32, uint8_t, uint8_t, int32_t); - CASE(kU8, kU8, kI16, uint8_t, uint8_t, int16_t); - CASE(kU8, kU8, kI8, uint8_t, uint8_t, int8_t); + CASE(OverheadType::kU64, OverheadType::kU64, PrimaryType::kF32, uint64_t, + uint64_t, float); + CASE(OverheadType::kU64, OverheadType::kU32, PrimaryType::kF32, uint64_t, + uint32_t, float); + CASE(OverheadType::kU64, OverheadType::kU16, PrimaryType::kF32, uint64_t, + uint16_t, float); + CASE(OverheadType::kU64, OverheadType::kU8, PrimaryType::kF32, uint64_t, + uint8_t, float); + CASE(OverheadType::kU32, OverheadType::kU64, PrimaryType::kF32, uint32_t, + uint64_t, float); + CASE(OverheadType::kU32, OverheadType::kU32, PrimaryType::kF32, uint32_t, + uint32_t, float); + CASE(OverheadType::kU32, OverheadType::kU16, PrimaryType::kF32, uint32_t, + uint16_t, float); + CASE(OverheadType::kU32, OverheadType::kU8, PrimaryType::kF32, uint32_t, + uint8_t, float); + CASE(OverheadType::kU16, OverheadType::kU64, PrimaryType::kF32, uint16_t, + uint64_t, float); + CASE(OverheadType::kU16, OverheadType::kU32, PrimaryType::kF32, uint16_t, + uint32_t, float); + CASE(OverheadType::kU16, OverheadType::kU16, PrimaryType::kF32, uint16_t, + uint16_t, float); + CASE(OverheadType::kU16, OverheadType::kU8, PrimaryType::kF32, uint16_t, + uint8_t, float); + CASE(OverheadType::kU8, OverheadType::kU64, PrimaryType::kF32, uint8_t, + uint64_t, float); + CASE(OverheadType::kU8, OverheadType::kU32, PrimaryType::kF32, uint8_t, + uint32_t, float); + CASE(OverheadType::kU8, OverheadType::kU16, PrimaryType::kF32, uint8_t, + uint16_t, float); + CASE(OverheadType::kU8, OverheadType::kU8, PrimaryType::kF32, uint8_t, + uint8_t, float); + + // Integral matrices with both overheads of the same type. + CASE_SECSAME(OverheadType::kU64, PrimaryType::kI64, uint64_t, int64_t); + CASE_SECSAME(OverheadType::kU64, PrimaryType::kI32, uint64_t, int32_t); + CASE_SECSAME(OverheadType::kU64, PrimaryType::kI16, uint64_t, int16_t); + CASE_SECSAME(OverheadType::kU64, PrimaryType::kI8, uint64_t, int8_t); + CASE_SECSAME(OverheadType::kU32, PrimaryType::kI32, uint32_t, int32_t); + CASE_SECSAME(OverheadType::kU32, PrimaryType::kI16, uint32_t, int16_t); + CASE_SECSAME(OverheadType::kU32, PrimaryType::kI8, uint32_t, int8_t); + CASE_SECSAME(OverheadType::kU16, PrimaryType::kI32, uint16_t, int32_t); + CASE_SECSAME(OverheadType::kU16, PrimaryType::kI16, uint16_t, int16_t); + CASE_SECSAME(OverheadType::kU16, PrimaryType::kI8, uint16_t, int8_t); + CASE_SECSAME(OverheadType::kU8, PrimaryType::kI32, uint8_t, int32_t); + CASE_SECSAME(OverheadType::kU8, PrimaryType::kI16, uint8_t, int16_t); + CASE_SECSAME(OverheadType::kU8, PrimaryType::kI8, uint8_t, int8_t); // Unsupported case (add above if needed). fputs("unsupported combination of types\n", stderr); @@ -830,7 +844,7 @@ void delSparseTensor(void *tensor) { void *convertToMLIRSparseTensor(uint64_t rank, uint64_t nse, uint64_t *shape, double *values, uint64_t *indices) { // Setup all-dims compressed and default ordering. 
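+ // (Every dimension is annotated DimLevelType::kCompressed and the + // permutation is the identity, so the external COO data maps directly + // onto the internal storage scheme.)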
- std::vector sparse(rank, SparseTensorStorageBase::kCompressed); + std::vector sparse(rank, DimLevelType::kCompressed); std::vector perm(rank); std::iota(perm.begin(), perm.end(), 0); // Convert external format to internal COO. diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index cf1eb8b56d807..15625255f3374 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -16,6 +16,7 @@ #include "mlir/IR/AsmState.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinDialect.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/DialectImplementation.h" @@ -1608,6 +1609,9 @@ void AsmPrinter::Impl::printAttribute(Attribute attr, if (state && succeeded(state->getAliasState().getAlias(attr, os))) return; + if (!isa(attr.getDialect())) + return printDialectAttribute(attr); + auto attrType = attr.getType(); if (auto opaqueAttr = attr.dyn_cast()) { printDialectSymbol(os, "#", opaqueAttr.getDialectNamespace(), @@ -1728,11 +1732,7 @@ void AsmPrinter::Impl::printAttribute(Attribute attr, } else if (auto locAttr = attr.dyn_cast()) { printLocation(locAttr); - - } else { - return printDialectAttribute(attr); } - // Don't print the type if we must elide it, or if it is a None type. if (typeElision != AttrTypeElision::Must && !attrType.isa()) { os << " : "; diff --git a/mlir/lib/IR/Builders.cpp b/mlir/lib/IR/Builders.cpp index 775d0c40c53c5..68471a5fbf7da 100644 --- a/mlir/lib/IR/Builders.cpp +++ b/mlir/lib/IR/Builders.cpp @@ -392,9 +392,11 @@ Operation *OpBuilder::createOperation(const OperationState &state) { /// Note: This function does not erase the operation on a successful fold. LogicalResult OpBuilder::tryFold(Operation *op, SmallVectorImpl &results) { - results.reserve(op->getNumResults()); + ResultRange opResults = op->getResults(); + + results.reserve(opResults.size()); auto cleanupFailure = [&] { - results.assign(op->result_begin(), op->result_end()); + results.assign(opResults.begin(), opResults.end()); return failure(); }; @@ -405,7 +407,7 @@ LogicalResult OpBuilder::tryFold(Operation *op, // Check to see if any operands to the operation is constant and whether // the operation knows how to constant fold itself. SmallVector constOperands(op->getNumOperands()); - for (unsigned i = 0, e = op->getNumOperands(); i != e; ++i) + for (unsigned i = 0, e = constOperands.size(); i != e; ++i) matchPattern(op->getOperand(i), m_Constant(&constOperands[i])); // Try to fold the operation. @@ -419,9 +421,14 @@ LogicalResult OpBuilder::tryFold(Operation *op, // Populate the results with the folded results. Dialect *dialect = op->getDialect(); - for (auto &it : llvm::enumerate(foldResults)) { + for (auto it : llvm::zip(foldResults, opResults.getTypes())) { + Type expectedType = std::get<1>(it); + // Normal values get pushed back directly. - if (auto value = it.value().dyn_cast()) { + if (auto value = std::get<0>(it).dyn_cast()) { + if (value.getType() != expectedType) + return cleanupFailure(); + results.push_back(value); continue; } @@ -431,9 +438,9 @@ LogicalResult OpBuilder::tryFold(Operation *op, return cleanupFailure(); // Ask the dialect to materialize a constant operation for this value. 
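+ // A fold result that is a plain Value must already match expectedType; an + // attribute result is instead materialized as a constant of that type.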
- Attribute attr = it.value().get(); - auto *constOp = dialect->materializeConstant( - cstBuilder, attr, op->getResult(it.index()).getType(), op->getLoc()); + Attribute attr = std::get<0>(it).get(); + auto *constOp = dialect->materializeConstant(cstBuilder, attr, expectedType, + op->getLoc()); if (!constOp) { // Erase any generated constants. for (Operation *cst : generatedConstants) diff --git a/mlir/lib/IR/BuiltinAttributes.cpp b/mlir/lib/IR/BuiltinAttributes.cpp index 72891d995af67..41a8c46c0c6d9 100644 --- a/mlir/lib/IR/BuiltinAttributes.cpp +++ b/mlir/lib/IR/BuiltinAttributes.cpp @@ -185,26 +185,30 @@ DictionaryAttr DictionaryAttr::getWithSorted(MLIRContext *context, /// Return the specified attribute if present, null otherwise. Attribute DictionaryAttr::get(StringRef name) const { - Optional attr = getNamed(name); - return attr ? attr->second : nullptr; + auto it = impl::findAttrSorted(begin(), end(), name); + return it.second ? it.first->second : Attribute(); } Attribute DictionaryAttr::get(Identifier name) const { - Optional attr = getNamed(name); - return attr ? attr->second : nullptr; + auto it = impl::findAttrSorted(begin(), end(), name); + return it.second ? it.first->second : Attribute(); } /// Return the specified named attribute if present, None otherwise. Optional DictionaryAttr::getNamed(StringRef name) const { - ArrayRef values = getValue(); - const auto *it = llvm::lower_bound(values, name); - return it != values.end() && it->first == name ? *it - : Optional(); + auto it = impl::findAttrSorted(begin(), end(), name); + return it.second ? *it.first : Optional(); } Optional DictionaryAttr::getNamed(Identifier name) const { - for (auto elt : getValue()) - if (elt.first == name) - return elt; - return llvm::None; + auto it = impl::findAttrSorted(begin(), end(), name); + return it.second ? *it.first : Optional(); +} + +/// Return whether the specified attribute is present. +bool DictionaryAttr::contains(StringRef name) const { + return impl::findAttrSorted(begin(), end(), name).second; +} +bool DictionaryAttr::contains(Identifier name) const { + return impl::findAttrSorted(begin(), end(), name).second; } DictionaryAttr::iterator DictionaryAttr::begin() const { @@ -1336,8 +1340,11 @@ Attribute SparseElementsAttr::getZeroAttr() const { if (eltType.isa()) return FloatAttr::get(eltType, 0); + // Handle string type. + if (getValues().isa()) + return StringAttr::get("", eltType); + // Otherwise, this is an integer. - // TODO: Handle StringAttr here. return IntegerAttr::get(eltType, 0); } diff --git a/mlir/lib/IR/FunctionSupport.cpp b/mlir/lib/IR/FunctionSupport.cpp index 4f6f76cfbcfb9..d67d277f69d31 100644 --- a/mlir/lib/IR/FunctionSupport.cpp +++ b/mlir/lib/IR/FunctionSupport.cpp @@ -164,7 +164,7 @@ void mlir::function_like_impl::insertFunctionArguments( // Update the function type and any entry block arguments. op->setAttr(getTypeAttrName(), TypeAttr::get(newType)); for (unsigned i = 0, e = argIndices.size(); i < e; ++i) - entry.insertArgument(argIndices[i], argTypes[i], + entry.insertArgument(argIndices[i] + i, argTypes[i], argLocs.empty() ? Optional{} : argLocs[i]); } diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index 3f1310f73a78b..d232b1d253ac3 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -125,9 +125,8 @@ Operation *Operation::create(Location location, OperationName name, // into account the size of the operation, its trailing objects, and its // prefixed objects. 
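+ // Operands now live in a plain trailing array, so the OpOperand objects are + // counted directly in totalSizeToAlloc instead of being appended through + // OperandStorage::additionalAllocSize.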
size_t byteSize = - totalSizeToAlloc( - numSuccessors, numRegions, needsOperandStorage ? 1 : 0) + - detail::OperandStorage::additionalAllocSize(numOperands); + totalSizeToAlloc( + needsOperandStorage ? 1 : 0, numSuccessors, numRegions, numOperands); size_t prefixByteSize = llvm::alignTo( Operation::prefixAllocSize(numTrailingResults, numInlineResults), alignof(Operation)); @@ -156,8 +155,10 @@ Operation *Operation::create(Location location, OperationName name, new (&op->getRegion(i)) Region(op); // Initialize the operands. - if (needsOperandStorage) - new (&op->getOperandStorage()) detail::OperandStorage(op, operands); + if (needsOperandStorage) { + new (&op->getOperandStorage()) detail::OperandStorage( + op, op->getTrailingObjects(), operands); + } // Initialize the successors. auto blockOperands = op->getBlockOperands(); diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp index 002d746c1a81f..4c9bc848ce472 100644 --- a/mlir/lib/IR/OperationSupport.cpp +++ b/mlir/lib/IR/OperationSupport.cpp @@ -81,42 +81,24 @@ void NamedAttrList::push_back(NamedAttribute newAttribute) { attrs.push_back(newAttribute); } -/// Helper function to find attribute in possible sorted vector of -/// NamedAttributes. -template -static auto *findAttr(SmallVectorImpl &attrs, T name, - bool sorted) { - if (!sorted) { - return llvm::find_if( - attrs, [name](NamedAttribute attr) { return attr.first == name; }); - } - - auto *it = llvm::lower_bound(attrs, name); - if (it == attrs.end() || it->first != name) - return attrs.end(); - return it; -} - /// Return the specified attribute if present, null otherwise. Attribute NamedAttrList::get(StringRef name) const { - auto *it = findAttr(attrs, name, isSorted()); - return it != attrs.end() ? it->second : nullptr; + auto it = findAttr(*this, name); + return it.second ? it.first->second : Attribute(); } - -/// Return the specified attribute if present, null otherwise. Attribute NamedAttrList::get(Identifier name) const { - auto *it = findAttr(attrs, name, isSorted()); - return it != attrs.end() ? it->second : nullptr; + auto it = findAttr(*this, name); + return it.second ? it.first->second : Attribute(); } /// Return the specified named attribute if present, None otherwise. Optional NamedAttrList::getNamed(StringRef name) const { - auto *it = findAttr(attrs, name, isSorted()); - return it != attrs.end() ? *it : Optional(); + auto it = findAttr(*this, name); + return it.second ? *it.first : Optional(); } Optional NamedAttrList::getNamed(Identifier name) const { - auto *it = findAttr(attrs, name, isSorted()); - return it != attrs.end() ? *it : Optional(); + auto it = findAttr(*this, name); + return it.second ? *it.first : Optional(); } /// If an attribute exists with the specified name, change it to the new @@ -124,34 +106,36 @@ Optional NamedAttrList::getNamed(Identifier name) const { Attribute NamedAttrList::set(Identifier name, Attribute value) { assert(value && "attributes may never be null"); - // Look for an existing value for the given name, and set it in-place. - auto *it = findAttr(attrs, name, isSorted()); - if (it != attrs.end()) { - // Only update if the value is different from the existing. - Attribute oldValue = it->second; - if (oldValue != value) { + // Look for an existing attribute with the given name, and set its value + // in-place. Return the previous value of the attribute, if there was one.
+  auto it = findAttr(*this, name);
+  if (it.second) {
+    // Update the existing attribute by swapping out the old value for the new
+    // value. Return the old value.
+    if (it.first->second != value) {
+      std::swap(it.first->second, value);
+      // If the attributes have changed, the dictionary is invalidated.
       dictionarySorted.setPointer(nullptr);
-      it->second = value;
     }
-    return oldValue;
+    return value;
   }
-
-  // Otherwise, insert the new attribute into its sorted position.
-  it = llvm::lower_bound(attrs, name);
+  // Perform a string lookup to insert the new attribute into its sorted
+  // position.
+  if (isSorted())
+    it = findAttr(*this, name.strref());
+  attrs.insert(it.first, {name, value});
+  // Invalidate the dictionary. Return null as there was no previous value.
   dictionarySorted.setPointer(nullptr);
-  attrs.insert(it, {name, value});
   return Attribute();
 }
+
 Attribute NamedAttrList::set(StringRef name, Attribute value) {
-  assert(value && "setting null attribute not supported");
+  assert(value && "attributes may never be null");
   return set(mlir::Identifier::get(name, value.getContext()), value);
 }
 
 Attribute
 NamedAttrList::eraseImpl(SmallVectorImpl<NamedAttribute>::iterator it) {
-  if (it == attrs.end())
-    return nullptr;
-
   // Erasing does not affect the sorted property.
   Attribute attr = it->second;
   attrs.erase(it);
@@ -160,11 +144,13 @@ NamedAttrList::eraseImpl(SmallVectorImpl<NamedAttribute>::iterator it) {
 }
 
 Attribute NamedAttrList::erase(Identifier name) {
-  return eraseImpl(findAttr(attrs, name, isSorted()));
+  auto it = findAttr(*this, name);
+  return it.second ? eraseImpl(it.first) : Attribute();
 }
 
 Attribute NamedAttrList::erase(StringRef name) {
-  return eraseImpl(findAttr(attrs, name, isSorted()));
+  auto it = findAttr(*this, name);
+  return it.second ? eraseImpl(it.first) : Attribute();
 }
 
 NamedAttrList &
@@ -226,26 +212,22 @@ void OperationState::addRegions(
 // OperandStorage
 //===----------------------------------------------------------------------===//
 
-detail::OperandStorage::OperandStorage(Operation *owner, ValueRange values)
-    : inlineStorage() {
-  auto &inlineStorage = getInlineStorage();
-  inlineStorage.numOperands = inlineStorage.capacity = values.size();
-  auto *operandPtrBegin = getTrailingObjects<OpOperand>();
-  for (unsigned i = 0, e = inlineStorage.numOperands; i < e; ++i)
-    new (&operandPtrBegin[i]) OpOperand(owner, values[i]);
+detail::OperandStorage::OperandStorage(Operation *owner,
+                                       OpOperand *trailingOperands,
+                                       ValueRange values)
+    : isStorageDynamic(false), operandStorage(trailingOperands) {
+  numOperands = capacity = values.size();
+  for (unsigned i = 0; i < numOperands; ++i)
+    new (&operandStorage[i]) OpOperand(owner, values[i]);
 }
 
 detail::OperandStorage::~OperandStorage() {
-  // Destruct the current storage container.
-  if (isDynamicStorage()) {
-    TrailingOperandStorage &storage = getDynamicStorage();
-    storage.~TrailingOperandStorage();
-    // Work around -Wfree-nonheap-object false positive fixed by D102728.
-    auto *mem = &storage;
-    free(mem);
-  } else {
-    getInlineStorage().~TrailingOperandStorage();
-  }
+  for (auto &operand : getOperands())
+    operand.~OpOperand();
+
+  // If the storage is dynamic, deallocate it.
+  if (isStorageDynamic)
+    free(operandStorage);
 }
 
 /// Replace the operands contained in the storage with the ones provided in
@@ -291,24 +273,22 @@ void detail::OperandStorage::setOperands(Operation *owner, unsigned start,
 
 /// Erase an operand held by the storage.
 void detail::OperandStorage::eraseOperands(unsigned start, unsigned length) {
-  TrailingOperandStorage &storage = getStorage();
-  MutableArrayRef<OpOperand> operands = storage.getOperands();
+  MutableArrayRef<OpOperand> operands = getOperands();
   assert((start + length) <= operands.size());
-  storage.numOperands -= length;
+  numOperands -= length;
 
   // Shift all operands down if the operand to remove is not at the end.
-  if (start != storage.numOperands) {
+  if (start != numOperands) {
     auto *indexIt = std::next(operands.begin(), start);
     std::rotate(indexIt, std::next(indexIt, length), operands.end());
   }
   for (unsigned i = 0; i != length; ++i)
-    operands[storage.numOperands + i].~OpOperand();
+    operands[numOperands + i].~OpOperand();
 }
 
 void detail::OperandStorage::eraseOperands(
     const llvm::BitVector &eraseIndices) {
-  TrailingOperandStorage &storage = getStorage();
-  MutableArrayRef<OpOperand> operands = storage.getOperands();
+  MutableArrayRef<OpOperand> operands = getOperands();
   assert(eraseIndices.size() == operands.size());
 
   // Check that at least one operand is erased.
@@ -317,11 +297,11 @@ void detail::OperandStorage::eraseOperands(
     return;
 
   // Shift all of the removed operands to the end, and destroy them.
-  storage.numOperands = firstErasedIndice;
+  numOperands = firstErasedIndice;
   for (unsigned i = firstErasedIndice + 1, e = operands.size(); i < e; ++i)
     if (!eraseIndices.test(i))
-      operands[storage.numOperands++] = std::move(operands[i]);
-  for (OpOperand &operand : operands.drop_front(storage.numOperands))
+      operands[numOperands++] = std::move(operands[i]);
+  for (OpOperand &operand : operands.drop_front(numOperands))
     operand.~OpOperand();
 }
 
@@ -329,24 +309,21 @@ void detail::OperandStorage::eraseOperands(
 /// operands.
 MutableArrayRef<OpOperand> detail::OperandStorage::resize(Operation *owner,
                                                           unsigned newSize) {
-  TrailingOperandStorage &storage = getStorage();
-
   // If the number of operands is less than or equal to the current amount, we
   // can just update in place.
-  unsigned &numOperands = storage.numOperands;
-  MutableArrayRef<OpOperand> operands = storage.getOperands();
+  MutableArrayRef<OpOperand> origOperands = getOperands();
   if (newSize <= numOperands) {
    // If the new size is less than the current, remove any extra operands.
     for (unsigned i = newSize; i != numOperands; ++i)
-      operands[i].~OpOperand();
+      origOperands[i].~OpOperand();
     numOperands = newSize;
-    return operands.take_front(newSize);
+    return origOperands.take_front(newSize);
   }
 
   // If the new size is within the original inline capacity, grow in place.
-  if (newSize <= storage.capacity) {
-    OpOperand *opBegin = operands.data();
+  if (newSize <= capacity) {
+    OpOperand *opBegin = origOperands.data();
     for (unsigned e = newSize; numOperands != e; ++numOperands)
       new (&opBegin[numOperands]) OpOperand(owner);
     return MutableArrayRef<OpOperand>(opBegin, newSize);
@@ -354,36 +331,32 @@ MutableArrayRef<OpOperand> detail::OperandStorage::resize(Operation *owner,
 
   // Otherwise, we need to allocate a new storage.
   unsigned newCapacity =
-      std::max(unsigned(llvm::NextPowerOf2(storage.capacity + 2)), newSize);
-  auto *newStorageMem =
-      malloc(TrailingOperandStorage::totalSizeToAlloc<OpOperand>(newCapacity));
-  auto *newStorage = ::new (newStorageMem) TrailingOperandStorage();
-  newStorage->numOperands = newSize;
-  newStorage->capacity = newCapacity;
+      std::max(unsigned(llvm::NextPowerOf2(capacity + 2)), newSize);
+  OpOperand *newOperandStorage =
+      reinterpret_cast<OpOperand *>(malloc(sizeof(OpOperand) * newCapacity));
 
   // Move the current operands to the new storage.
-  MutableArrayRef<OpOperand> newOperands = newStorage->getOperands();
-  std::uninitialized_copy(std::make_move_iterator(operands.begin()),
-                          std::make_move_iterator(operands.end()),
+  MutableArrayRef<OpOperand> newOperands(newOperandStorage, newSize);
+  std::uninitialized_copy(std::make_move_iterator(origOperands.begin()),
+                          std::make_move_iterator(origOperands.end()),
                           newOperands.begin());
 
   // Destroy the original operands.
-  for (auto &operand : operands)
+  for (auto &operand : origOperands)
     operand.~OpOperand();
 
   // Initialize any new operands.
   for (unsigned e = newSize; numOperands != e; ++numOperands)
     new (&newOperands[numOperands]) OpOperand(owner);
 
-  // If the current storage is also dynamic, free it.
-  if (isDynamicStorage()) {
-    // Work around -Wfree-nonheap-object false positive fixed by D102728.
-    auto *mem = &storage;
-    free(mem);
-  }
+  // If the current storage is dynamic, free it.
+  if (isStorageDynamic)
+    free(operandStorage);
 
   // Update the storage representation to use the new dynamic storage.
-  dynamicStorage.setPointerAndInt(newStorage, true);
+  operandStorage = newOperandStorage;
+  capacity = newCapacity;
+  isStorageDynamic = true;
   return newOperands;
 }
 
@@ -394,9 +367,6 @@ MutableArrayRef<OpOperand> detail::OperandStorage::resize(Operation *owner,
 //===----------------------------------------------------------------------===//
 // OperandRange
 
-OperandRange::OperandRange(Operation *op)
-    : OperandRange(op->getOpOperands().data(), op->getNumOperands()) {}
-
 unsigned OperandRange::getBeginOperandIndex() const {
   assert(!empty() && "range must not be empty");
   return base->getOperandNumber();
diff --git a/mlir/lib/IR/SymbolTable.cpp b/mlir/lib/IR/SymbolTable.cpp
index 6634eab4150eb..93d605ff1e845 100644
--- a/mlir/lib/IR/SymbolTable.cpp
+++ b/mlir/lib/IR/SymbolTable.cpp
@@ -151,8 +151,9 @@ void SymbolTable::erase(Operation *symbol) {
 
 // TODO: Consider if this should be renamed to something like insertOrUpdate
 /// Insert a new symbol into the table and associated operation if not already
-/// there and rename it as necessary to avoid collisions.
-void SymbolTable::insert(Operation *symbol, Block::iterator insertPt) {
+/// there and rename it as necessary to avoid collisions. Return the name of
+/// the symbol after insertion, as an attribute.
+StringAttr SymbolTable::insert(Operation *symbol, Block::iterator insertPt) {
   // The symbol cannot be the child of another op and must be the child of the
   // symbolTableOp after this.
   //
@@ -180,10 +181,10 @@ void SymbolTable::insert(Operation *symbol, Block::iterator insertPt) {
   // detected.
   StringAttr name = getSymbolName(symbol);
   if (symbolTable.insert({name, symbol}).second)
-    return;
+    return name;
   // If the symbol was already in the table, also return.
   if (symbolTable.lookup(name) == symbol)
-    return;
+    return name;
   // If a conflict was detected, then the symbol will not have been added to
   // the symbol table. Try suffixes until we get to a unique name that works.
   SmallString<128> nameBuffer(name.getValue());
@@ -199,6 +200,7 @@
   } while (!symbolTable.insert({StringAttr::get(context, nameBuffer), symbol})
                 .second);
   setSymbolName(symbol, nameBuffer);
+  return getSymbolName(symbol);
 }
 
 /// Returns the name of the given symbol operation.
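The StringAttr return value added above matters whenever the inserted symbol may collide with an existing one: the table renames the operation on conflict, so the post-insertion name is the only reliable handle. A minimal C++ usage sketch (the module and function ops here are assumed for illustration; they are not part of the patch):

  // Assumes `module` is a ModuleOp and `func` is a FuncOp whose symbol name
  // ("foo", say) may already exist in the module.
  SymbolTable symbolTable(module);
  StringAttr finalName = symbolTable.insert(func);
  // On collision the op is renamed with a uniquing suffix (e.g. "foo_0"),
  // so later lookups must use the returned name, not the original one.
  Operation *found = symbolTable.lookup(finalName.getValue());
  assert(found == func.getOperation());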
diff --git a/mlir/lib/Support/MlirOptMain.cpp b/mlir/lib/Support/MlirOptMain.cpp
index 6da9f993eeafb..9a8b21d37f254 100644
--- a/mlir/lib/Support/MlirOptMain.cpp
+++ b/mlir/lib/Support/MlirOptMain.cpp
@@ -48,7 +48,7 @@ using llvm::SMLoc;
 static LogicalResult performActions(raw_ostream &os, bool verifyDiagnostics,
                                     bool verifyPasses, SourceMgr &sourceMgr,
                                     MLIRContext *context,
-                                    const PassPipelineCLParser &passPipeline) {
+                                    PassPipelineFn passManagerSetupFn) {
   DefaultTimingManager tm;
   applyDefaultTimingManagerCLOptions(tm);
   TimingScope timing = tm.getRootScope();
@@ -72,13 +72,8 @@ static LogicalResult performActions(raw_ostream &os, bool verifyDiagnostics,
   applyPassManagerCLOptions(pm);
   pm.enableTiming(timing);
 
-  auto errorHandler = [&](const Twine &msg) {
-    emitError(UnknownLoc::get(context)) << msg;
-    return failure();
-  };
-
-  // Build the provided pipeline.
-  if (failed(passPipeline.addToPipeline(pm, errorHandler)))
+  // Callback to build the pipeline.
+  if (failed(passManagerSetupFn(pm)))
     return failure();
 
   // Run the pipeline.
@@ -98,8 +93,8 @@ static LogicalResult
 processBuffer(raw_ostream &os, std::unique_ptr<MemoryBuffer> ownedBuffer,
               bool verifyDiagnostics, bool verifyPasses,
               bool allowUnregisteredDialects, bool preloadDialectsInContext,
-              const PassPipelineCLParser &passPipeline,
-              DialectRegistry &registry, llvm::ThreadPool &threadPool) {
+              PassPipelineFn passManagerSetupFn, DialectRegistry &registry,
+              llvm::ThreadPool &threadPool) {
   // Tell sourceMgr about this buffer, which is what the parser will pick up.
   SourceMgr sourceMgr;
   sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), SMLoc());
@@ -122,7 +117,7 @@ processBuffer(raw_ostream &os, std::unique_ptr<MemoryBuffer> ownedBuffer,
   if (!verifyDiagnostics) {
     SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context);
     return performActions(os, verifyDiagnostics, verifyPasses, sourceMgr,
-                          &context, passPipeline);
+                          &context, passManagerSetupFn);
   }
 
   SourceMgrDiagnosticVerifierHandler sourceMgrHandler(sourceMgr, &context);
 
   // these actions succeed or fail, we only care what diagnostics they produce
   // and whether they match our expectations.
   (void)performActions(os, verifyDiagnostics, verifyPasses, sourceMgr, &context,
-                       passPipeline);
+                       passManagerSetupFn);
 
   // Verify the diagnostic handler to make sure that each of the diagnostics
   // matched.
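Taking a PassPipelineFn callback instead of a PassPipelineCLParser decouples pipeline construction from command-line parsing, so embedders can assemble a fixed pipeline programmatically. A sketch of such a caller follows; the pass choices and surrounding variables are illustrative, not taken from this patch:

  // Any callable matching LogicalResult(PassManager &) can serve as the
  // setup function; the two passes below are standard MLIR passes chosen
  // purely as examples.
  static LogicalResult setupPipeline(PassManager &pm) {
    pm.addPass(createCanonicalizerPass());
    pm.addPass(createCSEPass());
    return success();
  }
  // In the tool's entry point:
  //   if (failed(MlirOptMain(os, std::move(buffer), setupPipeline, registry,
  //                          splitInputFile, verifyDiagnostics, verifyPasses,
  //                          allowUnregisteredDialects,
  //                          preloadDialectsInContext)))
  //     return failure();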
@@ -140,7 +135,7 @@ processBuffer(raw_ostream &os, std::unique_ptr<MemoryBuffer> ownedBuffer,
 
 LogicalResult mlir::MlirOptMain(raw_ostream &outputStream,
                                 std::unique_ptr<MemoryBuffer> buffer,
-                                const PassPipelineCLParser &passPipeline,
+                                PassPipelineFn passManagerSetupFn,
                                 DialectRegistry &registry, bool splitInputFile,
                                 bool verifyDiagnostics, bool verifyPasses,
                                 bool allowUnregisteredDialects,
@@ -156,17 +151,36 @@ LogicalResult mlir::MlirOptMain(raw_ostream &outputStream,
         [&](std::unique_ptr<MemoryBuffer> chunkBuffer, raw_ostream &os) {
           return processBuffer(os, std::move(chunkBuffer), verifyDiagnostics,
                                verifyPasses, allowUnregisteredDialects,
-                               preloadDialectsInContext, passPipeline, registry,
-                               threadPool);
+                               preloadDialectsInContext, passManagerSetupFn,
+                               registry, threadPool);
         },
         outputStream);
 
   return processBuffer(outputStream, std::move(buffer), verifyDiagnostics,
                        verifyPasses, allowUnregisteredDialects,
-                       preloadDialectsInContext, passPipeline, registry,
+                       preloadDialectsInContext, passManagerSetupFn, registry,
                        threadPool);
 }
 
+LogicalResult mlir::MlirOptMain(raw_ostream &outputStream,
+                                std::unique_ptr<MemoryBuffer> buffer,
+                                const PassPipelineCLParser &passPipeline,
+                                DialectRegistry &registry, bool splitInputFile,
+                                bool verifyDiagnostics, bool verifyPasses,
+                                bool allowUnregisteredDialects,
+                                bool preloadDialectsInContext) {
+  auto passManagerSetupFn = [&](PassManager &pm) {
+    auto errorHandler = [&](const Twine &msg) {
+      emitError(UnknownLoc::get(pm.getContext())) << msg;
+      return failure();
+    };
+    return passPipeline.addToPipeline(pm, errorHandler);
+  };
+  return MlirOptMain(outputStream, std::move(buffer), passManagerSetupFn,
+                     registry, splitInputFile, verifyDiagnostics, verifyPasses,
+                     allowUnregisteredDialects, preloadDialectsInContext);
+}
+
 LogicalResult mlir::MlirOptMain(int argc, char **argv, llvm::StringRef toolName,
                                 DialectRegistry &registry,
                                 bool preloadDialectsInContext) {
diff --git a/mlir/lib/TableGen/AttrOrTypeDef.cpp b/mlir/lib/TableGen/AttrOrTypeDef.cpp
index 2a0ad96ea4e93..f43949c30a222 100644
--- a/mlir/lib/TableGen/AttrOrTypeDef.cpp
+++ b/mlir/lib/TableGen/AttrOrTypeDef.cpp
@@ -132,6 +132,10 @@ Optional<StringRef> AttrOrTypeDef::getParserCode() const {
   return def->getValueAsOptionalString("parser");
 }
 
+Optional<StringRef> AttrOrTypeDef::getAssemblyFormat() const {
+  return def->getValueAsOptionalString("assemblyFormat");
+}
+
 bool AttrOrTypeDef::genAccessors() const {
   return def->getValueAsBit("genAccessors");
 }
@@ -219,6 +223,32 @@ StringRef AttrOrTypeParameter::getCppAccessorType() const {
   return getCppType();
 }
 
+StringRef AttrOrTypeParameter::getCppStorageType() const {
+  if (auto *param = dyn_cast<llvm::DefInit>(def->getArg(index))) {
+    if (auto type = param->getDef()->getValueAsOptionalString("cppStorageType"))
+      return *type;
+  }
+  return getCppType();
+}
+
+Optional<StringRef> AttrOrTypeParameter::getParser() const {
+  auto *parameterType = def->getArg(index);
+  if (auto *param = dyn_cast<llvm::DefInit>(parameterType)) {
+    if (auto parser = param->getDef()->getValueAsOptionalString("parser"))
+      return *parser;
+  }
+  return {};
+}
+
+Optional<StringRef> AttrOrTypeParameter::getPrinter() const {
+  auto *parameterType = def->getArg(index);
+  if (auto *param = dyn_cast<llvm::DefInit>(parameterType)) {
+    if (auto printer = param->getDef()->getValueAsOptionalString("printer"))
+      return *printer;
+  }
+  return {};
+}
+
 Optional<StringRef> AttrOrTypeParameter::getSummary() const {
   auto *parameterType = def->getArg(index);
   if (auto *param = dyn_cast<llvm::DefInit>(parameterType)) {
diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
index 7675387690a44..bdcb451323add 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp
@@ -22,7 +22,6 @@
 using namespace mlir;
 using namespace mlir::LLVM;
 using mlir::LLVM::detail::createIntrinsicCall;
-using mlir::LLVM::detail::createNvvmIntrinsicCall;
 
 static llvm::Intrinsic::ID getShflBflyIntrinsicId(llvm::Type *resultType,
                                                   bool withPredicate) {
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index e9c57bab97dea..39b83c2255654 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -443,29 +443,6 @@ llvm::Value *mlir::LLVM::detail::createIntrinsicCall(
   return builder.CreateCall(fn, args);
 }
 
-llvm::Value *
-mlir::LLVM::detail::createNvvmIntrinsicCall(llvm::IRBuilderBase &builder,
-                                            llvm::Intrinsic::ID intrinsic,
-                                            ArrayRef<llvm::Value *> args) {
-  llvm::Module *module = builder.GetInsertBlock()->getModule();
-  llvm::Function *fn;
-  if (llvm::Intrinsic::isOverloaded(intrinsic)) {
-    if (intrinsic != llvm::Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f16_f16 &&
-        intrinsic != llvm::Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_f32_f32) {
-      // NVVM load and store intrinsic names are overloaded on the
-      // source/destination pointer type. Pointer is the first argument in the
-      // corresponding NVVM Op.
-      fn = llvm::Intrinsic::getDeclaration(module, intrinsic,
-                                           {args[0]->getType()});
-    } else {
-      fn = llvm::Intrinsic::getDeclaration(module, intrinsic, {});
-    }
-  } else {
-    fn = llvm::Intrinsic::getDeclaration(module, intrinsic);
-  }
-  return builder.CreateCall(fn, args);
-}
-
 /// Given a single MLIR operation, create the corresponding LLVM IR operation
 /// using the `builder`.
 LogicalResult
diff --git a/mlir/lib/Tools/mlir-lsp-server/lsp/Protocol.h b/mlir/lib/Tools/mlir-lsp-server/lsp/Protocol.h
index d0f7b052620b6..8a3f47ed59891 100644
--- a/mlir/lib/Tools/mlir-lsp-server/lsp/Protocol.h
+++ b/mlir/lib/Tools/mlir-lsp-server/lsp/Protocol.h
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains structs based on the LSP specification at
-// https://github.com/Microsoft/language-server-protocol/blob/master/protocol.md
+// https://github.com/Microsoft/language-server-protocol/blob/main/protocol.md
 //
 // This is not meant to be a complete implementation, new interfaces are added
 // when they're needed.
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index d90439b6d12d4..cea4b2aaf80b2 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -682,14 +682,6 @@ LogicalResult ArgConverter::materializeLiveConversions(
 
   // Process the remapping for each of the original arguments.
   for (unsigned i = 0, e = origBlock->getNumArguments(); i != e; ++i) {
-    // FIXME: We should run the below checks even if a type converter wasn't
-    // provided, but a lot of existing lowerings rely on the block argument
-    // being blindly replaced. We should rework argument materialization to be
-    // more robust for temporary source materializations, update existing
-    // patterns, and remove these checks.
-    if (!blockInfo.converter && blockInfo.argInfo[i])
-      continue;
-
     // If the type of this argument changed and the argument is still live, we
     // need to materialize a conversion.
     BlockArgument origArg = origBlock->getArgument(i);
diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c
index fd245cd6afb17..1056f65080be7 100644
--- a/mlir/test/CAPI/ir.c
+++ b/mlir/test/CAPI/ir.c
@@ -1393,6 +1393,13 @@ int affineMapFromExprs(MlirContext ctx) {
   if (!mlirAffineExprEqual(mlirAffineMapGetResult(map, 1), affineSymbolExpr))
     return 3;
 
+  MlirAffineExpr affineDim2Expr = mlirAffineDimExprGet(ctx, 1);
+  MlirAffineExpr composed = mlirAffineExprCompose(affineDim2Expr, map);
+  // CHECK: s1
+  mlirAffineExprDump(composed);
+  if (!mlirAffineExprEqual(composed, affineSymbolExpr))
+    return 4;
+
   return 0;
 }
 
@@ -1692,57 +1699,6 @@ static void deleteUserData(void *userData) {
           (intptr_t)userData);
 }
 
-void testDiagnostics() {
-  MlirContext ctx = mlirContextCreate();
-  MlirDiagnosticHandlerID id = mlirContextAttachDiagnosticHandler(
-      ctx, errorHandler, (void *)42, deleteUserData);
-  fprintf(stderr, "@test_diagnostics\n");
-  MlirLocation unknownLoc = mlirLocationUnknownGet(ctx);
-  mlirEmitError(unknownLoc, "test diagnostics");
-  MlirLocation fileLineColLoc = mlirLocationFileLineColGet(
-      ctx, mlirStringRefCreateFromCString("file.c"), 1, 2);
-  mlirEmitError(fileLineColLoc, "test diagnostics");
-  MlirLocation callSiteLoc = mlirLocationCallSiteGet(
-      mlirLocationFileLineColGet(
-          ctx, mlirStringRefCreateFromCString("other-file.c"), 2, 3),
-      fileLineColLoc);
-  mlirEmitError(callSiteLoc, "test diagnostics");
-  MlirLocation null = {0};
-  MlirLocation nameLoc =
-      mlirLocationNameGet(ctx, mlirStringRefCreateFromCString("named"), null);
-  mlirEmitError(nameLoc, "test diagnostics");
-  MlirLocation locs[2] = {nameLoc, callSiteLoc};
-  MlirAttribute nullAttr = {0};
-  MlirLocation fusedLoc = mlirLocationFusedGet(ctx, 2, locs, nullAttr);
-  mlirEmitError(fusedLoc, "test diagnostics");
-  mlirContextDetachDiagnosticHandler(ctx, id);
-  mlirEmitError(unknownLoc, "more test diagnostics");
-  // CHECK-LABEL: @test_diagnostics
-  // CHECK: processing diagnostic (userData: 42) <<
-  // CHECK: test diagnostics
-  // CHECK: loc(unknown)
-  // CHECK: >> end of diagnostic (userData: 42)
-  // CHECK: processing diagnostic (userData: 42) <<
-  // CHECK: test diagnostics
-  // CHECK: loc("file.c":1:2)
-  // CHECK: >> end of diagnostic (userData: 42)
-  // CHECK: processing diagnostic (userData: 42) <<
-  // CHECK: test diagnostics
-  // CHECK: loc(callsite("other-file.c":2:3 at "file.c":1:2))
-  // CHECK: >> end of diagnostic (userData: 42)
-  // CHECK: processing diagnostic (userData: 42) <<
-  // CHECK: test diagnostics
-  // CHECK: loc("named")
-  // CHECK: >> end of diagnostic (userData: 42)
-  // CHECK: processing diagnostic (userData: 42) <<
-  // CHECK: test diagnostics
-  // CHECK: loc(fused["named", callsite("other-file.c":2:3 at "file.c":1:2)])
-  // CHECK: deleting user data (userData: 42)
-  // CHECK-NOT: processing diagnostic
-  // CHECK: more test diagnostics
-  mlirContextDestroy(ctx);
-}
-
 int testTypeID(MlirContext ctx) {
   fprintf(stderr, "@testTypeID\n");
 
@@ -1841,6 +1797,148 @@ int testTypeID(MlirContext ctx) {
   return 0;
 }
 
+int testSymbolTable(MlirContext ctx) {
+  fprintf(stderr, "@testSymbolTable\n");
+
+  const char *moduleString = "func private @foo()"
+                             "func private @bar()";
+  const char *otherModuleString = "func private @qux()"
+                                  "func private @foo()";
+
+  MlirModule module =
+      mlirModuleCreateParse(ctx, mlirStringRefCreateFromCString(moduleString));
+  MlirModule otherModule = mlirModuleCreateParse(
+      ctx, mlirStringRefCreateFromCString(otherModuleString));
+
+  MlirSymbolTable symbolTable =
+      mlirSymbolTableCreate(mlirModuleGetOperation(module));
+
+  MlirOperation funcFoo =
+      mlirSymbolTableLookup(symbolTable, mlirStringRefCreateFromCString("foo"));
+  if (mlirOperationIsNull(funcFoo))
+    return 1;
+
+  MlirOperation funcBar =
+      mlirSymbolTableLookup(symbolTable, mlirStringRefCreateFromCString("bar"));
+  if (mlirOperationEqual(funcFoo, funcBar))
+    return 2;
+
+  MlirOperation missing =
+      mlirSymbolTableLookup(symbolTable, mlirStringRefCreateFromCString("qux"));
+  if (!mlirOperationIsNull(missing))
+    return 3;
+
+  MlirBlock moduleBody = mlirModuleGetBody(module);
+  MlirBlock otherModuleBody = mlirModuleGetBody(otherModule);
+  MlirOperation operation = mlirBlockGetFirstOperation(otherModuleBody);
+  mlirOperationRemoveFromParent(operation);
+  mlirBlockAppendOwnedOperation(moduleBody, operation);
+
+  // At this moment, the operation is still missing from the symbol table.
+  MlirOperation stillMissing =
+      mlirSymbolTableLookup(symbolTable, mlirStringRefCreateFromCString("qux"));
+  if (!mlirOperationIsNull(stillMissing))
+    return 4;
+
+  // The operation becomes visible to lookups only after it is explicitly
+  // added to the symbol table, not merely nested under the operation with
+  // which the table is associated.
+  mlirSymbolTableInsert(symbolTable, operation);
+  MlirOperation funcQux =
+      mlirSymbolTableLookup(symbolTable, mlirStringRefCreateFromCString("qux"));
+  if (!mlirOperationEqual(operation, funcQux))
+    return 5;
+
+  // Erasing from the symbol table also removes the operation.
+  mlirSymbolTableErase(symbolTable, funcBar);
+  MlirOperation nowMissing =
+      mlirSymbolTableLookup(symbolTable, mlirStringRefCreateFromCString("bar"));
+  if (!mlirOperationIsNull(nowMissing))
+    return 6;
+
+  // Inserting a symbol whose name already exists in the table should rename
+  // the new symbol.
+  MlirOperation duplicateNameOp = mlirBlockGetFirstOperation(otherModuleBody);
+  mlirOperationRemoveFromParent(duplicateNameOp);
+  mlirBlockAppendOwnedOperation(moduleBody, duplicateNameOp);
+  MlirAttribute newName = mlirSymbolTableInsert(symbolTable, duplicateNameOp);
+  MlirStringRef newNameStr = mlirStringAttrGetValue(newName);
+  if (mlirStringRefEqual(newNameStr, mlirStringRefCreateFromCString("foo")))
+    return 7;
+  MlirAttribute updatedName = mlirOperationGetAttributeByName(
+      duplicateNameOp, mlirSymbolTableGetSymbolAttributeName());
+  if (!mlirAttributeEqual(updatedName, newName))
+    return 8;
+
+  mlirOperationDump(mlirModuleGetOperation(module));
+  mlirOperationDump(mlirModuleGetOperation(otherModule));
+  // clang-format off
+  // CHECK-LABEL: @testSymbolTable
+  // CHECK: module
+  // CHECK: func private @foo
+  // CHECK: func private @qux
+  // CHECK: func private @foo{{.+}}
+  // CHECK: module
+  // CHECK-NOT: @qux
+  // CHECK-NOT: @foo
+  // clang-format on
+
+  mlirSymbolTableDestroy(symbolTable);
+  mlirModuleDestroy(module);
+  mlirModuleDestroy(otherModule);
+
+  return 0;
+}
+
+void testDiagnostics() {
+  MlirContext ctx = mlirContextCreate();
+  MlirDiagnosticHandlerID id = mlirContextAttachDiagnosticHandler(
+      ctx, errorHandler, (void *)42, deleteUserData);
+  fprintf(stderr, "@test_diagnostics\n");
+  MlirLocation unknownLoc = mlirLocationUnknownGet(ctx);
+  mlirEmitError(unknownLoc, "test diagnostics");
+  MlirLocation fileLineColLoc = mlirLocationFileLineColGet(
+      ctx, mlirStringRefCreateFromCString("file.c"), 1, 2);
+  mlirEmitError(fileLineColLoc, "test diagnostics");
+  MlirLocation callSiteLoc = mlirLocationCallSiteGet(
+      mlirLocationFileLineColGet(
+          ctx, mlirStringRefCreateFromCString("other-file.c"), 2, 3),
+      fileLineColLoc);
+  mlirEmitError(callSiteLoc, "test diagnostics");
+
MlirLocation null = {0}; + MlirLocation nameLoc = + mlirLocationNameGet(ctx, mlirStringRefCreateFromCString("named"), null); + mlirEmitError(nameLoc, "test diagnostics"); + MlirLocation locs[2] = {nameLoc, callSiteLoc}; + MlirAttribute nullAttr = {0}; + MlirLocation fusedLoc = mlirLocationFusedGet(ctx, 2, locs, nullAttr); + mlirEmitError(fusedLoc, "test diagnostics"); + mlirContextDetachDiagnosticHandler(ctx, id); + mlirEmitError(unknownLoc, "more test diagnostics"); + // CHECK-LABEL: @test_diagnostics + // CHECK: processing diagnostic (userData: 42) << + // CHECK: test diagnostics + // CHECK: loc(unknown) + // CHECK: >> end of diagnostic (userData: 42) + // CHECK: processing diagnostic (userData: 42) << + // CHECK: test diagnostics + // CHECK: loc("file.c":1:2) + // CHECK: >> end of diagnostic (userData: 42) + // CHECK: processing diagnostic (userData: 42) << + // CHECK: test diagnostics + // CHECK: loc(callsite("other-file.c":2:3 at "file.c":1:2)) + // CHECK: >> end of diagnostic (userData: 42) + // CHECK: processing diagnostic (userData: 42) << + // CHECK: test diagnostics + // CHECK: loc("named") + // CHECK: >> end of diagnostic (userData: 42) + // CHECK: processing diagnostic (userData: 42) << + // CHECK: test diagnostics + // CHECK: loc(fused["named", callsite("other-file.c":2:3 at "file.c":1:2)]) + // CHECK: deleting user data (userData: 42) + // CHECK-NOT: processing diagnostic + // CHECK: more test diagnostics + mlirContextDestroy(ctx); +} + int main() { MlirContext ctx = mlirContextCreate(); mlirRegisterAllDialects(ctx); @@ -1870,9 +1968,10 @@ int main() { return 11; if (testClone()) return 12; - if (testTypeID(ctx)) { + if (testTypeID(ctx)) return 13; - } + if (testSymbolTable(ctx)) + return 14; mlirContextDestroy(ctx); diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir index 9dd853a39b423..c0ac8a050288f 100644 --- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir @@ -22,7 +22,8 @@ gpu.module @test_module { // CHECK: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJO]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[CADDRESS:.*]] = llvm.bitcast %[[ADDRESS]] : !llvm.ptr to !llvm.ptr // CHECK: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 - // CHECK: %[[FRAG:.*]] = nvvm.wmma.m16n16k16.load.a.f16.row.stride %[[CADDRESS]], %[[LDM32]] : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // CHECK: %[[FRAG:.*]] = nvvm.wmma.load %[[CADDRESS]], %[[LDM32]] + // CHECK-SAME: {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.return %[[FRAG]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i32 @@ -36,7 +37,8 @@ gpu.module @test_module { // CHECK32: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJO]]] : (!llvm.ptr, i32) -> !llvm.ptr // CHECK32: %[[CADDRESS:.*]] = llvm.bitcast %[[ADDRESS]] : !llvm.ptr to !llvm.ptr // CHECK32: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 - // CHECK32: %[[FRAG:.*]] = nvvm.wmma.m16n16k16.load.a.f16.row.stride %[[CADDRESS]], %[[LDM32]] : (!llvm.ptr, i32) -> 
!llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // CHECK32: %[[FRAG:.*]] = nvvm.wmma.load %[[CADDRESS]], %[[LDM32]] + // CHECK32-SAME: {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: llvm.return %[[FRAG]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> return %0 : !gpu.mma_matrix<16x16xf16, "AOp"> } @@ -70,7 +72,8 @@ gpu.module @test_module { // CHECK: %[[EL3:.*]] = llvm.extractvalue %[[D]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[EL4:.*]] = llvm.extractvalue %[[D]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 - // CHECK: nvvm.wmma.m16n16k16.store.d.f16.row.stride %[[CADDRESS]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]], %[[LDM32]] : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, i32 + // CHECK: nvvm.wmma.store %[[CADDRESS]], %[[LDM32]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]] + // CHECK-SAME: {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> // CHECK: llvm.return // CHECK32: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i32 @@ -88,7 +91,8 @@ gpu.module @test_module { // CHECK32: %[[EL3:.*]] = llvm.extractvalue %[[D]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[EL4:.*]] = llvm.extractvalue %[[D]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 - // CHECK32: nvvm.wmma.m16n16k16.store.d.f16.row.stride %[[CADDRESS]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]], %[[LDM32]] : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, i32 + // CHECK32: nvvm.wmma.store %[[CADDRESS]], %[[LDM32]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]] + // CHECK32-SAME: {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> // CHECK32: llvm.return return } @@ -122,7 +126,9 @@ gpu.module @test_module { // CHECK: %[[C2:.*]] = llvm.extractvalue %[[C]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[C3:.*]] = llvm.extractvalue %[[C]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[C4:.*]] = llvm.extractvalue %[[C]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - // CHECK: %[[RES:.*]] = nvvm.wmma.m16n16k16.mma.row.row.f16.f16 %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[A8]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[B8]], %[[C1]], %[[C2]], %[[C3]], %[[C4]] : vector<2xf16> -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // CHECK: %[[RES:.*]] = nvvm.wmma.mma %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[A8]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[B8]], %[[C1]], %[[C2]], %[[C3]], %[[C4]] + // CHECK-SAME: {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, 
layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} : ( + // CHECK-SAME: vector<2xf16>, {{.*}}) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.return %[[RES]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> return %D : !gpu.mma_matrix<16x16xf16, "COp"> } @@ -133,13 +139,13 @@ gpu.module @test_module { gpu.module @test_module { // CHECK-LABEL: func @gpu_wmma_mma_loop_op -// CHECK: %[[C:.+]] = nvvm.wmma.m16n16k16.load.c.f16.row.stride %{{.*}}, %{{.*}} : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[C:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = "f16", frag = "c", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.br ^bb1(%{{.*}}, %[[C]] : i64, !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) // CHECK: ^bb1(%{{.*}}: i64, %[[ACC:.+]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>): // 2 preds: ^bb0, ^bb2 // CHECK: llvm.cond_br %{{.*}}, ^bb2, ^bb3 // CHECK: ^bb2: // pred: ^bb1 -// CHECK: %[[A:.+]] = nvvm.wmma.m16n16k16.load.a.f16.row.stride %{{.*}}, %{{.*}} : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: %[[B:.+]] = nvvm.wmma.m16n16k16.load.b.f16.row.stride %{{.*}}, %{{.*}} : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[A:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[B:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = "f16", frag = "b", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A0:.+]] = llvm.extractvalue %[[A]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A1:.+]] = llvm.extractvalue %[[A]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A2:.+]] = llvm.extractvalue %[[A]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> @@ -160,14 +166,14 @@ gpu.module @test_module { // CHECK: %[[ACC1:.+]] = llvm.extractvalue %[[ACC]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[ACC2:.+]] = llvm.extractvalue %[[ACC]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[ACC3:.+]] = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: %[[ACC_MUL:.+]] = nvvm.wmma.m16n16k16.mma.row.row.f16.f16 %[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[B0]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[ACC0]], %[[ACC1]], 
%[[ACC2]], %[[ACC3]] : vector<2xf16> -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[ACC_MUL:.+]] = nvvm.wmma.mma %[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[B0]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[ACC0]], %[[ACC1]], %[[ACC2]], %[[ACC3]] {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} : (vector<2xf16>, {{.*}} -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.br ^bb1(%{{.*}}, %[[ACC_MUL]] : i64, !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) // CHECK: ^bb3: // pred: ^bb1 // CHECK: %[[E0:.+]] = llvm.extractvalue %[[ACC]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E1:.+]] = llvm.extractvalue %[[ACC]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E2:.+]] = llvm.extractvalue %[[ACC]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E3:.+]] = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: nvvm.wmma.m16n16k16.store.d.f16.row.stride %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]], %{{.*}} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, i32 +// CHECK: nvvm.wmma.store %{{.*}}, %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]] {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> builtin.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) { %c0 = arith.constant 0 : index @@ -214,3 +220,33 @@ gpu.module @test_module { return %C : !gpu.mma_matrix<16x16xf16, "COp"> } } + +// ----- + +gpu.module @test_module { + +// CHECK-LABEL: func @gpu_wmma_elementwise +// CHECK: %[[M0:.*]] = llvm.mlir.undef : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[A0:.*]] = llvm.extractvalue %{{.*}}[0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[B0:.*]] = llvm.extractvalue %{{.*}}[0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[C0:.*]] = llvm.fadd %[[A0]], %[[B0]] : vector<2xf16> +// CHECK: %[[M1:.*]] = llvm.insertvalue %[[C0]], %[[M0]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[A1:.*]] = llvm.extractvalue %{{.*}}[1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[B1:.*]] = llvm.extractvalue %{{.*}}[1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[C1:.*]] = llvm.fadd %[[A1]], %[[B1]] : vector<2xf16> +// CHECK: %[[M2:.*]] = llvm.insertvalue %[[C1]], %[[M1]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[A2:.*]] = llvm.extractvalue %{{.*}}[2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[B2:.*]] = llvm.extractvalue %{{.*}}[2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[C2:.*]] = llvm.fadd %[[A2]], %[[B2]] : vector<2xf16> +// CHECK: %[[M3:.*]] = llvm.insertvalue %[[C2]], %[[M2]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, 
vector<2xf16>, vector<2xf16>)> +// CHECK: %[[A3:.*]] = llvm.extractvalue %{{.*}}[3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[B3:.*]] = llvm.extractvalue %{{.*}}[3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[C3:.*]] = llvm.fadd %[[A3]], %[[B3]] : vector<2xf16> +// CHECK: %[[M4:.*]] = llvm.insertvalue %[[C3]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + builtin.func @gpu_wmma_elementwise(%A : !gpu.mma_matrix<16x16xf16, "COp">, %B : !gpu.mma_matrix<16x16xf16, "COp">) ->(!gpu.mma_matrix<16x16xf16, "COp">) { + %C = gpu.subgroup_mma_elementwise %A, %B { operation = "ADDF" } : + (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> + return %C : !gpu.mma_matrix<16x16xf16, "COp"> + } +} diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir index c8528d062f7cb..38d55b9a659b8 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir @@ -58,6 +58,32 @@ spv.func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) "None" { spv.Return } +//===----------------------------------------------------------------------===// +// spv.VectorShuffle +//===----------------------------------------------------------------------===// + +spv.func @vector_shuffle_same_size(%vector1: vector<2xf32>, %vector2: vector<2xf32>) -> vector<3xf32> "None" { + // CHECK: %[[res:.*]] = llvm.shufflevector {{.*}} [0 : i32, 2 : i32, -1 : i32] : vector<2xf32>, vector<2xf32> + // CHECK-NEXT: return %[[res]] : vector<3xf32> + %0 = spv.VectorShuffle [0: i32, 2: i32, 0xffffffff: i32] %vector1: vector<2xf32>, %vector2: vector<2xf32> -> vector<3xf32> + spv.ReturnValue %0: vector<3xf32> +} + +spv.func @vector_shuffle_different_size(%vector1: vector<3xf32>, %vector2: vector<2xf32>) -> vector<3xf32> "None" { + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : vector<3xf32> + // CHECK-NEXT: %[[C0_0:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK-NEXT: %[[C0_1:.*]] = llvm.mlir.constant(0 : i32) : i32 + // CHECK-NEXT: %[[EXT0:.*]] = llvm.extractelement %arg0[%[[C0_1]] : i32] : vector<3xf32> + // CHECK-NEXT: %[[INSERT0:.*]] = llvm.insertelement %[[EXT0]], %[[UNDEF]][%[[C0_0]] : i32] : vector<3xf32> + // CHECK-NEXT: %[[C1_0:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NEXT: %[[C1_1:.*]] = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NEXT: %[[EXT1:.*]] = llvm.extractelement {{.*}}[%[[C1_1]] : i32] : vector<2xf32> + // CHECK-NEXT: %[[RES:.*]] = llvm.insertelement %[[EXT1]], %[[INSERT0]][%[[C1_0]] : i32] : vector<3xf32> + // CHECK-NEXT: llvm.return %[[RES]] : vector<3xf32> + %0 = spv.VectorShuffle [0: i32, 4: i32, 0xffffffff: i32] %vector1: vector<3xf32>, %vector2: vector<2xf32> -> vector<3xf32> + spv.ReturnValue %0: vector<3xf32> +} + //===----------------------------------------------------------------------===// // spv.EntryPoint and spv.ExecutionMode //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir index 3a7c89343cab1..2ca899fa5bac4 100644 --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir +++ 
b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir @@ -83,3 +83,26 @@ func @matmul_loop(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: vector.transfer_write %14, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<128x128xf16> return } + +// CHECK-LABEL: func @matmul_fused_elementwise +// CHECK-DAG: %[[CST_0:.+]] = arith.constant 0.000000e+00 : f16 +// CHECK-DAG: %[[CST_1:.+]] = arith.constant 1.000000e+00 : f16 +// CHECK-DAG: %[[A:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "AOp"> +// CHECK-DAG: %[[B:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%c0, %c0] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "BOp"> +// CHECK-DAG: %[[C0:.+]] = gpu.subgroup_mma_constant_matrix %[[CST_0]] : !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK-DAG: %[[C1:.+]] = gpu.subgroup_mma_constant_matrix %[[CST_1]] : !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK: %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C0]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK: %[[E:.+]] = gpu.subgroup_mma_elementwise %[[D]], %[[C1]] {operation = "ADDF"} : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK: gpu.subgroup_mma_store_matrix %[[E]], %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<16x16xf16> +func @matmul_fused_elementwise(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<16x16xf16>) { + %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16> + %cst_1 = arith.constant dense<1.000000e+00> : vector<16x16xf16> + %c0 = arith.constant 0 : index + %cst = arith.constant 0.000000e+00 : f16 + %A = vector.transfer_read %arg0[%c0, %c0], %cst {in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16> + %B = vector.transfer_read %arg1[%c0, %c0], %cst {permutation_map = #map0, in_bounds = [true, true]} : memref<16x16xf16>, vector<16x16xf16> + %D = vector.contract {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %A, %B, %cst_0 : vector<16x16xf16>, vector<16x16xf16> into vector<16x16xf16> + %E = arith.addf %D, %cst_1 : vector<16x16xf16> + vector.transfer_write %E, %arg2[%c0, %c0] {in_bounds = [true, true]} : vector<16x16xf16>, memref<16x16xf16> + return +} diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir index 297fb5fe6fe20..c24fd7bf8a818 100644 --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -220,7 +220,10 @@ module attributes {gpu.container_module} { %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "AOp"> // CHECK: gpu.subgroup_mma_load_matrix %[[wg]][%[[i]], %[[i]]] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "AOp"> %1 = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf32, "COp"> - // CHECK: gpu.subgroup_mma_constant_matrix %[[cst]] : !gpu.mma_matrix<16x16xf32, "COp"> + // CHECK: gpu.subgroup_mma_elementwise %{{.*}}, %{{.*}} {operation = "ADDF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> + %2 = gpu.subgroup_mma_elementwise %1, %1 {operation = "ADDF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> 
!gpu.mma_matrix<16x16xf32, "COp"> + // CHECK: gpu.subgroup_mma_elementwise %{{.*}}, %{{.*}} {operation = "MAXF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> + %3 = gpu.subgroup_mma_elementwise %2, %1 {operation = "MAXF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> return } } diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 1caa3f415ee20..3f07f173ec875 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -1040,44 +1040,30 @@ module { // ----- llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: i32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.load.a.f16.row.stride' op expected operands to be a source pointer in memory space 0, 1, 3 followed by ldm of the source}} - %0 = nvvm.wmma.m16n16k16.load.a.f16.row.stride %arg0, %arg1 : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - - llvm.return -} - -// ----- - -llvm.func @wmmaLoadOp_invalid_missing_ldm(%arg0: !llvm.ptr, %arg1: i32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.load.a.f16.row.stride' op expected operands to be a source pointer in memory space 0, 1, 3 followed by ldm of the source}} - %0 = nvvm.wmma.m16n16k16.load.a.f16.row.stride %arg0: (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - - llvm.return -} - -// ----- - -llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr, %arg1: i32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.load.a.f16.row.stride' op expected result type of loadAOp and loadBOp to be a struct of 8 s}} - %0 = nvvm.wmma.m16n16k16.load.a.f16.row.stride %arg0, %arg1 : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - + // expected-error@+1 {{'nvvm.wmma.load' op expected source pointer in memory space 0, 1, 3}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr, %arg1: i32) { - // expected-error@+1 {{nvvm.wmma.m16n16k16.load.a.f16.row.stride' op expected result type of loadAOp and loadBOp to be a struct of 8 s}} - %0 = nvvm.wmma.m16n16k16.load.a.f16.row.stride %arg0, %arg1 : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> - + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr, %arg1: i32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.load.b.f16.row.stride' op expected result type of loadAOp and loadBOp to be a struct of 8 s}} - %0 = nvvm.wmma.m16n16k16.load.b.f16.row.stride %arg0, %arg1 : 
(!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = "f16", frag = "b", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } @@ -1085,29 +1071,23 @@ llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr, %arg1: i32) { // ----- llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr, %arg1: i32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.load.c.f16.row.stride' op expected result type of loadCOp to be a struct of 4 s or 8 f32s}} - %0 = nvvm.wmma.m16n16k16.load.c.f16.row.stride %arg0, %arg1 : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = "f16", frag = "c", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: vector<2 x f16>, +llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: i32, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, - %arg4: vector<2 xf16>, %arg5: i32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.store.d.f16.row.stride' op expected operands to be a source pointer in memoryspace 0, 1, 3 followed by ldm of the source}} - nvvm.wmma.m16n16k16.store.d.f16.row.stride %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, i32 - llvm.return -} - -// ----- - -llvm.func @wmmaStoreOp_invalid_missing_ldm(%arg0: !llvm.ptr, %arg1: vector<2 x f16>, - %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, - %arg4: vector<2 xf16>, %arg5: i32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.store.d.f16.row.stride' op expected operands to be a source pointer in memoryspace 0, 1, 3 followed by ldm of the source}} - nvvm.wmma.m16n16k16.store.d.f16.row.stride %arg0, %arg1, %arg2, %arg3, %arg4 : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> + %arg4: vector<2 x f16>, %arg5: vector<2 xf16>) { + // expected-error@+1 {{'nvvm.wmma.store' op expected operands to be a source pointer in memory space 0, 1, 3}} + nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 + {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> llvm.return } @@ -1123,8 +1103,12 @@ llvm.func @gpu_wmma_mma_op_invalid_operands(%arg0: vector<2 x f16>, %arg1: vecto %arg14: vector<2 x f16>, %arg15: vector<2 x f16>, %arg16: vector<2 x f16>, %arg17: vector<2 x f16>, %arg18: vector<2 x f16>) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.mma.row.row.f16.f16' op expected 20 s as operands}} - %0 = nvvm.wmma.m16n16k16.mma.row.row.f16.f16 %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18 : vector<2 x f16> -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> + // expected-error@+1 {{'nvvm.wmma.mma' op expected 20 arguments}} + %0 = nvvm.wmma.mma %arg0, %arg1, 
%arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18 + {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, + vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>) + -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> llvm.return } @@ -1140,9 +1124,12 @@ llvm.func @gpu_wmma_mma_op_results(%arg0: vector<2 x f16>, %arg1: vector<2 x f16 %arg14: vector<2 x f16>, %arg15: vector<2 x f16>, %arg16: vector<2 x f16>, %arg17: vector<2 x f16>, %arg18: vector<2 x f16>, %arg19: vector<2 x f16>) { - // expected-error@+1 {{expected result type to be a struct of 4 s}} - %0 = nvvm.wmma.m16n16k16.mma.row.row.f16.f16 %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19 : vector<2 x f16> -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> - llvm.return + // expected-error@+1 {{'nvvm.wmma.mma' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19 + {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, + vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>) + -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> llvm.return } // ----- @@ -1157,8 +1144,10 @@ llvm.func @gpu_wmma_mma_op_invalid_ab_operands(%arg0: vector<2 x f16>, %arg1: ve %arg14: vector<2 x f16>, %arg15: f32, %arg16: f32, %arg17: f32, %arg18: f32, %arg19: f32, %arg20: f32, %arg21: f32, %arg22: f32, %arg23: f32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.mma.row.row.f32.f32' op expected 16 s for `a` and `b` operand}} - %0 = nvvm.wmma.m16n16k16.mma.row.row.f32.f32 %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + // expected-error@+1 {{'nvvm.wmma.mma' op expected argument 15 to be of type 'vector<2xf16>'}} + %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 + {eltypeA = "f16", eltypeB = "f32", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, 
vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return } @@ -1174,8 +1163,10 @@ llvm.func @gpu_wmma_mma_op_invalid_c_operand(%arg0: vector<2 x f16>, %arg1: vect %arg14: vector<2 x f16>, %arg15: vector<2xf16>, %arg16: f32, %arg17: f32, %arg18: f32, %arg19: f32, %arg20: f32, %arg21: f32, %arg22: f32, %arg23: vector<2xf16>) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.mma.row.row.f32.f32' op expected 8 f32s for `c` operand}} - %0 = nvvm.wmma.m16n16k16.mma.row.row.f32.f32 %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + // expected-error@+1 {{'nvvm.wmma.mma' op expected argument 23 to be of type 'f32'}} + %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 + {eltypeA = "f16", eltypeB = "f32", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return } @@ -1191,8 +1182,10 @@ llvm.func @gpu_wmma_mma_op_invalid_result(%arg0: vector<2 x f16>, %arg1: vector< %arg14: vector<2 x f16>, %arg15: vector<2xf16>, %arg16: f32, %arg17: f32, %arg18: f32, %arg19: f32, %arg20: f32, %arg21: f32, %arg22: f32, %arg23: f32) { - // expected-error@+1 {{'nvvm.wmma.m16n16k16.mma.row.row.f32.f32' op expected result type to be a struct of 8 f32s}} - %0 = nvvm.wmma.m16n16k16.mma.row.row.f32.f32 %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, vector<2xf16>)> + // expected-error@+1 {{'nvvm.wmma.mma' op expected destination type is a structure of 8 elements of type 'f32'}} + %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 + {eltypeA = "f16", eltypeB = "f32", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, 
vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, vector<2xf16>)> llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 1e3d6dc739785..638a5ab47dd0d 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -69,6 +69,27 @@ func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>, llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } +func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { + // CHECK: nvvm.wmma.load {{.*}} {eltype = "tf32", frag = "a", k = 8 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = "tf32", frag = "a", k = 8 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> +} + +func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : i32, + %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, + %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) + -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> { + // CHECK: nvvm.wmma.mma {{.*}} {eltypeA = "tf32", eltypeB = "f32", k = 8 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + %r = nvvm.wmma.mma %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 + {eltypeA = "tf32", eltypeB = "f32", k = 8 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, f32, f32, f32, f32, f32, f32) + -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + llvm.return %r : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> +} + + // ----- // expected-error@below {{attribute attached to unexpected op}} diff --git a/mlir/test/Dialect/Linalg/decompose-convolution.mlir b/mlir/test/Dialect/Linalg/decompose-convolution.mlir new file mode 100644 index 0000000000000..ebd7dd6d4a2af --- /dev/null +++ b/mlir/test/Dialect/Linalg/decompose-convolution.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-opt -split-input-file -test-linalg-transform-patterns=test-decompose-convolution-patterns %s | FileCheck %s + +// CHECK-LABEL: func @conv2d_nhwc_4x1x2x8_tensor +// CHECK-SAME: (%[[INPUT:.+]]: tensor<4x1x6x3xf32>, %[[FILTER:.+]]: tensor<1x2x3x8xf32>, %[[INIT:.+]]: tensor<4x1x2x8xf32>) +func @conv2d_nhwc_4x1x2x8_tensor(%input: tensor<4x1x6x3xf32>, %filter: tensor<1x2x3x8xf32>, %init: tensor<4x1x2x8xf32>) -> tensor<4x1x2x8xf32> { + %0 = linalg.conv_2d_nhwc_hwcf + {dilations = dense<[2, 3]> : tensor<2xi64>, strides = dense<[3, 2]> : tensor<2xi64>} + ins(%input, %filter : tensor<4x1x6x3xf32>, tensor<1x2x3x8xf32>) + outs(%init : tensor<4x1x2x8xf32>) -> tensor<4x1x2x8xf32> + return %0 : tensor<4x1x2x8xf32> +} + +// CHECK: %[[INPUT_1D:.+]] = linalg.tensor_collapse_shape %[[INPUT]] +// CHECK-SAME{LITERAL}: [[0], [1, 2], [3]] : tensor<4x1x6x3xf32> into tensor<4x6x3xf32> +// CHECK: %[[FILTER_1D:.+]] = linalg.tensor_collapse_shape %[[FILTER]] +// CHECK-SAME{LITERAL}: [[0, 1], [2], [3]] : tensor<1x2x3x8xf32> into tensor<2x3x8xf32> +// CHECK: %[[INIT_1D:.+]] = linalg.tensor_collapse_shape %[[INIT]] +// CHECK-SAME{LITERAL}: [[0], [1, 2], [3]] : tensor<4x1x2x8xf32> into tensor<4x2x8xf32> +// CHECK: %[[CONV_1D:.+]] = linalg.conv_1d_nwc_wcf +// CHECK-SAME: dilations = dense<3> : vector<1xi64> +// CHECK-SAME: strides = dense<2> : vector<1xi64> +// CHECK-SAME: 
ins(%[[INPUT_1D]], %[[FILTER_1D]] : tensor<4x6x3xf32>, tensor<2x3x8xf32>) +// CHECK-SAME: outs(%[[INIT_1D]] : tensor<4x2x8xf32>) +// CHECK: %[[CONV_2D:.+]] = linalg.tensor_expand_shape %[[CONV_1D]] +// CHECK-SAME{LITERAL}: [[0], [1, 2], [3]] : tensor<4x2x8xf32> into tensor<4x1x2x8xf32> +// CHECK: return %[[CONV_2D]] + +// ----- + +// CHECK-LABEL: func @conv2d_nhwc_qxqx1xq_tensor +// CHECK-SAME: (%[[INPUT:.+]]: tensor, %[[FILTER:.+]]: tensor, %[[INIT:.+]]: tensor) +func @conv2d_nhwc_qxqx1xq_tensor(%input: tensor, %filter: tensor, %init: tensor) -> tensor { + %0 = linalg.conv_2d_nhwc_hwcf + {dilations = dense<[2, 3]> : tensor<2xi64>, strides = dense<[3, 2]> : tensor<2xi64>} + ins(%input, %filter : tensor, tensor) + outs(%init : tensor) -> tensor + return %0 : tensor +} + +// CHECK: %[[INPUT_1D:.+]] = linalg.tensor_collapse_shape %[[INPUT]] +// CHECK-SAME{LITERAL}: [[0], [1, 2], [3]] : tensor into tensor +// CHECK: %[[FILTER_1D:.+]] = linalg.tensor_collapse_shape %[[FILTER]] +// CHECK-SAME{LITERAL}: [[0, 1], [2], [3]] : tensor into tensor +// CHECK: %[[INIT_1D:.+]] = linalg.tensor_collapse_shape %[[INIT]] +// CHECK-SAME{LITERAL}: [[0], [1, 2], [3]] : tensor into tensor +// CHECK: %[[CONV_1D:.+]] = linalg.conv_1d_nwc_wcf +// CHECK-SAME: dilations = dense<2> : vector<1xi64> +// CHECK-SAME: strides = dense<3> : vector<1xi64> +// CHECK-SAME: ins(%[[INPUT_1D]], %[[FILTER_1D]] : tensor, tensor) +// CHECK-SAME: outs(%[[INIT_1D]] : tensor) +// CHECK: %[[CONV_2D:.+]] = linalg.tensor_expand_shape %[[CONV_1D]] +// CHECK-SAME{LITERAL}: [[0], [1, 2], [3]] : tensor into tensor +// CHECK: return %[[CONV_2D]] + +// ----- + +// Do not convert convolution ops whose window dimensions are not ones. + +// CHECK-LABEL: func @conv2d_nhwc_4x1x2x8_tensor +func @conv2d_nhwc_4x1x2x8_tensor(%input: tensor<4x3x5x3xf32>, %filter: tensor<2x2x3x8xf32>, %init: tensor<4x1x2x8xf32>) -> tensor<4x1x2x8xf32> { + // CHECK: linalg.conv_2d_nhwc_hwcf + %0 = linalg.conv_2d_nhwc_hwcf + {dilations = dense<[2, 3]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + ins(%input, %filter : tensor<4x3x5x3xf32>, tensor<2x2x3x8xf32>) + outs(%init : tensor<4x1x2x8xf32>) -> tensor<4x1x2x8xf32> + return %0 : tensor<4x1x2x8xf32> +} diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir index b6c0eed3f111c..8f24fe81a9eaa 100644 --- a/mlir/test/Dialect/Linalg/hoist-padding.mlir +++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir @@ -141,45 +141,39 @@ func @matmul_tensors( // ----- - // CHECK-DAG: #[[$MIN_REST8:[0-9a-z]+]] = affine_map<(d0)[s0] -> (8, -d0 + s0)> -// CHECK-DAG: #[[$MIN_REST4:[0-9a-z]+]] = affine_map<(d0, d1) -> (4, d0 - d1)> -// CHECK-DAG: #[[$MIN_REST2:[0-9a-z]+]] = affine_map<(d0, d1) -> (2, d0 - d1)> // CHECK-DAG: #[[$DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)> // CHECK-DAG: #[[$DIV2:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 2)> #map0 = affine_map<(d0)[s0] -> (8, -d0 + s0)> -#map1 = affine_map<(d0, d1) -> (4, d0 - d1)> -#map2 = affine_map<(d0, d1) -> (2, d0 - d1)> +#map1 = affine_map<(d0, d1) -> (4, -d0 + d1)> +#map2 = affine_map<(d0, d1) -> (2, -d0 + d1)> +#map3 = affine_map<(d0, d1, d2) -> (d0 + d1 + d2)> // CHECK-LABEL: func @dot // VERIFIER-ONLY-LABEL: func @dot func @dot(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { + %cst = arith.constant 0.000000e+00 : f32 %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index %c4 = arith.constant 4 : index - %cst = arith.constant 0.000000e+00 : f32 %c2 = arith.constant 2 : index - %c0 = arith.constant 0 : 
index - %1 = tensor.dim %arg0, %c0 : tensor - %2 = tensor.dim %arg0, %c0 : tensor - %3 = tensor.dim %arg1, %c0 : tensor + %0 = tensor.dim %arg0, %c0 : tensor // CHECK: scf.for %[[I:[0-9a-z]+]] = // // CHECK: %[[MR8:.*]] = affine.min #[[$MIN_REST8]](%[[I]]) - // CHECK: %[[D0:.*]] = affine.apply #[[$DIV4]](%[[MR8]]) // Init tensor and pack. - // CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [%[[D0]], 2, 2] : tensor - // CHECK: %[[CAST_INIT_PACKED_A:.*]] = tensor.cast %[[INIT_PACKED_A]] : tensor to tensor + // CHECK: %[[INIT_PACKED_A:.*]] = linalg.init_tensor [2, 2, 2] : tensor<2x2x2xf32> + // CHECK: %[[CAST_INIT_PACKED_A:.*]] = tensor.cast %[[INIT_PACKED_A]] : tensor<2x2x2xf32> to tensor // CHECK: %[[PACKED_A:.*]] = scf.for %[[II:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[CAST_INIT_PACKED_A]]) -> (tensor) { // CHECK: scf.for %[[III:[0-9a-z]+]] = // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor // - // CHECK: %[[D0_2:.*]] = affine.apply #[[$DIV4]](%[[MR8]]) // Init tensor and pack. - // CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [%[[D0_2]], 2, 2] : tensor - // CHECK: %[[CAST_INIT_PACKED_B:.*]] = tensor.cast %[[INIT_PACKED_B]] : tensor to tensor + // CHECK: %[[INIT_PACKED_B:.*]] = linalg.init_tensor [2, 2, 2] : tensor<2x2x2xf32> + // CHECK: %[[CAST_INIT_PACKED_B:.*]] = tensor.cast %[[INIT_PACKED_B]] : tensor<2x2x2xf32> to tensor // CHECK: %[[PACKED_B:.*]] = scf.for %[[II_2:[0-9a-z]+]] = {{.*}} iter_args(%{{.*}} = %[[CAST_INIT_PACKED_B]]) -> (tensor) { // CHECK: scf.for %[[III_2:[0-9a-z]+]] = // CHECK: tensor.insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}, 0] [1, 1, 2] [1, 1, 1] : tensor<2xf32> into tensor @@ -194,39 +188,32 @@ func @dot(%arg0: tensor, %arg1: tensor, %arg2: tensor) // CHECK: %[[B:.*]] = tensor.extract_slice %[[PACKED_B]][%[[IDX0_2]], %[[IDX1_2]], 0] [1, 1, 2] [1, 1, 1] : tensor to tensor<2xf32> // CHECK: linalg.dot ins(%[[A]], %[[B]] : tensor<2xf32>, tensor<2xf32>) outs(%[[C]] : tensor) -> tensor - %4 = scf.for %arg3 = %c0 to %1 step %c8 iter_args(%arg4 = %arg2) -> (tensor) { - %5 = affine.min #map0(%arg3)[%2] - %6 = tensor.extract_slice %arg0[%arg3] [%5] [1] : tensor to tensor - %7 = affine.min #map0(%arg3)[%3] - %8 = tensor.extract_slice %arg1[%arg3] [%7] [1] : tensor to tensor - %9 = scf.for %arg5 = %c0 to %5 step %c4 iter_args(%arg6 = %arg4) -> (tensor) { - %10 = affine.min #map1(%5, %arg5) - %11 = tensor.extract_slice %6[%arg5] [%10] [1] : tensor to tensor - %12 = affine.min #map1(%7, %arg5) - %13 = tensor.extract_slice %8[%arg5] [%12] [1] : tensor to tensor - %14 = scf.for %arg7 = %c0 to %10 step %c2 iter_args(%arg8 = %arg6) -> (tensor) { - %15 = affine.min #map2(%10, %arg7) - %16 = tensor.extract_slice %11[%arg7] [%15] [1] : tensor to tensor - %17 = affine.min #map2(%12, %arg7) - %18 = tensor.extract_slice %13[%arg7] [%17] [1] : tensor to tensor - %19 = arith.subi %c2, %15 : index - %20 = linalg.pad_tensor %16 low[%c0] high[%19] { + %1 = scf.for %arg3 = %c0 to %0 step %c8 iter_args(%arg4 = %arg2) -> (tensor) { + %2 = affine.min #map0(%arg3)[%0] + %3 = scf.for %arg5 = %c0 to %2 step %c4 iter_args(%arg6 = %arg4) -> (tensor) { + %4 = affine.min #map1(%arg5, %2) + %5 = scf.for %arg7 = %c0 to %4 step %c2 iter_args(%arg8 = %arg6) -> (tensor) { + %6 = affine.min #map2(%arg7, %4) + %7 = affine.apply #map3(%arg7, %arg5, %arg3) + %8 = tensor.extract_slice %arg0[%7] [%6] [1] : tensor to tensor + %9 = tensor.extract_slice %arg1[%7] [%6] [1] : tensor to tensor + %10 = arith.subi %c2, %6 : index + 
%11 = linalg.pad_tensor %8 low[%c0] high[%10] { ^bb0(%arg9: index): // no predecessors linalg.yield %cst : f32 } : tensor to tensor<2xf32> - %21 = arith.subi %c2, %17 : index - %22 = linalg.pad_tensor %18 low[%c0] high[%21] { + %12 = linalg.pad_tensor %9 low[%c0] high[%10] { ^bb0(%arg9: index): // no predecessors linalg.yield %cst : f32 } : tensor to tensor<2xf32> - %23 = linalg.dot ins(%20, %22 : tensor<2xf32>, tensor<2xf32>) outs(%arg8 : tensor) -> tensor - scf.yield %23 : tensor + %13 = linalg.dot ins(%11, %12 : tensor<2xf32>, tensor<2xf32>) outs(%arg8 : tensor) -> tensor + scf.yield %13 : tensor } - scf.yield %14 : tensor + scf.yield %5 : tensor } - scf.yield %9 : tensor + scf.yield %3 : tensor } - return %4 : tensor + return %1 : tensor } // ----- diff --git a/mlir/test/Dialect/Linalg/pad-and-hoist.mlir b/mlir/test/Dialect/Linalg/pad-and-hoist.mlir index 93e2bf5f189d2..e0a3e226c5f2b 100644 --- a/mlir/test/Dialect/Linalg/pad-and-hoist.mlir +++ b/mlir/test/Dialect/Linalg/pad-and-hoist.mlir @@ -1,25 +1,23 @@ // RUN: mlir-opt %s -test-linalg-transform-patterns="test-pad-pattern pack-paddings=1,1,0 hoist-paddings=2,1,0" -cse -canonicalize -split-input-file | FileCheck %s -// RUN: mlir-opt %s -test-linalg-transform-patterns="test-pad-pattern pack-paddings=1,1,0 hoist-paddings=4,3,0" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-DOUBLE +// RUN: mlir-opt %s -test-linalg-transform-patterns="test-pad-pattern pack-paddings=1,1,0 hoist-paddings=3,2,0" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-DOUBLE // CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0) -> (5, -d0 + 24)> -// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (8, -d0 + 12)> +// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0) -> (7, -d0 + 25)> // CHECK-DAG: #[[DIV6:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 6)> #map0 = affine_map<(d0) -> (5, -d0 + 24)> -#map1 = affine_map<(d0) -> (8, -d0 + 12)> -#map2 = affine_map<(d0) -> (7, -d0 + 25)> - -// CHECK: single_tiling -// CHECK-DOUBLE: single_tiling +#map1 = affine_map<(d0) -> (7, -d0 + 25)> +// CHECK: static_sizes +// CHECK-DOUBLE: static_sizes // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32> // CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32> // CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32> -func @single_tiling(%arg0: tensor<24x12xf32>, - %arg1: tensor<12x25xf32>, - %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { +func @static_sizes(%arg0: tensor<24x12xf32>, + %arg1: tensor<12x25xf32>, + %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C5:.*]] = arith.constant 5 - // CHECK-DAG: %[[C8:.*]] = arith.constant 8 + // CHECK-DAG: %[[C7:.*]] = arith.constant 7 %c0 = arith.constant 0 : index %c12 = arith.constant 12 : index %c25 = arith.constant 25 : index @@ -33,11 +31,11 @@ func @single_tiling(%arg0: tensor<24x12xf32>, // Packing the first input operand for all values of IV2 (IV2x5x6). 
// CHECK: = linalg.init_tensor [2, 5, 6] - // CHECK: %[[PT0:.*]] = scf.for %[[P0IV2:[0-9a-z]+]] = - // CHECK: %[[PIDX0:.*]] = affine.apply #[[DIV6]](%[[P0IV2]]) + // CHECK: %[[PT0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = + // CHECK: %[[PIDX0:.*]] = affine.apply #[[DIV6]](%[[PIV0]]) // CHECK: %[[TS0:.*]] = affine.min #[[MAP0]](%[[IV0]]) // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]] - // CHECK-SAME: %[[IV0]], %[[P0IV2]] + // CHECK-SAME: %[[IV0]], %[[PIV0]] // CHECK-SAME: %[[TS0]], 6 // CHECK: %[[V0:.*]] = arith.subi %[[C5]], %[[TS0]] // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold {{.*}} high[%[[V0]] @@ -47,15 +45,15 @@ func @single_tiling(%arg0: tensor<24x12xf32>, // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = %1 = scf.for %arg5 = %c0 to %c25 step %c7 iter_args(%arg6 = %arg4) -> (tensor<24x25xf32>) { - // Packing the second input operand for all values of IV2 (IV2x6x8). - // CHECK: = linalg.init_tensor [2, 6, 8] - // CHECK: %[[PT1:.*]] = scf.for %[[P1IV2:[0-9a-z]+]] = - // CHECK: %[[PIDX1:.*]] = affine.apply #[[DIV6]](%[[P1IV2]]) + // Packing the second input operand for all values of IV2 (IV2x6x7). + // CHECK: = linalg.init_tensor [2, 6, 7] + // CHECK: %[[PT1:.*]] = scf.for %[[PIV1:[0-9a-z]+]] = + // CHECK: %[[PIDX1:.*]] = affine.apply #[[DIV6]](%[[PIV1]]) // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[IV1]]) // CHECK: %[[T3:.*]] = tensor.extract_slice %[[ARG1]] - // CHECK-SAME: %[[P1IV2]], %[[IV1]] + // CHECK-SAME: %[[PIV1]], %[[IV1]] // CHECK-SAME: 6, %[[TS1]] - // CHECK: %[[V1:.*]] = arith.subi %[[C8]], %[[TS1]] + // CHECK: %[[V1:.*]] = arith.subi %[[C7]], %[[TS1]] // CHECK: %[[T4:.*]] = linalg.pad_tensor %[[T3]] nofold {{.*}} high[%[[C0]], %[[V1]] // CHECK: %[[T5:.*]] = tensor.insert_slice %[[T4:.*]] into %{{.*}}[%[[PIDX1]], 0, 0] // CHECK: scf.yield %[[T5:.*]] @@ -63,6 +61,7 @@ func @single_tiling(%arg0: tensor<24x12xf32>, // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] = %2 = scf.for %arg7 = %c0 to %c12 step %c6 iter_args(%arg8 = %arg6) -> (tensor<24x25xf32>) { %3 = affine.min #map0(%arg3) + // Index the packed operands. 
// CHECK-DAG: %[[IDX:.*]] = affine.apply #[[DIV6]](%[[IV2]]) // CHECK-DAG: %[[T6:.*]] = tensor.extract_slice %[[PT0]][%[[IDX]] @@ -91,6 +90,108 @@ func @single_tiling(%arg0: tensor<24x12xf32>, // ----- +// CHECK-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<(d0)[s0] -> (5, -d0 + s0)> +// CHECK-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<(d0)[s0] -> (6, -d0 + s0)> +// CHECK-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<(d0)[s0] -> (7, -d0 + s0)> +// CHECK-DAG: #[[SDIV6:[0-9a-z]+]] = affine_map<()[s0] -> (s0 ceildiv 6)> +// CHECK-DAG: #[[DDIV6:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 6)> +#map0 = affine_map<(d0)[s0] -> (5, -d0 + s0)> +#map1 = affine_map<(d0)[s0] -> (6, -d0 + s0)> +#map2 = affine_map<(d0)[s0] -> (7, -d0 + s0)> + +// CHECK: dynamic_sizes +// CHECK-DOUBLE: dynamic_sizes +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor +// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor +// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor +func @dynamic_sizes(%arg0: tensor, + %arg1: tensor, + %arg2: tensor) -> tensor { + // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index + // CHECK-DAG: %[[C1:.*]] = arith.constant 1 + // CHECK-DAG: %[[C5:.*]] = arith.constant 5 + // CHECK-DAG: %[[C6:.*]] = arith.constant 6 + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %c6 = arith.constant 6 : index + %c7 = arith.constant 7 : index + %c5 = arith.constant 5 : index + + // CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]] + // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] + // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C1]] + %0 = tensor.dim %arg0, %c0 : tensor + %1 = tensor.dim %arg0, %c1 : tensor + %2 = tensor.dim %arg1, %c1 : tensor + + // CHECK: scf.for %[[IV0:[0-9a-zA-Z]*]] = + %3 = scf.for %arg3 = %c0 to %0 step %c5 iter_args(%arg4 = %arg2) -> (tensor) { + + // Packing the first input operand for all values of IV2 (IV2x5x6). + // CHECK: %[[PS0:.*]] = affine.apply #[[SDIV6]]()[%[[D1]] + // CHECK: = linalg.init_tensor [%[[PS0]], 5, 6] + // CHECK: %[[PT0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = + // CHECK: %[[PIDX0:.*]] = affine.apply #[[DDIV6]](%[[PIV0]]) + // CHECK: %[[TS0:.*]] = affine.min #[[MAP0]](%[[IV0]])[%[[D0]] + // CHECK: %[[TS1:.*]] = affine.min #[[MAP1]](%[[PIV0]])[%[[D1]] + // CHECK: %[[T0:.*]] = tensor.extract_slice %[[ARG0]] + // CHECK-SAME: %[[IV0]], %[[PIV0]] + // CHECK-SAME: %[[TS0]], %[[TS1]] + // CHECK: %[[V0:.*]] = arith.subi %[[C5]], %[[TS0]] + // CHECK: %[[V1:.*]] = arith.subi %[[C6]], %[[TS1]] + // CHECK: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold {{.*}} high[%[[V0]], %[[V1]] + // CHECK: %[[T2:.*]] = tensor.insert_slice %[[T1:.*]] into %{{.*}}[%[[PIDX0]], 0, 0] + // CHECK: scf.yield %[[T2:.*]] + + // CHECK: scf.for %[[IV1:[0-9a-zA-Z]*]] = + %4 = scf.for %arg5 = %c0 to %2 step %c7 iter_args(%arg6 = %arg4) -> (tensor) { + + // Packing the second input operand for all values of IV2 (IV2x6x7). 
+ // CHECK: = linalg.init_tensor [%[[PS0]], 6, 7] + // CHECK: %[[PT1:.*]] = scf.for %[[PIV1:[0-9a-z]+]] = + // CHECK: %[[PIDX1:.*]] = affine.apply #[[DDIV6]](%[[PIV1]]) + // CHECK: %[[TS2:.*]] = affine.min #[[MAP1]](%[[PIV1]])[%[[D1]] + // CHECK: %[[TS3:.*]] = affine.min #[[MAP2]](%[[IV1]])[%[[D2]] + // CHECK: %[[T3:.*]] = tensor.extract_slice %[[ARG1]] + // CHECK-SAME: %[[PIV1]], %[[IV1]] + // CHECK-SAME: %[[TS2]], %[[TS3]] + // CHECK: %[[V2:.*]] = arith.subi %[[C6]], %[[TS2]] + // CHECK: %[[V3:.*]] = arith.subi %[[C7]], %[[TS3]] + // CHECK: %[[T4:.*]] = linalg.pad_tensor %[[T3]] nofold {{.*}} high[%[[V2]], %[[V3]] + // CHECK: %[[T5:.*]] = tensor.insert_slice %[[T4:.*]] into %{{.*}}[%[[PIDX1]], 0, 0] + // CHECK: scf.yield %[[T5:.*]] + + // CHECK: scf.for %[[IV2:[0-9a-zA-Z]*]] = {{.*}} iter_args(%[[ARG4:.*]] = + %5 = scf.for %arg7 = %c0 to %1 step %c6 iter_args(%arg8 = %arg6) -> (tensor) { + %6 = affine.min #map0(%arg3)[%0] + %7 = affine.min #map1(%arg7)[%1] + + // Index the packed operands. + // CHECK-DAG: %[[IDX:.*]] = affine.apply #[[DDIV6]](%[[IV2]]) + // CHECK-DAG: %[[T6:.*]] = tensor.extract_slice %[[PT0]][%[[IDX]] + // CHECK-DAG: %[[T7:.*]] = tensor.extract_slice %[[PT1]][%[[IDX]] + %8 = tensor.extract_slice %arg0[%arg3, %arg7] [%6, %7] [1, 1] : tensor to tensor + %9 = affine.min #map2(%arg5)[%2] + %10 = tensor.extract_slice %arg1[%arg7, %arg5] [%7, %9] [1, 1] : tensor to tensor + %11 = tensor.extract_slice %arg8[%arg3, %arg5] [%6, %9] [1, 1] : tensor to tensor + + // Check matmul uses the packed input operands. + // CHECK: = linalg.matmul ins(%[[T6]], %[[T7]] + %12 = linalg.matmul {__internal_linalg_transform__ = "pad"} ins(%8, %10 : tensor, tensor) outs(%11 : tensor) -> tensor + %13 = tensor.insert_slice %12 into %arg8[%arg3, %arg5] [%6, %9] [1, 1] : tensor into tensor + scf.yield %13 : tensor + } + scf.yield %5 : tensor + } + scf.yield %4 : tensor + } + return %3 : tensor +} + +// ----- + +// CHECK-DOUBLE-DAG: #[[DIV5:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 5)> +// CHECK-DOUBLE-DAG: #[[DIV6:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 6)> #map0 = affine_map<(d0) -> (15, -d0 + 24)> #map1 = affine_map<(d0) -> (16, -d0 + 25)> #map2 = affine_map<(d0, d1) -> (5, -d0 + d1)> @@ -99,7 +200,6 @@ func @single_tiling(%arg0: tensor<24x12xf32>, // CHECK: double_tiling // CHECK-DOUBLE: double_tiling - // CHECK-DOUBLE-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x12xf32> // CHECK-DOUBLE-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<12x25xf32> // CHECK-DOUBLE-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<24x25xf32> @@ -114,16 +214,17 @@ func @double_tiling(%arg0: tensor<24x12xf32>, %c5 = arith.constant 5 : index %c6 = arith.constant 6 : index - // Packing the first input operand. - // CHECK-DOUBLE: = linalg.init_tensor - // CHECK-DOUBLE: = linalg.pad_tensor {{.*}} nofold - // CHECK-DOUBLE: scf.for %[[IV0:[0-9a-zA-Z]*]] = %0 = scf.for %arg3 = %c0 to %c24 step %c15 iter_args(%arg4 = %arg2) -> (tensor<24x25xf32>) { - // Packing the second input operand. - // CHECK-DOUBLE: = linalg.init_tensor - // CHECK-DOUBLE: = linalg.pad_tensor {{.*}} nofold + // Packing the first input operand. 
+ // CHECK-DOUBLE: = linalg.init_tensor [3, 5, 12] + // CHECK-DOUBLE: %[[PT0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = + // CHECK-DOUBLE: %[[PIDX0:.*]] = affine.apply #[[DIV5]](%[[PIV0]]) + // CHECK-DOUBLE: %[[T0:.*]] = tensor.extract_slice %[[ARG0]] + // CHECK-DOUBLE: %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold + // CHECK-DOUBLE: %[[T2:.*]] = tensor.insert_slice %[[T1:.*]] into %{{.*}}[%[[PIDX0]], 0, 0] + // CHECK-DOUBLE: scf.yield %[[T2:.*]] // CHECK-DOUBLE: scf.for %[[IV1:[0-9a-zA-Z]*]] = %1 = scf.for %arg5 = %c0 to %c25 step %c16 iter_args(%arg6 = %arg4) -> (tensor<24x25xf32>) { @@ -131,6 +232,15 @@ func @double_tiling(%arg0: tensor<24x12xf32>, %3 = affine.min #map1(%arg5) %4 = tensor.extract_slice %arg6[%arg3, %arg5] [%2, %3] [1, 1] : tensor<24x25xf32> to tensor + // Packing the second input operand. + // CHECK-DOUBLE: = linalg.init_tensor [3, 12, 6] + // CHECK-DOUBLE: %[[PT1:.*]] = scf.for %[[PIV1:[0-9a-z]+]] = + // CHECK-DOUBLE: %[[PIDX1:.*]] = affine.apply #[[DIV6]](%[[PIV1]]) + // CHECK-DOUBLE: %[[T3:.*]] = tensor.extract_slice %[[ARG1]] + // CHECK-DOUBLE: %[[T4:.*]] = linalg.pad_tensor %[[T3]] nofold + // CHECK-DOUBLE: %[[T5:.*]] = tensor.insert_slice %[[T4:.*]] into %{{.*}}[%[[PIDX1]], 0, 0] + // CHECK-DOUBLE: scf.yield %[[T5:.*]] + // CHECK-DOUBLE: scf.for %[[IV2:[0-9a-zA-Z]*]] = %5 = scf.for %arg7 = %c0 to %2 step %c5 iter_args(%arg8 = %4) -> (tensor) { @@ -138,6 +248,12 @@ func @double_tiling(%arg0: tensor<24x12xf32>, %7 = scf.for %arg9 = %c0 to %3 step %c6 iter_args(%arg10 = %arg8) -> (tensor) { %8 = affine.min #map2(%arg7, %2) %9 = affine.apply #map3(%arg7, %arg3) + + // Index the packed operands. + // CHECK-DOUBLE-DAG: %[[IDX0:.*]] = affine.apply #[[DIV5]](%[[IV2]]) + // CHECK-DOUBLE-DAG: %[[T6:.*]] = tensor.extract_slice %[[PT0]][%[[IDX0]] + // CHECK-DOUBLE-DAG: %[[IDX1:.*]] = affine.apply #[[DIV6]](%[[IV3]]) + // CHECK-DOUBLE-DAG: %[[T7:.*]] = tensor.extract_slice %[[PT1]][%[[IDX1]] %10 = tensor.extract_slice %arg0[%9, 0] [%8, 12] [1, 1] : tensor<24x12xf32> to tensor %11 = affine.min #map4(%arg9, %3) %12 = affine.apply #map3(%arg9, %arg5) @@ -146,9 +262,8 @@ func @double_tiling(%arg0: tensor<24x12xf32>, %15 = affine.min #map4(%arg9, %3) %16 = tensor.extract_slice %arg10[%arg7, %arg9] [%14, %15] [1, 1] : tensor to tensor - // Pad the output operand and perform the multiplication. - // CHECK-DOUBLE: = linalg.pad_tensor - // CHECK-DOUBLE: = linalg.matmul + // Check matmul uses the packed input operands. 
+ // CHECK-DOUBLE: = linalg.matmul ins(%[[T6]], %[[T7]] %17 = linalg.matmul {__internal_linalg_transform__ = "pad"} ins(%10, %13 : tensor, tensor<12x?xf32>) outs(%16 : tensor) -> tensor %18 = tensor.insert_slice %17 into %arg10[%arg7, %arg9] [%14, %15] [1, 1] : tensor into tensor scf.yield %18 : tensor diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir new file mode 100644 index 0000000000000..1578d230017ba --- /dev/null +++ b/mlir/test/Dialect/Linalg/tile-and-fuse-sequence-on-tensors.mlir @@ -0,0 +1,83 @@ +// RUN: mlir-opt %s -linalg-tile-and-fuse-tensor-ops="tile-sizes=4,4,0,0 tile-interchange=0,1,2,3" -cse --canonicalize -split-input-file | FileCheck %s + +// CHECK: fuse_conv_chain +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<2x2xf32> +// CHECK-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<11x11xf32> +// CHECK-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<10x10xf32> +// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]*]]: tensor<9x9xf32> +// CHECK-SAME: %[[ARG4:[0-9a-zA-Z]*]]: tensor<8x8xf32> +builtin.func @fuse_conv_chain(%arg0: tensor<2x2xf32>, + %arg1: tensor<11x11xf32>, + %arg2: tensor<10x10xf32>, + %arg3: tensor<9x9xf32>, + %arg4: tensor<8x8xf32>) -> tensor<8x8xf32> { + %cst = arith.constant 1.0 : f32 + + // Do not tile the filter fill since the filter dimensions are not tiled. + // CHECK: %[[T0:.*]] = linalg.fill(%{{.*}}, %[[ARG0]]) + %0 = linalg.fill(%cst, %arg0) : f32, tensor<2x2xf32> -> tensor<2x2xf32> + + // Fuse all other operations. + // CHECK: scf.for %[[IV0:.*]] = {{.*}} iter_args(%[[ARG5:.*]] = %[[ARG4]] + // CHECK: scf.for %[[IV1:.*]] = {{.*}} iter_args(%[[ARG6:.*]] = %[[ARG5]] + + // CHECK: %[[T1:.*]] = tensor.extract_slice %[[ARG1]] + // CHECK-SAME: %[[IV0]], %[[IV1]] + // CHECK: %[[T2:.*]] = tensor.extract_slice %[[ARG2]] + // CHECK-SAME: %[[IV0]], %[[IV1]] + // CHECK: %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]]) + // CHECK: %[[T4:.*]] = linalg.conv_2d ins(%[[T1]], %[[T0]] : {{.*}} outs(%[[T3]] + %1 = linalg.fill(%cst, %arg2) : f32, tensor<10x10xf32> -> tensor<10x10xf32> + %2 = linalg.conv_2d ins(%arg1, %0 : tensor<11x11xf32>, tensor<2x2xf32>) outs(%1 : tensor<10x10xf32>) -> tensor<10x10xf32> + + // CHECK: %[[T5:.*]] = tensor.extract_slice %[[ARG3]] + // CHECK-SAME: %[[IV0]], %[[IV1]] + // CHECK: %[[T6:.*]] = linalg.fill(%{{.*}}, %[[T5]]) + // CHECK: %[[T7:.*]] = linalg.conv_2d ins(%[[T4]], %[[T0]] : {{.*}} outs(%[[T6]] + %3 = linalg.fill(%cst, %arg3) : f32, tensor<9x9xf32> -> tensor<9x9xf32> + %4 = linalg.conv_2d ins(%2, %0 : tensor<10x10xf32>, tensor<2x2xf32>) outs(%3 : tensor<9x9xf32>) -> tensor<9x9xf32> + + // Use the argument passed in by iteration argument. 
+ // CHECK: %[[T8:.*]] = tensor.extract_slice %[[ARG6]] + // CHECK-SAME: %[[IV0]], %[[IV1]] + // CHECK: %[[T9:.*]] = linalg.fill(%{{.*}}, %[[T8]]) + // CHECK: %[[T5:.*]] = linalg.conv_2d ins(%[[T7]], %[[T0]] {{.*}} outs(%[[T9]] + %5 = linalg.fill(%cst, %arg4) : f32, tensor<8x8xf32> -> tensor<8x8xf32> + %6 = linalg.conv_2d ins(%4, %0 : tensor<9x9xf32>, tensor<2x2xf32>) outs(%5 : tensor<8x8xf32>) -> tensor<8x8xf32> + return %6 : tensor<8x8xf32> +} + +// ----- + +// CHECK: fuse_matmul_chain +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<8x8xf32> +builtin.func @fuse_matmul_chain(%arg0: tensor<8x8xf32>) -> tensor<8x8xf32> { + %c0 = arith.constant 0 : index + %c12 = arith.constant 12 : index + %c25 = arith.constant 25 : index + %c24 = arith.constant 24 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.000000e+00 : f32 + + // Do not tile the rhs fill of the producer matmul since none of its loop dimensions is tiled. + // CHECK: %[[T0:.*]] = linalg.fill(%{{.*}}, %[[ARG0]]) + %0 = linalg.fill(%cst, %arg0) : f32, tensor<8x8xf32> -> tensor<8x8xf32> + + // CHECK: scf.for %[[IV0:.*]] = {{.*}} iter_args(%[[ARG1:.*]] = %[[ARG0]] + // CHECK: scf.for %[[IV1:.*]] = {{.*}} iter_args(%[[ARG2:.*]] = %[[ARG1]] + + // Only the outermost loop of the producer matmul is tiled. + // CHECK: %[[T1:.*]] = tensor.extract_slice %[[ARG0]] + // CHECK-SAME: %[[IV0]], 0 + // CHECK: %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]]) + // CHECK: %[[T3:.*]] = linalg.matmul ins(%[[T2]], %[[T0]] {{.*}} + %1 = linalg.matmul ins(%0, %0 : tensor<8x8xf32>, tensor<8x8xf32>) outs(%0 : tensor<8x8xf32>) -> tensor<8x8xf32> + + // Use the argument passed in by iteration argument. + // CHECK: %[[T4:.*]] = tensor.extract_slice %[[ARG2]] + // CHECK-SAME: %[[IV0]], %[[IV1]] + // CHECK: %[[T5:.*]] = linalg.fill(%{{.*}}, %[[T4]]) + // CHECK: %{{.*}} = linalg.matmul ins(%[[T3]], {{.*}} outs(%[[T5]] + %2 = linalg.matmul ins(%1, %0 : tensor<8x8xf32>, tensor<8x8xf32>) outs(%0 : tensor<8x8xf32>) -> tensor<8x8xf32> + return %2 : tensor<8x8xf32> +} diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir index b1802fded0b13..0a1cbc41d58e4 100644 --- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir @@ -16,30 +16,48 @@ func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<1x3x8xf // CHECK-SAME: (%[[INPUT:.+]]: memref<4x6x3xf32>, %[[FILTER:.+]]: memref<1x3x8xf32>, %[[OUTPUT:.+]]: memref<4x2x8xf32>) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index // CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 +/// Read the whole data in one shot.
+// CHECK-DAG: %[[V_INPUT_R:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] +// CHECK-DAG: %[[V_FILTER_R:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] +// CHECK-DAG: %[[V_OUTPUT_R:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] + +// CHECK: %[[V_FILTER:.+]] = vector.extract %[[V_FILTER_R]][0] : vector<1x3x8xf32> +/// w == 0, kw == 0 +// CHECK: %[[V_INPUT_0:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x5x3xf32> to vector<4x1x3xf32> +// CHECK: %[[V_OUTPUT_0:.+]] = vector.extract_strided_slice %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xf32> to vector<4x1x8xf32> +/// w == 1, kw == 0 +// CHECK: %[[V_INPUT_1:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x5x3xf32> to vector<4x1x3xf32> +// CHECK: %[[V_OUTPUT_1:.+]] = vector.extract_strided_slice %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 1, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xf32> to vector<4x1x8xf32> + /// w == 0, kw == 0 -// CHECK: %[[V_FILTER:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[V_INPUT0:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[V_OUTPUT_0:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[CONTRACT0:.+]] = vector.contract { +// CHECK: %[[CONTRACT_0:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT0]], %[[V_FILTER]], %[[V_OUTPUT_0]] +// CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER]], %[[V_OUTPUT_0]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT0]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] /// w == 1, kw == 0 -// CHECK: %[[V_INPUT3:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C3]], %[[C0]]], %[[F0]] -// CHECK: %[[V_OUTPUT_1:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C1]], %[[C0]]], %[[F0]] -// CHECK: %[[CONTRACT1:.+]] = vector.contract { +// CHECK: %[[CONTRACT_1:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT3]], %[[V_FILTER]], %[[V_OUTPUT_1]] +// CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER]], %[[V_OUTPUT_1]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT1]], %[[OUTPUT]][%[[C0]], %[[C1]], %[[C0]]] + +/// w == 0, kw == 0 +// CHECK: %[[RES_0:.+]] = vector.insert_strided_slice %[[CONTRACT_0]], %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x8xf32> into vector<4x2x8xf32> +/// w == 1, kw == 0 +// CHECK: %[[RES_1:.+]] = vector.insert_strided_slice %[[CONTRACT_1]], %[[RES_0]] +// CHECK-SAME: {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x8xf32> into vector<4x2x8xf32> + +// Write the result back in one shot. 
+// CHECK: vector.transfer_write %[[RES_1]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] // ----- @@ -59,57 +77,78 @@ func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<2x3x8xf // CHECK-SAME: (%[[INPUT:.+]]: memref<4x6x3xf32>, %[[FILTER:.+]]: memref<2x3x8xf32>, %[[OUTPUT:.+]]: memref<4x2x8xf32>) // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index -// CHECK-DAG: %[[C5:.+]] = arith.constant 5 : index // CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 +/// Read the whole data in one shot. +// CHECK-DAG: %[[V_INPUT_R:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] +// CHECK-DAG: %[[V_FILTER_R:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] +// CHECK-DAG: %[[V_OUTPUT_R:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] + + /// w == 0, kw == 0 -// CHECK: %[[V_FILTER_A:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[V_INPUT0_A:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[V_OUTPUT_0_A:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[CONTRACT0_A:.+]] = vector.contract { -// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT0_A]], %[[V_FILTER_A]], %[[V_OUTPUT_0_A]] -// CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT0_A]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] +// CHECK: %[[V_FILTER_0:.+]] = vector.extract %[[V_FILTER_R]][0] : vector<2x3x8xf32> +// CHECK: %[[V_INPUT_0:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x7x3xf32> to vector<4x1x3xf32> +// CHECK: %[[V_OUTPUT_0:.+]] = vector.extract_strided_slice %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xf32> to vector<4x1x8xf32> +/// w == 1, kw == 0 +// CHECK: %[[V_INPUT_1:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 3, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x7x3xf32> to vector<4x1x3xf32> +// CHECK: %[[V_OUTPUT_1:.+]] = vector.extract_strided_slice %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 1, 0], sizes = [4, 1, 8], strides = [1, 1, 1]} : vector<4x2x8xf32> to vector<4x1x8xf32> /// w == 0, kw == 1 -// CHECK: %[[V_INPUT3_A:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C3]], %[[C0]]], %[[F0]] -// CHECK: %[[V_OUTPUT_1_A:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C1]], %[[C0]]], %[[F0]] -// CHECK: %[[CONTRACT1_A:.+]] = vector.contract { +// CHECK: %[[V_FILTER_1:.+]] = vector.extract %[[V_FILTER_R]][1] : vector<2x3x8xf32> +// CHECK: %[[V_INPUT_2:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 2, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x7x3xf32> to vector<4x1x3xf32> +/// w == 1, kw == 1 +// CHECK: %[[V_INPUT_3:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 5, 0], sizes = [4, 1, 3], strides = [1, 1, 1]} : vector<4x7x3xf32> to vector<4x1x3xf32> + +/// w == 0, kw == 0 +// CHECK: %[[CONTRACT_0:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]],
#[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT3_A]], %[[V_FILTER_A]], %[[V_OUTPUT_1_A]] +// CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER_0]], %[[V_OUTPUT_0]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT1_A]], %[[OUTPUT]][%[[C0]], %[[C1]], %[[C0]]] - /// w == 1, kw == 0 -// CHECK: %[[V_FILTER_B:.+]] = vector.transfer_read %[[FILTER]][%[[C1]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[V_INPUT0_B:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C2]], %[[C0]]], %[[F0]] -// CHECK: %[[V_OUTPUT_0_B:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] -// CHECK: %[[CONTRACT0_B:.+]] = vector.contract { +// CHECK: %[[CONTRACT_1:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT0_B]], %[[V_FILTER_B]], %[[V_OUTPUT_0_B]] +// CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER_0]], %[[V_OUTPUT_1]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT0_B]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] - /// w == 1, kw == 1 -// CHECK: %[[V_INPUT3_B:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C5]], %[[C0]]], %[[F0]] -// CHECK: %[[V_OUTPUT_1_B:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C1]], %[[C0]]], %[[F0]] -// CHECK: %[[CONTRACT1_B:.+]] = vector.contract { +// CHECK: %[[CONTRACT_2:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT3_B]], %[[V_FILTER_B]], %[[V_OUTPUT_1_B]] +// CHECK-SAME: %[[V_INPUT_2]], %[[V_FILTER_1]], %[[CONTRACT_0]] +// CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> +/// w == 1, kw == 1 +// CHECK: %[[CONTRACT_3:.+]] = vector.contract { +// CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} +// CHECK-SAME: %[[V_INPUT_3]], %[[V_FILTER_1]], %[[CONTRACT_1]] // CHECK-SAME: : vector<4x1x3xf32>, vector<3x8xf32> into vector<4x1x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT1_B]], %[[OUTPUT]][%[[C0]], %[[C1]], %[[C0]]] -// ----- +/// w == 0, kw == 0 +// CHECK: %[[RES_0:.+]] = vector.insert_strided_slice %[[CONTRACT_2]], %[[V_OUTPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], strides = [1, 1, 1]} : vector<4x1x8xf32> into vector<4x2x8xf32> +/// w == 1, kw == 0 +// CHECK: %[[RES_1:.+]] = vector.insert_strided_slice %[[CONTRACT_3]], %[[RES_0]] +// CHECK-SAME: {offsets = [0, 1, 0], strides = [1, 1, 1]} : vector<4x1x8xf32> into vector<4x2x8xf32> +// Write the result back in one shot. 
+// CHECK: vector.transfer_write %[[RES_1]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] +// ----- + +func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<2x3x8xf32>, %output: memref<4x2x8xf32>) { + linalg.conv_1d_nwc_wcf + {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>) + outs(%output : memref<4x2x8xf32>) + return +} // CHECK: #[[INPUT_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> // CHECK: #[[FILTER_MAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> @@ -117,36 +156,36 @@ func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<2x3x8xf // CHECK: func @conv1d_nwc_4x2x8_memref // CHECK-SAME: (%[[INPUT:.+]]: memref<4x6x3xf32>, %[[FILTER:.+]]: memref<2x3x8xf32>, %[[OUTPUT:.+]]: memref<4x2x8xf32>) -func @conv1d_nwc_4x2x8_memref(%input: memref<4x6x3xf32>, %filter: memref<2x3x8xf32>, %output: memref<4x2x8xf32>) { + // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index // CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 +/// Read the whole data in one shot. +// CHECK-DAG: %[[V_INPUT_R:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] +// CHECK-DAG: %[[V_FILTER_R:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] +// CHECK-DAG: %[[V_OUTPUT_R:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]] + /// w == 0, kw == 0 -// CHECK: %[[V_FILTER_000:.+]] = vector.transfer_read %[[FILTER]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]]{{.*}} vector<3x8xf32> -// CHECK: %[[V_INPUT_000:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]]{{.*}} vector<4x2x3xf32> -// CHECK: %[[V_OUTPUT_0:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]]{{.*}} vector<4x2x8xf32> -// CHECK: %[[CONTRACT0:.+]] = vector.contract { +// CHECK: %[[V_FILTER_0:.+]] = vector.extract %[[V_FILTER_R]][0] : vector<2x3x8xf32> +// CHECK: %[[V_INPUT_0:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 0, 0], sizes = [4, 2, 3], strides = [1, 1, 1]} : vector<4x5x3xf32> to vector<4x2x3xf32> +/// w == 0, kw == 1 +// CHECK: %[[V_FILTER_1:.+]] = vector.extract %[[V_FILTER_R]][1] : vector<2x3x8xf32> +// CHECK: %[[V_INPUT_1:.+]] = vector.extract_strided_slice %[[V_INPUT_R]] +// CHECK-SAME: {offsets = [0, 2, 0], sizes = [4, 2, 3], strides = [1, 1, 1]} : vector<4x5x3xf32> to vector<4x2x3xf32> + +/// w == 0, kw == 0 +// CHECK: %[[CONTRACT_0:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT_000]], %[[V_FILTER_000]], %[[V_OUTPUT_0]] +// CHECK-SAME: %[[V_INPUT_0]], %[[V_FILTER_0]], %[[V_OUTPUT_R]] // CHECK-SAME: : vector<4x2x3xf32>, vector<3x8xf32> into vector<4x2x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT0]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] - -/// w == 0, kw == 1 -// CHECK: %[[V_FILTER_100:.+]] = vector.transfer_read %[[FILTER]][%[[C1]], %[[C0]], %[[C0]]], %[[F0]]{{.*}} vector<3x8xf32> -// CHECK: %[[V_INPUT_020:.+]] = vector.transfer_read %[[INPUT]][%[[C0]], %[[C2]], %[[C0]]], %[[F0]]{{.*}} vector<4x2x3xf32> -// CHECK: %[[V_OUTPUT_1:.+]] = vector.transfer_read %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]], %[[F0]]{{.*}} vector<4x2x8xf32> -// CHECK: %[[CONTRACT1:.+]] = vector.contract { +/// w == 1, kw == 1 
+// CHECK: %[[CONTRACT_1:.+]] = vector.contract { // CHECK-SAME: indexing_maps = [#[[INPUT_MAP]], #[[FILTER_MAP]], #[[OUTPUT_MAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]} -// CHECK-SAME: %[[V_INPUT_020]], %[[V_FILTER_100]], %[[V_OUTPUT_1]] +// CHECK-SAME: %[[V_INPUT_1]], %[[V_FILTER_1]], %[[CONTRACT_0]] // CHECK-SAME: : vector<4x2x3xf32>, vector<3x8xf32> into vector<4x2x8xf32> -// CHECK: vector.transfer_write %[[CONTRACT1]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] - linalg.conv_1d_nwc_wcf - {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} - ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>) - outs(%output : memref<4x2x8xf32>) - return -} + +// Write the result back in one shot. +// CHECK: vector.transfer_write %[[CONTRACT_1]], %[[OUTPUT]][%[[C0]], %[[C0]], %[[C0]]] diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 36eee320af555..d3a78857f6b41 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -598,3 +598,188 @@ func @omp_atomic_write6(%addr : memref<i32>, %val : i32) { omp.atomic.write %addr, %val memory_order(xyz) : memref<i32>, i32 return } + +// ----- + +func @omp_sections(%data_var1 : memref<i32>, %data_var2 : memref<i32>, %data_var3 : memref<i32>) -> () { + // expected-error @below {{operand used in both private and firstprivate clauses}} + omp.sections private(%data_var1 : memref<i32>) firstprivate(%data_var1 : memref<i32>) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections(%data_var1 : memref<i32>, %data_var2 : memref<i32>, %data_var3 : memref<i32>) -> () { + // expected-error @below {{operand used in both private and lastprivate clauses}} + omp.sections private(%data_var1 : memref<i32>) lastprivate(%data_var1 : memref<i32>) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections(%data_var1 : memref<i32>, %data_var2 : memref<i32>, %data_var3 : memref<i32>) -> () { + // expected-error @below {{operand used in both private and lastprivate clauses}} + omp.sections private(%data_var1 : memref<i32>, %data_var2 : memref<i32>) lastprivate(%data_var3 : memref<i32>, %data_var2 : memref<i32>) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections(%data_var : memref<i32>) -> () { + // expected-error @below {{expected equal sizes for allocate and allocator variables}} + "omp.sections" (%data_var) ({ + omp.terminator + }) {operand_segment_sizes = dense<[0,0,0,0,1,0]> : vector<6xi32>} : (memref<i32>) -> () + return +} + +// ----- + +func @omp_sections(%data_var : memref<i32>) -> () { + // expected-error @below {{expected as many reduction symbol references as reduction variables}} + "omp.sections" (%data_var) ({ + omp.terminator + }) {operand_segment_sizes = dense<[0,0,0,1,0,0]> : vector<6xi32>} : (memref<i32>) -> () + return +} + +// ----- + +func @omp_sections(%data_var : memref<i32>) -> () { + // expected-error @below {{expected omp.section op or terminator op inside region}} + omp.sections { + "test.payload" () : () -> () + } + return +} + +// ----- + +func @omp_sections(%cond : i1) { + // expected-error @below {{if is not a valid clause for the omp.sections operation}} + omp.sections if(%cond) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{num_threads is not a valid clause for the omp.sections operation}} + omp.sections num_threads(10) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections(%datavar : memref<i32>) { + // expected-error @below {{shared is not a valid clause for the omp.sections operation}} + omp.sections
shared(%datavar : memref<i32>) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections(%datavar : memref<i32>) { + // expected-error @below {{copyin is not a valid clause for the omp.sections operation}} + omp.sections copyin(%datavar : memref<i32>) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{default is not a valid clause for the omp.sections operation}} + omp.sections default(private) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{proc_bind is not a valid clause for the omp.sections operation}} + omp.sections proc_bind(close) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections(%data_var : memref<i32>, %linear_var : i32) { + // expected-error @below {{linear is not a valid clause for the omp.sections operation}} + omp.sections linear(%data_var = %linear_var : memref<i32>) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{schedule is not a valid clause for the omp.sections operation}} + omp.sections schedule(static, none) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{collapse is not a valid clause for the omp.sections operation}} + omp.sections collapse(3) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{ordered is not a valid clause for the omp.sections operation}} + omp.sections ordered(2) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{order is not a valid clause for the omp.sections operation}} + omp.sections order(concurrent) { + omp.terminator + } + return +} + +// ----- + +func @omp_sections() { + // expected-error @below {{failed to verify constraint: region with 1 blocks}} + omp.sections { + omp.section { + omp.terminator + } + omp.terminator + ^bb2: + omp.terminator + } + return +} diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 4d0801de0cd44..522d9b48291d3 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -523,3 +523,124 @@ func @omp_atomic_write(%addr : memref<i32>, %val : i32) { omp.atomic.write %addr, %val hint(speculative, uncontended) : memref<i32>, i32 return } + +// CHECK-LABEL: omp_sectionsop +func @omp_sectionsop(%data_var1 : memref<i32>, %data_var2 : memref<i32>, + %data_var3 : memref<i32>, %redn_var : !llvm.ptr<f32>) { + + // CHECK: omp.sections private(%{{.*}} : memref<i32>) { + "omp.sections" (%data_var1) ({ + // CHECK: omp.terminator + omp.terminator + }) {operand_segment_sizes = dense<[1,0,0,0,0,0]> : vector<6xi32>} : (memref<i32>) -> () + + // CHECK: omp.sections firstprivate(%{{.*}} : memref<i32>) { + "omp.sections" (%data_var1) ({ + // CHECK: omp.terminator + omp.terminator + }) {operand_segment_sizes = dense<[0,1,0,0,0,0]> : vector<6xi32>} : (memref<i32>) -> () + + // CHECK: omp.sections lastprivate(%{{.*}} : memref<i32>) { + "omp.sections" (%data_var1) ({ + // CHECK: omp.terminator + omp.terminator + }) {operand_segment_sizes = dense<[0,0,1,0,0,0]> : vector<6xi32>} : (memref<i32>) -> () + + // CHECK: omp.sections private(%{{.*}} : memref<i32>) firstprivate(%{{.*}} : memref<i32>) lastprivate(%{{.*}} : memref<i32>) { + "omp.sections" (%data_var1, %data_var2, %data_var3) ({ + // CHECK: omp.terminator + omp.terminator + }) {operand_segment_sizes = dense<[1,1,1,0,0,0]> : vector<6xi32>} : (memref<i32>, memref<i32>, memref<i32>) -> () + + // CHECK: omp.sections allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>) + "omp.sections" (%data_var1, %data_var1)
({ + // CHECK: omp.terminator + omp.terminator + }) {operand_segment_sizes = dense<[0,0,0,0,1,1]> : vector<6xi32>} : (memref<i32>, memref<i32>) -> () + + // CHECK: omp.sections reduction(@add_f32 -> %{{.*}} : !llvm.ptr<f32>) + "omp.sections" (%redn_var) ({ + // CHECK: omp.terminator + omp.terminator + }) {operand_segment_sizes = dense<[0,0,0,1,0,0]> : vector<6xi32>, reductions=[@add_f32]} : (!llvm.ptr<f32>) -> () + + // CHECK: omp.sections private(%{{.*}} : memref<i32>) { + omp.sections private(%data_var1 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections firstprivate(%{{.*}} : memref<i32>) + omp.sections firstprivate(%data_var1 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections lastprivate(%{{.*}} : memref<i32>) + omp.sections lastprivate(%data_var1 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections private(%{{.*}} : memref<i32>) firstprivate(%{{.*}} : memref<i32>) lastprivate(%{{.*}} : memref<i32>) { + omp.sections private(%data_var1 : memref<i32>) firstprivate(%data_var2 : memref<i32>) lastprivate(%data_var3 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections private(%{{.*}} : memref<i32>) firstprivate(%{{.*}} : memref<i32>) lastprivate(%{{.*}} : memref<i32>) { + omp.sections lastprivate(%data_var1 : memref<i32>) firstprivate(%data_var2 : memref<i32>) private(%data_var3 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections private(%{{.*}} : memref<i32>) nowait { + omp.sections nowait private(%data_var1 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections firstprivate(%{{.*}} : memref<i32>, %{{.*}} : memref<i32>) lastprivate(%{{.*}} : memref<i32>) { + omp.sections firstprivate(%data_var1 : memref<i32>, %data_var2 : memref<i32>) lastprivate(%data_var1 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections reduction(@add_f32 -> %{{.*}} : !llvm.ptr<f32>) { + omp.sections reduction(@add_f32 -> %redn_var : !llvm.ptr<f32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections allocate(%{{.*}} : memref<i32> -> %{{.*}} : memref<i32>) + omp.sections allocate(%data_var1 : memref<i32> -> %data_var1 : memref<i32>) { + // CHECK: omp.terminator + omp.terminator + } + + // CHECK: omp.sections nowait + omp.sections nowait { + // CHECK: omp.section + omp.section { + // CHECK: %{{.*}} = "test.payload"() : () -> i32 + %1 = "test.payload"() : () -> i32 + // CHECK: %{{.*}} = "test.payload"() : () -> i32 + %2 = "test.payload"() : () -> i32 + // CHECK: %{{.*}} = "test.payload"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 + %3 = "test.payload"(%1, %2) : (i32, i32) -> i32 + } + // CHECK: omp.section + omp.section { + // CHECK: %{{.*}} = "test.payload"(%{{.*}}) : (!llvm.ptr<f32>) -> i32 + %1 = "test.payload"(%redn_var) : (!llvm.ptr<f32>) -> i32 + } + // CHECK: omp.section + omp.section { + // CHECK: "test.payload"(%{{.*}}) : (!llvm.ptr<f32>) -> () + "test.payload"(%redn_var) : (!llvm.ptr<f32>) -> () + } + // CHECK: omp.terminator + omp.terminator + } + return +} diff --git a/mlir/test/Dialect/SPIRV/IR/atomic-ops.mlir b/mlir/test/Dialect/SPIRV/IR/atomic-ops.mlir index 7a10878fad5ef..2bc800025f989 100644 --- a/mlir/test/Dialect/SPIRV/IR/atomic-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/atomic-ops.mlir @@ -37,6 +37,42 @@ func @atomic_and(%ptr : !spv.ptr, %value : i32) -> i32 { // ----- +//===----------------------------------------------------------------------===// +// spv.AtomicCompareExchange +//===----------------------------------------------------------------------===// + +func @atomic_compare_exchange(%ptr: !spv.ptr<i32, Workgroup>,
%value: i32, %comparator: i32) -> i32 { + // CHECK: spv.AtomicCompareExchange "Workgroup" "Release" "Acquire" %{{.*}}, %{{.*}}, %{{.*}} : !spv.ptr<i32, Workgroup> + %0 = spv.AtomicCompareExchange "Workgroup" "Release" "Acquire" %ptr, %value, %comparator: !spv.ptr<i32, Workgroup> + return %0: i32 +} + +// ----- + +func @atomic_compare_exchange(%ptr: !spv.ptr<i32, Workgroup>, %value: i64, %comparator: i32) -> i32 { + // expected-error @+1 {{value operand must have the same type as the op result, but found 'i64' vs 'i32'}} + %0 = "spv.AtomicCompareExchange"(%ptr, %value, %comparator) {memory_scope = 4: i32, equal_semantics = 0x4: i32, unequal_semantics = 0x2:i32} : (!spv.ptr<i32, Workgroup>, i64, i32) -> (i32) + return %0: i32 +} + +// ----- + +func @atomic_compare_exchange(%ptr: !spv.ptr<i32, Workgroup>, %value: i32, %comparator: i16) -> i32 { + // expected-error @+1 {{comparator operand must have the same type as the op result, but found 'i16' vs 'i32'}} + %0 = "spv.AtomicCompareExchange"(%ptr, %value, %comparator) {memory_scope = 4: i32, equal_semantics = 0x4: i32, unequal_semantics = 0x2:i32} : (!spv.ptr<i32, Workgroup>, i32, i16) -> (i32) + return %0: i32 +} + +// ----- + +func @atomic_compare_exchange(%ptr: !spv.ptr<i64, Workgroup>, %value: i32, %comparator: i32) -> i32 { + // expected-error @+1 {{pointer operand's pointee type must have the same as the op result type, but found 'i64' vs 'i32'}} + %0 = "spv.AtomicCompareExchange"(%ptr, %value, %comparator) {memory_scope = 4: i32, equal_semantics = 0x4: i32, unequal_semantics = 0x2:i32} : (!spv.ptr<i64, Workgroup>, i32, i32) -> (i32) + return %0: i32 +} + +// ----- + //===----------------------------------------------------------------------===// // spv.AtomicCompareExchangeWeak //===----------------------------------------------------------------------===// @@ -73,6 +109,34 @@ func @atomic_compare_exchange_weak(%ptr: !spv.ptr<i32, Workgroup>, %value: i32, // ----- +//===----------------------------------------------------------------------===// +// spv.AtomicExchange +//===----------------------------------------------------------------------===// + +func @atomic_exchange(%ptr: !spv.ptr<i32, Workgroup>, %value: i32) -> i32 { + // CHECK: spv.AtomicExchange "Workgroup" "Release" %{{.*}}, %{{.*}} : !spv.ptr<i32, Workgroup> + %0 = spv.AtomicExchange "Workgroup" "Release" %ptr, %value: !spv.ptr<i32, Workgroup> + return %0: i32 +} + +// ----- + +func @atomic_exchange(%ptr: !spv.ptr<i32, Workgroup>, %value: i64) -> i32 { + // expected-error @+1 {{value operand must have the same type as the op result, but found 'i64' vs 'i32'}} + %0 = "spv.AtomicExchange"(%ptr, %value) {memory_scope = 4: i32, semantics = 0x4: i32} : (!spv.ptr<i32, Workgroup>, i64) -> (i32) + return %0: i32 +} + +// ----- + +func @atomic_exchange(%ptr: !spv.ptr<i64, Workgroup>, %value: i32) -> i32 { + // expected-error @+1 {{pointer operand's pointee type must have the same as the op result type, but found 'i64' vs 'i32'}} + %0 = "spv.AtomicExchange"(%ptr, %value) {memory_scope = 4: i32, semantics = 0x4: i32} : (!spv.ptr<i64, Workgroup>, i32) -> (i32) + return %0: i32 +} + +// ----- + //===----------------------------------------------------------------------===// // spv.AtomicIAdd //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir index 2c60c63705333..b78d921b4aa4e 100644 --- a/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir +++ b/mlir/test/Dialect/SparseTensor/invalid_encoding.mlir @@ -1,5 +1,8 @@ // RUN: mlir-opt %s -split-input-file -verify-diagnostics +#a = #sparse_tensor.encoding<{dimLevelType = []}> +func private @scalar(%arg0: tensor<f64, #a>) -> () // expected-error
{{expected non-scalar sparse tensor}} + + // ----- #a = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}> diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir index 01cbf09785396..7c5fc72c63288 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir @@ -842,24 +842,24 @@ func @two_way_inv_alt(%arga: tensor<16xf32, #SV>, } // CHECK-LABEL: func @sum_reduction( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> { -// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-SAME: %[[VAL_0:.*]]: tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> { +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK: %[[VAL_4:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex> // CHECK: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xf32> // CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_1]] : memref<f32> // CHECK: %[[VAL_7:.*]] = memref.alloc() : memref<f32> // CHECK: memref.copy %[[VAL_6]], %[[VAL_7]] : memref<f32> to memref<f32> -// CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex> -// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex> -// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_7]][] : memref<f32> +// CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex> +// CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex> +// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_7]][] : memref<f32> // CHECK: %[[VAL_11:.*]] = scf.for %[[VAL_12:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] iter_args(%[[VAL_13:.*]] = %[[VAL_10]]) -> (f32) { // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref<?xf32> // CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_13]], %[[VAL_14]] : f32 // CHECK: scf.yield %[[VAL_15]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_16:.*]], %[[VAL_7]][] : memref<f32> +// CHECK: memref.store %[[VAL_11]], %[[VAL_7]][] : memref<f32> // CHECK: %[[VAL_17:.*]] = memref.tensor_load %[[VAL_7]] : memref<f32> // CHECK: return %[[VAL_17]] : tensor<f32> // CHECK: } @@ -885,11 +885,11 @@ func @sum_reduction(%arga: tensor<?xf32, #SV>, %argx: tensor<f32>) -> tensor<f32> { // CHECK-LABEL: func @sum_reduction_ss( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_2:.*2]]: tensor<f32>) -> tensor<f32> { -// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, +// CHECK-SAME: %[[VAL_2:.*2]]: tensor<f32>) -> tensor<f32> { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex> // CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex> // CHECK: %[[VAL_7:.*]] = sparse_tensor.values
%[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref @@ -899,71 +899,71 @@ func @sum_reduction(%arga: tensor, %argx: tensor) -> tensor // CHECK: %[[VAL_12:.*]] = memref.alloc() : memref // CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref to memref -// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_13]], %[[VAL_19:.*]] = %[[VAL_15]]) : (index, index) -> (index, index) { -// CHECK: %[[VAL_20:.*]] = arith.cmpi ult, %[[VAL_18]], %[[VAL_14]] : index -// CHECK: %[[VAL_21:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_16]] : index -// CHECK: %[[VAL_22:.*]] = arith.andi %[[VAL_20]], %[[VAL_21]] : i1 -// CHECK: scf.condition(%[[VAL_22]]) %[[VAL_18]], %[[VAL_19]] : index, index +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref +// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_18:.*]]:3 = scf.while (%[[VAL_19:.*]] = %[[VAL_14]], %[[VAL_20:.*]] = %[[VAL_16]], %[[VAL_21:.*]] = %[[VAL_13]]) : (index, index, f32) -> (index, index, f32) { +// CHECK: %[[VAL_22:.*]] = arith.cmpi ult, %[[VAL_19]], %[[VAL_15]] : index +// CHECK: %[[VAL_23:.*]] = arith.cmpi ult, %[[VAL_20]], %[[VAL_17]] : index +// CHECK: %[[VAL_24:.*]] = arith.andi %[[VAL_22]], %[[VAL_23]] : i1 +// CHECK: scf.condition(%[[VAL_24]]) %[[VAL_19]], %[[VAL_20]], %[[VAL_21]] : index, index, f32 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_23:.*]]: index, %[[VAL_24:.*]]: index): -// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_25]] : index -// CHECK: %[[VAL_28:.*]] = select %[[VAL_27]], %[[VAL_26]], %[[VAL_25]] : index -// CHECK: %[[VAL_29:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index -// CHECK: %[[VAL_30:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index -// CHECK: %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1 -// CHECK: scf.if %[[VAL_31]] { -// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_12]][] : memref -// CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_35:.*]] = arith.addf %[[VAL_33]], %[[VAL_34]] : f32 -// CHECK: %[[VAL_36:.*]] = arith.addf %[[VAL_32]], %[[VAL_35]] : f32 -// CHECK: memref.store %[[VAL_36]], %[[VAL_12]][] : memref +// CHECK: ^bb0(%[[VAL_25:.*]]: index, %[[VAL_26:.*]]: index, %[[VAL_27:.*]]: f32): +// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_29]], %[[VAL_28]] : index +// CHECK: %[[VAL_31:.*]] = select %[[VAL_30]], %[[VAL_29]], %[[VAL_28]] : index +// CHECK: %[[VAL_32:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index +// CHECK: %[[VAL_33:.*]] = arith.cmpi eq, %[[VAL_29]], 
%[[VAL_31]] : index +// CHECK: %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1 +// CHECK: %[[VAL_35:.*]] = scf.if %[[VAL_34]] -> (f32) { +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_38:.*]] = arith.addf %[[VAL_36]], %[[VAL_37]] : f32 +// CHECK: %[[VAL_39:.*]] = arith.addf %[[VAL_27]], %[[VAL_38]] : f32 +// CHECK: scf.yield %[[VAL_39]] : f32 // CHECK: } else { -// CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index -// CHECK: scf.if %[[VAL_37]] { -// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_12]][] : memref -// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_40:.*]] = arith.addf %[[VAL_38]], %[[VAL_39]] : f32 -// CHECK: memref.store %[[VAL_40]], %[[VAL_12]][] : memref +// CHECK: %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index +// CHECK: %[[VAL_41:.*]] = scf.if %[[VAL_40]] -> (f32) { +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_25]]] : memref +// CHECK: %[[VAL_43:.*]] = arith.addf %[[VAL_27]], %[[VAL_42]] : f32 +// CHECK: scf.yield %[[VAL_43]] : f32 // CHECK: } else { -// CHECK: %[[VAL_41:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index -// CHECK: scf.if %[[VAL_41]] { -// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_12]][] : memref -// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_24]]] : memref -// CHECK: %[[VAL_44:.*]] = arith.addf %[[VAL_42]], %[[VAL_43]] : f32 -// CHECK: memref.store %[[VAL_44]], %[[VAL_12]][] : memref +// CHECK: %[[VAL_44:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index +// CHECK: %[[VAL_45:.*]] = scf.if %[[VAL_44]] -> (f32) { +// CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_26]]] : memref +// CHECK: %[[VAL_47:.*]] = arith.addf %[[VAL_27]], %[[VAL_46]] : f32 +// CHECK: scf.yield %[[VAL_47]] : f32 // CHECK: } else { +// CHECK: scf.yield %[[VAL_27]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_48:.*]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_49:.*]] : f32 // CHECK: } -// CHECK: %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_25]], %[[VAL_28]] : index -// CHECK: %[[VAL_46:.*]] = arith.addi %[[VAL_23]], %[[VAL_4]] : index -// CHECK: %[[VAL_47:.*]] = select %[[VAL_45]], %[[VAL_46]], %[[VAL_23]] : index -// CHECK: %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_26]], %[[VAL_28]] : index -// CHECK: %[[VAL_49:.*]] = arith.addi %[[VAL_24]], %[[VAL_4]] : index -// CHECK: %[[VAL_50:.*]] = select %[[VAL_48]], %[[VAL_49]], %[[VAL_24]] : index -// CHECK: scf.yield %[[VAL_47]], %[[VAL_50]] : index, index +// CHECK: %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index +// CHECK: %[[VAL_51:.*]] = arith.addi %[[VAL_25]], %[[VAL_4]] : index +// CHECK: %[[VAL_52:.*]] = select %[[VAL_50]], %[[VAL_51]], %[[VAL_25]] : index +// CHECK: %[[VAL_53:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index +// CHECK: %[[VAL_54:.*]] = arith.addi %[[VAL_26]], %[[VAL_4]] : index +// CHECK: %[[VAL_55:.*]] = select %[[VAL_53]], %[[VAL_54]], %[[VAL_26]] : index +// CHECK: scf.yield %[[VAL_52]], %[[VAL_55]], %[[VAL_56:.*]] : index, index, f32 // CHECK: } -// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_12]][] : memref -// CHECK: %[[VAL_52:.*]] = scf.for %[[VAL_53:.*]] = %[[VAL_54:.*]]#0 to %[[VAL_14]] step %[[VAL_4]] iter_args(%[[VAL_55:.*]] = %[[VAL_51]]) -> (f32) { -// CHECK: %[[VAL_56:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_53]]] : memref -// CHECK: %[[VAL_57:.*]] = arith.addf %[[VAL_55]], %[[VAL_56]] : f32 -// CHECK: scf.yield 
%[[VAL_57]] : f32 +// CHECK: %[[VAL_57:.*]] = scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#0 to %[[VAL_15]] step %[[VAL_4]] iter_args(%[[VAL_60:.*]] = %[[VAL_59]]#2) -> (f32) { +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_58]]] : memref +// CHECK: %[[VAL_62:.*]] = arith.addf %[[VAL_60]], %[[VAL_61]] : f32 +// CHECK: scf.yield %[[VAL_62]] : f32 // CHECK: } -// CHECK: %[[VAL_58:.*]] = scf.for %[[VAL_59:.*]] = %[[VAL_60:.*]]#1 to %[[VAL_16]] step %[[VAL_4]] iter_args(%[[VAL_61:.*]] = %[[VAL_62:.*]]) -> (f32) { -// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_59]]] : memref -// CHECK: %[[VAL_64:.*]] = arith.addf %[[VAL_61]], %[[VAL_63]] : f32 -// CHECK: scf.yield %[[VAL_64]] : f32 +// CHECK: %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#1 to %[[VAL_17]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_67:.*]]) -> (f32) { +// CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_64]]] : memref +// CHECK: %[[VAL_69:.*]] = arith.addf %[[VAL_66]], %[[VAL_68]] : f32 +// CHECK: scf.yield %[[VAL_69]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_65:.*]], %[[VAL_12]][] : memref -// CHECK: %[[VAL_66:.*]] = memref.tensor_load %[[VAL_12]] : memref -// CHECK: return %[[VAL_66]] : tensor +// CHECK: memref.store %[[VAL_70:.*]], %[[VAL_12]][] : memref +// CHECK: %[[VAL_71:.*]] = memref.tensor_load %[[VAL_12]] : memref +// CHECK: return %[[VAL_71]] : tensor // CHECK: } func @sum_reduction_ss(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, @@ -993,12 +993,12 @@ func @sum_reduction_ss(%arga: tensor<16xf32, #SV>, } // CHECK-LABEL: func @sum_reduction_inv( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_1:.*1]]: tensor, -// CHECK-SAME: %[[VAL_2:.*2]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { -// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor, +// CHECK-SAME: %[[VAL_2:.*2]]: tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, +// CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref // CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref @@ -1009,75 +1009,75 @@ func @sum_reduction_ss(%arga: tensor<16xf32, #SV>, // CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_3]] : memref // CHECK: %[[VAL_14:.*]] = memref.alloc() : memref // CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref to memref -// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]][] : memref -// CHECK: %[[VAL_16:.*]] = 
memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_16]], %[[VAL_22:.*]] = %[[VAL_18]]) : (index, index) -> (index, index) { -// CHECK: %[[VAL_23:.*]] = arith.cmpi ult, %[[VAL_21]], %[[VAL_17]] : index -// CHECK: %[[VAL_24:.*]] = arith.cmpi ult, %[[VAL_22]], %[[VAL_19]] : index -// CHECK: %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1 -// CHECK: scf.condition(%[[VAL_25]]) %[[VAL_21]], %[[VAL_22]] : index, index +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_14]][] : memref +// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]][] : memref +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_21:.*]]:3 = scf.while (%[[VAL_22:.*]] = %[[VAL_17]], %[[VAL_23:.*]] = %[[VAL_19]], %[[VAL_24:.*]] = %[[VAL_15]]) : (index, index, f32) -> (index, index, f32) { +// CHECK: %[[VAL_25:.*]] = arith.cmpi ult, %[[VAL_22]], %[[VAL_18]] : index +// CHECK: %[[VAL_26:.*]] = arith.cmpi ult, %[[VAL_23]], %[[VAL_20]] : index +// CHECK: %[[VAL_27:.*]] = arith.andi %[[VAL_25]], %[[VAL_26]] : i1 +// CHECK: scf.condition(%[[VAL_27]]) %[[VAL_22]], %[[VAL_23]], %[[VAL_24]] : index, index, f32 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_26:.*]]: index, %[[VAL_27:.*]]: index): -// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_26]]] : memref -// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_29]], %[[VAL_28]] : index -// CHECK: %[[VAL_31:.*]] = select %[[VAL_30]], %[[VAL_29]], %[[VAL_28]] : index -// CHECK: %[[VAL_32:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index -// CHECK: %[[VAL_33:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index -// CHECK: %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1 -// CHECK: scf.if %[[VAL_34]] { -// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_14]][] : memref -// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_26]]] : memref -// CHECK: %[[VAL_37:.*]] = arith.mulf %[[VAL_36]], %[[VAL_15]] : f32 -// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_39:.*]] = arith.addf %[[VAL_37]], %[[VAL_38]] : f32 -// CHECK: %[[VAL_40:.*]] = arith.addf %[[VAL_35]], %[[VAL_39]] : f32 -// CHECK: memref.store %[[VAL_40]], %[[VAL_14]][] : memref +// CHECK: ^bb0(%[[VAL_28:.*]]: index, %[[VAL_29:.*]]: index, %[[VAL_30:.*]]: f32): +// CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_33:.*]] = arith.cmpi ult, %[[VAL_32]], %[[VAL_31]] : index +// CHECK: %[[VAL_34:.*]] = select %[[VAL_33]], %[[VAL_32]], %[[VAL_31]] : index +// CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_34]] : index +// CHECK: %[[VAL_36:.*]] = arith.cmpi eq, %[[VAL_32]], %[[VAL_34]] : index +// CHECK: %[[VAL_37:.*]] = arith.andi %[[VAL_35]], %[[VAL_36]] : i1 +// CHECK: %[[VAL_38:.*]] = scf.if %[[VAL_37]] -> (f32) { +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_28]]] : memref +// 
CHECK: %[[VAL_40:.*]] = arith.mulf %[[VAL_39]], %[[VAL_16]] : f32 +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_42:.*]] = arith.addf %[[VAL_40]], %[[VAL_41]] : f32 +// CHECK: %[[VAL_43:.*]] = arith.addf %[[VAL_30]], %[[VAL_42]] : f32 +// CHECK: scf.yield %[[VAL_43]] : f32 // CHECK: } else { -// CHECK: %[[VAL_41:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index -// CHECK: scf.if %[[VAL_41]] { -// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]][] : memref -// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_26]]] : memref -// CHECK: %[[VAL_44:.*]] = arith.mulf %[[VAL_43]], %[[VAL_15]] : f32 -// CHECK: %[[VAL_45:.*]] = arith.addf %[[VAL_42]], %[[VAL_44]] : f32 -// CHECK: memref.store %[[VAL_45]], %[[VAL_14]][] : memref +// CHECK: %[[VAL_44:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_34]] : index +// CHECK: %[[VAL_45:.*]] = scf.if %[[VAL_44]] -> (f32) { +// CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_28]]] : memref +// CHECK: %[[VAL_47:.*]] = arith.mulf %[[VAL_46]], %[[VAL_16]] : f32 +// CHECK: %[[VAL_48:.*]] = arith.addf %[[VAL_30]], %[[VAL_47]] : f32 +// CHECK: scf.yield %[[VAL_48]] : f32 // CHECK: } else { -// CHECK: %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index -// CHECK: scf.if %[[VAL_46]] { -// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_14]][] : memref -// CHECK: %[[VAL_48:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_27]]] : memref -// CHECK: %[[VAL_49:.*]] = arith.addf %[[VAL_47]], %[[VAL_48]] : f32 -// CHECK: memref.store %[[VAL_49]], %[[VAL_14]][] : memref +// CHECK: %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_32]], %[[VAL_34]] : index +// CHECK: %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (f32) { +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_29]]] : memref +// CHECK: %[[VAL_52:.*]] = arith.addf %[[VAL_30]], %[[VAL_51]] : f32 +// CHECK: scf.yield %[[VAL_52]] : f32 // CHECK: } else { +// CHECK: scf.yield %[[VAL_30]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_53:.*]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_54:.*]] : f32 // CHECK: } -// CHECK: %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_28]], %[[VAL_31]] : index -// CHECK: %[[VAL_51:.*]] = arith.addi %[[VAL_26]], %[[VAL_5]] : index -// CHECK: %[[VAL_52:.*]] = select %[[VAL_50]], %[[VAL_51]], %[[VAL_26]] : index -// CHECK: %[[VAL_53:.*]] = arith.cmpi eq, %[[VAL_29]], %[[VAL_31]] : index -// CHECK: %[[VAL_54:.*]] = arith.addi %[[VAL_27]], %[[VAL_5]] : index -// CHECK: %[[VAL_55:.*]] = select %[[VAL_53]], %[[VAL_54]], %[[VAL_27]] : index -// CHECK: scf.yield %[[VAL_52]], %[[VAL_55]] : index, index +// CHECK: %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_31]], %[[VAL_34]] : index +// CHECK: %[[VAL_56:.*]] = arith.addi %[[VAL_28]], %[[VAL_5]] : index +// CHECK: %[[VAL_57:.*]] = select %[[VAL_55]], %[[VAL_56]], %[[VAL_28]] : index +// CHECK: %[[VAL_58:.*]] = arith.cmpi eq, %[[VAL_32]], %[[VAL_34]] : index +// CHECK: %[[VAL_59:.*]] = arith.addi %[[VAL_29]], %[[VAL_5]] : index +// CHECK: %[[VAL_60:.*]] = select %[[VAL_58]], %[[VAL_59]], %[[VAL_29]] : index +// CHECK: scf.yield %[[VAL_57]], %[[VAL_60]], %[[VAL_61:.*]] : index, index, f32 // CHECK: } -// CHECK: %[[VAL_56:.*]] = memref.load %[[VAL_14]][] : memref -// CHECK: %[[VAL_57:.*]] = scf.for %[[VAL_58:.*]] = %[[VAL_59:.*]]#0 to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_60:.*]] = %[[VAL_56]]) -> (f32) { -// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_58]]] : memref -// CHECK: %[[VAL_62:.*]] = arith.mulf %[[VAL_61]], %[[VAL_15]] : f32 -// CHECK: %[[VAL_63:.*]] = arith.addf 
%[[VAL_60]], %[[VAL_62]] : f32 -// CHECK: scf.yield %[[VAL_63]] : f32 +// CHECK: %[[VAL_62:.*]] = scf.for %[[VAL_63:.*]] = %[[VAL_64:.*]]#0 to %[[VAL_18]] step %[[VAL_5]] iter_args(%[[VAL_65:.*]] = %[[VAL_64]]#2) -> (f32) { +// CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_63]]] : memref +// CHECK: %[[VAL_67:.*]] = arith.mulf %[[VAL_66]], %[[VAL_16]] : f32 +// CHECK: %[[VAL_68:.*]] = arith.addf %[[VAL_65]], %[[VAL_67]] : f32 +// CHECK: scf.yield %[[VAL_68]] : f32 // CHECK: } -// CHECK: %[[VAL_64:.*]] = scf.for %[[VAL_65:.*]] = %[[VAL_66:.*]]#1 to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_67:.*]] = %[[VAL_68:.*]]) -> (f32) { -// CHECK: %[[VAL_69:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_65]]] : memref -// CHECK: %[[VAL_70:.*]] = arith.addf %[[VAL_67]], %[[VAL_69]] : f32 -// CHECK: scf.yield %[[VAL_70]] : f32 +// CHECK: %[[VAL_69:.*]] = scf.for %[[VAL_70:.*]] = %[[VAL_71:.*]]#1 to %[[VAL_20]] step %[[VAL_5]] iter_args(%[[VAL_72:.*]] = %[[VAL_73:.*]]) -> (f32) { +// CHECK: %[[VAL_74:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_70]]] : memref +// CHECK: %[[VAL_75:.*]] = arith.addf %[[VAL_72]], %[[VAL_74]] : f32 +// CHECK: scf.yield %[[VAL_75]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_71:.*]], %[[VAL_14]][] : memref -// CHECK: %[[VAL_72:.*]] = memref.tensor_load %[[VAL_14]] : memref -// CHECK: return %[[VAL_72]] : tensor +// CHECK: memref.store %[[VAL_76:.*]], %[[VAL_14]][] : memref +// CHECK: %[[VAL_77:.*]] = memref.tensor_load %[[VAL_14]] : memref +// CHECK: return %[[VAL_77]] : tensor // CHECK: } func @sum_reduction_inv(%arga: tensor<16xf32, #SV>, %argb: tensor, @@ -1289,12 +1289,12 @@ func @four_tensors_op(%arga: tensor, } // CHECK-LABEL: func @red3s( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, -// CHECK-SAME: %[[VAL_1:.*1]]: tensor>, -// CHECK-SAME: %[[VAL_2:.*2]]: tensor>, -// CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { -// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor>, +// CHECK-SAME: %[[VAL_2:.*2]]: tensor>, +// CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor> to memref // CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref @@ -1307,277 +1307,275 @@ func @four_tensors_op(%arga: tensor, // CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_3]] : memref // CHECK: %[[VAL_16:.*]] = memref.alloc() : memref // CHECK: memref.copy %[[VAL_15]], %[[VAL_16]] : memref to memref -// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_23:.*]]:3 = scf.while (%[[VAL_24:.*]] = %[[VAL_17]], %[[VAL_25:.*]] = %[[VAL_19]], %[[VAL_26:.*]] = %[[VAL_21]]) : (index, index, index) -> (index, index, index) { -// CHECK: %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_18]] : index -// CHECK: %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_25]], 
%[[VAL_20]] : index -// CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_27]], %[[VAL_28]] : i1 -// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_22]] : index +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_16]][] : memref +// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref +// CHECK: %[[VAL_24:.*]]:4 = scf.while (%[[VAL_25:.*]] = %[[VAL_18]], %[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_22]], %[[VAL_28:.*]] = %[[VAL_17]]) : (index, index, index, f64) -> (index, index, index, f64) { +// CHECK: %[[VAL_29:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_19]] : index +// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_26]], %[[VAL_21]] : index // CHECK: %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1 -// CHECK: scf.condition(%[[VAL_31]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, index +// CHECK: %[[VAL_32:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_23]] : index +// CHECK: %[[VAL_33:.*]] = arith.andi %[[VAL_31]], %[[VAL_32]] : i1 +// CHECK: scf.condition(%[[VAL_33]]) %[[VAL_25]], %[[VAL_26]], %[[VAL_27]], %[[VAL_28]] : index, index, index, f64 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_32:.*]]: index, %[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index): -// CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_37:.*]] = arith.cmpi ult, %[[VAL_36]], %[[VAL_35]] : index -// CHECK: %[[VAL_38:.*]] = select %[[VAL_37]], %[[VAL_36]], %[[VAL_35]] : index -// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref +// CHECK: ^bb0(%[[VAL_34:.*]]: index, %[[VAL_35:.*]]: index, %[[VAL_36:.*]]: index, %[[VAL_37:.*]]: f64): +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_39:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_35]]] : memref // CHECK: %[[VAL_40:.*]] = arith.cmpi ult, %[[VAL_39]], %[[VAL_38]] : index // CHECK: %[[VAL_41:.*]] = select %[[VAL_40]], %[[VAL_39]], %[[VAL_38]] : index -// CHECK: %[[VAL_42:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_41]] : index -// CHECK: %[[VAL_43:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_41]] : index -// CHECK: %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1 -// CHECK: %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_41]] : index -// CHECK: %[[VAL_46:.*]] = arith.andi %[[VAL_44]], %[[VAL_45]] : i1 -// CHECK: scf.if %[[VAL_46]] { -// CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_48:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_49:.*]] = arith.addf %[[VAL_47]], %[[VAL_48]] : f64 -// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_51:.*]] = arith.addf %[[VAL_49]], %[[VAL_50]] : f64 -// CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref -// CHECK: %[[VAL_53:.*]] = arith.addf %[[VAL_51]], %[[VAL_52]] : f64 -// CHECK: memref.store %[[VAL_53]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_36]]] : memref +// CHECK: %[[VAL_43:.*]] = arith.cmpi ult, %[[VAL_42]], %[[VAL_41]] : index +// CHECK: 
%[[VAL_44:.*]] = select %[[VAL_43]], %[[VAL_42]], %[[VAL_41]] : index +// CHECK: %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index +// CHECK: %[[VAL_46:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index +// CHECK: %[[VAL_47:.*]] = arith.andi %[[VAL_45]], %[[VAL_46]] : i1 +// CHECK: %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index +// CHECK: %[[VAL_49:.*]] = arith.andi %[[VAL_47]], %[[VAL_48]] : i1 +// CHECK: %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (f64) { +// CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_52:.*]] = arith.addf %[[VAL_37]], %[[VAL_51]] : f64 +// CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref +// CHECK: %[[VAL_54:.*]] = arith.addf %[[VAL_52]], %[[VAL_53]] : f64 +// CHECK: %[[VAL_55:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref +// CHECK: %[[VAL_56:.*]] = arith.addf %[[VAL_54]], %[[VAL_55]] : f64 +// CHECK: scf.yield %[[VAL_56]] : f64 // CHECK: } else { -// CHECK: %[[VAL_54:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_41]] : index -// CHECK: %[[VAL_55:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_41]] : index -// CHECK: %[[VAL_56:.*]] = arith.andi %[[VAL_54]], %[[VAL_55]] : i1 -// CHECK: scf.if %[[VAL_56]] { -// CHECK: %[[VAL_57:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_59:.*]] = arith.addf %[[VAL_57]], %[[VAL_58]] : f64 -// CHECK: %[[VAL_60:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref -// CHECK: %[[VAL_61:.*]] = arith.addf %[[VAL_59]], %[[VAL_60]] : f64 -// CHECK: memref.store %[[VAL_61]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_57:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index +// CHECK: %[[VAL_58:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index +// CHECK: %[[VAL_59:.*]] = arith.andi %[[VAL_57]], %[[VAL_58]] : i1 +// CHECK: %[[VAL_60:.*]] = scf.if %[[VAL_59]] -> (f64) { +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref +// CHECK: %[[VAL_62:.*]] = arith.addf %[[VAL_37]], %[[VAL_61]] : f64 +// CHECK: %[[VAL_63:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref +// CHECK: %[[VAL_64:.*]] = arith.addf %[[VAL_62]], %[[VAL_63]] : f64 +// CHECK: scf.yield %[[VAL_64]] : f64 // CHECK: } else { -// CHECK: %[[VAL_62:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_41]] : index -// CHECK: %[[VAL_63:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_41]] : index -// CHECK: %[[VAL_64:.*]] = arith.andi %[[VAL_62]], %[[VAL_63]] : i1 -// CHECK: scf.if %[[VAL_64]] { -// CHECK: %[[VAL_65:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_66:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_67:.*]] = arith.addf %[[VAL_65]], %[[VAL_66]] : f64 -// CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref -// CHECK: %[[VAL_69:.*]] = arith.addf %[[VAL_67]], %[[VAL_68]] : f64 -// CHECK: memref.store %[[VAL_69]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_65:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index +// CHECK: %[[VAL_66:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index +// CHECK: %[[VAL_67:.*]] = arith.andi %[[VAL_65]], %[[VAL_66]] : i1 +// CHECK: %[[VAL_68:.*]] = scf.if %[[VAL_67]] -> (f64) { +// CHECK: %[[VAL_69:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_70:.*]] = arith.addf %[[VAL_37]], %[[VAL_69]] : f64 +// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref +// CHECK: %[[VAL_72:.*]] = arith.addf %[[VAL_70]], %[[VAL_71]] : 
f64 +// CHECK: scf.yield %[[VAL_72]] : f64 // CHECK: } else { -// CHECK: %[[VAL_70:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_41]] : index -// CHECK: scf.if %[[VAL_70]] { -// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref -// CHECK: %[[VAL_73:.*]] = arith.addf %[[VAL_71]], %[[VAL_72]] : f64 -// CHECK: memref.store %[[VAL_73]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_73:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index +// CHECK: %[[VAL_74:.*]] = scf.if %[[VAL_73]] -> (f64) { +// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_36]]] : memref +// CHECK: %[[VAL_76:.*]] = arith.addf %[[VAL_37]], %[[VAL_75]] : f64 +// CHECK: scf.yield %[[VAL_76]] : f64 // CHECK: } else { -// CHECK: %[[VAL_74:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_41]] : index -// CHECK: %[[VAL_75:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_41]] : index -// CHECK: %[[VAL_76:.*]] = arith.andi %[[VAL_74]], %[[VAL_75]] : i1 -// CHECK: scf.if %[[VAL_76]] { -// CHECK: %[[VAL_77:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_78:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_79:.*]] = arith.addf %[[VAL_77]], %[[VAL_78]] : f64 -// CHECK: %[[VAL_80:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_81:.*]] = arith.addf %[[VAL_79]], %[[VAL_80]] : f64 -// CHECK: memref.store %[[VAL_81]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_77:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index +// CHECK: %[[VAL_78:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index +// CHECK: %[[VAL_79:.*]] = arith.andi %[[VAL_77]], %[[VAL_78]] : i1 +// CHECK: %[[VAL_80:.*]] = scf.if %[[VAL_79]] -> (f64) { +// CHECK: %[[VAL_81:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_82:.*]] = arith.addf %[[VAL_37]], %[[VAL_81]] : f64 +// CHECK: %[[VAL_83:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref +// CHECK: %[[VAL_84:.*]] = arith.addf %[[VAL_82]], %[[VAL_83]] : f64 +// CHECK: scf.yield %[[VAL_84]] : f64 // CHECK: } else { -// CHECK: %[[VAL_82:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_41]] : index -// CHECK: scf.if %[[VAL_82]] { -// CHECK: %[[VAL_83:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_85:.*]] = arith.addf %[[VAL_83]], %[[VAL_84]] : f64 -// CHECK: memref.store %[[VAL_85]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_85:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index +// CHECK: %[[VAL_86:.*]] = scf.if %[[VAL_85]] -> (f64) { +// CHECK: %[[VAL_87:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_35]]] : memref +// CHECK: %[[VAL_88:.*]] = arith.addf %[[VAL_37]], %[[VAL_87]] : f64 +// CHECK: scf.yield %[[VAL_88]] : f64 // CHECK: } else { -// CHECK: %[[VAL_86:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_41]] : index -// CHECK: scf.if %[[VAL_86]] { -// CHECK: %[[VAL_87:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_88:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_89:.*]] = arith.addf %[[VAL_87]], %[[VAL_88]] : f64 -// CHECK: memref.store %[[VAL_89]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_89:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index +// CHECK: %[[VAL_90:.*]] = scf.if %[[VAL_89]] -> (f64) { +// CHECK: %[[VAL_91:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_92:.*]] = arith.addf %[[VAL_37]], %[[VAL_91]] : f64 +// CHECK: scf.yield %[[VAL_92]] : f64 // CHECK: } else { +// CHECK: scf.yield 
%[[VAL_37]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_93:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_94:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_95:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_96:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_97:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_98:.*]] : f64 // CHECK: } -// CHECK: %[[VAL_90:.*]] = arith.cmpi eq, %[[VAL_35]], %[[VAL_41]] : index -// CHECK: %[[VAL_91:.*]] = arith.addi %[[VAL_32]], %[[VAL_5]] : index -// CHECK: %[[VAL_92:.*]] = select %[[VAL_90]], %[[VAL_91]], %[[VAL_32]] : index -// CHECK: %[[VAL_93:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_41]] : index -// CHECK: %[[VAL_94:.*]] = arith.addi %[[VAL_33]], %[[VAL_5]] : index -// CHECK: %[[VAL_95:.*]] = select %[[VAL_93]], %[[VAL_94]], %[[VAL_33]] : index -// CHECK: %[[VAL_96:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_41]] : index -// CHECK: %[[VAL_97:.*]] = arith.addi %[[VAL_34]], %[[VAL_5]] : index -// CHECK: %[[VAL_98:.*]] = select %[[VAL_96]], %[[VAL_97]], %[[VAL_34]] : index -// CHECK: scf.yield %[[VAL_92]], %[[VAL_95]], %[[VAL_98]] : index, index, index +// CHECK: %[[VAL_99:.*]] = arith.cmpi eq, %[[VAL_38]], %[[VAL_44]] : index +// CHECK: %[[VAL_100:.*]] = arith.addi %[[VAL_34]], %[[VAL_5]] : index +// CHECK: %[[VAL_101:.*]] = select %[[VAL_99]], %[[VAL_100]], %[[VAL_34]] : index +// CHECK: %[[VAL_102:.*]] = arith.cmpi eq, %[[VAL_39]], %[[VAL_44]] : index +// CHECK: %[[VAL_103:.*]] = arith.addi %[[VAL_35]], %[[VAL_5]] : index +// CHECK: %[[VAL_104:.*]] = select %[[VAL_102]], %[[VAL_103]], %[[VAL_35]] : index +// CHECK: %[[VAL_105:.*]] = arith.cmpi eq, %[[VAL_42]], %[[VAL_44]] : index +// CHECK: %[[VAL_106:.*]] = arith.addi %[[VAL_36]], %[[VAL_5]] : index +// CHECK: %[[VAL_107:.*]] = select %[[VAL_105]], %[[VAL_106]], %[[VAL_36]] : index +// CHECK: scf.yield %[[VAL_101]], %[[VAL_104]], %[[VAL_107]], %[[VAL_108:.*]] : index, index, index, f64 // CHECK: } -// CHECK: %[[VAL_99:.*]]:2 = scf.while (%[[VAL_100:.*]] = %[[VAL_101:.*]]#1, %[[VAL_102:.*]] = %[[VAL_101]]#2) : (index, index) -> (index, index) { -// CHECK: %[[VAL_103:.*]] = arith.cmpi ult, %[[VAL_100]], %[[VAL_20]] : index -// CHECK: %[[VAL_104:.*]] = arith.cmpi ult, %[[VAL_102]], %[[VAL_22]] : index -// CHECK: %[[VAL_105:.*]] = arith.andi %[[VAL_103]], %[[VAL_104]] : i1 -// CHECK: scf.condition(%[[VAL_105]]) %[[VAL_100]], %[[VAL_102]] : index, index +// CHECK: %[[VAL_109:.*]]:3 = scf.while (%[[VAL_110:.*]] = %[[VAL_111:.*]]#1, %[[VAL_112:.*]] = %[[VAL_111]]#2, %[[VAL_113:.*]] = %[[VAL_111]]#3) : (index, index, f64) -> (index, index, f64) { +// CHECK: %[[VAL_114:.*]] = arith.cmpi ult, %[[VAL_110]], %[[VAL_21]] : index +// CHECK: %[[VAL_115:.*]] = arith.cmpi ult, %[[VAL_112]], %[[VAL_23]] : index +// CHECK: %[[VAL_116:.*]] = arith.andi %[[VAL_114]], %[[VAL_115]] : i1 +// CHECK: scf.condition(%[[VAL_116]]) %[[VAL_110]], %[[VAL_112]], %[[VAL_113]] : index, index, f64 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_106:.*]]: index, %[[VAL_107:.*]]: index): -// CHECK: %[[VAL_108:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_106]]] : memref -// CHECK: %[[VAL_109:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_107]]] : memref -// CHECK: %[[VAL_110:.*]] = arith.cmpi ult, %[[VAL_109]], %[[VAL_108]] : index -// CHECK: %[[VAL_111:.*]] = select %[[VAL_110]], %[[VAL_109]], %[[VAL_108]] : index -// CHECK: %[[VAL_112:.*]] = arith.cmpi eq, %[[VAL_108]], %[[VAL_111]] : index -// CHECK: %[[VAL_113:.*]] = arith.cmpi eq, %[[VAL_109]], %[[VAL_111]] : index -// CHECK: %[[VAL_114:.*]] = arith.andi %[[VAL_112]], %[[VAL_113]] : 
i1 -// CHECK: scf.if %[[VAL_114]] { -// CHECK: %[[VAL_115:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_116:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_106]]] : memref -// CHECK: %[[VAL_117:.*]] = arith.addf %[[VAL_115]], %[[VAL_116]] : f64 -// CHECK: %[[VAL_118:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_107]]] : memref -// CHECK: %[[VAL_119:.*]] = arith.addf %[[VAL_117]], %[[VAL_118]] : f64 -// CHECK: memref.store %[[VAL_119]], %[[VAL_16]][] : memref +// CHECK: ^bb0(%[[VAL_117:.*]]: index, %[[VAL_118:.*]]: index, %[[VAL_119:.*]]: f64): +// CHECK: %[[VAL_120:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_117]]] : memref +// CHECK: %[[VAL_121:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_118]]] : memref +// CHECK: %[[VAL_122:.*]] = arith.cmpi ult, %[[VAL_121]], %[[VAL_120]] : index +// CHECK: %[[VAL_123:.*]] = select %[[VAL_122]], %[[VAL_121]], %[[VAL_120]] : index +// CHECK: %[[VAL_124:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_123]] : index +// CHECK: %[[VAL_125:.*]] = arith.cmpi eq, %[[VAL_121]], %[[VAL_123]] : index +// CHECK: %[[VAL_126:.*]] = arith.andi %[[VAL_124]], %[[VAL_125]] : i1 +// CHECK: %[[VAL_127:.*]] = scf.if %[[VAL_126]] -> (f64) { +// CHECK: %[[VAL_128:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_117]]] : memref +// CHECK: %[[VAL_129:.*]] = arith.addf %[[VAL_119]], %[[VAL_128]] : f64 +// CHECK: %[[VAL_130:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_118]]] : memref +// CHECK: %[[VAL_131:.*]] = arith.addf %[[VAL_129]], %[[VAL_130]] : f64 +// CHECK: scf.yield %[[VAL_131]] : f64 // CHECK: } else { -// CHECK: %[[VAL_120:.*]] = arith.cmpi eq, %[[VAL_109]], %[[VAL_111]] : index -// CHECK: scf.if %[[VAL_120]] { -// CHECK: %[[VAL_121:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_122:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_107]]] : memref -// CHECK: %[[VAL_123:.*]] = arith.addf %[[VAL_121]], %[[VAL_122]] : f64 -// CHECK: memref.store %[[VAL_123]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_132:.*]] = arith.cmpi eq, %[[VAL_121]], %[[VAL_123]] : index +// CHECK: %[[VAL_133:.*]] = scf.if %[[VAL_132]] -> (f64) { +// CHECK: %[[VAL_134:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_118]]] : memref +// CHECK: %[[VAL_135:.*]] = arith.addf %[[VAL_119]], %[[VAL_134]] : f64 +// CHECK: scf.yield %[[VAL_135]] : f64 // CHECK: } else { -// CHECK: %[[VAL_124:.*]] = arith.cmpi eq, %[[VAL_108]], %[[VAL_111]] : index -// CHECK: scf.if %[[VAL_124]] { -// CHECK: %[[VAL_125:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_126:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_106]]] : memref -// CHECK: %[[VAL_127:.*]] = arith.addf %[[VAL_125]], %[[VAL_126]] : f64 -// CHECK: memref.store %[[VAL_127]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_136:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_123]] : index +// CHECK: %[[VAL_137:.*]] = scf.if %[[VAL_136]] -> (f64) { +// CHECK: %[[VAL_138:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_117]]] : memref +// CHECK: %[[VAL_139:.*]] = arith.addf %[[VAL_119]], %[[VAL_138]] : f64 +// CHECK: scf.yield %[[VAL_139]] : f64 // CHECK: } else { +// CHECK: scf.yield %[[VAL_119]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_140:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_141:.*]] : f64 // CHECK: } -// CHECK: %[[VAL_128:.*]] = arith.cmpi eq, %[[VAL_108]], %[[VAL_111]] : index -// CHECK: %[[VAL_129:.*]] = arith.addi %[[VAL_106]], %[[VAL_5]] : index -// CHECK: %[[VAL_130:.*]] = select %[[VAL_128]], %[[VAL_129]], %[[VAL_106]] : index -// CHECK: %[[VAL_131:.*]] = arith.cmpi eq, %[[VAL_109]], %[[VAL_111]] : index -// CHECK: %[[VAL_132:.*]] = arith.addi 
%[[VAL_107]], %[[VAL_5]] : index -// CHECK: %[[VAL_133:.*]] = select %[[VAL_131]], %[[VAL_132]], %[[VAL_107]] : index -// CHECK: scf.yield %[[VAL_130]], %[[VAL_133]] : index, index +// CHECK: %[[VAL_142:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_123]] : index +// CHECK: %[[VAL_143:.*]] = arith.addi %[[VAL_117]], %[[VAL_5]] : index +// CHECK: %[[VAL_144:.*]] = select %[[VAL_142]], %[[VAL_143]], %[[VAL_117]] : index +// CHECK: %[[VAL_145:.*]] = arith.cmpi eq, %[[VAL_121]], %[[VAL_123]] : index +// CHECK: %[[VAL_146:.*]] = arith.addi %[[VAL_118]], %[[VAL_5]] : index +// CHECK: %[[VAL_147:.*]] = select %[[VAL_145]], %[[VAL_146]], %[[VAL_118]] : index +// CHECK: scf.yield %[[VAL_144]], %[[VAL_147]], %[[VAL_148:.*]] : index, index, f64 // CHECK: } -// CHECK: %[[VAL_134:.*]]:2 = scf.while (%[[VAL_135:.*]] = %[[VAL_136:.*]]#0, %[[VAL_137:.*]] = %[[VAL_138:.*]]#1) : (index, index) -> (index, index) { -// CHECK: %[[VAL_139:.*]] = arith.cmpi ult, %[[VAL_135]], %[[VAL_18]] : index -// CHECK: %[[VAL_140:.*]] = arith.cmpi ult, %[[VAL_137]], %[[VAL_22]] : index -// CHECK: %[[VAL_141:.*]] = arith.andi %[[VAL_139]], %[[VAL_140]] : i1 -// CHECK: scf.condition(%[[VAL_141]]) %[[VAL_135]], %[[VAL_137]] : index, index +// CHECK: %[[VAL_149:.*]]:3 = scf.while (%[[VAL_150:.*]] = %[[VAL_151:.*]]#0, %[[VAL_152:.*]] = %[[VAL_153:.*]]#1, %[[VAL_154:.*]] = %[[VAL_153]]#2) : (index, index, f64) -> (index, index, f64) { +// CHECK: %[[VAL_155:.*]] = arith.cmpi ult, %[[VAL_150]], %[[VAL_19]] : index +// CHECK: %[[VAL_156:.*]] = arith.cmpi ult, %[[VAL_152]], %[[VAL_23]] : index +// CHECK: %[[VAL_157:.*]] = arith.andi %[[VAL_155]], %[[VAL_156]] : i1 +// CHECK: scf.condition(%[[VAL_157]]) %[[VAL_150]], %[[VAL_152]], %[[VAL_154]] : index, index, f64 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_142:.*]]: index, %[[VAL_143:.*]]: index): -// CHECK: %[[VAL_144:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_142]]] : memref -// CHECK: %[[VAL_145:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_143]]] : memref -// CHECK: %[[VAL_146:.*]] = arith.cmpi ult, %[[VAL_145]], %[[VAL_144]] : index -// CHECK: %[[VAL_147:.*]] = select %[[VAL_146]], %[[VAL_145]], %[[VAL_144]] : index -// CHECK: %[[VAL_148:.*]] = arith.cmpi eq, %[[VAL_144]], %[[VAL_147]] : index -// CHECK: %[[VAL_149:.*]] = arith.cmpi eq, %[[VAL_145]], %[[VAL_147]] : index -// CHECK: %[[VAL_150:.*]] = arith.andi %[[VAL_148]], %[[VAL_149]] : i1 -// CHECK: scf.if %[[VAL_150]] { -// CHECK: %[[VAL_151:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_152:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_142]]] : memref -// CHECK: %[[VAL_153:.*]] = arith.addf %[[VAL_151]], %[[VAL_152]] : f64 -// CHECK: %[[VAL_154:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_143]]] : memref -// CHECK: %[[VAL_155:.*]] = arith.addf %[[VAL_153]], %[[VAL_154]] : f64 -// CHECK: memref.store %[[VAL_155]], %[[VAL_16]][] : memref +// CHECK: ^bb0(%[[VAL_158:.*]]: index, %[[VAL_159:.*]]: index, %[[VAL_160:.*]]: f64): +// CHECK: %[[VAL_161:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_158]]] : memref +// CHECK: %[[VAL_162:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_159]]] : memref +// CHECK: %[[VAL_163:.*]] = arith.cmpi ult, %[[VAL_162]], %[[VAL_161]] : index +// CHECK: %[[VAL_164:.*]] = select %[[VAL_163]], %[[VAL_162]], %[[VAL_161]] : index +// CHECK: %[[VAL_165:.*]] = arith.cmpi eq, %[[VAL_161]], %[[VAL_164]] : index +// CHECK: %[[VAL_166:.*]] = arith.cmpi eq, %[[VAL_162]], %[[VAL_164]] : index +// CHECK: %[[VAL_167:.*]] = arith.andi %[[VAL_165]], %[[VAL_166]] : i1 +// CHECK: %[[VAL_168:.*]] = scf.if %[[VAL_167]] -> (f64) { +// 
CHECK: %[[VAL_169:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_158]]] : memref +// CHECK: %[[VAL_170:.*]] = arith.addf %[[VAL_160]], %[[VAL_169]] : f64 +// CHECK: %[[VAL_171:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_159]]] : memref +// CHECK: %[[VAL_172:.*]] = arith.addf %[[VAL_170]], %[[VAL_171]] : f64 +// CHECK: scf.yield %[[VAL_172]] : f64 // CHECK: } else { -// CHECK: %[[VAL_156:.*]] = arith.cmpi eq, %[[VAL_145]], %[[VAL_147]] : index -// CHECK: scf.if %[[VAL_156]] { -// CHECK: %[[VAL_157:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_158:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_143]]] : memref -// CHECK: %[[VAL_159:.*]] = arith.addf %[[VAL_157]], %[[VAL_158]] : f64 -// CHECK: memref.store %[[VAL_159]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_173:.*]] = arith.cmpi eq, %[[VAL_162]], %[[VAL_164]] : index +// CHECK: %[[VAL_174:.*]] = scf.if %[[VAL_173]] -> (f64) { +// CHECK: %[[VAL_175:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_159]]] : memref +// CHECK: %[[VAL_176:.*]] = arith.addf %[[VAL_160]], %[[VAL_175]] : f64 +// CHECK: scf.yield %[[VAL_176]] : f64 // CHECK: } else { -// CHECK: %[[VAL_160:.*]] = arith.cmpi eq, %[[VAL_144]], %[[VAL_147]] : index -// CHECK: scf.if %[[VAL_160]] { -// CHECK: %[[VAL_161:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_162:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_142]]] : memref -// CHECK: %[[VAL_163:.*]] = arith.addf %[[VAL_161]], %[[VAL_162]] : f64 -// CHECK: memref.store %[[VAL_163]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_177:.*]] = arith.cmpi eq, %[[VAL_161]], %[[VAL_164]] : index +// CHECK: %[[VAL_178:.*]] = scf.if %[[VAL_177]] -> (f64) { +// CHECK: %[[VAL_179:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_158]]] : memref +// CHECK: %[[VAL_180:.*]] = arith.addf %[[VAL_160]], %[[VAL_179]] : f64 +// CHECK: scf.yield %[[VAL_180]] : f64 // CHECK: } else { +// CHECK: scf.yield %[[VAL_160]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_181:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_182:.*]] : f64 // CHECK: } -// CHECK: %[[VAL_164:.*]] = arith.cmpi eq, %[[VAL_144]], %[[VAL_147]] : index -// CHECK: %[[VAL_165:.*]] = arith.addi %[[VAL_142]], %[[VAL_5]] : index -// CHECK: %[[VAL_166:.*]] = select %[[VAL_164]], %[[VAL_165]], %[[VAL_142]] : index -// CHECK: %[[VAL_167:.*]] = arith.cmpi eq, %[[VAL_145]], %[[VAL_147]] : index -// CHECK: %[[VAL_168:.*]] = arith.addi %[[VAL_143]], %[[VAL_5]] : index -// CHECK: %[[VAL_169:.*]] = select %[[VAL_167]], %[[VAL_168]], %[[VAL_143]] : index -// CHECK: scf.yield %[[VAL_166]], %[[VAL_169]] : index, index +// CHECK: %[[VAL_183:.*]] = arith.cmpi eq, %[[VAL_161]], %[[VAL_164]] : index +// CHECK: %[[VAL_184:.*]] = arith.addi %[[VAL_158]], %[[VAL_5]] : index +// CHECK: %[[VAL_185:.*]] = select %[[VAL_183]], %[[VAL_184]], %[[VAL_158]] : index +// CHECK: %[[VAL_186:.*]] = arith.cmpi eq, %[[VAL_162]], %[[VAL_164]] : index +// CHECK: %[[VAL_187:.*]] = arith.addi %[[VAL_159]], %[[VAL_5]] : index +// CHECK: %[[VAL_188:.*]] = select %[[VAL_186]], %[[VAL_187]], %[[VAL_159]] : index +// CHECK: scf.yield %[[VAL_185]], %[[VAL_188]], %[[VAL_189:.*]] : index, index, f64 // CHECK: } -// CHECK: %[[VAL_170:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_171:.*]] = scf.for %[[VAL_172:.*]] = %[[VAL_173:.*]]#1 to %[[VAL_22]] step %[[VAL_5]] iter_args(%[[VAL_174:.*]] = %[[VAL_170]]) -> (f64) { -// CHECK: %[[VAL_175:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_172]]] : memref -// CHECK: %[[VAL_176:.*]] = arith.addf %[[VAL_174]], %[[VAL_175]] : f64 -// CHECK: scf.yield %[[VAL_176]] : f64 +// CHECK: 
%[[VAL_190:.*]] = scf.for %[[VAL_191:.*]] = %[[VAL_192:.*]]#1 to %[[VAL_23]] step %[[VAL_5]] iter_args(%[[VAL_193:.*]] = %[[VAL_192]]#2) -> (f64) { +// CHECK: %[[VAL_194:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_191]]] : memref +// CHECK: %[[VAL_195:.*]] = arith.addf %[[VAL_193]], %[[VAL_194]] : f64 +// CHECK: scf.yield %[[VAL_195]] : f64 // CHECK: } -// CHECK: memref.store %[[VAL_177:.*]], %[[VAL_16]][] : memref -// CHECK: %[[VAL_178:.*]]:2 = scf.while (%[[VAL_179:.*]] = %[[VAL_180:.*]]#0, %[[VAL_181:.*]] = %[[VAL_182:.*]]#0) : (index, index) -> (index, index) { -// CHECK: %[[VAL_183:.*]] = arith.cmpi ult, %[[VAL_179]], %[[VAL_18]] : index -// CHECK: %[[VAL_184:.*]] = arith.cmpi ult, %[[VAL_181]], %[[VAL_20]] : index -// CHECK: %[[VAL_185:.*]] = arith.andi %[[VAL_183]], %[[VAL_184]] : i1 -// CHECK: scf.condition(%[[VAL_185]]) %[[VAL_179]], %[[VAL_181]] : index, index +// CHECK: %[[VAL_196:.*]]:3 = scf.while (%[[VAL_197:.*]] = %[[VAL_198:.*]]#0, %[[VAL_199:.*]] = %[[VAL_200:.*]]#0, %[[VAL_201:.*]] = %[[VAL_202:.*]]) : (index, index, f64) -> (index, index, f64) { +// CHECK: %[[VAL_203:.*]] = arith.cmpi ult, %[[VAL_197]], %[[VAL_19]] : index +// CHECK: %[[VAL_204:.*]] = arith.cmpi ult, %[[VAL_199]], %[[VAL_21]] : index +// CHECK: %[[VAL_205:.*]] = arith.andi %[[VAL_203]], %[[VAL_204]] : i1 +// CHECK: scf.condition(%[[VAL_205]]) %[[VAL_197]], %[[VAL_199]], %[[VAL_201]] : index, index, f64 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_186:.*]]: index, %[[VAL_187:.*]]: index): -// CHECK: %[[VAL_188:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_186]]] : memref -// CHECK: %[[VAL_189:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_187]]] : memref -// CHECK: %[[VAL_190:.*]] = arith.cmpi ult, %[[VAL_189]], %[[VAL_188]] : index -// CHECK: %[[VAL_191:.*]] = select %[[VAL_190]], %[[VAL_189]], %[[VAL_188]] : index -// CHECK: %[[VAL_192:.*]] = arith.cmpi eq, %[[VAL_188]], %[[VAL_191]] : index -// CHECK: %[[VAL_193:.*]] = arith.cmpi eq, %[[VAL_189]], %[[VAL_191]] : index -// CHECK: %[[VAL_194:.*]] = arith.andi %[[VAL_192]], %[[VAL_193]] : i1 -// CHECK: scf.if %[[VAL_194]] { -// CHECK: %[[VAL_195:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_196:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_186]]] : memref -// CHECK: %[[VAL_197:.*]] = arith.addf %[[VAL_195]], %[[VAL_196]] : f64 -// CHECK: %[[VAL_198:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_187]]] : memref -// CHECK: %[[VAL_199:.*]] = arith.addf %[[VAL_197]], %[[VAL_198]] : f64 -// CHECK: memref.store %[[VAL_199]], %[[VAL_16]][] : memref +// CHECK: ^bb0(%[[VAL_206:.*]]: index, %[[VAL_207:.*]]: index, %[[VAL_208:.*]]: f64): +// CHECK: %[[VAL_209:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_206]]] : memref +// CHECK: %[[VAL_210:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_207]]] : memref +// CHECK: %[[VAL_211:.*]] = arith.cmpi ult, %[[VAL_210]], %[[VAL_209]] : index +// CHECK: %[[VAL_212:.*]] = select %[[VAL_211]], %[[VAL_210]], %[[VAL_209]] : index +// CHECK: %[[VAL_213:.*]] = arith.cmpi eq, %[[VAL_209]], %[[VAL_212]] : index +// CHECK: %[[VAL_214:.*]] = arith.cmpi eq, %[[VAL_210]], %[[VAL_212]] : index +// CHECK: %[[VAL_215:.*]] = arith.andi %[[VAL_213]], %[[VAL_214]] : i1 +// CHECK: %[[VAL_216:.*]] = scf.if %[[VAL_215]] -> (f64) { +// CHECK: %[[VAL_217:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_206]]] : memref +// CHECK: %[[VAL_218:.*]] = arith.addf %[[VAL_208]], %[[VAL_217]] : f64 +// CHECK: %[[VAL_219:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_207]]] : memref +// CHECK: %[[VAL_220:.*]] = arith.addf %[[VAL_218]], %[[VAL_219]] : f64 +// CHECK: scf.yield 
%[[VAL_220]] : f64 // CHECK: } else { -// CHECK: %[[VAL_200:.*]] = arith.cmpi eq, %[[VAL_189]], %[[VAL_191]] : index -// CHECK: scf.if %[[VAL_200]] { -// CHECK: %[[VAL_201:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_202:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_187]]] : memref -// CHECK: %[[VAL_203:.*]] = arith.addf %[[VAL_201]], %[[VAL_202]] : f64 -// CHECK: memref.store %[[VAL_203]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_221:.*]] = arith.cmpi eq, %[[VAL_210]], %[[VAL_212]] : index +// CHECK: %[[VAL_222:.*]] = scf.if %[[VAL_221]] -> (f64) { +// CHECK: %[[VAL_223:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_207]]] : memref +// CHECK: %[[VAL_224:.*]] = arith.addf %[[VAL_208]], %[[VAL_223]] : f64 +// CHECK: scf.yield %[[VAL_224]] : f64 // CHECK: } else { -// CHECK: %[[VAL_204:.*]] = arith.cmpi eq, %[[VAL_188]], %[[VAL_191]] : index -// CHECK: scf.if %[[VAL_204]] { -// CHECK: %[[VAL_205:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_206:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_186]]] : memref -// CHECK: %[[VAL_207:.*]] = arith.addf %[[VAL_205]], %[[VAL_206]] : f64 -// CHECK: memref.store %[[VAL_207]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_225:.*]] = arith.cmpi eq, %[[VAL_209]], %[[VAL_212]] : index +// CHECK: %[[VAL_226:.*]] = scf.if %[[VAL_225]] -> (f64) { +// CHECK: %[[VAL_227:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_206]]] : memref +// CHECK: %[[VAL_228:.*]] = arith.addf %[[VAL_208]], %[[VAL_227]] : f64 +// CHECK: scf.yield %[[VAL_228]] : f64 // CHECK: } else { +// CHECK: scf.yield %[[VAL_208]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_229:.*]] : f64 // CHECK: } +// CHECK: scf.yield %[[VAL_230:.*]] : f64 // CHECK: } -// CHECK: %[[VAL_208:.*]] = arith.cmpi eq, %[[VAL_188]], %[[VAL_191]] : index -// CHECK: %[[VAL_209:.*]] = arith.addi %[[VAL_186]], %[[VAL_5]] : index -// CHECK: %[[VAL_210:.*]] = select %[[VAL_208]], %[[VAL_209]], %[[VAL_186]] : index -// CHECK: %[[VAL_211:.*]] = arith.cmpi eq, %[[VAL_189]], %[[VAL_191]] : index -// CHECK: %[[VAL_212:.*]] = arith.addi %[[VAL_187]], %[[VAL_5]] : index -// CHECK: %[[VAL_213:.*]] = select %[[VAL_211]], %[[VAL_212]], %[[VAL_187]] : index -// CHECK: scf.yield %[[VAL_210]], %[[VAL_213]] : index, index +// CHECK: %[[VAL_231:.*]] = arith.cmpi eq, %[[VAL_209]], %[[VAL_212]] : index +// CHECK: %[[VAL_232:.*]] = arith.addi %[[VAL_206]], %[[VAL_5]] : index +// CHECK: %[[VAL_233:.*]] = select %[[VAL_231]], %[[VAL_232]], %[[VAL_206]] : index +// CHECK: %[[VAL_234:.*]] = arith.cmpi eq, %[[VAL_210]], %[[VAL_212]] : index +// CHECK: %[[VAL_235:.*]] = arith.addi %[[VAL_207]], %[[VAL_5]] : index +// CHECK: %[[VAL_236:.*]] = select %[[VAL_234]], %[[VAL_235]], %[[VAL_207]] : index +// CHECK: scf.yield %[[VAL_233]], %[[VAL_236]], %[[VAL_237:.*]] : index, index, f64 // CHECK: } -// CHECK: %[[VAL_214:.*]] = memref.load %[[VAL_16]][] : memref -// CHECK: %[[VAL_215:.*]] = scf.for %[[VAL_216:.*]] = %[[VAL_217:.*]]#1 to %[[VAL_20]] step %[[VAL_5]] iter_args(%[[VAL_218:.*]] = %[[VAL_214]]) -> (f64) { -// CHECK: %[[VAL_219:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_216]]] : memref -// CHECK: %[[VAL_220:.*]] = arith.addf %[[VAL_218]], %[[VAL_219]] : f64 -// CHECK: scf.yield %[[VAL_220]] : f64 +// CHECK: %[[VAL_238:.*]] = scf.for %[[VAL_239:.*]] = %[[VAL_240:.*]]#1 to %[[VAL_21]] step %[[VAL_5]] iter_args(%[[VAL_241:.*]] = %[[VAL_240]]#2) -> (f64) { +// CHECK: %[[VAL_242:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_239]]] : memref +// CHECK: %[[VAL_243:.*]] = arith.addf %[[VAL_241]], %[[VAL_242]] : f64 +// CHECK: scf.yield %[[VAL_243]] 
: f64 // CHECK: } -// CHECK: %[[VAL_221:.*]] = scf.for %[[VAL_222:.*]] = %[[VAL_223:.*]]#0 to %[[VAL_18]] step %[[VAL_5]] iter_args(%[[VAL_224:.*]] = %[[VAL_225:.*]]) -> (f64) { -// CHECK: %[[VAL_226:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_222]]] : memref -// CHECK: %[[VAL_227:.*]] = arith.addf %[[VAL_224]], %[[VAL_226]] : f64 -// CHECK: scf.yield %[[VAL_227]] : f64 +// CHECK: %[[VAL_244:.*]] = scf.for %[[VAL_245:.*]] = %[[VAL_246:.*]]#0 to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_247:.*]] = %[[VAL_248:.*]]) -> (f64) { +// CHECK: %[[VAL_249:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_245]]] : memref +// CHECK: %[[VAL_250:.*]] = arith.addf %[[VAL_247]], %[[VAL_249]] : f64 +// CHECK: scf.yield %[[VAL_250]] : f64 // CHECK: } -// CHECK: memref.store %[[VAL_228:.*]], %[[VAL_16]][] : memref -// CHECK: %[[VAL_229:.*]] = memref.tensor_load %[[VAL_16]] : memref -// CHECK: return %[[VAL_229]] : tensor +// CHECK: memref.store %[[VAL_251:.*]], %[[VAL_16]][] : memref +// CHECK: %[[VAL_252:.*]] = memref.tensor_load %[[VAL_16]] : memref +// CHECK: return %[[VAL_252]] : tensor // CHECK: } func @red3s(%arga: tensor, %argb: tensor, diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir index 1de13929b36b6..954ad0622663e 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir @@ -870,12 +870,12 @@ func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, } // CHECK-LABEL: func @matvec( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor<16xf32>) -> tensor<16xf32> { -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index +// CHECK-SAME: %[[VAL_0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<16xf32>) -> tensor<16xf32> { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref // CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16x32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref @@ -884,10 +884,10 @@ func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, // CHECK: %[[VAL_11:.*]] = memref.alloc() : memref<16xf32> // CHECK: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<16xf32> to memref<16xf32> // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref -// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index -// CHECK: %[[VAL_15:.*]] = memref.load 
%[[VAL_6]]{{\[}}%[[VAL_14]]] : memref -// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32> +// CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref +// CHECK-DAG: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index +// CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32> // CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f32) { // CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref @@ -896,7 +896,7 @@ func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, // CHECK: %[[VAL_24:.*]] = arith.addf %[[VAL_23]], %[[VAL_19]] : f32 // CHECK: scf.yield %[[VAL_24]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_25:.*]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32> +// CHECK: memref.store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<16xf32> // CHECK: } // CHECK: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_11]] : memref<16xf32> // CHECK: return %[[VAL_26]] : tensor<16xf32> @@ -923,30 +923,31 @@ func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tenso } // CHECK-LABEL: func @sum_reduction( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 10 : index -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 10 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<10x20xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref // CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref // CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_1]] : memref // CHECK: %[[VAL_8:.*]] = memref.alloc() : memref // CHECK: memref.copy %[[VAL_7]], %[[VAL_8]] : memref to memref -// CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] { -// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_9]]] : memref -// CHECK: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_4]] : index -// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref -// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]][] : memref -// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_10]] to %[[VAL_12]] step %[[VAL_4]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) { -// CHECK: %[[VAL_17:.*]] = memref.load 
%[[VAL_6]]{{\[}}%[[VAL_15]]] : memref -// CHECK: %[[VAL_18:.*]] = arith.addf %[[VAL_16]], %[[VAL_17]] : f32 -// CHECK: scf.yield %[[VAL_18]] : f32 +// CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref +// CHECK: %[[VAL_10:.*]] = scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) { +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref +// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_11]], %[[VAL_3]] : index +// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_14]]] : memref +// CHECK: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_3]] iter_args(%[[VAL_18:.*]] = %[[VAL_12]]) -> (f32) { +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_17]]] : memref +// CHECK: %[[VAL_20:.*]] = arith.addf %[[VAL_18]], %[[VAL_19]] : f32 +// CHECK: scf.yield %[[VAL_20]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_19:.*]], %[[VAL_8]][] : memref +// CHECK: scf.yield %[[VAL_16]] : f32 // CHECK: } -// CHECK: %[[VAL_20:.*]] = memref.tensor_load %[[VAL_8]] : memref -// CHECK: return %[[VAL_20]] : tensor +// CHECK: memref.store %[[VAL_10]], %[[VAL_8]][] : memref +// CHECK: %[[VAL_23:.*]] = memref.tensor_load %[[VAL_8]] : memref +// CHECK: return %[[VAL_23]] : tensor // CHECK: } func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction @@ -1020,10 +1021,10 @@ func @scale(%arga: tensor, %argx: tensor) -> tensor>, -// CHECK-SAME: %[[VAL_1:.*1]]: tensor, -// CHECK-SAME: %[[VAL_2:.*2]]: tensor, -// CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { +// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor, +// CHECK-SAME: %[[VAL_2:.*2]]: tensor, +// CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { // CHECK: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor> to memref @@ -1047,9 +1048,9 @@ func @scale(%arga: tensor, %argx: tensor) -> tensor // CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_22]] to %[[VAL_24]] step %[[VAL_5]] { -// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_25]]] : memref -// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref -// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_21]], %[[VAL_26]]] : memref +// CHECK-DAG: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_25]]] : memref +// CHECK-DAG: %[[VAL_27:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_25]]] : memref +// CHECK-DAG: %[[VAL_28:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_21]], %[[VAL_26]]] : memref // CHECK: %[[VAL_29:.*]] = scf.for %[[VAL_30:.*]] = %[[VAL_4]] to %[[VAL_12]] step %[[VAL_5]] iter_args(%[[VAL_31:.*]] = %[[VAL_28]]) -> (f32) { // CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]], %[[VAL_30]]] : memref // CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_30]], %[[VAL_26]]] : memref @@ -1058,7 +1059,7 @@ func @scale(%arga: tensor, %argx: tensor) -> tensor +// CHECK: memref.store %[[VAL_29]], %[[VAL_17]]{{\[}}%[[VAL_21]], %[[VAL_26]]] : memref // CHECK: } // CHECK: } // CHECK: %[[VAL_38:.*]] = memref.tensor_load %[[VAL_17]] : memref @@ -1094,25 +1095,25 @@ func @sampled_dense_dense(%args: tensor, } // CHECK-LABEL: func @sum_kernel_with_inv( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, -// CHECK-SAME: %[[VAL_1:.*1]]: tensor>, -// CHECK-SAME: %[[VAL_2:.*2]]: tensor>, -// CHECK-SAME: %[[VAL_3:.*3]]: tensor, -// CHECK-SAME: 
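
Everything from red3s down through matvec, sum_reduction and sampled_dense_dense above encodes one underlying change in the sparse compiler's code generation: a scalar reduction no longer round-trips through its 0-d output buffer on every iteration, but is carried as a loop value, loaded once before the loop and stored once after it. A minimal hand-written sketch of the before/after shape, with invented names (%vals, %buf, %lo, %hi), not taken from the generated kernels:

```mlir
// Before (sketch): the running sum is re-loaded and re-stored through
// the 0-d buffer %buf on every iteration of the reduction loop.
func @sum_before(%vals: memref<?xf64>, %buf: memref<f64>, %lo: index, %hi: index) {
  %c1 = arith.constant 1 : index
  scf.for %i = %lo to %hi step %c1 {
    %acc = memref.load %buf[] : memref<f64>
    %v = memref.load %vals[%i] : memref<?xf64>
    %s = arith.addf %acc, %v : f64
    memref.store %s, %buf[] : memref<f64>
  }
  return
}

// After (sketch): one load before the loop, one store after it; the
// scalar travels in a loop-carried iter_args value instead of memory.
func @sum_after(%vals: memref<?xf64>, %buf: memref<f64>, %lo: index, %hi: index) {
  %c1 = arith.constant 1 : index
  %init = memref.load %buf[] : memref<f64>
  %red = scf.for %i = %lo to %hi step %c1 iter_args(%acc = %init) -> (f64) {
    %v = memref.load %vals[%i] : memref<?xf64>
    %s = arith.addf %acc, %v : f64
    scf.yield %s : f64
  }
  memref.store %red, %buf[] : memref<f64>
  return
}
```

Besides removing redundant memory traffic, keeping the reduction in SSA form lets later passes see it as a proper loop-carried value, which is what the new `iter_args(... ) -> (f64)` and `scf.yield` CHECK lines above are asserting.
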
%[[VAL_4:.*4]]: tensor, -// CHECK-SAME: %[[VAL_5:.*5]]: tensor) -> tensor { -// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_7:.*]] = arith.constant true -// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor>, +// CHECK-SAME: %[[VAL_2:.*2]]: tensor>, +// CHECK-SAME: %[[VAL_3:.*3]]: tensor, +// CHECK-SAME: %[[VAL_4:.*4]]: tensor, +// CHECK-SAME: %[[VAL_5:.*5]]: tensor) -> tensor { +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant true // CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_6]] : tensor> to memref // CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_6]] : tensor> to memref -// CHECK: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_8]] : tensor> to memref -// CHECK: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_8]] : tensor> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_7]] : tensor> to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_7]] : tensor> to memref // CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref -// CHECK: %[[VAL_14:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_8]] : tensor> to memref -// CHECK: %[[VAL_15:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_8]] : tensor> to memref +// CHECK: %[[VAL_14:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_7]] : tensor> to memref +// CHECK: %[[VAL_15:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_7]] : tensor> to memref // CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref -// CHECK: %[[VAL_17:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_8]] : tensor> to memref -// CHECK: %[[VAL_18:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_8]] : tensor> to memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_7]] : tensor> to memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_7]] : tensor> to memref // CHECK: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor> to memref // CHECK: %[[VAL_20:.*]] = memref.buffer_cast %[[VAL_3]] : memref // CHECK: %[[VAL_21:.*]] = memref.buffer_cast %[[VAL_4]] : memref @@ -1122,7 +1123,7 @@ func @sampled_dense_dense(%args: tensor, // CHECK: memref.copy %[[VAL_23]], %[[VAL_24]] : memref to memref // CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_21]][] : memref // CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_6]]] : memref -// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref +// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_28:.*]]:2 = scf.while (%[[VAL_29:.*]] = %[[VAL_26]], %[[VAL_30:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_31:.*]] = arith.cmpi ult, %[[VAL_29]], %[[VAL_27]] : index // CHECK: scf.condition(%[[VAL_31]]) %[[VAL_29]], %[[VAL_30]] : index, index @@ -1131,158 +1132,158 @@ func @sampled_dense_dense(%args: tensor, // CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_32]]] : memref // CHECK: %[[VAL_35:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_33]] : index // CHECK: scf.if %[[VAL_35]] { -// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_32]]] : memref -// CHECK: %[[VAL_38:.*]] = arith.addi %[[VAL_32]], %[[VAL_8]] : index -// CHECK: %[[VAL_39:.*]] = 
memref.load %[[VAL_11]]{{\[}}%[[VAL_38]]] : memref -// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_33]], %[[VAL_8]] : index -// CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref -// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_33]], %[[VAL_8]] : index -// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_44]]] : memref -// CHECK: %[[VAL_46:.*]]:3 = scf.while (%[[VAL_47:.*]] = %[[VAL_37]], %[[VAL_48:.*]] = %[[VAL_40]], %[[VAL_49:.*]] = %[[VAL_43]]) : (index, index, index) -> (index, index, index) { -// CHECK: %[[VAL_50:.*]] = arith.cmpi ult, %[[VAL_47]], %[[VAL_39]] : index -// CHECK: %[[VAL_51:.*]] = arith.cmpi ult, %[[VAL_48]], %[[VAL_42]] : index -// CHECK: %[[VAL_52:.*]] = arith.andi %[[VAL_50]], %[[VAL_51]] : i1 -// CHECK: %[[VAL_53:.*]] = arith.cmpi ult, %[[VAL_49]], %[[VAL_45]] : index +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_32]]] : memref +// CHECK: %[[VAL_39:.*]] = arith.addi %[[VAL_32]], %[[VAL_7]] : index +// CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_39]]] : memref +// CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_42:.*]] = arith.addi %[[VAL_33]], %[[VAL_7]] : index +// CHECK: %[[VAL_43:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_42]]] : memref +// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_33]], %[[VAL_7]] : index +// CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_45]]] : memref +// CHECK: %[[VAL_47:.*]]:4 = scf.while (%[[VAL_48:.*]] = %[[VAL_38]], %[[VAL_49:.*]] = %[[VAL_41]], %[[VAL_50:.*]] = %[[VAL_44]], %[[VAL_51:.*]] = %[[VAL_36]]) : (index, index, index, f32) -> (index, index, index, f32) { +// CHECK: %[[VAL_52:.*]] = arith.cmpi ult, %[[VAL_48]], %[[VAL_40]] : index +// CHECK: %[[VAL_53:.*]] = arith.cmpi ult, %[[VAL_49]], %[[VAL_43]] : index // CHECK: %[[VAL_54:.*]] = arith.andi %[[VAL_52]], %[[VAL_53]] : i1 -// CHECK: scf.condition(%[[VAL_54]]) %[[VAL_47]], %[[VAL_48]], %[[VAL_49]] : index, index, index +// CHECK: %[[VAL_55:.*]] = arith.cmpi ult, %[[VAL_50]], %[[VAL_46]] : index +// CHECK: %[[VAL_56:.*]] = arith.andi %[[VAL_54]], %[[VAL_55]] : i1 +// CHECK: scf.condition(%[[VAL_56]]) %[[VAL_48]], %[[VAL_49]], %[[VAL_50]], %[[VAL_51]] : index, index, index, f32 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_55:.*]]: index, %[[VAL_56:.*]]: index, %[[VAL_57:.*]]: index): -// CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_55]]] : memref -// CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_56]]] : memref -// CHECK: %[[VAL_60:.*]] = arith.cmpi ult, %[[VAL_59]], %[[VAL_58]] : index -// CHECK: %[[VAL_61:.*]] = select %[[VAL_60]], %[[VAL_59]], %[[VAL_58]] : index -// CHECK: %[[VAL_62:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_57]]] : memref +// CHECK: ^bb0(%[[VAL_57:.*]]: index, %[[VAL_58:.*]]: index, %[[VAL_59:.*]]: index, %[[VAL_60:.*]]: f32): +// CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_57]]] : memref +// CHECK: %[[VAL_62:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_58]]] : memref // CHECK: %[[VAL_63:.*]] = arith.cmpi ult, %[[VAL_62]], %[[VAL_61]] : index // CHECK: %[[VAL_64:.*]] = select %[[VAL_63]], %[[VAL_62]], %[[VAL_61]] : 
index -// CHECK: %[[VAL_65:.*]] = arith.cmpi eq, %[[VAL_58]], %[[VAL_64]] : index -// CHECK: %[[VAL_66:.*]] = arith.cmpi eq, %[[VAL_59]], %[[VAL_64]] : index -// CHECK: %[[VAL_67:.*]] = arith.andi %[[VAL_65]], %[[VAL_66]] : i1 -// CHECK: %[[VAL_68:.*]] = arith.cmpi eq, %[[VAL_62]], %[[VAL_64]] : index -// CHECK: %[[VAL_69:.*]] = arith.andi %[[VAL_67]], %[[VAL_68]] : i1 -// CHECK: scf.if %[[VAL_69]] { -// CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_55]]] : memref -// CHECK: %[[VAL_72:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_56]]] : memref -// CHECK: %[[VAL_73:.*]] = arith.mulf %[[VAL_71]], %[[VAL_72]] : f32 -// CHECK: %[[VAL_74:.*]] = arith.mulf %[[VAL_73]], %[[VAL_36]] : f32 -// CHECK: %[[VAL_75:.*]] = arith.mulf %[[VAL_74]], %[[VAL_25]] : f32 -// CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_57]]] : memref -// CHECK: %[[VAL_77:.*]] = arith.addf %[[VAL_75]], %[[VAL_76]] : f32 -// CHECK: %[[VAL_78:.*]] = arith.addf %[[VAL_70]], %[[VAL_77]] : f32 -// CHECK: memref.store %[[VAL_78]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_65:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_59]]] : memref +// CHECK: %[[VAL_66:.*]] = arith.cmpi ult, %[[VAL_65]], %[[VAL_64]] : index +// CHECK: %[[VAL_67:.*]] = select %[[VAL_66]], %[[VAL_65]], %[[VAL_64]] : index +// CHECK: %[[VAL_68:.*]] = arith.cmpi eq, %[[VAL_61]], %[[VAL_67]] : index +// CHECK: %[[VAL_69:.*]] = arith.cmpi eq, %[[VAL_62]], %[[VAL_67]] : index +// CHECK: %[[VAL_70:.*]] = arith.andi %[[VAL_68]], %[[VAL_69]] : i1 +// CHECK: %[[VAL_71:.*]] = arith.cmpi eq, %[[VAL_65]], %[[VAL_67]] : index +// CHECK: %[[VAL_72:.*]] = arith.andi %[[VAL_70]], %[[VAL_71]] : i1 +// CHECK: %[[VAL_73:.*]] = scf.if %[[VAL_72]] -> (f32) { +// CHECK: %[[VAL_74:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_57]]] : memref +// CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_58]]] : memref +// CHECK: %[[VAL_76:.*]] = arith.mulf %[[VAL_74]], %[[VAL_75]] : f32 +// CHECK: %[[VAL_77:.*]] = arith.mulf %[[VAL_76]], %[[VAL_37]] : f32 +// CHECK: %[[VAL_78:.*]] = arith.mulf %[[VAL_77]], %[[VAL_25]] : f32 +// CHECK: %[[VAL_79:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_59]]] : memref +// CHECK: %[[VAL_80:.*]] = arith.addf %[[VAL_78]], %[[VAL_79]] : f32 +// CHECK: %[[VAL_81:.*]] = arith.addf %[[VAL_60]], %[[VAL_80]] : f32 +// CHECK: scf.yield %[[VAL_81]] : f32 // CHECK: } else { -// CHECK: %[[VAL_79:.*]] = arith.cmpi eq, %[[VAL_58]], %[[VAL_64]] : index -// CHECK: %[[VAL_80:.*]] = arith.cmpi eq, %[[VAL_59]], %[[VAL_64]] : index -// CHECK: %[[VAL_81:.*]] = arith.andi %[[VAL_79]], %[[VAL_80]] : i1 -// CHECK: scf.if %[[VAL_81]] { -// CHECK: %[[VAL_82:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_83:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_55]]] : memref -// CHECK: %[[VAL_84:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_56]]] : memref -// CHECK: %[[VAL_85:.*]] = arith.mulf %[[VAL_83]], %[[VAL_84]] : f32 -// CHECK: %[[VAL_86:.*]] = arith.mulf %[[VAL_85]], %[[VAL_36]] : f32 -// CHECK: %[[VAL_87:.*]] = arith.mulf %[[VAL_86]], %[[VAL_25]] : f32 -// CHECK: %[[VAL_88:.*]] = arith.addf %[[VAL_82]], %[[VAL_87]] : f32 -// CHECK: memref.store %[[VAL_88]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_82:.*]] = arith.cmpi eq, %[[VAL_61]], %[[VAL_67]] : index +// CHECK: %[[VAL_83:.*]] = arith.cmpi eq, %[[VAL_62]], %[[VAL_67]] : index +// CHECK: %[[VAL_84:.*]] = arith.andi %[[VAL_82]], %[[VAL_83]] : i1 +// CHECK: %[[VAL_85:.*]] = scf.if 
%[[VAL_84]] -> (f32) { +// CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_57]]] : memref +// CHECK: %[[VAL_87:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_58]]] : memref +// CHECK: %[[VAL_88:.*]] = arith.mulf %[[VAL_86]], %[[VAL_87]] : f32 +// CHECK: %[[VAL_89:.*]] = arith.mulf %[[VAL_88]], %[[VAL_37]] : f32 +// CHECK: %[[VAL_90:.*]] = arith.mulf %[[VAL_89]], %[[VAL_25]] : f32 +// CHECK: %[[VAL_91:.*]] = arith.addf %[[VAL_60]], %[[VAL_90]] : f32 +// CHECK: scf.yield %[[VAL_91]] : f32 // CHECK: } else { -// CHECK: %[[VAL_89:.*]] = arith.cmpi eq, %[[VAL_62]], %[[VAL_64]] : index -// CHECK: scf.if %[[VAL_89]] { -// CHECK: %[[VAL_90:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_91:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_57]]] : memref -// CHECK: %[[VAL_92:.*]] = arith.addf %[[VAL_90]], %[[VAL_91]] : f32 -// CHECK: memref.store %[[VAL_92]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_92:.*]] = arith.cmpi eq, %[[VAL_65]], %[[VAL_67]] : index +// CHECK: %[[VAL_93:.*]] = scf.if %[[VAL_92]] -> (f32) { +// CHECK: %[[VAL_94:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_59]]] : memref +// CHECK: %[[VAL_95:.*]] = arith.addf %[[VAL_60]], %[[VAL_94]] : f32 +// CHECK: scf.yield %[[VAL_95]] : f32 // CHECK: } else { +// CHECK: scf.yield %[[VAL_60]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_96:.*]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_97:.*]] : f32 // CHECK: } -// CHECK: %[[VAL_93:.*]] = arith.cmpi eq, %[[VAL_58]], %[[VAL_64]] : index -// CHECK: %[[VAL_94:.*]] = arith.addi %[[VAL_55]], %[[VAL_8]] : index -// CHECK: %[[VAL_95:.*]] = select %[[VAL_93]], %[[VAL_94]], %[[VAL_55]] : index -// CHECK: %[[VAL_96:.*]] = arith.cmpi eq, %[[VAL_59]], %[[VAL_64]] : index -// CHECK: %[[VAL_97:.*]] = arith.addi %[[VAL_56]], %[[VAL_8]] : index -// CHECK: %[[VAL_98:.*]] = select %[[VAL_96]], %[[VAL_97]], %[[VAL_56]] : index -// CHECK: %[[VAL_99:.*]] = arith.cmpi eq, %[[VAL_62]], %[[VAL_64]] : index -// CHECK: %[[VAL_100:.*]] = arith.addi %[[VAL_57]], %[[VAL_8]] : index -// CHECK: %[[VAL_101:.*]] = select %[[VAL_99]], %[[VAL_100]], %[[VAL_57]] : index -// CHECK: scf.yield %[[VAL_95]], %[[VAL_98]], %[[VAL_101]] : index, index, index +// CHECK: %[[VAL_98:.*]] = arith.cmpi eq, %[[VAL_61]], %[[VAL_67]] : index +// CHECK: %[[VAL_99:.*]] = arith.addi %[[VAL_57]], %[[VAL_7]] : index +// CHECK: %[[VAL_100:.*]] = select %[[VAL_98]], %[[VAL_99]], %[[VAL_57]] : index +// CHECK: %[[VAL_101:.*]] = arith.cmpi eq, %[[VAL_62]], %[[VAL_67]] : index +// CHECK: %[[VAL_102:.*]] = arith.addi %[[VAL_58]], %[[VAL_7]] : index +// CHECK: %[[VAL_103:.*]] = select %[[VAL_101]], %[[VAL_102]], %[[VAL_58]] : index +// CHECK: %[[VAL_104:.*]] = arith.cmpi eq, %[[VAL_65]], %[[VAL_67]] : index +// CHECK: %[[VAL_105:.*]] = arith.addi %[[VAL_59]], %[[VAL_7]] : index +// CHECK: %[[VAL_106:.*]] = select %[[VAL_104]], %[[VAL_105]], %[[VAL_59]] : index +// CHECK: scf.yield %[[VAL_100]], %[[VAL_103]], %[[VAL_106]], %[[VAL_107:.*]] : index, index, index, f32 // CHECK: } -// CHECK: %[[VAL_102:.*]]:2 = scf.while (%[[VAL_103:.*]] = %[[VAL_104:.*]]#0, %[[VAL_105:.*]] = %[[VAL_104]]#1) : (index, index) -> (index, index) { -// CHECK: %[[VAL_106:.*]] = arith.cmpi ult, %[[VAL_103]], %[[VAL_39]] : index -// CHECK: %[[VAL_107:.*]] = arith.cmpi ult, %[[VAL_105]], %[[VAL_42]] : index -// CHECK: %[[VAL_108:.*]] = arith.andi %[[VAL_106]], %[[VAL_107]] : i1 -// CHECK: scf.condition(%[[VAL_108]]) %[[VAL_103]], %[[VAL_105]] : index, index +// CHECK: %[[VAL_108:.*]]:3 = scf.while (%[[VAL_109:.*]] = %[[VAL_110:.*]]#0, 
%[[VAL_111:.*]] = %[[VAL_110]]#1, %[[VAL_112:.*]] = %[[VAL_110]]#3) : (index, index, f32) -> (index, index, f32) { +// CHECK: %[[VAL_113:.*]] = arith.cmpi ult, %[[VAL_109]], %[[VAL_40]] : index +// CHECK: %[[VAL_114:.*]] = arith.cmpi ult, %[[VAL_111]], %[[VAL_43]] : index +// CHECK: %[[VAL_115:.*]] = arith.andi %[[VAL_113]], %[[VAL_114]] : i1 +// CHECK: scf.condition(%[[VAL_115]]) %[[VAL_109]], %[[VAL_111]], %[[VAL_112]] : index, index, f32 // CHECK: } do { -// CHECK: ^bb0(%[[VAL_109:.*]]: index, %[[VAL_110:.*]]: index): -// CHECK: %[[VAL_111:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_109]]] : memref -// CHECK: %[[VAL_112:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_110]]] : memref -// CHECK: %[[VAL_113:.*]] = arith.cmpi ult, %[[VAL_112]], %[[VAL_111]] : index -// CHECK: %[[VAL_114:.*]] = select %[[VAL_113]], %[[VAL_112]], %[[VAL_111]] : index -// CHECK: %[[VAL_115:.*]] = arith.cmpi eq, %[[VAL_111]], %[[VAL_114]] : index -// CHECK: %[[VAL_116:.*]] = arith.cmpi eq, %[[VAL_112]], %[[VAL_114]] : index -// CHECK: %[[VAL_117:.*]] = arith.andi %[[VAL_115]], %[[VAL_116]] : i1 -// CHECK: scf.if %[[VAL_117]] { -// CHECK: %[[VAL_118:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_119:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_109]]] : memref -// CHECK: %[[VAL_120:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_110]]] : memref -// CHECK: %[[VAL_121:.*]] = arith.mulf %[[VAL_119]], %[[VAL_120]] : f32 -// CHECK: %[[VAL_122:.*]] = arith.mulf %[[VAL_121]], %[[VAL_36]] : f32 -// CHECK: %[[VAL_123:.*]] = arith.mulf %[[VAL_122]], %[[VAL_25]] : f32 -// CHECK: %[[VAL_124:.*]] = arith.addf %[[VAL_118]], %[[VAL_123]] : f32 -// CHECK: memref.store %[[VAL_124]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: ^bb0(%[[VAL_116:.*]]: index, %[[VAL_117:.*]]: index, %[[VAL_118:.*]]: f32): +// CHECK: %[[VAL_119:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_116]]] : memref +// CHECK: %[[VAL_120:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_117]]] : memref +// CHECK: %[[VAL_121:.*]] = arith.cmpi ult, %[[VAL_120]], %[[VAL_119]] : index +// CHECK: %[[VAL_122:.*]] = select %[[VAL_121]], %[[VAL_120]], %[[VAL_119]] : index +// CHECK: %[[VAL_123:.*]] = arith.cmpi eq, %[[VAL_119]], %[[VAL_122]] : index +// CHECK: %[[VAL_124:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_122]] : index +// CHECK: %[[VAL_125:.*]] = arith.andi %[[VAL_123]], %[[VAL_124]] : i1 +// CHECK: %[[VAL_126:.*]] = scf.if %[[VAL_125]] -> (f32) { +// CHECK: %[[VAL_127:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_116]]] : memref +// CHECK: %[[VAL_128:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_117]]] : memref +// CHECK: %[[VAL_129:.*]] = arith.mulf %[[VAL_127]], %[[VAL_128]] : f32 +// CHECK: %[[VAL_130:.*]] = arith.mulf %[[VAL_129]], %[[VAL_37]] : f32 +// CHECK: %[[VAL_131:.*]] = arith.mulf %[[VAL_130]], %[[VAL_25]] : f32 +// CHECK: %[[VAL_132:.*]] = arith.addf %[[VAL_118]], %[[VAL_131]] : f32 +// CHECK: scf.yield %[[VAL_132]] : f32 // CHECK: } else { +// CHECK: scf.yield %[[VAL_118]] : f32 // CHECK: } -// CHECK: %[[VAL_125:.*]] = arith.cmpi eq, %[[VAL_111]], %[[VAL_114]] : index -// CHECK: %[[VAL_126:.*]] = arith.addi %[[VAL_109]], %[[VAL_8]] : index -// CHECK: %[[VAL_127:.*]] = select %[[VAL_125]], %[[VAL_126]], %[[VAL_109]] : index -// CHECK: %[[VAL_128:.*]] = arith.cmpi eq, %[[VAL_112]], %[[VAL_114]] : index -// CHECK: %[[VAL_129:.*]] = arith.addi %[[VAL_110]], %[[VAL_8]] : index -// CHECK: %[[VAL_130:.*]] = select %[[VAL_128]], %[[VAL_129]], %[[VAL_110]] : index -// CHECK: scf.yield %[[VAL_127]], %[[VAL_130]] : index, index +// CHECK: 
%[[VAL_133:.*]] = arith.cmpi eq, %[[VAL_119]], %[[VAL_122]] : index +// CHECK: %[[VAL_134:.*]] = arith.addi %[[VAL_116]], %[[VAL_7]] : index +// CHECK: %[[VAL_135:.*]] = select %[[VAL_133]], %[[VAL_134]], %[[VAL_116]] : index +// CHECK: %[[VAL_136:.*]] = arith.cmpi eq, %[[VAL_120]], %[[VAL_122]] : index +// CHECK: %[[VAL_137:.*]] = arith.addi %[[VAL_117]], %[[VAL_7]] : index +// CHECK: %[[VAL_138:.*]] = select %[[VAL_136]], %[[VAL_137]], %[[VAL_117]] : index +// CHECK: scf.yield %[[VAL_135]], %[[VAL_138]], %[[VAL_139:.*]] : index, index, f32 // CHECK: } -// CHECK: %[[VAL_131:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_132:.*]] = scf.for %[[VAL_133:.*]] = %[[VAL_134:.*]]#2 to %[[VAL_45]] step %[[VAL_8]] iter_args(%[[VAL_135:.*]] = %[[VAL_131]]) -> (f32) { -// CHECK: %[[VAL_136:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_133]]] : memref -// CHECK: %[[VAL_137:.*]] = arith.addf %[[VAL_135]], %[[VAL_136]] : f32 -// CHECK: scf.yield %[[VAL_137]] : f32 +// CHECK: %[[VAL_140:.*]] = scf.for %[[VAL_141:.*]] = %[[VAL_142:.*]]#2 to %[[VAL_46]] step %[[VAL_7]] iter_args(%[[VAL_143:.*]] = %[[VAL_144:.*]]#2) -> (f32) { +// CHECK: %[[VAL_145:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_141]]] : memref +// CHECK: %[[VAL_146:.*]] = arith.addf %[[VAL_143]], %[[VAL_145]] : f32 +// CHECK: scf.yield %[[VAL_146]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_138:.*]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: memref.store %[[VAL_147:.*]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref // CHECK: } else { -// CHECK: scf.if %[[VAL_7]] { -// CHECK: %[[VAL_139:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_140:.*]] = arith.addi %[[VAL_33]], %[[VAL_8]] : index -// CHECK: %[[VAL_141:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_140]]] : memref -// CHECK: %[[VAL_142:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref -// CHECK: %[[VAL_143:.*]] = scf.for %[[VAL_144:.*]] = %[[VAL_139]] to %[[VAL_141]] step %[[VAL_8]] iter_args(%[[VAL_145:.*]] = %[[VAL_142]]) -> (f32) { -// CHECK: %[[VAL_146:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_144]]] : memref -// CHECK: %[[VAL_147:.*]] = arith.addf %[[VAL_145]], %[[VAL_146]] : f32 -// CHECK: scf.yield %[[VAL_147]] : f32 +// CHECK: scf.if %[[VAL_8]] { +// CHECK: %[[VAL_148:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_149:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_150:.*]] = arith.addi %[[VAL_33]], %[[VAL_7]] : index +// CHECK: %[[VAL_151:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_150]]] : memref +// CHECK: %[[VAL_152:.*]] = scf.for %[[VAL_153:.*]] = %[[VAL_149]] to %[[VAL_151]] step %[[VAL_7]] iter_args(%[[VAL_154:.*]] = %[[VAL_148]]) -> (f32) { +// CHECK: %[[VAL_155:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_153]]] : memref +// CHECK: %[[VAL_156:.*]] = arith.addf %[[VAL_154]], %[[VAL_155]] : f32 +// CHECK: scf.yield %[[VAL_156]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_148:.*]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref +// CHECK: memref.store %[[VAL_157:.*]], %[[VAL_24]]{{\[}}%[[VAL_33]]] : memref // CHECK: } else { // CHECK: } // CHECK: } -// CHECK: %[[VAL_149:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_33]] : index -// CHECK: %[[VAL_150:.*]] = arith.addi %[[VAL_32]], %[[VAL_8]] : index -// CHECK: %[[VAL_151:.*]] = select %[[VAL_149]], %[[VAL_150]], %[[VAL_32]] : index -// CHECK: %[[VAL_152:.*]] = arith.addi %[[VAL_33]], %[[VAL_8]] : index -// CHECK: scf.yield %[[VAL_151]], %[[VAL_152]] : index, index +// CHECK: %[[VAL_158:.*]] = arith.cmpi eq, 
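
In sum_kernel_with_inv the reduction lives inside scf.while-based co-iteration, so the patch threads the partial sum through the while loop as an extra f32 operand, and each guarded update becomes a value-producing scf.if whose else branch simply re-yields the unchanged accumulator. That is why the new expectations read `scf.while (...) : (index, index, index, f32)`, `scf.if ... -> (f32)` and `scf.yield`. A self-contained sketch of the same pattern on two sorted index/value arrays (all names invented, not from the patch):

```mlir
// Sum of products over the intersection of two sorted index arrays,
// with the partial sum carried as an f32 operand of scf.while rather
// than stored to memory on each hit.
func @dot_sketch(%ja: memref<?xindex>, %va: memref<?xf32>,
                 %jb: memref<?xindex>, %vb: memref<?xf32>,
                 %na: index, %nb: index) -> f32 {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %f0 = arith.constant 0.0 : f32
  %r:3 = scf.while (%i0 = %c0, %j0 = %c0, %s0 = %f0)
      : (index, index, f32) -> (index, index, f32) {
    %ca = arith.cmpi ult, %i0, %na : index
    %cb = arith.cmpi ult, %j0, %nb : index
    %c = arith.andi %ca, %cb : i1
    scf.condition(%c) %i0, %j0, %s0 : index, index, f32
  } do {
  ^bb0(%i: index, %j: index, %s: f32):
    %ia = memref.load %ja[%i] : memref<?xindex>
    %ib = memref.load %jb[%j] : memref<?xindex>
    %eq = arith.cmpi eq, %ia, %ib : index
    // the guarded update yields a value; the else branch re-yields %s
    %s1 = scf.if %eq -> (f32) {
      %x = memref.load %va[%i] : memref<?xf32>
      %y = memref.load %vb[%j] : memref<?xf32>
      %p = arith.mulf %x, %y : f32
      %a = arith.addf %s, %p : f32
      scf.yield %a : f32
    } else {
      scf.yield %s : f32
    }
    // advance whichever side is behind (both on a match)
    %la = arith.cmpi ule, %ia, %ib : index
    %i1 = arith.addi %i, %c1 : index
    %i2 = select %la, %i1, %i : index
    %lb = arith.cmpi ule, %ib, %ia : index
    %j1 = arith.addi %j, %c1 : index
    %j2 = select %lb, %j1, %j : index
    scf.yield %i2, %j2, %s1 : index, index, f32
  }
  return %r#2 : f32
}
```
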
%[[VAL_34]], %[[VAL_33]] : index +// CHECK: %[[VAL_159:.*]] = arith.addi %[[VAL_32]], %[[VAL_7]] : index +// CHECK: %[[VAL_160:.*]] = select %[[VAL_158]], %[[VAL_159]], %[[VAL_32]] : index +// CHECK: %[[VAL_161:.*]] = arith.addi %[[VAL_33]], %[[VAL_7]] : index +// CHECK: scf.yield %[[VAL_160]], %[[VAL_161]] : index, index // CHECK: } -// CHECK: scf.for %[[VAL_153:.*]] = %[[VAL_154:.*]]#1 to %[[VAL_22]] step %[[VAL_8]] { -// CHECK: %[[VAL_155:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_153]]] : memref -// CHECK: %[[VAL_156:.*]] = arith.addi %[[VAL_153]], %[[VAL_8]] : index -// CHECK: %[[VAL_157:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_156]]] : memref -// CHECK: %[[VAL_158:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_153]]] : memref -// CHECK: %[[VAL_159:.*]] = scf.for %[[VAL_160:.*]] = %[[VAL_155]] to %[[VAL_157]] step %[[VAL_8]] iter_args(%[[VAL_161:.*]] = %[[VAL_158]]) -> (f32) { -// CHECK: %[[VAL_162:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_160]]] : memref -// CHECK: %[[VAL_163:.*]] = arith.addf %[[VAL_161]], %[[VAL_162]] : f32 -// CHECK: scf.yield %[[VAL_163]] : f32 +// CHECK: scf.for %[[VAL_162:.*]] = %[[VAL_163:.*]]#1 to %[[VAL_22]] step %[[VAL_7]] { +// CHECK: %[[VAL_164:.*]] = memref.load %[[VAL_24]]{{\[}}%[[VAL_162]]] : memref +// CHECK: %[[VAL_165:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_162]]] : memref +// CHECK: %[[VAL_166:.*]] = arith.addi %[[VAL_162]], %[[VAL_7]] : index +// CHECK: %[[VAL_167:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_166]]] : memref +// CHECK: %[[VAL_168:.*]] = scf.for %[[VAL_169:.*]] = %[[VAL_165]] to %[[VAL_167]] step %[[VAL_7]] iter_args(%[[VAL_170:.*]] = %[[VAL_164]]) -> (f32) { +// CHECK: %[[VAL_171:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_169]]] : memref +// CHECK: %[[VAL_172:.*]] = arith.addf %[[VAL_170]], %[[VAL_171]] : f32 +// CHECK: scf.yield %[[VAL_172]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_164:.*]], %[[VAL_24]]{{\[}}%[[VAL_153]]] : memref +// CHECK: memref.store %[[VAL_173:.*]], %[[VAL_24]]{{\[}}%[[VAL_162]]] : memref // CHECK: } -// CHECK: %[[VAL_165:.*]] = memref.tensor_load %[[VAL_24]] : memref -// CHECK: return %[[VAL_165]] : tensor +// CHECK: %[[VAL_174:.*]] = memref.tensor_load %[[VAL_24]] : memref +// CHECK: return %[[VAL_174]] : tensor // CHECK: } func @sum_kernel_with_inv(%arga: tensor, %argb: tensor, diff --git a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir index 734ea159b8207..9070ac36d3b16 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir @@ -1194,39 +1194,41 @@ func @kernel_3d(%arga: tensor, } // CHECK-LABEL: func @sum_reduction( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { -// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref -// CHECK: 
%[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref -// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_2]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}>> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}>> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref // CHECK: %[[VAL_10:.*]] = memref.alloc() : memref // CHECK: memref.copy %[[VAL_9]], %[[VAL_10]] : memref to memref -// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref -// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { -// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref -// CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_4]] : index -// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref -// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_14]] to %[[VAL_16]] step %[[VAL_4]] { -// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref -// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_17]], %[[VAL_4]] : index -// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_19]]] : memref -// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_10]][] : memref -// CHECK: %[[VAL_22:.*]] = scf.for %[[VAL_23:.*]] = %[[VAL_18]] to %[[VAL_20]] step %[[VAL_4]] iter_args(%[[VAL_24:.*]] = %[[VAL_21]]) -> (f32) { -// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_23]]] : memref -// CHECK: %[[VAL_26:.*]] = arith.addf %[[VAL_24]], %[[VAL_25]] : f32 -// CHECK: scf.yield %[[VAL_26]] : f32 +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref +// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_11]]) -> (f32) { +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_15]], %[[VAL_3]] : index +// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_20:.*]] = scf.for %[[VAL_21:.*]] = %[[VAL_17]] to %[[VAL_19]] step %[[VAL_3]] iter_args(%[[VAL_22:.*]] = %[[VAL_16]]) -> (f32) { +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_21]], %[[VAL_3]] : index +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_24]]] 
: memref +// CHECK: %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_23]] to %[[VAL_25]] step %[[VAL_3]] iter_args(%[[VAL_28:.*]] = %[[VAL_22]]) -> (f32) { +// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_27]]] : memref +// CHECK: %[[VAL_30:.*]] = arith.addf %[[VAL_28]], %[[VAL_29]] : f32 +// CHECK: scf.yield %[[VAL_30]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_27:.*]], %[[VAL_10]][] : memref +// CHECK: scf.yield %[[VAL_26]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_20]] : f32 // CHECK: } -// CHECK: %[[VAL_28:.*]] = memref.tensor_load %[[VAL_10]] : memref -// CHECK: return %[[VAL_28]] : tensor +// CHECK: memref.store %[[VAL_14]], %[[VAL_10]][] : memref +// CHECK: %[[VAL_34:.*]] = memref.tensor_load %[[VAL_10]] : memref +// CHECK: return %[[VAL_34]] : tensor // CHECK: } func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction @@ -1250,35 +1252,37 @@ func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) -> t } // CHECK-LABEL: func @sum_reduction_inv( -// CHECK-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-SAME: %[[VAL_1:.*]]: tensor>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor) -> tensor { -// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor -// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor +// CHECK-SAME: %[[VAL_0:.*]]: tensor, +// CHECK-SAME: %[[VAL_1:.*]]: tensor> +// CHECK-SAME: %[[VAL_2:.*]]: tensor) -> tensor { +// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor +// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor // CHECK: %[[VAL_8:.*]] = memref.buffer_cast %[[VAL_0]] : memref -// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor> -// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_5]] : tensor> +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> // CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_2]] : memref // CHECK: %[[VAL_12:.*]] = memref.alloc() : memref // CHECK: memref.copy %[[VAL_11]], %[[VAL_12]] : memref to memref -// CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_9]] step %[[VAL_5]] { -// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_13]]] : memref -// CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_4]] to %[[VAL_6]] step %[[VAL_5]] { -// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_12]][] : memref -// CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_4]] to %[[VAL_7]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f32) { -// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_13]], %[[VAL_15]], %[[VAL_18]]] : memref -// CHECK: %[[VAL_21:.*]] = arith.mulf %[[VAL_20]], %[[VAL_14]] : f32 -// CHECK: %[[VAL_22:.*]] = arith.addf %[[VAL_19]], %[[VAL_21]] : f32 -// CHECK: scf.yield %[[VAL_22]] : f32 +// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref +// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_9]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) { +// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref +// CHECK: %[[VAL_18:.*]] = scf.for %[[VAL_19:.*]] = %[[VAL_5]] 
to %[[VAL_6]] step %[[VAL_3]] iter_args(%[[VAL_20:.*]] = %[[VAL_16]]) -> (f32) { +// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_5]] to %[[VAL_7]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_20]]) -> (f32) { +// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_15]], %[[VAL_19]], %[[VAL_22]]] : memref +// CHECK: %[[VAL_25:.*]] = arith.mulf %[[VAL_24]], %[[VAL_17]] : f32 +// CHECK: %[[VAL_26:.*]] = arith.addf %[[VAL_23]], %[[VAL_25]] : f32 +// CHECK: scf.yield %[[VAL_26]] : f32 // CHECK: } -// CHECK: memref.store %[[VAL_23:.*]], %[[VAL_12]][] : memref +// CHECK: scf.yield %[[VAL_21]] : f32 // CHECK: } +// CHECK: scf.yield %[[VAL_18]] : f32 // CHECK: } -// CHECK: %[[VAL_24:.*]] = memref.tensor_load %[[VAL_12]] : memref -// CHECK: return %[[VAL_24]] : tensor +// CHECK: memref.store %[[VAL_14]], %[[VAL_12]][] : memref +// CHECK: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_12]] : memref +// CHECK: return %[[VAL_30]] : tensor // CHECK: } func @sum_reduction_inv(%arga: tensor, %argb: tensor, diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir index 2ad36917530dd..363a0281af7b6 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -21,24 +21,24 @@ } // CHECK-HIR-LABEL: func @matvec( -// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, -// CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { +// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, +// CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-HIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref -// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref -// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref +// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-HIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64> // CHECK-HIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64> // CHECK-HIR: %[[VAL_11:.*]] = memref.alloc() : memref<32xf64> // CHECK-HIR: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xf64> to memref<32xf64> // CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-HIR: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref -// CHECK-HIR: %[[VAL_14:.*]] = arith.addi 
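
In the 3-d reductions above (sum_reduction and sum_reduction_inv), the change composes across the whole loop nest: the load of the accumulator hoists out of all three loops, the innermost scf.for produces the partial sum, and every enclosing level just forwards that result with scf.yield. A hand-written sketch of the resulting shape, with invented names (%t, %buf, dense bounds %n0..%n2):

```mlir
// The accumulator is read once before the whole nest and written once
// after it; each level forwards the partial sum of the level below.
func @sum3d_sketch(%t: memref<?x?x?xf32>, %buf: memref<f32>,
                   %n0: index, %n1: index, %n2: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %init = memref.load %buf[] : memref<f32>
  %r0 = scf.for %i = %c0 to %n0 step %c1 iter_args(%a0 = %init) -> (f32) {
    %r1 = scf.for %j = %c0 to %n1 step %c1 iter_args(%a1 = %a0) -> (f32) {
      %r2 = scf.for %k = %c0 to %n2 step %c1 iter_args(%a2 = %a1) -> (f32) {
        %v = memref.load %t[%i, %j, %k] : memref<?x?x?xf32>
        %s = arith.addf %a2, %v : f32
        scf.yield %s : f32
      }
      scf.yield %r2 : f32
    }
    scf.yield %r1 : f32
  }
  memref.store %r0, %buf[] : memref<f32>
  return
}
```
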
%[[VAL_12]], %[[VAL_5]] : index -// CHECK-HIR: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref -// CHECK-HIR: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<32xf64> +// CHECK-HIR-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref +// CHECK-HIR-DAG: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index +// CHECK-HIR-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK-HIR-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<32xf64> // CHECK-HIR: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f64) { // CHECK-HIR: %[[VAL_20:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref // CHECK-HIR: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref @@ -47,16 +47,16 @@ // CHECK-HIR: %[[VAL_24:.*]] = arith.addf %[[VAL_19]], %[[VAL_23]] : f64 // CHECK-HIR: scf.yield %[[VAL_24]] : f64 // CHECK-HIR: } -// CHECK-HIR: memref.store %[[VAL_25:.*]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<32xf64> +// CHECK-HIR: memref.store %[[VAL_17]], %[[VAL_11]]{{\[}}%[[VAL_12]]] : memref<32xf64> // CHECK-HIR: } // CHECK-HIR: %[[VAL_26:.*]] = memref.tensor_load %[[VAL_11]] : memref<32xf64> // CHECK-HIR: return %[[VAL_26]] : tensor<32xf64> // CHECK-HIR: } // CHECK-MIR-LABEL: func @matvec( -// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, -// CHECK-MIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { +// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, +// CHECK-MIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-MIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-MIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index @@ -68,10 +68,10 @@ // CHECK-MIR: %[[VAL_11:.*]] = memref.alloc() : memref<32xf64> // CHECK-MIR: memref.copy %[[VAL_10]], %[[VAL_11]] : memref<32xf64> to memref<32xf64> // CHECK-MIR: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-MIR: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref -// CHECK-MIR: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_5]] : index -// CHECK-MIR: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_16]]] : memref -// CHECK-MIR: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<32xf64> +// CHECK-MIR-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK-MIR-DAG: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_5]] : index +// CHECK-MIR-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_16]]] : memref +// CHECK-MIR-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<32xf64> // CHECK-MIR: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_15]] to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_21:.*]] = %[[VAL_18]]) -> (f64) { // CHECK-MIR: %[[VAL_22:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_20]]] : memref // CHECK-MIR: %[[VAL_23:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref @@ -80,16 +80,16 @@ // CHECK-MIR: %[[VAL_26:.*]] = arith.addf %[[VAL_21]], %[[VAL_25]] : f64 // CHECK-MIR: scf.yield %[[VAL_26]] : f64 // CHECK-MIR: } -// CHECK-MIR: memref.store %[[VAL_27:.*]], %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<32xf64> +// CHECK-MIR: memref.store %[[VAL_19]], %[[VAL_11]]{{\[}}%[[VAL_14]]] : memref<32xf64> // CHECK-MIR: } // CHECK-MIR: %[[VAL_28:.*]] = memref.tensor_load %[[VAL_11]] : 
memref<32xf64> // CHECK-MIR: return %[[VAL_28]] : tensor<32xf64> // CHECK-MIR: } // CHECK-LIR-LABEL: func @matvec( -// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-LIR-SAME: %[[VAL_1:.*]]: memref<64xf64>, -// CHECK-LIR-SAME: %[[VAL_2:.*]]: memref<32xf64>) -> memref<32xf64> { +// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-LIR-SAME: %[[VAL_1:.*]]: memref<64xf64>, +// CHECK-LIR-SAME: %[[VAL_2:.*]]: memref<32xf64>) -> memref<32xf64> { // CHECK-LIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-LIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-LIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index @@ -99,10 +99,10 @@ // CHECK-LIR: %[[VAL_9:.*]] = memref.alloc() : memref<32xf64> // CHECK-LIR: memref.copy %[[VAL_2]], %[[VAL_9]] : memref<32xf64> to memref<32xf64> // CHECK-LIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-LIR: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref -// CHECK-LIR: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index -// CHECK-LIR: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref -// CHECK-LIR: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<32xf64> +// CHECK-LIR-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref +// CHECK-LIR-DAG: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index +// CHECK-LIR-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_14]]] : memref +// CHECK-LIR-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<32xf64> // CHECK-LIR: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_13]] to %[[VAL_15]] step %[[VAL_5]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f64) { // CHECK-LIR: %[[VAL_20:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_18]]] : memref // CHECK-LIR: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref @@ -111,7 +111,7 @@ // CHECK-LIR: %[[VAL_24:.*]] = arith.addf %[[VAL_19]], %[[VAL_23]] : f64 // CHECK-LIR: scf.yield %[[VAL_24]] : f64 // CHECK-LIR: } -// CHECK-LIR: memref.store %[[VAL_25:.*]], %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<32xf64> +// CHECK-LIR: memref.store %[[VAL_17]], %[[VAL_9]]{{\[}}%[[VAL_12]]] : memref<32xf64> // CHECK-LIR: } // CHECK-LIR: return %[[VAL_9]] : memref<32xf64> // CHECK-LIR: } diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir index 40bc39f4605b0..728ef0d184fcd 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -21,22 +21,22 @@ } // CHECK-HIR-LABEL: func @matvec( -// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>>, -// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, -// CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> { +// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, +// CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> { // CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-HIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, 
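
Independently of the reduction change, several of these hunks (sparse_3d, sparse_lower, sparse_lower_inplace) stop spelling out the full sparse encoding attribute and match it with a FileCheck regex block instead. `{{...}}` in a CHECK pattern encloses a regular expression, and a CHECK directive only needs to match part of the input line, so trailing text such as `to memref<?xindex>` can simply be omitted. A schematic comparison (hand-written, not from the patch):

```mlir
// Spelled out, the expectation breaks whenever the attribute's printed
// form changes:
// CHECK: sparse_tensor.pointers %{{.*}} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref<?xindex>
// With a regex block, any encoding body between the literal braces is
// accepted, and the trailing "to memref<?xindex>" is dropped:
// CHECK: sparse_tensor.pointers %{{.*}} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>
```
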
indexBitWidth = 0 }>> to memref -// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref -// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], pointerBitWidth = 0, indexBitWidth = 0 }>> to memref +// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_5]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-HIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64> // CHECK-HIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64> // CHECK-HIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-HIR: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref -// CHECK-HIR: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_5]] : index -// CHECK-HIR: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref -// CHECK-HIR: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> +// CHECK-HIR-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK-HIR-DAG: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_5]] : index +// CHECK-HIR-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref +// CHECK-HIR-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> // CHECK-HIR: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[VAL_12]] to %[[VAL_14]] step %[[VAL_5]] iter_args(%[[VAL_18:.*]] = %[[VAL_15]]) -> (f64) { // CHECK-HIR: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref // CHECK-HIR: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref @@ -45,16 +45,16 @@ // CHECK-HIR: %[[VAL_23:.*]] = arith.addf %[[VAL_18]], %[[VAL_22]] : f64 // CHECK-HIR: scf.yield %[[VAL_23]] : f64 // CHECK-HIR: } -// CHECK-HIR: memref.store %[[VAL_24:.*]], %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> +// CHECK-HIR: memref.store %[[VAL_16]], %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> // CHECK-HIR: } // CHECK-HIR: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf64> // CHECK-HIR: return %[[VAL_25]] : tensor<32xf64> // CHECK-HIR: } // CHECK-MIR-LABEL: func @matvec( -// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, -// CHECK-MIR-SAME: %[[VAL_2:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> { +// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, +// CHECK-MIR-SAME: %[[VAL_2:.*]]: tensor<32xf64> {linalg.inplaceable = true}) -> tensor<32xf64> { // CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-MIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-MIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index @@ -64,10 +64,10 @@ // CHECK-MIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref<64xf64> // CHECK-MIR: %[[VAL_10:.*]] = memref.buffer_cast %[[VAL_2]] : memref<32xf64> // CHECK-MIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-MIR: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref -// CHECK-MIR: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], 
%[[VAL_5]] : index -// CHECK-MIR: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref -// CHECK-MIR: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> +// CHECK-MIR-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK-MIR-DAG: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_5]] : index +// CHECK-MIR-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref +// CHECK-MIR-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> // CHECK-MIR: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[VAL_12]] to %[[VAL_14]] step %[[VAL_5]] iter_args(%[[VAL_18:.*]] = %[[VAL_15]]) -> (f64) { // CHECK-MIR: %[[VAL_19:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref // CHECK-MIR: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref @@ -76,16 +76,16 @@ // CHECK-MIR: %[[VAL_23:.*]] = arith.addf %[[VAL_18]], %[[VAL_22]] : f64 // CHECK-MIR: scf.yield %[[VAL_23]] : f64 // CHECK-MIR: } -// CHECK-MIR: memref.store %[[VAL_24:.*]], %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> +// CHECK-MIR: memref.store %[[VAL_16]], %[[VAL_10]]{{\[}}%[[VAL_11]]] : memref<32xf64> // CHECK-MIR: } // CHECK-MIR: %[[VAL_25:.*]] = memref.tensor_load %[[VAL_10]] : memref<32xf64> // CHECK-MIR: return %[[VAL_25]] : tensor<32xf64> // CHECK-MIR: } // CHECK-LIR-LABEL: func @matvec( -// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-LIR-SAME: %[[VAL_1:.*]]: memref<64xf64>, -// CHECK-LIR-SAME: %[[VAL_2:.*]]: memref<32xf64> {linalg.inplaceable = true}) -> memref<32xf64> { +// CHECK-LIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-LIR-SAME: %[[VAL_1:.*]]: memref<64xf64>, +// CHECK-LIR-SAME: %[[VAL_2:.*]]: memref<32xf64> {linalg.inplaceable = true}) -> memref<32xf64> { // CHECK-LIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-LIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-LIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index @@ -93,10 +93,10 @@ // CHECK-LIR: %[[VAL_7:.*]] = call @sparseIndices(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK-LIR: %[[VAL_8:.*]] = call @sparseValuesF64(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-LIR: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { -// CHECK-LIR: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref -// CHECK-LIR: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_5]] : index -// CHECK-LIR: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref -// CHECK-LIR: %[[VAL_13:.*]] = memref.load %[[VAL_2]]{{\[}}%[[VAL_9]]] : memref<32xf64> +// CHECK-LIR-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref +// CHECK-LIR-DAG: %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_5]] : index +// CHECK-LIR-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref +// CHECK-LIR-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_2]]{{\[}}%[[VAL_9]]] : memref<32xf64> // CHECK-LIR: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_10]] to %[[VAL_12]] step %[[VAL_5]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f64) { // CHECK-LIR: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK-LIR: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref @@ -105,7 +105,7 @@ // CHECK-LIR: %[[VAL_21:.*]] = arith.addf %[[VAL_16]], %[[VAL_20]] : f64 // CHECK-LIR: scf.yield %[[VAL_21]] : f64 // CHECK-LIR: } -// CHECK-LIR: memref.store %[[VAL_22:.*]], %[[VAL_2]]{{\[}}%[[VAL_9]]] : memref<32xf64> +// CHECK-LIR: memref.store %[[VAL_14]], %[[VAL_2]]{{\[}}%[[VAL_9]]] : memref<32xf64> // 
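
A second mechanical change runs through all of the matvec variants above, at the HIR, MIR and LIR levels alike: the independent operations that set up the inner loop bounds are now matched with CHECK-DAG, which lets them appear in any relative order, and the value stored back is matched by reusing the pattern variable already bound to the scf.for result rather than binding a fresh one. Schematically (capture names invented):

```mlir
// CHECK-DAG lines may match in any order between two ordered CHECK
// lines, so an instruction-scheduling change no longer breaks the test:
// CHECK-DAG: %[[LO:.*]] = memref.load %[[PTR]][%[[I]]]
// CHECK-DAG: %[[I1:.*]] = arith.addi %[[I]], %[[C1]]
// CHECK-DAG: %[[HI:.*]] = memref.load %[[PTR]][%[[I1]]]
// Reusing %[[SUM]] (bound at the scf.for) asserts that exactly the loop
// result is stored, where a fresh %[[X:.*]] binding would match anything:
// CHECK: memref.store %[[SUM]], %[[OUT]][%[[I]]]
```

For reference, files like sparse_lower.mlir select these prefixes with several RUN lines along the lines of `mlir-opt %s -sparsification ... | FileCheck %s --check-prefix=CHECK-HIR`; the exact flags are outside the quoted hunks, so that invocation is an assumption.
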
CHECK-LIR: } // CHECK-LIR: return %[[VAL_2]] : memref<32xf64> // CHECK-LIR: } diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir index 30d36b5655a13..43135912e41c8 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -17,67 +17,71 @@ } // CHECK-HIR-LABEL: func @sparse_dynamic_dims( -// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor>, -// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor) -> tensor { -// CHECK-HIR-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-HIR-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-HIR-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-HIR: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[C2]] : tensor> -// CHECK-HIR: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[C0]] : tensor> -// CHECK-HIR: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[C1]] : tensor> -// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> +// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor) -> tensor { +// CHECK-HIR-DAG: %[[VAL_2:.*]] = arith.constant 1 : index +// CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 2 : index +// CHECK-HIR: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor> +// CHECK-HIR: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor> +// CHECK-HIR: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor> +// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> // CHECK-HIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref // CHECK-HIR: %[[VAL_10:.*]] = memref.alloc() : memref // CHECK-HIR: memref.copy %[[VAL_9]], %[[VAL_10]] : memref to memref -// CHECK-HIR: scf.for %[[VAL_11:.*]] = %[[C0]] to %[[VAL_5]] step %[[C1]] { -// CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[C0]] to %[[VAL_6]] step %[[C1]] { -// CHECK-HIR: %[[VAL_13:.*]] = arith.muli %[[VAL_6]], %[[VAL_11]] : index -// CHECK-HIR: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : index -// CHECK-HIR: %[[VAL_15:.*]] = memref.load %[[VAL_10]][] : memref -// CHECK-HIR: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[C0]] to %[[VAL_7]] step %[[C1]] iter_args(%[[VAL_18:.*]] = %[[VAL_15]]) -> (f32) { -// CHECK-HIR: %[[VAL_19:.*]] = arith.muli %[[VAL_7]], %[[VAL_14]] : index -// CHECK-HIR: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_17]] : index -// CHECK-HIR: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref -// CHECK-HIR: %[[VAL_22:.*]] = arith.addf %[[VAL_18]], %[[VAL_21]] : f32 -// CHECK-HIR: scf.yield %[[VAL_22]] : f32 +// CHECK-HIR: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref +// CHECK-HIR: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_11]]) -> (f32) { +// CHECK-HIR: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_2]] iter_args(%[[VAL_17:.*]] = %[[VAL_14]]) -> (f32) { +// CHECK-HIR: %[[VAL_18:.*]] = arith.muli %[[VAL_6]], %[[VAL_13]] : index +// CHECK-HIR: %[[VAL_19:.*]] = arith.addi %[[VAL_18]], %[[VAL_16]] : index +// CHECK-HIR: %[[VAL_20:.*]] = scf.for %[[VAL_21:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_2]] iter_args(%[[VAL_22:.*]] = %[[VAL_17]]) -> (f32) { +// CHECK-HIR: %[[VAL_23:.*]] = arith.muli %[[VAL_7]], %[[VAL_19]] : index +// CHECK-HIR: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_21]] : index +// CHECK-HIR: %[[VAL_25:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK-HIR: %[[VAL_26:.*]] 
= arith.addf %[[VAL_22]], %[[VAL_25]] : f32 +// CHECK-HIR: scf.yield %[[VAL_26]] : f32 // CHECK-HIR: } -// CHECK-HIR: memref.store %[[VAL_23:.*]], %[[VAL_10]][] : memref +// CHECK-HIR: scf.yield %[[VAL_20]] : f32 // CHECK-HIR: } +// CHECK-HIR: scf.yield %[[VAL_15]] : f32 // CHECK-HIR: } -// CHECK-HIR: %[[VAL_24:.*]] = memref.tensor_load %[[VAL_10]] : memref -// CHECK-HIR: return %[[VAL_24]] : tensor +// CHECK-HIR: memref.store %[[VAL_12]], %[[VAL_10]][] : memref +// CHECK-HIR: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_10]] : memref +// CHECK-HIR: return %[[VAL_30]] : tensor // CHECK-HIR: } // // CHECK-MIR-LABEL: func @sparse_dynamic_dims( -// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor) -> tensor { -// CHECK-MIR-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-MIR-DAG: %[[C1:.*]] = arith.constant 1 : index -// CHECK-MIR-DAG: %[[C2:.*]] = arith.constant 2 : index -// CHECK-MIR: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[C0]]) : (!llvm.ptr, index) -> index -// CHECK-MIR: %[[VAL_6:.*]] = call @sparseDimSize(%[[VAL_0]], %[[C1]]) : (!llvm.ptr, index) -> index -// CHECK-MIR: %[[VAL_7:.*]] = call @sparseDimSize(%[[VAL_0]], %[[C2]]) : (!llvm.ptr, index) -> index +// CHECK-MIR-SAME: %[[VAL_0:.*]]: !llvm.ptr, +// CHECK-MIR-SAME: %[[VAL_1:.*]]: tensor) -> tensor { +// CHECK-MIR-DAG: %[[VAL_2:.*]] = arith.constant 2 : index +// CHECK-MIR-DAG: %[[VAL_3:.*]] = arith.constant 1 : index +// CHECK-MIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index +// CHECK-MIR: %[[VAL_5:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_4]]) : (!llvm.ptr, index) -> index +// CHECK-MIR: %[[VAL_6:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_3]]) : (!llvm.ptr, index) -> index +// CHECK-MIR: %[[VAL_7:.*]] = call @sparseDimSize(%[[VAL_0]], %[[VAL_2]]) : (!llvm.ptr, index) -> index // CHECK-MIR: %[[VAL_8:.*]] = call @sparseValuesF32(%[[VAL_0]]) : (!llvm.ptr) -> memref // CHECK-MIR: %[[VAL_9:.*]] = memref.buffer_cast %[[VAL_1]] : memref // CHECK-MIR: %[[VAL_10:.*]] = memref.alloc() : memref // CHECK-MIR: memref.copy %[[VAL_9]], %[[VAL_10]] : memref to memref -// CHECK-MIR: scf.for %[[VAL_11:.*]] = %[[C0]] to %[[VAL_5]] step %[[C1]] { -// CHECK-MIR: scf.for %[[VAL_12:.*]] = %[[C0]] to %[[VAL_6]] step %[[C1]] { -// CHECK-MIR: %[[VAL_13:.*]] = arith.muli %[[VAL_6]], %[[VAL_11]] : index -// CHECK-MIR: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[VAL_12]] : index -// CHECK-MIR: %[[VAL_15:.*]] = memref.load %[[VAL_10]][] : memref -// CHECK-MIR: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[C0]] to %[[VAL_7]] step %[[C1]] iter_args(%[[VAL_18:.*]] = %[[VAL_15]]) -> (f32) { -// CHECK-MIR: %[[VAL_19:.*]] = arith.muli %[[VAL_7]], %[[VAL_14]] : index -// CHECK-MIR: %[[VAL_20:.*]] = arith.addi %[[VAL_19]], %[[VAL_17]] : index -// CHECK-MIR: %[[VAL_21:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref -// CHECK-MIR: %[[VAL_22:.*]] = arith.addf %[[VAL_18]], %[[VAL_21]] : f32 -// CHECK-MIR: scf.yield %[[VAL_22]] : f32 +// CHECK-MIR: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref +// CHECK-MIR: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_5]] step %[[VAL_3]] iter_args(%[[VAL_14:.*]] = %[[VAL_11]]) -> (f32) { +// CHECK-MIR: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_4]] to %[[VAL_6]] step %[[VAL_3]] iter_args(%[[VAL_17:.*]] = %[[VAL_14]]) -> (f32) { +// CHECK-MIR: %[[VAL_18:.*]] = arith.muli %[[VAL_6]], %[[VAL_13]] : index +// CHECK-MIR: %[[VAL_19:.*]] = arith.addi %[[VAL_18]], %[[VAL_16]] : index +// CHECK-MIR: %[[VAL_20:.*]] = scf.for %[[VAL_21:.*]] = %[[VAL_4]] to 
%[[VAL_7]] step %[[VAL_3]] iter_args(%[[VAL_22:.*]] = %[[VAL_17]]) -> (f32) { +// CHECK-MIR: %[[VAL_23:.*]] = arith.muli %[[VAL_7]], %[[VAL_19]] : index +// CHECK-MIR: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_21]] : index +// CHECK-MIR: %[[VAL_25:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref +// CHECK-MIR: %[[VAL_26:.*]] = arith.addf %[[VAL_22]], %[[VAL_25]] : f32 +// CHECK-MIR: scf.yield %[[VAL_26]] : f32 // CHECK-MIR: } -// CHECK-MIR: memref.store %[[VAL_23:.*]], %[[VAL_10]][] : memref +// CHECK-MIR: scf.yield %[[VAL_20]] : f32 // CHECK-MIR: } +// CHECK-MIR: scf.yield %[[VAL_15]] : f32 // CHECK-MIR: } -// CHECK-MIR: %[[VAL_24:.*]] = memref.tensor_load %[[VAL_10]] : memref -// CHECK-MIR: return %[[VAL_24]] : tensor +// CHECK-MIR: memref.store %[[VAL_12]], %[[VAL_10]][] : memref +// CHECK-MIR: %[[VAL_30:.*]] = memref.tensor_load %[[VAL_10]] : memref +// CHECK-MIR: return %[[VAL_30]] : tensor // CHECK-MIR: } func @sparse_dynamic_dims(%arga: tensor, %argx: tensor) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir new file mode 100644 index 0000000000000..ab694417a38c3 --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir @@ -0,0 +1,129 @@ +// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=8" -canonicalize | \ +// RUN: FileCheck %s + +#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["dense","compressed"]}> + +#trait = { + indexing_maps = [ + affine_map<(i,j) -> (i,j)>, // a (in) + affine_map<(i,j) -> (i,j)>, // b (in) + affine_map<(i,j) -> ()> // x (out) + ], + iterator_types = ["reduction", "reduction"] +} + +// Verifies that the SIMD reductions in the two for-loops after the +// while-loop are chained before horizontally reducing these back to scalar. 
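+// +// As an editorial sketch only (illustrative SSA names and an assumed shape, not checks produced by the script), the chaining amounts to: +//   %v0 = vector.insertelement %acc, %zero[%c0 : i32] : vector<8xf64> +//   %v1 = scf.for ... iter_args(%p = %v0) -> (vector<8xf64>) { ... arith.addf ... } +//   %v2 = scf.for ... iter_args(%q = %v1) -> (vector<8xf64>) { ... arith.addf ... } +//   %sum = vector.reduction "add", %v2 : vector<8xf64> into f64 +// i.e. the partial SIMD sums of the first for-loop seed the second for-loop, and only the final vector is reduced horizontally back to a scalar.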
+// +// CHECK-LABEL: func @sparse_matrix_sum( +// CHECK-SAME: %[[VAL_0:.*]]: tensor {linalg.inplaceable = true}, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor { +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<0.000000e+00> : vector<8xf64> +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : index +// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : i32 +// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 64 : index +// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_2]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_2]], %[[VAL_8]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_15:.*]] = memref.buffer_cast %[[VAL_0]] : memref +// CHECK: %[[VAL_16:.*]] = tensor.extract %[[VAL_0]][] : tensor +// CHECK: %[[VAL_17:.*]] = scf.for %[[VAL_18:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_8]] iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f64) { +// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_18]], %[[VAL_8]] : index +// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref +// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_18]]] : memref +// CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_18]], %[[VAL_8]] : index +// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_24]]] : memref +// CHECK: %[[VAL_26:.*]]:3 = scf.while (%[[VAL_27:.*]] = %[[VAL_20]], %[[VAL_28:.*]] = %[[VAL_23]], %[[VAL_29:.*]] = %[[VAL_19]]) : (index, index, f64) -> (index, index, f64) { +// CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_22]] : index +// CHECK: %[[VAL_31:.*]] = arith.cmpi ult, %[[VAL_28]], %[[VAL_25]] : index +// CHECK: %[[VAL_32:.*]] = arith.andi %[[VAL_30]], %[[VAL_31]] : i1 +// CHECK: scf.condition(%[[VAL_32]]) %[[VAL_27]], %[[VAL_28]], %[[VAL_29]] : index, index, f64 +// CHECK: } do { +// CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index, %[[VAL_35:.*]]: f64): +// CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_38:.*]] = arith.cmpi ult, %[[VAL_37]], %[[VAL_36]] : index +// CHECK: %[[VAL_39:.*]] = select %[[VAL_38]], %[[VAL_37]], %[[VAL_36]] : index +// CHECK: %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index +// CHECK: %[[VAL_41:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index +// CHECK: %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1 +// CHECK: %[[VAL_43:.*]] = scf.if %[[VAL_42]] -> (f64) { +// CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_46:.*]] = arith.addf %[[VAL_44]], %[[VAL_45]] : f64 
+// CHECK: %[[VAL_47:.*]] = arith.addf %[[VAL_35]], %[[VAL_46]] : f64 +// CHECK: scf.yield %[[VAL_47]] : f64 +// CHECK: } else { +// CHECK: %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index +// CHECK: %[[VAL_49:.*]] = scf.if %[[VAL_48]] -> (f64) { +// CHECK: %[[VAL_50:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_33]]] : memref +// CHECK: %[[VAL_51:.*]] = arith.addf %[[VAL_35]], %[[VAL_50]] : f64 +// CHECK: scf.yield %[[VAL_51]] : f64 +// CHECK: } else { +// CHECK: %[[VAL_52:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index +// CHECK: %[[VAL_53:.*]] = scf.if %[[VAL_52]] -> (f64) { +// CHECK: %[[VAL_54:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_34]]] : memref +// CHECK: %[[VAL_55:.*]] = arith.addf %[[VAL_35]], %[[VAL_54]] : f64 +// CHECK: scf.yield %[[VAL_55]] : f64 +// CHECK: } else { +// CHECK: scf.yield %[[VAL_35]] : f64 +// CHECK: } +// CHECK: scf.yield %[[VAL_56:.*]] : f64 +// CHECK: } +// CHECK: scf.yield %[[VAL_57:.*]] : f64 +// CHECK: } +// CHECK: %[[VAL_58:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index +// CHECK: %[[VAL_59:.*]] = arith.addi %[[VAL_33]], %[[VAL_8]] : index +// CHECK: %[[VAL_60:.*]] = select %[[VAL_58]], %[[VAL_59]], %[[VAL_33]] : index +// CHECK: %[[VAL_61:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index +// CHECK: %[[VAL_62:.*]] = arith.addi %[[VAL_34]], %[[VAL_8]] : index +// CHECK: %[[VAL_63:.*]] = select %[[VAL_61]], %[[VAL_62]], %[[VAL_34]] : index +// CHECK: scf.yield %[[VAL_60]], %[[VAL_63]], %[[VAL_64:.*]] : index, index, f64 +// CHECK: } +// CHECK: %[[VAL_65:.*]] = vector.insertelement %[[VAL_66:.*]]#2, %[[VAL_3]]{{\[}}%[[VAL_5]] : i32] : vector<8xf64> +// CHECK: %[[VAL_67:.*]] = scf.for %[[VAL_68:.*]] = %[[VAL_66]]#0 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_69:.*]] = %[[VAL_65]]) -> (vector<8xf64>) { +// CHECK: %[[VAL_70:.*]] = affine.min #map(%[[VAL_22]], %[[VAL_68]]) +// CHECK: %[[VAL_71:.*]] = vector.create_mask %[[VAL_70]] : vector<8xi1> +// CHECK: %[[VAL_72:.*]] = vector.maskedload %[[VAL_11]]{{\[}}%[[VAL_68]]], %[[VAL_71]], %[[VAL_3]] : memref, vector<8xi1>, vector<8xf64> into vector<8xf64> +// CHECK: %[[VAL_73:.*]] = arith.addf %[[VAL_69]], %[[VAL_72]] : vector<8xf64> +// CHECK: %[[VAL_74:.*]] = select %[[VAL_71]], %[[VAL_73]], %[[VAL_69]] : vector<8xi1>, vector<8xf64> +// CHECK: scf.yield %[[VAL_74]] : vector<8xf64> +// CHECK: } +// CHECK: %[[VAL_75:.*]] = scf.for %[[VAL_76:.*]] = %[[VAL_66]]#1 to %[[VAL_25]] step %[[VAL_4]] iter_args(%[[VAL_77:.*]] = %[[VAL_78:.*]]) -> (vector<8xf64>) { +// CHECK: %[[VAL_79:.*]] = affine.min #map(%[[VAL_25]], %[[VAL_76]]) +// CHECK: %[[VAL_80:.*]] = vector.create_mask %[[VAL_79]] : vector<8xi1> +// CHECK: %[[VAL_81:.*]] = vector.maskedload %[[VAL_14]]{{\[}}%[[VAL_76]]], %[[VAL_80]], %[[VAL_3]] : memref, vector<8xi1>, vector<8xf64> into vector<8xf64> +// CHECK: %[[VAL_82:.*]] = arith.addf %[[VAL_77]], %[[VAL_81]] : vector<8xf64> +// CHECK: %[[VAL_83:.*]] = select %[[VAL_80]], %[[VAL_82]], %[[VAL_77]] : vector<8xi1>, vector<8xf64> +// CHECK: scf.yield %[[VAL_83]] : vector<8xf64> +// CHECK: } +// CHECK: %[[VAL_84:.*]] = vector.reduction "add", %[[VAL_85:.*]] : vector<8xf64> into f64 +// CHECK: scf.yield %[[VAL_84]] : f64 +// CHECK: } +// CHECK: memref.store %[[VAL_86:.*]], %[[VAL_15]][] : memref +// CHECK: %[[VAL_87:.*]] = memref.tensor_load %[[VAL_15]] : memref +// CHECK: return %[[VAL_87]] : tensor +// CHECK: } +func @sparse_matrix_sum(%argx: tensor {linalg.inplaceable = true}, + %arga: tensor<64x32xf64, #SparseMatrix>, + %argb: tensor<64x32xf64, #SparseMatrix>) -> tensor { + 
%0 = linalg.generic #trait + ins(%arga, %argb: tensor<64x32xf64, #SparseMatrix>, + tensor<64x32xf64, #SparseMatrix>) + outs(%argx: tensor) { + ^bb(%a: f64, %b: f64, %x: f64): + %m = arith.addf %a, %b : f64 + %t = arith.addf %x, %m : f64 + linalg.yield %t : f64 + } -> tensor + return %0 : tensor +} diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir index 4fe805fa97b8d..79523598856dd 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=16" -for-loop-peeling -canonicalize -split-input-file | \ +// RUN: mlir-opt %s -sparsification="vectorization-strategy=2 vl=16" -for-loop-peeling -canonicalize | \ // RUN: FileCheck %s #SparseVector = #sparse_tensor.encoding<{ diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index e6cf1a15ac67f..e4614853c71e4 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -9,6 +9,38 @@ func @argmax_nofold(%arg0: tensor) -> tensor { // ----- +// CHECK-LABEL: @add_zero_different_shape +func @add_zero_different_shape(%arg0: tensor<2x3xf32>) -> tensor<4x2x3xf32> { + // CHECK: tosa.add + %zeros = "tosa.const"() {value = dense<0.0> : tensor<4x2x3xf32>} : () -> tensor<4x2x3xf32> + %1 = "tosa.add"(%arg0, %zeros) : (tensor<2x3xf32>, tensor<4x2x3xf32>) -> tensor<4x2x3xf32> + return %1 : tensor<4x2x3xf32> +} + +// ----- + +// CHECK-LABEL: @add_zero_float +func @add_zero_float(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { + // CHECK: return %arg0 + // CHECK-NOT: tosa.add + %zeros = "tosa.const"() {value = dense<0.0> : tensor<2x3xf32>} : () -> tensor<2x3xf32> + %1 = "tosa.add"(%arg0, %zeros) : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> + return %1 : tensor<2x3xf32> +} + +// ----- + +// CHECK-LABEL: @add_zero_int +func @add_zero_int(%arg0: tensor<2x3xi32>) -> tensor<2x3xi32> { + // CHECK: return %arg0 + // CHECK-NOT: tosa.add + %zeros = "tosa.const"() {value = dense<0> : tensor<2x3xi32>} : () -> tensor<2x3xi32> + %1 = "tosa.add"(%arg0, %zeros) : (tensor<2x3xi32>, tensor<2x3xi32>) -> tensor<2x3xi32> + return %1 : tensor<2x3xi32> +} + +// ----- + // CHECK-LABEL: @cast_fold func @cast_fold(%arg0: tensor) -> tensor { // CHECK: return %arg0 diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 29ff5b3f8307b..a774c2303ff80 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -395,6 +395,14 @@ func @test_pad(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3x2xi32>) -> tensor<13x return %0 : tensor<13x21x3xf32> } +// ----- +// CHECK-LABEL: pad_explicit_value +func @test_pad_explicit_value(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3x2xi32>) -> tensor<13x21x3xf32> { + %0 = "tosa.const"() {value = dense<3.14> : tensor} : () -> tensor + %1 = "tosa.pad"(%arg0, %arg1, %0) : (tensor<13x21x3xf32>, tensor<3x2xi32>, tensor) -> tensor<13x21x3xf32> + return %1 : tensor<13x21x3xf32> +} + // ----- // CHECK-LABEL: reshape func @test_reshape(%arg0: tensor<13x21x3xf32>) -> tensor<1x819xf32> { diff --git a/mlir/test/Dialect/Vector/vector-transpose-to-shuffle.mlir b/mlir/test/Dialect/Vector/vector-transpose-to-shuffle.mlir new file mode 100644 index 0000000000000..1b65579b5c813 --- /dev/null +++ b/mlir/test/Dialect/Vector/vector-transpose-to-shuffle.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt %s 
-test-vector-contraction-conversion=vector-shuffle-transpose=1 | FileCheck %s + +// CHECK-LABEL: func @transpose +func @transpose(%arg0: vector<2x4xf32>) -> vector<4x2xf32> { + // CHECK: vector.shape_cast %{{.*}} : vector<2x4xf32> to vector<8xf32> + // 0 4 + // 0 1 2 3 1 5 + // 4 5 6 7 -> 2 6 + // 3 7 + // CHECK: vector.shuffle %{{.*}} [0, 4, 1, 5, 2, 6, 3, 7] : vector<8xf32>, vector<8xf32> + // CHECK: vector.shape_cast %{{.*}} : vector<8xf32> to vector<4x2xf32> + %0 = vector.transpose %arg0, [1, 0] : vector<2x4xf32> to vector<4x2xf32> + return %0 : vector<4x2xf32> +} diff --git a/mlir/test/IR/test-func-insert-arg.mlir b/mlir/test/IR/test-func-insert-arg.mlir index 2de6c666d0d31..61308a1862b38 100644 --- a/mlir/test/IR/test-func-insert-arg.mlir +++ b/mlir/test/IR/test-func-insert-arg.mlir @@ -1,49 +1,49 @@ // RUN: mlir-opt %s -test-func-insert-arg -split-input-file | FileCheck %s -// CHECK: func @f(%arg0: f32 {test.A}) +// CHECK: func @f(%arg0: i1 {test.A}) func @f() attributes {test.insert_args = [ - [0, f32, {test.A}]]} { + [0, i1, {test.A}]]} { return } // ----- -// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}) -func @f(%arg0: f32 {test.B}) attributes {test.insert_args = [ - [0, f32, {test.A}]]} { +// CHECK: func @f(%arg0: i1 {test.A}, %arg1: i2 {test.B}) +func @f(%arg0: i2 {test.B}) attributes {test.insert_args = [ + [0, i1, {test.A}]]} { return } // ----- -// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}) -func @f(%arg0: f32 {test.A}) attributes {test.insert_args = [ - [1, f32, {test.B}]]} { +// CHECK: func @f(%arg0: i1 {test.A}, %arg1: i2 {test.B}) +func @f(%arg0: i1 {test.A}) attributes {test.insert_args = [ + [1, i2, {test.B}]]} { return } // ----- -// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}, %arg2: f32 {test.C}) -func @f(%arg0: f32 {test.A}, %arg1: f32 {test.C}) attributes {test.insert_args = [ - [1, f32, {test.B}]]} { +// CHECK: func @f(%arg0: i1 {test.A}, %arg1: i2 {test.B}, %arg2: i3 {test.C}) +func @f(%arg0: i1 {test.A}, %arg1: i3 {test.C}) attributes {test.insert_args = [ + [1, i2, {test.B}]]} { return } // ----- -// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}, %arg2: f32 {test.C}) -func @f(%arg0: f32 {test.B}) attributes {test.insert_args = [ - [0, f32, {test.A}], - [1, f32, {test.C}]]} { +// CHECK: func @f(%arg0: i1 {test.A}, %arg1: i2 {test.B}, %arg2: i3 {test.C}) +func @f(%arg0: i2 {test.B}) attributes {test.insert_args = [ + [0, i1, {test.A}], + [1, i3, {test.C}]]} { return } // ----- -// CHECK: func @f(%arg0: f32 {test.A}, %arg1: f32 {test.B}, %arg2: f32 {test.C}) -func @f(%arg0: f32 {test.C}) attributes {test.insert_args = [ - [0, f32, {test.A}], - [0, f32, {test.B}]]} { +// CHECK: func @f(%arg0: i1 {test.A}, %arg1: i2 {test.B}, %arg2: i3 {test.C}) +func @f(%arg0: i3 {test.C}) attributes {test.insert_args = [ + [0, i1, {test.A}], + [0, i2, {test.B}]]} { return } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir index 06073c202596a..30f1a14219b34 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir @@ -13,7 +13,7 @@ // Do the same run, but now with SIMDization as well. This should not change the outcome. 
// // RUN: mlir-opt %s \ -// RUN: --sparsification="vectorization-strategy=2 vl=2 enable-simd-index32" --sparse-tensor-conversion \ +// RUN: --sparsification="vectorization-strategy=2 vl=2" --sparse-tensor-conversion \ // RUN: --convert-vector-to-scf --convert-scf-to-std \ // RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ // RUN: --std-bufferize --finalizing-bufferize --lower-affine \ @@ -23,7 +23,6 @@ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// #SV = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir index 2c6dd21d46425..fb0fc2475d3af 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir @@ -2,7 +2,21 @@ // RUN: --sparsification --sparse-tensor-conversion \ // RUN: --convert-vector-to-scf --convert-scf-to-std \ // RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ -// RUN: --std-bufferize --finalizing-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: TENSOR0="%mlir_integration_test_dir/data/test.tns" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. +// +// RUN: mlir-opt %s \ +// RUN: --sparsification="vectorization-strategy=2 vl=4" --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ // RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ // RUN: TENSOR0="%mlir_integration_test_dir/data/test.tns" \ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir index c940b83042bb6..7f63da3869fb5 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -2,7 +2,21 @@ // RUN: --sparsification --sparse-tensor-conversion \ // RUN: --convert-vector-to-scf --convert-scf-to-std \ // RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ -// RUN: --std-bufferize --finalizing-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: TENSOR0="%mlir_integration_test_dir/data/mttkrp_b.tns" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. 
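+// (Editorial gloss, stated as an assumption rather than text from the patch: "vectorization-strategy=2" lets the sparsifier vectorize innermost loops over dense and sparse storage alike, and "vl=4" picks a 4-lane vector length, which is why the SIMD run must reproduce the scalar output above.)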
+// +// RUN: mlir-opt %s \ +// RUN: --sparsification="vectorization-strategy=2 vl=4" --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ // RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ // RUN: TENSOR0="%mlir_integration_test_dir/data/mttkrp_b.tns" \ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir index ef636f1e0b057..d076ec5362cd8 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -2,7 +2,21 @@ // RUN: --sparsification --sparse-tensor-conversion \ // RUN: --convert-vector-to-scf --convert-scf-to-std \ // RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ -// RUN: --std-bufferize --finalizing-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. +// +// RUN: mlir-opt %s \ +// RUN: --sparsification="vectorization-strategy=2 vl=4" --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ // RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ // RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir index 3ad96e9cb41fa..f441b87fd7c0b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -10,6 +10,21 @@ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. 
+// +// RUN: mlir-opt %s \ +// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \ +// RUN: --sparsification="vectorization-strategy=2 vl=2" --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm \ +// RUN: --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s #DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir index b1e4aa3bd5831..7a102b5ad2bdf 100755 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir @@ -12,8 +12,7 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s // -// Do the same run, but now with SIMDization as well. -// This should not change the outcome. +// Do the same run, but now with SIMDization as well. This should not change the outcome. // // RUN: mlir-opt %s \ // RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \ @@ -28,11 +27,10 @@ // RUN: -e entry -entry-point-result=void \ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -// + // Interop between linalg/sparse leaves some issues to be resolved: // UNSUPPORTED: asan - #SM = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }> #trait_sampled_dense_dense = { diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir index 9eb2c0165c84b..7b965a1b09502 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir @@ -2,7 +2,20 @@ // RUN: --sparsification --sparse-tensor-conversion \ // RUN: --convert-vector-to-scf --convert-scf-to-std \ // RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ -// RUN: --std-bufferize --finalizing-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. 
+// +// RUN: mlir-opt %s \ +// RUN: --sparsification="vectorization-strategy=2 vl=4" --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ // RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ // RUN: mlir-cpu-runner \ // RUN: -e entry -entry-point-result=void \ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir index 01e437cfb83f7..22fb911493402 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir @@ -2,7 +2,21 @@ // RUN: --sparsification --sparse-tensor-conversion \ // RUN: --convert-vector-to-scf --convert-scf-to-std \ // RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ -// RUN: --std-bufferize --finalizing-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. +// +// RUN: mlir-opt %s \ +// RUN: --sparsification="vectorization-strategy=2 vl=2" --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ // RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ // RUN: TENSOR0="%mlir_integration_test_dir/data/wide.mtx" \ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir index 5e4c80e9c54f0..01583f92e511e 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir @@ -2,7 +2,21 @@ // RUN: --sparsification --sparse-tensor-conversion \ // RUN: --convert-vector-to-scf --convert-scf-to-std \ // RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ -// RUN: --std-bufferize --finalizing-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ +// RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ +// RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \ +// RUN: mlir-cpu-runner \ +// RUN: -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s +// +// Do the same run, but now with SIMDization as well. This should not change the outcome. 
+// +// RUN: mlir-opt %s \ +// RUN: --sparsification="vectorization-strategy=2 vl=2" --sparse-tensor-conversion \ +// RUN: --convert-vector-to-scf --convert-scf-to-std \ +// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \ +// RUN: --std-bufferize --finalizing-bufferize --lower-affine \ // RUN: --convert-vector-to-llvm --convert-memref-to-llvm --convert-std-to-llvm --reconcile-unrealized-casts | \ // RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \ // RUN: mlir-cpu-runner \ diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 8fefd7866fd5d..08859b22c0d83 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -77,18 +77,22 @@ llvm.func @nvvm_mma(%a0 : vector<2xf16>, %a1 : vector<2xf16>, // in the LLVM NVPTX backend. llvm.func @gpu_wmma_load_op(%arg0: !llvm.ptr, %arg1: i32) { // CHECK: call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i32(i32 addrspace(3)* %{{.*}}, i32 %{{.*}}) - %0 = nvvm.wmma.m16n16k16.load.a.f16.row.stride %arg0, %arg1 : (!llvm.ptr, i32) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // The test below checks the correct mapping of the nvvm.wmma.*.store.* op to the correct intrinsic // in the LLVM NVPTX backend. -llvm.func @gpu_wmma_store_op(%arg0: !llvm.ptr, %arg1: vector<2 x f16>, +llvm.func @gpu_wmma_store_op(%arg0: !llvm.ptr, %arg1: i32, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, - %arg4: vector<2 xf16>, %arg5: i32) { + %arg4: vector<2 xf16>, %arg5: vector<2 x f16>) { // CHECK: call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3i32(i32 addrspace(3)* %{{.*}}, <2 x half> {{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, i32 %{{.*}}) - nvvm.wmma.m16n16k16.store.d.f16.row.stride %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, i32 + nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 + {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> llvm.return } @@ -105,8 +109,32 @@ llvm.func @gpu_wmma_mma_op(%arg0: vector<2 x f16>, %arg1: vector<2 x f16>, %arg16: vector<2 x f16>, %arg17: vector<2 x f16>, %arg18: vector<2 x f16>, %arg19: vector<2 x f16>) { // CHECK: call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}) - %0 = nvvm.wmma.m16n16k16.mma.row.row.f16.f16 %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19 : vector<2 x f16> -> !llvm.struct<(vector<2 x f16>, 
vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> + %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19 + {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, + vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, + vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, + vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>) + -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> + llvm.return +} + +llvm.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) { + // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0i32(i32* %{{.*}}, i32 %{{.*}}) + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = "tf32", frag = "a", k = 8 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + llvm.return +} +llvm.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : i32, + %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, + %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) { + // CHECK: { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) + %r = nvvm.wmma.mma %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 + {eltypeA = "tf32", eltypeB = "f32", k = 8 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + : (i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, f32, f32, f32, f32, f32, f32) + -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return } diff --git a/mlir/test/Target/SPIRV/atomic-ops.mlir b/mlir/test/Target/SPIRV/atomic-ops.mlir index 6bf32af37155b..252d3d5deee23 100644 --- a/mlir/test/Target/SPIRV/atomic-ops.mlir +++ b/mlir/test/Target/SPIRV/atomic-ops.mlir @@ -27,6 +27,10 @@ spv.module Logical GLSL450 requires #spv.vce { %10 = spv.AtomicUMin "Device" "Release" %ptr, %value : !spv.ptr // CHECK: spv.AtomicXor "Workgroup" "AcquireRelease" %{{.*}}, %{{.*}} : !spv.ptr %11 = spv.AtomicXor "Workgroup" "AcquireRelease" %ptr, %value : !spv.ptr + // CHECK: spv.AtomicCompareExchange "Workgroup" "Release" "Acquire" %{{.*}}, %{{.*}}, %{{.*}} : !spv.ptr + %12 = spv.AtomicCompareExchange "Workgroup" "Release" "Acquire" %ptr, %value, %comparator: !spv.ptr + // CHECK: spv.AtomicExchange "Workgroup" "Release" %{{.*}}, %{{.*}} : !spv.ptr + %13 = spv.AtomicExchange "Workgroup" "Release" %ptr, %value: !spv.ptr spv.ReturnValue %0: i32 } } diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir index 4887a87d0156f..cfb09f8f272ac 100644 --- a/mlir/test/Transforms/test-legalize-type-conversion.mlir +++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir @@ -98,3 +98,17 @@ func @test_block_argument_not_converted() { }) : () -> () return } + +// ----- + +// Make sure argument type changes aren't implicitly forwarded. 
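+// (Editorial note: the pattern driving this test, added later in this patch, applies the signature conversion without a type converter, so no cast back to f32 can be materialized for the remapped block argument; the live uses below are therefore expected to raise the diagnostics checked here.)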
+func @test_signature_conversion_no_converter() { + "test.signature_conversion_no_converter"() ({ + // expected-error@below {{failed to materialize conversion for block argument #0 that remained live after conversion}} + ^bb0(%arg0: f32): + // expected-note@below {{see existing live user here}} + "test.type_consumer"(%arg0) : (f32) -> () + "test.return"(%arg0) : (f32) -> () + }) : () -> () + return +} diff --git a/mlir/test/Transforms/test-legalizer.mlir b/mlir/test/Transforms/test-legalizer.mlir index 3342402740209..556e820465da5 100644 --- a/mlir/test/Transforms/test-legalizer.mlir +++ b/mlir/test/Transforms/test-legalizer.mlir @@ -307,3 +307,13 @@ builtin.module { } } + +// ----- + +// The "passthrough_fold" folder will naively return its operand, but we don't +// want to fold here because of the type mismatch. +func @typemismatch(%arg: f32) -> i32 { + // expected-remark@+1 {{op 'test.passthrough_fold' is not legalizable}} + %0 = "test.passthrough_fold"(%arg) : (f32) -> (i32) + "test.return"(%0) : (i32) -> () +} diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp index 25525fb851d24..78711310a8d88 100644 --- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp +++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp @@ -152,6 +152,11 @@ struct TestLinalgTransforms llvm::cl::desc("Specify the type of loops to generate: for, parallel or " "tiled_loop"), llvm::cl::init("for")}; + Option testDecomposeConvolutionPattern{ + *this, "test-decompose-convolution-patterns", + llvm::cl::desc("Test a set of patterns to rewrite high-D convolution ops " + "into low-D ones"), + llvm::cl::init(false)}; }; } // end anonymous namespace @@ -576,6 +581,12 @@ static void applyLinalgToVectorPatterns(FuncOp funcOp) { (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); } +static void applyDecomposeConvolutionPatterns(FuncOp funcOp) { + RewritePatternSet patterns(funcOp.getContext()); + populateDecomposeConvolutionPatterns(patterns); + (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns)); +} + static void applyPadTensorToGenericPatterns(FuncOp funcOp) { RewritePatternSet patterns(funcOp.getContext()); patterns.add(funcOp.getContext()); @@ -819,6 +830,8 @@ void TestLinalgTransforms::runOnFunction() { return applyPadPattern(getFunction(), packPaddings, hoistPaddings); if (testInterchangePattern.hasValue()) return applyInterchangePattern(getFunction(), testInterchangePattern); + if (testDecomposeConvolutionPattern) + return applyDecomposeConvolutionPatterns(getFunction()); } namespace mlir { diff --git a/mlir/test/lib/Dialect/Test/TestAttrDefs.td b/mlir/test/lib/Dialect/Test/TestAttrDefs.td index 3062fd6c65ca9..06e2599def7df 100644 --- a/mlir/test/lib/Dialect/Test/TestAttrDefs.td +++ b/mlir/test/lib/Dialect/Test/TestAttrDefs.td @@ -116,4 +116,44 @@ def TestSubElementsAccessAttr : Test_Attr<"TestSubElementsAccess", [ ); } +def TestParamOne : AttrParameter<"int64_t", ""> {} + +def TestParamTwo : AttrParameter<"std::string", "", "llvm::StringRef"> { + let printer = "$_printer << '\"' << $_self << '\"'"; +} + +def TestParamFour : ArrayRefParameter<"int", ""> { + let cppStorageType = "llvm::SmallVector"; + let parser = "::parseIntArray($_parser)"; + let printer = "::printIntArray($_printer, $_self)"; +} + +def TestAttrWithFormat : Test_Attr<"TestAttrWithFormat"> { + let parameters = ( + ins + TestParamOne:$one, + TestParamTwo:$two, + "::mlir::IntegerAttr":$three, + TestParamFour:$four + ); + + let mnemonic = 
"attr_with_format"; + let assemblyFormat = "`<` $one `:` struct($two, $four) `:` $three `>`"; + let genVerifyDecl = 1; +} + +def TestAttrUgly : Test_Attr<"TestAttrUgly"> { + let parameters = (ins "::mlir::Attribute":$attr); + + let mnemonic = "attr_ugly"; + let assemblyFormat = "`begin` $attr `end`"; +} + +def TestAttrParams: Test_Attr<"TestAttrParams"> { + let parameters = (ins "int":$v0, "int":$v1); + + let mnemonic = "attr_params"; + let assemblyFormat = "`<` params `>`"; +} + #endif // TEST_ATTRDEFS diff --git a/mlir/test/lib/Dialect/Test/TestAttributes.cpp b/mlir/test/lib/Dialect/Test/TestAttributes.cpp index 9cd9c574a7bf2..e0c8ebbb7b93f 100644 --- a/mlir/test/lib/Dialect/Test/TestAttributes.cpp +++ b/mlir/test/lib/Dialect/Test/TestAttributes.cpp @@ -16,9 +16,11 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/Types.h" +#include "mlir/Support/LogicalResult.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/ADT/bit.h" using namespace mlir; using namespace test; @@ -127,6 +129,36 @@ TestI64ElementsAttr::verify(function_ref emitError, return success(); } +LogicalResult +TestAttrWithFormatAttr::verify(function_ref emitError, + int64_t one, std::string two, IntegerAttr three, + ArrayRef four) { + if (four.size() != static_cast(one)) + return emitError() << "expected 'one' to equal 'four.size()'"; + return success(); +} + +//===----------------------------------------------------------------------===// +// Utility Functions for Generated Attributes +//===----------------------------------------------------------------------===// + +static FailureOr> parseIntArray(DialectAsmParser &parser) { + SmallVector ints; + if (parser.parseLSquare() || parser.parseCommaSeparatedList([&]() { + ints.push_back(0); + return parser.parseInteger(ints.back()); + }) || + parser.parseRSquare()) + return failure(); + return ints; +} + +static void printIntArray(DialectAsmPrinter &printer, ArrayRef ints) { + printer << '['; + llvm::interleaveComma(ints, printer); + printer << ']'; +} + //===----------------------------------------------------------------------===// // TestSubElementsAccessAttr //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 656ec7ef86b6f..c2b408244d081 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -1520,6 +1520,11 @@ def TestSignatureConversionUndoOp : TEST_Op<"signature_conversion_undo"> { let regions = (region AnyRegion); } +def TestSignatureConversionNoConverterOp + : TEST_Op<"signature_conversion_no_converter"> { + let regions = (region AnyRegion); +} + //===----------------------------------------------------------------------===// // Test parser. //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index c93d233900a2e..4df5730b282fd 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -950,6 +950,34 @@ struct TestSignatureConversionUndo } }; +/// Call signature conversion without providing a type converter to handle +/// materializations. 
+struct TestTestSignatureConversionNoConverter + : public OpConversionPattern { + TestTestSignatureConversionNoConverter(TypeConverter &converter, + MLIRContext *context) + : OpConversionPattern(context), + converter(converter) {} + + LogicalResult + matchAndRewrite(TestSignatureConversionNoConverterOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Region &region = op->getRegion(0); + Block *entry = &region.front(); + + // Convert the original entry arguments. + TypeConverter::SignatureConversion result(entry->getNumArguments()); + if (failed( + converter.convertSignatureArgs(entry->getArgumentTypes(), result))) + return failure(); + rewriter.updateRootInPlace( + op, [&] { rewriter.applySignatureConversion(&region, result); }); + return success(); + } + + TypeConverter &converter; +}; + /// Just forward the operands to the root op. This is essentially a no-op /// pattern that is used to trigger target materialization. struct TestTypeConsumerForward @@ -1041,11 +1069,17 @@ struct TestTypeConversionDriver // Allow casts from F64 to F32. return (*op.operand_type_begin()).isF64() && op.getType().isF32(); }); + target.addDynamicallyLegalOp( + [&](TestSignatureConversionNoConverterOp op) { + return converter.isLegal(op.getRegion().front().getArgumentTypes()); + }); // Initialize the set of rewrite patterns. RewritePatternSet patterns(&getContext()); patterns.add(converter, &getContext()); + TestSignatureConversionUndo, + TestTestSignatureConversionNoConverter>(converter, + &getContext()); patterns.add(&getContext()); mlir::populateFuncOpTypeConversionPattern(patterns, converter); diff --git a/mlir/test/lib/Dialect/Test/TestTypeDefs.td b/mlir/test/lib/Dialect/Test/TestTypeDefs.td index aef9baa894737..66008f12d574b 100644 --- a/mlir/test/lib/Dialect/Test/TestTypeDefs.td +++ b/mlir/test/lib/Dialect/Test/TestTypeDefs.td @@ -15,6 +15,7 @@ // To get the test dialect def. include "TestOps.td" +include "TestAttrDefs.td" include "mlir/IR/BuiltinTypes.td" include "mlir/Interfaces/DataLayoutInterfaces.td" @@ -189,4 +190,44 @@ def TestTypeWithTrait : Test_Type<"TestTypeWithTrait", [TestTypeTrait]> { let mnemonic = "test_type_with_trait"; } +// Type with assembly format. +def TestTypeWithFormat : Test_Type<"TestTypeWithFormat"> { + let parameters = ( + ins + TestParamOne:$one, + TestParamTwo:$two, + "::mlir::Attribute":$three + ); + + let mnemonic = "type_with_format"; + let assemblyFormat = "`<` $one `,` struct($three, $two) `>`"; +} + +// Test dispatch to parseField +def TestTypeNoParser : Test_Type<"TestTypeNoParser"> { + let parameters = ( + ins + "uint32_t":$one, + ArrayRefParameter<"int64_t">:$two, + StringRefParameter<>:$three, + "::test::CustomParam":$four + ); + + let mnemonic = "no_parser"; + let assemblyFormat = "`<` $one `,` `[` $two `]` `,` $three `,` $four `>`"; +} + +def TestTypeStructCaptureAll : Test_Type<"TestStructTypeCaptureAll"> { + let parameters = ( + ins + "int":$v0, + "int":$v1, + "int":$v2, + "int":$v3 + ); + + let mnemonic = "struct_capture_all"; + let assemblyFormat = "`<` struct(params) `>`"; +} + #endif // TEST_TYPEDEFS diff --git a/mlir/test/lib/Dialect/Test/TestTypes.h b/mlir/test/lib/Dialect/Test/TestTypes.h index 9da2e1713d9d0..7614ae401d1f0 100644 --- a/mlir/test/lib/Dialect/Test/TestTypes.h +++ b/mlir/test/lib/Dialect/Test/TestTypes.h @@ -38,8 +38,38 @@ struct FieldInfo { } }; +/// A custom type for a test type parameter. 
+struct CustomParam { + int value; + + bool operator==(const CustomParam &other) const { + return other.value == value; + } +}; + +inline llvm::hash_code hash_value(const test::CustomParam &param) { + return llvm::hash_value(param.value); +} + } // namespace test +namespace mlir { +template <> +struct FieldParser { + static FailureOr parse(DialectAsmParser &parser) { + auto value = FieldParser::parse(parser); + if (failed(value)) + return failure(); + return test::CustomParam{value.getValue()}; + } +}; +} // end namespace mlir + +inline mlir::DialectAsmPrinter &operator<<(mlir::DialectAsmPrinter &printer, + const test::CustomParam &param) { + return printer << param.value; +} + #include "TestTypeInterfaces.h.inc" #define GET_TYPEDEF_CLASSES @@ -52,17 +82,19 @@ namespace test { struct TestRecursiveTypeStorage : public ::mlir::TypeStorage { using KeyTy = ::llvm::StringRef; - explicit TestRecursiveTypeStorage(::llvm::StringRef key) : name(key), body(::mlir::Type()) {} + explicit TestRecursiveTypeStorage(::llvm::StringRef key) + : name(key), body(::mlir::Type()) {} bool operator==(const KeyTy &other) const { return name == other; } - static TestRecursiveTypeStorage *construct(::mlir::TypeStorageAllocator &allocator, - const KeyTy &key) { + static TestRecursiveTypeStorage * + construct(::mlir::TypeStorageAllocator &allocator, const KeyTy &key) { return new (allocator.allocate()) TestRecursiveTypeStorage(allocator.copyInto(key)); } - ::mlir::LogicalResult mutate(::mlir::TypeStorageAllocator &allocator, ::mlir::Type newBody) { + ::mlir::LogicalResult mutate(::mlir::TypeStorageAllocator &allocator, + ::mlir::Type newBody) { // Cannot set a different body than before. if (body && body != newBody) return ::mlir::failure(); @@ -79,11 +111,13 @@ struct TestRecursiveTypeStorage : public ::mlir::TypeStorage { /// type, potentially itself. This requires the body to be mutated separately /// from type creation. 
class TestRecursiveType - : public ::mlir::Type::TypeBase { + : public ::mlir::Type::TypeBase { public: using Base::Base; - static TestRecursiveType get(::mlir::MLIRContext *ctx, ::llvm::StringRef name) { + static TestRecursiveType get(::mlir::MLIRContext *ctx, + ::llvm::StringRef name) { return Base::get(ctx, name); } diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index e7d520bcdb173..12d57489af60b 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -116,6 +116,10 @@ struct TestVectorContractionConversion *this, "vector-flat-transpose", llvm::cl::desc("Lower 2-D vector.transpose to vector.flat_transpose"), llvm::cl::init(false)}; + Option lowerToShuffleTranspose{ + *this, "vector-shuffle-transpose", + llvm::cl::desc("Lower 2-D vector.transpose to shape_cast + shuffle"), + llvm::cl::init(false)}; Option lowerToOuterProduct{ *this, "vector-outerproduct", llvm::cl::desc("Lower vector.contract to vector.outerproduct"), @@ -165,12 +169,15 @@ struct TestVectorContractionConversion VectorTransposeLowering::EltWise; if (lowerToFlatTranspose) transposeLowering = VectorTransposeLowering::Flat; + if (lowerToShuffleTranspose) + transposeLowering = VectorTransposeLowering::Shuffle; VectorTransformsOptions options{ contractLowering, vectorMultiReductionLowering, transposeLowering}; populateVectorBroadcastLoweringPatterns(patterns); populateVectorContractLoweringPatterns(patterns, options); populateVectorMaskOpLoweringPatterns(patterns); - populateVectorShapeCastLoweringPatterns(patterns); + if (!lowerToShuffleTranspose) + populateVectorShapeCastLoweringPatterns(patterns); populateVectorTransposeLoweringPatterns(patterns, options); (void)applyPatternsAndFoldGreedily(getFunction(), std::move(patterns)); } diff --git a/mlir/test/mlir-tblgen/attr-or-type-format-invalid.td b/mlir/test/mlir-tblgen/attr-or-type-format-invalid.td new file mode 100644 index 0000000000000..372aef6dfa3e5 --- /dev/null +++ b/mlir/test/mlir-tblgen/attr-or-type-format-invalid.td @@ -0,0 +1,76 @@ +// RUN: mlir-tblgen -gen-typedef-defs -I %S/../../include -asmformat-error-is-fatal=false %s 2>&1 | FileCheck %s + +include "mlir/IR/OpBase.td" + +def Test_Dialect : Dialect { + let name = "TestDialect"; + let cppNamespace = "::test"; +} + +class InvalidType : TypeDef { + let mnemonic = asm; +} + +/// Test format is missing a parameter capture. +def InvalidTypeA : InvalidType<"InvalidTypeA", "invalid_a"> { + let parameters = (ins "int":$v0, "int":$v1); + // CHECK: format is missing reference to parameter: v1 + let assemblyFormat = "`<` $v0 `>`"; +} + +/// Test format has duplicate parameter captures. +def InvalidTypeB : InvalidType<"InvalidTypeB", "invalid_b"> { + let parameters = (ins "int":$v0, "int":$v1); + // CHECK: duplicate parameter 'v0' + let assemblyFormat = "`<` $v0 `,` $v1 `,` $v0 `>`"; +} + +/// Test format has invalid syntax. +def InvalidTypeC : InvalidType<"InvalidTypeC", "invalid_c"> { + let parameters = (ins "int":$v0, "int":$v1); + // CHECK: expected literal, directive, or variable + let assemblyFormat = "`<` $v0, $v1 `>`"; +} + +/// Test struct directive has invalid syntax. 
+def InvalidTypeD : InvalidType<"InvalidTypeD", "invalid_d"> { + let parameters = (ins "int":$v0); + // CHECK: literals may only be used in the top-level section of the format + // CHECK: expected a variable in `struct` argument list + let assemblyFormat = "`<` struct($v0, `,`) `>`"; +} + +/// Test struct directive cannot capture zero parameters. +def InvalidTypeE : InvalidType<"InvalidTypeE", "invalid_e"> { + let parameters = (ins "int":$v0); + // CHECK: `struct` argument list expected a variable or directive + let assemblyFormat = "`<` struct() $v0 `>`"; +} + +/// Test capture parameter that does not exist. +def InvalidTypeF : InvalidType<"InvalidTypeF", "invalid_f"> { + let parameters = (ins "int":$v0); + // CHECK: InvalidTypeF has no parameter named 'v1' + let assemblyFormat = "`<` $v0 $v1 `>`"; +} + +/// Test duplicate capture of parameter in capture-all struct. +def InvalidTypeG : InvalidType<"InvalidTypeG", "invalid_g"> { + let parameters = (ins "int":$v0, "int":$v1, "int":$v2); + // CHECK: duplicate parameter 'v0' + let assemblyFormat = "`<` struct(params) $v0 `>`"; +} + +/// Test capture-all struct duplicate capture. +def InvalidTypeH : InvalidType<"InvalidTypeH", "invalid_h"> { + let parameters = (ins "int":$v0, "int":$v1, "int":$v2); + // CHECK: `params` captures duplicate parameter: v0 + let assemblyFormat = "`<` $v0 struct(params) `>`"; +} + +/// Test capture of parameter after `params` directive. +def InvalidTypeI : InvalidType<"InvalidTypeI", "invalid_i"> { + let parameters = (ins "int":$v0); + // CHECK: duplicate parameter 'v0' + let assemblyFormat = "`<` params $v0 `>`"; +} diff --git a/mlir/test/mlir-tblgen/attr-or-type-format-roundtrip.mlir b/mlir/test/mlir-tblgen/attr-or-type-format-roundtrip.mlir new file mode 100644 index 0000000000000..f403f6f4b059d --- /dev/null +++ b/mlir/test/mlir-tblgen/attr-or-type-format-roundtrip.mlir @@ -0,0 +1,21 @@ +// RUN: mlir-opt %s | mlir-opt | FileCheck %s + +// CHECK-LABEL: @test_roundtrip_parameter_parsers +// CHECK: !test.type_with_format<111, three = #test<"attr_ugly begin 5 : index end">, two = "foo"> +// CHECK: !test.type_with_format<2147, three = "hi", two = "hi"> +func private @test_roundtrip_parameter_parsers(!test.type_with_format<111, three = #test<"attr_ugly begin 5 : index end">, two = "foo">) -> !test.type_with_format<2147, two = "hi", three = "hi"> +attributes { + // CHECK: #test.attr_with_format<3 : two = "hello", four = [1, 2, 3] : 42 : i64> + attr0 = #test.attr_with_format<3 : two = "hello", four = [1, 2, 3] : 42 : i64>, + // CHECK: #test.attr_with_format<5 : two = "a_string", four = [4, 5, 6, 7, 8] : 8 : i8> + attr1 = #test.attr_with_format<5 : two = "a_string", four = [4, 5, 6, 7, 8] : 8 : i8>, + // CHECK: #test<"attr_ugly begin 5 : index end"> + attr2 = #test<"attr_ugly begin 5 : index end">, + // CHECK: #test.attr_params<42, 24> + attr3 = #test.attr_params<42, 24> +} + +// CHECK-LABEL: @test_roundtrip_default_parsers_struct +// CHECK: !test.no_parser<255, [1, 2, 3, 4, 5], "foobar", 4> +// CHECK: !test.struct_capture_all +func private @test_roundtrip_default_parsers_struct(!test.no_parser<255, [1, 2, 3, 4, 5], "foobar", 4>) -> !test.struct_capture_all diff --git a/mlir/test/mlir-tblgen/attr-or-type-format.mlir b/mlir/test/mlir-tblgen/attr-or-type-format.mlir new file mode 100644 index 0000000000000..3ff638c5f640f --- /dev/null +++ b/mlir/test/mlir-tblgen/attr-or-type-format.mlir @@ -0,0 +1,127 @@ +// RUN: mlir-opt --split-input-file %s --verify-diagnostics + +func private @test_ugly_attr_cannot_be_pretty() -> () 
attributes { + // expected-error@+1 {{expected 'begin'}} + attr = #test.attr_ugly +} + +// ----- + +func private @test_ugly_attr_no_mnemonic() -> () attributes { + // expected-error@+1 {{expected valid keyword}} + attr = #test<""> +} + +// ----- + +func private @test_ugly_attr_parser_dispatch() -> () attributes { + // expected-error@+1 {{expected 'begin'}} + attr = #test<"attr_ugly"> +} + +// ----- + +func private @test_ugly_attr_missing_parameter() -> () attributes { + // expected-error@+2 {{failed to parse TestAttrUgly parameter 'attr'}} + // expected-error@+1 {{expected non-function type}} + attr = #test<"attr_ugly begin"> +} + +// ----- + +func private @test_ugly_attr_missing_literal() -> () attributes { + // expected-error@+1 {{expected 'end'}} + attr = #test<"attr_ugly begin \"string_attr\""> +} + +// ----- + +func private @test_pretty_attr_expects_less() -> () attributes { + // expected-error@+1 {{expected '<'}} + attr = #test.attr_with_format +} + +// ----- + +func private @test_pretty_attr_missing_param() -> () attributes { + // expected-error@+2 {{expected integer value}} + // expected-error@+1 {{failed to parse TestAttrWithFormat parameter 'one'}} + attr = #test.attr_with_format<> +} + +// ----- + +func private @test_parse_invalid_param() -> () attributes { + // Test parameter parser failure is propagated + // expected-error@+2 {{expected integer value}} + // expected-error@+1 {{failed to parse TestAttrWithFormat parameter 'one'}} + attr = #test.attr_with_format<"hi"> +} + +// ----- + +func private @test_pretty_attr_invalid_syntax() -> () attributes { + // expected-error@+1 {{expected ':'}} + attr = #test.attr_with_format<42> +} + +// ----- + +func private @test_struct_missing_key() -> () attributes { + // expected-error@+2 {{expected valid keyword}} + // expected-error@+1 {{expected a parameter name in struct}} + attr = #test.attr_with_format<42 :> +} + +// ----- + +func private @test_struct_unknown_key() -> () attributes { + // expected-error@+1 {{duplicate or unknown struct parameter}} + attr = #test.attr_with_format<42 : nine = "foo"> +} + +// ----- + +func private @test_struct_duplicate_key() -> () attributes { + // expected-error@+1 {{duplicate or unknown struct parameter}} + attr = #test.attr_with_format<42 : two = "foo", two = "bar"> +} + +// ----- + +func private @test_struct_not_enough_values() -> () attributes { + // expected-error@+1 {{expected ','}} + attr = #test.attr_with_format<42 : two = "foo"> +} + +// ----- + +func private @test_parse_param_after_struct() -> () attributes { + // expected-error@+2 {{expected non-function type}} + // expected-error@+1 {{failed to parse TestAttrWithFormat parameter 'three'}} + attr = #test.attr_with_format<42 : two = "foo", four = [1, 2, 3] : > +} + +// ----- + +// expected-error@+1 {{expected '<'}} +func private @test_invalid_type() -> !test.type_with_format + +// ----- + +// expected-error@+2 {{expected integer value}} +// expected-error@+1 {{failed to parse TestTypeWithFormat parameter 'one'}} +func private @test_pretty_type_invalid_param() -> !test.type_with_format<> + +// ----- + +// expected-error@+2 {{expected ':'}} +// expected-error@+1 {{failed to parse TestTypeWithFormat parameter 'three'}} +func private @test_type_syntax_error() -> !test.type_with_format<42, two = "hi", three = #test.attr_with_format<42>> + +// ----- + +func private @test_verifier_fails() -> () attributes { + // expected-error@+1 {{expected 'one' to equal 'four.size()'}} + attr = #test.attr_with_format<42 : two = "hello", four = [1, 2, 3] : 42 : i64> 
+} diff --git a/mlir/test/mlir-tblgen/attr-or-type-format.td b/mlir/test/mlir-tblgen/attr-or-type-format.td new file mode 100644 index 0000000000000..2d426935fa415 --- /dev/null +++ b/mlir/test/mlir-tblgen/attr-or-type-format.td @@ -0,0 +1,394 @@ +// RUN: mlir-tblgen -gen-attrdef-defs -I %S/../../include %s | FileCheck %s --check-prefix=ATTR +// RUN: mlir-tblgen -gen-typedef-defs -I %S/../../include %s | FileCheck %s --check-prefix=TYPE + +include "mlir/IR/OpBase.td" + +/// Test that attribute and type printers and parsers are correctly generated. +def Test_Dialect : Dialect { + let name = "TestDialect"; + let cppNamespace = "::test"; +} + +class TestAttr : AttrDef; +class TestType : TypeDef; + +def AttrParamA : AttrParameter<"TestParamA", "an attribute param A"> { + let parser = "::parseAttrParamA($_parser, $_type)"; + let printer = "::printAttrParamA($_printer, $_self)"; +} + +def AttrParamB : AttrParameter<"TestParamB", "an attribute param B"> { + let parser = "$_type ? ::parseAttrWithType($_parser, $_type) : ::parseAttrWithout($_parser)"; + let printer = "::printAttrB($_printer, $_self)"; +} + +def TypeParamA : TypeParameter<"TestParamC", "a type param C"> { + let parser = "::parseTypeParamC($_parser)"; + let printer = "$_printer << $_self"; +} + +def TypeParamB : TypeParameter<"TestParamD", "a type param D"> { + let parser = "someFcnCall()"; + let printer = "myPrinter($_self)"; +} + +/// Check simple attribute parser and printer are generated correctly. + +// ATTR: ::mlir::Attribute TestAAttr::parse(::mlir::DialectAsmParser &parser, +// ATTR: ::mlir::Type attrType) { +// ATTR: FailureOr _result_value; +// ATTR: FailureOr _result_complex; +// ATTR: if (parser.parseKeyword("hello")) +// ATTR: return {}; +// ATTR: if (parser.parseEqual()) +// ATTR: return {}; +// ATTR: _result_value = ::mlir::FieldParser::parse(parser); +// ATTR: if (failed(_result_value)) +// ATTR: return {}; +// ATTR: if (parser.parseComma()) +// ATTR: return {}; +// ATTR: _result_complex = ::parseAttrParamA(parser, attrType); +// ATTR: if (failed(_result_complex)) +// ATTR: return {}; +// ATTR: if (parser.parseRParen()) +// ATTR: return {}; +// ATTR: return TestAAttr::get(parser.getContext(), +// ATTR: _result_value.getValue(), +// ATTR: _result_complex.getValue()); +// ATTR: } + +// ATTR: void TestAAttr::print(::mlir::DialectAsmPrinter &printer) const { +// ATTR: printer << "attr_a"; +// ATTR: printer << ' ' << "hello"; +// ATTR: printer << ' ' << "="; +// ATTR: printer << ' '; +// ATTR: printer << getValue(); +// ATTR: printer << ","; +// ATTR: printer << ' '; +// ATTR: ::printAttrParamA(printer, getComplex()); +// ATTR: printer << ")"; +// ATTR: } + +def AttrA : TestAttr<"TestA"> { + let parameters = (ins + "IntegerAttr":$value, + AttrParamA:$complex + ); + + let mnemonic = "attr_a"; + let assemblyFormat = "`hello` `=` $value `,` $complex `)`"; +} + +/// Test simple struct parser and printer are generated correctly. 
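A brief aside before the struct checks below: parameters whose `AttrParameter`/`TypeParameter` def carries no custom `parser` string fall back to `::mlir::FieldParser<T>::parse`, as the generated calls above show. A hedged sketch of such a specialization for a hypothetical parameter type (`MyParam` and its parse logic are illustrative only, not part of this patch):

```cpp
#include "mlir/IR/DialectImplementation.h"

// Hypothetical C++ parameter type; any type used as an attribute or type
// parameter can opt into the default parsing path this way.
struct MyParam {
  int value;
};

namespace mlir {
template <>
struct FieldParser<MyParam> {
  static FailureOr<MyParam> parse(DialectAsmParser &parser) {
    int value = 0;
    // parseInteger is assumed to be available on the parser, as it is for
    // the FieldParser<int> instances exercised by these tests.
    if (parser.parseInteger(value))
      return failure();
    return MyParam{value};
  }
};
} // namespace mlir
```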
+ +// ATTR: ::mlir::Attribute TestBAttr::parse(::mlir::DialectAsmParser &parser, +// ATTR: ::mlir::Type attrType) { +// ATTR: bool _seen_v0 = false; +// ATTR: bool _seen_v1 = false; +// ATTR: for (unsigned _index = 0; _index < 2; ++_index) { +// ATTR: StringRef _paramKey; +// ATTR: if (parser.parseKeyword(&_paramKey)) +// ATTR: return {}; +// ATTR: if (parser.parseEqual()) +// ATTR: return {}; +// ATTR: if (!_seen_v0 && _paramKey == "v0") { +// ATTR: _seen_v0 = true; +// ATTR: _result_v0 = ::parseAttrParamA(parser, attrType); +// ATTR: if (failed(_result_v0)) +// ATTR: return {}; +// ATTR: } else if (!_seen_v1 && _paramKey == "v1") { +// ATTR: _seen_v1 = true; +// ATTR: _result_v1 = attrType ? ::parseAttrWithType(parser, attrType) : ::parseAttrWithout(parser); +// ATTR: if (failed(_result_v1)) +// ATTR: return {}; +// ATTR: } else { +// ATTR: return {}; +// ATTR: } +// ATTR: if ((_index != 2 - 1) && parser.parseComma()) +// ATTR: return {}; +// ATTR: } +// ATTR: return TestBAttr::get(parser.getContext(), +// ATTR: _result_v0.getValue(), +// ATTR: _result_v1.getValue()); +// ATTR: } + +// ATTR: void TestBAttr::print(::mlir::DialectAsmPrinter &printer) const { +// ATTR: printer << "v0"; +// ATTR: printer << ' ' << "="; +// ATTR: printer << ' '; +// ATTR: ::printAttrParamA(printer, getV0()); +// ATTR: printer << ","; +// ATTR: printer << ' ' << "v1"; +// ATTR: printer << ' ' << "="; +// ATTR: printer << ' '; +// ATTR: ::printAttrB(printer, getV1()); +// ATTR: } + +def AttrB : TestAttr<"TestB"> { + let parameters = (ins + AttrParamA:$v0, + AttrParamB:$v1 + ); + + let mnemonic = "attr_b"; + let assemblyFormat = "`{` struct($v0, $v1) `}`"; +} + +/// Test attribute with capture-all params has correct parser and printer. + +// ATTR: ::mlir::Attribute TestFAttr::parse(::mlir::DialectAsmParser &parser, +// ATTR: ::mlir::Type attrType) { +// ATTR: ::mlir::FailureOr _result_v0; +// ATTR: ::mlir::FailureOr _result_v1; +// ATTR: _result_v0 = ::mlir::FieldParser::parse(parser); +// ATTR: if (failed(_result_v0)) +// ATTR: return {}; +// ATTR: if (parser.parseComma()) +// ATTR: return {}; +// ATTR: _result_v1 = ::mlir::FieldParser::parse(parser); +// ATTR: if (failed(_result_v1)) +// ATTR: return {}; +// ATTR: return TestFAttr::get(parser.getContext(), +// ATTR: _result_v0.getValue(), +// ATTR: _result_v1.getValue()); +// ATTR: } + +// ATTR: void TestFAttr::print(::mlir::DialectAsmPrinter &printer) const { +// ATTR: printer << "attr_c"; +// ATTR: printer << ' '; +// ATTR: printer << getV0(); +// ATTR: printer << ","; +// ATTR: printer << ' '; +// ATTR: printer << getV1(); +// ATTR: } + +def AttrC : TestAttr<"TestF"> { + let parameters = (ins "int":$v0, "int":$v1); + + let mnemonic = "attr_c"; + let assemblyFormat = "params"; +} + +/// Test type parser and printer that mix variables and struct are generated +/// correctly. 
+ +// TYPE: ::mlir::Type TestCType::parse(::mlir::DialectAsmParser &parser) { +// TYPE: FailureOr _result_value; +// TYPE: FailureOr _result_complex; +// TYPE: if (parser.parseKeyword("foo")) +// TYPE: return {}; +// TYPE: if (parser.parseComma()) +// TYPE: return {}; +// TYPE: if (parser.parseColon()) +// TYPE: return {}; +// TYPE: if (parser.parseKeyword("bob")) +// TYPE: return {}; +// TYPE: if (parser.parseKeyword("bar")) +// TYPE: return {}; +// TYPE: _result_value = ::mlir::FieldParser::parse(parser); +// TYPE: if (failed(_result_value)) +// TYPE: return {}; +// TYPE: bool _seen_complex = false; +// TYPE: for (unsigned _index = 0; _index < 1; ++_index) { +// TYPE: StringRef _paramKey; +// TYPE: if (parser.parseKeyword(&_paramKey)) +// TYPE: return {}; +// TYPE: if (!_seen_complex && _paramKey == "complex") { +// TYPE: _seen_complex = true; +// TYPE: _result_complex = ::parseTypeParamC(parser); +// TYPE: if (failed(_result_complex)) +// TYPE: return {}; +// TYPE: } else { +// TYPE: return {}; +// TYPE: } +// TYPE: if ((_index != 1 - 1) && parser.parseComma()) +// TYPE: return {}; +// TYPE: } +// TYPE: if (parser.parseRParen()) +// TYPE: return {}; +// TYPE: } + +// TYPE: void TestCType::print(::mlir::DialectAsmPrinter &printer) const { +// TYPE: printer << "type_c"; +// TYPE: printer << ' ' << "foo"; +// TYPE: printer << ","; +// TYPE: printer << ' ' << ":"; +// TYPE: printer << ' ' << "bob"; +// TYPE: printer << ' ' << "bar"; +// TYPE: printer << ' '; +// TYPE: printer << getValue(); +// TYPE: printer << ' ' << "complex"; +// TYPE: printer << ' ' << "="; +// TYPE: printer << ' '; +// TYPE: printer << getComplex(); +// TYPE: printer << ")"; +// TYPE: } + +def TypeA : TestType<"TestC"> { + let parameters = (ins + "IntegerAttr":$value, + TypeParamA:$complex + ); + + let mnemonic = "type_c"; + let assemblyFormat = "`foo` `,` `:` `bob` `bar` $value struct($complex) `)`"; +} + +/// Test type parser and printer with mix of variables and struct are generated +/// correctly. 
+ +// TYPE: ::mlir::Type TestDType::parse(::mlir::DialectAsmParser &parser) { +// TYPE: _result_v0 = ::parseTypeParamC(parser); +// TYPE: if (failed(_result_v0)) +// TYPE: return {}; +// TYPE: bool _seen_v1 = false; +// TYPE: bool _seen_v2 = false; +// TYPE: for (unsigned _index = 0; _index < 2; ++_index) { +// TYPE: StringRef _paramKey; +// TYPE: if (parser.parseKeyword(&_paramKey)) +// TYPE: return {}; +// TYPE: if (parser.parseEqual()) +// TYPE: return {}; +// TYPE: if (!_seen_v1 && _paramKey == "v1") { +// TYPE: _seen_v1 = true; +// TYPE: _result_v1 = someFcnCall(); +// TYPE: if (failed(_result_v1)) +// TYPE: return {}; +// TYPE: } else if (!_seen_v2 && _paramKey == "v2") { +// TYPE: _seen_v2 = true; +// TYPE: _result_v2 = ::parseTypeParamC(parser); +// TYPE: if (failed(_result_v2)) +// TYPE: return {}; +// TYPE: } else { +// TYPE: return {}; +// TYPE: } +// TYPE: if ((_index != 2 - 1) && parser.parseComma()) +// TYPE: return {}; +// TYPE: } +// TYPE: _result_v3 = someFcnCall(); +// TYPE: if (failed(_result_v3)) +// TYPE: return {}; +// TYPE: return TestDType::get(parser.getContext(), +// TYPE: _result_v0.getValue(), +// TYPE: _result_v1.getValue(), +// TYPE: _result_v2.getValue(), +// TYPE: _result_v3.getValue()); +// TYPE: } + +// TYPE: void TestDType::print(::mlir::DialectAsmPrinter &printer) const { +// TYPE: printer << getV0(); +// TYPE: myPrinter(getV1()); +// TYPE: printer << ' ' << "v2"; +// TYPE: printer << ' ' << "="; +// TYPE: printer << ' '; +// TYPE: printer << getV2(); +// TYPE: myPrinter(getV3()); +// TYPE: } + +def TypeB : TestType<"TestD"> { + let parameters = (ins + TypeParamA:$v0, + TypeParamB:$v1, + TypeParamA:$v2, + TypeParamB:$v3 + ); + + let mnemonic = "type_d"; + let assemblyFormat = "`<` `foo` `:` $v0 `,` struct($v1, $v2) `,` $v3 `>`"; +} + +/// Type test with two struct directives has correctly generated parser and +/// printer. 
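One more note before the two-struct test that follows: the generated `parse`/`print` methods checked throughout this file are only reached through the owning dialect's type hooks. A hedged sketch of that dispatch (the surrounding dialect code is illustrative; `getMnemonic` is the accessor generated for defs with a `mnemonic`):

```cpp
// Sketch: route textual IR to the generated TestDType::parse/print above.
::mlir::Type TestDialect::parseType(::mlir::DialectAsmParser &parser) const {
  ::llvm::StringRef mnemonic;
  if (parser.parseKeyword(&mnemonic))
    return {};
  if (mnemonic == TestDType::getMnemonic()) // "type_d"
    return TestDType::parse(parser);
  parser.emitError(parser.getNameLoc(), "unknown type mnemonic: ") << mnemonic;
  return {};
}

void TestDialect::printType(::mlir::Type type,
                            ::mlir::DialectAsmPrinter &printer) const {
  if (auto concrete = type.dyn_cast<TestDType>())
    concrete.print(printer);
}
```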
+ +// TYPE: ::mlir::Type TestEType::parse(::mlir::DialectAsmParser &parser) { +// TYPE: FailureOr _result_v0; +// TYPE: FailureOr _result_v1; +// TYPE: FailureOr _result_v2; +// TYPE: FailureOr _result_v3; +// TYPE: bool _seen_v0 = false; +// TYPE: bool _seen_v2 = false; +// TYPE: for (unsigned _index = 0; _index < 2; ++_index) { +// TYPE: StringRef _paramKey; +// TYPE: if (parser.parseKeyword(&_paramKey)) +// TYPE: return {}; +// TYPE: if (parser.parseEqual()) +// TYPE: return {}; +// TYPE: if (!_seen_v0 && _paramKey == "v0") { +// TYPE: _seen_v0 = true; +// TYPE: _result_v0 = ::mlir::FieldParser::parse(parser); +// TYPE: if (failed(_result_v0)) +// TYPE: return {}; +// TYPE: } else if (!_seen_v2 && _paramKey == "v2") { +// TYPE: _seen_v2 = true; +// TYPE: _result_v2 = ::mlir::FieldParser::parse(parser); +// TYPE: if (failed(_result_v2)) +// TYPE: return {}; +// TYPE: } else { +// TYPE: return {}; +// TYPE: } +// TYPE: if ((_index != 2 - 1) && parser.parseComma()) +// TYPE: return {}; +// TYPE: } +// TYPE: bool _seen_v1 = false; +// TYPE: bool _seen_v3 = false; +// TYPE: for (unsigned _index = 0; _index < 2; ++_index) { +// TYPE: StringRef _paramKey; +// TYPE: if (parser.parseKeyword(&_paramKey)) +// TYPE: return {}; +// TYPE: if (parser.parseEqual()) +// TYPE: return {}; +// TYPE: if (!_seen_v1 && _paramKey == "v1") { +// TYPE: _seen_v1 = true; +// TYPE: _result_v1 = ::mlir::FieldParser::parse(parser); +// TYPE: if (failed(_result_v1)) +// TYPE: return {}; +// TYPE: } else if (!_seen_v3 && _paramKey == "v3") { +// TYPE: _seen_v3 = true; +// TYPE: _result_v3 = ::mlir::FieldParser::parse(parser); +// TYPE: if (failed(_result_v3)) +// TYPE: return {}; +// TYPE: } else { +// TYPE: return {}; +// TYPE: } +// TYPE: if ((_index != 2 - 1) && parser.parseComma()) +// TYPE: return {}; +// TYPE: } +// TYPE: return TestEType::get(parser.getContext(), +// TYPE: _result_v0.getValue(), +// TYPE: _result_v1.getValue(), +// TYPE: _result_v2.getValue(), +// TYPE: _result_v3.getValue()); +// TYPE: } + +// TYPE: void TestEType::print(::mlir::DialectAsmPrinter &printer) const { +// TYPE: printer << "v0"; +// TYPE: printer << ' ' << "="; +// TYPE: printer << ' '; +// TYPE: printer << getV0(); +// TYPE: printer << ","; +// TYPE: printer << ' ' << "v2"; +// TYPE: printer << ' ' << "="; +// TYPE: printer << ' '; +// TYPE: printer << getV2(); +// TYPE: printer << "v1"; +// TYPE: printer << ' ' << "="; +// TYPE: printer << ' '; +// TYPE: printer << getV1(); +// TYPE: printer << ","; +// TYPE: printer << ' ' << "v3"; +// TYPE: printer << ' ' << "="; +// TYPE: printer << ' '; +// TYPE: printer << getV3(); +// TYPE: } + +def TypeC : TestType<"TestE"> { + let parameters = (ins + "IntegerAttr":$v0, + "IntegerAttr":$v1, + "IntegerAttr":$v2, + "IntegerAttr":$v3 + ); + + let mnemonic = "type_e"; + let assemblyFormat = "`{` struct($v0, $v2) `}` `{` struct($v1, $v3) `}`"; +} diff --git a/mlir/test/mlir-tblgen/op-attribute.td b/mlir/test/mlir-tblgen/op-attribute.td index cf5150c067bb5..207af6cfb5281 100644 --- a/mlir/test/mlir-tblgen/op-attribute.td +++ b/mlir/test/mlir-tblgen/op-attribute.td @@ -65,14 +65,16 @@ def AOp : NS_Op<"a_op", []> { // DEF: ::mlir::LogicalResult AOpAdaptor::verify // DEF: auto tblgen_aAttr = odsAttrs.get("aAttr"); -// DEF-NEXT: if (!tblgen_aAttr) return emitError(loc, "'test.a_op' op ""requires attribute 'aAttr'"); -// DEF: if (!((some-condition))) return emitError(loc, "'test.a_op' op ""attribute 'aAttr' failed to satisfy constraint: some attribute kind"); +// DEF-NEXT: if (!tblgen_aAttr) +// 
DEF-NEXT: return emitError(loc, "'test.a_op' op ""requires attribute 'aAttr'");
+// DEF: if (tblgen_aAttr && !((some-condition)))
+// DEF-NEXT: return emitError(loc, "'test.a_op' op ""attribute 'aAttr' failed to satisfy constraint: some attribute kind");
 // DEF: auto tblgen_bAttr = odsAttrs.get("bAttr");
-// DEF-NEXT: if (tblgen_bAttr) {
-// DEF-NEXT: if (!((some-condition))) return emitError(loc, "'test.a_op' op ""attribute 'bAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: if (tblgen_bAttr && !((some-condition)))
+// DEF-NEXT: return emitError(loc, "'test.a_op' op ""attribute 'bAttr' failed to satisfy constraint: some attribute kind");
 // DEF: auto tblgen_cAttr = odsAttrs.get("cAttr");
-// DEF-NEXT: if (tblgen_cAttr) {
-// DEF-NEXT: if (!((some-condition))) return emitError(loc, "'test.a_op' op ""attribute 'cAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: if (tblgen_cAttr && !((some-condition)))
+// DEF-NEXT: return emitError(loc, "'test.a_op' op ""attribute 'cAttr' failed to satisfy constraint: some attribute kind");
 
 // Test getter methods
 // ---
@@ -177,14 +179,16 @@ def AgetOp : Op {
 
 // DEF: ::mlir::LogicalResult AgetOpAdaptor::verify
 // DEF: auto tblgen_aAttr = odsAttrs.get("aAttr");
-// DEF-NEXT: if (!tblgen_aAttr) return emitError(loc, "'test2.a_get_op' op ""requires attribute 'aAttr'");
-// DEF: if (!((some-condition))) return emitError(loc, "'test2.a_get_op' op ""attribute 'aAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: if (!tblgen_aAttr)
+// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op ""requires attribute 'aAttr'");
+// DEF: if (tblgen_aAttr && !((some-condition)))
+// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op ""attribute 'aAttr' failed to satisfy constraint: some attribute kind");
 // DEF: auto tblgen_bAttr = odsAttrs.get("bAttr");
-// DEF-NEXT: if (tblgen_bAttr) {
-// DEF-NEXT: if (!((some-condition))) return emitError(loc, "'test2.a_get_op' op ""attribute 'bAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: if (tblgen_bAttr && !((some-condition)))
+// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op ""attribute 'bAttr' failed to satisfy constraint: some attribute kind");
 // DEF: auto tblgen_cAttr = odsAttrs.get("cAttr");
-// DEF-NEXT: if (tblgen_cAttr) {
-// DEF-NEXT: if (!((some-condition))) return emitError(loc, "'test2.a_get_op' op ""attribute 'cAttr' failed to satisfy constraint: some attribute kind");
+// DEF-NEXT: if (tblgen_cAttr && !((some-condition)))
+// DEF-NEXT: return emitError(loc, "'test2.a_get_op' op ""attribute 'cAttr' failed to satisfy constraint: some attribute kind");
 
 // Test getter methods
 // ---
@@ -267,19 +271,19 @@ def BOp : NS_Op<"b_op", []> {
 // ---
 
 // DEF-LABEL: BOpAdaptor::verify
-// DEF: if (!((true)))
-// DEF: if (!((tblgen_bool_attr.isa<::mlir::BoolAttr>())))
-// DEF: if (!(((tblgen_i32_attr.isa<::mlir::IntegerAttr>())) && ((tblgen_i32_attr.cast<::mlir::IntegerAttr>().getType().isSignlessInteger(32)))))
-// DEF: if (!(((tblgen_i64_attr.isa<::mlir::IntegerAttr>())) && ((tblgen_i64_attr.cast<::mlir::IntegerAttr>().getType().isSignlessInteger(64)))))
-// DEF: if (!(((tblgen_f32_attr.isa<::mlir::FloatAttr>())) && ((tblgen_f32_attr.cast<::mlir::FloatAttr>().getType().isF32()))))
-// DEF: if (!(((tblgen_f64_attr.isa<::mlir::FloatAttr>())) && ((tblgen_f64_attr.cast<::mlir::FloatAttr>().getType().isF64()))))
-// DEF: if (!((tblgen_str_attr.isa<::mlir::StringAttr>())))
-// DEF: if (!((tblgen_elements_attr.isa<::mlir::ElementsAttr>())))
-// DEF: if (!((tblgen_function_attr.isa<::mlir::FlatSymbolRefAttr>()))) -// DEF: if (!(((tblgen_some_type_attr.isa<::mlir::TypeAttr>())) && ((tblgen_some_type_attr.cast<::mlir::TypeAttr>().getValue().isa())))) -// DEF: if (!((tblgen_array_attr.isa<::mlir::ArrayAttr>()))) -// DEF: if (!(((tblgen_some_attr_array.isa<::mlir::ArrayAttr>())) && (::llvm::all_of(tblgen_some_attr_array.cast<::mlir::ArrayAttr>(), [&](::mlir::Attribute attr) { return (some-condition); })))) -// DEF: if (!(((tblgen_type_attr.isa<::mlir::TypeAttr>())) && ((tblgen_type_attr.cast<::mlir::TypeAttr>().getValue().isa<::mlir::Type>())))) +// DEF: if (tblgen_any_attr && !((true))) +// DEF: if (tblgen_bool_attr && !((tblgen_bool_attr.isa<::mlir::BoolAttr>()))) +// DEF: if (tblgen_i32_attr && !(((tblgen_i32_attr.isa<::mlir::IntegerAttr>())) && ((tblgen_i32_attr.cast<::mlir::IntegerAttr>().getType().isSignlessInteger(32))))) +// DEF: if (tblgen_i64_attr && !(((tblgen_i64_attr.isa<::mlir::IntegerAttr>())) && ((tblgen_i64_attr.cast<::mlir::IntegerAttr>().getType().isSignlessInteger(64))))) +// DEF: if (tblgen_f32_attr && !(((tblgen_f32_attr.isa<::mlir::FloatAttr>())) && ((tblgen_f32_attr.cast<::mlir::FloatAttr>().getType().isF32())))) +// DEF: if (tblgen_f64_attr && !(((tblgen_f64_attr.isa<::mlir::FloatAttr>())) && ((tblgen_f64_attr.cast<::mlir::FloatAttr>().getType().isF64())))) +// DEF: if (tblgen_str_attr && !((tblgen_str_attr.isa<::mlir::StringAttr>()))) +// DEF: if (tblgen_elements_attr && !((tblgen_elements_attr.isa<::mlir::ElementsAttr>()))) +// DEF: if (tblgen_function_attr && !((tblgen_function_attr.isa<::mlir::FlatSymbolRefAttr>()))) +// DEF: if (tblgen_some_type_attr && !(((tblgen_some_type_attr.isa<::mlir::TypeAttr>())) && ((tblgen_some_type_attr.cast<::mlir::TypeAttr>().getValue().isa())))) +// DEF: if (tblgen_array_attr && !((tblgen_array_attr.isa<::mlir::ArrayAttr>()))) +// DEF: if (tblgen_some_attr_array && !(((tblgen_some_attr_array.isa<::mlir::ArrayAttr>())) && (::llvm::all_of(tblgen_some_attr_array.cast<::mlir::ArrayAttr>(), [&](::mlir::Attribute attr) { return (some-condition); })))) +// DEF: if (tblgen_type_attr && !(((tblgen_type_attr.isa<::mlir::TypeAttr>())) && ((tblgen_type_attr.cast<::mlir::TypeAttr>().getValue().isa<::mlir::Type>())))) // Test common attribute kind getters' return types // --- diff --git a/mlir/test/mlir-tblgen/op-python-bindings.td b/mlir/test/mlir-tblgen/op-python-bindings.td index d6dc56428eb57..becce13050a18 100644 --- a/mlir/test/mlir-tblgen/op-python-bindings.td +++ b/mlir/test/mlir-tblgen/op-python-bindings.td @@ -18,7 +18,7 @@ class TestOp traits = []> : // CHECK: @_ods_cext.register_operation(_Dialect) // CHECK: class AttrSizedOperandsOp(_ods_ir.OpView): // CHECK-LABEL: OPERATION_NAME = "test.attr_sized_operands" -// CHECK: _ODS_OPERAND_SEGMENTS = [-1,1,-1,] +// CHECK: _ODS_OPERAND_SEGMENTS = [-1,1,0,] def AttrSizedOperandsOp : TestOp<"attr_sized_operands", [AttrSizedOperandSegments]> { // CHECK: def __init__(self, variadic1, non_variadic, variadic2, *, loc=None, ip=None): @@ -28,7 +28,7 @@ def AttrSizedOperandsOp : TestOp<"attr_sized_operands", // CHECK: regions = None // CHECK: operands.append(_get_op_results_or_values(variadic1)) // CHECK: operands.append(_get_op_result_or_value(non_variadic)) - // CHECK: if variadic2 is not None: operands.append(_get_op_result_or_value(variadic2)) + // CHECK: operands.append(_get_op_result_or_value(variadic2) if variadic2 is not None else None) // CHECK: _ods_successors = None // CHECK: super().__init__(self.build_generic( // CHECK: 
attributes=attributes, results=results, operands=operands,
@@ -40,6 +40,7 @@ def AttrSizedOperandsOp : TestOp<"attr_sized_operands",
   // CHECK: self.operation.operands,
   // CHECK: self.operation.attributes["operand_segment_sizes"], 0)
   // CHECK: return operand_range
+  // CHECK-NOT: if len(operand_range)
   //
   // CHECK: @builtins.property
   // CHECK: def non_variadic(self):
@@ -61,7 +62,7 @@ def AttrSizedOperandsOp : TestOp<"attr_sized_operands",
 // CHECK: @_ods_cext.register_operation(_Dialect)
 // CHECK: class AttrSizedResultsOp(_ods_ir.OpView):
 // CHECK-LABEL: OPERATION_NAME = "test.attr_sized_results"
-// CHECK: _ODS_RESULT_SEGMENTS = [-1,1,-1,]
+// CHECK: _ODS_RESULT_SEGMENTS = [0,1,-1,]
 def AttrSizedResultsOp : TestOp<"attr_sized_results",
                                 [AttrSizedResultSegments]> {
   // CHECK: def __init__(self, variadic1, non_variadic, variadic2, *, loc=None, ip=None):
@@ -71,7 +72,7 @@ def AttrSizedResultsOp : TestOp<"attr_sized_results",
   // CHECK: regions = None
   // CHECK: if variadic1 is not None: results.append(variadic1)
   // CHECK: results.append(non_variadic)
-  // CHECK: if variadic2 is not None: results.append(variadic2)
+  // CHECK: results.append(variadic2)
   // CHECK: _ods_successors = None
   // CHECK: super().__init__(self.build_generic(
   // CHECK: attributes=attributes, results=results, operands=operands,
@@ -97,8 +98,9 @@ def AttrSizedResultsOp : TestOp<"attr_sized_results",
   // CHECK: self.operation.results,
   // CHECK: self.operation.attributes["result_segment_sizes"], 2)
   // CHECK: return result_range
+  // CHECK-NOT: if len(result_range)
   let results = (outs Optional<AnyType>:$variadic1, AnyType:$non_variadic,
-                 Optional<AnyType>:$variadic2);
+                 Variadic<AnyType>:$variadic2);
 }
 
@@ -277,6 +279,35 @@ def MissingNamesOp : TestOp<"missing_names"> {
   let results = (outs I32:$i32, AnyFloat, I64:$i64);
 }
 
+// CHECK: @_ods_cext.register_operation(_Dialect)
+// CHECK: class OneOptionalOperandOp(_ods_ir.OpView):
+// CHECK-LABEL: OPERATION_NAME = "test.one_optional_operand"
+// CHECK-NOT: _ODS_OPERAND_SEGMENTS
+// CHECK-NOT: _ODS_RESULT_SEGMENTS
+def OneOptionalOperandOp : TestOp<"one_optional_operand"> {
+  let arguments = (ins AnyType:$non_optional, Optional<AnyType>:$optional);
+  // CHECK: def __init__(self, non_optional, optional, *, loc=None, ip=None):
+  // CHECK: operands = []
+  // CHECK: results = []
+  // CHECK: attributes = {}
+  // CHECK: regions = None
+  // CHECK: operands.append(_get_op_result_or_value(non_optional))
+  // CHECK: if optional is not None: operands.append(_get_op_result_or_value(optional))
+  // CHECK: _ods_successors = None
+  // CHECK: super().__init__(self.build_generic(
+  // CHECK: attributes=attributes, results=results, operands=operands,
+  // CHECK: successors=_ods_successors, regions=regions, loc=loc, ip=ip))
+
+  // CHECK: @builtins.property
+  // CHECK: def non_optional(self):
+  // CHECK: return self.operation.operands[0]
+
+  // CHECK: @builtins.property
+  // CHECK: def optional(self):
+  // CHECK: return self.operation.operands[1] if len(self.operation.operands) > 1 else None
+
+}
+
 // CHECK: @_ods_cext.register_operation(_Dialect)
 // CHECK: class OneVariadicOperandOp(_ods_ir.OpView):
 // CHECK-LABEL: OPERATION_NAME = "test.one_variadic_operand"
diff --git a/mlir/test/mlir-tblgen/predicate.td b/mlir/test/mlir-tblgen/predicate.td
index 71ed215b774c8..2170bfb829d55 100644
--- a/mlir/test/mlir-tblgen/predicate.td
+++ b/mlir/test/mlir-tblgen/predicate.td
@@ -51,7 +51,7 @@ def OpF : NS_Op<"op_for_int_min_val", []> {
 
 // CHECK-LABEL: OpFAdaptor::verify
 // CHECK: (tblgen_attr.cast<::mlir::IntegerAttr>().getInt() >= 10)
-// CHECK-SAME: 
"attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose minimum value is 10" +// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose minimum value is 10" def OpFX : NS_Op<"op_for_int_max_val", []> { let arguments = (ins Confined]>:$attr); @@ -59,7 +59,7 @@ def OpFX : NS_Op<"op_for_int_max_val", []> { // CHECK-LABEL: OpFXAdaptor::verify // CHECK: (tblgen_attr.cast<::mlir::IntegerAttr>().getInt() <= 10) -// CHECK-SAME: "attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose maximum value is 10" +// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: 32-bit signless integer attribute whose maximum value is 10" def OpG : NS_Op<"op_for_arr_min_count", []> { let arguments = (ins Confined]>:$attr); @@ -67,7 +67,7 @@ def OpG : NS_Op<"op_for_arr_min_count", []> { // CHECK-LABEL: OpGAdaptor::verify // CHECK: (tblgen_attr.cast<::mlir::ArrayAttr>().size() >= 8) -// CHECK-SAME: "attribute 'attr' failed to satisfy constraint: array attribute with at least 8 elements" +// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute with at least 8 elements" def OpH : NS_Op<"op_for_arr_value_at_index", []> { let arguments = (ins Confined]>:$attr); @@ -75,7 +75,7 @@ def OpH : NS_Op<"op_for_arr_value_at_index", []> { // CHECK-LABEL: OpHAdaptor::verify // CHECK: (((tblgen_attr.cast<::mlir::ArrayAttr>().size() > 0)) && ((tblgen_attr.cast<::mlir::ArrayAttr>()[0].cast<::mlir::IntegerAttr>().getInt() == 8))))) -// CHECK-SAME: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be 8" +// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be 8" def OpI: NS_Op<"op_for_arr_min_value_at_index", []> { let arguments = (ins Confined]>:$attr); @@ -83,7 +83,7 @@ def OpI: NS_Op<"op_for_arr_min_value_at_index", []> { // CHECK-LABEL: OpIAdaptor::verify // CHECK: (((tblgen_attr.cast<::mlir::ArrayAttr>().size() > 0)) && ((tblgen_attr.cast<::mlir::ArrayAttr>()[0].cast<::mlir::IntegerAttr>().getInt() >= 8))))) -// CHECK-SAME: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at least 8" +// CHECK-NEXT: "attribute 'attr' failed to satisfy constraint: array attribute whose 0-th element must be at least 8" def OpJ: NS_Op<"op_for_TCopVTEtAreSameAt", [ PredOpTrait<"operands indexed at 0, 2, 3 should all have " @@ -121,4 +121,4 @@ def OpL : NS_Op<"op_for_StringEscaping", []> { // CHECK-LABEL: OpLAdaptor::verify // CHECK: getValue() == "foo" -// CHECK-SAME: only value \"foo\" is allowed +// CHECK-NEXT: only value \"foo\" is allowed diff --git a/mlir/test/python/dialects/tensor.py b/mlir/test/python/dialects/tensor.py new file mode 100644 index 0000000000000..3754097badd73 --- /dev/null +++ b/mlir/test/python/dialects/tensor.py @@ -0,0 +1,39 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +import mlir.dialects.arith as arith +import mlir.dialects.builtin as builtin +import mlir.dialects.tensor as tensor + + +def run(f): + print("\nTEST:", f.__name__) + f() + return f + + +# CHECK-LABEL: TEST: testDimOp +@run +def testDimOp(): + with Context() as ctx, Location.unknown(): + module = Module.create() + f32Type = F32Type.get() + indexType = IndexType.get() + with InsertionPoint(module.body): + + @builtin.FuncOp.from_py_func(RankedTensorType.get((-1, -1), f32Type)) + # CHECK: func @tensor_static_dim + # CHECK-SAME: %[[ARG0:.+]]: tensor + # CHECK-DAG: %[[C0:.+]] = 
arith.constant 0 : index + # CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index + # CHECK: %[[D0:.+]] = tensor.dim %[[ARG0]], %[[C0]] + # CHECK: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] + # CHECK: return %[[D0]], %[[D1]] + def tensor_static_dim(t): + c0 = arith.ConstantOp(indexType, 0) + c1 = arith.ConstantOp(indexType, 1) + d0 = tensor.DimOp(t, c0) + d1 = tensor.DimOp(t, c1) + return [d0.result, d1.result] + + print(module) diff --git a/mlir/test/python/dialects/vector.py b/mlir/test/python/dialects/vector.py index 4d7052859e7df..b8db94070d6a2 100644 --- a/mlir/test/python/dialects/vector.py +++ b/mlir/test/python/dialects/vector.py @@ -2,25 +2,58 @@ from mlir.ir import * import mlir.dialects.builtin as builtin +import mlir.dialects.std as std import mlir.dialects.vector as vector def run(f): print("\nTEST:", f.__name__) - f() + with Context(), Location.unknown(): + f() + return f # CHECK-LABEL: TEST: testPrintOp @run def testPrintOp(): - with Context() as ctx, Location.unknown(): - module = Module.create() - with InsertionPoint(module.body): - @builtin.FuncOp.from_py_func(VectorType.get((12, 5), F32Type.get())) - def print_vector(arg): - return vector.PrintOp(arg) - - # CHECK-LABEL: func @print_vector( - # CHECK-SAME: %[[ARG:.*]]: vector<12x5xf32>) { - # CHECK: vector.print %[[ARG]] : vector<12x5xf32> - # CHECK: return - # CHECK: } - print(module) + module = Module.create() + with InsertionPoint(module.body): + + @builtin.FuncOp.from_py_func(VectorType.get((12, 5), F32Type.get())) + def print_vector(arg): + return vector.PrintOp(arg) + + # CHECK-LABEL: func @print_vector( + # CHECK-SAME: %[[ARG:.*]]: vector<12x5xf32>) { + # CHECK: vector.print %[[ARG]] : vector<12x5xf32> + # CHECK: return + # CHECK: } + print(module) + + +# CHECK-LABEL: TEST: testTransferReadOp +@run +def testTransferReadOp(): + module = Module.create() + with InsertionPoint(module.body): + vector_type = VectorType.get([2, 3], F32Type.get()) + memref_type = MemRefType.get([-1, -1], F32Type.get()) + index_type = IndexType.get() + mask_type = VectorType.get(vector_type.shape, IntegerType.get_signless(1)) + identity_map = AffineMap.get_identity(vector_type.rank) + identity_map_attr = AffineMapAttr.get(identity_map) + func = builtin.FuncOp("transfer_read", + ([memref_type, index_type, + F32Type.get(), mask_type], [])) + with InsertionPoint(func.add_entry_block()): + A, zero, padding, mask = func.arguments + vector.TransferReadOp(vector_type, A, [zero, zero], identity_map_attr, + padding, mask, None) + vector.TransferReadOp(vector_type, A, [zero, zero], identity_map_attr, + padding, None, None) + std.ReturnOp([]) + + # CHECK: @transfer_read(%[[MEM:.*]]: memref, %[[IDX:.*]]: index, + # CHECK: %[[PAD:.*]]: f32, %[[MASK:.*]]: vector<2x3xi1>) + # CHECK: vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[PAD]], %[[MASK]] + # CHECK: vector.transfer_read %[[MEM]][%[[IDX]], %[[IDX]]], %[[PAD]] + # CHECK-NOT: %[[MASK]] + print(module) diff --git a/mlir/test/python/ir/affine_expr.py b/mlir/test/python/ir/affine_expr.py index 184466870a578..9854b496fe460 100644 --- a/mlir/test/python/ir/affine_expr.py +++ b/mlir/test/python/ir/affine_expr.py @@ -137,6 +137,14 @@ def testAffineAddExpr(): # CHECK: d1 + d2 print(d12op) + d1cst_op = d1 + 2 + # CHECK: d1 + 2 + print(d1cst_op) + + d1cst_op2 = 2 + d1 + # CHECK: d1 + 2 + print(d1cst_op2) + assert d12 == d12op assert d12.lhs == d1 assert d12.rhs == d2 @@ -156,7 +164,16 @@ def testAffineMulExpr(): op = d1 * c2 print(op) + # CHECK: d1 * 2 + op_cst = d1 * 2 + print(op_cst) + + # CHECK: d1 * 2 + 
op_cst2 = 2 * d1 + print(op_cst2) + assert expr == op + assert expr == op_cst assert expr.lhs == d1 assert expr.rhs == c2 @@ -175,10 +192,32 @@ def testAffineModExpr(): op = d1 % c2 print(op) + # CHECK: d1 mod 2 + op_cst = d1 % 2 + print(op_cst) + + # CHECK: 2 mod d1 + print(2 % d1) + assert expr == op + assert expr == op_cst assert expr.lhs == d1 assert expr.rhs == c2 + expr2 = AffineExpr.get_mod(c2, d1) + expr3 = AffineExpr.get_mod(2, d1) + expr4 = AffineExpr.get_mod(d1, 2) + + # CHECK: 2 mod d1 + print(expr2) + # CHECK: 2 mod d1 + print(expr3) + # CHECK: d1 mod 2 + print(expr4) + + assert expr2 == expr3 + assert expr4 == expr + # CHECK-LABEL: TEST: testAffineFloorDivExpr @run @@ -193,6 +232,20 @@ def testAffineFloorDivExpr(): assert expr.lhs == d1 assert expr.rhs == c2 + expr2 = AffineExpr.get_floor_div(c2, d1) + expr3 = AffineExpr.get_floor_div(2, d1) + expr4 = AffineExpr.get_floor_div(d1, 2) + + # CHECK: 2 floordiv d1 + print(expr2) + # CHECK: 2 floordiv d1 + print(expr3) + # CHECK: d1 floordiv 2 + print(expr4) + + assert expr2 == expr3 + assert expr4 == expr + # CHECK-LABEL: TEST: testAffineCeilDivExpr @run @@ -207,6 +260,20 @@ def testAffineCeilDivExpr(): assert expr.lhs == d1 assert expr.rhs == c2 + expr2 = AffineExpr.get_ceil_div(c2, d1) + expr3 = AffineExpr.get_ceil_div(2, d1) + expr4 = AffineExpr.get_ceil_div(d1, 2) + + # CHECK: 2 ceildiv d1 + print(expr2) + # CHECK: 2 ceildiv d1 + print(expr3) + # CHECK: d1 ceildiv 2 + print(expr4) + + assert expr2 == expr3 + assert expr4 == expr + # CHECK-LABEL: TEST: testAffineExprSub @run @@ -225,6 +292,15 @@ def testAffineExprSub(): # CHECK: -1 print(rhs.rhs) + # CHECK: d1 - 42 + print(d1 - 42) + # CHECK: -d1 + 42 + print(42 - d1) + + c42 = AffineConstantExpr.get(42) + assert d1 - 42 == d1 - c42 + assert 42 - d1 == c42 - d1 + # CHECK-LABEL: TEST: testClassHierarchy @run def testClassHierarchy(): @@ -289,3 +365,38 @@ def testIsInstance(): print(AffineMulExpr.isinstance(mul)) # CHECK: False print(AffineAddExpr.isinstance(mul)) + + +# CHECK-LABEL: TEST: testCompose +@run +def testCompose(): + with Context(): + # d0 + d2. 
+ expr = AffineAddExpr.get(AffineDimExpr.get(0), AffineDimExpr.get(2)) + + # (d0, d1, d2)[s0, s1] -> (d0 + s1, d1 + s0, d0 + d1 + d2) + map1 = AffineAddExpr.get(AffineDimExpr.get(0), AffineSymbolExpr.get(1)) + map2 = AffineAddExpr.get(AffineDimExpr.get(1), AffineSymbolExpr.get(0)) + map3 = AffineAddExpr.get( + AffineAddExpr.get(AffineDimExpr.get(0), AffineDimExpr.get(1)), + AffineDimExpr.get(2)) + map = AffineMap.get(3, 2, [map1, map2, map3]) + + # CHECK: d0 + s1 + d0 + d1 + d2 + print(expr.compose(map)) + + +# CHECK-LABEL: TEST: testHash +@run +def testHash(): + with Context(): + d0 = AffineDimExpr.get(0) + s1 = AffineSymbolExpr.get(1) + assert hash(d0) == hash(AffineDimExpr.get(0)) + assert hash(d0 + s1) == hash(AffineAddExpr.get(d0, s1)) + + dictionary = dict() + dictionary[d0] = 0 + dictionary[s1] = 1 + assert d0 in dictionary + assert s1 in dictionary diff --git a/mlir/test/python/ir/affine_map.py b/mlir/test/python/ir/affine_map.py index da5d230f42cde..52c7261500c90 100644 --- a/mlir/test/python/ir/affine_map.py +++ b/mlir/test/python/ir/affine_map.py @@ -9,9 +9,11 @@ def run(f): f() gc.collect() assert Context._get_live_count() == 0 + return f # CHECK-LABEL: TEST: testAffineMapCapsule +@run def testAffineMapCapsule(): with Context() as ctx: am1 = AffineMap.get_empty(ctx) @@ -23,10 +25,8 @@ def testAffineMapCapsule(): assert am2.context is ctx -run(testAffineMapCapsule) - - # CHECK-LABEL: TEST: testAffineMapGet +@run def testAffineMapGet(): with Context() as ctx: d0 = AffineDimExpr.get(0) @@ -100,10 +100,8 @@ def testAffineMapGet(): print(e) -run(testAffineMapGet) - - # CHECK-LABEL: TEST: testAffineMapDerive +@run def testAffineMapDerive(): with Context() as ctx: map5 = AffineMap.get_identity(5) @@ -121,10 +119,8 @@ def testAffineMapDerive(): print(map34) -run(testAffineMapDerive) - - # CHECK-LABEL: TEST: testAffineMapProperties +@run def testAffineMapProperties(): with Context(): d0 = AffineDimExpr.get(0) @@ -147,10 +143,8 @@ def testAffineMapProperties(): print(map3.is_projected_permutation) -run(testAffineMapProperties) - - # CHECK-LABEL: TEST: testAffineMapExprs +@run def testAffineMapExprs(): with Context(): d0 = AffineDimExpr.get(0) @@ -181,10 +175,8 @@ def testAffineMapExprs(): assert list(map3.results) == [d2, d0, d1] -run(testAffineMapExprs) - - # CHECK-LABEL: TEST: testCompressUnusedSymbols +@run def testCompressUnusedSymbols(): with Context() as ctx: d0, d1, d2 = (AffineDimExpr.get(0), AffineDimExpr.get(1), @@ -210,10 +202,8 @@ def testCompressUnusedSymbols(): print(compressed_maps) -run(testCompressUnusedSymbols) - - # CHECK-LABEL: TEST: testReplace +@run def testReplace(): with Context() as ctx: d0, d1, d2 = (AffineDimExpr.get(0), AffineDimExpr.get(1), @@ -236,4 +226,16 @@ def testReplace(): print(replace3) -run(testReplace) +# CHECK-LABEL: TEST: testHash +@run +def testHash(): + with Context(): + d0, d1 = AffineDimExpr.get(0), AffineDimExpr.get(1) + m1 = AffineMap.get(2, 0, [d0, d1]) + m2 = AffineMap.get(2, 0, [d1, d0]) + assert hash(m1) == hash(AffineMap.get(2, 0, [d0, d1])) + + dictionary = dict() + dictionary[m1] = 1 + dictionary[m2] = 2 + assert m1 in dictionary diff --git a/mlir/test/python/ir/attributes.py b/mlir/test/python/ir/attributes.py index 9bbf23cf20855..5f8dd0ad1183f 100644 --- a/mlir/test/python/ir/attributes.py +++ b/mlir/test/python/ir/attributes.py @@ -66,10 +66,6 @@ def testAttrHash(): a3 = Attribute.parse('"attr1"') # CHECK: hash(a1) == hash(a3): True print("hash(a1) == hash(a3):", a1.__hash__() == a3.__hash__()) - # In general, hashes don't have to 
be unique. In this case, however, the - # hash is just the underlying pointer so it will be. - # CHECK: hash(a1) == hash(a2): False - print("hash(a1) == hash(a2):", a1.__hash__() == a2.__hash__()) s = set() s.add(a1) diff --git a/mlir/test/python/ir/builtin_types.py b/mlir/test/python/ir/builtin_types.py index 911391f2d528b..7d881b90f0fb2 100644 --- a/mlir/test/python/ir/builtin_types.py +++ b/mlir/test/python/ir/builtin_types.py @@ -67,10 +67,6 @@ def testTypeHash(): # CHECK: hash(t1) == hash(t3): True print("hash(t1) == hash(t3):", t1.__hash__() == t3.__hash__()) - # In general, hashes don't have to be unique. In this case, however, the - # hash is just the underlying pointer so it will be. - # CHECK: hash(t1) == hash(t2): False - print("hash(t1) == hash(t2):", t1.__hash__() == t2.__hash__()) s = set() s.add(t1) @@ -319,6 +315,9 @@ def testRankedTensorType(): # Encoding should be None. assert RankedTensorType.get(shape, f32).encoding is None + tensor = RankedTensorType.get(shape, f32) + assert tensor.shape == shape + # CHECK-LABEL: TEST: testUnrankedTensorType @run @@ -400,6 +399,8 @@ def testMemRefType(): else: print("Exception not produced") + assert memref.shape == shape + # CHECK-LABEL: TEST: testUnrankedMemRefType @run diff --git a/mlir/test/python/ir/integer_set.py b/mlir/test/python/ir/integer_set.py index bdec8afba0ebf..b916d9ab386e9 100644 --- a/mlir/test/python/ir/integer_set.py +++ b/mlir/test/python/ir/integer_set.py @@ -8,9 +8,11 @@ def run(f): f() gc.collect() assert Context._get_live_count() == 0 + return f # CHECK-LABEL: TEST: testIntegerSetCapsule +@run def testIntegerSetCapsule(): with Context() as ctx: is1 = IntegerSet.get_empty(1, 1, ctx) @@ -21,10 +23,9 @@ def testIntegerSetCapsule(): assert is1 == is2 assert is2.context is ctx -run(testIntegerSetCapsule) - # CHECK-LABEL: TEST: testIntegerSetGet +@run def testIntegerSetGet(): with Context(): d0 = AffineDimExpr.get(0) @@ -92,10 +93,9 @@ def testIntegerSetGet(): # CHECK: Invalid expression (None?) 
when attempting to create an IntegerSet by replacing symbols
     print(e)
 
-run(testIntegerSetGet)
-
 
 # CHECK-LABEL: TEST: testIntegerSetProperties
+@run
 def testIntegerSetProperties():
   with Context():
     d0 = AffineDimExpr.get(0)
@@ -125,4 +125,17 @@ def testIntegerSetProperties():
     print(cstr.expr, end='')
     print(" == 0" if cstr.is_eq else " >= 0")
 
-run(testIntegerSetProperties)
+
+# CHECK-LABEL: TEST: testHash
+@run
+def testHash():
+  with Context():
+    d0 = AffineDimExpr.get(0)
+    d1 = AffineDimExpr.get(1)
+    set = IntegerSet.get(2, 0, [d0 + d1], [True])
+
+    assert hash(set) == hash(IntegerSet.get(2, 0, [d0 + d1], [True]))
+
+    dictionary = dict()
+    dictionary[set] = 42
+    assert set in dictionary
diff --git a/mlir/test/python/ir/operation.py b/mlir/test/python/ir/operation.py
index c94c22ea53a0b..8771ca046b8b8 100644
--- a/mlir/test/python/ir/operation.py
+++ b/mlir/test/python/ir/operation.py
@@ -741,6 +741,7 @@ def testOperationLoc():
     assert op.location == loc
     assert op.operation.location == loc
 
+
 # CHECK-LABEL: TEST: testModuleMerge
 @run
 def testModuleMerge():
@@ -773,7 +774,7 @@ def testAppendMoveFromAnotherBlock():
   with Context():
     m1 = Module.parse("func private @foo()")
     m2 = Module.parse("func private @bar()")
-    func = m1.body.operations[0]
+    func = m1.body.operations[0]
     m2.body.append(func)
 
     # CHECK: module
@@ -803,3 +804,86 @@ def testDetachFromParent():
     print(m1)
 
   # CHECK-NOT: func private @foo
+
+
+# CHECK-LABEL: TEST: testSymbolTable
+@run
+def testSymbolTable():
+  with Context() as ctx:
+    ctx.allow_unregistered_dialects = True
+    m1 = Module.parse("""
+      func private @foo()
+      func private @bar()""")
+    m2 = Module.parse("""
+      func private @qux()
+      func private @foo()
+      "foo.bar"() : () -> ()""")
+
+    symbol_table = SymbolTable(m1.operation)
+
+    # CHECK: func private @foo
+    # CHECK: func private @bar
+    assert "foo" in symbol_table
+    print(symbol_table["foo"])
+    assert "bar" in symbol_table
+    bar = symbol_table["bar"]
+    print(symbol_table["bar"])
+
+    assert "qux" not in symbol_table
+
+    del symbol_table["bar"]
+    try:
+      symbol_table.erase(symbol_table["bar"])
+    except KeyError:
+      pass
+    else:
+      assert False, "expected KeyError"
+
+    # CHECK: module
+    # CHECK: func private @foo()
+    print(m1)
+    assert "bar" not in symbol_table
+
+    try:
+      print(bar)
+    except RuntimeError as e:
+      if "the operation has been invalidated" not in str(e):
+        raise
+    else:
+      assert False, "expected RuntimeError due to invalidated operation"
+
+    qux = m2.body.operations[0]
+    m1.body.append(qux)
+    symbol_table.insert(qux)
+    assert "qux" in symbol_table
+
+    # Check that insertion actually renames this symbol in the symbol table.
+    foo2 = m2.body.operations[0]
+    m1.body.append(foo2)
+    updated_name = symbol_table.insert(foo2)
+    assert foo2.name.value != "foo"
+    assert foo2.name == updated_name
+
+    # CHECK: module
+    # CHECK: func private @foo()
+    # CHECK: func private @qux()
+    # CHECK: func private @foo{{.*}}
+    print(m1)
+
+    try:
+      symbol_table.insert(m2.body.operations[0])
+    except ValueError as e:
+      if "Expected operation to have a symbol name" not in str(e):
+        raise
+    else:
+      assert False, "expected ValueError when adding a non-symbol"
+
+
+# CHECK-LABEL: TEST: testOperationHash
+@run
+def testOperationHash():
+  ctx = Context()
+  ctx.allow_unregistered_dialects = True
+  with ctx, Location.unknown():
+    op = Operation.create("custom.op1")
+    assert hash(op) == hash(op.operation)
diff --git a/mlir/test/python/ir/value.py b/mlir/test/python/ir/value.py
index 230025a8f3306..cfbfdf35fcf6f 100644
--- a/mlir/test/python/ir/value.py
+++ b/mlir/test/python/ir/value.py
@@ -55,3 +55,22 @@ def testValueIsInstance():
   op = func.regions[0].blocks[0].operations[0]
   assert not BlockArgument.isinstance(op.results[0])
   assert OpResult.isinstance(op.results[0])
+
+
+# CHECK-LABEL: TEST: testValueHash
+@run
+def testValueHash():
+  ctx = Context()
+  ctx.allow_unregistered_dialects = True
+  module = Module.parse(
+      r"""
+    func @foo(%arg0: f32) -> f32 {
+      %0 = "some_dialect.some_op"(%arg0) : (f32) -> f32
+      return %0 : f32
+    }""", ctx)
+
+  [func] = module.body.operations
+  block = func.entry_block
+  op, ret = block.operations
+  assert hash(block.arguments[0]) == hash(op.operands[0])
+  assert hash(op.result) == hash(ret.operands[0])
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
index a1b0836a55d7b..5e4cb4d73a392 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AttrOrTypeFormatGen.h"
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/TableGen/AttrOrTypeDef.h"
 #include "mlir/TableGen/CodeGenHelpers.h"
@@ -24,6 +25,17 @@
 using namespace mlir;
 using namespace mlir::tblgen;
 
+//===----------------------------------------------------------------------===//
+// Utility Functions
+//===----------------------------------------------------------------------===//
+
+std::string mlir::tblgen::getParameterAccessorName(StringRef name) {
+  assert(!name.empty() && "parameter has empty name");
+  auto ret = "get" + name.str();
+  ret[3] = llvm::toUpper(ret[3]); // uppercase first letter of the name
+  return ret;
+}
+
 /// Find all the AttrOrTypeDef for the specified dialect. If no dialect
 /// specified and can only find one dialect's defs, use that.
 static void collectAllDefs(StringRef selectedDialect,
@@ -399,7 +411,8 @@ void DefGenerator::emitDefDecl(const AttrOrTypeDef &def) {
      << " }\n";
 
   // If mnemonic specified, emit print/parse declarations.
-  if (def.getParserCode() || def.getPrinterCode() || !params.empty()) {
+  if (def.getParserCode() || def.getPrinterCode() ||
+      def.getAssemblyFormat() || !params.empty()) {
     os << llvm::formatv(defDeclParsePrintStr, valueType, isAttrGenerator ? 
", ::mlir::Type type" : ""); } @@ -410,10 +423,8 @@ void DefGenerator::emitDefDecl(const AttrOrTypeDef &def) { def.getParameters(parameters); for (AttrOrTypeParameter ¶meter : parameters) { - SmallString<16> name = parameter.getName(); - name[0] = llvm::toUpper(name[0]); - os << formatv(" {0} get{1}() const;\n", parameter.getCppAccessorType(), - name); + os << formatv(" {0} {1}() const;\n", parameter.getCppAccessorType(), + getParameterAccessorName(parameter.getName())); } } @@ -700,8 +711,32 @@ void DefGenerator::emitStorageClass(const AttrOrTypeDef &def) { } void DefGenerator::emitParsePrint(const AttrOrTypeDef &def) { + auto printerCode = def.getPrinterCode(); + auto parserCode = def.getParserCode(); + auto assemblyFormat = def.getAssemblyFormat(); + if (assemblyFormat && (printerCode || parserCode)) { + // Custom assembly format cannot be specified at the same time as either + // custom printer or parser code. + PrintFatalError(def.getLoc(), + def.getName() + ": assembly format cannot be specified at " + "the same time as printer or parser code"); + } + + // Generate a parser and printer based on the assembly format, if specified. + if (assemblyFormat) { + // A custom assembly format requires accessors to be generated for the + // generated printer. + if (!def.genAccessors()) { + PrintFatalError(def.getLoc(), + def.getName() + + ": the generated printer from 'assemblyFormat' " + "requires 'genAccessors' to be true"); + } + return generateAttrOrTypeFormat(def, os); + } + // Emit the printer code, if specified. - if (Optional printerCode = def.getPrinterCode()) { + if (printerCode) { // Both the mnenomic and printerCode must be defined (for parity with // parserCode). os << "void " << def.getCppClassName() @@ -717,7 +752,7 @@ void DefGenerator::emitParsePrint(const AttrOrTypeDef &def) { } // Emit the parser code, if specified. - if (Optional parserCode = def.getParserCode()) { + if (parserCode) { FmtContext fmtCtxt; fmtCtxt.addSubst("_parser", "parser") .addSubst("_ctxt", "parser.getContext()"); @@ -857,11 +892,10 @@ void DefGenerator::emitDefDef(const AttrOrTypeDef &def) { paramStorageName = param.getName(); } - SmallString<16> name = param.getName(); - name[0] = llvm::toUpper(name[0]); - os << formatv("{0} {3}::get{1}() const {{ return getImpl()->{2}; }\n", - param.getCppAccessorType(), name, paramStorageName, - def.getCppClassName()); + os << formatv("{0} {3}::{1}() const {{ return getImpl()->{2}; }\n", + param.getCppAccessorType(), + getParameterAccessorName(param.getName()), + paramStorageName, def.getCppClassName()); } } } diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp new file mode 100644 index 0000000000000..52e921f2fb27a --- /dev/null +++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp @@ -0,0 +1,781 @@ +//===- AttrOrTypeFormatGen.cpp - MLIR attribute and type format generator -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AttrOrTypeFormatGen.h" +#include "FormatGen.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/TableGen/AttrOrTypeDef.h" +#include "mlir/TableGen/Format.h" +#include "mlir/TableGen/GenInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/TableGenBackend.h" + +using namespace mlir; +using namespace mlir::tblgen; + +using llvm::formatv; + +//===----------------------------------------------------------------------===// +// Element +//===----------------------------------------------------------------------===// + +namespace { + +/// This class represents a single format element. +class Element { +public: + /// LLVM-style RTTI. + enum class Kind { + /// This element is a directive. + ParamsDirective, + StructDirective, + + /// This element is a literal. + Literal, + + /// This element is a variable. + Variable, + }; + Element(Kind kind) : kind(kind) {} + virtual ~Element() = default; + + /// Return the kind of this element. + Kind getKind() const { return kind; } + +private: + /// The kind of this element. + Kind kind; +}; + +/// This class represents an instance of a literal element. +class LiteralElement : public Element { +public: + LiteralElement(StringRef literal) + : Element(Kind::Literal), literal(literal) {} + + static bool classof(const Element *el) { + return el->getKind() == Kind::Literal; + } + + /// Get the literal spelling. + StringRef getSpelling() const { return literal; } + +private: + /// The spelling of the literal for this element. + StringRef literal; +}; + +/// This class represents an instance of a variable element. A variable refers +/// to an attribute or type parameter. +class VariableElement : public Element { +public: + VariableElement(AttrOrTypeParameter param) + : Element(Kind::Variable), param(param) {} + + static bool classof(const Element *el) { + return el->getKind() == Kind::Variable; + } + + /// Get the parameter in the element. + const AttrOrTypeParameter &getParam() const { return param; } + +private: + AttrOrTypeParameter param; +}; + +/// Base class for a directive that contains references to multiple variables. +template +class ParamsDirectiveBase : public Element { +public: + using Base = ParamsDirectiveBase; + + ParamsDirectiveBase(SmallVector> &¶ms) + : Element(ElementKind), params(std::move(params)) {} + + static bool classof(const Element *el) { + return el->getKind() == ElementKind; + } + + /// Get the parameters contained in this directive. + auto getParams() const { + return llvm::map_range(params, [](auto &el) { + return cast(el.get())->getParam(); + }); + } + + /// Get the number of parameters. + unsigned getNumParams() const { return params.size(); } + + /// Take all of the parameters from this directive. + SmallVector> takeParams() { + return std::move(params); + } + +private: + /// The parameters captured by this directive. + SmallVector> params; +}; + +/// This class represents a `params` directive that refers to all parameters +/// of an attribute or type. When used as a top-level directive, it generates +/// a format of the form: +/// +/// (param-value (`,` param-value)*)? 
+///
+/// When used as an argument to another directive that accepts variables,
+/// `params` can be used in place of manually listing all parameters of an
+/// attribute or type.
+class ParamsDirective
+    : public ParamsDirectiveBase<Element::Kind::ParamsDirective> {
+public:
+  using Base::Base;
+};
+
+/// This class represents a `struct` directive that generates a struct format
+/// of the form:
+///
+///   `{` param-name `=` param-value (`,` param-name `=` param-value)* `}`
+///
+class StructDirective
+    : public ParamsDirectiveBase<Element::Kind::StructDirective> {
+public:
+  using Base::Base;
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Format Strings
+//===----------------------------------------------------------------------===//
+
+/// Format for defining an attribute parser.
+///
+/// $0: The attribute C++ class name.
+static const char *const attrParserDefn = R"(
+::mlir::Attribute $0::parse(::mlir::DialectAsmParser &$_parser,
+                            ::mlir::Type $_type) {
+)";
+
+/// Format for defining a type parser.
+///
+/// $0: The type C++ class name.
+static const char *const typeParserDefn = R"(
+::mlir::Type $0::parse(::mlir::DialectAsmParser &$_parser) {
+)";
+
+/// Default parser for attribute or type parameters.
+static const char *const defaultParameterParser =
+    "::mlir::FieldParser<$0>::parse($_parser)";
+
+/// Default printer for attribute or type parameters.
+static const char *const defaultParameterPrinter = "$_printer << $_self";
+
+/// Print an error when failing to parse an element.
+static const char *const parseErrorStr =
+    "$_parser.emitError($_parser.getCurrentLocation(), ";
+
+/// Format for defining an attribute or type printer.
+///
+/// $0: The attribute or type C++ class name.
+/// $1: The attribute or type mnemonic.
+static const char *const attrOrTypePrinterDefn = R"(
+void $0::print(::mlir::DialectAsmPrinter &$_printer) const {
+  $_printer << "$1";
+)";
+
+/// Loop declaration for struct parser.
+///
+/// $0: Number of expected parameters.
+static const char *const structParseLoopStart = R"(
+  for (unsigned _index = 0; _index < $0; ++_index) {
+    StringRef _paramKey;
+    if ($_parser.parseKeyword(&_paramKey)) {
+      $_parser.emitError($_parser.getCurrentLocation(),
+                         "expected a parameter name in struct");
+      return {};
+    }
+)";
+
+/// Terminator code segment for the struct parser loop. Check for duplicate or
+/// unknown parameters. Parse a comma except on the last element.
+///
+/// {0}: Code template for printing an error.
+/// {1}: Number of elements in the struct.
+static const char *const structParseLoopEnd = R"({{
+      {0}"duplicate or unknown struct parameter name: ") << _paramKey;
+      return {{};
+    }
+    if ((_index != {1} - 1) && parser.parseComma())
+      return {{};
+  }
+)";
+
+/// Code format to parse a variable. Separated by lines because variable
+/// parsers may be generated inside other directives, which requires
+/// indentation.
+///
+/// {0}: The parameter name.
+/// {1}: The parse code for the parameter.
+/// {2}: Code template for printing an error.
+/// {3}: Name of the attribute or type.
+/// {4}: C++ class of the parameter.
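To make the placeholder scheme concrete: once formatv substitutes the placeholders, the `variableParser` template that follows expands, for a hypothetical parameter `width` with C++ and storage type `unsigned` on a def named `MyIntType` (all names illustrative, not output of this patch), to a stanza like:

      // Parse variable 'width'
      _result_width = ::mlir::FieldParser<unsigned>::parse(parser);
      if (failed(_result_width)) {
        parser.emitError(parser.getCurrentLocation(),
            "failed to parse MyIntType parameter 'width' which is to be a `unsigned`");
        return {};
      }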
+static const char *const variableParser[] = {
+    "  // Parse variable '{0}'",
+    "  _result_{0} = {1};",
+    "  if (failed(_result_{0})) {{",
+    "    {2}\"failed to parse {3} parameter '{0}' which is to be a `{4}`\");",
+    "    return {{};",
+    "  }",
+};
+
+//===----------------------------------------------------------------------===//
+// Utility Functions
+//===----------------------------------------------------------------------===//
+
+/// Get a list of an attribute's or type's parameters. These can be wrapper
+/// objects around `AttrOrTypeParameter` or string inits.
+static auto getParameters(const AttrOrTypeDef &def) {
+  SmallVector<AttrOrTypeParameter> params;
+  def.getParameters(params);
+  return params;
+}
+
+//===----------------------------------------------------------------------===//
+// AttrOrTypeFormat
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AttrOrTypeFormat {
+public:
+  AttrOrTypeFormat(const AttrOrTypeDef &def,
+                   std::vector<std::unique_ptr<Element>> &&elements)
+      : def(def), elements(std::move(elements)) {}
+
+  /// Generate the attribute or type parser.
+  void genParser(raw_ostream &os);
+  /// Generate the attribute or type printer.
+  void genPrinter(raw_ostream &os);
+
+private:
+  /// Generate the parser code for a specific format element.
+  void genElementParser(Element *el, FmtContext &ctx, raw_ostream &os);
+  /// Generate the parser code for a literal.
+  void genLiteralParser(StringRef value, FmtContext &ctx, raw_ostream &os,
+                        unsigned indent = 0);
+  /// Generate the parser code for a variable.
+  void genVariableParser(const AttrOrTypeParameter &param, FmtContext &ctx,
+                         raw_ostream &os, unsigned indent = 0);
+  /// Generate the parser code for a `params` directive.
+  void genParamsParser(ParamsDirective *el, FmtContext &ctx, raw_ostream &os);
+  /// Generate the parser code for a `struct` directive.
+  void genStructParser(StructDirective *el, FmtContext &ctx, raw_ostream &os);
+
+  /// Generate the printer code for a specific format element.
+  void genElementPrinter(Element *el, FmtContext &ctx, raw_ostream &os);
+  /// Generate the printer code for a literal.
+  void genLiteralPrinter(StringRef value, FmtContext &ctx, raw_ostream &os);
+  /// Generate the printer code for a variable.
+  void genVariablePrinter(const AttrOrTypeParameter &param, FmtContext &ctx,
+                          raw_ostream &os);
+  /// Generate the printer code for a `params` directive.
+  void genParamsPrinter(ParamsDirective *el, FmtContext &ctx, raw_ostream &os);
+  /// Generate the printer code for a `struct` directive.
+  void genStructPrinter(StructDirective *el, FmtContext &ctx, raw_ostream &os);
+
+  /// The ODS definition of the attribute or type whose format is being used to
+  /// generate a parser and printer.
+  const AttrOrTypeDef &def;
+  /// The list of top-level format elements returned by the assembly format
+  /// parser.
+  std::vector<std::unique_ptr<Element>> elements;
+
+  /// Flags for printing spaces.
+  bool shouldEmitSpace;
+  bool lastWasPunctuation;
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// ParserGen
+//===----------------------------------------------------------------------===//
+
+void AttrOrTypeFormat::genParser(raw_ostream &os) {
+  FmtContext ctx;
+  ctx.addSubst("_parser", "parser");
+
+  /// Generate the definition.
+  if (isa<AttrDef>(def)) {
+    ctx.addSubst("_type", "attrType");
+    os << tgfmt(attrParserDefn, &ctx, def.getCppClassName());
+  } else {
+    os << tgfmt(typeParserDefn, &ctx, def.getCppClassName());
+  }
+
+  /// Declare variables to store all of the parameters. Allocated parameters
+  /// such as `ArrayRef` and `StringRef` must provide a `storageType`. Store
+  /// FailureOr to defer type construction for parameters that are parsed in
+  /// a loop (parsers return FailureOr anyways).
+  SmallVector<AttrOrTypeParameter> params = getParameters(def);
+  for (const AttrOrTypeParameter &param : params) {
+    os << formatv("  ::mlir::FailureOr<{0}> _result_{1};\n",
+                  param.getCppStorageType(), param.getName());
+  }
+
+  /// Store the initial location of the parser.
+  ctx.addSubst("_loc", "loc");
+  os << tgfmt("  ::llvm::SMLoc $_loc = $_parser.getCurrentLocation();\n"
+              "  (void) $_loc;\n",
+              &ctx);
+
+  /// Generate call to each parameter parser.
+  for (auto &el : elements)
+    genElementParser(el.get(), ctx, os);
+
+  /// Generate call to the attribute or type builder. Use the checked getter
+  /// if one was generated.
+  if (def.genVerifyDecl()) {
+    os << tgfmt("  return $_parser.getChecked<$0>($_loc, $_parser.getContext()",
+                &ctx, def.getCppClassName());
+  } else {
+    os << tgfmt("  return $0::get($_parser.getContext()", &ctx,
+                def.getCppClassName());
+  }
+  for (const AttrOrTypeParameter &param : params)
+    os << formatv(",\n      _result_{0}.getValue()", param.getName());
+  os << ");\n}\n\n";
+}
+
+void AttrOrTypeFormat::genElementParser(Element *el, FmtContext &ctx,
+                                        raw_ostream &os) {
+  if (auto *literal = dyn_cast<LiteralElement>(el))
+    return genLiteralParser(literal->getSpelling(), ctx, os);
+  if (auto *var = dyn_cast<VariableElement>(el))
+    return genVariableParser(var->getParam(), ctx, os);
+  if (auto *params = dyn_cast<ParamsDirective>(el))
+    return genParamsParser(params, ctx, os);
+  if (auto *strct = dyn_cast<StructDirective>(el))
+    return genStructParser(strct, ctx, os);
+
+  llvm_unreachable("unknown format element");
+}
+
+void AttrOrTypeFormat::genLiteralParser(StringRef value, FmtContext &ctx,
+                                        raw_ostream &os, unsigned indent) {
+  os.indent(indent) << "  // Parse literal '" << value << "'\n";
+  os.indent(indent) << tgfmt("  if ($_parser.parse", &ctx);
+  if (value.front() == '_' || isalpha(value.front())) {
+    os << "Keyword(\"" << value << "\")";
+  } else {
+    os << StringSwitch<StringRef>(value)
+              .Case("->", "Arrow")
+              .Case(":", "Colon")
+              .Case(",", "Comma")
+              .Case("=", "Equal")
+              .Case("<", "Less")
+              .Case(">", "Greater")
+              .Case("{", "LBrace")
+              .Case("}", "RBrace")
+              .Case("(", "LParen")
+              .Case(")", "RParen")
+              .Case("[", "LSquare")
+              .Case("]", "RSquare")
+              .Case("?", "Question")
+              .Case("+", "Plus")
+              .Case("*", "Star")
+       << "()";
+  }
+  os << ")\n";
+  // Parser will emit an error.
+  os.indent(indent) << "    return {};\n";
+}
+
+void AttrOrTypeFormat::genVariableParser(const AttrOrTypeParameter &param,
+                                         FmtContext &ctx, raw_ostream &os,
+                                         unsigned indent) {
+  /// Check for a custom parser. Use the default attribute parser otherwise.
+  auto customParser = param.getParser();
+  auto parser =
+      customParser ?
+          *customParser : StringRef(defaultParameterParser);
+  for (const char *line : variableParser) {
+    os.indent(indent) << formatv(line, param.getName(),
+                                 tgfmt(parser, &ctx, param.getCppStorageType()),
+                                 tgfmt(parseErrorStr, &ctx), def.getName(),
+                                 param.getCppType())
+                      << "\n";
+  }
+}
+
+void AttrOrTypeFormat::genParamsParser(ParamsDirective *el, FmtContext &ctx,
+                                       raw_ostream &os) {
+  os << "  // Parse parameter list\n";
+  llvm::interleave(
+      el->getParams(), [&](auto param) { genVariableParser(param, ctx, os); },
+      [&]() { genLiteralParser(",", ctx, os); });
+}
+
+void AttrOrTypeFormat::genStructParser(StructDirective *el, FmtContext &ctx,
+                                       raw_ostream &os) {
+  os << "  // Parse parameter struct\n";
+
+  /// Declare a "seen" variable for each key.
+  for (const AttrOrTypeParameter &param : el->getParams())
+    os << formatv("  bool _seen_{0} = false;\n", param.getName());
+
+  /// Generate the parsing loop.
+  os << tgfmt(structParseLoopStart, &ctx, el->getNumParams());
+  genLiteralParser("=", ctx, os, 2);
+  os << "    ";
+  for (const AttrOrTypeParameter &param : el->getParams()) {
+    os << formatv("if (!_seen_{0} && _paramKey == \"{0}\") {\n"
+                  "      _seen_{0} = true;\n",
+                  param.getName());
+    genVariableParser(param, ctx, os, 4);
+    os << "    } else ";
+  }
+
+  /// Duplicate or unknown parameter.
+  os << formatv(structParseLoopEnd, tgfmt(parseErrorStr, &ctx),
+                el->getNumParams());
+
+  /// Because the loop runs N times and each non-failing iteration sets one of
+  /// the N flags, successfully exiting the loop means that all parameters have
+  /// been seen. `parseOptionalComma` would cause issues with any formats that
+  /// use "struct(...) `,`" because structs aren't surrounded by braces.
+}
+
+//===----------------------------------------------------------------------===//
+// PrinterGen
+//===----------------------------------------------------------------------===//
+
+void AttrOrTypeFormat::genPrinter(raw_ostream &os) {
+  FmtContext ctx;
+  ctx.addSubst("_printer", "printer");
+
+  /// Generate the definition.
+  os << tgfmt(attrOrTypePrinterDefn, &ctx, def.getCppClassName(),
+              *def.getMnemonic());
+
+  /// Generate printers.
+  shouldEmitSpace = true;
+  lastWasPunctuation = false;
+  for (auto &el : elements)
+    genElementPrinter(el.get(), ctx, os);
+
+  os << "}\n\n";
+}
+
+void AttrOrTypeFormat::genElementPrinter(Element *el, FmtContext &ctx,
+                                         raw_ostream &os) {
+  if (auto *literal = dyn_cast<LiteralElement>(el))
+    return genLiteralPrinter(literal->getSpelling(), ctx, os);
+  if (auto *params = dyn_cast<ParamsDirective>(el))
+    return genParamsPrinter(params, ctx, os);
+  if (auto *strct = dyn_cast<StructDirective>(el))
+    return genStructPrinter(strct, ctx, os);
+  if (auto *var = dyn_cast<VariableElement>(el))
+    return genVariablePrinter(var->getParam(), ctx, os);
+
+  llvm_unreachable("unknown format element");
+}
+
+void AttrOrTypeFormat::genLiteralPrinter(StringRef value, FmtContext &ctx,
+                                         raw_ostream &os) {
+  /// Don't insert a space before certain punctuation.
+  bool needSpace =
+      shouldEmitSpace && shouldEmitSpaceBefore(value, lastWasPunctuation);
+  os << tgfmt("  $_printer$0 << \"$1\";\n", &ctx, needSpace ? " << ' '" : "",
+              value);
+
+  /// Update the flags.
+  shouldEmitSpace =
+      value.size() != 1 || !StringRef("<({[").contains(value.front());
+  lastWasPunctuation = !(value.front() == '_' || isalpha(value.front()));
+}
+
+void AttrOrTypeFormat::genVariablePrinter(const AttrOrTypeParameter &param,
+                                          FmtContext &ctx, raw_ostream &os) {
+  /// Insert a space before the next parameter, if necessary.
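Backing up to the struct parser for a moment: for two hypothetical parameters `width` and `signed`, the loop emitted by genStructParser accepts the `key = value` pairs in either order and rejects duplicate or unknown keys. An illustrative expansion (a sketch assembled from the templates above, not verbatim tool output):

    bool _seen_width = false;
    bool _seen_signed = false;
    for (unsigned _index = 0; _index < 2; ++_index) {
      StringRef _paramKey;
      if (parser.parseKeyword(&_paramKey)) {
        parser.emitError(parser.getCurrentLocation(),
                         "expected a parameter name in struct");
        return {};
      }
      // Parse literal '='
      if (parser.parseEqual())
        return {};
      if (!_seen_width && _paramKey == "width") {
        _seen_width = true;
        // ... parse variable 'width' ...
      } else if (!_seen_signed && _paramKey == "signed") {
        _seen_signed = true;
        // ... parse variable 'signed' ...
      } else {
        parser.emitError(parser.getCurrentLocation(),
                         "duplicate or unknown struct parameter name: ")
            << _paramKey;
        return {};
      }
      if ((_index != 2 - 1) && parser.parseComma())
        return {};
    }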
+  if (shouldEmitSpace || !lastWasPunctuation)
+    os << tgfmt("  $_printer << ' ';\n", &ctx);
+  shouldEmitSpace = true;
+  lastWasPunctuation = false;
+
+  ctx.withSelf(getParameterAccessorName(param.getName()) + "()");
+  os << "  ";
+  if (auto printer = param.getPrinter())
+    os << tgfmt(*printer, &ctx) << ";\n";
+  else
+    os << tgfmt(defaultParameterPrinter, &ctx) << ";\n";
+}
+
+void AttrOrTypeFormat::genParamsPrinter(ParamsDirective *el, FmtContext &ctx,
+                                        raw_ostream &os) {
+  llvm::interleave(
+      el->getParams(), [&](auto param) { genVariablePrinter(param, ctx, os); },
+      [&]() { genLiteralPrinter(",", ctx, os); });
+}
+
+void AttrOrTypeFormat::genStructPrinter(StructDirective *el, FmtContext &ctx,
+                                        raw_ostream &os) {
+  llvm::interleave(
+      el->getParams(),
+      [&](auto param) {
+        genLiteralPrinter(param.getName(), ctx, os);
+        genLiteralPrinter("=", ctx, os);
+        os << tgfmt("  $_printer << ' ';\n", &ctx);
+        genVariablePrinter(param, ctx, os);
+      },
+      [&]() { genLiteralPrinter(",", ctx, os); });
}
+
+//===----------------------------------------------------------------------===//
+// FormatParser
+//===----------------------------------------------------------------------===//
+
+namespace {
+class FormatParser {
+public:
+  FormatParser(llvm::SourceMgr &mgr, const AttrOrTypeDef &def)
+      : lexer(mgr, def.getLoc()[0]), curToken(lexer.lexToken()), def(def),
+        seenParams(def.getNumParameters()) {}
+
+  /// Parse the attribute or type format and create the format elements.
+  FailureOr<AttrOrTypeFormat> parse();
+
+private:
+  /// The current context of the parser when parsing an element.
+  enum ParserContext {
+    /// The element is being parsed in the default context - at the top of the
+    /// format.
+    TopLevelContext,
+    /// The element is being parsed as a child to a `struct` directive.
+    StructDirective,
+  };
+
+  /// Emit an error.
+  LogicalResult emitError(const Twine &msg) {
+    lexer.emitError(curToken.getLoc(), msg);
+    return failure();
+  }
+
+  /// Parse an expected token.
+  LogicalResult parseToken(FormatToken::Kind kind, const Twine &msg) {
+    if (curToken.getKind() != kind)
+      return emitError(msg);
+    consumeToken();
+    return success();
+  }
+
+  /// Advance the lexer to the next token.
+  void consumeToken() {
+    assert(curToken.getKind() != FormatToken::eof &&
+           curToken.getKind() != FormatToken::error &&
+           "shouldn't advance past EOF or errors");
+    curToken = lexer.lexToken();
+  }
+
+  /// Parse any element.
+  FailureOr<std::unique_ptr<Element>> parseElement(ParserContext ctx);
+  /// Parse a literal element.
+  FailureOr<std::unique_ptr<Element>> parseLiteral(ParserContext ctx);
+  /// Parse a variable element.
+  FailureOr<std::unique_ptr<Element>> parseVariable(ParserContext ctx);
+  /// Parse a directive.
+  FailureOr<std::unique_ptr<Element>> parseDirective(ParserContext ctx);
+  /// Parse a `params` directive.
+  FailureOr<std::unique_ptr<Element>> parseParamsDirective();
+  /// Parse a `struct` directive.
+  FailureOr<std::unique_ptr<Element>> parseStructDirective();
+
+  /// The current format lexer.
+  FormatLexer lexer;
+  /// The current token in the stream.
+  FormatToken curToken;
+  /// Attribute or type tablegen def.
+  const AttrOrTypeDef &def;
+
+  /// Seen attribute or type parameters.
+  llvm::BitVector seenParams;
+};
+} // end anonymous namespace
+
+FailureOr<AttrOrTypeFormat> FormatParser::parse() {
+  std::vector<std::unique_ptr<Element>> elements;
+  elements.reserve(16);
+
+  /// Parse the format elements.
+  while (curToken.getKind() != FormatToken::eof) {
+    auto element = parseElement(TopLevelContext);
+    if (failed(element))
+      return failure();
+
+    /// Add the format element and continue.
+    elements.push_back(std::move(*element));
+  }
+
+  /// Check that all parameters have been seen.
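Zooming out to the generated output: assembled end to end, the parser that genParser emits for a hypothetical `MyIntType` with parameters `width` (unsigned) and `signed` (bool) and the format `params` would read roughly as follows (a sketch pieced together from the fragments above; all names are assumptions):

    ::mlir::Type MyIntType::parse(::mlir::DialectAsmParser &parser) {
      ::mlir::FailureOr<unsigned> _result_width;
      ::mlir::FailureOr<bool> _result_signed;
      ::llvm::SMLoc loc = parser.getCurrentLocation();
      (void) loc;
      // Parse parameter list
      //   ... per-parameter stanzas, with comma literal parsers in between ...
      // getChecked<MyIntType>(loc, ...) would be emitted instead if the def
      // sets genVerifyDecl.
      return MyIntType::get(parser.getContext(),
          _result_width.getValue(),
          _result_signed.getValue());
    }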
+  SmallVector<AttrOrTypeParameter> params = getParameters(def);
+  for (auto it : llvm::enumerate(params)) {
+    if (!seenParams.test(it.index())) {
+      return emitError("format is missing reference to parameter: " +
+                       it.value().getName());
+    }
+  }
+
+  return AttrOrTypeFormat(def, std::move(elements));
+}
+
+FailureOr<std::unique_ptr<Element>>
+FormatParser::parseElement(ParserContext ctx) {
+  if (curToken.getKind() == FormatToken::literal)
+    return parseLiteral(ctx);
+  if (curToken.getKind() == FormatToken::variable)
+    return parseVariable(ctx);
+  if (curToken.isKeyword())
+    return parseDirective(ctx);
+
+  return emitError("expected literal, directive, or variable");
+}
+
+FailureOr<std::unique_ptr<Element>>
+FormatParser::parseLiteral(ParserContext ctx) {
+  if (ctx != TopLevelContext) {
+    return emitError(
+        "literals may only be used in the top-level section of the format");
+  }
+
+  /// Get the literal spelling without the surrounding "`".
+  auto value = curToken.getSpelling().drop_front().drop_back();
+  if (!isValidLiteral(value))
+    return emitError("literal '" + value + "' is not valid");
+
+  consumeToken();
+  return {std::make_unique<LiteralElement>(value)};
+}
+
+FailureOr<std::unique_ptr<Element>>
+FormatParser::parseVariable(ParserContext ctx) {
+  /// Get the parameter name without the preceding "$".
+  auto name = curToken.getSpelling().drop_front();
+
+  /// Lookup the parameter.
+  SmallVector<AttrOrTypeParameter> params = getParameters(def);
+  auto *it = llvm::find_if(
+      params, [&](auto &param) { return param.getName() == name; });
+
+  /// Check that the parameter reference is valid.
+  if (it == params.end())
+    return emitError(def.getName() + " has no parameter named '" + name + "'");
+  auto idx = std::distance(params.begin(), it);
+  if (seenParams.test(idx))
+    return emitError("duplicate parameter '" + name + "'");
+  seenParams.set(idx);
+
+  consumeToken();
+  return {std::make_unique<VariableElement>(*it)};
+}
+
+FailureOr<std::unique_ptr<Element>>
+FormatParser::parseDirective(ParserContext ctx) {
+
+  switch (curToken.getKind()) {
+  case FormatToken::kw_params:
+    return parseParamsDirective();
+  case FormatToken::kw_struct:
+    if (ctx != TopLevelContext) {
+      return emitError(
+          "`struct` may only be used in the top-level section of the format");
+    }
+    return parseStructDirective();
+  default:
+    return emitError("unknown directive in format: " + curToken.getSpelling());
+  }
+}
+
+FailureOr<std::unique_ptr<Element>> FormatParser::parseParamsDirective() {
+  consumeToken();
+  /// Collect all of the attribute's or type's parameters.
+  SmallVector<AttrOrTypeParameter> params = getParameters(def);
+  SmallVector<std::unique_ptr<Element>> vars;
+  /// Ensure that none of the parameters have already been captured.
+  for (auto it : llvm::enumerate(params)) {
+    if (seenParams.test(it.index())) {
+      return emitError("`params` captures duplicate parameter: " +
+                       it.value().getName());
+    }
+    seenParams.set(it.index());
+    vars.push_back(std::make_unique<VariableElement>(it.value()));
+  }
+  return {std::make_unique<ParamsDirective>(std::move(vars))};
+}
+
+FailureOr<std::unique_ptr<Element>> FormatParser::parseStructDirective() {
+  consumeToken();
+  if (failed(parseToken(FormatToken::l_paren,
+                        "expected '(' before `struct` argument list")))
+    return failure();
+
+  /// Parse variables captured by `struct`.
+  SmallVector<std::unique_ptr<Element>> vars;
+
+  /// Parse first captured parameter or a `params` directive.
+  FailureOr<std::unique_ptr<Element>> var = parseElement(StructDirective);
+  if (failed(var) || !isa<VariableElement, ParamsDirective>(*var))
+    return emitError("`struct` argument list expected a variable or directive");
+  if (isa<VariableElement>(*var)) {
+    /// Parse any other parameters.
+    vars.push_back(std::move(*var));
+    while (curToken.getKind() == FormatToken::comma) {
+      consumeToken();
+      var = parseElement(StructDirective);
+      if (failed(var) || !isa<VariableElement>(*var))
+        return emitError("expected a variable in `struct` argument list");
+      vars.push_back(std::move(*var));
+    }
+  } else {
+    /// `struct(params)` captures all parameters in the attribute or type.
+    vars = cast<ParamsDirective>(var->get())->takeParams();
+  }
+
+  if (curToken.getKind() != FormatToken::r_paren)
+    return emitError("expected ')' at the end of an argument list");
+
+  consumeToken();
+  return {std::make_unique<::StructDirective>(std::move(vars))};
+}
+
+//===----------------------------------------------------------------------===//
+// Interface
+//===----------------------------------------------------------------------===//
+
+void mlir::tblgen::generateAttrOrTypeFormat(const AttrOrTypeDef &def,
+                                            raw_ostream &os) {
+  llvm::SourceMgr mgr;
+  mgr.AddNewSourceBuffer(
+      llvm::MemoryBuffer::getMemBuffer(*def.getAssemblyFormat()),
+      llvm::SMLoc());
+
+  /// Parse the custom assembly format.
+  FormatParser parser(mgr, def);
+  FailureOr<AttrOrTypeFormat> format = parser.parse();
+  if (failed(format)) {
+    if (formatErrorIsFatal)
+      PrintFatalError(def.getLoc(), "failed to parse assembly format");
+    return;
+  }
+
+  /// Generate the parser and printer.
+  format->genParser(os);
+  format->genPrinter(os);
+}
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.h b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.h
new file mode 100644
index 0000000000000..2a10a157dfc90
--- /dev/null
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.h
@@ -0,0 +1,32 @@
+//===- AttrOrTypeFormatGen.h - MLIR attribute and type format generator ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TOOLS_MLIRTBLGEN_ATTRORTYPEFORMATGEN_H_
+#define MLIR_TOOLS_MLIRTBLGEN_ATTRORTYPEFORMATGEN_H_
+
+#include "llvm/Support/raw_ostream.h"
+
+#include <string>
+
+namespace mlir {
+namespace tblgen {
+class AttrOrTypeDef;
+
+/// Generate a parser and printer based on a custom assembly format for an
+/// attribute or type.
+void generateAttrOrTypeFormat(const AttrOrTypeDef &def, llvm::raw_ostream &os);
+
+/// From the parameter name, get the name of the accessor function in
+/// camelCase. The first letter of the parameter is upper-cased and prefixed
+/// with "get". E.g. 'value' -> 'getValue'.
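The declaration follows below; the definition lives in AttrOrTypeFormatGen.cpp and is not shown in this hunk. A minimal sketch matching the documented behavior (an assumption, not the committed implementation):

    #include "llvm/ADT/StringExtras.h"
    #include <cassert>
    #include <string>

    std::string mlir::tblgen::getParameterAccessorName(llvm::StringRef name) {
      assert(!name.empty() && "parameter must have a name");
      std::string accessor = "get" + name.str();
      // Upper-case the first character of the parameter name, so that
      // 'value' becomes 'getValue'.
      accessor[3] = llvm::toUpper(accessor[3]);
      return accessor;
    }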
+std::string getParameterAccessorName(llvm::StringRef name); + +} // end namespace tblgen +} // end namespace mlir + +#endif // MLIR_TOOLS_MLIRTBLGEN_ATTRORTYPEFORMATGEN_H_ diff --git a/mlir/tools/mlir-tblgen/CMakeLists.txt b/mlir/tools/mlir-tblgen/CMakeLists.txt index f16e8965daca4..a937a9d89a1d3 100644 --- a/mlir/tools/mlir-tblgen/CMakeLists.txt +++ b/mlir/tools/mlir-tblgen/CMakeLists.txt @@ -6,10 +6,12 @@ set(LLVM_LINK_COMPONENTS add_tablegen(mlir-tblgen MLIR AttrOrTypeDefGen.cpp + AttrOrTypeFormatGen.cpp CodeGenHelpers.cpp DialectGen.cpp DirectiveCommonGen.cpp EnumsGen.cpp + FormatGen.cpp LLVMIRConversionGen.cpp LLVMIRIntrinsicGen.cpp mlir-tblgen.cpp diff --git a/mlir/tools/mlir-tblgen/FormatGen.cpp b/mlir/tools/mlir-tblgen/FormatGen.cpp new file mode 100644 index 0000000000000..fa6c0603ac7e1 --- /dev/null +++ b/mlir/tools/mlir-tblgen/FormatGen.cpp @@ -0,0 +1,225 @@ +//===- FormatGen.cpp - Utilities for custom assembly formats ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatGen.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/TableGen/Error.h" + +using namespace mlir; +using namespace mlir::tblgen; + +//===----------------------------------------------------------------------===// +// FormatToken +//===----------------------------------------------------------------------===// + +llvm::SMLoc FormatToken::getLoc() const { + return llvm::SMLoc::getFromPointer(spelling.data()); +} + +//===----------------------------------------------------------------------===// +// FormatLexer +//===----------------------------------------------------------------------===// + +FormatLexer::FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc) + : mgr(mgr), loc(loc), + curBuffer(mgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer()), + curPtr(curBuffer.begin()) {} + +FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) { + mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); + llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note, + "in custom assembly format for this operation"); + return formToken(FormatToken::error, loc.getPointer()); +} + +FormatToken FormatLexer::emitError(const char *loc, const Twine &msg) { + return emitError(llvm::SMLoc::getFromPointer(loc), msg); +} + +FormatToken FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, + const Twine ¬e) { + mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); + llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note, + "in custom assembly format for this operation"); + mgr.PrintMessage(loc, llvm::SourceMgr::DK_Note, note); + return formToken(FormatToken::error, loc.getPointer()); +} + +int FormatLexer::getNextChar() { + char curChar = *curPtr++; + switch (curChar) { + default: + return (unsigned char)curChar; + case 0: { + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (curPtr - 1 != curBuffer.end()) + return 0; + + // Otherwise, return end of file. + --curPtr; + return EOF; + } + case '\n': + case '\r': + // Handle the newline character by ignoring it and incrementing the line + // count. However, be careful about 'dos style' files with \n\r in them. 
+ // Only treat a \n\r or \r\n as a single line. + if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar) + ++curPtr; + return '\n'; + } +} + +FormatToken FormatLexer::lexToken() { + const char *tokStart = curPtr; + + // This always consumes at least one character. + int curChar = getNextChar(); + switch (curChar) { + default: + // Handle identifiers: [a-zA-Z_] + if (isalpha(curChar) || curChar == '_') + return lexIdentifier(tokStart); + + // Unknown character, emit an error. + return emitError(tokStart, "unexpected character"); + case EOF: + // Return EOF denoting the end of lexing. + return formToken(FormatToken::eof, tokStart); + + // Lex punctuation. + case '^': + return formToken(FormatToken::caret, tokStart); + case ':': + return formToken(FormatToken::colon, tokStart); + case ',': + return formToken(FormatToken::comma, tokStart); + case '=': + return formToken(FormatToken::equal, tokStart); + case '<': + return formToken(FormatToken::less, tokStart); + case '>': + return formToken(FormatToken::greater, tokStart); + case '?': + return formToken(FormatToken::question, tokStart); + case '(': + return formToken(FormatToken::l_paren, tokStart); + case ')': + return formToken(FormatToken::r_paren, tokStart); + case '*': + return formToken(FormatToken::star, tokStart); + + // Ignore whitespace characters. + case 0: + case ' ': + case '\t': + case '\n': + return lexToken(); + + case '`': + return lexLiteral(tokStart); + case '$': + return lexVariable(tokStart); + } +} + +FormatToken FormatLexer::lexLiteral(const char *tokStart) { + assert(curPtr[-1] == '`'); + + // Lex a literal surrounded by ``. + while (const char curChar = *curPtr++) { + if (curChar == '`') + return formToken(FormatToken::literal, tokStart); + } + return emitError(curPtr - 1, "unexpected end of file in literal"); +} + +FormatToken FormatLexer::lexVariable(const char *tokStart) { + if (!isalpha(curPtr[0]) && curPtr[0] != '_') + return emitError(curPtr - 1, "expected variable name"); + + // Otherwise, consume the rest of the characters. + while (isalnum(*curPtr) || *curPtr == '_') + ++curPtr; + return formToken(FormatToken::variable, tokStart); +} + +FormatToken FormatLexer::lexIdentifier(const char *tokStart) { + // Match the rest of the identifier regex: [0-9a-zA-Z_\-]* + while (isalnum(*curPtr) || *curPtr == '_' || *curPtr == '-') + ++curPtr; + + // Check to see if this identifier is a keyword. 
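As a quick illustration of the token stream this lexer produces: feeding it the assembly format "`<` struct(params) `>`" yields literal, kw_struct, l_paren, kw_params, r_paren, literal, eof, where the keyword mapping happens in the StringSwitch that follows. A hypothetical driver loop (assuming `mgr` already owns a buffer holding the format string and `loc` is its location):

    FormatLexer lexer(mgr, loc);
    for (FormatToken tok = lexer.lexToken();
         tok.getKind() != FormatToken::eof; tok = lexer.lexToken())
      llvm::errs() << tok.getSpelling() << "\n";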
+  StringRef str(tokStart, curPtr - tokStart);
+  auto kind =
+      StringSwitch<FormatToken::Kind>(str)
+          .Case("attr-dict", FormatToken::kw_attr_dict)
+          .Case("attr-dict-with-keyword", FormatToken::kw_attr_dict_w_keyword)
+          .Case("custom", FormatToken::kw_custom)
+          .Case("functional-type", FormatToken::kw_functional_type)
+          .Case("operands", FormatToken::kw_operands)
+          .Case("params", FormatToken::kw_params)
+          .Case("ref", FormatToken::kw_ref)
+          .Case("regions", FormatToken::kw_regions)
+          .Case("results", FormatToken::kw_results)
+          .Case("struct", FormatToken::kw_struct)
+          .Case("successors", FormatToken::kw_successors)
+          .Case("type", FormatToken::kw_type)
+          .Default(FormatToken::identifier);
+  return FormatToken(kind, str);
+}
+
+//===----------------------------------------------------------------------===//
+// Utility Functions
+//===----------------------------------------------------------------------===//
+
+bool mlir::tblgen::shouldEmitSpaceBefore(StringRef value,
+                                         bool lastWasPunctuation) {
+  if (value.size() != 1 && value != "->")
+    return true;
+  if (lastWasPunctuation)
+    return !StringRef(">)}],").contains(value.front());
+  return !StringRef("<>(){}[],").contains(value.front());
+}
+
+bool mlir::tblgen::canFormatStringAsKeyword(StringRef value) {
+  if (!isalpha(value.front()) && value.front() != '_')
+    return false;
+  return llvm::all_of(value.drop_front(), [](char c) {
+    return isalnum(c) || c == '_' || c == '$' || c == '.';
+  });
+}
+
+bool mlir::tblgen::isValidLiteral(StringRef value) {
+  if (value.empty())
+    return false;
+  char front = value.front();
+
+  // If there is only one character, this must either be punctuation or a
+  // single character bare identifier.
+  if (value.size() == 1)
+    return isalpha(front) || StringRef("_:,=<>()[]{}?+*").contains(front);
+
+  // Check the punctuation that is larger than a single character.
+  if (value == "->")
+    return true;
+
+  // Otherwise, this must be an identifier.
+  return canFormatStringAsKeyword(value);
+}
+
+//===----------------------------------------------------------------------===//
+// Commandline Options
+//===----------------------------------------------------------------------===//
+
+llvm::cl::opt<bool> mlir::tblgen::formatErrorIsFatal(
+    "asmformat-error-is-fatal",
+    llvm::cl::desc("Emit a fatal error if format parsing fails"),
+    llvm::cl::init(true));
diff --git a/mlir/tools/mlir-tblgen/FormatGen.h b/mlir/tools/mlir-tblgen/FormatGen.h
new file mode 100644
index 0000000000000..f061d1ed5c678
--- /dev/null
+++ b/mlir/tools/mlir-tblgen/FormatGen.h
@@ -0,0 +1,161 @@
+//===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common classes for building custom assembly format
+// parsers and generators.
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_ +#define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_ + +#include "mlir/Support/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/SMLoc.h" + +namespace llvm { +class SourceMgr; +} // end namespace llvm + +namespace mlir { +namespace tblgen { + +//===----------------------------------------------------------------------===// +// FormatToken +//===----------------------------------------------------------------------===// + +/// This class represents a specific token in the input format. +class FormatToken { +public: + /// Basic token kinds. + enum Kind { + // Markers. + eof, + error, + + // Tokens with no info. + l_paren, + r_paren, + caret, + colon, + comma, + equal, + less, + greater, + question, + star, + + // Keywords. + keyword_start, + kw_attr_dict, + kw_attr_dict_w_keyword, + kw_custom, + kw_functional_type, + kw_operands, + kw_params, + kw_ref, + kw_regions, + kw_results, + kw_struct, + kw_successors, + kw_type, + keyword_end, + + // String valued tokens. + identifier, + literal, + variable, + }; + + FormatToken(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} + + /// Return the bytes that make up this token. + StringRef getSpelling() const { return spelling; } + + /// Return the kind of this token. + Kind getKind() const { return kind; } + + /// Return a location for this token. + llvm::SMLoc getLoc() const; + + /// Return if this token is a keyword. + bool isKeyword() const { + return getKind() > Kind::keyword_start && getKind() < Kind::keyword_end; + } + +private: + /// Discriminator that indicates the kind of token this is. + Kind kind; + + /// A reference to the entire token contents; this is always a pointer into + /// a memory buffer owned by the source manager. + StringRef spelling; +}; + +//===----------------------------------------------------------------------===// +// FormatLexer +//===----------------------------------------------------------------------===// + +/// This class implements a simple lexer for operation assembly format strings. +class FormatLexer { +public: + FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc); + + /// Lex the next token and return it. + FormatToken lexToken(); + + /// Emit an error to the lexer with the given location and message. + FormatToken emitError(llvm::SMLoc loc, const Twine &msg); + FormatToken emitError(const char *loc, const Twine &msg); + + FormatToken emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, + const Twine ¬e); + +private: + /// Return the next character in the stream. + int getNextChar(); + + /// Lex an identifier, literal, or variable. + FormatToken lexIdentifier(const char *tokStart); + FormatToken lexLiteral(const char *tokStart); + FormatToken lexVariable(const char *tokStart); + + /// Create a token with the current pointer and a start pointer. + FormatToken formToken(FormatToken::Kind kind, const char *tokStart) { + return FormatToken(kind, StringRef(tokStart, curPtr - tokStart)); + } + + /// The source manager containing the format string. + llvm::SourceMgr &mgr; + /// Location of the format string. + llvm::SMLoc loc; + /// Buffer containing the format string. + StringRef curBuffer; + /// Current pointer in the buffer. + const char *curPtr; +}; + +/// Whether a space needs to be emitted before a literal. 
E.g., two keywords
+/// back-to-back require a space separator, but a keyword followed by '<' does
+/// not require a space.
+bool shouldEmitSpaceBefore(StringRef value, bool lastWasPunctuation);
+
+/// Returns true if the given string can be formatted as a keyword.
+bool canFormatStringAsKeyword(StringRef value);
+
+/// Returns true if the given string is a valid format literal element.
+bool isValidLiteral(StringRef value);
+
+/// Whether a failure in parsing the assembly format should be a fatal error.
+extern llvm::cl::opt<bool> formatErrorIsFatal;
+
+} // end namespace tblgen
+} // end namespace mlir
+
+#endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 399bd8898bfa6..6e88480c2fb15 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -42,6 +42,17 @@ static const char *const generatedArgName = "odsArg";
 static const char *const odsBuilder = "odsBuilder";
 static const char *const builderOpState = "odsState";
 
+// Code for OpAdaptors to lookup an attribute using strings on the provided
+// DictionaryAttr.
+//
+// {0}: The attribute name.
+static const char *const adaptorGetAttr = "odsAttrs.get(\"{0}\")";
+
+// Code for Ops to lookup an attribute using the cached identifier.
+//
+// {0}: The attribute's getter name.
+static const char *const opGetAttr = "(*this)->getAttr({0}AttrName())";
+
 // The logic to calculate the actual value range for a declared operand/result
 // of an op with variadic operands/results. Note that this logic is not for
 // general use; it assumes all variadic operands/results must have the same
@@ -163,8 +174,7 @@ static std::string getArgumentName(const Operator &op, int index) {
   const auto &operand = op.getOperand(index);
   if (!operand.name.empty())
     return std::string(operand.name);
-  else
-    return std::string(formatv("{0}_{1}", generatedArgName, index));
+  return std::string(formatv("{0}_{1}", generatedArgName, index));
 }
 
 // Returns true if we can use unwrapped value for the given `attr` in builders.
@@ -370,13 +380,16 @@ class OpEmitter {
 //   an operand (the generated function call returns an OperandRange);
 // - resultGet corresponds to the name of the function to get a result (the
 //   generated function call returns a ValueRange);
+// - opRequired indicates whether an op instance is needed
 static void populateSubstitutions(const Operator &op, const char *attrGet,
                                   const char *operandGet, const char *resultGet,
-                                  FmtContext &ctx) {
+                                  FmtContext &ctx, bool opRequired) {
   // Populate substitutions for attributes and named operands.
-  for (const auto &namedAttr : op.getAttributes())
+  for (const auto &namedAttr : op.getAttributes()) {
     ctx.addSubst(namedAttr.name,
-                 formatv("{0}(\"{1}\")", attrGet, namedAttr.name));
+                 formatv(attrGet, opRequired ? op.getGetterName(namedAttr.name)
                                              : namedAttr.name));
+  }
   for (int i = 0, e = op.getNumOperands(); i < e; ++i) {
     auto &value = op.getOperand(i);
     if (value.name.empty())
@@ -413,62 +426,63 @@ static void genAttributeVerifier(const Operator &op, const char *attrGet,
                                  const Twine &emitErrorPrefix,
                                  bool emitVerificationRequiringOp,
                                  FmtContext &ctx, OpMethodBody &body) {
+  // Check that a required attribute exists.
+  //
+  // {0}: Attribute variable name.
+  // {1}: Emit error prefix.
+  // {2}: Attribute name.
+  const char *const checkRequiredAttr = R"(
+    if (!{0})
+      return {1}"requires attribute '{2}'");
+  )";
+  // Check the condition on an attribute if it is required. This assumes that
+  // default values are valid.
+  // TODO: verify the default value is valid (perhaps in debug mode only).
+  //
+  // {0}: Attribute variable name.
+  // {1}: Attribute condition code.
+  // {2}: Emit error prefix.
+  // {3}: Attribute name.
+  // {4}: Constraint summary.
+  const char *const checkAttrCondition = R"(
+    if ({0} && !({1}))
+      return {2}"attribute '{3}' failed to satisfy constraint: {4}");
+  )";
+
   for (const auto &namedAttr : op.getAttributes()) {
     const auto &attr = namedAttr.attr;
+    StringRef attrName = namedAttr.name;
     if (attr.isDerivedAttr())
       continue;
 
-    auto attrName = namedAttr.name;
     bool allowMissingAttr = attr.hasDefaultValue() || attr.isOptional();
     auto attrPred = attr.getPredicate();
-    auto condition = attrPred.isNull() ? "" : attrPred.getCondition();
-    // There is a condition to emit only if the use of $_op and whether to
-    // emit verifications for op matches.
-    bool hasConditionToEmit = (!(condition.find("$_op") != StringRef::npos) ^
-                               emitVerificationRequiringOp);
+    std::string condition = attrPred.isNull() ? "" : attrPred.getCondition();
+    // If the attribute's condition needs an op but none is available, then the
+    // condition cannot be emitted.
+    bool canEmitCondition =
+        !StringRef(condition).contains("$_op") || emitVerificationRequiringOp;
 
     // Prefix with `tblgen_` to avoid hiding the attribute accessor.
-    auto varName = tblgenNamePrefix + attrName;
-
-    // If the attribute is
-    // 1. Required (not allowed missing) and not in op verification, or
-    // 2. Has a condition that will get verified
-    // then the variable will be used.
-    //
-    // Therefore, for optional attributes whose verification requires that an
-    // op already exists for verification/emitVerificationRequiringOp is set
-    // has nothing that can be verified here.
-    if ((allowMissingAttr || emitVerificationRequiringOp) &&
-        !hasConditionToEmit)
-      continue;
-
-    body << formatv("  {\n    auto {0} = {1}(\"{2}\");\n", varName, attrGet,
-                    attrName);
-
-    if (!emitVerificationRequiringOp && !allowMissingAttr) {
-      body << "    if (!" << varName << ") return " << emitErrorPrefix
-           << "\"requires attribute '" << attrName << "'\");\n";
-    }
-
-    if (!hasConditionToEmit) {
-      body << "  }\n";
-      continue;
-    }
+    Twine varName = tblgenNamePrefix + attrName;
 
-    if (allowMissingAttr) {
-      // If the attribute has a default value, then only verify the predicate if
-      // set. This does effectively assume that the default value is valid.
-      // TODO: verify the debug value is valid (perhaps in debug mode only).
-      body << "    if (" << varName << ") {\n";
+    // If the attribute is not required and we cannot emit the condition, then
+    // there is nothing to be done.
+    if (allowMissingAttr && !canEmitCondition)
+      continue;
 
+    body << formatv("  {\n    auto {0} = {1};", varName,
+                    formatv(attrGet, emitVerificationRequiringOp ?
op.getGetterName(attrName) + : attrName)); + + if (!allowMissingAttr) + body << formatv(checkRequiredAttr, varName, emitErrorPrefix, attrName); + if (canEmitCondition) { + body << formatv(checkAttrCondition, varName, + tgfmt(condition, &ctx.withSelf(varName)), emitErrorPrefix, + attrName, escapeString(attr.getSummary())); } - - body << tgfmt(" if (!($0)) return $1\"attribute '$2' " - "failed to satisfy constraint: $3\");\n", - /*ctx=*/nullptr, tgfmt(condition, &ctx.withSelf(varName)), - emitErrorPrefix, attrName, escapeString(attr.getSummary())); - if (allowMissingAttr) - body << " }\n"; - body << " }\n"; + body << "}\n"; } } @@ -2085,21 +2099,71 @@ void OpEmitter::genPrinter() { method->body() << " " << tgfmt(printer, &fctx); } +/// Generate verification on native traits requiring attributes. +static void genNativeTraitAttrVerifier(OpMethodBody &body, const Operator &op, + const char *const attrGet, + const Twine &emitError, + bool opRequired) { + // Check that the variadic segment sizes attribute exists and contains the + // expected number of elements. + // + // {0}: Attribute name. + // {1}: Expected number of elements. + // {2}: "operand" or "result". + // {3}: Attribute getter call. + // {4}: Emit error prefix. + const char *const checkAttrSizedValueSegmentsCode = R"( + { + auto sizeAttr = {3}.dyn_cast<::mlir::DenseIntElementsAttr>(); + if (!sizeAttr) + return {4}"missing segment sizes attribute '{0}'"); + auto numElements = + sizeAttr.getType().cast<::mlir::ShapedType>().getNumElements(); + if (numElements != {1}) + return {4}"'{0}' attribute for specifying {2} segments must have {1} " + "elements, but got ") << numElements; + } + )"; + + // Verify a few traits first so that we can use getODSOperands() and + // getODSResults() in the rest of the verifier. + for (auto &trait : op.getTraits()) { + auto *t = dyn_cast(&trait); + if (!t) + continue; + std::string traitName = t->getFullyQualifiedTraitName(); + if (traitName == "::mlir::OpTrait::AttrSizedOperandSegments") { + StringRef attrName = "operand_segment_sizes"; + body << formatv( + checkAttrSizedValueSegmentsCode, attrName, op.getNumOperands(), + "operand", + formatv(attrGet, opRequired ? op.getGetterName(attrName) : attrName), + emitError); + } else if (traitName == "::mlir::OpTrait::AttrSizedResultSegments") { + StringRef attrName = "result_segment_sizes"; + body << formatv( + checkAttrSizedValueSegmentsCode, attrName, op.getNumResults(), + "result", + formatv(attrGet, opRequired ? 
op.getGetterName(attrName) : attrName), + emitError); + } + } +} + void OpEmitter::genVerifier() { auto *method = opClass.addMethodAndPrune("::mlir::LogicalResult", "verify"); ERROR_IF_PRUNED(method, "verify", op); auto &body = method->body(); - body << " if (::mlir::failed(" << op.getAdaptorName() - << "(*this).verify((*this)->getLoc()))) " - << "return ::mlir::failure();\n"; + + genNativeTraitAttrVerifier(body, op, opGetAttr, "emitOpError(", true); auto *valueInit = def.getValueInit("verifier"); StringInit *stringInit = dyn_cast(valueInit); bool hasCustomVerify = stringInit && !stringInit->getValue().empty(); - populateSubstitutions(op, "(*this)->getAttr", "this->getODSOperands", - "this->getODSResults", verifyCtx); + populateSubstitutions(op, opGetAttr, "this->getODSOperands", + "this->getODSResults", verifyCtx, /*opRequired=*/true); - genAttributeVerifier(op, "(*this)->getAttr", "emitOpError(", + genAttributeVerifier(op, opGetAttr, "emitOpError(", /*emitVerificationRequiringOp=*/true, verifyCtx, body); genOperandResultVerifier(body, op.getOperands(), "operand"); genOperandResultVerifier(body, op.getResults(), "result"); @@ -2530,39 +2594,16 @@ void OpOperandAdaptorEmitter::addVerification() { ERROR_IF_PRUNED(method, "verify", op); auto &body = method->body(); - const char *checkAttrSizedValueSegmentsCode = R"( - { - auto sizeAttr = odsAttrs.get("{0}").cast<::mlir::DenseIntElementsAttr>(); - auto numElements = sizeAttr.getType().cast<::mlir::ShapedType>().getNumElements(); - if (numElements != {1}) - return emitError(loc, "'{0}' attribute for specifying {2} segments " - "must have {1} elements, but got ") << numElements; - } - )"; - - // Verify a few traits first so that we can use - // getODSOperands()/getODSResults() in the rest of the verifier. 
- for (auto &trait : op.getTraits()) { - if (auto *t = dyn_cast(&trait)) { - if (t->getFullyQualifiedTraitName() == - "::mlir::OpTrait::AttrSizedOperandSegments") { - body << formatv(checkAttrSizedValueSegmentsCode, - "operand_segment_sizes", op.getNumOperands(), - "operand"); - } else if (t->getFullyQualifiedTraitName() == - "::mlir::OpTrait::AttrSizedResultSegments") { - body << formatv(checkAttrSizedValueSegmentsCode, "result_segment_sizes", - op.getNumResults(), "result"); - } - } - } + std::string emitError = + "emitError(loc, \"'" + op.getOperationName() + "' op \""; + genNativeTraitAttrVerifier(body, op, adaptorGetAttr, emitError, + /*opRequired=*/false); FmtContext verifyCtx; - populateSubstitutions(op, "odsAttrs.get", "getODSOperands", - "", verifyCtx); - genAttributeVerifier(op, "odsAttrs.get", - Twine("emitError(loc, \"'") + op.getOperationName() + - "' op \"", + populateSubstitutions(op, adaptorGetAttr, "getODSOperands", + "", verifyCtx, + /*opRequired=*/false); + genAttributeVerifier(op, adaptorGetAttr, emitError, /*emitVerificationRequiringOp*/ false, verifyCtx, body); body << " return ::mlir::success();"; diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 3fb0e06e942d7..19dd6fa7c1016 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "OpFormatGen.h" +#include "FormatGen.h" #include "mlir/Support/LogicalResult.h" #include "mlir/TableGen/Format.h" #include "mlir/TableGen/GenInfo.h" @@ -20,7 +21,6 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/TypeSwitch.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Signals.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -30,20 +30,6 @@ using namespace mlir; using namespace mlir::tblgen; -static llvm::cl::opt formatErrorIsFatal( - "asmformat-error-is-fatal", - llvm::cl::desc("Emit a fatal error if format parsing fails"), - llvm::cl::init(true)); - -/// Returns true if the given string can be formatted as a keyword. -static bool canFormatStringAsKeyword(StringRef value) { - if (!isalpha(value.front()) && value.front() != '_') - return false; - return llvm::all_of(value.drop_front(), [](char c) { - return isalnum(c) || c == '_' || c == '$' || c == '.'; - }); -} - //===----------------------------------------------------------------------===// // Element //===----------------------------------------------------------------------===// @@ -273,33 +259,12 @@ class LiteralElement : public Element { /// Return the literal for this element. StringRef getLiteral() const { return literal; } - /// Returns true if the given string is a valid literal. - static bool isValidLiteral(StringRef value); - private: /// The spelling of the literal for this element. StringRef literal; }; } // end anonymous namespace -bool LiteralElement::isValidLiteral(StringRef value) { - if (value.empty()) - return false; - char front = value.front(); - - // If there is only one character, this must either be punctuation or a - // single character bare identifier. - if (value.size() == 1) - return isalpha(front) || StringRef("_:,=<>()[]{}?+*").contains(front); - - // Check the punctuation that are larger than a single character. - if (value == "->") - return true; - - // Otherwise, this must be an identifier. 
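The two helpers deleted from this file above now live in FormatGen.cpp with identical behavior. A few spot checks of what they accept, written as assertions for illustration (not a unit test in this patch):

    using mlir::tblgen::canFormatStringAsKeyword;
    using mlir::tblgen::isValidLiteral;

    assert(isValidLiteral("->"));        // multi-character punctuation
    assert(isValidLiteral(":"));         // single-character punctuation
    assert(isValidLiteral("i32"));       // bare identifier keyword
    assert(!isValidLiteral(""));         // empty literals are rejected
    assert(!canFormatStringAsKeyword("foo bar")); // spaces are not keyword chars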
- return canFormatStringAsKeyword(value); -} - //===----------------------------------------------------------------------===// // WhitespaceElement @@ -1658,8 +1623,8 @@ const char *regionSingleBlockImplicitTerminatorPrinterCode = R"( term->getNumOperands() != 0 || term->getNumResults() != 0; } - p.printRegion({0}, /*printEntryBlockArgs=*/true, - /*printBlockTerminators=*/printTerminator); + _odsPrinter.printRegion({0}, /*printEntryBlockArgs=*/true, + /*printBlockTerminators=*/printTerminator); } )"; @@ -1677,7 +1642,8 @@ const char *enumAttrBeginPrinterCode = R"( /// Generate the printer for the 'attr-dict' directive. static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, OpMethodBody &body, bool withKeyword) { - body << " p.printOptionalAttrDict" << (withKeyword ? "WithKeyword" : "") + body << " _odsPrinter.printOptionalAttrDict" + << (withKeyword ? "WithKeyword" : "") << "((*this)->getAttrs(), /*elidedAttrs=*/{"; // Elide the variadic segment size attributes if necessary. if (!fmt.allOperands && @@ -1701,17 +1667,10 @@ static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, /// the previous element was a punctuation literal. static void genLiteralPrinter(StringRef value, OpMethodBody &body, bool &shouldEmitSpace, bool &lastWasPunctuation) { - body << " p"; + body << " _odsPrinter"; // Don't insert a space for certain punctuation. - auto shouldPrintSpaceBeforeLiteral = [&] { - if (value.size() != 1 && value != "->") - return true; - if (lastWasPunctuation) - return !StringRef(">)}],").contains(value.front()); - return !StringRef("<>(){}[],").contains(value.front()); - }; - if (shouldEmitSpace && shouldPrintSpaceBeforeLiteral()) + if (shouldEmitSpace && shouldEmitSpaceBefore(value, lastWasPunctuation)) body << " << ' '"; body << " << \"" << value << "\";\n"; @@ -1726,7 +1685,7 @@ static void genLiteralPrinter(StringRef value, OpMethodBody &body, static void genSpacePrinter(bool value, OpMethodBody &body, bool &shouldEmitSpace, bool &lastWasPunctuation) { if (value) { - body << " p << ' ';\n"; + body << " _odsPrinter << ' ';\n"; lastWasPunctuation = false; } else { lastWasPunctuation = true; @@ -1776,7 +1735,7 @@ static void genCustomDirectiveParameterPrinter(Element *element, /// Generate the printer for a custom directive. 
static void genCustomDirectivePrinter(CustomDirective *customDir,
                                       const Operator &op, OpMethodBody &body) {
-  body << "  print" << customDir->getName() << "(p, *this";
+  body << "  print" << customDir->getName() << "(_odsPrinter, *this";
   for (Element &param : customDir->getArguments()) {
     body << ", ";
     genCustomDirectiveParameterPrinter(&param, op, body);
@@ -1791,13 +1750,13 @@ static void genRegionPrinter(const Twine &regionName, OpMethodBody &body,
     body << llvm::formatv(regionSingleBlockImplicitTerminatorPrinterCode,
                           regionName);
   else
-    body << "  p.printRegion(" << regionName << ");\n";
+    body << "  _odsPrinter.printRegion(" << regionName << ");\n";
 }
 
 static void genVariadicRegionPrinter(const Twine &regionListName,
                                      OpMethodBody &body,
                                      bool hasImplicitTermTrait) {
   body << "    llvm::interleaveComma(" << regionListName
-       << ", p, [&](::mlir::Region &region) {\n      ";
+       << ", _odsPrinter, [&](::mlir::Region &region) {\n      ";
   genRegionPrinter("region", body, hasImplicitTermTrait);
   body << "    });\n";
 }
@@ -1856,10 +1815,10 @@ static void genEnumAttrPrinter(const NamedAttribute *var, const Operator &op,
       body << '"' << cases[it].getStr() << '"';
     });
     body << ")))\n"
-            "      p << '\"' << caseValueStr << '\"';\n"
+            "      _odsPrinter << '\"' << caseValueStr << '\"';\n"
             "    else\n  ";
   }
-  body << "    p << caseValueStr;\n"
+  body << "    _odsPrinter << caseValueStr;\n"
           "  }\n";
   return;
 }
@@ -1889,17 +1848,17 @@
                       llvm::isDigit(symbol.front()) ? ("_" + symbol) : symbol);
   }
-  body << "      p << caseValueStr;\n"
+  body << "      _odsPrinter << caseValueStr;\n"
          "      break;\n"
          "    default:\n"
-         "      p << '\"' << caseValueStr << '\"';\n"
+         "      _odsPrinter << '\"' << caseValueStr << '\"';\n"
          "      break;\n"
          "    }\n"
          "  }\n";
   return;
 }
 
-  body << "    p << caseValueStr;\n"
+  body << "    _odsPrinter << caseValueStr;\n"
          "  }\n";
 }
@@ -1942,7 +1901,7 @@ void OperationFormat::genElementPrinter(Element *element, OpMethodBody &body,
 
   // Emit a whitespace element.
   if (isa<NewlineElement>(element)) {
-    body << "  p.printNewline();\n";
+    body << "  _odsPrinter.printNewline();\n";
     return;
   }
   if (SpaceElement *space = dyn_cast<SpaceElement>(element))
@@ -1999,7 +1958,7 @@ void OperationFormat::genElementPrinter(Element *element, OpMethodBody &body,
   // Optionally insert a space before the next element. The AttrDict printer
   // already adds a space as necessary.
   if (shouldEmitSpace || !lastWasPunctuation)
-    body << "  p << ' ';\n";
+    body << "  _odsPrinter << ' ';\n";
   lastWasPunctuation = false;
   shouldEmitSpace = true;
@@ -2012,31 +1971,33 @@ void OperationFormat::genElementPrinter(Element *element, OpMethodBody &body,
 
   // If we are formatting as a symbol name, handle it as a symbol name.
   if (shouldFormatSymbolNameAttr(var)) {
-    body << "  p.printSymbolName(" << op.getGetterName(var->name)
+    body << "  _odsPrinter.printSymbolName(" << op.getGetterName(var->name)
          << "Attr().getValue());\n";
     return;
   }
 
   // Elide the attribute type if it is buildable.
if (attr->getTypeBuilder()) - body << " p.printAttributeWithoutType(" << op.getGetterName(var->name) - << "Attr());\n"; + body << " _odsPrinter.printAttributeWithoutType(" + << op.getGetterName(var->name) << "Attr());\n"; else - body << " p.printAttribute(" << op.getGetterName(var->name) + body << " _odsPrinter.printAttribute(" << op.getGetterName(var->name) << "Attr());\n"; } else if (auto *operand = dyn_cast(element)) { if (operand->getVar()->isVariadicOfVariadic()) { body << " ::llvm::interleaveComma(" << op.getGetterName(operand->getVar()->name) - << "(), p, [&](const auto &operands) { p << \"(\" << operands << " + << "(), _odsPrinter, [&](const auto &operands) { _odsPrinter << " + "\"(\" << operands << " "\")\"; });\n"; } else if (operand->getVar()->isOptional()) { body << " if (::mlir::Value value = " << op.getGetterName(operand->getVar()->name) << "())\n" - << " p << value;\n"; + << " _odsPrinter << value;\n"; } else { - body << " p << " << op.getGetterName(operand->getVar()->name) << "();\n"; + body << " _odsPrinter << " << op.getGetterName(operand->getVar()->name) + << "();\n"; } } else if (auto *region = dyn_cast(element)) { const NamedRegion *var = region->getVar(); @@ -2050,32 +2011,34 @@ void OperationFormat::genElementPrinter(Element *element, OpMethodBody &body, const NamedSuccessor *var = successor->getVar(); std::string name = op.getGetterName(var->name); if (var->isVariadic()) - body << " ::llvm::interleaveComma(" << name << "(), p);\n"; + body << " ::llvm::interleaveComma(" << name << "(), _odsPrinter);\n"; else - body << " p << " << name << "();\n"; + body << " _odsPrinter << " << name << "();\n"; } else if (auto *dir = dyn_cast(element)) { genCustomDirectivePrinter(dir, op, body); } else if (isa(element)) { - body << " p << getOperation()->getOperands();\n"; + body << " _odsPrinter << getOperation()->getOperands();\n"; } else if (isa(element)) { genVariadicRegionPrinter("getOperation()->getRegions()", body, hasImplicitTermTrait); } else if (isa(element)) { - body << " ::llvm::interleaveComma(getOperation()->getSuccessors(), p);\n"; + body << " ::llvm::interleaveComma(getOperation()->getSuccessors(), " + "_odsPrinter);\n"; } else if (auto *dir = dyn_cast(element)) { if (auto *operand = dyn_cast(dir->getOperand())) { if (operand->getVar()->isVariadicOfVariadic()) { - body << llvm::formatv(" ::llvm::interleaveComma({0}().getTypes(), p, " - "[&](::mlir::TypeRange types) {{ p << \"(\" << " - "types << \")\"; });\n", - op.getGetterName(operand->getVar()->name)); + body << llvm::formatv( + " ::llvm::interleaveComma({0}().getTypes(), _odsPrinter, " + "[&](::mlir::TypeRange types) {{ _odsPrinter << \"(\" << " + "types << \")\"; });\n", + op.getGetterName(operand->getVar()->name)); return; } } - body << " p << "; + body << " _odsPrinter << "; genTypeOperandPrinter(dir->getOperand(), op, body) << ";\n"; } else if (auto *dir = dyn_cast(element)) { - body << " p.printFunctionalType("; + body << " _odsPrinter.printFunctionalType("; genTypeOperandPrinter(dir->getInputs(), op, body) << ", "; genTypeOperandPrinter(dir->getResults(), op, body) << ");\n"; } else { @@ -2084,8 +2047,8 @@ void OperationFormat::genElementPrinter(Element *element, OpMethodBody &body, } void OperationFormat::genPrinter(Operator &op, OpClass &opClass) { - auto *method = - opClass.addMethodAndPrune("void", "print", "::mlir::OpAsmPrinter &p"); + auto *method = opClass.addMethodAndPrune("void", "print", + "::mlir::OpAsmPrinter &_odsPrinter"); auto &body = method->body(); // Flags for if we should emit a space, and 
if the last element was @@ -2096,253 +2059,6 @@ void OperationFormat::genPrinter(Operator &op, OpClass &opClass) { lastWasPunctuation); } -//===----------------------------------------------------------------------===// -// FormatLexer -//===----------------------------------------------------------------------===// - -namespace { -/// This class represents a specific token in the input format. -class Token { -public: - enum Kind { - // Markers. - eof, - error, - - // Tokens with no info. - l_paren, - r_paren, - caret, - colon, - comma, - equal, - less, - greater, - question, - - // Keywords. - keyword_start, - kw_attr_dict, - kw_attr_dict_w_keyword, - kw_custom, - kw_functional_type, - kw_operands, - kw_ref, - kw_regions, - kw_results, - kw_successors, - kw_type, - keyword_end, - - // String valued tokens. - identifier, - literal, - variable, - }; - Token(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} - - /// Return the bytes that make up this token. - StringRef getSpelling() const { return spelling; } - - /// Return the kind of this token. - Kind getKind() const { return kind; } - - /// Return a location for this token. - llvm::SMLoc getLoc() const { - return llvm::SMLoc::getFromPointer(spelling.data()); - } - - /// Return if this token is a keyword. - bool isKeyword() const { return kind > keyword_start && kind < keyword_end; } - -private: - /// Discriminator that indicates the kind of token this is. - Kind kind; - - /// A reference to the entire token contents; this is always a pointer into - /// a memory buffer owned by the source manager. - StringRef spelling; -}; - -/// This class implements a simple lexer for operation assembly format strings. -class FormatLexer { -public: - FormatLexer(llvm::SourceMgr &mgr, Operator &op); - - /// Lex the next token and return it. - Token lexToken(); - - /// Emit an error to the lexer with the given location and message. - Token emitError(llvm::SMLoc loc, const Twine &msg); - Token emitError(const char *loc, const Twine &msg); - - Token emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, const Twine &note); - -private: - Token formToken(Token::Kind kind, const char *tokStart) { - return Token(kind, StringRef(tokStart, curPtr - tokStart)); - } - - /// Return the next character in the stream. - int getNextChar(); - - /// Lex an identifier, literal, or variable.
- Token lexIdentifier(const char *tokStart); - Token lexLiteral(const char *tokStart); - Token lexVariable(const char *tokStart); - - llvm::SourceMgr &srcMgr; - Operator &op; - StringRef curBuffer; - const char *curPtr; -}; -} // end anonymous namespace - -FormatLexer::FormatLexer(llvm::SourceMgr &mgr, Operator &op) - : srcMgr(mgr), op(op) { - curBuffer = srcMgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer(); - curPtr = curBuffer.begin(); -} - -Token FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) { - srcMgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); - srcMgr.PrintMessage(op.getLoc()[0], llvm::SourceMgr::DK_Note, - "in custom assembly format for this operation"); - return formToken(Token::error, loc.getPointer()); -} -Token FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, - const Twine &note) { - srcMgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg); - srcMgr.PrintMessage(op.getLoc()[0], llvm::SourceMgr::DK_Note, - "in custom assembly format for this operation"); - srcMgr.PrintMessage(loc, llvm::SourceMgr::DK_Note, note); - return formToken(Token::error, loc.getPointer()); -} -Token FormatLexer::emitError(const char *loc, const Twine &msg) { - return emitError(llvm::SMLoc::getFromPointer(loc), msg); -} - -int FormatLexer::getNextChar() { - char curChar = *curPtr++; - switch (curChar) { - default: - return (unsigned char)curChar; - case 0: { - // A nul character in the stream is either the end of the current buffer or - // a random nul in the file. Disambiguate that here. - if (curPtr - 1 != curBuffer.end()) - return 0; - - // Otherwise, return end of file. - --curPtr; - return EOF; - } - case '\n': - case '\r': - // Handle the newline character by ignoring it and incrementing the line - // count. However, be careful about 'dos style' files with \n\r in them. - // Only treat a \n\r or \r\n as a single line. - if ((*curPtr == '\n' || (*curPtr == '\r')) && *curPtr != curChar) - ++curPtr; - return '\n'; - } -} - -Token FormatLexer::lexToken() { - const char *tokStart = curPtr; - - // This always consumes at least one character. - int curChar = getNextChar(); - switch (curChar) { - default: - // Handle identifiers: [a-zA-Z_] - if (isalpha(curChar) || curChar == '_') - return lexIdentifier(tokStart); - - // Unknown character, emit an error. - return emitError(tokStart, "unexpected character"); - case EOF: - // Return EOF denoting the end of lexing. - return formToken(Token::eof, tokStart); - - // Lex punctuation. - case '^': - return formToken(Token::caret, tokStart); - case ':': - return formToken(Token::colon, tokStart); - case ',': - return formToken(Token::comma, tokStart); - case '=': - return formToken(Token::equal, tokStart); - case '<': - return formToken(Token::less, tokStart); - case '>': - return formToken(Token::greater, tokStart); - case '?': - return formToken(Token::question, tokStart); - case '(': - return formToken(Token::l_paren, tokStart); - case ')': - return formToken(Token::r_paren, tokStart); - - // Ignore whitespace characters. - case 0: - case ' ': - case '\t': - case '\n': - return lexToken(); - - case '`': - return lexLiteral(tokStart); - case '$': - return lexVariable(tokStart); - } -} - -Token FormatLexer::lexLiteral(const char *tokStart) { - assert(curPtr[-1] == '`'); - - // Lex a literal surrounded by ``.
- while (const char curChar = *curPtr++) { - if (curChar == '`') - return formToken(Token::literal, tokStart); - } - return emitError(curPtr - 1, "unexpected end of file in literal"); -} - -Token FormatLexer::lexVariable(const char *tokStart) { - if (!isalpha(curPtr[0]) && curPtr[0] != '_') - return emitError(curPtr - 1, "expected variable name"); - - // Otherwise, consume the rest of the characters. - while (isalnum(*curPtr) || *curPtr == '_') - ++curPtr; - return formToken(Token::variable, tokStart); -} - -Token FormatLexer::lexIdentifier(const char *tokStart) { - // Match the rest of the identifier regex: [0-9a-zA-Z_\-]* - while (isalnum(*curPtr) || *curPtr == '_' || *curPtr == '-') - ++curPtr; - - // Check to see if this identifier is a keyword. - StringRef str(tokStart, curPtr - tokStart); - Token::Kind kind = - StringSwitch(str) - .Case("attr-dict", Token::kw_attr_dict) - .Case("attr-dict-with-keyword", Token::kw_attr_dict_w_keyword) - .Case("custom", Token::kw_custom) - .Case("functional-type", Token::kw_functional_type) - .Case("operands", Token::kw_operands) - .Case("ref", Token::kw_ref) - .Case("regions", Token::kw_regions) - .Case("results", Token::kw_results) - .Case("successors", Token::kw_successors) - .Case("type", Token::kw_type) - .Default(Token::identifier); - return Token(kind, str); -} - //===----------------------------------------------------------------------===// // FormatParser //===----------------------------------------------------------------------===// @@ -2361,8 +2077,8 @@ namespace { class FormatParser { public: FormatParser(llvm::SourceMgr &mgr, OperationFormat &format, Operator &op) - : lexer(mgr, op), curToken(lexer.lexToken()), fmt(format), op(op), - seenOperandTypes(op.getNumOperands()), + : lexer(mgr, op.getLoc()[0]), curToken(lexer.lexToken()), fmt(format), + op(op), seenOperandTypes(op.getNumOperands()), seenResultTypes(op.getNumResults()) {} /// Parse the operation assembly format. @@ -2464,7 +2180,8 @@ class FormatParser { LogicalResult parseCustomDirectiveParameter( std::vector> ¶meters); LogicalResult parseFunctionalTypeDirective(std::unique_ptr &element, - Token tok, ParserContext context); + FormatToken tok, + ParserContext context); LogicalResult parseOperandsDirective(std::unique_ptr &element, llvm::SMLoc loc, ParserContext context); LogicalResult parseReferenceDirective(std::unique_ptr &element, @@ -2476,8 +2193,8 @@ class FormatParser { LogicalResult parseSuccessorsDirective(std::unique_ptr &element, llvm::SMLoc loc, ParserContext context); - LogicalResult parseTypeDirective(std::unique_ptr &element, Token tok, - ParserContext context); + LogicalResult parseTypeDirective(std::unique_ptr &element, + FormatToken tok, ParserContext context); LogicalResult parseTypeDirectiveOperand(std::unique_ptr &element, bool isRefChild = false); @@ -2487,12 +2204,12 @@ class FormatParser { /// Advance the current lexer onto the next token. 
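Note on the deletion above: the FormatLexer being removed here is the classic hand-rolled tblgen lexer shape, a raw character pointer walked over a SourceMgr buffer, with a nul byte doing double duty as either an embedded character or end-of-input; the parser below switches from Token to FormatToken, so the lexer presumably re-lands in a shared format-lexing library (its new home is not shown in these hunks). A minimal, self-contained sketch of the same technique, with hypothetical MiniToken/MiniLexer names and a std::string instead of a SourceMgr buffer:

#include <cctype>
#include <string>

struct MiniToken {
  enum Kind { eof, identifier, error } kind;
  std::string spelling;
};

class MiniLexer {
public:
  explicit MiniLexer(std::string s) : buf(std::move(s)) {}

  MiniToken lexToken() {
    // Skip whitespace iteratively instead of recursing like the original.
    while (pos < buf.size() && isspace((unsigned char)buf[pos]))
      ++pos;
    if (pos == buf.size())
      return {MiniToken::eof, ""};
    size_t start = pos;
    // Identifiers: [a-zA-Z_][a-zA-Z0-9_]*
    if (isalpha((unsigned char)buf[pos]) || buf[pos] == '_') {
      while (pos < buf.size() &&
             (isalnum((unsigned char)buf[pos]) || buf[pos] == '_'))
        ++pos;
      return {MiniToken::identifier, buf.substr(start, pos - start)};
    }
    ++pos; // always consume at least one character, as in lexToken() above
    return {MiniToken::error, buf.substr(start, 1)};
  }

private:
  std::string buf;
  size_t pos = 0;
};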
void consumeToken() { - assert(curToken.getKind() != Token::eof && - curToken.getKind() != Token::error && + assert(curToken.getKind() != FormatToken::eof && + curToken.getKind() != FormatToken::error && "shouldn't advance past EOF or errors"); curToken = lexer.lexToken(); } - LogicalResult parseToken(Token::Kind kind, const Twine &msg) { + LogicalResult parseToken(FormatToken::Kind kind, const Twine &msg) { if (curToken.getKind() != kind) return emitError(curToken.getLoc(), msg); consumeToken(); @@ -2513,7 +2230,7 @@ class FormatParser { //===--------------------------------------------------------------------===// FormatLexer lexer; - Token curToken; + FormatToken curToken; OperationFormat &fmt; Operator &op; @@ -2534,7 +2251,7 @@ LogicalResult FormatParser::parse() { llvm::SMLoc loc = curToken.getLoc(); // Parse each of the format elements into the main format. - while (curToken.getKind() != Token::eof) { + while (curToken.getKind() != FormatToken::eof) { std::unique_ptr element; if (failed(parseElement(element, TopLevelContext))) return ::mlir::failure(); @@ -2859,13 +2576,13 @@ LogicalResult FormatParser::parseElement(std::unique_ptr &element, if (curToken.isKeyword()) return parseDirective(element, context); // Literals. - if (curToken.getKind() == Token::literal) + if (curToken.getKind() == FormatToken::literal) return parseLiteral(element, context); // Optionals. - if (curToken.getKind() == Token::l_paren) + if (curToken.getKind() == FormatToken::l_paren) return parseOptional(element, context); // Variables. - if (curToken.getKind() == Token::variable) + if (curToken.getKind() == FormatToken::variable) return parseVariable(element, context); return emitError(curToken.getLoc(), "expected directive, literal, variable, or optional group"); @@ -2873,7 +2590,7 @@ LogicalResult FormatParser::parseElement(std::unique_ptr &element, LogicalResult FormatParser::parseVariable(std::unique_ptr &element, ParserContext context) { - Token varTok = curToken; + FormatToken varTok = curToken; consumeToken(); StringRef name = varTok.getSpelling().drop_front(); @@ -2953,31 +2670,31 @@ LogicalResult FormatParser::parseVariable(std::unique_ptr &element, LogicalResult FormatParser::parseDirective(std::unique_ptr &element, ParserContext context) { - Token dirTok = curToken; + FormatToken dirTok = curToken; consumeToken(); switch (dirTok.getKind()) { - case Token::kw_attr_dict: + case FormatToken::kw_attr_dict: return parseAttrDictDirective(element, dirTok.getLoc(), context, /*withKeyword=*/false); - case Token::kw_attr_dict_w_keyword: + case FormatToken::kw_attr_dict_w_keyword: return parseAttrDictDirective(element, dirTok.getLoc(), context, /*withKeyword=*/true); - case Token::kw_custom: + case FormatToken::kw_custom: return parseCustomDirective(element, dirTok.getLoc(), context); - case Token::kw_functional_type: + case FormatToken::kw_functional_type: return parseFunctionalTypeDirective(element, dirTok, context); - case Token::kw_operands: + case FormatToken::kw_operands: return parseOperandsDirective(element, dirTok.getLoc(), context); - case Token::kw_regions: + case FormatToken::kw_regions: return parseRegionsDirective(element, dirTok.getLoc(), context); - case Token::kw_results: + case FormatToken::kw_results: return parseResultsDirective(element, dirTok.getLoc(), context); - case Token::kw_successors: + case FormatToken::kw_successors: return parseSuccessorsDirective(element, dirTok.getLoc(), context); - case Token::kw_ref: + case FormatToken::kw_ref: return parseReferenceDirective(element, 
dirTok.getLoc(), context); - case Token::kw_type: + case FormatToken::kw_type: return parseTypeDirective(element, dirTok, context); default: @@ -2987,7 +2704,7 @@ LogicalResult FormatParser::parseDirective(std::unique_ptr &element, LogicalResult FormatParser::parseLiteral(std::unique_ptr &element, ParserContext context) { - Token literalTok = curToken; + FormatToken literalTok = curToken; if (context != TopLevelContext) { return emitError( literalTok.getLoc(), @@ -3009,7 +2726,7 @@ LogicalResult FormatParser::parseLiteral(std::unique_ptr &element, } // Check that the parsed literal is valid. - if (!LiteralElement::isValidLiteral(value)) + if (!isValidLiteral(value)) return emitError(literalTok.getLoc(), "expected valid literal"); element = std::make_unique(value); @@ -3030,14 +2747,15 @@ LogicalResult FormatParser::parseOptional(std::unique_ptr &element, do { if (failed(parseOptionalChildElement(thenElements, anchorIdx))) return ::mlir::failure(); - } while (curToken.getKind() != Token::r_paren); + } while (curToken.getKind() != FormatToken::r_paren); consumeToken(); // Parse the `else` elements of this optional group. - if (curToken.getKind() == Token::colon) { + if (curToken.getKind() == FormatToken::colon) { consumeToken(); - if (failed(parseToken(Token::l_paren, "expected '(' to start else branch " - "of optional group"))) + if (failed(parseToken(FormatToken::l_paren, + "expected '(' to start else branch " + "of optional group"))) return failure(); do { llvm::SMLoc childLoc = curToken.getLoc(); @@ -3046,11 +2764,12 @@ LogicalResult FormatParser::parseOptional(std::unique_ptr &element, failed(verifyOptionalChildElement(elseElements.back().get(), childLoc, /*isAnchor=*/false))) return failure(); - } while (curToken.getKind() != Token::r_paren); + } while (curToken.getKind() != FormatToken::r_paren); consumeToken(); } - if (failed(parseToken(Token::question, "expected '?' after optional group"))) + if (failed(parseToken(FormatToken::question, + "expected '?' after optional group"))) return ::mlir::failure(); // The optional group is required to have an anchor. @@ -3085,7 +2804,7 @@ LogicalResult FormatParser::parseOptionalChildElement( return ::mlir::failure(); // Check to see if this element is the anchor of the optional group. - bool isAnchor = curToken.getKind() == Token::caret; + bool isAnchor = curToken.getKind() == FormatToken::caret; if (isAnchor) { if (anchorIdx) return emitError(childLoc, "only one element can be marked as the anchor " @@ -3189,16 +2908,16 @@ FormatParser::parseCustomDirective(std::unique_ptr &element, return emitError(loc, "'custom' is only valid as a top-level directive"); // Parse the custom directive name. 
- if (failed( - parseToken(Token::less, "expected '<' before custom directive name"))) + if (failed(parseToken(FormatToken::less, + "expected '<' before custom directive name"))) return ::mlir::failure(); - Token nameTok = curToken; - if (failed(parseToken(Token::identifier, + FormatToken nameTok = curToken; + if (failed(parseToken(FormatToken::identifier, "expected custom directive name identifier")) || - failed(parseToken(Token::greater, + failed(parseToken(FormatToken::greater, "expected '>' after custom directive name")) || - failed(parseToken(Token::l_paren, + failed(parseToken(FormatToken::l_paren, "expected '(' before custom directive parameters"))) return ::mlir::failure(); @@ -3207,12 +2926,12 @@ FormatParser::parseCustomDirective(std::unique_ptr &element, do { if (failed(parseCustomDirectiveParameter(elements))) return ::mlir::failure(); - if (curToken.getKind() != Token::comma) + if (curToken.getKind() != FormatToken::comma) break; consumeToken(); } while (true); - if (failed(parseToken(Token::r_paren, + if (failed(parseToken(FormatToken::r_paren, "expected ')' after custom directive parameters"))) return ::mlir::failure(); @@ -3249,9 +2968,8 @@ LogicalResult FormatParser::parseCustomDirectiveParameter( return ::mlir::success(); } -LogicalResult -FormatParser::parseFunctionalTypeDirective(std::unique_ptr &element, - Token tok, ParserContext context) { +LogicalResult FormatParser::parseFunctionalTypeDirective( + std::unique_ptr &element, FormatToken tok, ParserContext context) { llvm::SMLoc loc = tok.getLoc(); if (context != TopLevelContext) return emitError( @@ -3259,11 +2977,14 @@ FormatParser::parseFunctionalTypeDirective(std::unique_ptr &element, // Parse the main operand. std::unique_ptr inputs, results; - if (failed(parseToken(Token::l_paren, "expected '(' before argument list")) || + if (failed(parseToken(FormatToken::l_paren, + "expected '(' before argument list")) || failed(parseTypeDirectiveOperand(inputs)) || - failed(parseToken(Token::comma, "expected ',' after inputs argument")) || + failed(parseToken(FormatToken::comma, + "expected ',' after inputs argument")) || failed(parseTypeDirectiveOperand(results)) || - failed(parseToken(Token::r_paren, "expected ')' after argument list"))) + failed( + parseToken(FormatToken::r_paren, "expected ')' after argument list"))) return ::mlir::failure(); element = std::make_unique(std::move(inputs), std::move(results)); @@ -3294,9 +3015,11 @@ FormatParser::parseReferenceDirective(std::unique_ptr &element, return emitError(loc, "'ref' is only valid within a `custom` directive"); std::unique_ptr operand; - if (failed(parseToken(Token::l_paren, "expected '(' before argument list")) || + if (failed(parseToken(FormatToken::l_paren, + "expected '(' before argument list")) || failed(parseElement(operand, RefDirectiveContext)) || - failed(parseToken(Token::r_paren, "expected ')' after argument list"))) + failed( + parseToken(FormatToken::r_paren, "expected ')' after argument list"))) return ::mlir::failure(); element = std::make_unique(std::move(operand)); @@ -3355,17 +3078,19 @@ FormatParser::parseSuccessorsDirective(std::unique_ptr &element, } LogicalResult -FormatParser::parseTypeDirective(std::unique_ptr &element, Token tok, - ParserContext context) { +FormatParser::parseTypeDirective(std::unique_ptr &element, + FormatToken tok, ParserContext context) { llvm::SMLoc loc = tok.getLoc(); if (context == TypeDirectiveContext) return emitError(loc, "'type' cannot be used as a child of another `type`"); bool isRefChild = context == 
RefDirectiveContext; std::unique_ptr operand; - if (failed(parseToken(Token::l_paren, "expected '(' before argument list")) || + if (failed(parseToken(FormatToken::l_paren, + "expected '(' before argument list")) || failed(parseTypeDirectiveOperand(operand, isRefChild)) || - failed(parseToken(Token::r_paren, "expected ')' after argument list"))) + failed( + parseToken(FormatToken::r_paren, "expected ')' after argument list"))) return ::mlir::failure(); element = std::make_unique(std::move(operand)); diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp index d9ce2963a8f37..8babff25db07b 100644 --- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp +++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp @@ -67,6 +67,7 @@ class {0}(_ods_ir.OpView): /// Each segment spec is either None (default) or an array of integers /// where: /// 1 = single element (expect non sequence operand/result) +/// 0 = optional element (expect a value or None) /// -1 = operand/result is a sequence corresponding to a variadic constexpr const char *opClassSizedSegmentsTemplate = R"Py( _ODS_{0}_SEGMENTS = {1} @@ -505,6 +506,9 @@ constexpr const char *singleResultAppendTemplate = "results.append({0})"; /// {0} is the field name. constexpr const char *optionalAppendOperandTemplate = "if {0} is not None: operands.append(_get_op_result_or_value({0}))"; +constexpr const char *optionalAppendAttrSizedOperandsTemplate = + "operands.append(_get_op_result_or_value({0}) if {0} is not None else " + "None)"; constexpr const char *optionalAppendResultTemplate = "if {0} is not None: results.append({0})"; @@ -693,7 +697,11 @@ populateBuilderLinesOperand(const Operator &op, if (!element.isVariableLength()) { formatString = singleOperandAppendTemplate; } else if (element.isOptional()) { - formatString = optionalAppendOperandTemplate; + if (sizedSegments) { + formatString = optionalAppendAttrSizedOperandsTemplate; + } else { + formatString = optionalAppendOperandTemplate; + } } else { assert(element.isVariadic() && "unhandled element group type"); // If emitting with sizedSegments, then we add the actual list-typed @@ -882,10 +890,10 @@ static void emitSegmentSpec( std::string segmentSpec("["); for (int i = 0, e = getNumElements(op); i < e; ++i) { const NamedTypeConstraint &element = getElement(op, i); - if (element.isVariableLength()) { - segmentSpec.append("-1,"); - } else if (element.isOptional()) { + if (element.isOptional()) { segmentSpec.append("0,"); + } else if (element.isVariadic()) { + segmentSpec.append("-1,"); } else { segmentSpec.append("1,"); } diff --git a/mlir/unittests/IR/AttributeTest.cpp b/mlir/unittests/IR/AttributeTest.cpp index 891abd1a4f23c..aaff61e7d5f9f 100644 --- a/mlir/unittests/IR/AttributeTest.cpp +++ b/mlir/unittests/IR/AttributeTest.cpp @@ -205,4 +205,50 @@ TEST(DenseScalarTest, ExtractZeroRankElement) { EXPECT_TRUE(attr.getValue({0}) == value); } +TEST(SparseElementsAttrTest, GetZero) { + MLIRContext context; + context.allowUnregisteredDialects(); + + IntegerType intTy = IntegerType::get(&context, 32); + FloatType floatTy = FloatType::getF32(&context); + Type stringTy = OpaqueType::get(Identifier::get("test", &context), "string"); + + ShapedType tensorI32 = RankedTensorType::get({2, 2}, intTy); + ShapedType tensorF32 = RankedTensorType::get({2, 2}, floatTy); + ShapedType tensorString = RankedTensorType::get({2, 2}, stringTy); + + auto indicesType = + RankedTensorType::get({1, 2}, IntegerType::get(&context, 64)); + auto indices = + 
DenseIntElementsAttr::get(indicesType, {APInt(64, 0), APInt(64, 0)}); + + RankedTensorType intValueTy = RankedTensorType::get({1}, intTy); + auto intValue = DenseIntElementsAttr::get(intValueTy, {1}); + + RankedTensorType floatValueTy = RankedTensorType::get({1}, floatTy); + auto floatValue = DenseFPElementsAttr::get(floatValueTy, {1.0f}); + + RankedTensorType stringValueTy = RankedTensorType::get({1}, stringTy); + auto stringValue = DenseElementsAttr::get(stringValueTy, {StringRef("foo")}); + + auto sparseInt = SparseElementsAttr::get(tensorI32, indices, intValue); + auto sparseFloat = SparseElementsAttr::get(tensorF32, indices, floatValue); + auto sparseString = + SparseElementsAttr::get(tensorString, indices, stringValue); + + // Only index (0, 0) contains an element, others are supposed to return + // the zero/empty value. + auto zeroIntValue = sparseInt.getValue({1, 1}); + EXPECT_EQ(zeroIntValue.cast().getInt(), 0); + EXPECT_TRUE(zeroIntValue.getType() == intTy); + + auto zeroFloatValue = sparseFloat.getValue({1, 1}); + EXPECT_EQ(zeroFloatValue.cast().getValueAsDouble(), 0.0f); + EXPECT_TRUE(zeroFloatValue.getType() == floatTy); + + auto zeroStringValue = sparseString.getValue({1, 1}); + EXPECT_TRUE(zeroStringValue.cast().getValue().empty()); + EXPECT_TRUE(zeroStringValue.getType() == stringTy); +} + } // end namespace diff --git a/mlir/unittests/Support/DebugActionTest.cpp b/mlir/unittests/Support/DebugActionTest.cpp index ba28b91a323d8..0a73436f572d7 100644 --- a/mlir/unittests/Support/DebugActionTest.cpp +++ b/mlir/unittests/Support/DebugActionTest.cpp @@ -10,7 +10,7 @@ #include "gmock/gmock.h" // DebugActionManager is only enabled in DEBUG mode. -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS using namespace mlir; diff --git a/mlir/unittests/Support/DebugCounterTest.cpp b/mlir/unittests/Support/DebugCounterTest.cpp index 09ba20f87165c..7ca48cdf35731 100644 --- a/mlir/unittests/Support/DebugCounterTest.cpp +++ b/mlir/unittests/Support/DebugCounterTest.cpp @@ -12,7 +12,7 @@ using namespace mlir; // DebugActionManager is only enabled in DEBUG mode. -#ifndef NDEBUG +#if LLVM_ENABLE_ABI_BREAKING_CHECKS namespace { diff --git a/openmp/docs/doxygen.cfg.in b/openmp/docs/doxygen.cfg.in index f02c70336040e..f52c234da05de 100644 --- a/openmp/docs/doxygen.cfg.in +++ b/openmp/docs/doxygen.cfg.in @@ -1220,7 +1220,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# YES) or that it should be included in the main .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
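The DebugActionTest/DebugCounterTest hunks above swap #ifndef NDEBUG for #if LLVM_ENABLE_ABI_BREAKING_CHECKS. The two conditions are not equivalent: the macro comes from llvm/Config/abi-breaking.h and can be forced on or off independently of assertions via LLVM_ABI_BREAKING_CHECKS, so a test must be gated on exactly the same condition as the feature it exercises. A minimal sketch of the pattern, using a hypothetical feature function:

#include "llvm/Config/abi-breaking.h" // provides LLVM_ENABLE_ABI_BREAKING_CHECKS

// Hypothetical component that is only active when ABI-breaking checks are
// compiled in, mirroring the guard the tests above now use.
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
inline bool featureIsActive() { return true; }
#else
inline bool featureIsActive() { return false; }
#endif

// Any test of featureIsActive() must use the same #if guard; gating it on
// !NDEBUG would mis-predict the result in FORCE_ON or FORCE_OFF builds.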
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 419c64d381168..242df638f80d3 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -60,7 +60,7 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR}) set(include_directory ${devicertl_base_directory}/include) set(source_directory ${devicertl_base_directory}/src) -set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80) +set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80 86) set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${all_capabilities} CACHE STRING "List of CUDA Compute Capabilities to be used to compile the NVPTX DeviceRTL.") diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h index da04e143124b8..302e3ebc5bedd 100644 --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -249,6 +249,8 @@ void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId); void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId); +void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId); + int32_t __kmpc_master(IdentTy *Loc, int32_t TId); void __kmpc_end_master(IdentTy *Loc, int32_t TId); diff --git a/openmp/libomptarget/DeviceRTL/include/Mapping.h b/openmp/libomptarget/DeviceRTL/include/Mapping.h index a3193f3575640..4f65d28da513f 100644 --- a/openmp/libomptarget/DeviceRTL/include/Mapping.h +++ b/openmp/libomptarget/DeviceRTL/include/Mapping.h @@ -34,9 +34,19 @@ bool isSPMDMode(); bool isGenericMode(); /// Return true if the executing thread is the main thread in generic mode. +/// These functions look up state, and that state must be valid for the +/// querying thread and location. See also `isInitialThreadInLevel0` for a +/// stateless alternative in certain situations, e.g., during initialization. bool isMainThreadInGenericMode(); bool isMainThreadInGenericMode(bool IsSPMD); +/// Return true if this thread is the initial thread in parallel level 0. +/// +/// The thread for which this returns true should be used for single-threaded +/// initialization tasks. We pick a special thread to ensure there are no +/// races between the initialization and the first read of initialized state. +bool isInitialThreadInLevel0(bool IsSPMD); + /// Return true if the executing thread has the lowest Id of the active threads /// in the warp. bool isLeaderInWarp(); diff --git a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp index 94bf432acb9a9..bf3d4ca24090b 100644 --- a/openmp/libomptarget/DeviceRTL/src/Kernel.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Kernel.cpp @@ -83,7 +83,7 @@ int32_t __kmpc_target_init(IdentTy *Ident, int8_t Mode, return -1; } - if (mapping::isMainThreadInGenericMode(IsSPMD)) + if (mapping::isInitialThreadInLevel0(IsSPMD)) return -1; if (UseGenericStateMachine) diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp index 9bd26c80636ef..bece29489a6d1 100644 --- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "Mapping.h" +#include "Interface.h" #include "State.h" #include "Types.h" #include "Utils.h" @@ -43,6 +44,12 @@ uint32_t getWorkgroupDim(uint32_t group_id, uint32_t grid_size, return (r < group_size) ?
r : group_size; } +uint32_t getNumHardwareThreadsInBlock() { + return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(), + __builtin_amdgcn_grid_size_x(), + __builtin_amdgcn_workgroup_size_x()); +} + LaneMaskTy activemask() { return __builtin_amdgcn_read_exec(); } LaneMaskTy lanemaskLT() { @@ -67,13 +74,6 @@ uint32_t getThreadIdInWarp() { uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); } -uint32_t getBlockSize() { - // TODO: verify this logic for generic mode. - return getWorkgroupDim(__builtin_amdgcn_workgroup_id_x(), - __builtin_amdgcn_grid_size_x(), - __builtin_amdgcn_workgroup_size_x()); -} - uint32_t getKernelSize() { return __builtin_amdgcn_grid_size_x(); } uint32_t getBlockId() { return __builtin_amdgcn_workgroup_id_x(); } @@ -83,12 +83,8 @@ uint32_t getNumberOfBlocks() { __builtin_amdgcn_workgroup_size_x()); } -uint32_t getNumberOfProcessorElements() { - return getBlockSize(); -} - uint32_t getWarpId() { - return mapping::getThreadIdInBlock() / mapping::getWarpSize(); + return impl::getThreadIdInBlock() / mapping::getWarpSize(); } uint32_t getNumberOfWarpsInBlock() { @@ -104,6 +100,10 @@ uint32_t getNumberOfWarpsInBlock() { #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) +uint32_t getNumHardwareThreadsInBlock() { + return __nvvm_read_ptx_sreg_ntid_x(); +} + constexpr const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; } @@ -126,29 +126,23 @@ LaneMaskTy lanemaskGT() { return Res; } -uint32_t getThreadIdInWarp() { - return mapping::getThreadIdInBlock() & (mapping::getWarpSize() - 1); -} - uint32_t getThreadIdInBlock() { return __nvvm_read_ptx_sreg_tid_x(); } -uint32_t getBlockSize() { - return __nvvm_read_ptx_sreg_ntid_x() - - (!mapping::isSPMDMode() * mapping::getWarpSize()); +uint32_t getThreadIdInWarp() { + return impl::getThreadIdInBlock() & (mapping::getWarpSize() - 1); } -uint32_t getKernelSize() { return __nvvm_read_ptx_sreg_nctaid_x(); } +uint32_t getKernelSize() { + return __nvvm_read_ptx_sreg_nctaid_x() * + mapping::getNumberOfProcessorElements(); +} uint32_t getBlockId() { return __nvvm_read_ptx_sreg_ctaid_x(); } uint32_t getNumberOfBlocks() { return __nvvm_read_ptx_sreg_nctaid_x(); } -uint32_t getNumberOfProcessorElements() { - return __nvvm_read_ptx_sreg_ntid_x(); -} - uint32_t getWarpId() { - return mapping::getThreadIdInBlock() / mapping::getWarpSize(); + return impl::getThreadIdInBlock() / mapping::getWarpSize(); } uint32_t getNumberOfWarpsInBlock() { @@ -164,20 +158,34 @@ uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; } } // namespace impl } // namespace _OMP +/// We have to be deliberate about the distinction of `mapping::` and `impl::` +/// below to avoid repeating assumptions or including irrelevant ones. +///{ + +static bool isInLastWarp() { + uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) & + ~(mapping::getWarpSize() - 1); + return mapping::getThreadIdInBlock() == MainTId; +} + bool mapping::isMainThreadInGenericMode(bool IsSPMD) { if (IsSPMD || icv::Level) return false; // Check if this is the last warp in the block. 
- uint32_t MainTId = (mapping::getNumberOfProcessorElements() - 1) & - ~(mapping::getWarpSize() - 1); - return mapping::getThreadIdInBlock() == MainTId; + return isInLastWarp(); } bool mapping::isMainThreadInGenericMode() { return mapping::isMainThreadInGenericMode(mapping::isSPMDMode()); } +bool mapping::isInitialThreadInLevel0(bool IsSPMD) { + if (IsSPMD) + return mapping::getThreadIdInBlock() == 0; + return isInLastWarp(); +} + bool mapping::isLeaderInWarp() { __kmpc_impl_lanemask_t Active = mapping::activemask(); __kmpc_impl_lanemask_t LaneMaskLT = mapping::lanemaskLT(); @@ -190,37 +198,67 @@ LaneMaskTy mapping::lanemaskLT() { return impl::lanemaskLT(); } LaneMaskTy mapping::lanemaskGT() { return impl::lanemaskGT(); } -uint32_t mapping::getThreadIdInWarp() { return impl::getThreadIdInWarp(); } +uint32_t mapping::getThreadIdInWarp() { + uint32_t ThreadIdInWarp = impl::getThreadIdInWarp(); + ASSERT(ThreadIdInWarp < impl::getWarpSize()); + return ThreadIdInWarp; +} -uint32_t mapping::getThreadIdInBlock() { return impl::getThreadIdInBlock(); } +uint32_t mapping::getThreadIdInBlock() { + uint32_t ThreadIdInBlock = impl::getThreadIdInBlock(); + ASSERT(ThreadIdInBlock < impl::getNumHardwareThreadsInBlock()); + return ThreadIdInBlock; +} -uint32_t mapping::getBlockSize() { return impl::getBlockSize(); } +uint32_t mapping::getWarpSize() { return impl::getWarpSize(); } -uint32_t mapping::getKernelSize() { return impl::getKernelSize(); } +uint32_t mapping::getBlockSize() { + uint32_t BlockSize = mapping::getNumberOfProcessorElements() - + (!mapping::isSPMDMode() * impl::getWarpSize()); + return BlockSize; +} -uint32_t mapping::getBlockId() { return impl::getBlockId(); } +uint32_t mapping::getKernelSize() { return impl::getKernelSize(); } -uint32_t mapping::getNumberOfBlocks() { return impl::getNumberOfBlocks(); } +uint32_t mapping::getWarpId() { + uint32_t WarpID = impl::getWarpId(); + ASSERT(WarpID < impl::getNumberOfWarpsInBlock()); + return WarpID; +} -uint32_t mapping::getNumberOfProcessorElements() { - return impl::getNumberOfProcessorElements(); +uint32_t mapping::getBlockId() { + uint32_t BlockId = impl::getBlockId(); + ASSERT(BlockId < impl::getNumberOfBlocks()); + return BlockId; } -uint32_t mapping::getWarpId() { return impl::getWarpId(); } +uint32_t mapping::getNumberOfWarpsInBlock() { + uint32_t NumberOfWarpsInBlocks = impl::getNumberOfWarpsInBlock(); + ASSERT(impl::getWarpId() < NumberOfWarpsInBlocks); + return NumberOfWarpsInBlocks; +} -uint32_t mapping::getWarpSize() { return impl::getWarpSize(); } +uint32_t mapping::getNumberOfBlocks() { + uint32_t NumberOfBlocks = impl::getNumberOfBlocks(); + ASSERT(impl::getBlockId() < NumberOfBlocks); + return NumberOfBlocks; +} -uint32_t mapping::getNumberOfWarpsInBlock() { - return impl::getNumberOfWarpsInBlock(); +uint32_t mapping::getNumberOfProcessorElements() { + uint32_t NumberOfProcessorElements = impl::getNumHardwareThreadsInBlock(); + ASSERT(impl::getThreadIdInBlock() < NumberOfProcessorElements); + return NumberOfProcessorElements; } +///} + /// Execution mode /// ///{ static int SHARED(IsSPMDMode); void mapping::init(bool IsSPMD) { - if (!mapping::getThreadIdInBlock()) + if (mapping::isInitialThreadInLevel0(IsSPMD)) IsSPMDMode = IsSPMD; } @@ -237,7 +275,7 @@ __attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() { __attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() { FunctionTracingRAII(); - return mapping::getNumberOfProcessorElements(); + return impl::getNumHardwareThreadsInBlock(); } 
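For context on the isInLastWarp() helper factored out above: rounding the highest thread id down to a warp boundary yields the id of the first thread in the last warp, which generic mode reserves as the main thread. A host-compilable sketch of that arithmetic (the block and warp sizes below are hypothetical examples; the mask trick requires a power-of-two warp size):

#include <cassert>
#include <cstdint>

// Host-side sketch of the computation inside isInLastWarp().
static uint32_t mainThreadId(uint32_t numThreads, uint32_t warpSize) {
  // Round the highest thread id (numThreads - 1) down to a warp boundary.
  return (numThreads - 1) & ~(warpSize - 1);
}

int main() {
  assert(mainThreadId(128, 32) == 96); // last full warp starts at thread 96
  assert(mainThreadId(96, 32) == 64);
  assert(mainThreadId(33, 32) == 32);  // a partial last warp is still isolated
  return 0;
}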
} #pragma omp end declare target diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp index 40d10a65817ea..a16fa1b1a0fac 100644 --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -366,7 +366,7 @@ void *&state::lookupPtr(ValueKind Kind, bool IsReadonly) { void state::init(bool IsSPMD) { SharedMemorySmartStack.init(IsSPMD); - if (!mapping::getThreadIdInBlock()) + if (mapping::isInitialThreadInLevel0(IsSPMD)) TeamState.init(IsSPMD); ThreadStates[mapping::getThreadIdInBlock()] = nullptr; diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp index e219c75d04ea6..6b4bab0bcbb20 100644 --- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp @@ -348,6 +348,12 @@ __attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc, synchronize::threadsAligned(); } +__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc, + int32_t TId) { + FunctionTracingRAII(); + synchronize::threads(); +} + int32_t __kmpc_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); return omp_get_team_num() == 0; diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp index 8fcb96b158cf0..df574970ba263 100644 --- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp @@ -25,6 +25,7 @@ __attribute__((used, weak, optnone)) void keepAlive() { __kmpc_get_hardware_thread_id_in_block(); __kmpc_get_hardware_num_threads_in_block(); __kmpc_barrier_simple_spmd(nullptr, 0); + __kmpc_barrier_simple_generic(nullptr, 0); } } // namespace _OMP diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu index 6dd591325c0bf..b3bf550364bde 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu @@ -226,4 +226,14 @@ void __kmp_invoke_microtask(kmp_int32 global_tid, kmp_int32 bound_tid, void *fn, } } +namespace _OMP { +/// Helper to keep code alive without introducing a performance penalty. 
+__attribute__((used, weak, optnone)) void keepAlive() { + __kmpc_get_hardware_thread_id_in_block(); + __kmpc_get_hardware_num_threads_in_block(); + __kmpc_barrier_simple_spmd(nullptr, 0); + __kmpc_barrier_simple_generic(nullptr, 0); +} +} // namespace _OMP + #pragma omp end declare target diff --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu index 8711cd200051a..823c9fc1ef40b 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu @@ -78,6 +78,9 @@ EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) { __kmpc_impl_syncthreads(); PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n"); } +EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) { + return __kmpc_barrier_simple_spmd(loc_ref, tid); +} //////////////////////////////////////////////////////////////////////////////// // KMP MASTER diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h index cb193c9ca9cc5..00aa07c01419c 100644 --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -380,6 +380,7 @@ EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size); // sync barrier EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid); EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid); +EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid); EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid); // single diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt index 42cfbaf23beb7..3f1c4e75cbc16 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt @@ -93,7 +93,7 @@ set(devicertl_common_directory set(devicertl_nvptx_directory ${devicertl_base_directory}/nvptx) -set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80) +set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80 86) set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${all_capabilities} CACHE STRING "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.") diff --git a/openmp/libomptarget/plugins/remote/src/CMakeLists.txt b/openmp/libomptarget/plugins/remote/src/CMakeLists.txt index de3177ffedca4..f688bddc55271 100644 --- a/openmp/libomptarget/plugins/remote/src/CMakeLists.txt +++ b/openmp/libomptarget/plugins/remote/src/CMakeLists.txt @@ -10,7 +10,7 @@ # ##===----------------------------------------------------------------------===## -cmake_minimum_required(VERSION 3.15) +cmake_minimum_required(VERSION 3.13.4) # Define the suffix for the runtime messaging dumps. add_definitions(-DTARGET_NAME=RPC) diff --git a/openmp/runtime/doc/doxygen/config b/openmp/runtime/doc/doxygen/config index cd1eca2f2888f..85968dd113436 100644 --- a/openmp/runtime/doc/doxygen/config +++ b/openmp/runtime/doc/doxygen/config @@ -1048,7 +1048,7 @@ HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). +# it should be included in the main .chm file (NO). 
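The keepAlive() definitions above rely on a small attribute combination: used keeps the definition in the object file even though nothing references it, weak lets another definition win at link time without a duplicate-symbol error, and optnone stops the optimizer from folding away the calls whose symbols it is meant to pin. A generic sketch of the same pattern with a hypothetical runtime entry point:

// Hypothetical runtime entry point that must survive in the final image even
// when no translation unit calls it directly.
extern "C" int __sketch_runtime_entry(int x) { return x + 1; }

// 'used' pins this helper itself; 'optnone' keeps the call from being
// optimized out; 'weak' avoids clashes if the helper is defined elsewhere.
__attribute__((used, weak, optnone)) void keepSymbolsAlive() {
  __sketch_runtime_entry(0);
}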
GENERATE_CHI = NO diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index aa0641f7f70f8..a8c08f83c96f0 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -1753,17 +1753,9 @@ static void __kmp_stg_parse_barrier_pattern(char const *name, char const *value, } } } - if ((dist_req == 0) && (non_dist_req != 0)) { - // Something was set to a barrier other than dist; set all others to hyper - for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { - if (__kmp_barrier_release_pattern[i] == bp_dist_bar) - __kmp_barrier_release_pattern[i] = bp_hyper_bar; - if (__kmp_barrier_gather_pattern[i] == bp_dist_bar) - __kmp_barrier_gather_pattern[i] = bp_hyper_bar; - } - } else if (non_dist_req != 0) { - // some requests for dist, plus requests for others; set all to dist - if (non_dist_req > 0 && dist_req > 0 && warn) { + if (dist_req != 0) { + // set all barriers to dist + if ((non_dist_req != 0) && warn) { KMP_INFORM(BarrierPatternOverride, name, __kmp_barrier_pattern_name[bp_dist_bar]); warn = 0; diff --git a/polly/docs/doxygen.cfg.in b/polly/docs/doxygen.cfg.in index 36786aa564105..c2f9f7ff3fa79 100644 --- a/polly/docs/doxygen.cfg.in +++ b/polly/docs/doxygen.cfg.in @@ -1220,7 +1220,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated ( -# YES) or that it should be included in the master .chm file ( NO). +# YES) or that it should be included in the main .chm file ( NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index 993245cfce481..5ed0c64ad3f86 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -13,6 +13,7 @@ #ifndef POLLY_SCHEDULETREETRANSFORM_H #define POLLY_SCHEDULETREETRANSFORM_H +#include "polly/Support/ISLTools.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/ErrorHandling.h" #include "isl/isl-noexceptions.h" @@ -147,8 +148,7 @@ struct RecursiveScheduleTreeVisitor /// By default, recursively visit the child nodes. RetTy visitNode(isl::schedule_node Node, Args... args) { - isl_size NumChildren = Node.n_children().release(); - for (isl_size i = 0; i < NumChildren; i += 1) + for (unsigned i : rangeIslSize(0, Node.n_children())) getDerived().visit(Node.child(i), std::forward(args)...); return RetTy(); } @@ -208,7 +208,7 @@ isl::set getPartialTilePrefixes(isl::set ScheduleRange, int VectorWidth); /// belong to the current band node. /// @param OutDimsNum A number of dimensions that should belong to /// the current band node. -isl::union_set getIsolateOptions(isl::set IsolateDomain, isl_size OutDimsNum); +isl::union_set getIsolateOptions(isl::set IsolateDomain, unsigned OutDimsNum); /// Create an isl::union_set, which describes the specified option for the /// dimension of the current node. diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 5eff6eb3e8d20..9739e3310dacd 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -81,7 +81,7 @@ extern bool UseInstructionNames; // The maximal number of basic sets we allow during domain construction to // be created. More complex scops will result in very high compile time and // are also unlikely to result in good code. -extern int const MaxDisjunctsInDomain; +extern unsigned const MaxDisjunctsInDomain; /// The different memory kinds used in Polly. 
/// diff --git a/polly/include/polly/Support/ISLTools.h b/polly/include/polly/Support/ISLTools.h index c4e62ca312852..790f7b0025334 100644 --- a/polly/include/polly/Support/ISLTools.h +++ b/polly/include/polly/Support/ISLTools.h @@ -14,8 +14,32 @@ #ifndef POLLY_ISLTOOLS_H #define POLLY_ISLTOOLS_H +#include "llvm/ADT/Sequence.h" #include "llvm/ADT/iterator.h" #include "isl/isl-noexceptions.h" +#include + +/// In debug builds, assert that @p Size is valid. In non-debug builds, mark +/// the error status as checked, but do not enforce the check. +inline void islAssert(const isl::size &Size) { +#ifdef NDEBUG + // Calling is_error() marks that the error status has been checked which + // disables the error-status-not-checked errors that would otherwise occur + // when using the value. + (void)Size.is_error(); +#else + // Assert on error in debug builds. + assert(!Size.is_error()); +#endif +} + +/// Check that @p Size is valid (in debug builds only) and cast it to unsigned. +/// If @p Size is not valid (Size.is_error() == true), an assert and an abort +/// are triggered. +inline unsigned unsignedFromIslSize(const isl::size &Size) { + islAssert(Size); + return static_cast<unsigned>(Size); +} namespace isl { inline namespace noexceptions { @@ -160,7 +184,7 @@ isl::set singleton(isl::union_set USet, isl::space ExpectedSpace); /// The implementation currently returns the maximum number of dimensions it /// encounters, if different, and 0 if none is encountered. However, most other /// code will most likely fail if one of these happens. -isl_size getNumScatterDims(const isl::union_map &Schedule); +unsigned getNumScatterDims(const isl::union_map &Schedule); /// Return the scatter space of a @p Schedule. /// @@ -498,6 +522,13 @@ isl::set subtractParams(isl::set Set, isl::set Params); /// value. Otherwise, return NaN. isl::val getConstant(isl::pw_aff PwAff, bool Max, bool Min); +/// Check that @p End is valid and return an iteration range from @p Begin to +/// @p End. +/// +/// Use case example: +/// for (unsigned i : rangeIslSize(0, Map.domain_tuple_dim())) +/// // do stuff +llvm::iota_range<unsigned> rangeIslSize(unsigned Begin, isl::size End); + /// Dump a description of the argument to llvm::errs().
/// /// In contrast to isl's dump function, there are a few differences: diff --git a/polly/lib/Analysis/DependenceInfo.cpp b/polly/lib/Analysis/DependenceInfo.cpp index 0ac7ff1a14c0b..b8798a1c761eb 100644 --- a/polly/lib/Analysis/DependenceInfo.cpp +++ b/polly/lib/Analysis/DependenceInfo.cpp @@ -190,7 +190,7 @@ static void collectInfo(Scop &S, isl_union_map *&Read, /// Fix all dimensions of @p Zero to 0 and add it to @p User static void fixSetToZero(isl::set Zero, isl::union_set *User) { - for (auto i : seq(0, Zero.tuple_dim().release())) + for (auto i : rangeIslSize(0, Zero.tuple_dim())) Zero = Zero.fix_si(isl::dim::set, i, 0); *User = User->unite(Zero); } @@ -680,7 +680,7 @@ bool Dependences::isValidSchedule( Dependences = Dependences.apply_range(Schedule); isl::set Zero = isl::set::universe(ScheduleSpace); - for (auto i : seq(0, Zero.tuple_dim().release())) + for (auto i : rangeIslSize(0, Zero.tuple_dim())) Zero = Zero.fix_si(isl::dim::set, i, 0); isl::union_set UDeltas = Dependences.deltas(); diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp index 2cb6f37441e8d..d06d5b72b71ce 100644 --- a/polly/lib/Analysis/ScopBuilder.cpp +++ b/polly/lib/Analysis/ScopBuilder.cpp @@ -73,7 +73,7 @@ bool polly::ModelReadOnlyScalars; // More complex access ranges will result in very high compile time and are also // unlikely to result in good code. This value is very high and should only // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006). -static int const MaxDimensionsInAccessRange = 9; +static unsigned const MaxDimensionsInAccessRange = 9; static cl::opt XModelReadOnlyScalars( "polly-analyze-read-only-scalars", @@ -108,7 +108,7 @@ static cl::opt RunTimeChecksMaxArraysPerGroup( cl::desc("The maximal number of arrays to compare in each alias group."), cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory)); -static cl::opt<int> RunTimeChecksMaxAccessDisjuncts( +static cl::opt<unsigned> RunTimeChecksMaxAccessDisjuncts( "polly-rtc-max-array-disjuncts", cl::desc("The maximal number of disjuncts allowed in memory accesses " "to build RTCs."), cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory)); @@ -198,8 +198,8 @@ static bool containsErrorBlock(RegionNode *RN, const Region &R, static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) { isl::space MapSpace = SetSpace.map_from_set(); isl::map NextIterationMap = isl::map::universe(MapSpace); - for (auto u : seq(0, NextIterationMap.domain_tuple_dim().release())) - if (u != (isl_size)Dim) + for (unsigned u : rangeIslSize(0, NextIterationMap.domain_tuple_dim())) + if (u != Dim) NextIterationMap = NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u); isl::constraint C = @@ -226,10 +226,10 @@ static isl::set collectBoundedParts(isl::set S) { /// both with regard to the dimension @p Dim. static std::pair<isl::set, isl::set> partitionSetParts(isl::set S, unsigned Dim) { - for (unsigned u = 0, e = S.tuple_dim().release(); u < e; u++) + for (unsigned u : rangeIslSize(0, S.tuple_dim())) S = S.lower_bound_si(isl::dim::set, u, 0); - unsigned NumDimsS = S.tuple_dim().release(); + unsigned NumDimsS = unsignedFromIslSize(S.tuple_dim()); isl::set OnlyDimS = S; // Remove dimensions that are greater than Dim as they are not interesting.
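The fixSetToZero and isValidSchedule hunks above show the conversion pattern in miniature; for reference, a condensed usage sketch of the two ISLTools helpers (a fragment, not a standalone program: it assumes the ISLTools.h declarations from the hunk above and an isl::set named Set in scope):

// Fragment only: relies on isl/isl-noexceptions.h and the helpers above.
unsigned Dims = unsignedFromIslSize(Set.tuple_dim()); // asserts if isl errored
for (unsigned u : rangeIslSize(0, Set.tuple_dim()))   // checked [0, Dims) walk
  Set = Set.lower_bound_si(isl::dim::set, u, 0);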
@@ -323,8 +323,8 @@ isl::set ScopBuilder::adjustDomainDimensions(isl::set Dom, Loop *OldL, Dom = Dom.add_dims(isl::dim::set, 1); } else { assert(OldDepth > NewDepth); - int Diff = OldDepth - NewDepth; - int NumDim = Dom.tuple_dim().release(); + unsigned Diff = OldDepth - NewDepth; + unsigned NumDim = unsignedFromIslSize(Dom.tuple_dim()); assert(NumDim >= Diff); Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff); } @@ -540,13 +540,13 @@ bool ScopBuilder::buildConditionSets( isl_set *AlternativeCondSet = nullptr; bool TooComplex = - isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain; + isl_set_n_basic_set(ConsequenceCondSet) >= (int)MaxDisjunctsInDomain; if (!TooComplex) { AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain), isl_set_copy(ConsequenceCondSet)); TooComplex = - isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain; + isl_set_n_basic_set(AlternativeCondSet) >= (int)MaxDisjunctsInDomain; } if (TooComplex) { @@ -910,7 +910,7 @@ bool ScopBuilder::buildDomainsWithBranchConstraints( continue; isl::set Domain = scop->getDomainConditions(BB); - scop->updateMaxLoopDepth(Domain.tuple_dim().release()); + scop->updateMaxLoopDepth(unsignedFromIslSize(Domain.tuple_dim())); auto *BBLoop = getRegionNodeLoop(RN, LI); // Propagate the domain from BB directly to blocks that have a superset @@ -984,7 +984,7 @@ bool ScopBuilder::buildDomainsWithBranchConstraints( // Check if the maximal number of domain disjunctions was reached. // In case this happens we will clean up and bail. - if (SuccDomain.n_basic_set().release() < MaxDisjunctsInDomain) + if (unsignedFromIslSize(SuccDomain.n_basic_set()) < MaxDisjunctsInDomain) continue; scop->invalidate(COMPLEXITY, DebugLoc()); @@ -1064,7 +1064,8 @@ bool ScopBuilder::propagateInvalidStmtDomains( // Check if the maximal number of domain disjunctions was reached. // In case this happens we will bail. - if (SuccInvalidDomain.n_basic_set().release() < MaxDisjunctsInDomain) + if (unsignedFromIslSize(SuccInvalidDomain.n_basic_set()) < + MaxDisjunctsInDomain) continue; InvalidDomainMap.erase(BB); @@ -1155,15 +1156,15 @@ static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) { // mapping. // @param N The dimension to map to. // @returns A mapping from USet to its N-th dimension. 
-static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) { - assert(N >= 0); +static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, unsigned N) { assert(!USet.is_null()); assert(!USet.is_empty()); auto Result = isl::union_pw_multi_aff::empty(USet.get_space()); for (isl::set S : USet.get_set_list()) { - int Dim = S.tuple_dim().release(); + unsigned Dim = unsignedFromIslSize(S.tuple_dim()); + assert(Dim >= N); auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(), isl::dim::set, N, Dim - N); if (N > 1) @@ -2212,8 +2213,8 @@ void ScopBuilder::foldSizeConstantsToRight() { isl::map Transform = isl::map::universe(Array->getSpace().map_from_set()); std::vector<int> Int; - int Dims = Elements.tuple_dim().release(); - for (int i = 0; i < Dims; i++) { + unsigned Dims = unsignedFromIslSize(Elements.tuple_dim()); + for (unsigned i = 0; i < Dims; i++) { isl::set DimOnly = isl::set(Elements).project_out(isl::dim::set, 0, i); DimOnly = DimOnly.project_out(isl::dim::set, 1, Dims - i - 1); DimOnly = DimOnly.lower_bound_si(isl::dim::set, 0, 0); @@ -2226,7 +2227,7 @@ void ScopBuilder::foldSizeConstantsToRight() { continue; } - if (DimHull.dim(isl::dim::div).release() == 1) { + if (unsignedFromIslSize(DimHull.dim(isl::dim::div)) == 1) { isl::aff Diff = DimHull.get_div(0); isl::val Val = Diff.get_denominator_val(); @@ -2625,11 +2626,11 @@ void ScopBuilder::hoistInvariantLoads() { /// /// @returns True if the access range is too complex. static bool isAccessRangeTooComplex(isl::set AccessRange) { - int NumTotalDims = 0; + unsigned NumTotalDims = 0; for (isl::basic_set BSet : AccessRange.get_basic_set_list()) { - NumTotalDims += BSet.dim(isl::dim::div).release(); - NumTotalDims += BSet.dim(isl::dim::set).release(); + NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::div)); + NumTotalDims += unsignedFromIslSize(BSet.dim(isl::dim::set)); } if (NumTotalDims > MaxDimensionsInAccessRange) @@ -2658,8 +2659,9 @@ void ScopBuilder::addUserContext() { isl::set UserContext = isl::set(scop->getIslCtx(), UserContextStr.c_str()); isl::space Space = scop->getParamSpace(); - if (Space.dim(isl::dim::param).release() != - UserContext.dim(isl::dim::param).release()) { + isl::size SpaceParams = Space.dim(isl::dim::param); + if (unsignedFromIslSize(SpaceParams) != + unsignedFromIslSize(UserContext.dim(isl::dim::param))) { std::string SpaceStr = stringFromIslObj(Space, "null"); errs() << "Error: the context provided in -polly-context does not have the " << "same number of dimensions as the computed context.
Due to this " @@ -2668,7 +2670,7 @@ return; } - for (auto i : seq(0, Space.dim(isl::dim::param).release())) { + for (auto i : rangeIslSize(0, SpaceParams)) { std::string NameContext = scop->getContext().get_dim_name(isl::dim::param, i); std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i); @@ -2752,7 +2754,8 @@ isl::set ScopBuilder::getNonHoistableCtx(MemoryAccess *Access, return WrittenCtx; WrittenCtx = WrittenCtx.remove_divs(); - bool TooComplex = WrittenCtx.n_basic_set().release() >= MaxDisjunctsInDomain; + bool TooComplex = + unsignedFromIslSize(WrittenCtx.n_basic_set()) >= MaxDisjunctsInDomain; if (TooComplex || !isRequiredInvariantLoad(LI)) return {}; @@ -2818,7 +2821,7 @@ void ScopBuilder::addInvariantLoads(ScopStmt &Stmt, isl::set DomainCtx = Stmt.getDomain().params(); DomainCtx = DomainCtx.subtract(StmtInvalidCtx); - if (DomainCtx.n_basic_set().release() >= MaxDisjunctsInDomain) { + if (unsignedFromIslSize(DomainCtx.n_basic_set()) >= MaxDisjunctsInDomain) { auto *AccInst = InvMAs.front().MA->getAccessInstruction(); scop->invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent()); return; } @@ -3094,7 +3097,7 @@ static bool buildMinMaxAccess(isl::set Set, Set = Set.remove_divs(); polly::simplify(Set); - if (Set.n_basic_set().release() > RunTimeChecksMaxAccessDisjuncts) + if (unsignedFromIslSize(Set.n_basic_set()) > RunTimeChecksMaxAccessDisjuncts) Set = Set.simple_hull(); // Restrict the number of parameters involved in the access as the lexmin/ @@ -3128,14 +3131,18 @@ static bool buildMinMaxAccess(isl::set Set, MinPMA = MinPMA.coalesce(); MaxPMA = MaxPMA.coalesce(); + if (MaxPMA.is_null()) + return false; + + unsigned MaxOutputSize = unsignedFromIslSize(MaxPMA.dim(isl::dim::out)); + // Adjust the last dimension of the maximal access by one as we want to // enclose the accessed memory region by MinPMA and MaxPMA. The pointer // we test during code generation might now point after the end of the // allocated array but we will never dereference it anyway. - assert((MaxPMA.is_null() || MaxPMA.dim(isl::dim::out).release()) && - "Assumed at least one output dimension"); + assert(MaxOutputSize >= 1 && "Assumed at least one output dimension"); - Pos = MaxPMA.dim(isl::dim::out).release() - 1; + Pos = MaxOutputSize - 1; LastDimAff = MaxPMA.at(Pos); OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space())); OneAff = OneAff.add_constant_si(1); @@ -3176,7 +3183,8 @@ bool ScopBuilder::calculateMinMaxAccess(AliasGroupTy AliasGroup, static isl::set getAccessDomain(MemoryAccess *MA) { isl::set Domain = MA->getStatement()->getDomain(); - Domain = Domain.project_out(isl::dim::set, 0, Domain.tuple_dim().release()); + Domain = Domain.project_out(isl::dim::set, 0, + unsignedFromIslSize(Domain.tuple_dim())); return Domain.reset_tuple_id(); } diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 06b0aa3c3e8df..1115832a452dd 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -112,7 +112,7 @@ STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo"); STATISTIC(NumSingletonWritesInLoops, "Number of singleton writes nested in affine loops after ScopInfo"); -int const polly::MaxDisjunctsInDomain = 20; +unsigned const polly::MaxDisjunctsInDomain = 20; // The number of disjuncts in the context after which we stop adding more disjuncts.
This parameter is there to avoid exponential growth in the @@ -443,9 +443,10 @@ void MemoryAccess::updateDimensionality() { isl::space AccessSpace = AccessRelation.get_space().range(); isl::ctx Ctx = ArraySpace.ctx(); - auto DimsArray = ArraySpace.dim(isl::dim::set).release(); - auto DimsAccess = AccessSpace.dim(isl::dim::set).release(); - auto DimsMissing = DimsArray - DimsAccess; + unsigned DimsArray = unsignedFromIslSize(ArraySpace.dim(isl::dim::set)); + unsigned DimsAccess = unsignedFromIslSize(AccessSpace.dim(isl::dim::set)); + assert(DimsArray >= DimsAccess); + unsigned DimsMissing = DimsArray - DimsAccess; auto *BB = getStatement()->getEntryBlock(); auto &DL = BB->getModule()->getDataLayout(); @@ -455,10 +456,10 @@ void MemoryAccess::updateDimensionality() { isl::map Map = isl::map::from_domain_and_range( isl::set::universe(AccessSpace), isl::set::universe(ArraySpace)); - for (auto i : seq(0, DimsMissing)) + for (auto i : seq<unsigned>(0, DimsMissing)) Map = Map.fix_si(isl::dim::out, i, 0); - for (auto i : seq(DimsMissing, DimsArray)) + for (auto i : seq<unsigned>(DimsMissing, DimsArray)) Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i); AccessRelation = AccessRelation.apply_range(Map); @@ -497,9 +498,10 @@ void MemoryAccess::updateDimensionality() { if (ElemBytes > ArrayElemSize) { assert(ElemBytes % ArrayElemSize == 0 && "Loaded element size should be multiple of canonical element size"); + assert(DimsArray >= 1); isl::map Map = isl::map::from_domain_and_range( isl::set::universe(ArraySpace), isl::set::universe(ArraySpace)); - for (auto i : seq(0, DimsArray - 1)) + for (auto i : seq<unsigned>(0, DimsArray - 1)) Map = Map.equate(isl::dim::in, i, isl::dim::out, i); isl::constraint C; @@ -1008,10 +1010,10 @@ bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const { Stride = getStride(Schedule); StrideX = isl::set::universe(Stride.get_space()); - for (auto i : seq(0, StrideX.tuple_dim().release() - 1)) + int Size = unsignedFromIslSize(StrideX.tuple_dim()); + for (auto i : seq(0, Size - 1)) StrideX = StrideX.fix_si(isl::dim::set, i, 0); - StrideX = StrideX.fix_si(isl::dim::set, StrideX.tuple_dim().release() - 1, - StrideWidth); + StrideX = StrideX.fix_si(isl::dim::set, Size - 1, StrideWidth); IsStrideX = Stride.is_subset(StrideX); return IsStrideX; @@ -1070,9 +1072,9 @@ void MemoryAccess::setNewAccessRelation(isl::map NewAccess) { // Check whether access dimensions correspond to number of dimensions of the // accessed array.
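A note on the pattern behind nearly every hunk in this patch (the setNewAccessRelation hunk continues below): instead of calling `release()` on each `isl::size` and mixing the raw, error-carrying value into arithmetic, call sites now go through `unsignedFromIslSize` from polly/Support/ISLTools.h. The sketch below is a self-contained mock, not Polly's actual definition; it only illustrates the contract the call sites above rely on: assert that the size is not an error value, then widen to `unsigned`.

```cpp
#include <cassert>

// Minimal stand-in for isl::size, which wraps an isl_size: a plain int
// where negative values signal an error/unknown count.
struct MockIslSize {
  int Raw;
  bool is_error() const { return Raw < 0; }
  int release() const { return Raw; }
};

// Sketch of the checked conversion this patch switches call sites to use.
// The real unsignedFromIslSize takes an isl::size; this mock-based version
// only demonstrates the contract: assert on error, then widen to unsigned.
static unsigned unsignedFromIslSize(MockIslSize Size) {
  assert(!Size.is_error() && "cannot convert an isl error value to unsigned");
  return static_cast<unsigned>(Size.release());
}

int main() {
  MockIslSize Dims{3};
  return unsignedFromIslSize(Dims) == 3u ? 0 : 1; // would assert on {-1}
}
```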
- isl_size Dims = SAI->getNumberOfDimensions(); - assert(NewAccessSpace.dim(isl::dim::set).release() == Dims && - "Access dims must match array dims"); + unsigned Dims = SAI->getNumberOfDimensions(); + unsigned SpaceSize = unsignedFromIslSize(NewAccessSpace.dim(isl::dim::set)); + assert(SpaceSize == Dims && "Access dims must match array dims"); #endif NewAccess = NewAccess.gist_params(getStatement()->getParent()->getContext()); diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index 9297a51ef23c2..e946c7ef960c2 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -17,6 +17,7 @@ #include "polly/CodeGen/RuntimeDebugBuilder.h" #include "polly/Options.h" #include "polly/ScopInfo.h" +#include "polly/Support/ISLTools.h" #include "polly/Support/ScopHelper.h" #include "polly/Support/VirtualInstruction.h" #include "llvm/Analysis/LoopInfo.h" @@ -688,8 +689,7 @@ void BlockGenerator::generateBeginStmtTrace(ScopStmt &Stmt, LoopToScevMapT &LTS, Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, "(")); // Add the coordinate of the statement instance. - int DomDims = ScheduleMultiPwAff.dim(isl::dim::out).release(); - for (int i = 0; i < DomDims; i += 1) { + for (unsigned i : rangeIslSize(0, ScheduleMultiPwAff.dim(isl::dim::out))) { if (i > 0) Values.push_back(RuntimeDebugBuilder::getPrintableString(Builder, ",")); diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 3c56dee49c16f..a10a5312b60d7 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -22,6 +22,7 @@ #include "polly/Options.h" #include "polly/ScopDetection.h" #include "polly/ScopInfo.h" +#include "polly/Support/ISLTools.h" #include "polly/Support/SCEVValidator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -1151,7 +1152,7 @@ Value *GPUNodeBuilder::getArrayOffset(gpu_array_info *Array) { isl::set ZeroSet = isl::set::universe(Min.get_space()); - for (long i = 0, n = Min.tuple_dim().release(); i < n; i++) + for (unsigned i : rangeIslSize(0, Min.tuple_dim())) ZeroSet = ZeroSet.fix_si(isl::dim::set, i, 0); if (Min.is_subset(ZeroSet)) { @@ -1160,7 +1161,7 @@ Value *GPUNodeBuilder::getArrayOffset(gpu_array_info *Array) { isl::ast_expr Result = isl::ast_expr::from_val(isl::val(Min.ctx(), 0)); - for (long i = 0, n = Min.tuple_dim().release(); i < n; i++) { + for (unsigned i : rangeIslSize(0, Min.tuple_dim())) { if (i > 0) { isl::pw_aff Bound_I = isl::manage(isl_multi_pw_aff_get_pw_aff(Array->bound, i - 1)); @@ -2885,8 +2886,10 @@ class PPCGCodeGeneration : public ScopPass { isl::pw_aff Val = isl::aff::var_on_domain(LS, isl::dim::set, 0); isl::pw_aff OuterMin = AccessSet.dim_min(0); isl::pw_aff OuterMax = AccessSet.dim_max(0); - OuterMin = OuterMin.add_dims(isl::dim::in, Val.dim(isl::dim::in).release()); - OuterMax = OuterMax.add_dims(isl::dim::in, Val.dim(isl::dim::in).release()); + OuterMin = OuterMin.add_dims(isl::dim::in, + unsignedFromIslSize(Val.dim(isl::dim::in))); + OuterMax = OuterMax.add_dims(isl::dim::in, + unsignedFromIslSize(Val.dim(isl::dim::in))); OuterMin = OuterMin.set_tuple_id(isl::dim::in, Array->getBasePtrId()); OuterMax = OuterMax.set_tuple_id(isl::dim::in, Array->getBasePtrId()); @@ -2910,7 +2913,8 @@ class PPCGCodeGeneration : public ScopPass { isl::pw_aff Val = isl::aff::var_on_domain( isl::local_space(Array->getSpace()), isl::dim::set, i); - PwAff = PwAff.add_dims(isl::dim::in,
Val.dim(isl::dim::in).release()); + PwAff = PwAff.add_dims(isl::dim::in, + unsignedFromIslSize(Val.dim(isl::dim::in))); PwAff = PwAff.set_tuple_id(isl::dim::in, Val.get_tuple_id(isl::dim::in)); isl::set Set = PwAff.gt_set(Val); Extent = Set.intersect(Extent); diff --git a/polly/lib/Exchange/JSONExporter.cpp b/polly/lib/Exchange/JSONExporter.cpp index 4bff2e033bc72..507caa1d89729 100644 --- a/polly/lib/Exchange/JSONExporter.cpp +++ b/polly/lib/Exchange/JSONExporter.cpp @@ -16,6 +16,7 @@ #include "polly/Options.h" #include "polly/ScopInfo.h" #include "polly/ScopPass.h" +#include "polly/Support/ISLTools.h" #include "polly/Support/ScopLocation.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Module.h" @@ -230,8 +231,8 @@ static bool importContext(Scop &S, const json::Object &JScop) { return false; } - unsigned OldContextDim = OldContext.dim(isl::dim::param).release(); - unsigned NewContextDim = NewContext.dim(isl::dim::param).release(); + unsigned OldContextDim = unsignedFromIslSize(OldContext.dim(isl::dim::param)); + unsigned NewContextDim = unsignedFromIslSize(NewContext.dim(isl::dim::param)); // Check if the imported context has the right number of parameters. if (OldContextDim != NewContextDim) { diff --git a/polly/lib/External/isl/include/isl/isl-noexceptions.h b/polly/lib/External/isl/include/isl/isl-noexceptions.h index 4e81211c77094..26d5f3337018e 100644 --- a/polly/lib/External/isl/include/isl/isl-noexceptions.h +++ b/polly/lib/External/isl/include/isl/isl-noexceptions.h @@ -198,7 +198,6 @@ enum class dim { }; } // namespace isl - #include #include #include diff --git a/polly/lib/Support/ISLTools.cpp b/polly/lib/Support/ISLTools.cpp index 1afd07e504dd6..4a40df2958416 100644 --- a/polly/lib/Support/ISLTools.cpp +++ b/polly/lib/Support/ISLTools.cpp @@ -56,8 +56,8 @@ isl::basic_map makeTupleSwapBasicMap(isl::space FromSpace1, assert(FromSpace1.is_set()); assert(FromSpace2.is_set()); - unsigned Dims1 = FromSpace1.dim(isl::dim::set).release(); - unsigned Dims2 = FromSpace2.dim(isl::dim::set).release(); + unsigned Dims1 = unsignedFromIslSize(FromSpace1.dim(isl::dim::set)); + unsigned Dims2 = unsignedFromIslSize(FromSpace2.dim(isl::dim::set)); isl::space FromSpace = FromSpace1.map_from_domain_and_range(FromSpace2).wrap(); @@ -160,13 +160,13 @@ isl::set polly::singleton(isl::union_set USet, isl::space ExpectedSpace) { return Result; } -isl_size polly::getNumScatterDims(const isl::union_map &Schedule) { - isl_size Dims = 0; +unsigned polly::getNumScatterDims(const isl::union_map &Schedule) { + unsigned Dims = 0; for (isl::map Map : Schedule.get_map_list()) { if (Map.is_null()) continue; - Dims = std::max(Dims, Map.range_tuple_dim().release()); + Dims = std::max(Dims, unsignedFromIslSize(Map.range_tuple_dim())); } return Dims; } @@ -214,7 +214,7 @@ isl::union_map polly::reverseDomain(const isl::union_map &UMap) { } isl::set polly::shiftDim(isl::set Set, int Pos, int Amount) { - int NumDims = Set.tuple_dim().release(); + unsigned NumDims = unsignedFromIslSize(Set.tuple_dim()); if (Pos < 0) Pos = NumDims + Pos; assert(Pos < NumDims && "Dimension index must be in range"); @@ -235,7 +235,7 @@ isl::union_set polly::shiftDim(isl::union_set USet, int Pos, int Amount) { } isl::map polly::shiftDim(isl::map Map, isl::dim Dim, int Pos, int Amount) { - int NumDims = Map.dim(Dim).release(); + unsigned NumDims = unsignedFromIslSize(Map.dim(Dim)); if (Pos < 0) Pos = NumDims + Pos; assert(Pos < NumDims && "Dimension index must be in range"); @@ -449,16 +449,16 @@ isl::map polly::distributeDomain(isl::map 
Map) { isl::space DomainSpace = Space.domain(); if (DomainSpace.is_null()) return {}; - unsigned DomainDims = DomainSpace.dim(isl::dim::set).release(); + unsigned DomainDims = unsignedFromIslSize(DomainSpace.dim(isl::dim::set)); isl::space RangeSpace = Space.range().unwrap(); isl::space Range1Space = RangeSpace.domain(); if (Range1Space.is_null()) return {}; - unsigned Range1Dims = Range1Space.dim(isl::dim::set).release(); + unsigned Range1Dims = unsignedFromIslSize(Range1Space.dim(isl::dim::set)); isl::space Range2Space = RangeSpace.range(); if (Range2Space.is_null()) return {}; - unsigned Range2Dims = Range2Space.dim(isl::dim::set).release(); + unsigned Range2Dims = unsignedFromIslSize(Range2Space.dim(isl::dim::set)); isl::space OutputSpace = DomainSpace.map_from_domain_and_range(Range1Space) @@ -582,6 +582,11 @@ isl::val polly::getConstant(isl::pw_aff PwAff, bool Max, bool Min) { return Result; } +llvm::iota_range<unsigned> polly::rangeIslSize(unsigned Begin, isl::size End) { + unsigned UEnd = unsignedFromIslSize(End); + return llvm::seq(std::min(Begin, UEnd), UEnd); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) static void foreachPoint(const isl::set &Set, const std::function<void(isl::point P)> &F) { @@ -606,17 +611,19 @@ static int flatCompare(const isl::basic_set &A, const isl::basic_set &B) { if (A.is_null() || B.is_null()) return 0; - unsigned ALen = A.dim(isl::dim::set).release(); - unsigned BLen = B.dim(isl::dim::set).release(); + unsigned ALen = unsignedFromIslSize(A.dim(isl::dim::set)); + unsigned BLen = unsignedFromIslSize(B.dim(isl::dim::set)); unsigned Len = std::min(ALen, BLen); for (unsigned i = 0; i < Len; i += 1) { isl::basic_set ADim = - A.project_out(isl::dim::param, 0, A.dim(isl::dim::param).release()) + A.project_out(isl::dim::param, 0, + unsignedFromIslSize(A.dim(isl::dim::param))) .project_out(isl::dim::set, i + 1, ALen - i - 1) .project_out(isl::dim::set, 0, i); isl::basic_set BDim = - B.project_out(isl::dim::param, 0, B.dim(isl::dim::param).release()) + B.project_out(isl::dim::param, 0, + unsignedFromIslSize(B.dim(isl::dim::param))) .project_out(isl::dim::set, i + 1, BLen - i - 1) .project_out(isl::dim::set, 0, i); @@ -687,8 +694,8 @@ static int structureCompare(const isl::space &ASpace, const isl::space &BSpace, return NameCompare; if (ConsiderTupleLen) { - int LenCompare = BSpace.dim(isl::dim::set).release() - - ASpace.dim(isl::dim::set).release(); + int LenCompare = (int)unsignedFromIslSize(BSpace.dim(isl::dim::set)) - + (int)unsignedFromIslSize(ASpace.dim(isl::dim::set)); if (LenCompare != 0) return LenCompare; } @@ -782,15 +789,17 @@ static void printSortedPolyhedra(isl::union_set USet, llvm::raw_ostream &OS, OS << "\n}\n"; } -static void recursiveExpand(isl::basic_set BSet, int Dim, isl::set &Expanded) { - int Dims = BSet.dim(isl::dim::set).release(); +static void recursiveExpand(isl::basic_set BSet, unsigned Dim, + isl::set &Expanded) { + unsigned Dims = unsignedFromIslSize(BSet.dim(isl::dim::set)); if (Dim >= Dims) { Expanded = Expanded.unite(BSet); return; } isl::basic_set DimOnly = - BSet.project_out(isl::dim::param, 0, BSet.dim(isl::dim::param).release()) + BSet.project_out(isl::dim::param, 0, + unsignedFromIslSize(BSet.dim(isl::dim::param))) .project_out(isl::dim::set, Dim + 1, Dims - Dim - 1) .project_out(isl::dim::set, 0, Dim); if (!DimOnly.is_bounded()) { diff --git a/polly/lib/Transform/FlattenAlgo.cpp b/polly/lib/Transform/FlattenAlgo.cpp index d9efe3fbfa844..f8ed332348ab1 100644 --- a/polly/lib/Transform/FlattenAlgo.cpp +++ b/polly/lib/Transform/FlattenAlgo.cpp @@
-26,10 +26,11 @@ namespace { /// i.e. there are two constants Min and Max, such that every value x of the /// chosen dimensions is Min <= x <= Max. bool isDimBoundedByConstant(isl::set Set, unsigned dim) { - auto ParamDims = Set.dim(isl::dim::param).release(); + auto ParamDims = unsignedFromIslSize(Set.dim(isl::dim::param)); Set = Set.project_out(isl::dim::param, 0, ParamDims); Set = Set.project_out(isl::dim::set, 0, dim); - auto SetDims = Set.tuple_dim().release(); + auto SetDims = unsignedFromIslSize(Set.tuple_dim()); + assert(SetDims >= 1); Set = Set.project_out(isl::dim::set, 1, SetDims - 1); return bool(Set.is_bounded()); } @@ -40,7 +41,8 @@ bool isDimBoundedByConstant(isl::set Set, unsigned dim) { /// Min_p <= x <= Max_p. bool isDimBoundedByParameter(isl::set Set, unsigned dim) { Set = Set.project_out(isl::dim::set, 0, dim); - auto SetDims = Set.tuple_dim().release(); + auto SetDims = unsignedFromIslSize(Set.tuple_dim()); + assert(SetDims >= 1); Set = Set.project_out(isl::dim::set, 1, SetDims - 1); return bool(Set.is_bounded()); } @@ -124,27 +126,12 @@ isl::union_map scheduleProjectOut(const isl::union_map &UMap, unsigned first, return Result; } -/// Return the number of dimensions in the input map's range. -/// -/// Because this function takes an isl_union_map, the out dimensions could be -/// different. We return the maximum number in this case. However, a different -/// number of dimensions is not supported by the other code in this file. -isl_size scheduleScatterDims(const isl::union_map &Schedule) { - isl_size Dims = 0; - for (isl::map Map : Schedule.get_map_list()) { - if (Map.is_null()) - continue; - - Dims = std::max(Dims, Map.range_tuple_dim().release()); - } - return Dims; -} - /// Return the @p pos' range dimension, converted to an isl_union_pw_aff. isl::union_pw_aff scheduleExtractDimAff(isl::union_map UMap, unsigned pos) { auto SingleUMap = isl::union_map::empty(UMap.ctx()); for (isl::map Map : UMap.get_map_list()) { - unsigned MapDims = Map.range_tuple_dim().release(); + unsigned MapDims = unsignedFromIslSize(Map.range_tuple_dim()); + assert(MapDims > pos); isl::map SingleMap = Map.project_out(isl::dim::out, 0, pos); SingleMap = SingleMap.project_out(isl::dim::out, 1, MapDims - pos - 1); SingleUMap = SingleUMap.unite(SingleMap); @@ -179,8 +166,8 @@ isl::union_map tryFlattenSequence(isl::union_map Schedule) { auto ScatterSet = isl::set(Schedule.range()); auto ParamSpace = Schedule.get_space().params(); - auto Dims = ScatterSet.tuple_dim().release(); - assert(Dims >= 2); + auto Dims = unsignedFromIslSize(ScatterSet.tuple_dim()); + assert(Dims >= 2u); // Would cause an infinite loop. if (!isDimBoundedByConstant(ScatterSet, 0)) { @@ -205,7 +192,8 @@ isl::union_map tryFlattenSequence(isl::union_map Schedule) { SubSchedule = scheduleProjectOut(SubSchedule, 0, 1); SubSchedule = flattenSchedule(SubSchedule); - auto SubDims = scheduleScatterDims(SubSchedule); + unsigned SubDims = getNumScatterDims(SubSchedule); + assert(SubDims >= 1); auto FirstSubSchedule = scheduleProjectOut(SubSchedule, 1, SubDims - 1); auto FirstScheduleAff = scheduleExtractDimAff(FirstSubSchedule, 0); auto RemainingSubSchedule = scheduleProjectOut(SubSchedule, 0, 1); @@ -264,14 +252,16 @@ isl::union_map tryFlattenSequence(isl::union_map Schedule) { /// largest value. Then, construct a new schedule /// { Stmt[i] -> [i * (u_X() - l_X() + 1), ...] 
} isl::union_map tryFlattenLoop(isl::union_map Schedule) { - assert(scheduleScatterDims(Schedule) >= 2); + assert(getNumScatterDims(Schedule) >= 2); auto Remaining = scheduleProjectOut(Schedule, 0, 1); auto SubSchedule = flattenSchedule(Remaining); - auto SubDims = scheduleScatterDims(SubSchedule); + unsigned SubDims = getNumScatterDims(SubSchedule); + + assert(SubDims >= 1); auto SubExtent = isl::set(SubSchedule.range()); - auto SubExtentDims = SubExtent.dim(isl::dim::param).release(); + auto SubExtentDims = unsignedFromIslSize(SubExtent.dim(isl::dim::param)); SubExtent = SubExtent.project_out(isl::dim::param, 0, SubExtentDims); SubExtent = SubExtent.project_out(isl::dim::set, 1, SubDims - 1); @@ -313,7 +303,7 @@ isl::union_map tryFlattenLoop(isl::union_map Schedule) { } // anonymous namespace isl::union_map polly::flattenSchedule(isl::union_map Schedule) { - auto Dims = scheduleScatterDims(Schedule); + unsigned Dims = getNumScatterDims(Schedule); LLVM_DEBUG(dbgs() << "Recursive schedule to process:\n " << Schedule << "\n"); diff --git a/polly/lib/Transform/MatmulOptimizer.cpp b/polly/lib/Transform/MatmulOptimizer.cpp index d2049d5a0938f..60dd9eda3c2c0 100644 --- a/polly/lib/Transform/MatmulOptimizer.cpp +++ b/polly/lib/Transform/MatmulOptimizer.cpp @@ -188,8 +188,8 @@ static isl::union_set getUnrollIsolatedSetOptions(isl::ctx Ctx) { /// @return The modified map. static isl::map permuteDimensions(isl::map Map, isl::dim DimType, unsigned DstPos, unsigned SrcPos) { - assert((isl_size)DstPos < Map.dim(DimType).release() && - (isl_size)SrcPos < Map.dim(DimType).release()); + assert(DstPos < unsignedFromIslSize(Map.dim(DimType)) && + SrcPos < unsignedFromIslSize(Map.dim(DimType))); if (DstPos == SrcPos) return Map; isl::id DimId; @@ -229,7 +229,7 @@ static bool isMatMulOperandAcc(isl::set Domain, isl::map AccMap, int &FirstPos, isl::space Space = AccMap.get_space(); isl::map Universe = isl::map::universe(Space); - if (Space.dim(isl::dim::out).release() != 2) + if (unsignedFromIslSize(Space.dim(isl::dim::out)) != 2) return false; // MatMul has the form: @@ -317,7 +317,7 @@ static bool containsOnlyMatrMultAcc(isl::map PartialSchedule, MatMulInfoTy &MMI) { auto InputDimId = PartialSchedule.get_tuple_id(isl::dim::in); auto *Stmt = static_cast<ScopStmt *>(InputDimId.get_user()); - isl_size OutDimNum = PartialSchedule.range_tuple_dim().release(); + unsigned OutDimNum = unsignedFromIslSize(PartialSchedule.range_tuple_dim()); assert(OutDimNum > 2 && "In case of the matrix multiplication the loop nest " "and, consequently, the corresponding scheduling " "functions have at least three dimensions."); @@ -363,7 +363,7 @@ static bool containsOnlyMatMulDep(isl::map Schedule, const Dependences *D, auto DomainSpace = Schedule.get_space().domain(); auto Space = DomainSpace.map_from_domain_and_range(DomainSpace); auto Deltas = Dep.extract_map(Space).deltas(); - isl_size DeltasDimNum = Deltas.dim(isl::dim::set).release(); + int DeltasDimNum = unsignedFromIslSize(Deltas.dim(isl::dim::set)); for (int i = 0; i < DeltasDimNum; i++) { auto Val = Deltas.plain_get_val_if_fixed(isl::dim::set, i); Pos = Pos < 0 && Val.is_one() ? i : Pos; @@ -727,9 +727,10 @@ static isl::schedule_node optimizePackedB(isl::schedule_node Node, ScopStmt *CopyStmt = S->addScopStmt(AccRelB, AccRelPackedB, Domain); MMI.B->setNewAccessRelation(AccRelPackedB); + unsigned Dim = unsignedFromIslSize(MapOldIndVar.range_tuple_dim()); + assert(Dim >= 2); // Insert into the schedule tree.
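The freshly added `assert(Dim >= 2)` above (like the `assert(Dims >= 1)` and `assert(SubDims >= 1)` guards elsewhere in this patch) protects the unsigned subtraction in the hunk continuing below, `project_out(isl::dim::out, 2, Dim - 2)`: once dimension counts are `unsigned`, an out-of-range subtraction wraps around to a huge value instead of going negative. A minimal self-contained illustration (the helper name here is invented for the example):

```cpp
#include <cassert>
#include <cstdio>

// Mirrors the shape of `range_tuple_dim() - 2` in the hunk below: once the
// dimension count is unsigned, subtracting past zero wraps around, so the
// precondition is asserted before the arithmetic rather than checked after.
static unsigned dimsToProjectOut(unsigned RangeTupleDims) {
  assert(RangeTupleDims >= 2 && "subtraction below would wrap around");
  return RangeTupleDims - 2;
}

int main() {
  std::printf("%u\n", dimsToProjectOut(5)); // prints 3
  // Without the assert, dimsToProjectOut(1) would return 4294967295
  // (for a 32-bit unsigned) rather than -1.
  return 0;
}
```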
- isl::map ExtMap = MapOldIndVar.project_out( - isl::dim::out, 2, MapOldIndVar.range_tuple_dim().release() - 2); + isl::map ExtMap = MapOldIndVar.project_out(isl::dim::out, 2, Dim - 2); ExtMap = ExtMap.reverse(); ExtMap = ExtMap.fix_si(isl::dim::out, MMI.i, 0); ExtMap = ExtMap.intersect_range(Domain); @@ -870,9 +871,9 @@ getInductionVariablesSubstitution(isl::schedule_node Node, auto Child = Node.child(0); auto UnMapOldIndVar = Child.get_prefix_schedule_union_map(); auto MapOldIndVar = isl::map::from_union_map(UnMapOldIndVar); - if (MapOldIndVar.range_tuple_dim().release() > 9) - return MapOldIndVar.project_out( - isl::dim::out, 0, MapOldIndVar.range_tuple_dim().release() - 9); + unsigned Dim = unsignedFromIslSize(MapOldIndVar.range_tuple_dim()); + if (Dim > 9u) + return MapOldIndVar.project_out(isl::dim::out, 0, Dim - 9); return MapOldIndVar; } @@ -896,7 +897,8 @@ isolateAndUnrollMatMulInnerLoops(isl::schedule_node Node, isl::schedule_node Child = Node.child(0); isl::union_map UnMapOldIndVar = Child.get_prefix_schedule_relation(); isl::set Prefix = isl::map::from_union_map(UnMapOldIndVar).range(); - isl_size Dims = Prefix.tuple_dim().release(); + unsigned Dims = unsignedFromIslSize(Prefix.tuple_dim()); + assert(Dims >= 1); Prefix = Prefix.project_out(isl::dim::set, Dims - 1, 1); Prefix = getPartialTilePrefixes(Prefix, MicroKernelParams.Nr); Prefix = getPartialTilePrefixes(Prefix, MicroKernelParams.Mr); @@ -940,8 +942,8 @@ getBandNodeWithOriginDimOrder(isl::schedule_node Node) { auto Domain = Node.get_universe_domain(); assert(isl_union_set_n_set(Domain.get()) == 1); if (Node.get_schedule_depth().release() != 0 || - (isl::set(Domain).tuple_dim().release() != - isl_schedule_node_band_n_member(Node.get()))) + (unsignedFromIslSize(isl::set(Domain).tuple_dim()) != + unsignedFromIslSize(Node.as<isl::schedule_node_band>().n_member()))) return Node; Node = isl::manage(isl_schedule_node_delete(Node.copy())); auto PartialSchedulePwAff = Domain.identity_union_pw_multi_aff(); diff --git a/polly/lib/Transform/MaximalStaticExpansion.cpp b/polly/lib/Transform/MaximalStaticExpansion.cpp index 4a1665633f092..f0893b72db7aa 100644 --- a/polly/lib/Transform/MaximalStaticExpansion.cpp +++ b/polly/lib/Transform/MaximalStaticExpansion.cpp @@ -118,10 +118,11 @@ class MaximalStaticExpander : public ScopPass { /// i.e. there are two constants Min and Max, such that every value x of the /// chosen dimensions is Min <= x <= Max. static bool isDimBoundedByConstant(isl::set Set, unsigned dim) { - auto ParamDims = Set.dim(isl::dim::param).release(); + auto ParamDims = unsignedFromIslSize(Set.dim(isl::dim::param)); Set = Set.project_out(isl::dim::param, 0, ParamDims); Set = Set.project_out(isl::dim::set, 0, dim); - auto SetDims = Set.tuple_dim().release(); + auto SetDims = unsignedFromIslSize(Set.tuple_dim()); + assert(SetDims >= 1); Set = Set.project_out(isl::dim::set, 1, SetDims - 1); return bool(Set.is_bounded()); } @@ -350,7 +351,8 @@ ScopArrayInfo *MaximalStaticExpander::expandAccess(Scop &S, MemoryAccess *MA) { // Get the current AM. auto CurrentAccessMap = MA->getAccessRelation(); - unsigned in_dimensions = CurrentAccessMap.domain_tuple_dim().release(); + unsigned in_dimensions = + unsignedFromIslSize(CurrentAccessMap.domain_tuple_dim()); // Get domain from the current AM. auto Domain = CurrentAccessMap.domain(); @@ -404,8 +406,8 @@ ScopArrayInfo *MaximalStaticExpander::expandAccess(Scop &S, MemoryAccess *MA) { // Add constraints to linked output with input id.
auto SpaceMap = NewAccessMap.get_space(); - auto ConstraintBasicMap = - isl::basic_map::equal(SpaceMap, SpaceMap.dim(isl::dim::in).release()); + auto ConstraintBasicMap = isl::basic_map::equal( + SpaceMap, unsignedFromIslSize(SpaceMap.dim(isl::dim::in))); NewAccessMap = isl::map(ConstraintBasicMap); // Set the new access relation map. diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 02d468577387b..03878d5c8e4ba 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -53,6 +53,7 @@ #include "polly/Options.h" #include "polly/ScheduleTreeTransform.h" #include "polly/Support/ISLOStream.h" +#include "polly/Support/ISLTools.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -388,15 +389,15 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band); auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get())); - isl_size ScheduleDimensions = Space.dim(isl::dim::set).release(); - assert((isl_size)DimToVectorize < ScheduleDimensions); + unsigned ScheduleDimensions = unsignedFromIslSize(Space.dim(isl::dim::set)); + assert(DimToVectorize < ScheduleDimensions); if (DimToVectorize > 0) { Node = isl::manage( isl_schedule_node_band_split(Node.release(), DimToVectorize)); Node = Node.child(0); } - if ((isl_size)DimToVectorize < ScheduleDimensions - 1) + if (DimToVectorize < ScheduleDimensions - 1) Node = isl::manage(isl_schedule_node_band_split(Node.release(), 1)); Space = isl::manage(isl_schedule_node_band_get_space(Node.get())); auto Sizes = isl::multi_val::zero(Space); @@ -456,9 +457,8 @@ bool ScheduleTreeOptimizer::isTileableBandNode(isl::schedule_node Node) { return false; auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get())); - auto Dims = Space.dim(isl::dim::set).release(); - if (Dims <= 1) + if (unsignedFromIslSize(Space.dim(isl::dim::set)) <= 1u) return false; return isSimpleInnermostBand(Node); @@ -490,7 +490,7 @@ ScheduleTreeOptimizer::applyTileBandOpt(isl::schedule_node Node) { isl::schedule_node ScheduleTreeOptimizer::applyPrevectBandOpt(isl::schedule_node Node) { auto Space = isl::manage(isl_schedule_node_band_get_space(Node.get())); - auto Dims = Space.dim(isl::dim::set).release(); + int Dims = unsignedFromIslSize(Space.dim(isl::dim::set)); for (int i = Dims - 1; i >= 0; i--) if (Node.as<isl::schedule_node_band>().member_get_coincident(i)) { diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp index 0a35fc0064260..a2cb538021fbe 100644 --- a/polly/lib/Transform/ScheduleTreeTransform.cpp +++ b/polly/lib/Transform/ScheduleTreeTransform.cpp @@ -58,7 +58,7 @@ applyBandMemberAttributes(isl::schedule_node_band Target, int TargetIdx, template <typename CbTy> static isl::schedule rebuildBand(isl::schedule_node_band OldBand, isl::schedule Body, CbTy IncludeCb) { - int NumBandDims = OldBand.n_member().release(); + int NumBandDims = unsignedFromIslSize(OldBand.n_member()); bool ExcludeAny = false; bool IncludeAny = false; @@ -323,7 +323,7 @@ struct ExtensionNodeRewriter isl::union_map NewPartialSchedMap = isl::union_map::from(PartialSched); unsigned BandDims = isl_schedule_node_band_n_member(OldNode.get()); for (isl::map Ext : NewChildExtensions.get_map_list()) { - unsigned ExtDims = Ext.domain_tuple_dim().release(); + unsigned ExtDims = unsignedFromIslSize(Ext.domain_tuple_dim()); assert(ExtDims >= BandDims);
unsigned OuterDims = ExtDims - BandDims; @@ -574,7 +574,8 @@ static isl::basic_set isDivisibleBySet(isl::ctx &Ctx, long Factor, /// @param Set A set, which should be modified. /// @param VectorWidth A parameter, which determines the constraint. static isl::set addExtentConstraints(isl::set Set, int VectorWidth) { - unsigned Dims = Set.tuple_dim().release(); + unsigned Dims = unsignedFromIslSize(Set.tuple_dim()); + assert(Dims >= 1); isl::space Space = Set.get_space(); isl::local_space LocalSpace = isl::local_space(Space); isl::constraint ExtConstr = isl::constraint::alloc_inequality(LocalSpace); @@ -602,7 +603,7 @@ class BandCollapseRewriter : public ScheduleTreeRewriter<BandCollapseRewriter> { // Do not merge permutable bands to avoid losing the permutability property. // Cannot collapse even two permutable loops, they might be permutable // individually, but not necessarily across. - if (Band.n_member().release() > 1 && Band.permutable()) + if (unsignedFromIslSize(Band.n_member()) > 1u && Band.permutable()) return getBase().visitBand(Band); // Find collapsible bands. @@ -611,7 +612,7 @@ class BandCollapseRewriter : public ScheduleTreeRewriter<BandCollapseRewriter> { isl::schedule_node Body; while (true) { Nest.push_back(Band); - NumTotalLoops += Band.n_member().release(); + NumTotalLoops += unsignedFromIslSize(Band.n_member()); Body = Band.first_child(); if (!Body.isa<isl::schedule_node_band>()) break; @@ -619,7 +620,7 @@ class BandCollapseRewriter : public ScheduleTreeRewriter<BandCollapseRewriter> { // Do not include next band if it is permutable to not lose its // permutability property. - if (Band.n_member().release() > 1 && Band.permutable()) + if (unsignedFromIslSize(Band.n_member()) > 1u && Band.permutable()) break; } @@ -640,7 +641,7 @@ class BandCollapseRewriter : public ScheduleTreeRewriter<BandCollapseRewriter> { // Collect partial schedules from all members. isl::union_pw_aff_list PartScheds{Ctx, NumTotalLoops}; for (isl::schedule_node_band Band : Nest) { - int NumLoops = Band.n_member().release(); + int NumLoops = unsignedFromIslSize(Band.n_member()); isl::multi_union_pw_aff BandScheds = Band.get_partial_schedule(); for (auto j : seq(0, NumLoops)) PartScheds = PartScheds.add(BandScheds.at(j)); @@ -657,7 +658,7 @@ class BandCollapseRewriter : public ScheduleTreeRewriter<BandCollapseRewriter> { // Copy over loop attributes from original bands. int LoopIdx = 0; for (isl::schedule_node_band Band : Nest) { - int NumLoops = Band.n_member().release(); + int NumLoops = unsignedFromIslSize(Band.n_member()); for (int i : seq(0, NumLoops)) { CollapsedBand = applyBandMemberAttributes(std::move(CollapsedBand), LoopIdx, Band, i); @@ -713,7 +714,7 @@ static void collectPotentiallyFusableBands( /// everything that we already know is executed in-order.
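An aside before the function that the comment above documents: besides `unsignedFromIslSize`, the patch adds `rangeIslSize` (see the ISLTools.cpp hunk earlier), which BlockGenerators.cpp, PPCGCodeGeneration.cpp, and ZoneAlgo.cpp use to turn manual `for (i = 0; i < n; i++)` loops over isl dimension counts into range-based loops. The mock below approximates its behavior without the LLVM `iota_range` dependency and is not the real implementation; note how clamping with `std::min` yields an empty range, not wraparound, when the end is below the begin.

```cpp
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <numeric>
#include <vector>

// Stand-in for isl::size, as in the earlier sketch.
struct MockIslSize {
  int Raw;
  bool is_error() const { return Raw < 0; }
  int release() const { return Raw; }
};

// Mock of polly::rangeIslSize: iterate [Begin, End) where End comes from an
// isl::size. The real helper returns llvm::iota_range<unsigned>; a vector of
// indices stands in for it here.
static std::vector<unsigned> rangeIslSize(unsigned Begin, MockIslSize End) {
  assert(!End.is_error());
  unsigned UEnd = static_cast<unsigned>(End.release());
  // std::min keeps the subtraction from wrapping when UEnd < Begin, so a
  // too-small End yields an empty range instead of undefined behavior.
  std::vector<unsigned> Range(UEnd - std::min(Begin, UEnd));
  std::iota(Range.begin(), Range.end(), std::min(Begin, UEnd));
  return Range;
}

int main() {
  for (unsigned i : rangeIslSize(0, MockIslSize{3}))
    std::printf("%u ", i); // prints: 0 1 2
  return 0;
}
```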
static isl::union_map remainingDepsFromPartialSchedule(isl::union_map PartSched, isl::union_map Deps) { - int NumDims = getNumScatterDims(PartSched); + unsigned NumDims = getNumScatterDims(PartSched); auto ParamSpace = PartSched.get_space().params(); // { Scatter[] } @@ -876,7 +877,8 @@ class GreedyFusionRewriter // { Domain[] -> Scatter[] } isl::union_map PartSched = isl::union_map::from(Band.get_partial_schedule()); - assert(getNumScatterDims(PartSched) == Band.n_member().release()); + assert(getNumScatterDims(PartSched) == + unsignedFromIslSize(Band.n_member())); isl::space ParamSpace = PartSched.get_space().params(); // { Scatter[] -> Domain[] } @@ -1030,7 +1032,7 @@ isl::schedule polly::applyFullUnroll(isl::schedule_node BandToUnroll) { isl::multi_union_pw_aff PartialSched = isl::manage( isl_schedule_node_band_get_partial_schedule(BandToUnroll.get())); - assert(PartialSched.dim(isl::dim::out).release() == 1 && + assert(unsignedFromIslSize(PartialSched.dim(isl::dim::out)) == 1u && "Can only unroll a single dimension"); isl::union_pw_aff PartialSchedUAff = PartialSched.at(0); @@ -1139,7 +1141,8 @@ isl::schedule polly::applyPartialUnroll(isl::schedule_node BandToUnroll, isl::set polly::getPartialTilePrefixes(isl::set ScheduleRange, int VectorWidth) { - isl_size Dims = ScheduleRange.tuple_dim().release(); + unsigned Dims = unsignedFromIslSize(ScheduleRange.tuple_dim()); + assert(Dims >= 1); isl::set LoopPrefixes = ScheduleRange.drop_constraints_involving_dims(isl::dim::set, Dims - 1, 1); auto ExtentPrefixes = addExtentConstraints(LoopPrefixes, VectorWidth); @@ -1150,8 +1153,8 @@ isl::set polly::getPartialTilePrefixes(isl::set ScheduleRange, } isl::union_set polly::getIsolateOptions(isl::set IsolateDomain, - isl_size OutDimsNum) { - isl_size Dims = IsolateDomain.tuple_dim().release(); + unsigned OutDimsNum) { + unsigned Dims = unsignedFromIslSize(IsolateDomain.tuple_dim()); assert(OutDimsNum <= Dims && "The isl::set IsolateDomain is used to describe the range of schedule " "dimensions values, which should be isolated. Consequently, the " @@ -1182,9 +1185,8 @@ isl::schedule_node polly::tileNode(isl::schedule_node Node, auto Dims = Space.dim(isl::dim::set); auto Sizes = isl::multi_val::zero(Space); std::string IdentifierString(Identifier); - for (auto i : seq(0, Dims.release())) { - auto tileSize = - i < (isl_size)TileSizes.size() ? TileSizes[i] : DefaultTileSize; + for (unsigned i : rangeIslSize(0, Dims)) { + unsigned tileSize = i < TileSizes.size() ? TileSizes[i] : DefaultTileSize; Sizes = Sizes.set_val(i, isl::val(Node.ctx(), tileSize)); } auto TileLoopMarkerStr = IdentifierString + " - Tiles"; diff --git a/polly/lib/Transform/ScopInliner.cpp b/polly/lib/Transform/ScopInliner.cpp index 5054b66cf6ae6..ed54731c6b2a3 100644 --- a/polly/lib/Transform/ScopInliner.cpp +++ b/polly/lib/Transform/ScopInliner.cpp @@ -68,9 +68,17 @@ class ScopInliner : public CallGraphSCCPass { } PassBuilder PB; + // Populate analysis managers and register Polly-specific analyses. 
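The comment just added above introduces standard new-pass-manager boilerplate, and the ScopInliner hunk continues below with exactly this wiring. For reference, the generic shape sketched against the public `PassBuilder` API (this is illustrative, not Polly-specific code): all four analysis-manager levels are created, each level's analyses are registered, and `crossRegisterProxies` wires the proxy analyses that let a pass at one level query analyses at another.

```cpp
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Generic sketch of the setup the ScopInliner hunk below adopts. Omitting
// crossRegisterProxies would leave the inter-level proxy analyses
// unregistered, so running, e.g., a module pass that needs function-level
// results (as the AlwaysInlinerPass invocation later does) would fail.
static void setupAnalysisManagers(PassBuilder &PB, LoopAnalysisManager &LAM,
                                  FunctionAnalysisManager &FAM,
                                  CGSCCAnalysisManager &CGAM,
                                  ModuleAnalysisManager &MAM) {
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
}
```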
+ LoopAnalysisManager LAM; FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; FAM.registerPass([] { return ScopAnalysis(); }); + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); RegionInfo &RI = FAM.getResult<RegionInfoAnalysis>(*F); ScopDetection &SD = FAM.getResult<ScopAnalysis>(*F); @@ -84,9 +92,6 @@ class ScopInliner : public CallGraphSCCPass { << " has scop as top level region"); F->addFnAttr(llvm::Attribute::AlwaysInline); - ModuleAnalysisManager MAM; - PB.registerModuleAnalyses(MAM); - MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); }); ModulePassManager MPM; MPM.addPass(AlwaysInlinerPass()); Module *M = F->getParent(); diff --git a/polly/lib/Transform/Simplify.cpp b/polly/lib/Transform/Simplify.cpp index d839289bdcb67..2f5788becfc64 100644 --- a/polly/lib/Transform/Simplify.cpp +++ b/polly/lib/Transform/Simplify.cpp @@ -37,7 +37,7 @@ namespace { /// that the analysis of accesses in a statement is becoming too complex. Chosen /// to be relatively small because all the common cases should access only few /// array elements per statement. -static int const SimplifyMaxDisjuncts = 4; +static unsigned const SimplifyMaxDisjuncts = 4; TWO_STATISTICS(ScopsProcessed, "Number of SCoPs processed"); TWO_STATISTICS(ScopsModified, "Number of SCoPs simplified"); @@ -95,18 +95,19 @@ static isl::union_map underapproximatedAddMap(isl::union_map UMap, // Fast path: If known that we cannot exceed the disjunct limit, just add // them. - if (isl_map_n_basic_map(PrevMap.get()) + isl_map_n_basic_map(Map.get()) <= + if (unsignedFromIslSize(PrevMap.n_basic_map()) + + unsignedFromIslSize(Map.n_basic_map()) <= SimplifyMaxDisjuncts) return UMap.unite(Map); isl::map Result = isl::map::empty(PrevMap.get_space()); for (isl::basic_map BMap : PrevMap.get_basic_map_list()) { - if (Result.n_basic_map().release() > SimplifyMaxDisjuncts) + if (unsignedFromIslSize(Result.n_basic_map()) > SimplifyMaxDisjuncts) break; Result = Result.unite(BMap); } for (isl::basic_map BMap : Map.get_basic_map_list()) { - if (isl_map_n_basic_map(Result.get()) > SimplifyMaxDisjuncts) + if (unsignedFromIslSize(Result.n_basic_map()) > SimplifyMaxDisjuncts) break; Result = Result.unite(BMap); } diff --git a/polly/lib/Transform/ZoneAlgo.cpp b/polly/lib/Transform/ZoneAlgo.cpp index 1aec6708ef220..4c86891d2cf7d 100644 --- a/polly/lib/Transform/ZoneAlgo.cpp +++ b/polly/lib/Transform/ZoneAlgo.cpp @@ -686,12 +686,11 @@ isl::map ZoneAlgorithm::getDefToTarget(ScopStmt *DefStmt, TargetStmt->getSurroundingLoop())) { isl::set DefDomain = getDomainFor(DefStmt); isl::set TargetDomain = getDomainFor(TargetStmt); - assert(DefDomain.tuple_dim().release() <= - TargetDomain.tuple_dim().release()); + assert(unsignedFromIslSize(DefDomain.tuple_dim()) <= + unsignedFromIslSize(TargetDomain.tuple_dim())); Result = isl::map::from_domain_and_range(DefDomain, TargetDomain); - for (unsigned i = 0, DefDims = DefDomain.tuple_dim().release(); i < DefDims; - i += 1) + for (unsigned i : rangeIslSize(0, DefDomain.tuple_dim())) Result = Result.equate(isl::dim::in, i, isl::dim::out, i); } diff --git a/polly/unittests/Isl/IslTest.cpp b/polly/unittests/Isl/IslTest.cpp index 42730af278dd2..d3c64407d5da9 100644 --- a/polly/unittests/Isl/IslTest.cpp +++ b/polly/unittests/Isl/IslTest.cpp @@ -644,16 +644,16 @@ TEST(ISLTools, getNumScatterDims) { &isl_ctx_free); // Basic usage - EXPECT_EQ(0, getNumScatterDims(UMAP("{ [] -> [] }")));
- EXPECT_EQ(1, getNumScatterDims(UMAP("{ [] -> [i] }"))); - EXPECT_EQ(2, getNumScatterDims(UMAP("{ [] -> [i,j] }"))); - EXPECT_EQ(3, getNumScatterDims(UMAP("{ [] -> [i,j,k] }"))); + EXPECT_EQ(0u, getNumScatterDims(UMAP("{ [] -> [] }"))); + EXPECT_EQ(1u, getNumScatterDims(UMAP("{ [] -> [i] }"))); + EXPECT_EQ(2u, getNumScatterDims(UMAP("{ [] -> [i,j] }"))); + EXPECT_EQ(3u, getNumScatterDims(UMAP("{ [] -> [i,j,k] }"))); // Different scatter spaces - EXPECT_EQ(0, getNumScatterDims(UMAP("{ A[] -> []; [] -> []}"))); - EXPECT_EQ(1, getNumScatterDims(UMAP("{ A[] -> []; [] -> [i] }"))); - EXPECT_EQ(2, getNumScatterDims(UMAP("{ A[] -> [i]; [] -> [i,j] }"))); - EXPECT_EQ(3, getNumScatterDims(UMAP("{ A[] -> [i]; [] -> [i,j,k] }"))); + EXPECT_EQ(0u, getNumScatterDims(UMAP("{ A[] -> []; [] -> []}"))); + EXPECT_EQ(1u, getNumScatterDims(UMAP("{ A[] -> []; [] -> [i] }"))); + EXPECT_EQ(2u, getNumScatterDims(UMAP("{ A[] -> [i]; [] -> [i,j] }"))); + EXPECT_EQ(3u, getNumScatterDims(UMAP("{ A[] -> [i]; [] -> [i,j,k] }"))); } TEST(ISLTools, getScatterSpace) { diff --git a/utils/bazel/.bazelrc b/utils/bazel/.bazelrc index ed2a41dfb46f6..0ad68148b8b36 100644 --- a/utils/bazel/.bazelrc +++ b/utils/bazel/.bazelrc @@ -72,8 +72,16 @@ build:generic_gcc --copt=-Werror --host_copt=-Werror # Generic Windows flags common to both MSVC and Clang. ############################################################################### -# Yay for security warnings. Boo for non-standard. -build:windows --copt=/D_CRT_SECURE_NO_WARNINGS --host_copt=/D_CRT_SECURE_NO_WARNINGS +# C++14 standard version is required. +build:windows --cxxopt=/std:c++14 --host_cxxopt=/std:c++14 + +# Other generic dialect flags. +build:windows --copt=/Zc:strictStrings --host_copt=/Zc:strictStrings +build:windows --copt=/Oi --host_copt=/Oi +build:windows --cxxopt=/Zc:rvalueCast --host_cxxopt=/Zc:rvalueCast + +# Use the more flexible bigobj format for C++ files that have lots of symbols. +build:windows --cxxopt=/bigobj --host_cxxopt=/bigobj ############################################################################### # Windows specific flags for building with MSVC. @@ -107,9 +115,6 @@ build:clang-cl --config=windows # Switch from MSVC to the `clang-cl` compiler. build:clang-cl --compiler=clang-cl -# C++14 standard version is required. -build:clang-cl --cxxopt=/std:c++14 --host_cxxopt=/std:c++14 - # Use Clang's internal warning flags instead of the ones that sometimes map # through to MSVC's flags. build:clang-cl --copt=/clang:-Wall --host_copt=/clang:-Wall @@ -121,6 +126,10 @@ build:clang-cl --copt=/clang:-Wno-unused --host_copt=/clang:-Wno-unused # There appears to be an unused constant in GoogleTest on Windows. build:clang-cl --copt=/clang:-Wno-unused-const-variable --host_copt=/clang:-Wno-unused-const-variable +# Disable some warnings hit even with `clang-cl` in Clang's own code. +build:clang-cl --copt=/clang:-Wno-inconsistent-dllimport --host_copt=/clang:-Wno-inconsistent-dllimport +build:clang-cl --cxxopt=/clang:-Wno-c++11-narrowing --host_cxxopt=/clang:-Wno-c++11-narrowing + ############################################################################### ############################################################################### @@ -149,7 +158,7 @@ build:rbe --action_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1 # Platform flags: # The toolchain container used for execution is defined in the target indicated # by "extra_execution_platforms", "host_platform" and "platforms". 
-# More about platforms: https://docs.bazel.build/versions/master/platforms.html +# More about platforms: https://docs.bazel.build/versions/main/platforms.html build:rbe --extra_toolchains=@rbe_default//config:cc-toolchain build:rbe --extra_execution_platforms=@rbe_default//config:platform build:rbe --host_platform=@rbe_default//config:platform diff --git a/utils/bazel/README.md b/utils/bazel/README.md index 3521f897ca88e..fe8ead32f7d90 100644 --- a/utils/bazel/README.md +++ b/utils/bazel/README.md @@ -28,7 +28,7 @@ for adding this configuration. you don't have a checkout yet. 2. Install Bazel at the version indicated by [.bazelversion](./.bazelversion), following the official instructions, if you don't have it installed yet: - https://docs.bazel.build/versions/master/install.html. + https://docs.bazel.build/versions/main/install.html. 3. `cd utils/bazel` 4. `bazel build --config=generic_clang @llvm-project//...` (if building on Unix with Clang). `--config=generic_gcc` and `--config=msvc` are also available. @@ -45,7 +45,7 @@ build --config=generic_clang ``` You can enable -[disk caching](https://docs.bazel.build/versions/master/remote-caching.html#disk-cache), +[disk caching](https://docs.bazel.build/versions/main/remote-caching.html#disk-cache), which will cache build results ```.bazelrc @@ -53,7 +53,7 @@ build --disk_cache=~/.cache/bazel-disk-cache ``` You can instruct Bazel to use a ramdisk for its sandboxing operations via -[--sandbox_base](https://docs.bazel.build/versions/master/command-line-reference.html#flag--sandbox_base), +[--sandbox_base](https://docs.bazel.build/versions/main/command-line-reference.html#flag--sandbox_base), which can help avoid IO bottlenecks for the symlink strategy used for sandboxing. This is especially important with many inputs and many cores (see https://github.com/bazelbuild/bazel/issues/11868): diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 488c23ba8fb62..fcd1b598f2499 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -691,14 +691,25 @@ cc_library( "lib/AST/*.h", "lib/AST/Interp/*.cpp", "lib/AST/Interp/*.h", - ]), + ]) + [ + "lib/AST/AttrDocTable.inc", + "lib/AST/Interp/Opcodes.inc", + ], hdrs = glob([ "include/clang/AST/*.h", ]), - includes = [ - "include", - "lib/AST", - "lib/AST/Interp", + copts = [ + # FIXME: This is necessary to allow "file relative" include paths from + # non-generated `srcs` to find generated `srcs` above. Bazel should + # either make this work automatically by creating a unified tree of + # `srcs` or at least provide a `local_includes` that has the path + # translation logic of `includes` but is only used locally (similar to + # `local_defines` vs. `defines`). Until one of those lands, this is the + # least bad approach. Using `includes` is *specifically* problematic for + # this library because it contains files that collide easily with system + # headers such as `CXXABI.h`. + "-I$(GENDIR)/external/llvm-project/clang/lib/AST", + "-I$(GENDIR)/external/llvm-project/clang/lib/AST/Interp", ], textual_hdrs = [ "include/clang/AST/AttrImpl.inc", @@ -715,8 +726,6 @@ cc_library( "include/clang/AST/DeclNodes.inc", "include/clang/AST/StmtDataCollectors.inc", "include/clang/AST/StmtNodes.inc", - "lib/AST/AttrDocTable.inc", - "lib/AST/Interp/Opcodes.inc", ] + glob([ "include/clang/AST/*.def", ]), @@ -1313,6 +1322,10 @@ cc_library( # directly #including "Tools.h".
"lib/Driver", ], + linkopts = select({ + "@bazel_tools//src/conditions:windows": ["version.lib"], + "//conditions:default": [], + }), textual_hdrs = glob([ "include/clang/Driver/*.def", ]), @@ -1732,12 +1745,13 @@ cc_library( ) cc_library( - name = "libclang_library", + name = "libclang_static", srcs = glob([ "tools/libclang/*.cpp", "tools/libclang/*.h", ]), hdrs = glob(["include/clang-c/*.h"]), + defines = ["CINDEX_NO_EXPORTS"], deps = [ ":arc_migrate", ":ast", @@ -1758,18 +1772,36 @@ cc_library( ], ) -cc_library( - name = "c-bindings", +cc_plugin_library( + name = "libclang", + srcs = glob([ + "tools/libclang/*.cpp", + "tools/libclang/*.h", + ]), hdrs = glob(["include/clang-c/*.h"]), + copts = select({ + "@bazel_tools//src/conditions:windows": ["-D_CINDEX_LIB_"], + "//conditions:default": [], + }), + strip_include_prefix = "include", deps = [ - ":libclang_library", + ":arc_migrate", + ":ast", + ":basic", + ":codegen", + ":config", + ":driver", + ":frontend", + ":index", + ":lex", + ":rewrite", + ":sema", + ":tooling", + "//llvm:BitstreamReader", + "//llvm:FrontendOpenMP", + "//llvm:Support", + "//llvm:config", ], - alwayslink = 1, -) - -cc_plugin_library( - name = "libclang", - deps = [":c-bindings"], ) filegroup( @@ -1802,14 +1834,12 @@ cc_binary( deps = [ ":ast", ":basic", - ":c-bindings", ":codegen", ":config", ":frontend", ":index", ":lex", - ":parse", - ":sema", + ":libclang", ":serialization", "//llvm:Core", "//llvm:MC", @@ -1837,11 +1867,14 @@ cc_binary( name = "c-arcmt-test", testonly = 1, srcs = ["tools/c-arcmt-test/c-arcmt-test.c"], - copts = ["-std=gnu99"], + copts = select({ + "@bazel_tools//src/conditions:windows": [], + "//conditions:default": ["-std=gnu99"], + }), stamp = 0, deps = [ - ":c-bindings", ":codegen", + ":libclang", "//llvm:MC", "//llvm:Support", ], diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h index b46028078c536..4d5cd0520a0c4 100644 --- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h +++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h @@ -75,7 +75,7 @@ /* #undef CLANG_HAVE_LIBXML */ /* Define if we have sys/resource.h (rlimits) */ -#define CLANG_HAVE_RLIMITS 1 +/* CLANG_HAVE_RLIMITS defined conditionally below */ /* The LLVM product name and version */ #define BACKEND_PACKAGE_STRING "LLVM 12.0.0git" @@ -100,4 +100,14 @@ /* Spawn a new process clang.exe for the CC1 tool invocation, when necessary */ #define CLANG_SPAWN_CC1 0 +/* Directly provide definitions here behind platform preprocessor definitions. + * The preprocessor conditions are sufficient to handle all of the configuration + * on platforms targeted by Bazel, and defining these here more faithfully + * matches how the users of this header expect things to work with CMake. + */ + +#ifndef _WIN32 +#define CLANG_HAVE_RLIMITS 1 +#endif + #endif diff --git a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel index 8aef95625edb2..108bd6db9c12d 100644 --- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel @@ -480,8 +480,16 @@ cc_test( ) + [ "libclang/TestUtils.h", ], + args = select({ + "@bazel_tools//src/conditions:windows": [ + # Need to disable the VFS tests that don't use Windows friendly + # paths. These are also disabled on Windows in the CMake build. 
+ "--gtest_filter=-*VirtualFileOverlay*", + ], + "//conditions:default": [], + }), deps = [ - "//clang:c-bindings", + "//clang:libclang", "//llvm:Support", "//llvm:gtest", "//llvm:gtest_main", diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 39751be589b69..6546272ed8bee 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -3576,6 +3576,7 @@ cc_binary( includes = ["tools/llvm-reduce"], stamp = 0, deps = [ + ":AllTargetsAsmParsers", ":AllTargetsCodeGens", ":BitReader", ":Core", diff --git a/utils/bazel/llvm-project-overlay/llvm/cc_plugin_library.bzl b/utils/bazel/llvm-project-overlay/llvm/cc_plugin_library.bzl index b3206652d6585..2ebd39c630dcb 100644 --- a/utils/bazel/llvm-project-overlay/llvm/cc_plugin_library.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/cc_plugin_library.bzl @@ -4,51 +4,72 @@ """A macro to produce a loadable plugin library for the target OS. -This macro produces a `cc_binary` rule with the name `name + "_impl"`. It -forces the rule to statically link in its dependencies but to be linked as a -shared "plugin" library. It then creates binary aliases to `.so`, `.dylib` -,and `.dll` suffixed names for use on various platforms and selects between -these into a filegroup with the exact name passed to the macro. +This macro produces a set of platform-specific `cc_binary` rules, by appending +the platform suffix (`.dll`, `.dylib`, or `.so`) to the provided `name`. It then +connects these to a `cc_import` rule with `name` exactly and `hdrs` that can be +used by other Bazel rules to depend on the plugin library. + +The `srcs` attribute for the `cc_binary` rules is `srcs + hdrs`. Other explicit +arguments are passed to all of the rules where they apply, and can be used to +configure generic aspects of all generated rules such as `testonly`. Lastly, +`kwargs` is expanded into all the `cc_binary` rules. """ -load("@rules_cc//cc:defs.bzl", "cc_binary") -load(":binary_alias.bzl", "binary_alias") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_import", "cc_library") -def cc_plugin_library(name, **kwargs): +def cc_plugin_library(name, srcs, hdrs, include_prefix = None, strip_include_prefix = None, alwayslink = False, features = [], tags = [], testonly = False, **kwargs): # Neither the name of the plugin binary nor tags on whether it is built are - # configurable. Instead, we build a `cc_binary` that implements the plugin - # library using a `_impl` suffix. Bazel will use appropriate flags to cause - # this file to be a plugin library regardless of its name. We then create - # binary aliases in the different possible platform names, and select - # between these different names into a filegroup. The macro's name becomes - # the filegroup name and it contains exactly one target that is the target - # platform suffixed plugin library. + # configurable. Instead, we build a `cc_binary` with each name and + # selectively depend on them based on platform. # # All-in-all, this is a pretty poor workaround. 
I think this is part of the # Bazel issue: https://github.com/bazelbuild/bazel/issues/7538 - cc_binary( - name = name + "_impl", - linkshared = True, - linkstatic = True, - **kwargs - ) - binary_alias( - name = name + ".so", - binary = ":" + name + "_impl", - ) - binary_alias( - name = name + ".dll", - binary = ":" + name + "_impl", - ) - binary_alias( - name = name + ".dylib", - binary = ":" + name + "_impl", - ) + so_name = name + ".so" + dll_name = name + ".dll" + dylib_name = name + ".dylib" + interface_output_name = name + "_interface_output" + import_name = name + "_import" + for impl_name in [dll_name, dylib_name, so_name]: + cc_binary( + name = impl_name, + srcs = srcs + hdrs, + linkshared = True, + linkstatic = True, + features = features, + tags = ["manual"] + tags, + testonly = testonly, + **kwargs + ) native.filegroup( - name = name, + name = interface_output_name, srcs = select({ - "@bazel_tools//src/conditions:windows": [":" + name + ".dll"], - "@bazel_tools//src/conditions:darwin": [":" + name + ".dylib"], - "//conditions:default": [":" + name + ".so"], + "@bazel_tools//src/conditions:windows": [":" + dll_name], + "@bazel_tools//src/conditions:darwin": [":" + dylib_name], + "//conditions:default": [":" + so_name], + }), + output_group = "interface_library", + ) + cc_import( + name = import_name, + interface_library = ":" + interface_output_name, + shared_library = select({ + "@bazel_tools//src/conditions:windows": ":" + dll_name, + "@bazel_tools//src/conditions:darwin": ":" + dylib_name, + "//conditions:default": ":" + so_name, }), + alwayslink = alwayslink, + features = features, + tags = tags, + testonly = testonly, + ) + cc_library( + name = name, + hdrs = hdrs, + include_prefix = include_prefix, + strip_include_prefix = strip_include_prefix, + deps = [":" + import_name], + alwayslink = alwayslink, + features = features, + tags = tags, + testonly = testonly, ) diff --git a/utils/bazel/llvm-project-overlay/llvm/config.bzl b/utils/bazel/llvm-project-overlay/llvm/config.bzl index f4e4bdeaf3c96..cad0c4d84416b 100644 --- a/utils/bazel/llvm-project-overlay/llvm/config.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/config.bzl @@ -36,6 +36,7 @@ posix_defines = [ "HAVE_STRERROR_R=1", "HAVE_SYSEXITS_H=1", "HAVE_UNISTD_H=1", + "LLVM_WINDOWS_PREFER_FORWARD_SLASH=0", ] linux_defines = posix_defines + [ @@ -57,13 +58,20 @@ macos_defines = posix_defines + [ ] win32_defines = [ - # MSVC specific - "stricmp=_stricmp", - "strdup=_strdup", + # Windows system library specific defines. + "_CRT_SECURE_NO_DEPRECATE", + "_CRT_SECURE_NO_WARNINGS", + "_CRT_NONSTDC_NO_DEPRECATE", + "_CRT_NONSTDC_NO_WARNINGS", + "_SCL_SECURE_NO_DEPRECATE", + "_SCL_SECURE_NO_WARNINGS", + "UNICODE", + "_UNICODE", # LLVM features r'LTDL_SHLIB_EXT=\".dll\"', r'LLVM_PLUGIN_EXT=\".dll\"', + "LLVM_WINDOWS_PREFER_FORWARD_SLASH=1", ] # TODO: We should switch to platforms-based config settings to make this easier diff --git a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h index 1f640e91cc561..8baab15cc0c9e 100644 --- a/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h +++ b/utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h @@ -352,10 +352,10 @@ #define HAVE_STD_IS_TRIVIALLY_COPYABLE 1 /* Define to a function implementing stricmp */ -/* stricmp defined in Bazel */ +/* stricmp defined conditionally below. 
*/ /* Define to a function implementing strdup */ -/* strdup defined in Bazel */ +/* strdup defined conditionally below. */ /* Whether GlobalISel rule coverage is being collected */ #define LLVM_GISEL_COV_ENABLED 0 @@ -368,4 +368,17 @@ /* HAVE_PROC_PID_RUSAGE defined in Bazel */ +/* Directly provide definitions here behind platform preprocessor definitions. + * The preprocessor conditions are sufficient to handle all of the configuration + * on platforms targeted by Bazel, and defining these here more faithfully + * matches how the users of this header expect things to work with CMake. + * FIXME: We should consider moving other platform defines to use this technique + * as well. + */ + +#ifdef _WIN32 +#define stricmp _stricmp +#define strdup _strdup +#endif + #endif diff --git a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel index dcac33b0311e2..f1fa27bcb5e97 100644 --- a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel @@ -286,7 +286,8 @@ cc_test( cc_test( name = "ir_tests", - size = "medium", # ConstantRangeTest cases may take several seconds each. + size = "medium", + timeout = "long", # ConstantRangeTest cases may take several seconds each. srcs = glob( [ "IR/*.cpp", diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 39c258ade3fe2..6aa22bf533b2d 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1707,7 +1707,10 @@ cc_library( cc_library( name = "SparseTensorTransforms", srcs = glob(["lib/Dialect/SparseTensor/Transforms/*.cpp"]), - hdrs = ["include/mlir/Dialect/SparseTensor/Transforms/Passes.h"], + hdrs = [ + "include/mlir/Dialect/SparseTensor/Transforms/Passes.h", + "include/mlir/ExecutionEngine/SparseTensorUtils.h", + ], includes = ["include"], deps = [ ":Affine", @@ -2768,6 +2771,14 @@ gentbl_cc_library( ["-gen-op-defs"], "include/mlir/Dialect/GPU/GPUOps.cpp.inc", ), + ( + ["-gen-enum-decls"], + "include/mlir/Dialect/GPU/GPUOpsEnums.h.inc", + ), + ( + ["-gen-enum-defs"], + "include/mlir/Dialect/GPU/GPUOpsEnums.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/GPU/GPUOps.td", @@ -3295,6 +3306,14 @@ gentbl_cc_library( ], "include/mlir/Dialect/LLVMIR/NVVMOpsDialect.cpp.inc", ), + ( + ["-gen-enum-decls"], + "include/mlir/Dialect/LLVMIR/NVVMOpsEnums.h.inc", + ), + ( + ["-gen-enum-defs"], + "include/mlir/Dialect/LLVMIR/NVVMOpsEnums.cpp.inc", + ), ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/LLVMIR/NVVMOps.td", @@ -5373,9 +5392,12 @@ cc_library( name = "mlir_c_runner_utils", srcs = [ "lib/ExecutionEngine/CRunnerUtils.cpp", - "lib/ExecutionEngine/SparseUtils.cpp", + "lib/ExecutionEngine/SparseTensorUtils.cpp", + ], + hdrs = [ + "include/mlir/ExecutionEngine/CRunnerUtils.h", + "include/mlir/ExecutionEngine/SparseTensorUtils.h", ], - hdrs = ["include/mlir/ExecutionEngine/CRunnerUtils.h"], includes = ["include"], ) @@ -6110,6 +6132,54 @@ gentbl_cc_library( deps = [":LinalgStructuredOpsTdFiles"], ) +td_library( + name = "BufferizableOpInterfaceTdFiles", + srcs = [ + "include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td", + ], + includes = ["include"], + deps = [ + ":OpBaseTdFiles", + ], +) + +gentbl_cc_library( + name = "BufferizableOpInterfaceIncGen", + strip_include_prefix = "include", + tbl_outs = [ + ( + ["-gen-op-interface-decls"], + 
"include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h.inc", + ), + ( + ["-gen-op-interface-defs"], + "include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp.inc", + ), + ], + tblgen = ":mlir-tblgen", + td_file = "include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.td", + deps = [ + ":BufferizableOpInterfaceTdFiles", + ], +) + +cc_library( + name = "BufferizableOpInterface", + srcs = [ + "lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp", + ], + hdrs = [ + "include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.h", + ], + includes = ["include"], + deps = [ + ":BufferizableOpInterfaceIncGen", + ":IR", + ":Support", + "//llvm:Support", + ], +) + td_library( name = "LinalgDocTdFiles", srcs = ["include/mlir/Dialect/Linalg/IR/LinalgDoc.td"], @@ -6299,7 +6369,6 @@ cc_library( "include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h", "include/mlir/Dialect/Linalg/Passes.h", "include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h", - "include/mlir/Dialect/Linalg/Transforms/ComprehensiveBufferize.h", "include/mlir/Dialect/Linalg/Transforms/HoistPadding.h", "include/mlir/Dialect/Linalg/Transforms/Hoisting.h", "include/mlir/Dialect/Linalg/Transforms/Transforms.h", @@ -6312,6 +6381,7 @@ cc_library( ":Analysis", ":ArithmeticDialect", ":ComplexDialect", + ":ComprehensiveBufferize", ":DialectUtils", ":IR", ":InferTypeOpInterface", @@ -6334,6 +6404,35 @@ cc_library( ], ) +cc_library( + name = "ComprehensiveBufferize", + srcs = [ + "lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp", + ], + hdrs = [ + "include/mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h", + ], + includes = ["include"], + deps = [ + ":ArithmeticDialect", + ":BufferizableOpInterface", + ":DialectUtils", + ":IR", + ":InferTypeOpInterface", + ":LinalgOps", + ":LinalgStructuredOpsIncGen", + ":MemRefDialect", + ":Pass", + ":SCFDialect", + ":StandardOps", + ":Support", + ":TensorDialect", + ":TransformUtils", + ":VectorOps", + "//llvm:Support", + ], +) + cc_library( name = "TilingInterface", srcs = ["lib/Interfaces/TilingInterface.cpp"], diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel new file mode 100644 index 0000000000000..d44da4c6a47fd --- /dev/null +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -0,0 +1,610 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Description: +# Python bindings for MLIR. +# +# We define separate filegroups for files in different directories so +# that downstream users can mirror the tree in their own py_library() rules. + +load("//mlir:tblgen.bzl", "gentbl_filegroup", "td_library") + + +package( + default_visibility = [ + "//visibility:public", + ], + licenses = ["notice"], +) + +##---------------------------------------------------------------------------## +# Core IR modules. 
+
+##---------------------------------------------------------------------------##
+# Core IR modules.
+##---------------------------------------------------------------------------##
+
+filegroup(
+    name = "ConversionsPyFiles",
+    srcs = glob([
+        "mlir/conversions/*.py",
+    ]),
+)
+
+filegroup(
+    name = "DialectCorePyFiles",
+    srcs = [
+        "mlir/dialects/_ods_common.py",
+    ],
+)
+
+filegroup(
+    name = "ExecutionEnginePyFiles",
+    srcs = [
+        "mlir/execution_engine.py",
+    ],
+)
+
+filegroup(
+    name = "IRPyFiles",
+    srcs = [
+        "mlir/ir.py",
+    ],
+)
+
+filegroup(
+    name = "PassManagerPyFiles",
+    srcs = [
+        "mlir/passmanager.py",
+    ],
+)
+
+filegroup(
+    name = "RuntimePyFiles",
+    srcs = glob([
+        "mlir/runtime/*.py",
+    ]),
+)
+
+filegroup(
+    name = "TransformsPyFiles",
+    srcs = glob([
+        "mlir/transforms/*.py",
+    ]),
+)
+
+filegroup(
+    name = "AllPassesRegistrationPyFiles",
+    srcs = glob([
+        "mlir/all_passes_registration/*.py",
+    ]),
+)
+
+##---------------------------------------------------------------------------##
+# Builtin dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "BuiltinOpsPyTdFiles",
+    srcs = [
+        "mlir/dialects/BuiltinOps.td",
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    deps = [
+        "//mlir:BuiltinDialectTdFiles",
+        "//mlir:OpBaseTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "BuiltinOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=builtin",
+            ],
+            "mlir/dialects/_builtin_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/BuiltinOps.td",
+    deps = [
+        ":BuiltinOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "BuiltinOpsPyFiles",
+    srcs = [
+        "mlir/dialects/_builtin_ops_ext.py",
+        "mlir/dialects/builtin.py",
+        ":BuiltinOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# Linalg dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "LinalgOpsPyTdFiles",
+    srcs = [
+        "mlir/dialects/LinalgOps.td",
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    deps = [
+        "//mlir:LinalgOpsTdFiles",
+        "//mlir:LinalgStructuredOpsTdFiles",
+        "//mlir:OpBaseTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "LinalgOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=linalg",
+            ],
+            "mlir/dialects/_linalg_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/LinalgOps.td",
+    deps = [
+        ":LinalgOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "LinalgOpsPyFiles",
+    srcs = [
+        "mlir/dialects/_linalg_ops_ext.py",
+        ":LinalgOpsPyGen",
+    ],
+)
+
+filegroup(
+    name = "LinalgOpsPackagePyFiles",
+    srcs = glob(["mlir/dialects/linalg/*.py"]),
+)
+
+filegroup(
+    name = "LinalgOpsPackageOpDSLPyFiles",
+    srcs = glob(["mlir/dialects/linalg/opdsl/*.py"]),
+)
+
+filegroup(
+    name = "LinalgOpsPackageOpDSLLangPyFiles",
+    srcs = glob(["mlir/dialects/linalg/opdsl/lang/*.py"]),
+)
+
+filegroup(
+    name = "LinalgOpsPackageOpDSLOpsPyFiles",
+    srcs = glob(["mlir/dialects/linalg/opdsl/ops/*.py"]),
+)
+
+filegroup(
+    name = "LinalgOpsPackagePassesPyFiles",
+    srcs = glob(["mlir/dialects/linalg/passes/*.py"]),
+)
+
+##---------------------------------------------------------------------------##
+# Arithmetic dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "ArithmeticOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    includes = ["../include"],
+    deps = [
+        "//mlir:ArithmeticOpsTdFiles",
+        "//mlir:OpBaseTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "ArithmeticOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=arith",
+            ],
+            "mlir/dialects/_arith_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/ArithmeticOps.td",
+    deps = [
+        ":ArithmeticOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "ArithmeticOpsPyFiles",
+    srcs = [
+        "mlir/dialects/_arith_ops_ext.py",
+        "mlir/dialects/arith.py",
+        ":ArithmeticOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# Math dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "MathOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    includes = ["../include"],
+    deps = [
+        "//mlir:MathOpsTdFiles",
+        "//mlir:OpBaseTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "MathOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=math",
+            ],
+            "mlir/dialects/_math_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/MathOps.td",
+    deps = [
+        ":MathOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "MathOpsPyFiles",
+    srcs = [
+        "mlir/dialects/math.py",
+        ":MathOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# MemRef dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "MemRefOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    includes = ["../include"],
+    deps = [
+        "//mlir:MemRefOpsTdFiles",
+        "//mlir:OpBaseTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "MemRefOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=memref",
+            ],
+            "mlir/dialects/_memref_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/MemRefOps.td",
+    deps = [
+        ":MemRefOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "MemRefOpsPyFiles",
+    srcs = [
+        "mlir/dialects/_memref_ops_ext.py",
+        "mlir/dialects/memref.py",
+        ":MemRefOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# PythonTest dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "PythonTestPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    deps = [
+        "//mlir:InferTypeOpInterfaceTdFiles",
+        "//mlir:OpBaseTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "PythonTestPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=python_test",
+            ],
+            "mlir/dialects/_python_test_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "//mlir/test/python:python_test_ops.td",
+    deps = [
+        ":PythonTestPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "PythonTestPyFiles",
+    srcs = [
+        "mlir/dialects/python_test.py",
+        ":PythonTestPyGen",
+    ],
+)
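
Every dialect section in this file follows the same three-rule recipe: a td_library aggregating the ODS sources, a gentbl_filegroup that runs mlir-tblgen with -gen-python-op-bindings, and a filegroup bundling the generated and handwritten Python. As a template, the generation rule for a hypothetical dialect `foo` would look like this (all Foo* names and paths are placeholders, not targets in this patch):

    gentbl_filegroup(
        name = "FooOpsPyGen",
        tbl_outs = [
            (
                [
                    "-gen-python-op-bindings",
                    "-bind-dialect=foo",
                ],
                "mlir/dialects/_foo_ops_gen.py",
            ),
        ],
        tblgen = "//mlir:mlir-tblgen",
        td_file = "mlir/dialects/FooOps.td",  # placeholder ODS entry point
        deps = [":FooOpsPyTdFiles"],  # a td_library defined as in the sections above
    )
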
+
+##---------------------------------------------------------------------------##
+# SCF dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "SCFPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    includes = ["../include"],
+    deps = [
+        "//mlir:OpBaseTdFiles",
+        "//mlir:SCFTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "SCFPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=scf",
+            ],
+            "mlir/dialects/_scf_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/SCFOps.td",
+    deps = [
+        ":SCFPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "SCFPyFiles",
+    srcs = [
+        "mlir/dialects/_scf_ops_ext.py",
+        "mlir/dialects/scf.py",
+        ":SCFPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# Shape dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "ShapeOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    includes = ["../include"],
+    deps = [
+        "//mlir:OpBaseTdFiles",
+        "//mlir:ShapeOpsTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "ShapeOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=shape",
+            ],
+            "mlir/dialects/_shape_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/ShapeOps.td",
+    deps = [
+        ":ShapeOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "ShapeOpsPyFiles",
+    srcs = [
+        "mlir/dialects/shape.py",
+        ":ShapeOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# Standard dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "StandardOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    deps = [
+        "//mlir:OpBaseTdFiles",
+        "//mlir:StdOpsTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "StandardOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=std",
+            ],
+            "mlir/dialects/_std_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/StandardOps.td",
+    deps = [
+        ":StandardOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "StandardOpsPyFiles",
+    srcs = [
+        "mlir/dialects/_std_ops_ext.py",
+        "mlir/dialects/std.py",
+        ":StandardOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# SparseTensor dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "SparseTensorOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    deps = [
+        "//mlir:OpBaseTdFiles",
+        "//mlir:SparseTensorTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "SparseTensorOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=sparse_tensor",
+            ],
+            "mlir/dialects/_sparse_tensor_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/SparseTensorOps.td",
+    deps = [
+        ":SparseTensorOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "SparseTensorOpsPyFiles",
+    srcs = [
+        "mlir/dialects/sparse_tensor.py",
+        ":SparseTensorOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# Tosa dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "TosaOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    deps = [
+        "//mlir:OpBaseTdFiles",
+        "//mlir:TosaDialectTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "TosaOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=tosa",
+            ],
+            "mlir/dialects/_tosa_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/TosaOps.td",
+    deps = [
+        ":TosaOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "TosaOpsPyFiles",
+    srcs = [
+        "mlir/dialects/tosa.py",
+        ":TosaOpsPyGen",
+    ],
+)
+
+##---------------------------------------------------------------------------##
+# Vector dialect.
+##---------------------------------------------------------------------------##
+
+td_library(
+    name = "VectorOpsPyTdFiles",
+    srcs = [
+        "//mlir:include/mlir/Bindings/Python/Attributes.td",
+    ],
+    includes = ["../include"],
+    deps = [
+        "//mlir:OpBaseTdFiles",
+        "//mlir:VectorOpsTdFiles",
+    ],
+)
+
+gentbl_filegroup(
+    name = "VectorOpsPyGen",
+    tbl_outs = [
+        (
+            [
+                "-gen-python-op-bindings",
+                "-bind-dialect=vector",
+            ],
+            "mlir/dialects/_vector_ops_gen.py",
+        ),
+    ],
+    tblgen = "//mlir:mlir-tblgen",
+    td_file = "mlir/dialects/VectorOps.td",
+    deps = [
+        ":VectorOpsPyTdFiles",
+    ],
+)
+
+filegroup(
+    name = "VectorOpsPyFiles",
+    srcs = [
+        "mlir/dialects/vector.py",
+        ":VectorOpsPyGen",
+    ],
+)
\ No newline at end of file
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index f776696ade5ba..eb19a15cabc85 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -220,6 +220,7 @@ cc_library(
         "//mlir:SideEffects",
         "//mlir:StandardOps",
         "//mlir:StandardOpsTransforms",
+        "//mlir:Support",
         "//mlir:TensorDialect",
         "//mlir:TransformUtils",
         "//mlir:Transforms",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/python/BUILD.bazel
new file mode 100644
index 0000000000000..74eaa33a2faa2
--- /dev/null
+++ b/utils/bazel/llvm-project-overlay/mlir/test/python/BUILD.bazel
@@ -0,0 +1,10 @@
+# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+licenses(["notice"])
+
+exports_files(
+    srcs = ["python_test_ops.td"],
+    visibility = ["//visibility:public"],
+)
diff --git a/utils/bazel/llvm_configs/config.h.cmake b/utils/bazel/llvm_configs/config.h.cmake
index 37a0d234844d1..1d982b544a63a 100644
--- a/utils/bazel/llvm_configs/config.h.cmake
+++ b/utils/bazel/llvm_configs/config.h.cmake
@@ -19,6 +19,10 @@
 /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */
 #cmakedefine01 LLVM_ENABLE_CRASH_DUMPS
 
+/* Define to 1 to prefer forward slashes on Windows, and to 0 to prefer
+   backslashes. */
+#cmakedefine01 LLVM_WINDOWS_PREFER_FORWARD_SLASH
+
 /* Define to 1 if you have the `backtrace' function. */
 #cmakedefine HAVE_BACKTRACE ${HAVE_BACKTRACE}
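
The exports_files() in the new mlir/test/python/BUILD.bazel above is what lets the PythonTestPyGen rule in mlir/python/BUILD.bazel, added earlier in this patch, point its td_file at "//mlir/test/python:python_test_ops.td": a BUILD file can only reference a source file that lives in another package if that package explicitly exports it.
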