diff --git a/.config/CredScanSuppressions.json b/.config/CredScanSuppressions.json index da14baee20e3..984a86f68e69 100644 --- a/.config/CredScanSuppressions.json +++ b/.config/CredScanSuppressions.json @@ -1,49 +1,70 @@ { - "tool": "Credential Scanner", - "suppressions": [ - { - "file": [ - "/eng/common/internal-feed-operations.ps1", - "/eng/common/internal-feed-operations.sh", - "/src/libraries/Common/src/Interop/Windows/WinHttp/Interop.winhttp_types.cs", - "/src/libraries/Common/src/System/Security/Cryptography/EccSecurityTransforms.cs", - "/src/libraries/Common/tests/System/Net/Configuration.Certificates.cs", - "/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.Authentication.cs", - "/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.cs", - "/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.DefaultProxyCredentials.cs", - "/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.Proxy.cs", - "/src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.ServerCertificates.cs", - "/src/libraries/Common/tests/System/Net/Http/PostScenarioTest.cs", - "/src/libraries/Common/tests/System/Net/Prerequisites/Deployment/setup_certificates.ps1", - "/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/EC/ECKeyFileTests.cs", - "/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/EC/ECKeyFileTests.LimitedPrivate.cs", - "/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAKeyFileTests.cs", - "/src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAKeyPemTests.cs", - "/src/libraries/System.Data.Common/tests/System/Data/Common/DbConnectionStringBuilderTest.cs", - "/src/libraries/System.Diagnostics.Process/tests/ProcessStartInfoTests.cs", - "/src/libraries/System.DirectoryServices.AccountManagement/src/System/DirectoryServices/AccountManagement/constants.cs", - "/src/libraries/System.DirectoryServices.AccountManagement/tests/PrincipalTest.cs", - "/src/libraries/System.DirectoryServices.AccountManagement/tests/UserPrincipalTest.cs", - "/src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.cs", - "/src/libraries/System.Net.Http/tests/UnitTests/DigestAuthenticationTests.cs", - "/src/libraries/System.Net.Http/tests/UnitTests/HttpEnvironmentProxyTest.cs", - "/src/libraries/System.Net.Mail/tests/Functional/SmtpClientTest.cs", - "/src/libraries/System.Net.Requests/src/System/Net/FtpControlStream.cs", - "/src/libraries/System.Net.Requests/src/System/Net/FtpWebRequest.cs", - "/src/libraries/System.Net.WebSockets.Client/tests/ConnectTest.cs", - "/src/libraries/System.Private.Uri/tests/ExtendedFunctionalTests/UriRelativeResolutionTest.cs", - "/src/libraries/System.Private.Uri/tests/FunctionalTests/UriBuilderRefreshTest.cs", - "/src/libraries/System.Private.Uri/tests/FunctionalTests/UriBuilderTests.cs", - "/src/libraries/System.Private.Uri/tests/FunctionalTests/UriRelativeResolutionTest.cs", - "/src/libraries/System.Runtime/tests/System/Uri.CreateStringTests.cs", - "/src/libraries/System.Security.Cryptography.Algorithms/tests/Rfc2898Tests.cs", - "/src/libraries/System.Security.Cryptography.Pkcs/tests/Pkcs12/Pkcs12Documents.cs", - "/src/libraries/System.Security.Cryptography.X509Certificates/tests/ExportTests.cs", - "/src/libraries/System.Security.Cryptography.Xml/tests/EncryptedXmlTest.cs", - "/src/libraries/System.Security.Cryptography.Xml/tests/SignedXmlTest.cs", - 
"/src/libraries/System.Security.Cryptography.Xml/tests/TestHelpers.cs" - ], - "_justification": "Mostly test files. Other files contain harmless examples or constants." - }, - ] + "tool": "Credential Scanner", + "suppressions": [ + { + "_justification": "Unit test containing connection strings under the test.", + "file": [ + "src/libraries/System.Data.Common/tests/System/Data/Common/DbConnectionStringBuilderTest.cs" + ] + }, + { + "_justification": "Private key for testing purpose.", + "file": [ + "src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/DSA/DSAKeyPemTests.cs", + "src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/EC/ECKeyPemTests.cs", + "src/libraries/Common/tests/System/Security/Cryptography/AlgorithmImplementations/RSA/RSAKeyPemTests.cs", + "src/libraries/System.Security.Cryptography.X509Certificates/tests/TestData.cs" + ], + "placeholder": [ + "-----BEGIN PRIVATE KEY-----", + "-----BEGIN * PRIVATE KEY-----" + ] + }, + { + "_justification": "Test credential for Uri testing", + "file": [ + "src/libraries/System.Net.Http/tests/UnitTests/HttpEnvironmentProxyTest.cs", + "src/libraries/System.Private.Uri/tests/ExtendedFunctionalTests/UriRelativeResolutionTest.cs", + "src/libraries/System.Private.Uri/tests/FunctionalTests/UriBuilderRefreshTest.cs", + "src/libraries/System.Private.Uri/tests/FunctionalTests/UriBuilderTests.cs", + "src/libraries/System.Private.Uri/tests/FunctionalTests/UriRelativeResolutionTest.cs", + "src/libraries/System.Runtime/tests/System/Uri.CreateStringTests.cs" + ], + "placeholder": [ + "//*:;&$=123USERINFO@", + "//*:bar@", + "//*:bar1@", + "//*:password1@", + "//*:psw@", + "//*:userinfo2@" + ] + }, + { + "_justification": "Generic test password.", + "file": [ + "src/libraries/Common/tests/System/Net/Configuration.Certificates.cs", + "src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.Authentication.cs", + "src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.cs", + "src/libraries/Common/tests/System/Net/Http/HttpClientHandlerTest.DefaultProxyCredentials.cs", + "src/libraries/Common/tests/System/Net/Http/PostScenarioTest.cs", + "src/libraries/Common/tests/System/Net/Prerequisites/Deployment/setup_certificates.ps1", + "src/libraries/System.Net.Http/tests/FunctionalTests/SocketsHttpHandlerTest.cs", + "src/libraries/System.Net.Http/tests/UnitTests/DigestAuthenticationTests.cs", + "src/libraries/System.Net.Http/tests/UnitTests/HttpEnvironmentProxyTest.cs", + "src/libraries/System.Net.Mail/tests/Functional/SmtpClientTest.cs", + "src/libraries/System.Security.Cryptography.Xml/tests/SignedXmlTest.cs", + "src/libraries/System.Security.Cryptography.Xml/tests/TestHelpers.cs" + ], + "placeholder": [ + "\"anotherpassword\"", + "\"bar\"", + "\"mono\"", + "\"password1\"", + "\"rightpassword\"", + "\"testcertificate\"", + "\"unused\"", + "\"wrongpassword\"" + ] + } + ] } diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json index 519943094947..d6009aca5b52 100644 --- a/.config/dotnet-tools.json +++ b/.config/dotnet-tools.json @@ -15,7 +15,7 @@ ] }, "microsoft.dotnet.xharness.cli": { - "version": "1.0.0-prerelease.20352.2", + "version": "1.0.0-prerelease.20403.2", "commands": [ "xharness" ] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index 3f2a376ee711..000000000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1,56 +0,0 @@ -# Users referenced in this file will automatically be requested as reviewers for PRs that modify the given paths. 
-# See https://help.github.com/articles/about-code-owners/ - -/src/libraries/Common/src/System/Net/Http/aspnetcore/ @dotnet/http -/src/libraries/Common/tests/Tests/System/Net/aspnetcore/ @dotnet/http -/src/libraries/System.Text.Json/ @ahsonkhan @steveharter @layomia @Jozkee -/src/libraries/System.Buffers/ @ahsonkhan -/src/libraries/System.Memory/ @ahsonkhan - -# Mono Code Owners - -/src/mono @marek-safar - -/src/mono/llvm @vargaz @SamMonoRT @imhameed @EgorBo - -/src/mono/mono/arch @vargaz @lewurm -/src/mono/mono/benchmark @SamMonoRT @lewurm -/src/mono/mono/dis @lambdageek @vargaz -/src/mono/mono/eglib @vargaz @lambdageek @CoffeeFlux @lewurm - -/src/mono/mono/metadata @vargaz @lambdageek @thaystg @CoffeeFlux @lewurm @alexischr -/src/mono/mono/metadata/*-win* @lateralusX @lambdageek @CoffeeFlux -/src/mono/mono/metadata/handle* @lambdageek @vargaz -/src/mono/mono/metadata/monitor* @brzvlad @vargaz -/src/mono/mono/metadata/sgen* @brzvlad @vargaz @naricc -/src/mono/mono/metadata/thread* @lateralusX @lambdageek -/src/mono/mono/metadata/w32* @lateralusX @lambdageek @CoffeeFlux - -/src/mono/mono/mini @vargaz @lewurm @lambdageek @SamMonoRT @CoffeeFlux @alexischr -/src/mono/mono/mini/*cfgdump* @lewurm @vargaz -/src/mono/mono/mini/*exceptions* @lewurm @vargaz @BrzVlad -/src/mono/mono/mini/*llvm* @vargaz @imhameed @EgorBo -/src/mono/mono/mini/*ppc* @lewurm @vargaz -/src/mono/mono/mini/*profiler* @BrzVlad @lambdageek -/src/mono/mono/mini/*riscv* @alexrp @lewurm -/src/mono/mono/mini/*type-check* @lewurm @lambdageek -/src/mono/mono/mini/debugger-agent.c @vargaz @thaystg @DavidKarlas @lambdageek -/src/mono/mono/mini/interp/* @lewurm @BrzVlad - -/src/mono/mono/native @baulig @egorbo @alexischr @marek-safar -/src/mono/mono/profiler @BrzVlad @lambdageek -/src/mono/mono/sgen @BrzVlad @lambdageek - -/src/mono/mono/utils @vargaz @lewurm @lambdageek @CoffeeFlux @alexischr -/src/mono/mono/utils/*-win* @lateralusX @lambdageek @CoffeeFlux -/src/mono/mono/utils/atomic* @lewurm @vargaz -/src/mono/mono/utils/mono-hwcap* @lewurm @vargaz -/src/mono/mono/utils/mono-mem* @lewurm @vargaz -/src/mono/mono/utils/mono-merp* @alexischr @lambdageek -/src/mono/mono/utils/mono-state* @alexischr @lambdageek -/src/mono/mono/utils/mono-threads* @lambdageek @vargaz - -/src/mono/msvc @lateralusX @kg @akoeplinger -/src/mono/msvc/scripts @akoeplinger @kg - -/src/mono/netcore @marek-safar @akoeplinger @egorbo @vargaz @steveisok diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml new file mode 100644 index 000000000000..79cde1d3ce29 --- /dev/null +++ b/.github/workflows/backport.yml @@ -0,0 +1,26 @@ +name: Backport PR to branch +on: + issue_comment: + types: [created] + +jobs: + backport: + if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/backport to') + runs-on: ubuntu-20.04 + steps: + - name: Checkout repo + uses: actions/checkout@v2 + - name: Run backport + uses: ./eng/actions/backport + with: + auth_token: ${{ secrets.GITHUB_TOKEN }} + pr_description_template: | + Backport of #%source_pr_number% to %target_branch% + + /cc %cc_users% + + ## Customer Impact + + ## Testing + + ## Risk diff --git a/.gitignore b/.gitignore index bfe14adb6c04..7596f954d951 100644 --- a/.gitignore +++ b/.gitignore @@ -319,7 +319,6 @@ Vagrantfile CMakeFiles/ cmake_install.cmake CMakeCache.txt -Makefile # Cross compilation cross/rootfs/* @@ -353,3 +352,7 @@ src/coreclr/src/System.Private.CoreLib/common # The debug directory should not be ignored !src/coreclr/src/debug + +# Mono Wasm-specific excludes 
+src/mono/wasm/emsdk/ +src/mono/wasm/.stamp-wasm-install-and-select* diff --git a/Build.proj b/Build.proj index dc4a15ab5f41..2d96f02056fe 100644 --- a/Build.proj +++ b/Build.proj @@ -1,5 +1,9 @@ + + BuildTargetFramework=$([MSBuild]::ValueOrDefault('$(BuildTargetFramework)', '$(NetCoreAppCurrent)')) + + $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'installer.tasks')) $([MSBuild]::NormalizePath('$(InstallerTasksOutputPath)', 'Debug', 'netstandard2.0', 'installer.tasks.dll')) - $([MSBuild]::NormalizePath('$(InstallerTasksOutputPath)', 'Debug', 'net46', 'installer.tasks.dll')) + $([MSBuild]::NormalizePath('$(InstallerTasksOutputPath)', 'Debug', 'net461', 'installer.tasks.dll')) $([MSBuild]::NormalizeDirectory('$(RepoRoot)', 'docs')) $([MSBuild]::NormalizeDirectory('$(DocsDir)', 'manpages')) @@ -51,11 +51,13 @@ $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'AppleAppBuilder', 'Debug', '$(NetCoreAppCurrent)')) $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'AndroidAppBuilder', 'Debug', '$(NetCoreAppCurrent)')) $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'WasmAppBuilder', 'Debug', '$(NetCoreAppCurrent)', 'publish')) + $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'CreateWasmBundle', 'Debug', '$(NetCoreAppCurrent)', 'publish')) $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'MonoAOTCompiler', 'Debug', '$(NetCoreAppCurrent)')) $([MSBuild]::NormalizePath('$(AppleAppBuilderDir)', 'AppleAppBuilder.dll')) $([MSBuild]::NormalizePath('$(AndroidAppBuilderDir)', 'AndroidAppBuilder.dll')) $([MSBuild]::NormalizePath('$(WasmAppBuilderDir)', 'WasmAppBuilder.dll')) + $([MSBuild]::NormalizePath('$(CreateWasmBundleDir)', 'CreateWasmBundle.dll')) $([MSBuild]::NormalizePath('$(MonoAOTCompilerDir)', 'MonoAOTCompiler.dll')) @@ -84,6 +86,9 @@ false + + + false @@ -101,6 +106,9 @@ Properties $([System.Runtime.InteropServices.RuntimeInformation]::ProcessArchitecture.ToString().ToLowerInvariant()) + + + false diff --git a/Directory.Build.targets b/Directory.Build.targets index f16e04d9b4e2..e4c7e1d21a57 100644 --- a/Directory.Build.targets +++ b/Directory.Build.targets @@ -6,6 +6,8 @@ tell Microsoft.Common.targets not to import Directory.Build.targets again --> false + + false diff --git a/NuGet.config b/NuGet.config index 13929bbcd87a..5f2930685412 100644 --- a/NuGet.config +++ b/NuGet.config @@ -16,6 +16,8 @@ + + diff --git a/SECURITY.md b/SECURITY.md index 29863ccaa542..8312d20e4537 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,8 +6,8 @@ The .NET Core and ASP.NET Core support policy, including supported versions can ## Reporting a Vulnerability -Security issues and bugs should be reported privately to the Microsoft Security Response Center (MSRC), either by emailing secure@microsoft.com or via the portal at https://msrc.microsoft.com. -You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your +Security issues and bugs should be reported privately to the Microsoft Security Response Center (MSRC), either by emailing secure@microsoft.com or via the portal at https://msrc.microsoft.com. +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Further information, including the MSRC PGP key, can be found in the [MSRC Report an Issue FAQ](https://www.microsoft.com/en-us/msrc/faqs-report-an-issue). Reports via MSRC may qualify for the .NET Core Bug Bounty. 
Details of the .NET Core Bug Bounty including terms and conditions are at [https://aka.ms/corebounty](https://aka.ms/corebounty). diff --git a/docs/area-owners.md b/docs/area-owners.md index 17b260a9c9ea..65bbfc47fa66 100644 --- a/docs/area-owners.md +++ b/docs/area-owners.md @@ -1,6 +1,10 @@ -If you need to tag folks on an issue or PR, you will generally want to tag the owners (not the lead). +# Pull Requests Tagging -Note: Editing this file doesn't update the mapping used by the `@msftbot` issue notification bot to tag owners. Some area owners prefer not to get those notifications. To update those notifications, contact any one of `@danmosemsft`, `@jeffschw`, `@ericstj`, or `@karelz`. If you're a community member interested in these notifications, you won't appear in this table but we can add you to notifications - just let us know. +If you need to tag folks on an issue or PR, you will generally want to tag the owners (not the lead) for the [area](#areas) to which the change or issue is closest. For areas that are large and can be operating-system- or architecture-specific, it's better to tag the owners of the [OS](#operating-systems) or [Architecture](#architectures). + +## Areas + +Note: Editing this file doesn't update the mapping used by the `@msftbot` issue notification bot to tag owners. Some area owners prefer not to get those notifications. To update those notifications, contact any one of `@danmosemsft`, `@jeffschw`, `@marek-safar`, `@ericstj`, or `@karelz`; they have permissions to update the [`msftbot` configuration](https://fabric-cp.azurewebsites.net/bot/?repo=dotnet/runtime). If you're a community member interested in these notifications, you won't appear in this table but we can add you to notifications - just let us know.
| Area | Lead | Owners (area experts to tag in PR's and issues) | Description |
|------------------------------------------------|---------------|-----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -43,9 +47,9 @@ Note: Editing this file doesn't update the mapping used by the `@msftbot` issue
| area-Interop-coreclr | @jeffschwMSFT | @jeffschwMSFT @AaronRobinsonMSFT | |
| area-Interop-mono | @marek-safar | @lambdageek | |
| area-Meta | @ericstj | @joperezr | Issues without clear association to any specific API/contract, e.g.<br/>• new contract proposals<br/>• cross-cutting code/test pattern changes (e.g. FxCop failures)<br/>• project-wide docs<br/> |
-| area-Microsoft.CSharp | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#33170](https://github.com/dotnet/corefx/issues/33170)) |
+| area-Microsoft.CSharp | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#27790](https://github.com/dotnet/runtime/issues/27790)) |
| area-Microsoft.Extensions | @ericstj | @maryamariyan | |
-| area-Microsoft.VisualBasic | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#33170](https://github.com/dotnet/corefx/issues/33170)) |
+| area-Microsoft.VisualBasic | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#27790](https://github.com/dotnet/runtime/issues/27790)) |
| area-Microsoft.Win32 | @ericstj | @maryamariyan @Anipik | Including System.Windows.Extensions |
| area-PAL-coreclr | @mangod9 | @janvorli | |
| area-Performance-mono | @SamMonoRT | @SamMonoRT | |
@@ -73,13 +77,13 @@ Note: Editing this file doesn't update the mapping used by the `@msftbot` issue
| area-System.Diagnostics.Tracing | @tommcdon | @noahfalk @tommcdon @tarekgh @Anipik | Packages:<br/>• System.Diagnostics.DiagnosticSource<br/>• System.Diagnostics.PerformanceCounter - [@Anipik](https://github.com/Anipik)<br/>• System.Diagnostics.Tracing<br/>• System.Diagnostics.TraceSource - [@Anipik](https://github.com/Anipik)<br/> |
| area-System.DirectoryServices | @tquerec | @tquerec @josephisenhour @joperezr | |
| area-System.Drawing | @jeffhandley | @safern @tannergooding | |
-| area-System.Dynamic.Runtime | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#33170](https://github.com/dotnet/corefx/issues/33170)) |
+| area-System.Dynamic.Runtime | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#27790](https://github.com/dotnet/runtime/issues/27790)) |
| area-System.Globalization | @ericstj | @safern @tarekgh @krwq | |
| area-System.IO | @jeffhandley | @carlossanlop @jozkee | |
| area-System.IO.Compression | @jeffhandley | @carlossanlop @ericstj |<br/>• Also includes System.IO.Packaging<br/> |
| area-System.IO.Pipelines | @kevinpi | @davidfowl @halter73 @jkotalik | |
| area-System.Linq | @jeffhandley | @eiriktsarpalis @adamsitnik | |
-| area-System.Linq.Expressions | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#33170](https://github.com/dotnet/corefx/issues/33170)) |
+| area-System.Linq.Expressions | @jaredpar | @cston @333fred | Archived component - limited churn/contributions (see [#27790](https://github.com/dotnet/runtime/issues/27790)) |
| area-System.Linq.Parallel | @ericstj | @tarekgh @kouvel | |
| area-System.Management | @ericstj | @Anipik | WMI |
| area-System.Memory | @jeffhandley | @GrabYourPitchforks @adamsitnik | |
@@ -121,3 +125,20 @@ Note: Editing this file doesn't update the mapping used by the `@msftbot` issue
| area-UWP | @tommcdon | @jashook | UWP-specific issues including Microsoft.NETCore.UniversalWindowsPlatform and Microsoft.Net.UWPCoreRuntimeSdk |
| area-VM-coreclr | @mangod9 | @mangod9 | |
| area-VM-meta-mono | @SamMonoRT | @lambdageek @CoffeeFlux | |
+## Operating Systems
+
+| Operating System | Lead | Owners (area experts to tag in PR's and issues) | Description |
+|------------------|---------------|-----------------------------------------------------|--------------|
+| os-alpine | | | |
+| os-android | @steveisok | @akoeplinger | |
+| os-freebsd | | | |
+| os-mac-os-x | @steveisok | | |
+| os-ios | @steveisok | @vargaz | |
+| os-tvos | @steveisok | @vargaz | |
+
+## Architectures
+
+| Architecture | Lead | Owners (area experts to tag in PR's and issues) | Description |
+|-----------------|---------------|-----------------------------------------------------|--------------|
+| arch-wasm | @lewing | @lewing @BrzVlad | |
+
diff --git a/docs/coding-guidelines/api-guidelines/System.Memory.md b/docs/coding-guidelines/api-guidelines/System.Memory.md index 894b8ca77d9e..6df28c18ca5d 100644 --- a/docs/coding-guidelines/api-guidelines/System.Memory.md +++ b/docs/coding-guidelines/api-guidelines/System.Memory.md @@ -51,4 +51,4 @@ specs can be found here: can be implicitly converted to `ReadOnlySpan<T>`. * **AVOID** providing overloads for both `ReadOnlySpan<T>`/`Span<T>` as well as pointers and arrays as those can be implicitly converted to - `ReadOnlySpan<T>`/`Span<T>`. \ No newline at end of file + `ReadOnlySpan<T>`/`Span<T>`. diff --git a/docs/coding-guidelines/api-guidelines/nullability.md b/docs/coding-guidelines/api-guidelines/nullability.md index 8c029d01fcce..9cb1b96525c2 100644 --- a/docs/coding-guidelines/api-guidelines/nullability.md +++ b/docs/coding-guidelines/api-guidelines/nullability.md @@ -99,6 +99,7 @@ The C# compiler respects a set of attributes that impact its flow analysis. We - **DO** add `[NotNullIfNotNull(string)]` if a nullable ref argument will be non-`null` upon exit when another argument passed evaluates to non-`null`; pass that argument name as a string. Example: `public void Exchange([NotNullIfNotNull("value")] ref object? location, object? value);`. - **DO** add `[return: NotNullIfNotNull(string)]` if a method would not return `null` in case an argument passed evaluates to non-`null`; pass that argument name as a string. Example: `[return: NotNullIfNotNull("name")] public string? FormatName(string? name);` - **DO** add `[MemberNotNull(string fieldName)]` to a helper method which initializes member field(s), passing in the field name. Example: `[MemberNotNull("_buffer")] private void InitializeBuffer()`.
This will help to avoid spurious warnings at call sites that call the initialization method and then proceed to use the specified field. Note that there are two constructors to `MemberNotNull`; one that takes a single `string`, and one that takes a `params string[]`. When the number of fields initialized is small (e.g. <= 3), it's preferable to use multiple `[MemberNotNull(string)]` attributes on the method rather than one `[MemberNotNull(string, string, string, ...)]` attribute, as the latter is not CLS compliant and will likely require `#pragma warning disable` and `#pragma warning restore` around the line to suppress warnings. +- **AVOID** using `[MaybeNull]`, not because it's problematic, but because there's almost always a better option, such as `T?` (as of this writing, in all of dotnet/runtime there are only 7 occurrences of `[MaybeNull]`). One example of where it's applicable is `AsyncLocal<T>.Value`; `[DisallowNull]` can't be used here, because `null` is valid if `T` is nullable, and `T?` shouldn't be used because `Value` shouldn't be set to `null` if `T` isn't nullable. Another is in the relatively rare case where a public or protected field is exposed, may begin life as null, but shouldn't be explicitly set to null. ## Code Review Guidance @@ -113,7 +114,7 @@ A code review for enabling nullability generally involves three passes: - Adding `!` to reference type usage. These essentially suppress the null warning, telling the compiler to treat the expression as if it's non-null. These evaporate at compile-time. - Adding `Debug.Assert(reference != null);` statements. These inform the compiler that the mentioned reference is non-`null`, which will cause the compiler to factor that in and have the effect of suppressing subsequent warnings on that reference (until the flow analysis suggests that could change). As with any `Debug.Assert`, these evaporate at compile-time in release builds (where `DEBUG` isn't defined). - + - Almost any other changes have the potential to change the IL, which should not be necessary for the feature. In particular, it's common for `?`s on dereferences to sneak in, e.g. changing `someVar.SomeMethod()` to `someVar?.SomeMethod()`; that is a change to the IL, and should only be employed when there's an actual known bug that's important to fix, as otherwise we're incurring unnecessary cost. Similarly, it's easy to accidentally add `?` to value types, which has a significant impact, changing the `T` to a `Nullable<T>`, and should be avoided. - Any `!`s added that should have been unnecessary and are required due to either a compiler issue or due to lack of expressibility about annotations should have a `// TODO-NULLABLE: http://link/to/relevant/issue` comment added on the same line. diff --git a/docs/coding-guidelines/breaking-change-definitions.md b/docs/coding-guidelines/breaking-change-definitions.md index 6eb6f958fc7e..dbcbfd6c6cde 100644 --- a/docs/coding-guidelines/breaking-change-definitions.md +++ b/docs/coding-guidelines/breaking-change-definitions.md @@ -6,7 +6,7 @@ Behavioral Change A behavioral change represents changes to the behavior of a member. A behavioral change may include throwing a new exception, adding or removing internal method calls, or altering the way in which a return value is calculated. Behavioral changes can be the hardest type of change to categorize as acceptable or not - they can be severe in impact, or relatively innocuous.
-Binary Compatibility +Binary Compatibility -------------------- Refers to the ability of existing consumers of an API to be able to use a newer version without recompilation. By definition, if an assembly's public signatures have been removed, or altered so that consumers can no longer access the same interface exposed by the assembly, the change is said to be a _binary incompatible change_. @@ -16,19 +16,19 @@ Source Compatibility Refers to the ability of existing consumers of an API to recompile against a newer version without any source changes. By definition, if a consumer needs to make changes to its code in order for it to build successfully against a newer version of an API, the change is said to be a _source incompatible change_. -Design-Time Compatibility +Design-Time Compatibility ------------------------- _Design-time compatibility_ refers to preserving the design-time experience across versions of Visual Studio and other design-time environments. This can involve details around the UI of the designer, but by far the most interesting design-time compatibility is project compatibility. A potential project (or solution) must be able to be opened, and used on a newer version of a designer. -Backwards Compatibility +Backwards Compatibility ----------------------- -_Backwards compatibility_ refers to the ability of an existing consumer of an API to run against, and behave in the same way against a newer version. By definition, if a consumer is not able to run, or behaves differently against the newer version of the API, then the API is said to be _backwards incompatible_. +_Backwards compatibility_ refers to the ability of an existing consumer of an API to run against, and behave in the same way against a newer version. By definition, if a consumer is not able to run, or behaves differently against the newer version of the API, then the API is said to be _backwards incompatible_. Changes that affect backwards compatibility are strongly discouraged. All alternates should be actively considered, since developers will, by default, expect backwards compatibility in newer versions of an API. -Forwards Compatibility +Forwards Compatibility ---------------------- _Forwards compatibility_ is the exact reverse of backwards compatibility; it refers to the ability of an existing consumer of an API to run against, and behave in the same way against an _older_ version. By definition, if a consumer is not able to run, or behaves differently against an older version of the API, then the API is said to be _forwards incompatible_. diff --git a/docs/coding-guidelines/breaking-change-rules.md b/docs/coding-guidelines/breaking-change-rules.md index 6fff08837e16..ab230f9d279a 100644 --- a/docs/coding-guidelines/breaking-change-rules.md +++ b/docs/coding-guidelines/breaking-change-rules.md @@ -19,14 +19,14 @@ Breaking Change Rules ### Property, Field, Parameter and Return Values ✓ **Allowed** * Increasing the range of accepted values for a property or parameter if the member _is not_ `virtual` - + Note that the range can only increase to the extent that it does not impact the static type. e.g. it is OK to remove `if (x > 10) throw new ArgumentOutOfRangeException("x")`, but it is not OK to change the type of `x` from `int` to `long` or `int?`. * Returning a value of a more derived type for a property, field, return or `out` value Note, again, that the static type cannot change. e.g.
it is OK to return a `string` instance where an `object` was returned previously, but it is not OK to change the return type from `object` to `string`. -✗ **Disallowed** +✗ **Disallowed** * Increasing the range of accepted values for a property or parameter if the member _is_ `virtual` This is breaking because any existing overridden members will now not function correctly for the extended range of values. @@ -135,7 +135,7 @@ Breaking Change Rules So long as it does not introduce any new abstract members or change the semantics or behavior of existing members, a type can be introduced into a hierarchy between two existing types. For example, between .NET Framework 1.1 and .NET Framework 2.0, we introduced `DbConnection` as a new base class for `SqlConnection` which previously derived from `Component`. * Adding an interface implementation to a type - + This is acceptable because it will not adversely affect existing clients. Any changes which could be made to the type being changed in this situation, will have to work within the boundaries of acceptable changes defined here, in order for the new implementation to remain acceptable. Extreme caution is urged when adding interfaces that directly affect the ability of the designer or serializer to generate code or data, that cannot be consumed down-level. An example is the `ISerializable` interface. Care should be taken when the interface (or one of the interfaces that this interface requires) has default interface implementations for other interface methods. The default implementation could conflict with other default implementations in a derived class. @@ -205,7 +205,7 @@ Breaking Change Rules * Adding an overload that precludes an existing overload, and defines different behavior - This will break existing clients that were bound to the previous overload. For example, if you have a class that has a single version of a method that accepts a `uint`, an existing consumer will + This will break existing clients that were bound to the previous overload. For example, if you have a class that has a single version of a method that accepts a `uint`, an existing consumer will successfully bind to that overload, if simply passing an `int` value. However, if you add an overload that accepts an `int`, recompiling or via late-binding the application will now bind to the new overload. If different behavior results, then this is a breaking change. * Moving an exposed field onto a class higher in the hierarchy tree of the type from which it was removed diff --git a/docs/coding-guidelines/breaking-changes.md b/docs/coding-guidelines/breaking-changes.md index 5e4798cf7d1b..78b787cb70b8 100644 --- a/docs/coding-guidelines/breaking-changes.md +++ b/docs/coding-guidelines/breaking-changes.md @@ -97,7 +97,7 @@ more latitude here in .NET Core. For buckets #2 and #3 we apply a risk-benefit analysis. It doesn't matter if the old behavior is "wrong", we still need to think through the implications. This -can result in one of the following outcomes: +can result in one of the following outcomes: * **Accepted with compat switch**. Depending on the estimated customer impact, we may decide to add a compat switch that allows consumers to bring back the diff --git a/docs/coding-guidelines/coding-style.md b/docs/coding-guidelines/coding-style.md index 6686ac7702df..1a0936bb56f5 100644 --- a/docs/coding-guidelines/coding-style.md +++ b/docs/coding-guidelines/coding-style.md @@ -24,7 +24,7 @@ The general rule we follow is "use Visual Studio defaults". 9. 
If a file happens to differ in style from these guidelines (e.g. private members are named `m_member` rather than `_member`), the existing style in that file takes precedence. 10. We only use `var` when it's obvious what the variable type is (e.g. `var stream = new FileStream(...)` not `var stream = OpenStandardInput()`). -11. We use language keywords instead of BCL types (e.g. `int, string, float` instead of `Int32, String, Single`, etc) for both type references as well as method calls (e.g. `int.Parse` instead of `Int32.Parse`). See issue [391](https://github.com/dotnet/corefx/issues/391) for examples. +11. We use language keywords instead of BCL types (e.g. `int, string, float` instead of `Int32, String, Single`, etc) for both type references as well as method calls (e.g. `int.Parse` instead of `Int32.Parse`). See issue [#13976](https://github.com/dotnet/runtime/issues/13976) for examples. 12. We use PascalCasing to name all our constant local variables and fields. The only exception is for interop code where the constant value should exactly match the name and value of the code you are calling via interop. 13. We use ```nameof(...)``` instead of ```"..."``` whenever possible and relevant. 14. Fields should be specified at the top within type declarations. diff --git a/docs/coding-guidelines/interop-guidelines.md b/docs/coding-guidelines/interop-guidelines.md index e6e3a1d32060..7dff0711a8d3 100644 --- a/docs/coding-guidelines/interop-guidelines.md +++ b/docs/coding-guidelines/interop-guidelines.md @@ -58,7 +58,7 @@ internal static partial class Interop ``` As shown above, platforms may be additive, in that an assembly may use functionality from multiple folders, e.g. System.IO.FileSystem's Linux build will use functionality both from Unix (common across all Unix systems) and from Linux (specific to Linux and not available across non-Linux Unix systems). -   + - Interop.*.cs files are created in a way such that every assembly consuming the file will need every DllImport it contains. - If multiple related DllImports will all be needed by every consumer, they may be declared in the same file, named for the functionality grouping, e.g. Interop.IOErrors.cs. - Otherwise, in the limit (and the expected case for most situations) each Interop.*.cs file will contain a single DllImport and associated interop types (e.g. the structs used with that signature) and helper wrappers, e.g. Interop.strerror.cs. @@ -104,7 +104,7 @@ internal static partial class Interop // contents of Common\src\Interop\Windows\ ``` (Note that this will likely result in some extra constants defined in each assembly that uses interop, which minimally violates one of the goals, but it's very minimal.) -   + - .csproj project files then include the interop code they need, e.g. ```XML @@ -170,10 +170,10 @@ To address this, we're moving to a model where all UNIX interop from dotnet/runt Guidelines for shim C++ API: -- Keep them as "thin"/1:1 as possible. - - We want to write the majority of code in C#. +- Keep them as "thin"/1:1 as possible. + - We want to write the majority of code in C#. - Never skip the shim and P/Invoke directly to the underlying platform API. It's easy to assume something is safe/guaranteed when it isn't. -- Don't cheat and take advantage of coincidental agreement between one flavor's ABI and the shim's ABI. +- Don't cheat and take advantage of coincidental agreement between one flavor's ABI and the shim's ABI. - Use PascalCase in a style closer to Win32 than libc. 
 - If an export point has a 1:1 correspondence to the platform API, then name it after the platform API in PascalCase (e.g. stat -> Stat, fstat -> FStat). - If an export is not 1:1, then spell things out as we typically would in dotnet/runtime code (i.e. don't use abbreviations unless they come from the underlying API). diff --git a/docs/coding-guidelines/project-guidelines.md b/docs/coding-guidelines/project-guidelines.md index 36a43555afff..afc05f4037b2 100644 --- a/docs/coding-guidelines/project-guidelines.md +++ b/docs/coding-guidelines/project-guidelines.md @@ -1,41 +1,33 @@ # Build Project Guidelines -In order to work in dotnet/runtime repo you must first run build.cmd/sh from the root of the repo at least -once before you can iterate and work on a given library project. +In order to work in the dotnet/runtime repo you must first run build.cmd/sh from the root of the repo at least once before you can iterate and work on a given library project. ## Behind the scenes with build.cmd/sh -- Setup tools (currently done in restore in build.cmd/sh) +- Restore tools - Restore external dependencies - CoreCLR - Copy to `bin\runtime\$(BuildTargetFramework)-$(TargetOS)-$(Configuration)-$(TargetArchitecture)` - - Netstandard Library - Copy to `bin\ref\netstandard2.0` - - NetFx targeting pack - Copy to `bin\ref\net472` - Build targeting pack - Build src\libraries\ref.proj which builds all reference assembly projects. For reference assembly project information see [ref](#ref) - Build product - Build src\libraries\src.proj which builds all the source library projects. For source library project information see [src](#src). -- Sign product - - Build src\sign.proj # Build Pivots Below is a list of all the various options we pivot the project builds on: -- **Target Frameworks:** NetFx (aka Desktop), netstandard (aka dotnet/Portable), NETCoreApp (aka .NET Core) -- **Platform Runtimes:** NetFx (aka CLR/Desktop), CoreCLR, Mono +- **Target Frameworks:** .NETFramework, .NETStandard, .NETCoreApp +- **Platform Runtimes:** .NETFramework (aka CLR/Desktop), CoreCLR, Mono - **OS:** Windows_NT, Linux, OSX, FreeBSD, AnyOS - **Flavor:** Debug, Release -- **Architecture:** x86, x64, arm, arm64, AnyCPU ## Individual build properties The following are the properties associated with each build pivot -- `$(BuildTargetFramework) -> netstandard2.1 | net5.0 | net472` +- `$(BuildTargetFramework) -> Any .NETCoreApp or .NETFramework TFM, e.g. net5.0` - `$(TargetOS) -> Windows | Linux | OSX | FreeBSD | [defaults to running OS when empty]` - `$(Configuration) -> Release | [defaults to Debug when empty]` - `$(TargetArchitecture) - x86 | x64 | arm | arm64 | [defaults to x64 when empty]` - `$(RuntimeOS) - win7 | osx10.10 | ubuntu.14.04 | [any other RID OS+version] | [defaults to running OS when empty]` See [RIDs](https://github.com/dotnet/runtime/tree/master/src/libraries/pkg/Microsoft.NETCore.Platforms) for more info.
-For more information on various targets see also [.NET Standard](https://github.com/dotnet/standard/blob/master/docs/versions.md) - ## Aggregate build properties Each project will define a set of supported TargetFrameworks @@ -60,21 +52,8 @@ Pure netstandard configuration: All supported targets with unique windows/unix build for netcoreapp: ``` - <TargetFrameworks>$(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix;$(NetFrameworkCurrent)-Windows_NT</TargetFrameworks> - -``` - -### Placeholder Target Frameworks -Placeholder Target Framework can be added to the `<TargetFrameworks>` property to indicate the build system that the specific project is inbox in that framework and that Build Setting needs to be ignored. - -Placeholder target frameworks start with _ prefix. - -Example: -When we have a project that has a `netstandard2.0` target framework that means that this project is compatible with any build setting. So if we do a vertical build for `net472` this project will be built as part of the vertical because `net472` is compatible with `netstandard2.0`. This means that in the runtime and testhost binaries the netstandard2.0 implementation will be included, and we will test against those assets instead of testing against the framework inbox asset. In order to tell the build system to not include this project as part of the `net472` vertical we need to add a placeholder target framework: ``` + <TargetFrameworks>$(NetCoreAppCurrent)-Windows_NT;$(NetCoreAppCurrent)-Unix;net461-Windows_NT</TargetFrameworks> - <TargetFrameworks>netstandard2.0;_net472</TargetFrameworks> - ``` ## Options for building @@ -94,7 +73,7 @@ When building an individual project the `BuildTargetFramework` and `TargetOS` wi ## Supported full build settings - .NET Core latest on current OS (default) -> `$(NetCoreAppCurrent)-[RunningOS]` -- .NET Framework latest -> `$(NetFrameworkCurrent)-Windows_NT` +- .NET Framework latest -> `net48-Windows_NT` # Library project guidelines @@ -152,44 +131,34 @@ Reference assemblies are required for any library that has more than one impleme In the ref directory for the library there should be at most **one** `.csproj` that contains the latest API for the reference assembly for the library. That project can contain multiple entries in its `TargetFrameworks` property. Ref projects should use `` for its dependencies. ### ref output -The output for the ref project build will be a flat targeting pack folder in the following directory: +All ref outputs should be under -`bin\ref\$(TargetFramework)` - -
//**CONSIDER**: Do we need a specific BuildTargetFramework version of TargetFramework for this output path to ensure all projects output to same targeting path? +`bin\$(MSBuildProjectName)\ref\$(TargetFramework)` ## src In the src directory for a library there should be only **one** `.csproj` file that contains any information necessary to build the library in various target frameworks. All supported target frameworks should be listed in the `TargetFrameworks` property. -All libraries should use `<Reference>` for all their project references. That will cause them to be resolved against a targeting pack (i.e. `bin\ref\net5.0` or `\bin\ref\netstandard2.0`) based on the project target framework. There should not be any direct project references to other libraries. The only exception to that rule right now is for partial facades which directly reference System.Private.CoreLib and thus need to directly reference other partial facades to avoid type conflicts. -
//**CONSIDER**: just using Reference and use a reference to System.Private.CoreLib as a trigger to turn the other References into a ProjectReference automatically. That will allow us to have consistency where all projects just use Reference. +All libraries should use `<Reference>` for all their references to libraries that compose the shared framework of the current .NETCoreApp. That will cause them to be resolved against the locally built targeting pack which is located at `artifacts\bin\microsoft.netcore.app.ref`. The only exception to that rule right now is for partial facades which directly reference System.Private.CoreLib and thus need to directly reference other partial facades to avoid type conflicts. -### src output -The output for the src product build will be a flat runtime folder into the following directory: +Other target frameworks than .NETCoreApp latest (i.e. `netstandard2.0`, `net461`, `netcoreapp3.0`) should use ProjectReference items to reference dependencies. -`bin\runtime\$(BuildSettings)` +### src output +All src outputs are under -Note: The `BuildSettings` is a global property and not the project setting because we need all projects to output to the same runtime directory no matter which compatible target framework we select and build the project with. -```$(BuildTargetFramework)-$(TargetOS)-(Configuration)-(TargetArchitecture)``` +`bin\$(MSBuildProjectName)\$(TargetFramework)` ## pkg In the pkg directory for the library there should be only **one** `.pkgproj` for the primary package for the library. If the library has platform-specific implementations those should be split into platform specific projects in a subfolder for each platform. (see [Package projects](./package-projects.md)) -TODO: Outline changes needed for pkgprojs - ## tests Similar to the src projects tests projects will define a `TargetFrameworks` property so they can list out the set of target frameworks they support. -Tests should not have any `<Reference>` or `<ProjectReference>` items in their project because they will automatically reference everything in the targeting pack based on the TargetFramework they are building in. The only exception to this is a `<ProjectReference>` can be used to reference other test helper libraries or assets. -In order to build and run a test project in a given build target framework a root level build.cmd/sh must have been completed for that build target framework first. Tests will run on the live built runtime at `bin\runtime\$(BuildSettings)`. -TODO: We need update our test host so that it can run from the shared runtime directory as well as resolve assemblies from the test output directory. +Tests don't need to reference default references which are part of the targeting packs (i.e. `mscorlib` on .NETFramework or `System.Runtime` on .NETCoreApp). Everything on top of targeting packs should be referenced via ProjectReference items for live built assets. ### tests output All test outputs should be under -`bin\tests\$(MSBuildProjectName)\$(TargetFramework)` or -`bin\tests\$(MSBuildProjectName)\netstandard2.0` +`bin\$(MSBuildProjectName)\$(TargetFramework)` ## Facades Facades are unique in that they don't have any code and instead are generated by finding a contract reference assembly with the matching identity and generating type forwards for all the types to where they live in the implementation assemblies (aka facade seeds). There are also partial facades which contain some type forwards as well as some code definitions. All the various build configurations should be contained in the one csproj file per library.
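To make the reference rules above concrete, here is a minimal sketch of what a library src project could look like under these guidelines. It is illustrative only: the project name, TFM list, conditions, and relative paths are invented for the example, not taken from the repo.

```XML
<!-- Hypothetical sketch: src\libraries\System.Example\src\System.Example.csproj -->
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <!-- All supported target frameworks are listed in one property. -->
    <TargetFrameworks>$(NetCoreAppCurrent);netstandard2.0</TargetFrameworks>
  </PropertyGroup>
  <!-- Shared-framework dependencies resolve against the locally built targeting pack. -->
  <ItemGroup Condition="'$(TargetFramework)' == '$(NetCoreAppCurrent)'">
    <Reference Include="System.Runtime" />
    <Reference Include="System.Memory" />
  </ItemGroup>
  <!-- Other target frameworks reference live-built dependencies directly. -->
  <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
    <ProjectReference Include="..\..\System.Memory\src\System.Memory.csproj" />
  </ItemGroup>
</Project>
```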
diff --git a/docs/design/coreclr/botr/corelib.md b/docs/design/coreclr/botr/corelib.md index b23ddba8143a..288e4be0353b 100644 --- a/docs/design/coreclr/botr/corelib.md +++ b/docs/design/coreclr/botr/corelib.md @@ -201,7 +201,7 @@ Here's a real-world example from the `String` class: ```CSharp public sealed partial class String -{ +{ [MethodImpl(MethodImplOptions.InternalCall)] private extern string? IsInterned(); diff --git a/docs/design/coreclr/botr/method-descriptor.md b/docs/design/coreclr/botr/method-descriptor.md index 7d3f24ccf40f..453e850f1d7e 100644 --- a/docs/design/coreclr/botr/method-descriptor.md +++ b/docs/design/coreclr/botr/method-descriptor.md @@ -1,4 +1,4 @@ -Method Descriptor +Method Descriptor ================= Author: Jan Kotas ([@jkotas](https://github.com/jkotas)) - 2006 diff --git a/docs/design/coreclr/botr/readytorun-format.md b/docs/design/coreclr/botr/readytorun-format.md index 8db3423dbadf..ff48f53c16c3 100644 --- a/docs/design/coreclr/botr/readytorun-format.md +++ b/docs/design/coreclr/botr/readytorun-format.md @@ -9,7 +9,7 @@ Revisions: # Introduction This document describes ReadyToRun format 3.1 implemented in CoreCLR as of June 2019 and not yet -implemented proposed extensions 4.1 for the support of composite R2R file format. +implemented proposed extensions 4.1 for the support of composite R2R file format. **Composite R2R file format** has basically the same structure as the traditional R2R file format defined in earlier revisions except that the output file represents a larger number of input MSIL assemblies compiled together as a logical unit. @@ -320,8 +320,8 @@ basic encoding, with extended encoding for large values). ## ReadyToRunSectionType.RuntimeFunctions -This section contains sorted array of `RUNTIME_FUNCTION` entries that describe all code blocks in the image with pointers to their unwind info. -Despite the name, these code block might represent a method body, or it could be just a part of it (e.g. a funclet) that requires its own unwind data. +This section contains a sorted array of `RUNTIME_FUNCTION` entries that describe all code blocks in the image with pointers to their unwind info. +Despite the name, these code blocks might represent a method body, or could be just a part of it (e.g. a funclet) that requires its own unwind data. The standard Windows xdata/pdata format is used. ARM format is used for x86 to compensate for the lack of x86 unwind info standard. The unwind info blob is immediately followed by the GC info blob. The encoding slightly differs for amd64 diff --git a/docs/design/coreclr/botr/shared-generics.md b/docs/design/coreclr/botr/shared-generics.md index 6b23563ebf22..367d2caa6d8b 100644 --- a/docs/design/coreclr/botr/shared-generics.md +++ b/docs/design/coreclr/botr/shared-generics.md @@ -47,7 +47,7 @@ This feature is currently only supported for instantiations over reference types The dictionary used by any given generic method is pointed at by the `m_pPerInstInfo` field on the `InstantiatedMethodDesc` structure of that method. It's a direct pointer to the contents of the generic dictionary data. -On generic types, there's an extra level of indirection: the `m_pPerInstInfo` field on the `MethodTable` structure is a pointer to a table of dictionaries, and each entry in that table is a pointer to the actual generic dictionary data. This is because types have inheritance, and derived generic types inherit the dictionaries of their base types.
+On generic types, there's an extra level of indirection: the `m_pPerInstInfo` field on the `MethodTable` structure is a pointer to a table of dictionaries, and each entry in that table is a pointer to the actual generic dictionary data. This is because types have inheritance, and derived generic types inherit the dictionaries of their base types. Here's an example: ```c# diff --git a/docs/design/coreclr/botr/type-system.md b/docs/design/coreclr/botr/type-system.md index dba3a0e22fa7..d111fd79f03f 100644 --- a/docs/design/coreclr/botr/type-system.md +++ b/docs/design/coreclr/botr/type-system.md @@ -1,4 +1,4 @@ -Type System Overview +Type System Overview ==================== Author: David Wrighton ([@davidwrighton](https://github.com/davidwrighton)) - 2010 diff --git a/docs/design/coreclr/botr/vectors-and-intrinsics.md b/docs/design/coreclr/botr/vectors-and-intrinsics.md index 0e0848950c2c..2688db9ca376 100644 --- a/docs/design/coreclr/botr/vectors-and-intrinsics.md +++ b/docs/design/coreclr/botr/vectors-and-intrinsics.md @@ -3,7 +3,7 @@ Vectors and Hardware Intrinsics Support --- # Introduction -The CoreCLR runtime has support for several varieties of hardware intrinsics, and various ways to compile code which uses them. This support varies by target processor, and the code produced depends on how the jit compiler is invoked. This document describes the various behaviors of intrinsics in the runtime, and concludes with implications for developers working on the runtime and libraries portions of the runtime. +The CoreCLR runtime has support for several varieties of hardware intrinsics, and various ways to compile code which uses them. This support varies by target processor, and the code produced depends on how the jit compiler is invoked. This document describes the various behaviors of intrinsics in the runtime, and concludes with implications for developers working on the runtime and libraries portions of the runtime. # Acronyms and definitions | Acronym | Definition @@ -44,8 +44,8 @@ There are 2 different implementations of AOT compilation under development at th ### Code written in System.Private.CoreLib.dll #### Crossgen implementation rules -- Any code which uses `Vector<T>` will not be compiled AOT. (See code which throws a TypeLoadException using `IDS_EE_SIMD_NGEN_DISALLOWED`) -- Code which uses Sse and Sse2 platform hardware intrinsics is always generated as it would be at jit time. +- Any code which uses `Vector<T>` will not be compiled AOT. (See code which throws a TypeLoadException using `IDS_EE_SIMD_NGEN_DISALLOWED`) +- Code which uses Sse and Sse2 platform hardware intrinsics is always generated as it would be at jit time. - Code which uses Sse3, Ssse3, Sse41, Sse42, Popcnt, Pclmulqdq, and Lzcnt instruction sets will be generated, but the associated IsSupported check will be a runtime check. See `FilterNamedIntrinsicMethodAttribs` for details on how this is done. - Code which uses other instruction sets will be generated as if the processor does not support that instruction set. (For instance, a usage of Avx2.IsSupported in CoreLib will generate native code where it unconditionally returns false, and then if and when tiered compilation occurs, the function may be rejitted and have code where the property returns true.) - Non-platform intrinsics which require more hardware support than the minimum supported hardware capability will not take advantage of that capability. In particular the code generated for `Vector2/3/4.Dot`, `Math.Round`, and `MathF.Round`.
See `FilterNamedIntrinsicMethodAttribs` for details. MethodImplOptions.AggressiveOptimization may be used to disable precompilation of this sub-par code. @@ -58,8 +58,8 @@ The rules here provide the following characteristics. - AOT generated code which could take advantage of more advanced hardware support experiences a performance penalty until rejitted. (If a customer chooses to disable tiered compilation, then customer code may always run slowly). #### Code review rules for code written in System.Private.CoreLib.dll -- Any use of a platform intrinsic in the codebase MUST be wrapped with a call to the associated IsSupported property. This wrapping MUST be done within the same function that uses the hardware intrinsic, and MUST NOT be in a wrapper function unless it is one of the intrinsics that are enabled by default for crossgen compilation of System.Private.CoreLib (See list above in the implementation rules section). -Within a single function that uses platform intrinsics, it must behave identically regardless of whether IsSupported returns true or not. This rule is required as code inside of an IsSupported check that calls a helper function cannot assume that the helper function will itself see its use of the same IsSupported check return true. This is due to the impact of tiered compilation on code execution within the process. +- Any use of a platform intrinsic in the codebase MUST be wrapped with a call to the associated IsSupported property. This wrapping MUST be done within the same function that uses the hardware intrinsic, and MUST NOT be in a wrapper function unless it is one of the intrinsics that are enabled by default for crossgen compilation of System.Private.CoreLib (See list above in the implementation rules section). +- Within a single function that uses platform intrinsics, it must behave identically regardless of whether IsSupported returns true or not. This rule is required as code inside of an IsSupported check that calls a helper function cannot assume that the helper function will itself see its use of the same IsSupported check return true. This is due to the impact of tiered compilation on code execution within the process. - Excessive use of intrinsics may cause startup performance problems due to additional jitting, or may not achieve desired performance characteristics due to suboptimal codegen. ACCEPTABLE Code @@ -130,7 +130,7 @@ public class BitOperations of this method may be compiled as if the Avx2 feature is not available, and is not reliably rejitted at the same time as the PopCount function. - As a special note, on the x86 and x64 platforms, this generally unsafe pattern may be used + As a special note, on the x86 and x64 platforms, this generally unsafe pattern may be used with the Sse, Sse2, Sse3, Ssse3, Sse41 and Sse42 instruction sets as those instruction sets are treated specially by both crossgen1 and crossgen2 when compiling System.Private.CoreLib.dll. }
(See code which throws a TypeLoadException using `IDS_EE_SIMD_NGEN_DISALLOWED`) -- Any code which uses `Vector64<T>`, `Vector128<T>` or `Vector256<T>` will not be compiled AOT. (See code which throws a TypeLoadException using `IDS_EE_HWINTRINSIC_NGEN_DISALLOWED`) +- Any code which uses an intrinsic from the `System.Runtime.Intrinsics.Arm` or `System.Runtime.Intrinsics.X86` namespace will not be compiled AOT. (See code which throws a TypeLoadException using `IDS_EE_HWINTRINSIC_NGEN_DISALLOWED`) +- Any code which uses `Vector<T>` will not be compiled AOT. (See code which throws a TypeLoadException using `IDS_EE_SIMD_NGEN_DISALLOWED`) +- Any code which uses `Vector64<T>`, `Vector128<T>` or `Vector256<T>` will not be compiled AOT. (See code which throws a TypeLoadException using `IDS_EE_HWINTRINSIC_NGEN_DISALLOWED`) - Non-platform intrinsics which require more hardware support than the minimum supported hardware capability will not take advantage of that capability. In particular the code generated for Vector2/3/4 is sub-optimal. MethodImplOptions.AggressiveOptimization may be used to disable compilation of this sub-par code. #### Characteristics which result from rules The rules here provide the following characteristics. - Use of platform specific hardware intrinsics causes runtime jit and startup time concerns. - Use of `Vector<T>` causes runtime jit and startup time concerns. -AOT generated code which could take advantage of more advanced hardware support experiences a performance penalty until rejitted. (If a customer chooses to disable tiered compilation, then customer code may always run slowly). +AOT generated code which could take advantage of more advanced hardware support experiences a performance penalty until rejitted. (If a customer chooses to disable tiered compilation, then customer code may always run slowly). #### Code review rules for use of platform intrinsics - Any use of a platform intrinsic in the codebase SHOULD be wrapped with a call to the associated IsSupported property. This wrapping may be done within the same function that uses the hardware intrinsic, but this is not required as long as the programmer can control all entrypoints to a function that uses the hardware intrinsic. @@ -183,7 +183,7 @@ Since System.Private.CoreLib.dll is known to be code reviewed with the code revi # Mechanisms in the JIT to generate correct code to handle varied instruction set support -The JIT receives flags which instruct it on what instruction sets are valid to use, and has access to a new jit interface api `notifyInstructionSetUsage(isa, bool supportBehaviorRequired)`. +The JIT receives flags which instruct it on what instruction sets are valid to use, and has access to a new jit interface api `notifyInstructionSetUsage(isa, bool supportBehaviorRequired)`. The notifyInstructionSetUsage api is used to notify the AOT compiler infrastructure that the code may only execute if the runtime environment of the code is exactly the same as the boolean parameter indicates it should be. For instance, if `notifyInstructionSetUsage(Avx, false)` is used, then the code generated must not be used if the `Avx` instruction set is useable. Similarly `notifyInstructionSetUsage(Avx, true)` will indicate that the code may only be used if the `Avx` instruction set is available.
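To illustrate the code review rules above, here is a minimal sketch of the IsSupported-guarded pattern this document describes. The helper type is invented for illustration; what matters is the shape: the guard lives in the same method as the intrinsic, and the software fallback has identical observable behavior.

```csharp
using System.Runtime.Intrinsics.X86;

internal static class IntrinsicPatternExample
{
    // Illustrative only: the intrinsic use is wrapped by IsSupported in the same
    // method, and both paths compute exactly the same result, so behavior does not
    // change if tiered compilation rejits the method with different ISA support.
    internal static int PopCount(uint value)
    {
        if (Popcnt.IsSupported)
        {
            return (int)Popcnt.PopCount(value);
        }

        // Portable fallback with identical observable behavior.
        value -= (value >> 1) & 0x55555555u;
        value = (value & 0x33333333u) + ((value >> 2) & 0x33333333u);
        return (int)((((value + (value >> 4)) & 0x0F0F0F0Fu) * 0x01010101u) >> 24);
    }
}
```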
diff --git a/docs/design/coreclr/botr/virtual-stub-dispatch.md b/docs/design/coreclr/botr/virtual-stub-dispatch.md
index af80f0a84c96..bf67c5895bcb 100644
--- a/docs/design/coreclr/botr/virtual-stub-dispatch.md
+++ b/docs/design/coreclr/botr/virtual-stub-dispatch.md
@@ -91,7 +91,7 @@ The following is a small class structure (modeled in C#), and what the resulting
 ![Figure 1](images/virtualstubdispatch-fig1.png)
-Thus, looking at this map, we see that the first column of the sub-maps of the slot maps correspond to the slot number in the classic virtual table view (remember that System.Object contributes four virtual methods of its own, which are omitted for clarity). Searches for method implementations are always bottom-up. Thus, if I had an object of type _B_ and I wished to invoke _I.Foo_, I would look for a mapping of _I.Foo_ starting at _B_'s slot map. Not finding it there, I would look in _A_'s slot map and find it there. It states that virtual slot 0 of _I_ (corresponding to _I.Foo_) is implemented by virtual slot 0. Then I return to _B_'s slot map and search for an implementation for slot 0, and find that it is implemented by slot 1 in its own implementation table.
+Thus, looking at this map, we see that the first column of the sub-maps of the slot maps corresponds to the slot number in the classic virtual table view (remember that System.Object contributes four virtual methods of its own, which are omitted for clarity). Searches for method implementations are always bottom-up. Thus, if I had an object of type _B_ and I wished to invoke _I.Foo_, I would look for a mapping of _I.Foo_ starting at _B_'s slot map. Not finding it there, I would look in _A_'s slot map and find it there. It states that virtual slot 0 of _I_ (corresponding to _I.Foo_) is implemented by virtual slot 4. Then I return to _B_'s slot map and search for an implementation for virtual slot 4, and find that it is implemented by slot 1 in its own implementation table.

### Additional Uses
diff --git a/docs/design/coreclr/botr/xplat-minidump-generation.md b/docs/design/coreclr/botr/xplat-minidump-generation.md
index f0836ae5c214..46fd6bdf3727 100644
--- a/docs/design/coreclr/botr/xplat-minidump-generation.md
+++ b/docs/design/coreclr/botr/xplat-minidump-generation.md
@@ -1,6 +1,6 @@
# Introduction #
-Core dump generation on Linux and other non-Windows platforms has several challenges. Dumps can be very large and the default name/location of a dump is not consistent across all our supported platforms. The size of a full core dumps can be controlled somewhat with the "coredump_filter" file/flags but even with the smallest settings may be still too large and may not contain all the managed state needed for debugging. By default, some platforms use _core_ as the name and place the core dump in the current directory from where the program is launched; others add the _pid_ to the name. Configuring the core name and location requires superuser permission. Requiring superuser to make this consistent is not a satisfactory option.
+Dump generation on Windows, Linux and other non-Windows platforms has several challenges. Dumps can be very large and the default name/location of a dump is not consistent across all our supported platforms. The size of a full core dump can be controlled somewhat with the "coredump_filter" file/flags, but even with the smallest settings it may still be too large and may not contain all the managed state needed for debugging.
By default, some platforms use _core_ as the name and place the core dump in the current directory from where the program is launched; others add the _pid_ to the name. Configuring the core name and location requires superuser permission. Requiring superuser to make this consistent is not a satisfactory option.

Our goal is to generate core dumps that are on par with WER (Windows Error Reporting) crash dumps on any supported Linux platform. At the very least we want to enable the following:
- automatic generation of minimal size minidumps. The quality and quantity of the information contained in the dump should be on par with the information contained in a traditional Windows mini-dump.
@@ -12,7 +12,7 @@ Our solution at this time is to intercept any unhandled exception in the PAL lay
 We looked at the existing technologies like Breakpad and its derivatives (e.g. an internal MS version called _msbreakpad_ from the SQL team). Breakpad generates Windows minidumps, but they are not compatible with existing tools like Windbg, etc. Msbreakpad even more so. There is a minidump to Linux core conversion utility but it seems like a wasted extra step. _Breakpad_ does allow the minidump to be generated in-process inside the signal handlers. It restricts the APIs to what was allowed in an "async" signal handler (like SIGSEGV) and has a small subset of the C++ runtime that was also similarly constrained. We also need to add the set of memory regions for the "managed" state, which requires loading and using the _DAC_'s (*) enumerate-memory interfaces. Loading modules is not allowed in an async signal handler, but forking/execve is allowed, so launching a utility that loads the _DAC_, enumerates the list of memory regions and writes the dump is the only reasonable option. It would also allow uploading the dump to a server.
-\* The _DAC_ is a special build of parts of the coreclr runtime that allows inspection of the runtime's managed state (stacks, variables, GC state heaps) out of context. One of the many interfaces it provides is [ICLRDataEnumMemoryRegions](https://github.com/dotnet/runtime/blob/master/src/coreclr/src/debug/daccess/dacimpl.h) which enumerates all the managed state a minidump would require to enable a fuitful debugging experience.
+\* The _DAC_ is a special build of parts of the coreclr runtime that allows inspection of the runtime's managed state (stacks, variables, GC state heaps) out of context. One of the many interfaces it provides is [ICLRDataEnumMemoryRegions](https://github.com/dotnet/runtime/blob/master/src/coreclr/src/debug/daccess/dacimpl.h) which enumerates all the managed state a minidump would require to enable a fruitful debugging experience.

_Breakpad_ could still have been used out of context in the generation utility, but there seemed to be no value in their Windows-like minidump format when it would have to be converted to the native Linux core format anyway, because in most scenarios using the platform tools like _lldb_ is necessary. It also adds a coreclr build dependency on Google's _Breakpad_ or SQL's _msbreakpad_ source repo. The only advantage is that the breakpad minidumps may be a little smaller, because minidump memory regions are byte granular while Linux core memory regions need to be page granular.
@@ -42,7 +42,11 @@ There will be some differences gathering the crash information but these platfor
### OS X ###
-Gathering the crash information on OS X will be quite a bit different than Linux and the core dump will be written in the Mach-O format instead of ELF. The OS X support currently has not been implemented.
+As of .NET 5.0, createdump is supported on MacOS, but instead of the MachO dump format it generates ELF core dumps. This is because of time constraints developing a MachO dump writer on the generation side and a MachO reader for the diagnostics tooling side (dotnet-dump and CLRMD). This means that native debuggers like gdb and lldb will not work with these dumps, but the dotnet-dump tool will allow the managed state to be analyzed. Because of this behavior, an additional environment variable needs to be set (COMPlus_DbgEnableElfDumpOnMacOS=1) along with the ones below in the Configuration/Policy section.
+
+### Windows ###
+
+As of .NET 5.0, createdump and the configuration environment variables below are supported on Windows. It is implemented using the Windows MiniDumpWriteDump API. This allows consistent crash/unhandled exception dumps across all of our platforms.

# Configuration/Policy #
@@ -75,14 +79,25 @@ The createdump utility can also be run from the command line on arbitrary .NET C
 `sudo createdump `
- createdump [options] pid
- -f, --name - dump path and file name. The pid can be placed in the name with %d. The default is "/tmp/coredump.%d"
- -n, --normal - create minidump (default).
- -h, --withheap - create minidump with heap.
+ -f, --name - dump path and file name. The %p, %e, %h, %t format characters are supported. The default is '/tmp/coredump.%p'
+ -n, --normal - create minidump.
+ -h, --withheap - create minidump with heap (default).
 -t, --triage - create triage minidump.
 -u, --full - create full core dump.
 -d, --diag - enable diagnostic messages.
+
+**Dump name formatting**
+
+As of .NET 5.0, the following subset of the core pattern (see [core](https://man7.org/linux/man-pages/man5/core.5.html)) dump name formatting is supported:
+
+ %% A single % character.
+ %d PID of dumped process (for backward compatibility with earlier createdump versions).
+ %p PID of dumped process.
+ %e The process executable filename.
+ %h Hostname returned by gethostname().
+ %t Time of dump, expressed as seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).
+
# Testing #
The test plan is to modify the SOS tests in the (still) private debuggertests repo to trigger and use the core minidumps generated. Debugging managed core dumps on Linux is not supported by _mdbg_ at this time until we have an ELF core dump reader, so only the SOS tests (which use _lldb_ on Linux) will be modified.
diff --git a/docs/design/coreclr/jit/GuardedDevirtualization.md b/docs/design/coreclr/jit/GuardedDevirtualization.md
index 95b3caa65364..84eeaa085f72 100644
--- a/docs/design/coreclr/jit/GuardedDevirtualization.md
+++ b/docs/design/coreclr/jit/GuardedDevirtualization.md
@@ -28,7 +28,7 @@ happens quite early (during importation) and there is only minimal ability to
 do data flow analysis at this stage. So for current devirtualization the source of the type information and the consumption must be fairly close in the code. A more detailed accounting of some of the shortcomings can be found in
-[CoreCLR#9908](https://github.com/dotnet/coreclr/issues/9908).
+[#7541](https://github.com/dotnet/runtime/issues/7541).
Resolution of these issues will improve the ability of the JIT to devirtualize, but even the best analysis possible will still miss out on many cases. Some call @@ -277,7 +277,7 @@ setup, may be able to optimize away the null check, and opens the door for inlining. So it should be slightly cheaper on average and significantly cheaper in some cases. -(Note [CoreCLR#1422](https://github.com/dotnet/coreclr/issues/14222) indicates +(Note [#9027](https://github.com/dotnet/runtime/issues/9027) indicates we should be able to optimize away the null check in any case). If the guarded tests fails we've filtered out one method table the dispatch cell diff --git a/docs/design/coreclr/jit/JitOptimizerTodoAssessment.md b/docs/design/coreclr/jit/JitOptimizerTodoAssessment.md index 7799abdabf6c..1c1eec65a4b6 100644 --- a/docs/design/coreclr/jit/JitOptimizerTodoAssessment.md +++ b/docs/design/coreclr/jit/JitOptimizerTodoAssessment.md @@ -72,8 +72,8 @@ high priority. We haven't been targeting benchmarks that spend a lot of time doing computations in an inner loop. Pursuing loop optimizations for the peanut butter effect would seem odd. So this simply hasn't bubbled up in priority yet, though it's -bound to eventually. Obvious candidates include [IV widening](https://github.com/dotnet/coreclr/issues/9179), -[unrolling](https://github.com/dotnet/coreclr/issues/11606), load/store motion, +bound to eventually. Obvious candidates include [IV widening](https://github.com/dotnet/runtime/issues/7312), +[unrolling](https://github.com/dotnet/runtime/issues/8107), load/store motion, and strength reduction. @@ -112,7 +112,7 @@ handle SSA renames. We've made note of the prevalence of async/await in modern code (and particularly in web server code such as TechEmpower), and have some opportunities listed in -[#7914](https://github.com/dotnet/coreclr/issues/7914). Some sort of study of +[#6916](https://github.com/dotnet/runtime/issues/6916). Some sort of study of async peanut butter to find more opportunities is probably in order, but what would that look like? @@ -120,7 +120,7 @@ would that look like? ### If-Conversion (cmov formation) This hits big in microbenchmarks where it hits. There's some work in flight -on this (see [#7447](https://github.com/dotnet/coreclr/issues/7447) and +on this (see [#6749](https://github.com/dotnet/runtime/issues/6749) and [#10861](https://github.com/dotnet/coreclr/pull/10861)). @@ -149,7 +149,7 @@ helpers that are known not to trash them, but the information about which helpers trash which registers is spread across a few places in the codebase, and has some puzzling quirks like separate "GC" and "NoGC" kill sets for the same helper. Unifying the information sources and then refining the recorded -kill sets would help avoid more stack traffic. See [#12940](https://github.com/dotnet/coreclr/issues/12940). +kill sets would help avoid more stack traffic. See [#8605](https://github.com/dotnet/runtime/issues/8605). Low-Hanging Fruit ----------------- @@ -160,8 +160,8 @@ The MSIL `switch` instruction is actually encoded as a jump table, so (for better or worse) intelligent optimization of source-level switch statements largely falls to the MSIL generator (e.g. Roslyn), since encoding sparse switches as jump tables in MSIL would be impractical. 
That said, when the MSIL -has a switch of just a few cases (as in [#12868](https://github.com/dotnet/coreclr/issues/12868)), -or just a few distinct cases that can be efficiently checked (as in [#12477](https://github.com/dotnet/coreclr/issues/12477)), +has a switch of just a few cases (as in [#8573](https://github.com/dotnet/runtime/issues/8573)), +or just a few distinct cases that can be efficiently checked (as in [#8418](https://github.com/dotnet/runtime/issues/8418)), the JIT needn't blindly emit these as jump tables in the native code. Work is underway to address the latter case in [#12552](https://github.com/dotnet/coreclr/pull/12552). @@ -170,7 +170,7 @@ underway to address the latter case in [#12552](https://github.com/dotnet/corecl A number of suggestions have been made for having the JIT recognize certain patterns and emit specialized write barriers that avoid various overheads -- -see [#13006](https://github.com/dotnet/coreclr/issues/13006) and [#12812](https://github.com/dotnet/coreclr/issues/12812). +see [#8627](https://github.com/dotnet/runtime/issues/8627) and [#8547](https://github.com/dotnet/runtime/issues/8547). ### Byref-Exposed Store/Load Value Propagation @@ -178,8 +178,8 @@ see [#13006](https://github.com/dotnet/coreclr/issues/13006) and [#12812](https: There are a few tweaks to our value-numbering for byref-exposed loads and stores to share some of the machinery we use for heap loads and stores that would allow better propagation through byref-exposed locals and out parameters -- -see [#13457](https://github.com/dotnet/coreclr/issues/13457) and -[#13458](https://github.com/dotnet/coreclr/issues/13458). +see [#8767](https://github.com/dotnet/runtime/issues/8767) and +[#8768](https://github.com/dotnet/runtime/issues/8768). Miscellaneous ------------- @@ -199,4 +199,4 @@ Maybe it's worth reconsidering the priority based on throughput? RyuJIT has an implementation that handles the valuable cases (see [analysis](https://gist.github.com/JosephTremoulet/c1246b17ea2803e93e203b9969ee5a25#file-mulshift-md) and [follow-up](https://github.com/dotnet/coreclr/pull/13128) for details). The current implementation is split across Morph and CodeGen; ideally it would -be moved to Lower, which is tracked by [#13150](https://github.com/dotnet/coreclr/issues/13150). +be moved to Lower, which is tracked by [#8668](https://github.com/dotnet/runtime/issues/8668). diff --git a/docs/design/coreclr/jit/first-class-structs.md b/docs/design/coreclr/jit/first-class-structs.md index 2153b8cb96aa..e24596b653ec 100644 --- a/docs/design/coreclr/jit/first-class-structs.md +++ b/docs/design/coreclr/jit/first-class-structs.md @@ -213,7 +213,7 @@ This would be done in multiple phases: for a general idea of the kinds of VM changes that may be required. * Defer retyping of struct return types (`Compiler::impFixupStructReturnType()` and `Compiler::impFixupCallStructReturn()`) - * This is probably the "right" way to fix [#26491](https://github.com/dotnet/coreclr/issues/26491). + * This is probably the "right" way to fix [#13355](https://github.com/dotnet/runtime/issues/13355). * Next, eliminate the "pessimizations". 
* For cases where `GT_LCL_FLD` is currently used to "retype" the struct, change it to use *either* `GT_LCL_FLD`, if it is already address-taken, or to use a `GT_BITCAST` otherwise. @@ -230,7 +230,7 @@ This would be done in multiple phases: * The removal of each of these pessimizations should result in improved code generation in cases where previously disabled optimizations are now enabled. * Other ABI-related issues: - * [#8289](https://github.com/dotnet/coreclr/issues/8289) - code generation for x86 promoted struct args. + * [#7048](https://github.com/dotnet/runtime/issues/7048) - code generation for x86 promoted struct args. Related issues: #1133 (maybe), #4766, #23675, #23129 @@ -262,14 +262,14 @@ This would be enabled first by [Defer ABI-specific transformations to Lowering]( (#10019, #9594, #7313) * Support partial struct promotion when some fields are more frequently accessed. * Aggressively promote lclVar struct incoming or outgoing args or returns whose fields match the ABI requirements. - * This should address [\#26710](https://github.com/dotnet/coreclr/issues/26710). + * This should address [\#13417](https://github.com/dotnet/runtime/issues/13417). * Aggressively promote pointer-sized fields of structs used as args or returns * Allow struct promotion of locals that are passed or returned in a way that doesn't match the field types. * Investigate whether it would be useful to re-type single-field structs, rather than creating new lclVars. This would complicate type analysis when copied, passed or returned, but would avoid unnecessarily expanding the lclVar data structures. - * Allow promotion of 32-byte SIMD on 16-byte alignment [\#24368](https://github.com/dotnet/coreclr/issues/24368) + * Allow promotion of 32-byte SIMD on 16-byte alignment [\#12623](https://github.com/dotnet/runtime/issues/12623) * Related: #6839, #9477, #16887 * Also, #11888, which suggests adding a struct promotion stress mode. @@ -294,7 +294,7 @@ Struct-Related Issues in RyuJIT The following issues illustrate some of the motivation for improving the handling of value types (structs) in RyuJIT: -* [\#11407 [RyuJIT] Fully enregister structs that fit into a single register when profitable](https://github.com/dotnet/coreclr/issues/11407), also VSO Bug 98404: .NET JIT x86 - poor code generated for value type initialization +* [\#8016 [RyuJIT] Fully enregister structs that fit into a single register when profitable](https://github.com/dotnet/runtime/issues/8016), also VSO Bug 98404: .NET JIT x86 - poor code generated for value type initialization * This is a simple test case that should generate simply `xor eax; ret` on x86 and x64, but instead generates many unnecessary copies. It is addressed by full enregistration of structs that fit into a register. See [Support Full Enregistration of Struct Types](#support-full-enregistration-of-struct-types): @@ -304,7 +304,7 @@ struct foo { public byte b1, b2, b3, b4; } static foo getfoo() { return new foo(); } ``` -* [\#1133 JIT: Excessive copies when inlining](https://github.com/dotnet/coreclr/issues/1133) +* [\#4308 JIT: Excessive copies when inlining](https://github.com/dotnet/runtime/issues/4308) * The scenario given in this issue involves a struct that is larger than 8 bytes, so it is not impacted by the fixed-size types. 
However, by enabling value numbering and assertion propagation for struct types (which, in turn is made easier by using normal assignments), the @@ -314,54 +314,54 @@ static foo getfoo() { return new foo(); } in the first place. * This case may now be handled; needs verification -* [\#1161 RyuJIT properly optimizes structs with a single field if the field type is int but not if it is double](https://github.com/dotnet/coreclr/issues/1161) +* [\#4323 RyuJIT properly optimizes structs with a single field if the field type is int but not if it is double](https://github.com/dotnet/runtime/issues/4323) * This issue arises because we never promote a struct with a single double field, due to the fact that such a struct may be passed or returned in a general purpose register. This issue could be addressed independently, but should "fall out" of improved heuristics for when to promote and enregister structs. - * Related: [\#8828](https://github.com/dotnet/coreclr/issues/8828) + * Related: [\#7200](https://github.com/dotnet/runtime/issues/7200) * [\#1636 Add optimization to avoid copying a struct if passed by reference and there are no - writes to and no reads after passed to a callee](https://github.com/dotnet/coreclr/issues/1636). + writes to and no reads after passed to a callee](https://github.com/dotnet/runtime/issues/4524). * This issue is related to #1133, except that in this case the desire is to eliminate unneeded copies locally (i.e. not just due to inlining), in the case where the struct may or may not be passed or returned directly. * Unfortunately, there is not currently a scenario or test case for this issue. -* [\#19425 Unix: Unnecessary struct copy while passsing struct of size <=16](https://github.com/dotnet/coreclr/issues/19425) -* [\#16619 [RyuJIT] Eliminate unecessary copies when passing structs](https://github.com/dotnet/coreclr/issues/16619) +* [\#10879 Unix: Unnecessary struct copy while passsing struct of size <=16](https://github.com/dotnet/runtime/issues/10879) +* [\#9839 [RyuJIT] Eliminate unecessary copies when passing structs](https://github.com/dotnet/runtime/issues/9839) * These require changing both the callsite and the callee to avoid copying the parameter onto the stack. -* [\#3144 Avoid marking tmp as DoNotEnregister in tmp=GT_CALL() where call returns a - enregisterable struct in two return registers](https://github.com/dotnet/coreclr/issues/3144) +* [\#5112 Avoid marking tmp as DoNotEnregister in tmp=GT_CALL() where call returns a + enregisterable struct in two return registers](https://github.com/dotnet/runtime/issues/5112) * This issue could be addressed without First Class Structs. However, it should be done along with the streamlining of the handling of ABI-specific struct passing and return values. -* [\#4766 Pi-Digits: Extra Struct copies of BigInteger](https://github.com/dotnet/coreclr/issues/4766) +* [\#5785 Pi-Digits: Extra Struct copies of BigInteger](https://github.com/dotnet/runtime/issues/5785) * In addition to suffering from the same issue as #1133, this has a struct that is promoted even though it is passed (by reference) to its non-inlined constructor. This means that any copy to/from this struct will be field-by-field. 
-* [\#11816 Extra zeroing with structs and inlining](https://github.com/dotnet/coreclr/issues/11816) +* [\#8186 Extra zeroing with structs and inlining](https://github.com/dotnet/runtime/issues/8186) * This issue illustrates the failure of the JIT to eliminate zero-initialization of structs that are subsequently fully defined. It is a related but somewhat different manifestation of the issue in #1133, i.e. that structs are not fully supported in value numbering and optimization. -* [\#12865 JIT: inefficient codegen for calls returning 16-byte structs on Linux x64](https://github.com/dotnet/coreclr/issues/12865) +* [\#8571 JIT: inefficient codegen for calls returning 16-byte structs on Linux x64](https://github.com/dotnet/runtime/issues/8571) * This is related to #3144, and requires supporting the assignment of a multi-reg call return into a promoted local variable, and enabling subsequent elimination of any redundant copies. -* [\#22445](https://github.com/dotnet/coreclr/issues/22445) and [\#22319](https://github.com/dotnet/coreclr/issues/22319) +* [\#11992](https://github.com/dotnet/runtime/issues/11992) and [\#11940](https://github.com/dotnet/runtime/issues/11940) * These are both cases where we introduce a `GT_LCL_FLD` to retype a value that needs to be passed in a register. ## Other Struct-related Issues -* [\#17207](https://github.com/dotnet/coreclr/issues/17207) +* [\#10029](https://github.com/dotnet/runtime/issues/10029) * This suffers from pessimization due to poor handling of conversion (`Unsafe.As`) from `Quaternion` to `Vector4`. It's not immediately clear what's the best way to improve this. -* [#7740](https://github.com/dotnet/coreclr/issues/7740) +* [#6858](https://github.com/dotnet/runtime/issues/6858) * Addressing mode expression optimization for struct fields Sample IR diff --git a/docs/design/coreclr/jit/jit-call-morphing.md b/docs/design/coreclr/jit/jit-call-morphing.md index cf46fa674b14..5c99ac293912 100644 --- a/docs/design/coreclr/jit/jit-call-morphing.md +++ b/docs/design/coreclr/jit/jit-call-morphing.md @@ -17,7 +17,6 @@ post/pre increment, perhaps like this: `Foo(j, a[j++])`. Here `j` is updated vi when the second arg is evaluated, so the earlier uses of `j` would need to be evaluated and saved in a new LclVar. -  One simple approach would be to create new single definition, single use LclVars for every argument that is passed. This would preserve the evaluation order. However, it would potentially create hundreds of LclVar for moderately sized methods and that would overflow the limited number of @@ -25,7 +24,6 @@ tracked local variables in the JIT. One observation is that many arguments to m either constants or LclVars and can be set up anytime we want. They usually will not need a new LclVar to preserve the order of evaluation rule. -  Each argument is an arbitrary expression tree. The JIT tracks a summary of observable side-effects using a set of five bit flags in every GenTree node: `GTF_ASG`, `GTF_CALL`, `GTF_EXCEPT`, `GTF_GLOB_REF`, and `GTF_ORDER_SIDEEFF`. 
These flags are propagated up the tree so that the top node has a particular diff --git a/docs/design/coreclr/jit/lsra-detail.md b/docs/design/coreclr/jit/lsra-detail.md index 7a35d1947adf..a637b8ac6982 100644 --- a/docs/design/coreclr/jit/lsra-detail.md +++ b/docs/design/coreclr/jit/lsra-detail.md @@ -3,25 +3,54 @@ Linear Scan Register Allocation: Design and Implementation Notes Table of Contents ----------------- -[Overview](#overview) - -[Preconditions](#preconditions) - -[Post-Conditions](#post-conditions) - -[LSRA Phases](#lsra-phases) - -[Key Data Structures](#key-data-structures) - -[Dumps and Debugging Support](#dumps-and-debugging-support) - -[LSRA Stress Modes](#lsra-stress-modes) - -[Assertions & Validation](#assertions-validation) - -[Future Extensions and Enhancements](#future-extensions-and-enhancements) - -[References](#references) + * [Overview](#overview) + * [Preconditions](#preconditions) + + [Lowered IR Form (LIR)](#lowered-ir-form-lir) + + [Register Requirements](#register-requirements) + * [Post-Conditions](#post-conditions) + * [LSRA Phases](#lsra-phases) + + [Liveness and Candidate Identification](#liveness-and-candidate-identification) + + [Block Ordering](#block-ordering) + + [Building Intervals and RefPositions](#building-intervals-and-refpositions) + + [Register allocation (doLinearScan)](#register-allocation-dolinearscan) + * [Key Data Structures](#key-data-structures) + + [Live In](#live-in) + + [currentLiveVars](#currentlivevars) + + [Referenceable](#referenceable) + + [Interval](#interval) + + [RegRecord](#regrecord) + + [RefPosition](#refposition) + + [GenTree Nodes](#gentree-nodes) + + [VarToRegMap](#vartoregmap) + * [Dumps and Debugging Support](#dumps-and-debugging-support) + * [LSRA Stress Modes](#lsra-stress-modes) + * [Assertions & Validation](#assertions--validation) + * [Future Extensions and Enhancements](#future-extensions-and-enhancements) + * [Feature Enhancements](#feature-enhancements) + + [Support for Allocating Consecutive Registers](#support-for-allocating-consecutive-registers) + * [Code Quality Enhancements](#code-quality-enhancements) + + [Merge Allocation of Free and Busy Registers](#merge-allocation-of-free-and-busy-registers) + + [Auto-tuning of register selection](#auto-tuning-of-register-selection) + + [Pre-allocating high frequency lclVars](#pre-allocating-high-frequency-lclvars) + + [Avoid Splitting Loop Backedges](#avoid-splitting-loop-backedges) + + [Enable EHWriteThru by default](#enable-ehwritethru-by-default) + + [Avoid Spill When Stack Copy is Valid](#avoid-spill-when-stack-copy-is-valid) + + [Rematerialization](#rematerialization) + + [Improving Reg-Optional Support](#improving-reg-optional-support) + - [Reg-Optional Defs](#reg-optional-defs) + - [Don't Pre-determine Reg-Optional Operand](#dont-pre-determine-reg-optional-operand) + - [Don't Mark DelayFree for Duplicate Operands](#dont-mark-delayfree-for-duplicate-operands) + + [Improving Preferencing](#improving-preferencing) + + [Leveraging SSA form](#leveraging-ssa-form) + + [Spanning trees for physical registers](#spanning-trees-for-physical-registers) + + [Improve the handling of def/use conflicts](#improve-the-handling-of-defuse-conflicts) + * [Throughput Enhancements](#throughput-enhancements) + + [Allocation Window for Min-Opts and Tier 0](#allocation-window-for-min-opts-and-tier-0) + + [Distinguish Intra-Block versus Inter-Block Variables](#distinguish-intra-block-versus-inter-block-variables) + + [Improve the VarToRegMap](#improve-the-vartoregmap) + + [Other 
Throughput Investigations](#other-throughput-investigations)
+ * [Test and Cleanup Issues](#test-and-cleanup-issues)
+ * [References](#references)

Overview
--------
@@ -63,7 +92,7 @@ There are four main phases to LSRA:
 - Note that the order doesn't affect correctness, as the location of `lclVar`s across block boundaries is fixed up
- as necessary by the resolution phase. When not optimizing
+ as necessary by the resolution phase. When not optimizing,
 `lclVar`s are not enregistered, so there is no benefit to using a different order.
@@ -98,6 +127,12 @@ There are four main phases to LSRA:
 as both a source and target (where the source is not marked `delayRegFree`).
+ - An exception is multi-reg local stores of multi-reg sources.
+ For these, the code generator will read each source register,
+ and then move it, if needed, to the destination register.
+ These nodes have 2*N locations where N is the number of registers,
+ so that the liveness can be reflected accordingly.
+
 - For each node, `RefPosition`s are built to reflect the uses, definitions and kills of any registers involved in the evaluation of the node.
@@ -119,7 +154,8 @@ There are four main phases to LSRA:
 - Splitting or spilling an `Interval` doesn't involve creating a new one. Instead, the `RefPosition` simply gets a new assignment, and is either marked for reload/copy or its location is simply
- updated in the incoming map.
+ updated in the incoming map. This differs from other linear-scan
+ allocators, where separate intervals are constructed for this case.

- The resolution phase has two parts:
@@ -246,11 +282,11 @@ Post-Conditions
After LSRA, the graph has the following properties:
-- The `gtRegNum` of each tree node contains the allocated register,
+- The `_gtRegNum` of each tree node (`GetRegNum()`) contains the allocated register,
 if any. Nodes that produce multiple registers are similarly assigned, via extended register number fields. If the node does not produce a value directly (i.e. it is either of void type, or it is
- evaluated as part of its parent) its gtRegNum is set to REG_NA.
+ evaluated as part of its parent) its `_gtRegNum` is set to `REG_NA`.

- In most cases, this register must satisfy the constraints specified for each `RefPosition` by the `BuildNode` methods.
@@ -273,20 +309,24 @@ After LSRA, the graph has the following properties:
 - However, if such a node is constrained to a set of registers, and its current location does not satisfy that requirement, LSRA
- must insert a `GT_COPY` node between the node and its parent. 
- The gtRegNum on the `GT_COPY` node must satisfy the register
+ must insert a `GT_COPY` node between the node and its parent.
+ The `_gtRegNum` on the `GT_COPY` node must satisfy the register
 requirement of the parent.

-- GenTree::gtRsvdRegs has a set of registers used for internal temps.
+- `GenTree::gtRsvdRegs` has a set of registers used for internal temps.
 These must satisfy the constraints specified by the associated `RefPosition`s.

- A tree node is marked `GTF_SPILL` if the tree node must be spilled by the code generator after it has been evaluated.
+ - Note that a write-thru variable def is always written to the stack, and the `GTF_SPILLED`
+ flag (not otherwise used for pure defs) is set to indicate that it also remains live
+ in the assigned register.
+
- A tree node is marked `GTF_SPILLED` if it is a lclVar that must be reloaded prior to use.
- - The register (gtRegNum) on the node indicates the register to + - The register (`_gtRegNum`) on the node indicates the register to which it must be reloaded. - For lclVar nodes, since the uses and defs are distinct tree @@ -299,18 +339,23 @@ After LSRA, the graph has the following properties: insert a `GT_RELOAD` node to specify the register to which it should be reloaded. +- Note that `GT_COPY` and `GT_RELOAD` nodes are inserted immediately after the + instruction that must be copied or reloaded. However, the reload or copy + isn't actually generated until the code generator is generating code for + the consuming node. + - Local variable table (`LclVarDsc`): - `LclVarDsc::lvRegister` is set to true if a local variable has the same register assignment for its entire lifetime. - - `LclVarDsc::lvRegNum` is initialized to its initial register + - `LclVarDsc::_lvRegNum` is initialized to its initial register assignment. - For incoming parameters, this is the register to which `genFnPrologCalleeRegArgs()` will move it. - - Codegen will set `lvRegNum` to its current value as it processes + - Codegen will set `_lvRegNum` to its current value as it processes the trees, since a variable can be assigned different registers over its lifetimes. @@ -347,7 +392,7 @@ well as supporting components) in more depth. should probably handle them in `Compiler::lvaMarkLocalVars()` when it is called after `Lowering`. - - It sets the ` lvLRACandidate` flag on lclVars that are going + - It sets the `lvLRACandidate` flag on lclVars that are going to be register candidates. ### Block Ordering @@ -364,6 +409,8 @@ that satisfies the following properties: - We use block weight, since edge weight is not tracked in the JIT. +- Blocks that enter EH regions have no predecessor. All live-in vars are on the stack. + The order of the `BasicBlock`s is captured in the `blockSequence` member of `LinearScan`. Other implementations of linear scan register allocation aim to ensure @@ -389,7 +436,8 @@ critical edges. This also captured in the `LsraBlockInfo` and is used by the res `Interval`s are built for lclVars up-front. These are maintained in an array, `localVarIntervals` which is indexed by the `lvVarIndex` (not the `varNum`, since we never allocate registers for non-tracked lclVars). Other intervals (for tree temps and -internal registers) are constructed as the relevant node is encountered. +internal registers) are constructed as the relevant node is encountered. Intervals for +`lclVar`s that are live into an exception region are marked `isWriteThru`. The building of `RefPosition`s is done via a traversal of the nodes, using the `blockSequence` constructed as described above. This traversal invokes `LinearScan::BuildNode()` for each @@ -412,6 +460,7 @@ node, which builds `RefPositions` according to the liveness model described abov - A contained memory operand or addressing mode will cause `RefPosition`s to be created for any (non-contained) base or index registers. + - A single `RefPosition` is created for non-contained nodes. In order to build these uses, we need to find the `Interval` associated with the @@ -420,7 +469,8 @@ node, which builds `RefPositions` according to the liveness model described abov have not yet seen the use. This is a simple list on the assumption that the distance between defs and uses of tree temps is rarely very great. 
- For x86 and x64, when we have an instruction that will overwrite one of its sources,
+ When we have an instruction that will overwrite one of its sources, such as RMW
+ operands common on x86 and x64,
 we need to ensure that the other source isn't given the same register as the target. For this, we annotate the use `RefPosition` with `delayRegFree`.
@@ -428,12 +478,15 @@ node, which builds `RefPositions` according to the liveness model described abov
 This is cleared before the next instruction is handled.

- Next, any registers in the kill set for the instruction are killed. This is performed
- by `buildKillPositionsForNode()`, which takes a kill mask that is generally provided
- by a `getKillSetForXXX()` method.
+ by `buildKillPositionsForNode()`, which takes a kill mask that is node-specific and
+ either provided directly by the `buildXXX()` method for the node, or by a `getKillSetForXXX()`
+ method. There is a debug-only method, `getKillSetForNode()`, which is only used for validation.

- Finally, we create `RefTypeDef` `RefPositions` for any registers that are defined by the node.
+ - For a `STORE_LCL_VAR` of a write-thru `lclVar`, the `RefPosition` is marked `writeThru`.
+
- A `RefTypeBB` `RefPosition` marks the beginning of a block, at which the incoming live variables are set to their locations at the end of the selected predecessor.
@@ -462,13 +515,7 @@ During this phase, preferences are set:
 (at a previous definition) been assigned a register, and we want to try to use that register again, as well as the case where it has yet to be assigned a register.
- This area has room for improvement:
-
- - A specific case that could be improved is [Issue #25312](https://github.com/dotnet/coreclr/issues/25312)
- which involves preferencing for HW intrinsics.
-
- - Issue [#22374](https://github.com/dotnet/coreclr/issues/22374) also has a pointer
- to some methods that could benefit from improved preferencing.
+ This area has room for improvement (see [Improving Preferencing](#improving-preferencing)).

- Register preferences are set:
@@ -528,7 +575,7 @@ LinearScanAllocation(List<RefPosition> refPositions)
 - Currently, parameters may not be allocated a register if their weighted reference count is less than `BB_UNITY_WEIGHT`, however plenty of room remains for improving the allocation of
- parameters [Issue \#11356](https://github.com/dotnet/coreclr/issues/11356)
+ parameters [Issue \#7999](https://github.com/dotnet/runtime/issues/7999)

- `TryAllocateFreeReg()` iterates over the registers, attempting to find the best free register (if any) to allocate:
@@ -547,17 +594,21 @@ LinearScanAllocation(List<RefPosition> refPositions)
 which is not currently live, but which previously occupied that register).
+ - Currently it doesn't take encoding size into account.
+ [Issue \#7996](https://github.com/dotnet/runtime/issues/7996)
+ tracks this.
+
 - It always uses the same order for iterating over the registers. The jit32 register allocator used a different ordering for tree temps than for lclVars. It's unclear if this matters for LSRA,
- but [Issue \#11357](https://github.com/dotnet/coreclr/issues/11357)
+ but [Issue \#8000](https://github.com/dotnet/runtime/issues/8000)
 tracks this question.
- `AllocateBusyReg()` iterates over all the registers trying to find the best register to spill (it must only be called if `tryAllocateFreeReg()` was unable to find one):
- - It takes into account the following:
+ - It takes into account a number of heuristics including:

 - The distance to the next use of the `Interval` being spilled
@@ -567,16 +618,15 @@ LinearScanAllocation(List<RefPosition> refPositions)
 - Whether the `RefPosition` being allocated, or the one potentially being spilled, is reg-optional
+ - Both `tryAllocateFreeReg()` and `allocateBusyReg()` currently fully evaluate the "goodness"
+ of each register.
+
 - It will always spill an `Interval` either at its most recent use, or at the entry to the current block.
- - Issues [\#7609](https://github.com/dotnet/coreclr/issues/7609) and
- [\#7665](https://github.com/dotnet/coreclr/issues/7665) track improvement of spill
- placement.
-
- - It is quite possible that combining `TryAllocateFreeReg()` and
+ - It is quite likely that combining `TryAllocateFreeReg()` and
 `AllocateBusyReg()` would be more effective, see
- [Merge Allocation of Free and Busy Registers](#combine)
+ [Merge Allocation of Free and Busy Registers](#merge-allocation-of-free-and-busy-registers)

- Resolution
@@ -599,18 +649,10 @@ LinearScanAllocation(List<RefPosition> refPositions)
 - Resolution of exception edges
- - This is currently done by ensuring that any variable that's
- live in to an exception region is maintained on stack.
-
- - Issue \#6001 raises the performance issue due to this
- implementation.
-
- - Work is in-progress to support the notion
- of "write-thru" variables; for these, all definitions
- would write to memory, but uses could use a register
- value, if available.
+ - When `COMPlus_EnableEHWriteThru == 0`, any variable that's
+ live into an exception region is always referenced on the stack.

- - The value is reloaded at exception boundaries.
+ - See [Enable EHWriteThru by default](#enable-ehwritethru-by-default).

- Code generation (genGenerateCode)
@@ -699,7 +741,7 @@ fixed registers.
 The representation of `TYP_DOUBLE` registers on 32-bit `Arm` is complicated by the fact that they overlap two `TYP_FLOAT` registers. The handling of this
-case could be improved.
+case could be improved. See [Support for Allocating Consecutive Registers](#support-for-allocating-consecutive-registers).

### RefPosition
@@ -727,14 +769,11 @@ enregisterable variable or temporary or physical register. It contains
- `RefTypeUse` is a pure use of an `Interval`.

- `RefTypeKill` is a location at which a physical register is
- killed. These only exist on `RegRecord`s, not on `Interval`s
-
- - Note that this type is probably not needed -- see especially
- notes about physical registers in "future" section.
+ killed. These only exist on `RegRecord`s, not on `Interval`s.

- `RefTypeBB` is really just a marker in the list of `RefPosition`s,
- where the register allocator needs to record the register
- locations at block boundaries. It is not associated with an
+ where the register allocator needs to record the register
+ locations at block boundaries. It is not associated with an
 `Interval` or `RegRecord`.

- `RefTypeFixedReg` is a `RefPosition` on a `RegRecord` that marks a
@@ -807,7 +846,7 @@ The following dumps and debugging modes are provided:
 - For each incoming arg: its type and incoming location
 - For each instruction:
- - The current contents of the `OperandToLocationInfoMap`.
+ - The current contents of the `defList`.
This corresponds to all the nodes that have defined values that have not yet been consumed. - An abbreviated dump of the GenTree node. @@ -822,8 +861,8 @@ The following dumps and debugging modes are provided: progresses. - After allocation - - A dump of `RefPosition`s, sequentially, and grouped for Var - `Interval` s + - A dump of `RefPosition`s, sequentially, and grouped for `lclVar` + `Interval`s - During resolution - A list of the candidates for resolution at each split, join or @@ -895,18 +934,15 @@ exclusive: - Never allocate a register for a `RefPosition` marked `regOptional` (0x1000). -It may be useful to also have a stress mode that deliberately trashes registers that -are not currently occupied (e.g. at block boundaries). Issue [#18944](https://github.com/dotnet/coreclr/issues/18944). - Assertions & Validation ----------------------- There are many assertions in `LinearScan`. The following are the most effective at identifying issues (i.e. they frequently show up in bugs): -- The node information isn't showing the number of consumed registers - that are expected: - - `assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume));` +- The def and use counts don't match what's expected: + - See the asserts at the end of the `LinearScan::BuildNode()` method (these are + architecture-specific, and can be found in lsraxarch.cpp, lsraarm64.cpp and lsraarm.cpp). - This usually means that the `BuildXXX` method for this node is not building `RefPosition`s for all of its uses (which is what `consume` has been set to). - The liveness information is incorrect. This assert comes from `LinearScan::checkLastUses()` which @@ -918,19 +954,26 @@ effective at identifying issues (i.e. they frequently show up in bugs): possibly because it is a stress mode, and the instruction hasn't correctly specified its minimum number of registers. -At the end of write-back (`resolveRegisters()`), `verifyFinalAllocation()` runs. It doesn't do a lot of validation, but it - prints the final allocation (including final spill placement), so is useful for tracking down correctness issues. +At the end of write-back (`resolveRegisters()`), `verifyFinalAllocation()` runs. It doesn't do a +lot of validation, but it prints the final allocation (including final spill placement), so is +useful for tracking down correctness issues. Future Extensions and Enhancements ---------------------------------- -The potential enhancements to the JIT, some of which are referenced in this document, can generally be found by [searching for LSRA in open issues](https://github.com/dotnet/coreclr/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+LSRA+in%3Atitle). The ones that are focused on JIT throughput are labeled `JITThroughput`. +The potential enhancements to the JIT, some of which are referenced in this document, can generally be found by [searching for LSRA in open issues](https://github.com/dotnet/runtime/issues?q=is%3Aissue+is%3Aopen+lsra+in%3Atitle). The ones that are focused on JIT throughput are labeled `JITThroughput`. + +## Feature Enhancements + +### Support for Allocating Consecutive Registers + +This is [\#39457](https://github.com/dotnet/runtime/issues/39457). As described there, the challenge is to do this without impacting the common path. This should also include cleaning up the allocating of consecutive registers for `TYP_DOUBLE` for Arm32 [\#8758](https://github.com/dotnet/runtime/issues/8758). 
## Code Quality Enhancements -### Merge Allocation of Free and Busy Registers +### Merge Allocation of Free and Busy Registers -This is captured as [\#15408](https://github.com/dotnet/coreclr/issues/15408) +This is captured as [\#9399](https://github.com/dotnet/runtime/issues/9399) Consider merging allocating free & busy regs. Currently the register allocator will always allocate an available register, even if it only meets @@ -943,31 +986,34 @@ The alternative approach under consideration is to combine free and busy registe (`tryAllocateFreeReg()` and `allocateBusyReg()`) such that a busy register will be spilled if there are no suitable free registers, and the current `Interval` has greater weight than the `Interval` occupying the register. This must be accompanied by some improvements in the efficiency of the -checks, so as not to degrade throughput. This is currently a work-in-progress (https://github.com/CarolEidt/coreclr/tree/CombineAlloc) but hasn't yet been ported to the runtime repo, and needs +checks, so as not to degrade throughput. This is currently a work-in-progress (https://github.com/CarolEidt/runtime/tree/CombineAlloc), and needs further work to eliminate diffs and improve throughput. This would make it possible to spill a register for a higher weight `lclVar` rather than "settling" for a register that's a poor fit for its requirements. This is probably the best approach to -address Issues [\#7664](https://github.com/dotnet/coreclr/issues/7664) +address Issues [\#6824](https://github.com/dotnet/runtime/issues/6824): Heuristics for callee saved reg allocation and -[\#13735](https://github.com/dotnet/coreclr/issues/13735) +[\#8846](https://github.com/dotnet/runtime/issues/8846): Let variables within a loop use register first. The following issues are related: -* Both `tryAllocateFreeReg()` and `allocateBusyReg()` currently fully evaluate the "goodness" of each register. - Issue [\#7301](https://github.com/dotnet/coreclr/issues/7301) tracks the possibility of short-circuiting - this evaluation. - Making such an improvement should probably be done in conjunction with this work. +- Issues [\#6806](https://github.com/dotnet/runtime/issues/6806) and + [\#6825](https://github.com/dotnet/runtime/issues/6825) track improvement of spill + placement. + +- Issue [\#6705](https://github.com/dotnet/runtime/issues/6705) tracks the possibility of + short-circuiting this evaluation. + Making such an improvement should probably be done in conjunction with this work. -* Issue [\#26847](https://github.com/dotnet/coreclr/issues/26847) - Heuristics for callee saved reg allocation. +- Issue [\#13466](https://github.com/dotnet/runtime/issues/13466): + Inefficient register allocation in simple method which dereferences a span ### Auto-tuning of register selection This is not yet captured as a GitHub issue. -This would be best done after [Merge Allocation of Free and Busy Registers](#combine). +This would be best done after [Merge Allocation of Free and Busy Registers](#merge-allocation-of-free-and-busy-registers). The idea would be to add support to change the weight given to the various selection heuristics according to a configuration specification, allowing them to be auto-tuned. 
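As a rough sketch of what that could look like (the class, member names, and weights below are invented for illustration and do not correspond to actual `LinearScan` code), the per-register selection heuristics could be collapsed into a weighted score whose coefficients an external tuner varies between runs:

```
// Hypothetical sketch only: heuristic names and default weights are invented
// for illustration and do not correspond to actual LinearScan fields.
public sealed class RegisterSelectionWeights
{
    // Configurable weights, e.g. parsed from a configuration string so that
    // an auto-tuner can vary them between runs.
    public double IsFree { get; set; } = 4.0;         // register is currently free
    public double IsPreferred { get; set; } = 2.0;    // register is in the Interval's preference set
    public double CoversLifetime { get; set; } = 1.0; // register is free for the Interval's whole lifetime

    // Score a candidate register from the heuristics that apply to it.
    public double Score(bool isFree, bool isPreferred, bool coversLifetime) =>
        (isFree ? IsFree : 0.0) +
        (isPreferred ? IsPreferred : 0.0) +
        (coversLifetime ? CoversLifetime : 0.0);
}
```

A tuner would then search over the weight space, keeping the configuration that produces the best code-quality metrics.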
@@ -980,7 +1026,7 @@ would be added as an alternate path in the register allocator, leaving the defau
### Pre-allocating high frequency lclVars

-This is captured as Issue [\#11424](https://github.com/dotnet/coreclr/issues/11424)
+This is captured as Issue [\#8019](https://github.com/dotnet/runtime/issues/8019)

Consider pre-allocating high-frequency lclVars. The idea here is to ensure that high frequency lclVars aren't forced to use less-than-optimal
@@ -1015,7 +1061,7 @@ One strategy would be to do something along the lines of (appropriate hand-wavin
### Avoid Splitting Loop Backedges

-This is captured as Issue [\#16857](https://github.com/dotnet/coreclr/issues/16857).
+This is captured as Issue [\#9909](https://github.com/dotnet/runtime/issues/9909).

When the register allocator performs resolution across block boundaries, it may split critical edges (edges from a block with multiple successors to a block with multiple predecessors).
@@ -1029,16 +1075,43 @@ set would ensure that the variable locations match. This would eliminate not jus
but all the extra branches currently inserted for resolution. It remains to be seen whether this would outweigh the impact of cases where more resolution moves would be required.
+I have an old experimental branch where I started working on this: https://github.com/CarolEidt/coreclr/tree/NoEdgeSplitting (not yet ported to the runtime repo).
+
+### Enable EHWriteThru by default
+
+When `COMPlus_EnableEHWriteThru` is set, some performance regressions are observed. When an EH write-thru variable (i.e. one that is live into an exception region) is defined, its value is
+always stored, in addition to potentially remaining live in a register. This increases register pressure, which may result in worse code.
+
+Further investigation is needed, but the following mitigations may be effective (here the
+term "EH Var" means a `lclVar` marked `lvLiveInOutOfHndlr`):
+
+- Adjust the heuristics:
+
+ 1. For determining whether an EH var should be a candidate for register allocation,
+ e.g. if the defs outweigh the uses.
+
+ 2. For determining when a definition of an EH var should only be stored to the stack,
+ rather than also remaining live in the register.
+
+- If the weight of the defs exceeds the weight of the blocks with successors in exception
+ regions, consider spilling the `lclVar` to the stack only at those boundaries.
+
+The original issue to enable EH WriteThru is [#6212](https://github.com/dotnet/runtime/issues/6212).
+It remains open pending the resolution of the performance regressions.
+
### Avoid Spill When Stack Copy is Valid

The idea here is to avoid spilling at a use if the value on the stack is already the correct value.
-Issues that this might address include [\#11344] Spill single-def vars at def, [\#7665] Improve spill placement,
-and [\#7465] Avoiding reg spill to memory when reg-value is consistent with memory.
+Issues that this might address include:
+- [\#7994](https://github.com/dotnet/runtime/issues/7994) Spill single-def vars at def,
+- [\#6825](https://github.com/dotnet/runtime/issues/6825) Improve spill placement, and
+- [\#6761](https://github.com/dotnet/runtime/issues/6761) Avoiding reg spill to memory when reg-value is consistent with memory.

Currently the register allocator doesn't track whether a local variable has the same value on the stack
The work-in-progress to support "write-thru" EH variables (variables live across exception -boundaries) adds capability to liveness analysis and code generation (in addition to the register allocator) +as in a register. The support for "write-thru" EH variables (variables live across exception +boundaries) has added the capability to liveness analysis and code generation (in addition to the register allocator) to handle variables that are live in both registers and on the stack. This support could be further leveraged to avoid spilling single-def variables to memory if they have already been spilled at their definition. @@ -1047,19 +1120,29 @@ Extending such support to more generally track whether there is already a valid work. Fully general support would require such information at block boundaries, but it might be worth investigating whether it would be worthwhile and cheaper to simply track this information within a block. -### Support Reg-Optional Defs +### Rematerialization + +This would involve identifying `Interval`s whose values are cheaper to recompute than to spill +and reload. Without SSA form, this would probably be easiest to do when there's a single def. +Issue [\#6264](https://github.com/dotnet/runtime/issues/6264). + +### Improving Reg-Optional Support + +#### Reg-Optional Defs -Issues [\#7752](https://github.com/dotnet/coreclr/issues/7752) and -[\#7753](https://github.com/dotnet/coreclr/issues/7753) track the +Issues [\#6862](https://github.com/dotnet/runtime/issues/6862) and +[\#6863](https://github.com/dotnet/runtime/issues/6863) track the proposal to support "folding" of operations using a tree temp when the defining operation supports read-modify-write (RMW) to memory. This involves supporting the possibility of a def being reg-optional, as well as its use, so that it need never occupy a register. -### Don't Pre-determine Reg-Optional Operand +I have an old experimental branch: https://github.com/CarolEidt/coreclr/tree/RegOptDef where I started working on this. -Issue [\#6361](https://github.com/dotnet/coreclr/issues/6361) +#### Don't Pre-determine Reg-Optional Operand + +Issue [\#6358](https://github.com/dotnet/runtime/issues/6358) tracks the problem that `Lowering` currently has to select a single operand to be reg-optional, even if either operand could be. This requires some additional state because @@ -1067,6 +1150,18 @@ LSRA can't easily navigate from one use to the other to communicate whether the first operand has been assigned a register. +#### Don't Mark DelayFree for Duplicate Operands + +Issue [\#9896](https://github.com/dotnet/runtime/issues/9896). + +### Improving Preferencing + +- Issue [#12945](https://github.com/dotnet/runtime/issues/12945) + involves preferencing for HW intrinsics. + +- Issue [#11959](https://github.com/dotnet/runtime/issues/11959) also has a pointer + to some methods that could benefit from improved preferencing. + ### Leveraging SSA form This has not yet been opened as a github issue. @@ -1083,7 +1178,7 @@ Making SSA form available to LSRA would: This has not yet been opened as a github issue. LLVM has extended their linear scan register allocator with something it -calls "Greedy Register Allocation". This uses a priority queue for the +calls "Greedy Register Allocation"[[6](#6),[7](#7)]. 
This uses a priority queue for the order of allocation (sorted by decreasing spill cost), and a B+ tree to represent each physical register. I think that using the B+ trees for physical registers would be an improvement over the current PhysRegs, @@ -1091,6 +1186,13 @@ and we may want to experiment with changing the allocation order as well. It would not be necessary to significantly modify the process of creating `Interval`s, nor the resolution phase. +### Improve the handling of def/use conflicts + +Def/use conflicts arise when the producing and consuming nodes each have register requirements, +and they conflict. The current mechanism, in which the register assignment of one of the +`RefPosition`s is changed, can lead to problems because there's then +no associated `RefTypeFixedReg` for that reference. This is Issue [\#10196](https://github.com/dotnet/runtime/issues/10196). + ## Throughput Enhancements ### Allocation Window for Min-Opts and Tier 0 @@ -1111,48 +1213,75 @@ form of a `defList` that holds all of the tree temp values that have been define Once this is empty, the register allocator could process the current list of `RefPosition`s and then start over. +[Issue \#6690](https://github.com/dotnet/runtime/issues/6690) proposes to build `RefPositions` incrementally, which is part of this item (a minimal sketch of this windowing idea appears just before the references below). + ### Distinguish Intra-Block versus Inter-Block Variables It is unclear whether it would be beneficial, but if we could keep track of the variables that are only used within a block (presumably true of many introduced temps), we may find that we could continue to limit the number of variables whose liveness is tracked across blocks, keeping an expanded -set only for transient liveness. Issue [\#11339](https://github.com/dotnet/coreclr/issues/11339). +set only for transient liveness. Issue [\#7992](https://github.com/dotnet/runtime/issues/7992). Note that this would only improve JIT throughput for optimized code. ### Improve the VarToRegMap -The `VarToRegMap` incurs non-trivial JIT-time overhead. Issue \#11396 addresses +The `VarToRegMap` incurs non-trivial JIT-time overhead. +Issue [\#8013](https://github.com/dotnet/runtime/issues/8013) addresses the question of whether there is an alternative that would have better performance. This would also improve JIT throughput only for optimized code. +### Other Throughput Investigations + +Issue [\#7998](https://github.com/dotnet/runtime/issues/7998) suggests evaluating the throughput cost of updating the preferences at each +kill site. + +## Test and Cleanup Issues + +Issue [\#9767](https://github.com/dotnet/runtime/issues/9767) captures the issue that the +"spill always" stress mode (`LSRA_SPILL_ALWAYS`, `COMPlus_JitStressRegs=0x800`) doesn't work properly. + +Issue [\#6261](https://github.com/dotnet/runtime/issues/6261) has to do with `RegOptional` +`RefPositions` that are marked as `copyReg` or `moveReg`. See the notes on this issue; +I don't think such cases should arise, but there may be some cleanup needed here. + +Issue [\#5793](https://github.com/dotnet/runtime/issues/5793) suggests adding a stress mode that +allocates registers for multi-reg nodes in the reverse of the ABI requirements. + +Issue [#10691](https://github.com/dotnet/runtime/issues/10691) suggests adding a stress mode that +deliberately trashes registers that are not currently occupied (e.g. at block boundaries).
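To make the idea in "Allocation Window for Min-Opts and Tier 0" above a bit more concrete, here is a minimal C++ sketch of the windowing scheme. Everything here is a hypothetical stand-in (the real implementation would live in `LinearScan` and use the JIT's own data structures and arena allocation); it illustrates the control flow of the idea, not the actual allocator:

```cpp
#include <vector>

// Hypothetical stand-ins for the JIT's real data structures.
struct Node        { /* a node in the block's linear order */ };
struct RefPosition { /* a point at which a value is defined or used */ };

// Hypothetical helpers: the first would mirror the existing per-node
// RefPosition construction; the second would run the normal allocation
// pass over just the current window's RefPositions.
static void buildRefPositions(Node* node, std::vector<Node*>& defList,
                              std::vector<RefPosition>& window) { /* elided */ }
static void allocateWindow(std::vector<RefPosition>& window) { /* elided */ }

// Whenever the defList empties, no tree temps are live, so the RefPositions
// accumulated so far can be allocated and then discarded, keeping the
// working set small for MinOpts/Tier 0 compilations.
void allocateInWindows(const std::vector<Node*>& linearOrder)
{
    std::vector<Node*>       defList; // tree temps defined but not yet used
    std::vector<RefPosition> window;  // RefPositions for the current window

    for (Node* node : linearOrder)
    {
        buildRefPositions(node, defList, window);
        if (defList.empty())
        {
            allocateWindow(window);
            window.clear(); // start the next window from scratch
        }
    }
    if (!window.empty())
    {
        allocateWindow(window); // flush a trailing partial window
    }
}
```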
+ References ---------- -1. Boissinot, B. et +1. Boissinot, B. et al "Fast liveness checking for ssa-form programs," CGO 2008, pp. 35-44. http://portal.acm.org/citation.cfm?id=1356058.1356064&coll=ACM&dl=ACM&CFID=105967773&CFTOKEN=80545349 -2. Boissinot, B. et al, "Revisiting +2. Boissinot, B. et al, "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency," CGO 2009, pp. 114-125. -3. Wimmer, C. and Mössenböck, D. "Optimized +3. Wimmer, C. and Mössenböck, H. "Optimized Interval Splitting in a Linear Scan Register Allocator," ACM VEE 2005, pp. 132-141. -4. Wimmer, C. and Franz, M. "Linear Scan +4. Wimmer, C. and Franz, M. "Linear Scan Register Allocation on SSA Form," ACM CGO 2010, pp. 170-179. -5. Traub, O. et al "Quality and Speed in Linear-scan Register +5. Traub, O. et al "Quality and Speed in Linear-scan Register Allocation," SIGPLAN '98, pp. 142-151. -6. Olesen, J. "Greedy Register Allocation in LLVM 3.0," LLVM Project Blog, Sept. 2011. +6. Olesen, J. "Greedy Register Allocation in LLVM 3.0," LLVM Project Blog, Sept. 2011. - (Last retrieved Feb. 2012) + (Last retrieved July 2020) + +7. Yatsina, M. "LLVM Greedy Register Allocator," LLVM Dev Meeting, April 2018. + + (Last retrieved July 2020) diff --git a/docs/design/coreclr/jit/object-stack-allocation.md b/docs/design/coreclr/jit/object-stack-allocation.md index ec6fe14b3d64..914013878000 --- a/docs/design/coreclr/jit/object-stack-allocation.md +++ b/docs/design/coreclr/jit/object-stack-allocation.md @@ -21,7 +21,7 @@ various Java runtimes. This optimization is more important for Java since it do [roslyn #2104](https://github.com/dotnet/roslyn/issues/2104) Compiler should optimize "alloc temporary small object" to "alloc on stack" -[coreclr #1784](https://github.com/dotnet/coreclr/issues/1784) CLR/JIT should optimize "alloc temporary small object" to "alloc on stack" automatically +[runtime #4584](https://github.com/dotnet/runtime/issues/4584) CLR/JIT should optimize "alloc temporary small object" to "alloc on stack" automatically ## Escape Analysis @@ -157,7 +157,7 @@ So the upper bound from this experiment is 22.2%. @AndyAyersMS recently resurrected @echesakovMSFT's work and used it to [prototype stack allocation of a simple delegate that's directly invoked](https://github.com/dotnet/coreclr/compare/master...AndyAyersMS:NonNullPlusStackAlloc). It exposed a number of things that need to be done in the jit to generate better code for stack-allocated objects. The details are in comments of -[coreclr #1784](https://github.com/dotnet/coreclr/issues/1784). +[runtime #4584](https://github.com/dotnet/runtime/issues/4584). We did some analysis of Roslyn csc self-build to see where this optimization may be beneficial. One hot place was found in [GreenNode.WriteTo](https://github.com/dotnet/roslyn/blob/fab7134296816fc80019c60b0f5bef7400cf23ea/src/Compilers/Core/Portable/Syntax/GreenNode.cs#L647). This object allocation accounts for 8.17% of all object allocations in this scenario.
The number is not as impressive as a percentage diff --git a/docs/design/coreclr/jit/ryujit-tutorial.md b/docs/design/coreclr/jit/ryujit-tutorial.md index 050c11495b6a..69e90580fbfa 100644 --- a/docs/design/coreclr/jit/ryujit-tutorial.md +++ b/docs/design/coreclr/jit/ryujit-tutorial.md @@ -606,7 +606,7 @@ public static int PopCount(ulong bitVectorArg) #### Notes The sample I'm going to walk through implements support for pop count (counting the number of '1' bits in a 64-bit value). -  + We're going to start by assuming that we have a method with a known signature that implements PopCount. Here's the implementation we're going to use. It simply takes the input value, and keeps anding with one, and then shifting right. We're first going to simply recognize the name and signature, and replace the method call with a simple PopCnt IR node. diff --git a/docs/design/coreclr/jit/variabletracking.md b/docs/design/coreclr/jit/variabletracking.md index e37dd8b54f4a..c4f86a6cd56d 100644 --- a/docs/design/coreclr/jit/variabletracking.md +++ b/docs/design/coreclr/jit/variabletracking.md @@ -338,7 +338,7 @@ There are many things we can do to improve optimized debugging: Currently we don't have the IL offset of them. And this is broadly used to improve code performance. -- [Promoted structs](https://github.com/dotnet/coreclr/issues/23542): There is no debug support for fields of promoted structs, we just report the struct itself. +- [Promoted structs](https://github.com/dotnet/runtime/issues/12369): There is no debug support for fields of promoted structs; we just report the struct itself. -- [Reduce space used for VariableLiveDescriptor](https://github.com/dotnet/coreclr/issues/23544): we are currently using a `jitstd::list`, which is a double linked list. +- [Reduce space used for VariableLiveDescriptor](https://github.com/dotnet/runtime/issues/12371): we are currently using a `jitstd::list`, which is a doubly linked list. We could use a simple singly linked list with push_back(), head(), tail(), size() operations and an iterator and we would be saving memory. diff --git a/docs/design/coreclr/profiling/Profiler Attach on CoreCLR.md b/docs/design/coreclr/profiling/Profiler Attach on CoreCLR.md index 1e494a79f364..6a724e4fdc2f 100644 --- a/docs/design/coreclr/profiling/Profiler Attach on CoreCLR.md +++ b/docs/design/coreclr/profiling/Profiler Attach on CoreCLR.md @@ -13,7 +13,7 @@ Attaching a profiler to a running CoreCLR process involves sending a message fro 2) `uint attachTimeout` - (Required) A timeout that informs the runtime how long to wait while attempting to attach. This does not impact the timeout of trying to send the attach message. 3) `Guid profilerGuid` - (Required) The profiler's GUID to use when initializing. 4) `string profilerPath` - (Required) The path to the profiler on disk. -5) `byte[] additionalData` - (Optional) A data blob that will be passed to `ICorProfilerCallback3::InitializeForAttach` as `pvClientData`. +5) `byte[] additionalData` - (Optional) A data blob that will be passed to `ICorProfilerCallback3::InitializeForAttach` as `pvClientData`. This method returns a status HR following the usual convention: 0 (S_OK) means a profiler was successfully attached and any other value is an error indicating what went wrong.
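To sketch the receiving side of that message: the `additionalData` bytes show up in the profiler's `ICorProfilerCallback3::InitializeForAttach` as `pvClientData`. Below is a hedged C++ sketch of what the body of that method might do; the interface, method signature, and named flags come from corprof.idl, while the free-function framing, the chosen event flags, and the blob handling are illustrative assumptions:

```cpp
#include <corprof.h>

// Sketch of an ICorProfilerCallback3::InitializeForAttach body. The COM
// boilerplate and the rest of the callback implementation are omitted.
HRESULT InitializeForAttachBody(IUnknown* pCorProfilerInfoUnk,
                                void* pvClientData, UINT cbClientData)
{
    if (pvClientData != nullptr && cbClientData > 0)
    {
        // Decode the additionalData blob here. Its layout is a private
        // contract between the trigger and the profiler; the runtime
        // treats it as opaque bytes.
    }

    ICorProfilerInfo3* pInfo = nullptr;
    HRESULT hr = pCorProfilerInfoUnk->QueryInterface(
        IID_ICorProfilerInfo3, reinterpret_cast<void**>(&pInfo));
    if (FAILED(hr))
    {
        return hr; // returning a failure reports the attach as failed
    }

    // After attach, only flags in COR_PRF_ALLOWABLE_AFTER_ATTACH may be
    // requested; others fail with CORPROF_E_UNSUPPORTED_FOR_ATTACHING_PROFILER.
    hr = pInfo->SetEventMask(COR_PRF_MONITOR_THREADS | COR_PRF_MONITOR_GC);
    pInfo->Release();
    return hr;
}
```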
diff --git a/docs/design/coreclr/profiling/Profiler Breaking Changes.md b/docs/design/coreclr/profiling/Profiler Breaking Changes.md index 07c54c7277ff..0e6d5e2444b9 100644 --- a/docs/design/coreclr/profiling/Profiler Breaking Changes.md +++ b/docs/design/coreclr/profiling/Profiler Breaking Changes.md @@ -4,4 +4,4 @@ Over time we will need to modify the Profiler APIs, this document will serve as 1. Code Versioning introduced changes documented [here](../../features/code-versioning-profiler-breaking-changes.md) 2. The work to allow adding new types and methods after module load means ICorProfilerInfo7::ApplyMetadata will now potentially trigger a GC, and will not be callable in situations where a GC can not happen (for example ICorProfilerCallback::RootReferences). -3. As part of the work to allow ReJIT on attach ReJITted methods will no longer be inlined (ever). Since the inlining is blocked there won't be a `ICorProfilerCallback::JITInlining` callback. \ No newline at end of file +3. As part of the work to allow ReJIT on attach, ReJITted methods will no longer be inlined (ever). Since the inlining is blocked, there won't be an `ICorProfilerCallback::JITInlining` callback. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Attach.md b/docs/design/coreclr/profiling/davbr-blog-archive/Attach.md index 93335dd51fec..f9f08680f24f 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Attach.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Attach.md @@ -7,8 +7,6 @@ Profiler attach is a feature that allows you to attach a profiler to an already Please note! You can't just take any profiler you bought and suddenly be able to attach it to a running application. The profiler must be built with "attachability" in mind. So if you're a profiler developer looking to pump some attachability into your product, read on--this article is for you. Everyone else, this article will probably be less useful--but just as riveting. -# - # The Players So how do you get your profiler attached to a running process? The process has already started, and the CLR code which interrogates the environment to determine whether to load a profiler has already run. So how do you kick the process into loading your profiler? The answer: Another process! @@ -19,17 +17,17 @@ In order to force your profiler DLL to load into the target profilee process, yo # Inside the Trigger Process -Your trigger uses a simple API method, AttachProfiler, to request the target process to load your profiler. Where is this method defined? Well, it doesn't make much sense to put it on ICorProfilerInfo, since that interface is only available to a profiler after it's been loaded. You could imagine a C export from mscoree.dll. But because of in-process side-by-side CLR instances, we're moving away from mscoree.dll exports to a COM-based interface model called "metahost". +Your trigger uses a simple API method, AttachProfiler, to request the target process to load your profiler. Where is this method defined? Well, it doesn't make much sense to put it on ICorProfilerInfo, since that interface is only available to a profiler after it's been loaded. You could imagine a C export from mscoree.dll. But because of in-process side-by-side CLR instances, we're moving away from mscoree.dll exports to a COM-based interface model called "metahost". ## Meta-whos-its?
Whereas the "hosting" interfaces enable one to host and manage a CLR in a process, the "metahost" interfaces allow one to manage multiple CLRs that may be installed onto a machine or loaded into a single process. Here's a high-level view of how you navigate your way through metahost to find AttachProfiler() (there’s a pointer to actual sample code below). -- Get ICLRMetaHost -- Enumerate the CLRs loaded into the target process -- Get ICLRRuntimeInfo for the particular CLR in the target process you want to profile -- Get the corresponding ICLRProfiling -- Call ICLRProfiling::AttachProfiler +- Get ICLRMetaHost +- Enumerate the CLRs loaded into the target process +- Get ICLRRuntimeInfo for the particular CLR in the target process you want to profile +- Get the corresponding ICLRProfiling +- Call ICLRProfiling::AttachProfiler ## Users and Integrity @@ -57,8 +55,6 @@ From your InitializeForAttach implementation, your profiler will call SetEventMa It was impossible to enable all profiling scenarios for attach in the time we had for the V4 release. So only profilers that do **sampling** and **memory** analysis will function properly after attaching to a live process. Attempts to use other profiling APIs after attach will be met with CORPROF\_E\_UNSUPPORTED\_FOR\_ATTACHING\_PROFILER. -### - ## Specific Callback Limitations When your attaching profiler calls SetEventMask, you will be limited to only those event mask flags present in the COR\_PRF\_ALLOWABLE\_AFTER\_ATTACH bitmask (you'll find it in corprof.idl). Any other flags, and SetEventMask will return CORPROF\_E\_UNSUPPORTED\_FOR\_ATTACHING\_PROFILER. @@ -67,14 +63,14 @@ When your attaching profiler calls SetEventMask, you will be limited to only tho Most of the ICorProfilerInfo\* methods are available to your attaching profiler, however some are not--particularly those involved in **IL rewriting**. Here's a list of all ICorProfilerInfo\* methods NOT supported for attaching profilers: -- GetILFunctionBody -- GetILFunctionBodyAllocator -- SetILFunctionBody -- SetILInstrumentedCodeMap -- SetEnterLeaveFunctionHooks\* -- SetFunctionIDMapper\* -- GetNotifiedExceptionClauseInfo -- All methods related to Enter/Leave/Tailcall +- GetILFunctionBody +- GetILFunctionBodyAllocator +- SetILFunctionBody +- SetILInstrumentedCodeMap +- SetEnterLeaveFunctionHooks\* +- SetFunctionIDMapper\* +- GetNotifiedExceptionClauseInfo +- All methods related to Enter/Leave/Tailcall It's expected that future releases of the CLR will enable more API methods for use by attaching profilers. @@ -84,9 +80,9 @@ It's expected that future releases of the CLR will enable more API methods for u To understand limitations around the GC modes, here's a quick review of the GC modes an app can run under: -- **Workstation Blocking mode**. The thread that triggered the GC performs the GC while all other threads executing managed code must wait. -- **Workstation Concurrent / Background mode (the default)**. Concurrent GC (V1 & V2) allows portions of a full GC to execute while other threads are allowed to run. Background GC (its replacement in V4) takes it one step further, and also allows an ephemeral GC (i.e., gen 0 or gen 1) to execute while a gen 2 GC is executing. -- **Server mode**. Hosts like ASP.NET may choose to enable server mode which creates a heap + dedicated GC thread per CPU. This allows GCs to be fanned out to multiple threads. +- **Workstation Blocking mode**. The thread that triggered the GC performs the GC while all other threads executing managed code must wait. 
+- **Workstation Concurrent / Background mode (the default)**. Concurrent GC (V1 & V2) allows portions of a full GC to execute while other threads are allowed to run. Background GC (its replacement in V4) takes it one step further, and also allows an ephemeral GC (i.e., gen 0 or gen 1) to execute while a gen 2 GC is executing. +- **Server mode**. Hosts like ASP.NET may choose to enable server mode which creates a heap + dedicated GC thread per CPU. This allows GCs to be fanned out to multiple threads. Of course, [Maoni's blog](https://devblogs.microsoft.com/dotnet/author/maoni/) is required reading for anyone who wants to understand how the GC works. @@ -96,15 +92,13 @@ So here's the catch. What if a V4 app starts up in background GC mode _without_ Of course, you could forcibly turn off concurrent / background mode every time the app starts up via a config file: -| - -\<configuration\> - \<runtime\> - \<gcConcurrent enabled="false"/\> - \</runtime\> -\</configuration\> - - | +```xml +<configuration> + <runtime> + <gcConcurrent enabled="false"/> + </runtime> +</configuration> +``` But you don't really want to be running your apps with a sub-optimal GC mode all the time, just on the off-chance you might need to attach a memory profiler to it. If you suspect you might need to do some memory profiling of a client app, you should just start up your app with the memory profiler to begin with. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Attach2.md b/docs/design/coreclr/profiling/davbr-blog-archive/Attach2.md index d2db6ca61301..6ff0409ebebd 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Attach2.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Attach2.md @@ -11,12 +11,12 @@ A profiler that loads on startup of an application has the option to know the en [NoBirthAnnouncement](media/NoBirthAnnouncement.JPG) Drawing by Magdalena Hermawan - + There are two fundamental ways your profiler can catch up on the current state of an application: -- Lazy catch-up—as the profiler encounters new IDs, the profiler queries information about those IDs as it needs them, rather than assuming it has a full cache that’s always built up as the IDs are first created. This is analogous to Dorothy meeting a new grown-up, and gracefully accepting the fact that that person exists. -- Enumeration—for certain kinds of IDs, the profiler can (at attach time) request a complete list of the currently active IDs and query information about them at that time. Sort of like Dorothy first going to the Oz City Hall and looking up the birth records for everyone. +- Lazy catch-up—as the profiler encounters new IDs, the profiler queries information about those IDs as it needs them, rather than assuming it has a full cache that’s always built up as the IDs are first created. This is analogous to Dorothy meeting a new grown-up, and gracefully accepting the fact that that person exists. +- Enumeration—for certain kinds of IDs, the profiler can (at attach time) request a complete list of the currently active IDs and query information about them at that time. Sort of like Dorothy first going to the Oz City Hall and looking up the birth records for everyone. Lazy catch-up is fairly self-explanatory. For example, if your sampling profiler encounters an IP in a FunctionID you’ve never seen before, just look up whatever info you need about that FunctionID the first time you encounter it, rather than assuming you’d already built up a cache when the function was first JITted.
And if you discover that FunctionID resides in a module you’ve never seen before, then just look up whatever info you need about that ModuleID at that point, rather than assuming you already have a complete cache of all modules. Many of you are already doing something like this today if you support sampling against regular NGENd images (since you don’t get JIT notifications of those functions anyway). @@ -26,8 +26,8 @@ Enumeration, on the other hand, has some caveats and is worthwhile to describe i Some kinds of IDs have new enumerator methods as part of the profiling API. In particular: -- ICorProfilerInfo3::EnumModules -- ICorProfilerInfo3::EnumJITedFunctions +- ICorProfilerInfo3::EnumModules +- ICorProfilerInfo3::EnumJITedFunctions Your profiler calls these methods, and they return a standard enumerator you use to iterate through all of the currently-loaded IDs of that type. It’s worth noting that EnumJITedFunctions only enumerates FunctionIDs for which you would receive JITCompilationStarted/Finished events, and will not include FunctionIDs from NGENd modules. @@ -39,24 +39,24 @@ As you may recall, once your profiler is attached to the process, the CLR calls Bad timeline (loading; enumerating too soon): -1. Profiler attaches -2. Profiler calls EnumModules -3. Module starts to load -4. ModuleID is now enumerable -5. ModuleLoadFinished event would fire here if events were enabled (but they’re not yet!) -6. CLR enables events +1. Profiler attaches +2. Profiler calls EnumModules +3. Module starts to load +4. ModuleID is now enumerable +5. ModuleLoadFinished event would fire here if events were enabled (but they’re not yet!) +6. CLR enables events The problem is that the profiler calls EnumModules too early. If your profiler only calls EnumModules after CLR enables events, then you’re assured of either seeing a ModuleID via EnumModules or via a ModuleLoad event. In the above scenario, your profiler might as well have never done enumeration at all, since it will still not be notified of the ModuleID before it comes across that ModuleID in action later on. It gets even worse for modules that unload: Bad timeline (unloading; enumerating too soon): -1. Module loads -2. ModuleID is now enumerable -3. Profiler attaches -4. Profiler calls EnumModules (includes the ModuleID) -5. Module starts to unload -6. ModuleUnloadStarted event would fire here if events were enabled (but they’re not yet!) -7. CLR enables events +1. Module loads +2. ModuleID is now enumerable +3. Profiler attaches +4. Profiler calls EnumModules (includes the ModuleID) +5. Module starts to unload +6. ModuleUnloadStarted event would fire here if events were enabled (but they’re not yet!) +7. CLR enables events In the above case, the profiler discovers a ModuleID via EnumModules, but has no idea that the module is now in the process of unloading. So the profiler might query information about the stale ModuleID, potentially causing an AV. Again, this is caused because the profiler called the enumeration API too soon (i.e., before the CLR enabled event callbacks). @@ -68,24 +68,24 @@ When your profiler calls the Enum\* methods, the CLR creates a snapshot of all Bad timeline (loading): -1. Module starts to load -2. ModuleLoadFinished event would fire here if events were enabled (but they’re not yet—no profiler is attached!) -3. Profiler attaches -4. CLR enables events, calls ProfilerAttachComplete() -5. Profiler calls EnumModules -6. ModuleID is now enumerable +1. Module starts to load +2. 
ModuleLoadFinished event would fire here if events were enabled (but they’re not yet—no profiler is attached!) +3. Profiler attaches +4. CLR enables events, calls ProfilerAttachComplete() +5. Profiler calls EnumModules +6. ModuleID is now enumerable Because 2 comes before 6, it’s possible for a profiler to attach and grab an enumeration in the middle, and thus never hear about a ModuleID (even though the profiler avoided Race #1 from the previous section). Again, an even worse problem occurs for module unloading. Suppose the CLR were to change an ID’s enumerable status to false after sending the unload event. That would also lead to holes: Bad timeline (unloading): -1. Module loads, event would fire if profiler were attached (but it’s not), then ModuleID becomes enumerable -2. Module starts to unload -3. ModuleUnloadStarted event would fire here if events were enabled (but they’re not yet—no profiler is attached!) -4. Profiler attaches -5. CLR enables events, calls ProfilerAttachComplete() -6. Profiler calls EnumModules (ModuleID is still enumerable, so profiler discovers ModuleID at this point) -7. ModuleID is no longer enumerable +1. Module loads, event would fire if profiler were attached (but it’s not), then ModuleID becomes enumerable +2. Module starts to unload +3. ModuleUnloadStarted event would fire here if events were enabled (but they’re not yet—no profiler is attached!) +4. Profiler attaches +5. CLR enables events, calls ProfilerAttachComplete() +6. Profiler calls EnumModules (ModuleID is still enumerable, so profiler discovers ModuleID at this point) +7. ModuleID is no longer enumerable Because 3 comes before 7, a profiler could attach in the middle, grab an enumeration, discover the ModuleID via the enumeration, and have no idea that module was in the process of unloading. If the profiler were to use that ModuleID later on, an AV could result. The above led to the following golden rule: @@ -93,10 +93,10 @@ Because 3 comes before 7, a profiler could attach in the middle, grab an enumera In other words, an ID becomes enumerable _before_ the LoadFinished (or JITCompilationFinished) event. And an ID ceases to be enumerable _before_ the UnloadStarted event. Or you can think of it as, “The event is always last”. This eliminates any potential holes. So to be even more explicit, here’s the enumerability vs. event ordering: -1. ID available in enumerations snapped now -2. LoadFinished -3. ID no longer in enumerations snapped now -4. UnloadStarted +1. ID available in enumerations snapped now +2. LoadFinished +3. ID no longer in enumerations snapped now +4. UnloadStarted If an ID is present, the profiler will discover the ID via the enumerator or a LoadFinished event (or both). If an ID is not present, the profiler will either not see the ID via the enumerator or will see an UnloadStarted event (or both). In all cases, the event is more recent, and so the profiler should always trust an event over an enumeration that was generated prior. (More on that last point later.) @@ -104,36 +104,36 @@ The astute reader will notice that what we’ve done here is trade one race for Good timeline (loading with duplicate): -1. Module starts to load -2. ModuleID is now enumerable -3. Profiler attaches -4. CLR enables events, calls ProfilerAttachComplete() -5. Profiler calls EnumModules -6. Profiler receives ModuleLoadFinished +1. Module starts to load +2. ModuleID is now enumerable +3. Profiler attaches +4. CLR enables events, calls ProfilerAttachComplete() +5. Profiler calls EnumModules +6. 
Profiler receives ModuleLoadFinished At first it might seem a little strange. The enumerator contains the ModuleID, so the profiler sees that the module is loaded. But then the profiler receives a ModuleLoadFinished event, which might seem odd, since the enumerator implied the module was already loaded. This is what I mean by “duplicate”—the profiler is notified of a ModuleID twice (once via the enumeration, and once via the event). The profiler will need to be resilient to this. Although it’s a bit awkward, it’s better than the alternative of a hole, since the profiler would have no way to know the hole occurred. Unloading has a similar situation: Good timeline (unloading with duplicate): -1. Module loads, event would have fired if profiler were attached (but it’s not), ModuleID becomes enumerable -2. Module starts to unload -3. ModuleID is no longer enumerable -4. Profiler attaches -5. CLR enables events, calls ProfilerAttachComplete() -6. Profiler calls EnumModules -7. Profiler receives ModuleUnloadStarted event +1. Module loads, event would have fired if profiler were attached (but it’s not), ModuleID becomes enumerable +2. Module starts to unload +3. ModuleID is no longer enumerable +4. Profiler attaches +5. CLR enables events, calls ProfilerAttachComplete() +6. Profiler calls EnumModules +7. Profiler receives ModuleUnloadStarted event In step 6, the profiler does not see the unloading ModuleID (since it’s no longer enumerable). But in step 7 the profiler is notified that the ModuleID is unloading. Perhaps it’s a bit awkward that the profiler would be told that a seemingly nonexistent ModuleID is unloading. But again, this is better than the alternative, where a profiler finds an unloading ID in the enumeration, and is never told that the ModuleID got unloaded. One more case that’s worthwhile to bring out occurs when we move the profiler attach a bit earlier in the sequence. Good timeline (unloading without duplicate): -1. Module loads, event would fire if profiler were attached, ModuleID becomes enumerable -2. Module starts to unload -3. Profiler attaches -4. CLR enables events, calls ProfilerAttachComplete() -5. Profiler calls EnumModules (ModuleID is still present in the enumeration) -6. ModuleID is no longer enumerable -7. Profiler receives ModuleUnloadStarted event +1. Module loads, event would fire if profiler were attached, ModuleID becomes enumerable +2. Module starts to unload +3. Profiler attaches +4. CLR enables events, calls ProfilerAttachComplete() +5. Profiler calls EnumModules (ModuleID is still present in the enumeration) +6. ModuleID is no longer enumerable +7. Profiler receives ModuleUnloadStarted event Here the profiler discovers the ModuleID exists in step 5 (as the ModuleID is still enumerable at that point), but the profiler almost immediately after discovers that the module is unloading in step 7. As stated above, events are more recent, and should always take precedence over enumerations that were generated prior. This could get a bit tricky, though, as the profiler generates an enumeration before it iterates over the enumeration. In the above sequence, the enumeration is generated in step 5. However, the profiler could be iterating though the generated enumeration for quite some time, and might not come across the unloading ModuleID until after step 7 (multiple threads means fun for everyone!). 
For this reason, it’s important for the profiler to give precedence to events that occur after the enumeration was _generated_, even though iteration over that enumeration might occur later. @@ -151,7 +151,5 @@ It may be beneficial to program your profiler such that, upon attaching to the p It’s worth reiterating a limitation I stated in the first attach post (linked above): the ObjectAllocated() callback is unavailable to profilers that attach to running processes. Therefore, any logic your profiler has that assumes it gets all the ObjectAllocated() callbacks will need to be addressed. Any objects newly allocated since the last GC may still be unknown to your profiler until it comes across their references via GC callbacks during the next GC (unless your profiler comes across those objects in other ways—example: as parameters to methods you hook with the Enter/Leave/Tailcall probes). - - OK, that about covers the first steps your profiler should take once it attaches to a running process. It will either need to use lazy catch-up or the catch-up enumerations (or, quite likely, a combination of both). When using the enumerations, be careful to avoid holes (by calling the enumeration methods from inside ProfilerAttachComplete()), and be resilient to receiving information duplicated across the enumeration and the load / unload events. For memory profilers, be wary of GCs already in progress at the time your profiler attaches, and consider inducing your own GC at attach-time to build your initial cache of GC objects. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md b/docs/design/coreclr/profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md index c6feb97c779a..b2a900ca8757 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/CORPROF_E_UNSUPPORTED_CALL_SEQUENCE.md @@ -11,24 +11,24 @@ On the other hand, if you're hijacking or otherwise calling ICorProfilerInfo fun In 2.0 we've added some simple checks to help you avoid this problem. If you call an unsafe ICorProfilerInfo function asynchronously, instead of crossing its fingers and trying, it will fail with CORPROF\_E\_UNSUPPORTED\_CALL\_SEQUENCE. The general rule of thumb is, nothing is safe to call asynchronously. But here are the exceptions that are safe, and that we specifically allow to be called asynchronously: -- GetEventMask/SetEventMask -- GetCurrentThreadID -- GetThreadContext -- GetThreadAppDomain -- GetFunctionFromIP -- GetFunctionInfo/GetFunctionInfo2 -- GetCodeInfo/GetCodeInfo2 -- GetModuleInfo -- GetClassIDInfo/GetClassIDInfo2 -- IsArrayClass -- SetFunctionIDMapper -- DoStackSnapshot +- GetEventMask/SetEventMask +- GetCurrentThreadID +- GetThreadContext +- GetThreadAppDomain +- GetFunctionFromIP +- GetFunctionInfo/GetFunctionInfo2 +- GetCodeInfo/GetCodeInfo2 +- GetModuleInfo +- GetClassIDInfo/GetClassIDInfo2 +- IsArrayClass +- SetFunctionIDMapper +- DoStackSnapshot There are also a few things to keep in mind: -1. ICorProfilerInfo calls made from within the fast-path Enter/Leave callbacks are considered asynchronous. (Though ICorProfilerInfo calls made from within the _slow_-path Enter/Leave callbacks are considered synchronous.) See the blog entries [here](ELT - The Basics.md) and [here](http://blogs.msdn.com/jkeljo/archive/2005/08/11/450506.aspx) for more info on fast / slow path. -2. 
ICorProfilerInfo calls made from within instrumented code (i.e., IL you've rewritten to call into your profiler and then into ICorProfilerInfo) are considered asynchronous. -3. Calls made inside your FunctionIDMapper hook are considered to be synchronous. -4. Calls made on threads created by your profiler, are always considered to be synchronous. (This is because there's no danger of conflicts resulting from interrupting and then re-entering the CLR on that thread, since a profiler-created thread was not in the CLR to begin with.) -5. Calls made inside a StackSnapshotCallback are considered to be synchronous iff the call to DoStackSnapshot was synchronous. +1. ICorProfilerInfo calls made from within the fast-path Enter/Leave callbacks are considered asynchronous. (Though ICorProfilerInfo calls made from within the _slow_-path Enter/Leave callbacks are considered synchronous.) See the blog entries [here](ELT - The Basics.md) and [here](http://blogs.msdn.com/jkeljo/archive/2005/08/11/450506.aspx) for more info on fast / slow path. +2. ICorProfilerInfo calls made from within instrumented code (i.e., IL you've rewritten to call into your profiler and then into ICorProfilerInfo) are considered asynchronous. +3. Calls made inside your FunctionIDMapper hook are considered to be synchronous. +4. Calls made on threads created by your profiler are always considered to be synchronous. (This is because there's no danger of conflicts resulting from interrupting and then re-entering the CLR on that thread, since a profiler-created thread was not in the CLR to begin with.) +5. Calls made inside a StackSnapshotCallback are considered to be synchronous iff the call to DoStackSnapshot was synchronous. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - Activation.md b/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - Activation.md index 79ee3e1d291a..4afd8111db88 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - Activation.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - Activation.md @@ -15,9 +15,8 @@ Environment variables --\> Registry --\> Profiler DLL on File system. The first link in this chain is to check the environment variables inside the process that should be profiled. If you're running the process from a command-prompt, you can just try a "set co" from the command prompt: -| ``` -**C:\>** set co +C:\> set co (blah blah, other vars beginning with "co") ``` @@ -25,7 +24,6 @@ The first link in this chain is to check the environment variables inside the pr Cor_Enable_Profiling=0x1 COR_PROFILER={C5F90153-B93E-4138-9DB7-EB7156B07C4C} ``` - | If your scenario doesn't allow you to just run the process from a command prompt, like say an asp.net scenario, you may want to attach a debugger to the process that's supposed to be profiled, or use IFEO (HKEY\_LOCAL\_MACHINE\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Image File Execution Options) to force a debugger to start when the worker process starts. In the debugger, you can then use "!peb" to view the environment block, which will include the environment variables. @@ -62,4 +60,3 @@ or even set a breakpoint inside your Profiler DLL's **DllMain.** Now go, and s If you're still going strong, set a breakpoint in your profiler's **Initialize** () callback. Failures here are actually a popular cause for activation problems.
Inside your Initialize() callback, your profiler is likely calling QueryInterface for the ICorProfilerInfoX interface of your choice, and then calling SetEventMask, and doing other initialization-related tasks, like calling SetEnterLeaveFunctionHooks(2). Do any of these fail? Is your Initialize() callback returning a failure HRESULT? Hopefully by now you've isolated the failure point. If not, and your Initialize() is happily returning S\_OK, then your profiler is apparently loading just fine. At least it is when you're debugging it. :-) - diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - SOS and IDs.md b/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - SOS and IDs.md index 8f032244926c..7616327f8773 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - SOS and IDs.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Debugging - SOS and IDs.md @@ -9,9 +9,8 @@ SOS.DLL is a debugger extension DLL that ships with the CLR. You'll find it sit In windbg, you'll need mscorwks.dll to load first, and then you can load SOS. Often, I don't need SOS until well into my debugging session, at which point mscorwks.dll has already been loaded anyway. However, there are some cases where you'd like SOS loaded at the first possible moment, so you can use some of its commands early (like !bpmd to set a breakpoint on a managed method). So a surefire way to get SOS loaded ASAP is to have the debugger break when mscorwks gets loaded (e.g., "sxe ld mscorwks"). Once mscorwks is loaded, you can load SOS using the .loadby command: -| ``` -0:000\> **sxe ld mscorwks** +0:000\> sxe ld mscorwks 0:000\> g ModLoad: 79e70000 7a3ff000 C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorwks.dll eax=00000000 ebx=00000000 ecx=00000000 edx=00000000 esi=7efdd000 edi=20000000 @@ -19,9 +18,8 @@ In windbg, you'll need mscorwks.dll to load first, and then you can load SOS. O cs=0023 ss=002b ds=002b es=002b fs=0053 gs=002b efl=00000202 ntdll!NtMapViewOfSection+0x12: 77a1a9fa c22800 ret 28h - 0:000\> **.loadby sos mscorwks** + 0:000\> .loadby sos mscorwks ``` - | With SOS loaded, you can now use its commands to inspect the various IDs that the profiling API passes to your profiler. @@ -35,7 +33,6 @@ As far as your profiler is concerned, a FunctionID is just an opaque number. It Ok, so FunctionID = (MethodDesc \*). How does that help you? SOS just so happens to have a command to inspect MethodDescs: !dumpmd. So if you're in a debugger looking at your profiler code that's operating on a FunctionID, it can be beneficial to you to find out which function that FunctionID actually refers to. In the example below, the debugger will break in my profiler's JITCompilationStarted callback and look at the FunctionID. It's assumed that you've already loaded SOS as per above. -| ``` 0:000\> bu UnitTestSampleProfiler!SampleCallbackImpl::JITCompilationStarted 0:000\> g Breakpoint 0 hit UnitTestSampleProfiler!SampleCallbackImpl::JITCompilationStarted: 10003fc0 55 push ebp ``` - | The debugger is now sitting at the beginning of my profiler's JITCompilationStarted callback. Let's take a look at the parameters. -| ``` 0:000\> dv this = 0x00c133f8 - **functionID = 0x1e3170** + functionID = 0x1e3170 fIsSafeToBlock = 1 ``` - | Aha, that's the FunctionID about to get JITted. Now use SOS to see what that function really is.
-| ``` 0:000\> !dumpmd 0x1e3170 Method Name: test.Class1.Main(System.String[]) Class: 001e1288 -**MethodTable: 001e3180** mdToken: 06000001 +MethodTable: 001e3180 mdToken: 06000001 Module: 001e2d8c IsJitted: no m\_CodeOrIL: ffffffff ``` - | Lots of juicy info here, though the Method Name typically is what helps me the most in my debugging sessions. mdToken tells us the metadata token for this method. MethodTable tells us where another internal CLR data structure is stored that contains information about the class containing the function. In fact, the profiling API's ClassID is simply a MethodTable \*. [Note: the "Class: 001e1288" in the output above is very different from the MethodTable, and thus different from the profiling API's ClassID. Don't let the name fool you!] So we could go and inspect a bit further by dumping information about the MethodTable: -| ``` 0:000\> !dumpmt 0x001e3180 EEClass: 001e1288 @@ -91,7 +82,6 @@ Lots of juicy info here, though the Method Name typically is what helps me the m Number of IFaces in IFaceMap: 0 Slots in VTable: 6 ``` - | And of course, !dumpmt can be used anytime you come across a ClassID and want more info on it. @@ -126,17 +116,15 @@ It would probably be quicker to list what _isn't_ useful! I encourage you to do !bpmd lets you place a breakpoint on a managed method. Just specify the module name and the fully-qualified method name. For example: -| ``` !bpmd MyModule.exe MyNamespace.MyClass.Foo ``` - | If the method hasn't jitted yet, no worries. A "pending" breakpoint is placed. If your profiler performs IL rewriting, then using !bpmd on startup to set a managed breakpoint can be a handy way to break into the debugger just before your instrumented code will run (which, in turn, is typically just after your instrumented code has been jitted). This can help you in reproducing and diagnosing issues your profiler may run into when instrumenting particular functions (due to something interesting about the signature, generics, etc.). !PrintException: If you use this without arguments you get to see a pretty-printing of the last outstanding managed exception on the thread; or specify a particular Exception object's address. - + Ok, that about does it for SOS. Hopefully this info can help you track down problems a little faster, or better yet, perhaps this can help you step through and verify your code before problems arise. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md b/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md index 2f6576708260..784e65b09103 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Callback CONTEXT Registers.md @@ -5,9 +5,6 @@ In my initial [post](DoStackSnapshot - Exception Filters.md) about DoStackSnapsh The quick answer is that **nonvolatile (i.e., preserved) integer registers** should be valid. You don't really need many registers to walk the stack anyway. Obviously, you want a good stack pointer and instruction pointer. And hey, a frame pointer is handy when you come across an EBP-based frame in x86 (RBP on x64). These are all included in the set, of course.
Specifically by architecture, you can trust these fields in your context: -x86: Edi, Esi, Ebx, Ebp, Esp, Eip -x64: Rdi, Rsi, Rbx, Rbp, Rsp, Rip, R12:R15 -ia64: IntS0:IntS3, RsBSP, StIFS, RsPFS, IntSp, StIIP, StIPSR - - - +- x86: Edi, Esi, Ebx, Ebp, Esp, Eip +- x64: Rdi, Rsi, Rbx, Rbp, Rsp, Rip, R12:R15 +- ia64: IntS0:IntS3, RsBSP, StIFS, RsPFS, IntSp, StIIP, StIPSR diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md b/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md index 60ce221b4600..7e9a9484f3fd 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - Exception Filters.md @@ -7,38 +7,38 @@ For those of you diehard C# fans, you might be unaware of the existence of excep First, a little background. For the full deal, check out the MSDN Library topic on VB.NET's [try/catch/finally statements](http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vblr7/html/vastmTryCatchFinally.asp). But here's an appetizer. In VB.NET you can do this: -``` -Function Negative() As Boolean - Return False -End Function - -Function Positive() As Boolean - Return True -End Function - -Sub Thrower - Throw New Exception -End Sub - -Sub Main() - Try - Thrower() - Catch ex As Exception When Negative() - MsgBox("Negative") - Catch ex As Exception When Positive() - MsgBox("Positive") - End Try -End Sub ``` +Function Negative() As Boolean + Return False +End Function -The filters are the things that come after "When". We all know that, when an exception is thrown, its type must match the type specified in a Catch clause in order for that Catch clause to be executed. "When" is a way to further restrict whether a Catch clause will be executed. Now, not only must the exception's type match, but also the When clause must evaluate to True for that Catch clause to be chosen. In the example above, when we run, we'll skip the first Catch clause (because its filter returned False), and execute the second, thus showing a message box with "Positive" in it. - -The thing you need to realize about DoStackSnapshot's behavior (indeed, CLR in general) is that the execution of a When clause is really a separate function call. In the above example, imagine we take a stack snapshot while inside Positive(). Our managed-only stack trace, as reported by DoStackSnapshot, would then look like this (stack grows up): - -Positive -Main -Thrower -Main - -It's that highlighted Main that seems odd at first. While the exception is thrown inside Thrower(), the CLR needs to execute the filter clauses to figure out which Catch wins. These filter executions are actually _function calls_. Since filter clauses don't have their own names, we just use the name of the function containing the filter clause for stack reporting purposes. Thus, the highlighted Main above is the execution of a filter clause located inside Main (in this case, "When Positive()"). When each filter clause completes, we "return" back to Thrower() to continue our search for the filter that returns True. Since this is how the call stack is built up, that's what DoStackSnapshot will report. 
+Function Positive() As Boolean + Return True +End Function + +Sub Thrower + Throw New Exception +End Sub + +Sub Main() + Try + Thrower() + Catch ex As Exception When Negative() + MsgBox("Negative") + Catch ex As Exception When Positive() + MsgBox("Positive") + End Try +End Sub +``` + +The filters are the things that come after "When". We all know that, when an exception is thrown, its type must match the type specified in a Catch clause in order for that Catch clause to be executed. "When" is a way to further restrict whether a Catch clause will be executed. Now, not only must the exception's type match, but also the When clause must evaluate to True for that Catch clause to be chosen. In the example above, when we run, we'll skip the first Catch clause (because its filter returned False), and execute the second, thus showing a message box with "Positive" in it. + +The thing you need to realize about DoStackSnapshot's behavior (indeed, CLR in general) is that the execution of a When clause is really a separate function call. In the above example, imagine we take a stack snapshot while inside Positive(). Our managed-only stack trace, as reported by DoStackSnapshot, would then look like this (stack grows up): + +Positive\ +Main\ +Thrower\ +Main + +It's that upper Main (the one just below Positive) that seems odd at first. While the exception is thrown inside Thrower(), the CLR needs to execute the filter clauses to figure out which Catch wins. These filter executions are actually _function calls_. Since filter clauses don't have their own names, we just use the name of the function containing the filter clause for stack reporting purposes. Thus, that upper Main is the execution of a filter clause located inside Main (in this case, "When Positive()"). When each filter clause completes, we "return" back to Thrower() to continue our search for the filter that returns True. Since this is how the call stack is built up, that's what DoStackSnapshot will report. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md b/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md index 7d9952ff7545..59d46109f365 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/DoStackSnapshot - HRESULTs.md @@ -5,14 +5,14 @@ Generally, corerror.h tells you all you need to know about what kinds of HRESULT ### E\_FAIL -I don't much like E\_FAIL. If DoStackSnapshot fails, you will typically see a more descriptive, custom HRESULT. However, there are regrettably a few ways DoStackSnapshot can fail where you'll see the dreaded E\_FAIL instead. From your code's point of view, you shouldn't assume E\_FAIL will always imply one of the cases below (or conversely that each of these cases will always result in E\_FAIL). But this is just good stuff to know as you develop and debug your profiler, so you don't get blindsided. - -1) No managed frames on stack - +I don't much like E\_FAIL. If DoStackSnapshot fails, you will typically see a more descriptive, custom HRESULT. However, there are regrettably a few ways DoStackSnapshot can fail where you'll see the dreaded E\_FAIL instead. From your code's point of view, you shouldn't assume E\_FAIL will always imply one of the cases below (or conversely that each of these cases will always result in E\_FAIL). But this is just good stuff to know as you develop and debug your profiler, so you don't get blindsided.
+ +1) No managed frames on stack + If you call DoStackSnapshot when there are no managed functions on your target thread's stack, you can get E\_FAIL. For example, if you try to walk the stack of a target thread very early on in its execution, there simply might not be any managed frames there yet. Or, if you try to walk the stack of the finalizer thread while it's waiting to do work, there will certainly be no managed frames on its stack. It's also possible that walking a stack with no managed frames on it will yield S\_OK instead of E\_FAIL (e.g., if the target thread is jit-compiling the first managed function to be called on that thread). Again, your code probably doesn't need to worry about all these cases. If we call your StackSnapshotCallback for a managed frame, you can trust that frame is there. If we don't call your StackSnapshotCallback, you can assume there are no managed frames on the stack. -2) OS kernel handling a hardware exception - +2) OS kernel handling a hardware exception + This one is less likely to happen, but it certainly can. When an app throws a hardware exception (e.g., divide by 0), the offending thread enters the Windows kernel. The kernel spends some time recording the thread's current user-mode register context, modifying some registers, and moving the instruction pointer to the user-mode exception dispatch routine. At this point the thread is ready to reenter user-mode. But if you are unlucky enough to call DoStackSnapshot while the target thread is still in the kernel doing this stuff, you will get E\_FAIL. 3) Detectably bad seed @@ -25,11 +25,11 @@ Generally, this HRESULT means that your profiler requested to abort the stack wa One of the beautiful things about running 64-bit Windows is that you can get the Windows OS to perform (native) stack walks for you. Read up on [RtlVirtualUnwind](http://msdn.microsoft.com/library/default.asp?url=/library/en-us/debug/base/rtlvirtualunwind.asp) if you're unfamiliar with this. The Windows OS has a critical section to protect a block of memory used to help perform this stack walk. So what would happen if: -- The OS's exception handling code causes a thread to walk its own stack -- The thread therefore enters this critical section -- Your profiler (via DoStackSnapshot) suspends this thread while the thread is still inside the critical section -- DoStackSnapshot uses RtlVirtualUnwind to help walk this suspended thread -- RtlVirtualUnwind (executing on the current thread) tries to enter the critical section (already owned by suspended target thread) +- The OS's exception handling code causes a thread to walk its own stack +- The thread therefore enters this critical section +- Your profiler (via DoStackSnapshot) suspends this thread while the thread is still inside the critical section +- DoStackSnapshot uses RtlVirtualUnwind to help walk this suspended thread +- RtlVirtualUnwind (executing on the current thread) tries to enter the critical section (already owned by suspended target thread) If your answer was "deadlock", congratulations! DoStackSnapshot has some code that tries to avoid this scenario, by aborting the stack walk before the deadlock can occur. When this happens, DoStackSnapshot will return CORPROF\_E\_STACKSNAPSHOT\_ABORTED. Note that this whole scenario is pretty rare, and only happens on WIN64. 
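Pulling these failure modes together, here is a hedged C++ sketch of a sampling call site that tolerates them. The `DoStackSnapshot` and `StackSnapshotCallback` signatures are the ones from corprof.idl; the surrounding bookkeeping (and the decision to simply drop a failed sample) is an assumption:

```cpp
#include <corprof.h>
#include <vector>

// Invoked once per managed frame, from leaf to root. funcId == 0 marks a
// run of unmanaged frames.
static HRESULT __stdcall OnFrame(FunctionID funcId, UINT_PTR ip,
                                 COR_PRF_FRAME_INFO frameInfo,
                                 ULONG32 contextSize, BYTE context[],
                                 void* clientData)
{
    auto* frames = static_cast<std::vector<FunctionID>*>(clientData);
    frames->push_back(funcId);
    return S_OK; // returning a failure HRESULT would abort the walk
}

// Take one sample of 'thread', treating the HRESULTs described above as
// "drop this sample and move on" rather than as fatal errors.
void SampleThread(ICorProfilerInfo2* pInfo, ThreadID thread)
{
    std::vector<FunctionID> frames;
    HRESULT hr = pInfo->DoStackSnapshot(thread, &OnFrame,
                                        COR_PRF_SNAPSHOT_DEFAULT,
                                        &frames,
                                        nullptr, 0); // no seed context
    if (hr == E_FAIL || hr == CORPROF_E_STACKSNAPSHOT_ABORTED)
    {
        // Expected occasionally: no managed frames, thread in the kernel,
        // deadlock avoidance, a detectably bad seed, or a requested abort.
        return;
    }
    if (SUCCEEDED(hr))
    {
        // Record 'frames' in the profiler's sample store (elided).
    }
}
```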
diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - The Basics.md b/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - The Basics.md index af5d867ee4a4..7acb138cbd1e 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - The Basics.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - The Basics.md @@ -5,79 +5,84 @@ The CLR Profiling API allows you to hook managed functions so that your profiler ### Setting up the hooks -1. On initialization, your profiler must call SetEnterLeaveFunctionHooks(2) to specify which functions inside your profiler should be called whenever a managed function is entered, returns, or exits via tail call, respectively. - _(Profiler calls this…)_ +1. On initialization, your profiler must call SetEnterLeaveFunctionHooks(2) to specify which functions inside your profiler should be called whenever a managed function is entered, returns, or exits via tail call, respectively. + + _(Profiler calls this…)_ + ``` HRESULT SetEnterLeaveFunctionHooks( - [in] FunctionEnter \*pFuncEnter, - [in] FunctionLeave \*pFuncLeave, - [in] FunctionTailcall \*pFuncTailcall); - ``` - - _(Profiler implements these…)_ - ``` - typedef void FunctionEnter(FunctionID funcID); - typedef void FunctionLeave(FunctionID funcID); - typedef void FunctionTailcall(FunctionID funcID); + [in] FunctionEnter *pFuncEnter, + [in] FunctionLeave *pFuncLeave, + [in] FunctionTailcall *pFuncTailcall); ``` - **OR** + _(Profiler implements these…)_ - _(Profiler calls this…)_ - ``` - HRESULT SetEnterLeaveFunctionHooks2( - [in] FunctionEnter2 *pFuncEnter, - [in] FunctionLeave2 *pFuncLeave, - [in] FunctionTailcall2 *pFuncTailcall); - ``` - + ``` + typedef void FunctionEnter(FunctionID funcID); + typedef void FunctionLeave(FunctionID funcID); + typedef void FunctionTailcall(FunctionID funcID); + ``` - _(Profiler implements these…)_ - ``` - typedef void FunctionEnter2( - FunctionID funcId, - UINT_PTR clientData, - COR_PRF_FRAME_INFO func, - COR_PRF_FUNCTION_ARGUMENT_INFO *argumentInfo); - - typedef void FunctionLeave2( - FunctionID funcId, - UINT_PTR clientData, - COR_PRF_FRAME_INFO func, - COR_PRF_FUNCTION_ARGUMENT_RANGE *retvalRange); - - typedef void FunctionTailcall2( - FunctionID funcId, - UINT_PTR clientData, - COR_PRF_FRAME_INFO func); - ``` + **OR** + + _(Profiler calls this…)_ + + ``` + HRESULT SetEnterLeaveFunctionHooks2( + [in] FunctionEnter2 *pFuncEnter, + [in] FunctionLeave2 *pFuncLeave, + [in] FunctionTailcall2 *pFuncTailcall); + ``` - This step alone does not cause the enter/leave/tailcall (ELT) hooks to be called. But you must do this on startup to get things rolling. + _(Profiler implements these…)_ -2. At any time during the run, your profiler calls SetEventMask specifying COR\_PRF\_MONITOR\_ENTERLEAVE in the bitmask. Your profiler may set or reset this flag at any time to cause ELT hooks to be called or ignored, respectively. + ``` + typedef void FunctionEnter2( + FunctionID funcId, + UINT_PTR clientData, + COR_PRF_FRAME_INFO func, + COR_PRF_FUNCTION_ARGUMENT_INFO *argumentInfo); + + typedef void FunctionLeave2( + FunctionID funcId, + UINT_PTR clientData, + COR_PRF_FRAME_INFO func, + COR_PRF_FUNCTION_ARGUMENT_RANGE *retvalRange); + + typedef void FunctionTailcall2( + FunctionID funcId, + UINT_PTR clientData, + COR_PRF_FRAME_INFO func); + ``` + + This step alone does not cause the enter/leave/tailcall (ELT) hooks to be called. But you must do this on startup to get things rolling. + +2. 
At any time during the run, your profiler calls SetEventMask specifying COR\_PRF\_MONITOR\_ENTERLEAVE in the bitmask. Your profiler may set or reset this flag at any time to cause ELT hooks to be called or ignored, respectively. ### FunctionIDMapper In addition to the above two steps, your profiler may specify more granularly which managed functions should have ELT hooks compiled into them: -1. At any time, your profiler may call ICorProfilerInfo2::SetFunctionIDMapper to specify a special hook to be called when a function is JITted. +1. At any time, your profiler may call ICorProfilerInfo2::SetFunctionIDMapper to specify a special hook to be called when a function is JITted. + + _(Profiler calls this…)_ -_(Profiler calls this…)_ -``` - HRESULT SetFunctionIDMapper([in] FunctionIDMapper \*pFunc); -``` - + ``` + HRESULT SetFunctionIDMapper([in] FunctionIDMapper *pFunc); + ``` + + _(Profiler implements this…)_ + + ``` + typedef UINT_PTR __stdcall FunctionIDMapper( + FunctionID funcId, + BOOL *pbHookFunction); + ``` - _(Profiler implements this…)_ -``` -typedef UINT_PTR __stdcall FunctionIDMapper( - FunctionID funcId, - BOOL *pbHookFunction); -``` - 2. When FunctionIDMapper is called: - a. Your profiler sets the pbHookFunction [out] parameter appropriately to determine whether the function identified by funcId should have ELT hooks compiled into it. + a. Your profiler sets the pbHookFunction \[out] parameter appropriately to determine whether the function identified by funcId should have ELT hooks compiled into it. b. Of course, the primary purpose of FunctionIDMapper is to allow your profiler to specify an alternate ID for that function. Your profiler does this by returning that ID from FunctionIDMapper. The CLR will pass this alternate ID to your ELT hooks (as funcID if you're using the 1.x ELT, and as clientData if you're using the 2.x ELT). ### Writing your ELT hooks @@ -92,8 +97,6 @@ The solution is “NGEN /Profile”. For example, if you run this command agains `ngen install MyAssembly.dll /Profile` - - it will NGEN MyAssembly.dll with the “Profile” flavor (also called “profiler-enhanced”). This flavor causes extra hooks to be baked in to enable features like ELT hooks, loader callbacks, managed/unmanaged code transition callbacks, and the JITCachedFunctionSearchStarted/Finished callbacks. The original NGENd versions of all your assemblies still stay around in your NGEN cache. NGEN /Profile simply causes a new set of NGENd assemblies to be generated as well, marked as the “profiler-enhanced” set of NGENd assemblies. At run-time, the CLR determines which flavor should be loaded. If a profiler is attached and enables certain features that only work with profiler-enhanced (not regular) NGENd assemblies (such as ELT via a call to SetEnterLeaveFunctionHooks(2), or any of several other features that are requested by setting particular event flags via SetEventMask), then the CLR will only load profiler-enhanced NGENd images--and if none exist then the CLR degrades to JIT in order to support the features requested by the profiler. In contrast, if the profiler does not specify such event flags, or there is no profiler to begin with, then the CLR loads the regular-flavored NGENd assemblies. @@ -128,4 +131,3 @@ Why do you care? Well, it's always good to know what price you're paying. If you ### Next time... That about covers it for the ELT basics. Next installment of this riveting series will talk about that enigma known as tailcall.
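To make FunctionIDMapper's two jobs concrete, here is a minimal sketch; `ShouldInstrument` and `FunctionInfo` are hypothetical profiler-side names, not part of the API. It filters which functions get ELT hooks compiled in, and returns a pointer to a profiler-owned record as the alternate ID, which the 2.x hooks then receive as clientData:

```
struct FunctionInfo { FunctionID funcId; /* cached name, counters, ... */ };

BOOL ShouldInstrument(FunctionID funcId);   // hypothetical filter

UINT_PTR __stdcall MyFunctionIDMapper(FunctionID funcId, BOOL *pbHookFunction)
{
    if (!ShouldInstrument(funcId))
    {
        *pbHookFunction = FALSE;  // no ELT hooks compiled into this function
        return funcId;            // nothing will be passed to the hooks anyway
    }
    FunctionInfo *pInfo = new FunctionInfo();   // profiler-owned record
    pInfo->funcId = funcId;
    *pbHookFunction = TRUE;
    // Comes back as clientData in the 2.x ELT hooks (funcID in the 1.x hooks).
    return (UINT_PTR)pInfo;
}

// Registered once, e.g. during Initialize:
//     pInfo2->SetFunctionIDMapper(&MyFunctionIDMapper);
```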
- diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - tail calls.md b/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - tail calls.md index 3cca8c9a9c5c..d00ceba195e7 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - tail calls.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/ELT Hooks - tail calls.md @@ -79,7 +79,7 @@ typedef void FunctionTailcall2( COR_PRF_FRAME_INFO func); ``` -**Tip** : More than once I've seen profiler writers make the following mistake. They will take their naked assembly-language wrapper for their Enter2 and Leave2 hooks, and paste it again to use as the Tailcall2 assembly-language wrapper. The problem is they forget that the Tailcall2 hook takes a different number of parameters than the Enter2 / Leave2 hooks (or, more to the point, a different number of _bytes_ is passed on the stack to invoke the Tailcall2 hook). So, they'll take the "ret 16" at the end of their Enter2/Leave2 hook wrappers and stick that into their Tailcall2 hook wrapper, forgetting to change it to a "ret 12". Don't make the same mistake! +**Tip** : More than once I've seen profiler writers make the following mistake. They will take their naked assembly-language wrapper for their Enter2 and Leave2 hooks, and paste it again to use as the Tailcall2 assembly-language wrapper. The problem is they forget that the Tailcall2 hook takes a different number of parameters than the Enter2 / Leave2 hooks (or, more to the point, a different number of _bytes_ is passed on the stack to invoke the Tailcall2 hook). So, they'll take the "ret 16" at the end of their Enter2/Leave2 hook wrappers and stick that into their Tailcall2 hook wrapper, forgetting to change it to a "ret 12". Don't make the same mistake! It's worth noting what these parameters mean. With the Enter and Leave hooks it's pretty obvious that the parameters your hook is given (e.g., funcId) apply to the function being Entered or Left. But what about the Tailcall hook? Do the Tailcall hook's parameters describe the caller (function making the tail call) or the callee (function being tail called into)? @@ -95,7 +95,7 @@ Ok, enough dilly-dallying. What should your profiler do in its Tailcall hook? Tw The [CLRProfiler](http://www.microsoft.com/downloads/details.aspx?FamilyID=a362781c-3870-43be-8926-862b40aa0cd0&DisplayLang=en) is a great example of using Enter/Leave/Tailcall hooks to maintain shadow stacks. A shadow stack is your profiler's own copy of the current stack of function calls on a given thread at any given time. Upon Enter of a function, you push that FunctionID (and whatever other info interests you, such as arguments) onto your data structure that represents that thread's stack. Upon Leave of a function, you pop that FunctionID. This gives you a live list of managed calls in play on the thread. The CLRProfiler uses shadow stacks so that whenever the managed app being profiled chooses to allocate a new object, the CLRProfiler can know the managed call stack that led to the allocation. (Note that an alternate way of accomplishing this would be to call DoStackSnapshot at every allocation point instead of maintaining a shadow stack. Since objects are allocated so frequently, however, you'd end up calling DoStackSnapshot extremely frequently and will often see worse performance than if you had been maintaining shadow stacks in the first place.) 
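Before tackling the tail call question, a minimal sketch of the shadow-stack bookkeeping itself may help; this is an assumed shape, not the CLRProfiler's actual code, and a real profiler would call these from the naked assembly wrappers mentioned above. As the conclusion of this post recommends, the Tailcall2 hook simply pops, exactly as if it were a Leave:

```
#include <vector>

// One shadow stack per thread; thread_local used here for brevity.
thread_local std::vector<FunctionID> g_shadowStack;

void OnEnter2(FunctionID funcId, UINT_PTR clientData,
              COR_PRF_FRAME_INFO func, COR_PRF_FUNCTION_ARGUMENT_INFO *argInfo)
{
    g_shadowStack.push_back(funcId);   // function entered: push
}

void OnLeave2(FunctionID funcId, UINT_PTR clientData,
              COR_PRF_FRAME_INFO func, COR_PRF_FUNCTION_ARGUMENT_RANGE *retval)
{
    g_shadowStack.pop_back();          // function returned: pop
}

void OnTailcall2(FunctionID funcId, UINT_PTR clientData, COR_PRF_FRAME_INFO func)
{
    // Treat the tail call like a Leave of the caller: pop now. Don't wait
    // for a matching Enter--the target may be a native helper inside the CLR.
    g_shadowStack.pop_back();
}
```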
- + OK, so when your profiler maintains a shadow stack, it's clear what your profiler should do on Enter or Leave, but what should it do on Tailcall? There are a couple ways one could imagine answering that question, but only one of them will work! Taking the example from the top of this post, imagine the stack looks like this: @@ -121,9 +121,11 @@ Method 2: On tailcall, "mark" the FunctionID at the top of your stack as needing With this strategy, for the duration of the call to Three(), the shadow stack will look like this: -Three -Helper (marked for deferred pop) -Main +``` +Three +Helper (marked for deferred pop) +Main +``` which some might consider more user-friendly. And as soon as Three() returns, your profiler will sneakily do a double-pop leaving just this: @@ -163,9 +165,11 @@ Method 2: Shadow stack fails At stage (4), the shadow stack looks like this: -Helper -Thread.Sleep (marked for "deferred pop") +``` +Helper +Thread.Sleep (marked for "deferred pop") Main +``` If you think it might be complicated to explain tail calls to your users so they can understand the Method 1 form of shadow stack presentation, just try explaining why it makes sense to present to them that Thread.Sleep() is calling Helper()! @@ -184,11 +188,11 @@ static public void Main() would yield: ``` -Helper -Thread.Sleep (marked for "deferred pop") -Thread.Sleep (marked for "deferred pop") -Thread.Sleep (marked for "deferred pop") -Thread.Sleep (marked for "deferred pop") +Helper +Thread.Sleep (marked for "deferred pop") +Thread.Sleep (marked for "deferred pop") +Thread.Sleep (marked for "deferred pop") +Thread.Sleep (marked for "deferred pop") Main ``` @@ -211,11 +215,11 @@ static public void Helper() would yield: ``` -Thread.Sleep (marked for "deferred pop") -Thread.Sleep (marked for "deferred pop") -Thread.Sleep (marked for "deferred pop") -Thread.Sleep (marked for "deferred pop") -Helper +Thread.Sleep (marked for "deferred pop") +Thread.Sleep (marked for "deferred pop") +Thread.Sleep (marked for "deferred pop") +Thread.Sleep (marked for "deferred pop") +Helper Main ``` @@ -337,7 +341,7 @@ ildasm Class1.exe Inside ildasm, use File.Dump to generate a text file that contains a textual representation of the IL from Class1.exe. Call it Class1WithTail.il. Open up that file and add the tail. prefix just before the call you want optimized into a tail call (see highlighted yellow for changes): ``` -.method private hidebysig static int32 +.method private hidebysig static int32 Helper(int32 i) cil managed { ~~// Code size 45 (0x2d) @@ -386,5 +390,5 @@ If you didn't learn anything, I hope you at least got some refreshing sleep than - Since some managed functions may tail call into native helper functions inside the CLR (for which you won't get an Enter hook notification), your Tailcall hook should treat the tail call as if it were a Leave, and not depend on the next Enter hook correlating to the target of the last tail call. With shadow stacks, for example, this means you should simply pop the calling function off your shadow stack in your Tailcall hook. - Since tail calls can be elusive to find in practice, it's well worth your while to use ildasm/ilasm to manufacture explicit tail calls so you can step through your Tailcall hook and test its logic. -_David has been a developer at Microsoft for over 70 years (allowing for his upcoming time-displacement correction). He joined Microsoft in 2079, first starting in the experimental time-travel group. 
His current assignment is to apply his knowledge of the future to eliminate the "Wait for V3" effect customers commonly experience in his source universe. By using Retroactive Hindsight-ellisenseTM his goal is to "get it right the first time, this time" in a variety of product groups._ +_David has been a developer at Microsoft for over 70 years (allowing for his upcoming time-displacement correction). He joined Microsoft in 2079, first starting in the experimental time-travel group. His current assignment is to apply his knowledge of the future to eliminate the "Wait for V3" effect customers commonly experience in his source universe. By using Retroactive Hindsight-ellisenseTM his goal is to "get it right the first time, this time" in a variety of product groups._ diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Generics and Your Profiler.md b/docs/design/coreclr/profiling/davbr-blog-archive/Generics and Your Profiler.md index 149a0c0a7888..856290fd9931 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Generics and Your Profiler.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Generics and Your Profiler.md @@ -7,7 +7,7 @@ If you’re writing a profiler that you expect to run against CLR 2.0 or greater Let's say a C# developer writes code like this: - + ``` class MyClass<T> { @@ -57,8 +57,6 @@ HRESULT GetFunctionInfo2([in] FunctionID funcId, typeArgs[]: This is the array of **type arguments** to MyClass\<int\>.Foo\<float\>. So this will be an array of only one element: the ClassID for float. (The int in MyClass\<int\> is a type argument to MyClass, not to Foo, and you would only see that when you call GetClassIDInfo2 with MyClass\<int\>.) -## - ## GetClassIDInfo2 OK, someone in parentheses said something about calling GetClassIDInfo2, so let’s do that. Since we got the ClassID for MyClass\<int\> above, let’s pass it to GetClassIDInfo2 to see what we get: @@ -89,21 +87,19 @@ To understand why, it’s necessary to understand an internal optimization the C For now, the important point is that, once we’re inside JITted code that is shared across different generic instantiations, how can one know which instantiation is the actual one that caused the current invocation? Well, in many cases, the CLR may not have that data readily lying around. However, as a profiler, you can capture this information and pass it back to the CLR when it needs it. This is done through a COR\_PRF\_FRAME\_INFO. There are two ways your profiler can get a COR\_PRF\_FRAME\_INFO: -1. Via slow-path Enter/Leave/Tailcall probes -2. Via your DoStackSnapshot callback +1. Via slow-path Enter/Leave/Tailcall probes +2. Via your DoStackSnapshot callback I lied. #1 is really the only way for your profiler to get a COR\_PRF\_FRAME\_INFO. #2 may seem like a way—at least the profiling API suggests that the CLR gives your profiler a COR\_PRF\_FRAME\_INFO in the DSS callback—but unfortunately the COR\_PRF\_FRAME\_INFO you get there is pretty useless. I suspect the COR\_PRF\_FRAME\_INFO parameter was added to the signature of the profiler’s DSS callback function so that it could “light up” at some point in the future when we could work on finding out how to create a sufficiently helpful COR\_PRF\_FRAME\_INFO during stack walks. However, that day has not yet arrived. So if you want a COR\_PRF\_FRAME\_INFO, you’ll need to grab it—and use it from—your slow-path Enter/Leave/Tailcall probe. With a valid COR\_PRF\_FRAME\_INFO, GetFunctionInfo2 will give you helpful, specific ClassIDs in the typeArgs [out] array and pClassId [out] parameter.
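As a concrete illustration, assuming the hypothetical global `g_pInfo` (`ICorProfilerInfo2*`) from earlier and eliding error handling, a slow-path Enter2 probe can hand its COR\_PRF\_FRAME\_INFO straight to GetFunctionInfo2 to recover the exact instantiation:

```
void OnEnter2(FunctionID funcId, UINT_PTR clientData,
              COR_PRF_FRAME_INFO func, COR_PRF_FUNCTION_ARGUMENT_INFO *argInfo)
{
    ClassID  classId  = 0;
    ModuleID moduleId = 0;
    mdToken  token    = mdTokenNil;
    ClassID  typeArgs[8];            // plenty for this sketch
    ULONG32  cTypeArgs = 0;

    // Because 'func' came from a slow-path ELT probe, classId and typeArgs
    // describe the actual instantiation for this invocation.
    g_pInfo->GetFunctionInfo2(funcId, func, &classId, &moduleId, &token,
                              8, &cTypeArgs, typeArgs);
    // ... record classId / typeArgs for this invocation ...
}
```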
If the profiler passes NULL for COR\_PRF\_FRAME\_INFO, here’s what you can expect: -- If you’re using CLR V2, pClassId will point to NULL if the function sits on _any_ generic class (shared or not). In CLR V4 this got a little better, and you’ll generally only see pClassId point to NULL if the function sits on a “shared” generic class (instantiated with reference types). - - Note: If it’s impossible for the profiler to have a COR\_PRF\_FRAME\_INFO handy to pass to GetFunctionInfo2, and that results in a NULL \*pClassID, the profiler can always use the metadata interfaces to find the mdTypeDef token of the class on which the function resides for the purposes of pretty-printing the class name to the user. Of course, the profiler will not know the specific instantiating type arguments that were used on the class in that case. -- the typeArgs [out] array will contain the ClassID for **System.\_\_Canon** , rather than the actual instantiating type(s), if the function itself is generic and is instantiated with reference type argument(s). +- If you’re using CLR V2, pClassId will point to NULL if the function sits on _any_ generic class (shared or not). In CLR V4 this got a little better, and you’ll generally only see pClassId point to NULL if the function sits on a “shared” generic class (instantiated with reference types). + - Note: If it’s impossible for the profiler to have a COR\_PRF\_FRAME\_INFO handy to pass to GetFunctionInfo2, and that results in a NULL \*pClassID, the profiler can always use the metadata interfaces to find the mdTypeDef token of the class on which the function resides for the purposes of pretty-printing the class name to the user. Of course, the profiler will not know the specific instantiating type arguments that were used on the class in that case. +- the typeArgs [out] array will contain the ClassID for **System.\_\_Canon** , rather than the actual instantiating type(s), if the function itself is generic and is instantiated with reference type argument(s). It’s worth noting here that there is a bug in GetFunctionInfo2, in that the [out] pClassId you get for the class containing the function can be wrong with generic virtual functions. Take a look at [this forum post](http://social.msdn.microsoft.com/Forums/en-US/netfxtoolsdev/thread/ed6f972f-712a-48df-8cce-74f8951503fa/) for more information and a workaround. -## - ## ClassIDs & FunctionIDs vs. Metadata Tokens Although you can infer this from the above, let’s take a breather and review. When you have multiple generic instantiations of a generic type, that type is defined with one mdTypeDef (metadata token), but you’ll see multiple ClassIDs (one per instantiation). When you have multiple generic instantiations of a generic method, it’s defined with one mdMethodDef (metadata token), but you’ll see multiple FunctionIDs (one per instantiation). @@ -120,14 +116,14 @@ If you got curious, and ran such a profiler under the debugger, you could use th If your profiler performs IL rewriting, it’s important to understand that it must NOT do instantiation-specific IL rewriting. Huh? Let’s take an example. Suppose you’re profiling code that uses MyClass\<int\>.Foo\<float\> and MyClass\<int\>.Foo\<long\>. Your profiler will see two JITCompilationStarted callbacks, and will have two opportunities to rewrite the IL. Your profiler may call GetFunctionInfo2 on those two FunctionIDs and determine that they’re two different instantiations of the same generic function.
You may then be tempted to make use of the fact that one is instantiated with float, and the other with long, and provide different IL for the two different JIT compilations. The problem with this is that the IL stored in metadata, as well as the IL provided to SetILFunctionBody, is always specified relative to the mdMethodDef. (Remember, SetILFunctionBody doesn’t take a FunctionID as input; it takes an mdMethodDef.) And it’s the profiler’s responsibility always to specify the same rewritten IL for any given mdMethodDef no matter how many times it’s JITted. And a given mdMethodDef can be JITted multiple times due to a number of reasons: -- Two threads simultaneously trying to call the same function for the first time (and thus both trying to JIT that function) -- Strange dependency chains involving class constructors (more on this in the MSDN [reference topic](http://msdn.microsoft.com/en-us/library/ms230586.aspx)) -- Multiple AppDomains using the same (non-domain-neutral) function -- And of course multiple generic instantiations! +- Two threads simultaneously trying to call the same function for the first time (and thus both trying to JIT that function) +- Strange dependency chains involving class constructors (more on this in the MSDN [reference topic](http://msdn.microsoft.com/en-us/library/ms230586.aspx)) +- Multiple AppDomains using the same (non-domain-neutral) function +- And of course multiple generic instantiations! Regardless of the reason, the profiler must always rewrite with exactly the same IL. Otherwise, an invariant in the CLR will have been broken by the profiler, and you will get strange, undefined behavior as a result. And no one wants that. - + That’s it! Hopefully this gives you a good idea of how the CLR Profiling API will behave in the face of generic classes and functions, and what is expected of your profiler. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md b/docs/design/coreclr/profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md index 1b1e2f74d7a3..283194e860dd 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Metadata Tokens, Run-Time IDs, and Type Loading.md @@ -31,16 +31,12 @@ Yes, that is a good example. You are an astute reader. Memory profilers that w # Going from metadata token to run-time ID -# - -# - As I mentioned above, the safest way to do this is to build up your own map and do reverse-lookups as necessary. If that scheme meets your needs, then by all means do that, and stop reading! But in the cases where this is insufficient, you may need to resort to using GetFunctionFromToken(AndTypeArgs) and GetClassFromToken(AndTypeArgs). There is no simple, foolproof way to use these APIs safely, but here is your guideline: **Never call GetFunctionFromToken(AndTypeArgs) and GetClassFromToken(AndTypeArgs) unless you’re certain the relevant types have been loaded.** (“Relevant types” include the ClassID containing the FunctionID whose mdMethodDef you pass to GetFunctionFromToken(AndTypeArgs), and the ClassID whose mdTypeDef you pass to GetClassFromToken(AndTypeArgs).) If these types have not been loaded, _you may cause them to be loaded now_! This is bad because: -- This is an easy way to crash the app. 
Trying to load a type at the wrong time could cause cycles, causing infinite loops (depending on what your profiler does in response to class load notifications) or outright crashes. For example, trying to load a type while its containing assembly is still in an early phase of loading is a great and fun way to crash the CLR. -- You will impact the behavior of the app. If you’re lucky enough not to crash the app, you’ve still impacted its behavior, by causing types to get loaded in a different order than they normally would. Any impact to app behavior like this makes it difficult for your users to reproduce problems that they are trying to use your tool to diagnose, or may hide problems that they don’t discover until they run their application outside of your tool. +- This is an easy way to crash the app. Trying to load a type at the wrong time could cause cycles, causing infinite loops (depending on what your profiler does in response to class load notifications) or outright crashes. For example, trying to load a type while its containing assembly is still in an early phase of loading is a great and fun way to crash the CLR. +- You will impact the behavior of the app. If you’re lucky enough not to crash the app, you’ve still impacted its behavior, by causing types to get loaded in a different order than they normally would. Any impact to app behavior like this makes it difficult for your users to reproduce problems that they are trying to use your tool to diagnose, or may hide problems that they don’t discover until they run their application outside of your tool. ## Determining whether a class was loaded @@ -54,14 +50,14 @@ MyRetType MyClass::MyFunction(MyArgumentType myArgumentType) then you can be reasonably assured that the following are loaded: -- MyClass -- MyArgumentType (if it’s a value-type) -- MyRetType (if it’s a value-type) -- For any class you know is loaded, so should be: - - its base class - - its value-type fields (not necessarily reference-type fields!) - - implemented interfaces - - value-type generic type arguments (and even reference-type generic type arguments in the case of MyClass) +- MyClass +- MyArgumentType (if it’s a value-type) +- MyRetType (if it’s a value-type) +- For any class you know is loaded, so should be: + - its base class + - its value-type fields (not necessarily reference-type fields!) + - implemented interfaces + - value-type generic type arguments (and even reference-type generic type arguments in the case of MyClass) So much for stacks. What if you encounter an instance of a class on the heap? Surely the class is loaded then, right? Well, probably. If you encounter an object on heap just after GC (inside **GarbageCollectionFinished** , before you return), it should be safe to inspect the class’s layout, and then peek through ObjectIDs to see the values of their fields. @@ -73,7 +69,7 @@ In general, a lot of the uncertainty above comes from types stored in NGENd modu Now is a good time to remind you that, not only is it dangerous to inspect run-time IDs too early (i.e., before they load); it’s also dangerous to inspect run-time IDs too late (i.e., after they **unload** ). For example, if you store ClassIDs and FunctionIDs for later use, and use them “too late”, you can easily crash the CLR. The profiling API does pretty much no validation of anything (in many cases, it’s incapable of doing so without using up significant amounts of memory to maintain lookup tables for everything).
So we generally take any run-time ID that you pass to ICorProfilerInfo\* methods, cast it to an internal CLR structure ptr, and go boom if the ID is bad. -There is no way to just ask the CLR if a FunctionID or ClassID is valid. Indeed, classes could get unloaded, and new classes loaded, and your ClassID may now refer to a totally different (valid) class. +There is no way to just ask the CLR if a FunctionID or ClassID is valid. Indeed, classes could get unloaded, and new classes loaded, and your ClassID may now refer to a totally different (valid) class. You need to keep track of the unloads yourself. You are notified when run-time IDs go out of scope (today, this happens at the level of an AppDomain unloading or a collectible assembly unloading—in both cases all IDs “contained” in the unloading thing are now invalid). Once a run-time ID is out of scope, you are not allowed to pass that run-time ID back to the CLR. In fact, you should consider whether thread synchronization will be necessary in your profiler to maintain this invariant. For example, if a run-time ID gets unloaded on thread A, you’re still not allowed to pass that run-time ID back to the CLR on thread B. So you may need to block on a critical section in thread A during the \*UnloadStarted / AppDomainShutdown\* callbacks, to prevent them from returning to the CLR until any uses of the contained IDs in thread B are finished. @@ -91,16 +87,16 @@ ResolveTypeRef doesn’t know about any of this—it was never designed to be us If you absolutely need to resolve refs to defs, your best bet may be to use your own algorithm which will be as accurate as you can make it, under the circumstances, and which will never try to locate a module that hasn’t been loaded yet. That means that you shouldn’t try to resolve a ref to a def if that def hasn’t actually been loaded into a type by the CLR. Consider using an algorithm similar to the following: -1. Get the AssemblyRef from the TypeRef to get to the name, public key token and version of the assembly where the type should reside. -2. Enumerate all loaded modules that the Profiling API has notified you of (or via [EnumModules](http://msdn.microsoft.com/en-us/library/dd490890)) (you can filter out a specific AppDomain at this point if you want). -3. In each enumerated module, search for a TypeDef with the same name and namespace as the TypeRef (IMetaDataImport::FindTypeDefByName) -4. Pay attention to **type forwarding**! Once you find the TypeDef, it may actually be an “exported” type, in which case you will need to follow the trail to the next module. Read toward the bottom of [this post](Type Forwarding.md) for more info. +1. Get the AssemblyRef from the TypeRef to get to the name, public key token and version of the assembly where the type should reside. +2. Enumerate all loaded modules that the Profiling API has notified you of (or via [EnumModules](http://msdn.microsoft.com/en-us/library/dd490890)) (you can filter out a specific AppDomain at this point if you want). +3. In each enumerated module, search for a TypeDef with the same name and namespace as the TypeRef (IMetaDataImport::FindTypeDefByName) +4. Pay attention to **type forwarding**! Once you find the TypeDef, it may actually be an “exported” type, in which case you will need to follow the trail to the next module. Read toward the bottom of [this post](Type Forwarding.md) for more info. 
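A sketch of steps 2 and 3 above might look like the following (untested and unsupported, exactly as the warning below says; `g_pInfo3` is an assumed `ICorProfilerInfo3*` global, and the type-forwarding check and ordering refinements are left as comments):

```
HRESULT FindLoadedTypeDef(LPCWSTR szFullName, ModuleID *pModuleId, mdTypeDef *ptd)
{
    ICorProfilerModuleEnum *pEnum = NULL;
    HRESULT hr = g_pInfo3->EnumModules(&pEnum);
    if (FAILED(hr)) return hr;

    ModuleID moduleId;
    ULONG cFetched;
    hr = E_FAIL;    // "not loaded anywhere" -- do NOT force a load
    while (pEnum->Next(1, &moduleId, &cFetched) == S_OK)
    {
        IMetaDataImport *pImport = NULL;
        if (SUCCEEDED(g_pInfo3->GetModuleMetaData(moduleId, ofRead,
                IID_IMetaDataImport, (IUnknown **)&pImport)))
        {
            mdTypeDef td;
            if (pImport->FindTypeDefByName(szFullName, mdTokenNil, &td) == S_OK)
            {
                // Step 4: check here whether td is really an exported
                // (forwarded) type, and follow the trail if so.
                *pModuleId = moduleId;
                *ptd = td;
                hr = S_OK;
                pImport->Release();
                break;
            }
            pImport->Release();
        }
    }
    pEnum->Release();
    return hr;
}
```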
The above can be a little bit smarter by paying attention to what order you choose to search through the modules: -- First search for the TypeDef in assemblies which exactly match the name, public key token and version for the AssemblyRef. -- If that fails, then search through assemblies matching name and public key token (where the version is higher than the one supplied – this can happen for Framework assemblies). -- If that fails, then search through all the other assemblies +- First search for the TypeDef in assemblies which exactly match the name, public key token and version for the AssemblyRef. +- If that fails, then search through assemblies matching name and public key token (where the version is higher than the one supplied – this can happen for Framework assemblies). +- If that fails, then search through all the other assemblies I must warn you that the above scheme is **not tested and not supported. Use at your own risk!** @@ -108,7 +104,7 @@ I must warn you that the above scheme is **not tested and not supported. Use at Although I cannot comment on what will or will not be in any particular future version of the CLR, I can tell you that it is clear to us on the CLR team that we have work to do, to make dealing with metadata tokens and their corresponding run-time type information easier from the profiling API. After all, it doesn’t take a rocket scientist to read the above and conclude that it does take a rocket scientist to actually follow all this advice. So for now, enjoy the fact that what you do is really hard, making you difficult to replace, and thus your job all the more secure. You’re welcome. - + Special thanks to David Wrighton and Karel Zikmund, who have helped considerably with all content in this entry around the type system and metadata. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Profiler Detach.md b/docs/design/coreclr/profiling/davbr-blog-archive/Profiler Detach.md index 987ee24507b4..4b87a0018c44 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Profiler Detach.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Profiler Detach.md @@ -9,9 +9,9 @@ The Detach feature allows a profiler that the user is finished with to be unload Not every V4 profiler is allowed to detach from a running process. The general rule is that a profiler which has caused an irreversible impact in the process it’s profiling should _not_ attempt to detach. The CLR catches the following cases: -- Profiler set immutable flags (COR\_PRF\_MONITOR\_IMMUTABLE) via SetEventMask. -- Profiler performed IL rewriting via SetILFunctionBody -- Profiler used the Enter/Leave/Tailcall methods to add callouts to its probes +- Profiler set immutable flags (COR\_PRF\_MONITOR\_IMMUTABLE) via SetEventMask. +- Profiler performed IL rewriting via SetILFunctionBody +- Profiler used the Enter/Leave/Tailcall methods to add callouts to its probes If the profiler attempts to detach after doing any of the above, the CLR will disallow the attempt (see below for details). @@ -25,20 +25,20 @@ There’s one, deceptively simple-looking method the profiler calls to detach it So, the sequence works like this: -1. The profiler **deactivates all the ways control could enter the profiler** (aside from the CLR Profiling API itself). This means removing any Windows callbacks, timer interrupts, hijacking, disabling any other components that may try to call into the profiler DLL, etc. 
The profiler must also wait for all threads that it has created (e.g., a sampling thread, inter-process communication threads, a ForceGC thread, etc.) to exit, except for the one thread the profiler will use to call RequestProfilerDetach(). Any threads created by the CLR, of course, should not be tampered with. - - Your profiler must block here until all those ways control can enter your profiler DLL have truly been deactivated (e.g., just setting a flag to disable sampling may not be enough if your sampling thread is currently performing a sample already in progress). You must coordinate with all components of your profiler so that your profiler DLL knows that everything is verifiably deactivated, and all profiler-created threads have exited (except for the one thread the profiler will use to call RequestProfilerDetach()). -2. If the profiler will use a thread of its own creation to call RequestProfilerDetach() (which is the typical way this API will be called), that thread must own a reference onto the profiler’s DLL, via its own **LoadLibrary()** call that it makes on the profiler DLL. This can either be done when the thread starts up, or now, or sometime in between. But that reference must be added at some point before calling RequestProfilerDetach(). -3. Profiler calls ICorProfilerInfo3:: **RequestProfilerDetach** (). - - (A) This causes the CLR to (synchronously) set internal state to avoid making any further calls into the profiler via the ICorProfilerCallback\* interfaces, and to refuse any calls from the profiler into ICorProfilerInfo\* interfaces (such calls will now fail early with CORPROF\_E\_PROFILER\_DETACHING). - - (B) The CLR also (asynchronously) begins a period safety check on another thread to determine when all pre-existing calls into the profiler via the ICorProfilerCallback\* interfaces have returned. - - Note: It is expected that your profiler will not make any more “unsolicited” calls back into the CLR via any interfaces (ICorProfilerInfo\*, hosting, metahost, metadata, etc.). By “unsolicited”, I’m referring to calls that didn’t originate from the CLR via ICorProfilerCallback\*. In other words, it’s ok for the profiler to continue to do its usual stuff in its implementation of ICorProfilerCallback methods (which may include calling into the CLR via ICorProfilerInfo\*), as the CLR will wait for those outer ICorProfilerCallback methods to return as per 3B. But the profiler must not make any other calls into the CLR (i.e., that are not sandwiched inside an ICorProfilerCallback call). You should already have deactivated any component of your profiler that would make such unsolicited calls in step 1. -4. Assuming the above RequestProfilerDetach call was made on a profiler-created thread, that thread must now call [**FreeLibraryAndExitThread**](http://msdn.microsoft.com/en-us/library/ms683153(VS.85).aspx)**()**. (Note: that’s a specialized Windows API that combines FreeLibrary() and ExitThread() in such a way that races can be avoided—do not call FreeLibrary() and ExitThread() separately.) -5. On another thread, the CLR continues its **period safety checks** from 3B above. Eventually the CLR determines that there are no more ICorProfilerCallback\* interface calls currently executing, and it is therefore safe to unload the profiler. -6. The CLR calls ICorProfilerCallback3:: **ProfilerDetachSucceeded**. The profiler can use this signal to know that it’s about to be unloaded. 
It’s expected that the profiler will do very little in this callback—probably just notifying the user that the profiler is about to be unloaded. Any cleanup the profiler needs to do should already have been done during step 1. -7. CLR makes the necessary number of **Release** () calls on ICorProfilerCallback3. The reference count should go down to 0 at this point, and the profiler may deallocate any memory it had previously allocated to support its callback implementation. -8. CLR calls **FreeLibrary** () on the profiler DLL. This should be the last reference to the profiler’s DLL, and your DLL will now be unloaded. - - Note: in some cases, it’s theoretically possible that step 4 doesn’t happen until _after_ this step, in which case the last reference to the profiler’s DLL will actually be released by your profiler’s thread that called RequestProfilerDetach and then FreeLibraryAndExitThread. That’s because steps 1-4 happen on your profiler’s thread, and steps 5-8 happen on a dedicated CLR thread (for detaching profilers) sometime after step 3 is completed. So there’s a race between step 4 and all of steps 5-8. There’s no harm in this, so long as you’re playing nice by doing your own LoadLibrary and FreeLibraryAndExitThread as described above. -9. The CLR adds an Informational entry to the Application Event Log noting that the profiler has been unloaded. The CLR is now ready to service any profiler attach requests. +1. The profiler **deactivates all the ways control could enter the profiler** (aside from the CLR Profiling API itself). This means removing any Windows callbacks, timer interrupts, hijacking, disabling any other components that may try to call into the profiler DLL, etc. The profiler must also wait for all threads that it has created (e.g., a sampling thread, inter-process communication threads, a ForceGC thread, etc.) to exit, except for the one thread the profiler will use to call RequestProfilerDetach(). Any threads created by the CLR, of course, should not be tampered with. + - Your profiler must block here until all those ways control can enter your profiler DLL have truly been deactivated (e.g., just setting a flag to disable sampling may not be enough if your sampling thread is currently performing a sample already in progress). You must coordinate with all components of your profiler so that your profiler DLL knows that everything is verifiably deactivated, and all profiler-created threads have exited (except for the one thread the profiler will use to call RequestProfilerDetach()). +2. If the profiler will use a thread of its own creation to call RequestProfilerDetach() (which is the typical way this API will be called), that thread must own a reference onto the profiler’s DLL, via its own **LoadLibrary()** call that it makes on the profiler DLL. This can either be done when the thread starts up, or now, or sometime in between. But that reference must be added at some point before calling RequestProfilerDetach(). +3. Profiler calls ICorProfilerInfo3:: **RequestProfilerDetach** (). + - (A) This causes the CLR to (synchronously) set internal state to avoid making any further calls into the profiler via the ICorProfilerCallback\* interfaces, and to refuse any calls from the profiler into ICorProfilerInfo\* interfaces (such calls will now fail early with CORPROF\_E\_PROFILER\_DETACHING). 
+ - (B) The CLR also (asynchronously) begins a periodic safety check on another thread to determine when all pre-existing calls into the profiler via the ICorProfilerCallback\* interfaces have returned. + - Note: It is expected that your profiler will not make any more “unsolicited” calls back into the CLR via any interfaces (ICorProfilerInfo\*, hosting, metahost, metadata, etc.). By “unsolicited”, I’m referring to calls that didn’t originate from the CLR via ICorProfilerCallback\*. In other words, it’s ok for the profiler to continue to do its usual stuff in its implementation of ICorProfilerCallback methods (which may include calling into the CLR via ICorProfilerInfo\*), as the CLR will wait for those outer ICorProfilerCallback methods to return as per 3B. But the profiler must not make any other calls into the CLR (i.e., that are not sandwiched inside an ICorProfilerCallback call). You should already have deactivated any component of your profiler that would make such unsolicited calls in step 1. +4. Assuming the above RequestProfilerDetach call was made on a profiler-created thread, that thread must now call [**FreeLibraryAndExitThread**](http://msdn.microsoft.com/en-us/library/ms683153(VS.85).aspx)**()**. (Note: that’s a specialized Windows API that combines FreeLibrary() and ExitThread() in such a way that races can be avoided—do not call FreeLibrary() and ExitThread() separately.) +5. On another thread, the CLR continues its **periodic safety checks** from 3B above. Eventually the CLR determines that there are no more ICorProfilerCallback\* interface calls currently executing, and it is therefore safe to unload the profiler. +6. The CLR calls ICorProfilerCallback3:: **ProfilerDetachSucceeded**. The profiler can use this signal to know that it’s about to be unloaded. It’s expected that the profiler will do very little in this callback—probably just notifying the user that the profiler is about to be unloaded. Any cleanup the profiler needs to do should already have been done during step 1. +7. CLR makes the necessary number of **Release** () calls on ICorProfilerCallback3. The reference count should go down to 0 at this point, and the profiler may deallocate any memory it had previously allocated to support its callback implementation. +8. CLR calls **FreeLibrary** () on the profiler DLL. This should be the last reference to the profiler’s DLL, and your DLL will now be unloaded. + - Note: in some cases, it’s theoretically possible that step 4 doesn’t happen until _after_ this step, in which case the last reference to the profiler’s DLL will actually be released by your profiler’s thread that called RequestProfilerDetach and then FreeLibraryAndExitThread. That’s because steps 1-4 happen on your profiler’s thread, and steps 5-8 happen on a dedicated CLR thread (for detaching profilers) sometime after step 3 is completed. So there’s a race between step 4 and all of steps 5-8. There’s no harm in this, so long as you’re playing nice by doing your own LoadLibrary and FreeLibraryAndExitThread as described above. +9. The CLR adds an Informational entry to the Application Event Log noting that the profiler has been unloaded. The CLR is now ready to service any profiler attach requests.
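Pulling steps 1 through 4 together on the profiler's side, the detach thread might look like the following sketch (`DeactivateEverything`, `g_pInfo3`, and the DLL name are all hypothetical placeholders):

```
#include <windows.h>

extern ICorProfilerInfo3 *g_pInfo3;   // assumed global
void DeactivateEverything();          // hypothetical: implements step 1

DWORD WINAPI DetachThreadProc(LPVOID)
{
    // Step 2: this thread holds its own reference on the profiler DLL.
    HMODULE hSelf = LoadLibraryW(L"MyProfiler.dll");  // hypothetical name

    // Step 1: verifiably shut down samplers, timers, IPC, hijacking, and
    // wait for all other profiler-created threads to exit.
    DeactivateEverything();

    // Step 3: ask the CLR to begin the detach.
    g_pInfo3->RequestProfilerDetach(5000 /* dwExpectedCompletionMilliseconds */);

    // Step 4: release our reference and exit atomically; never call
    // FreeLibrary() and ExitThread() separately here.
    FreeLibraryAndExitThread(hSelf, 0);
}
```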
## RequestProfilerDetach @@ -46,17 +46,17 @@ Let’s dive a little deeper into the method you call to detach your profiler: `HRESULT RequestProfilerDetach([in] DWORD dwExpectedCompletionMilliseconds);` - + First off, you’ll notice this is on ICorProfilerInfo3, the interface your profiler DLL uses, in the same process as your profilee. Although the AttachProfiler API is called from outside the process, this detach method is called from in-process. Why? Well, the general rule with profilers is that _everything_ is done in-process. Attach is an exception because your profiler isn’t in the process yet. You need to somehow trigger your profiler to load, and you can’t do that from a process in which you have no code executing yet! So Attach is sort of a boot-strapping API that has to be called from a process of your own making. Once your profiler DLL is up and running, it is in charge of everything, from within the same process as the profilee. And detach is no exception. Now with that said, it’s probably typical that your profiler will detach in response to an end user action—probably via some GUI that you ship that runs in its own process. So a case could be made that the CLR team could have made your life easier by providing an out-of-process way to do a detach, so that your GUI could easily trigger a detach, just as it triggered the attach. However, you could make that same argument about all the ways you might want to control a profiler via a GUI, such as these commands: -- Do a GC now and show me the heap -- Dial up or down the sampling frequency -- Change which instrumented methods should log their invocations -- Start / stop monitoring exceptions -- etc. +- Do a GC now and show me the heap +- Dial up or down the sampling frequency +- Change which instrumented methods should log their invocations +- Start / stop monitoring exceptions +- etc. The point is, if you have a GUI to control your profiler, then you probably already have an inter-process mechanism for the GUI to communicate with your profiler DLL. So think of “detach” as yet one more command your GUI will send to your profiler DLL. @@ -66,10 +66,10 @@ The CLR uses that value in its Sleep() statement that sits between each periodic Until the profiler can be unloaded, it will be considered “loaded” (though deactivated in the sense that no new callback methods will be called). This prevents any new profiler from attaching. - + Ok, that wraps up how detaching works. If you remember only one thing from this post, remember that it’s really easy to cause an application you profile to AV after your profiler unloads if you’re not careful. While the CLR tracks outgoing ICorProfilerCallback\* calls, it does not track any other way that control can enter your profiler DLL. 
_Before_ your profiler calls RequestProfilerDetach: -- You must take care to deactivate all other ways control can enter your profiler DLL -- Your profiler must block until all those other ways control can enter your profiler DLL have verifiably been deactivated +- You must take care to deactivate all other ways control can enter your profiler DLL +- Your profiler must block until all those other ways control can enter your profiler DLL have verifiably been deactivated diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md b/docs/design/coreclr/profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md index 682ad7bf583e..935138b64f90 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Profiler stack walking Basics and beyond.md @@ -17,31 +17,31 @@ It’s nice to be able to get call stacks whenever you want them. But with powe So let’s take a look at the beast. Here’s what your profiler calls (you can find this in ICorProfilerInfo2, in corprof.idl): ``` -HRESULT DoStackSnapshot( - [in] ThreadID thread, - [in] StackSnapshotCallback *callback, - [in] ULONG32 infoFlags, - [in] void *clientData, - [in, size_is(contextSize), length_is(contextSize)] BYTE context[], +HRESULT DoStackSnapshot( + [in] ThreadID thread, + [in] StackSnapshotCallback *callback, + [in] ULONG32 infoFlags, + [in] void *clientData, + [in, size_is(contextSize), length_is(contextSize)] BYTE context[], [in] ULONG32 contextSize); ``` And here’s what the CLR calls on your profiler (you can also find this in corprof.idl). You’ll pass a pointer to your implementation of this function in the callback parameter above. ``` -typedef HRESULT \_\_stdcall StackSnapshotCallback( - FunctionID funcId, - UINT_PTR ip, - COR_PRF_FRAME_INFO frameInfo, - ULONG32 contextSize, - BYTE context[], +typedef HRESULT __stdcall StackSnapshotCallback( + FunctionID funcId, + UINT_PTR ip, + COR_PRF_FRAME_INFO frameInfo, + ULONG32 contextSize, + BYTE context[], void *clientData); ``` It’s like a sandwich. When your profiler wants to walk the stack, you call DoStackSnapshot. Before the CLR returns from that call, it calls your StackSnapshotCallback several times, once for each managed frame (or run of unmanaged frames) on the stack: ``` -Profiler calls DoStackSnapshot. Whole wheat bread - CLR calls StackSnapshotCallback. Lettuce frame (“leaf”-most frame, ha) - CLR calls StackSnapshotCallback. Tomato frame - CLR calls StackSnapshotCallback. Bacon frame (root or “main” frame) +Profiler calls DoStackSnapshot. Whole wheat bread + CLR calls StackSnapshotCallback. Lettuce frame (“leaf”-most frame, ha) + CLR calls StackSnapshotCallback. Tomato frame + CLR calls StackSnapshotCallback. Bacon frame (root or “main” frame) CLR returns back to profiler from DoStackSnapshot Whole wheat bread ``` @@ -77,50 +77,20 @@ Before I continue from this exciting cliffhanger, a brief interlude. Everyone k Now that we’re speaking the same language. Let’s look at a mixed-mode stack: -| - +``` Unmanaged - - | -| - D (Managed) - - | -| - Unmanaged - - | -| - C (Managed) - - | -| - B (Managed) - - | -| - Unmanaged - - | -| - A (Managed) - - | -| - Main (Managed) - - | +``` Stepping back a bit, it’s worthwhile to understand why DoStackSnapshot exists in the first place. It’s there to help you walk _managed_ frames on the stack. 
If you tried to walk managed frames yourself, you would get unreliable results, particularly on 32 bits, because of some wacky calling conventions used in managed code. The CLR understands these calling conventions, and DoStackSnapshot is therefore in a uniquely suitable position to help you decode them. However, DoStackSnapshot is not a complete solution if you want to be able to walk the entire stack, including unmanaged frames. Here’s where you have a choice: -1. Do nothing and report stacks with “unmanaged holes” to your users, or +1. Do nothing and report stacks with “unmanaged holes” to your users, or 2. Write your own unmanaged stack walker to fill in those holes. When DoStackSnapshot comes across a block of unmanaged frames, it calls your StackSnapshotCallback with funcId=0. (I think I mentioned this before, but I’m not sure you were listening.) If you’re going with option #1 above, simply do nothing in your callback when funcId=0. We’ll call you again for the next managed frame and you can wake up at that point. @@ -145,81 +115,67 @@ But before you get too deep, note that the issue of whether and how to seed a st For the truly adventurous profiler that is doing an asynchronous, cross-thread, seeded stack walk while filling in the unmanaged holes, here’s what it would look like. -| - -Block of -Unmanaged -Frames +Block of Unmanaged Frames - | -1. You suspend the target thread (target thread’s suspend count is now 1) -2. You get the target thread’s current register context -3. You determine if the register context points to unmanaged code (e.g., call ICorProfilerInfo2::GetFunctionFromIP(), and see if you get back a 0 FunctionID) +1. You suspend the target thread (target thread’s suspend count is now 1) +2. You get the target thread’s current register context +3. You determine if the register context points to unmanaged code (e.g., call ICorProfilerInfo2::GetFunctionFromIP(), and see if you get back a 0 FunctionID) 4. In this case the register context does point to unmanaged code, so you perform an unmanaged stack walk until you find the top-most managed frame (D) - | -| -Function D -(Managed) + ``` + Function D + (Managed) + ``` - | 1. You call DoStackSnapshot with your seed context. CLR suspends target thread again: its suspend count is now 2. Our sandwich begins. 1. CLR calls your StackSnapshotCallback with FunctionID for D. - | -| -Block of -Unmanaged -Frames + ``` + Block of + Unmanaged + Frames + ``` - | 1. CLR calls your StackSnapshotCallback with FunctionID=0. You’ll need to walk this block yourself. You can stop when you hit the first managed frame, or you can cheat: delay your unmanaged walk until sometime after your next callback, as the next callback will tell you exactly where the next managed frame begins (and thus where your unmanaged walk should end). - | -| -Function C -(Managed) - - | + ``` + Function C + (Managed) + ``` 1. CLR calls your StackSnapshotCallback with FunctionID for C. - | -| -Function B -(Managed) + ``` + Function B + (Managed) + ``` - | 1. CLR calls your StackSnapshotCallback with FunctionID for B. - | -| -Block of -Unmanaged -Frames + ``` + Block of + Unmanaged + Frames + ``` - | 1. CLR calls your StackSnapshotCallback with FunctionID=0. Again, you’ll need to walk this block yourself. - | -| -Function A -(Managed) + ``` + Function A + (Managed) + ``` - | 1. CLR calls your StackSnapshotCallback with FunctionID for A. - | -| -Main -(Managed) + ``` + Main + (Managed) + ``` - | -1. CLR calls your StackSnapshotCallback with FunctionID for Main. +1. 
CLR calls your StackSnapshotCallback with FunctionID for Main. 2. DoStackSnapshot “resumes” target thread (its suspend count is now 1) and returns. Our sandwich is complete. 1. You resume target thread (its suspend count is now 0, so it’s resumed for real). - | **Triumph over evil** @@ -253,8 +209,8 @@ Problem 2: _While you suspend the target thread, the target thread tries to susp “Come on! Like that could really happen.” Believe it or not, if: -- Your app runs on a multiproc box, and -- Thread A runs on one proc and thread B runs on another, and +- Your app runs on a multiproc box, and +- Thread A runs on one proc and thread B runs on another, and - A tries to suspend B while B tries to suspend A then it’s possible that both suspensions win, and both threads end up suspended. It’s like the line from that movie: “Multiproc means never having to say, ‘I lose.’”. Since each thread is waiting for the other to wake it up, they stay suspended forever. It is the most romantic of all deadlocks. @@ -265,7 +221,7 @@ Ok, so, why is the target thread trying to suspend you anyway? Well, in a hypot A less obvious reason that the target thread might try to suspend your walking thread is due to the inner workings of the CLR. The CLR suspends application threads to help with things like garbage collection. So if your walker tries to walk (and thus suspend) the thread doing the GC at the same time the thread doing the GC tries to suspend your walker, you are hosed. -The way out, fortunately, is quite simple. The CLR is only going to suspend threads it needs to suspend in order to do its work. Let’s label the two threads involved in your stack walk: Thread A = the current thread (the thread performing the walk), and Thread B = the target thread (the thread whose stack is walked). As long as Thread A has _never executed managed code_ (and is therefore of no use to the CLR during a garbage collection), then the CLR will never try to suspend Thread A. This means it’s safe for your profiler to have Thread A suspend Thread B, as the CLR will have no reason for B to suspend A. +The way out, fortunately, is quite simple. The CLR is only going to suspend threads it needs to suspend in order to do its work. Let’s label the two threads involved in your stack walk: Thread A = the current thread (the thread performing the walk), and Thread B = the target thread (the thread whose stack is walked). As long as Thread A has _never executed managed code_ (and is therefore of no use to the CLR during a garbage collection), then the CLR will never try to suspend Thread A. This means it’s safe for your profiler to have Thread A suspend Thread B, as the CLR will have no reason for B to suspend A. If you’re writing a sampling profiler, it’s quite natural to ensure all of this. You will typically have a separate thread of your own creation that responds to timer interrupts and walks the stacks of other threads. Call this your sampler thread. Since you create this sampler thread yourself and have control over what it executes, the CLR will have no reason to suspend it. And this also fixes the “poorly-written profiler” example above, since this sampler thread is the only thread of your profiler trying to walk or suspend other threads. So your profiler will never try to directly suspend the sampler thread. 
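Here is the seeding sequence from the walkthrough above as a sketch, run on just such a sampler thread (x64 register names for brevity; the native unwind of the top unmanaged block and all error handling are elided; `g_pInfo` and `SnapshotCallback` are the assumed global and callback sketched earlier):

```
void SampleThread(ThreadID threadId)
{
    HANDLE hThread;
    if (FAILED(g_pInfo->GetHandleFromThread(threadId, &hThread)))
        return;                               // handle is owned by the CLR

    if (SuspendThread(hThread) == (DWORD)-1)  // suspend count -> 1
        return;

    CONTEXT ctx;
    ctx.ContextFlags = CONTEXT_FULL;
    if (GetThreadContext(hThread, &ctx))
    {
        FunctionID funcId = 0;
        g_pInfo->GetFunctionFromIP((LPCBYTE)ctx.Rip, &funcId);
        if (funcId == 0)
        {
            // Top of stack is unmanaged: unwind natively (RtlVirtualUnwind)
            // until ctx describes the top-most managed frame.
        }
        // Seeded walk; DoStackSnapshot suspends again (count -> 2) internally.
        g_pInfo->DoStackSnapshot(threadId, &SnapshotCallback,
            COR_PRF_SNAPSHOT_DEFAULT, NULL, (BYTE *)&ctx, sizeof(ctx));
    }
    ResumeThread(hThread);                    // count -> 0: resumed for real
}
```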
@@ -281,7 +237,7 @@ Lucky for you, the CLR notifies profilers when a thread is about to be destroyed Rule 2: Block in ThreadDestroyed callback until that thread’s stack walk is complete - + **_GC helps you make a cycle_** @@ -293,25 +249,25 @@ A while back I mentioned that it is clearly a bad idea for your profiler to hold Example #1: -- Thread A successfully grabs and now owns one of your profiler locks -- Thread B = thread doing the GC -- Thread B calls profiler’s GarbageCollectionStarted callback -- Thread B blocks on the same profiler lock -- Thread A executes GetClassFromTokenAndTypeArgs() -- GetClassFromTokenAndTypeArgs tries to trigger a GC, but notices a GC is already in progress. -- Thread A blocks, waiting for GC currently in progress (Thread B) to complete +- Thread A successfully grabs and now owns one of your profiler locks +- Thread B = thread doing the GC +- Thread B calls profiler’s GarbageCollectionStarted callback +- Thread B blocks on the same profiler lock +- Thread A executes GetClassFromTokenAndTypeArgs() +- GetClassFromTokenAndTypeArgs tries to trigger a GC, but notices a GC is already in progress. +- Thread A blocks, waiting for GC currently in progress (Thread B) to complete - But B is waiting for A, because of your profiler lock. ![](media/gccycle.jpg) Example #2: -- Thread A successfully grabs and now owns one of your profiler locks -- Thread B calls profiler’s ModuleLoadStarted callback -- Thread B blocks on the same profiler lock -- Thread A executes GetClassFromTokenAndTypeArgs() -- GetClassFromTokenAndTypeArgs triggers a GC -- Thread A (now doing the GC) waits for B to be ready to be collected +- Thread A successfully grabs and now owns one of your profiler locks +- Thread B calls profiler’s ModuleLoadStarted callback +- Thread B blocks on the same profiler lock +- Thread A executes GetClassFromTokenAndTypeArgs() +- GetClassFromTokenAndTypeArgs triggers a GC +- Thread A (now doing the GC) waits for B to be ready to be collected - But B is waiting for A, because of your profiler lock. ![](media/deadlock.jpg) @@ -332,10 +288,10 @@ Yeah, if you read carefully, you’ll see that this rule never even mentions DoS I’m just about tuckered out, so I’m gonna close this out with a quick summary of the highlights. Here's what's important to remember. -1. Synchronous stack walks involve walking the current thread in response to a profiler callback. These don’t require seeding, suspending, or any special rules. Enjoy! -2. Asynchronous walks require a seed if the top of the stack is unmanaged code not part of a PInvoke or COM call. You supply a seed by directly suspending the target thread and walking it yourself, until you find the top-most managed frame. If you don’t supply a seed in this case, DoStackSnapshot will just return a failure code to you. -3. If you directly suspend threads, remember that only a thread that has never run managed code can suspend another thread -4. When doing asynchronous walks, always block in your ThreadDestroyed callback until that thread’s stack walk is complete +1. Synchronous stack walks involve walking the current thread in response to a profiler callback. These don’t require seeding, suspending, or any special rules. Enjoy! +2. Asynchronous walks require a seed if the top of the stack is unmanaged code not part of a PInvoke or COM call. You supply a seed by directly suspending the target thread and walking it yourself, until you find the top-most managed frame. 
If you don’t supply a seed in this case, DoStackSnapshot will just return a failure code to you. +3. If you directly suspend threads, remember that only a thread that has never run managed code can suspend another thread +4. When doing asynchronous walks, always block in your ThreadDestroyed callback until that thread’s stack walk is complete 5. Do not hold a lock while your profiler calls into a CLR function that can trigger a GC Finally, a note of thanks to the rest of the CLR Profiling API team, as the writing of these rules is truly a team effort. And special thanks to Sean Selitrennikoff who provided an earlier incarnation of much of this content. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/ReJIT - The Basics.md b/docs/design/coreclr/profiling/davbr-blog-archive/ReJIT - The Basics.md index 746a45d38c0c..544e0da32d0f 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/ReJIT - The Basics.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/ReJIT - The Basics.md @@ -2,13 +2,13 @@ This post is organized in chronological order, telling what your profiler should be doing at the following times in the process: -- Startup Time -- ModuleLoadFinished Time -- RequestReJIT Time -- Actual ReJIT Time -- RequestRevert Time +- Startup Time +- ModuleLoadFinished Time +- RequestReJIT Time +- Actual ReJIT Time +- RequestRevert Time + - ## Startup Time @@ -22,12 +22,6 @@ Typically, your profiler will also create a new thread at this point, call it yo ## ModuleLoadFinished Time -### - -### - -### - ### Metadata Changes As each module loads, you will likely need to add metadata so that your future ReJITs will have the tokens they need. What you do here heavily depends on the kind of instrumentation you want to do. I’m assuming you’re doing instrumentation that adds some calls from the user code into brand new profiler helper methods you will add somewhere. If you plan to instrument mscorlib, you will likely want to add those profiler helper methods into mscorlib (remember, mscorlib is not allowed to contain an AssemblyRef that points to any other assembly!). Otherwise, perhaps you plan to ship a managed helper assembly that will sit on your user’s disk, and all your profiler helper methods will reside in this on-disk managed helper assembly. @@ -46,19 +40,15 @@ This won’t make much sense until you’ve read the next section, but I’m pla Now imagine your user has turned some dial on your out-of-process GUI, to request that some functions get instrumented (or re-instrumented (or re-re-instrumented (or …))). This results in a signal sent to your in-process profiler component. Your ReJIT Thread now knows it must call **RequestReJIT**. You can call this API once in bulk for a list of functions to ReJIT. Note that functions are expressed in terms of ModuleID + mdMethodDef metadata tokens. A few things to note about this: -- You request that all instantiations of a generic function (or function on a generic class) get ReJITted with a single ModuleID + mdMethodDef pair. You cannot request a specific instantiation be ReJITted, or provide instantiation-specific IL. This is nothing new, as classic first-JIT-instrumentation should never be customized per instantiation either. But the ReJIT API is designed with this restriction in mind, as you’ll see later on. -- ModuleID is specific to one AppDomain for unshared modules, or the SharedDomain for shared modules. 
Thus: - - If ModuleID is shared, then your request will simultaneously apply to all domains using the shared copy of this module (and thus function) - - If ModuleID is unshared, then your request will apply only to the single AppDomain using this module (and function) - - Therefore, if you want this ReJIT request to apply to _all unshared copies_ of this function: - - You’ll need to include all such ModuleIDs in this request. - - And… any _future_ unshared loads of this module will result in new ModuleIDs. So as those loads happen, you’ll need to make further calls to RequestReJIT with the new ModuleIDs to ensure those copies get ReJITted as well. - - This is optional, and only need be done if you truly want this ReJIT request to apply to all unshared copies of the function. You’re perfectly welcome to ReJIT only those unshared copies you want (and / or the shared copy). - - Now you can re-read the “Re-Request Prior ReJITs” section above. :-) - -## - -### +- You request that all instantiations of a generic function (or function on a generic class) get ReJITted with a single ModuleID + mdMethodDef pair. You cannot request a specific instantiation be ReJITted, or provide instantiation-specific IL. This is nothing new, as classic first-JIT-instrumentation should never be customized per instantiation either. But the ReJIT API is designed with this restriction in mind, as you’ll see later on. +- ModuleID is specific to one AppDomain for unshared modules, or the SharedDomain for shared modules. Thus: + - If ModuleID is shared, then your request will simultaneously apply to all domains using the shared copy of this module (and thus function) + - If ModuleID is unshared, then your request will apply only to the single AppDomain using this module (and function) + - Therefore, if you want this ReJIT request to apply to _all unshared copies_ of this function: + - You’ll need to include all such ModuleIDs in this request. + - And… any _future_ unshared loads of this module will result in new ModuleIDs. So as those loads happen, you’ll need to make further calls to RequestReJIT with the new ModuleIDs to ensure those copies get ReJITted as well. + - This is optional, and only need be done if you truly want this ReJIT request to apply to all unshared copies of the function. You’re perfectly welcome to ReJIT only those unshared copies you want (and / or the shared copy). + - Now you can re-read the “Re-Request Prior ReJITs” section above. :-) ### More on AppDomains @@ -81,18 +71,18 @@ You may have noticed that you have read a whole lot of words so far, but we have IF this is the first generic instantiation to ReJIT, for a given RequestReJIT call (or this is not a generic at all), THEN: - CLR calls **GetReJITParameters** - - This callback passes an ICorProfilerFunctionControl to your profiler. Inside your implementation of GetReJITParameters (and no later!) you may call into ICorProfilerFunctionControl to provide the instrumented IL and codegen flags that the CLR should use during the ReJIT - - Therefore it is here where you may: - - Call GetILFunctionBody - - Add any new LocalVarSigTokens to the function’s module’s metadata. (You may not do any other metadata modifications here, though!) - - Rewrite the IL to your specifications, passing it to ICorProfilerFunctionControl::SetILFunctionBody. - - You may NOT call ICorProfilerInfo::SetILFunctionBody for a ReJIT! This API still exists if you want to do classic first-JIT IL rewriting only. 
- - Note that GetReJITParameters expresses the function getting compiled in terms of the ModuleID + mdMethodDef pair you previously specified to RequestReJIT, and _not_ in terms of a FunctionID. As mentioned before, you may not provide instantiation-specific IL! + - This callback passes an ICorProfilerFunctionControl to your profiler. Inside your implementation of GetReJITParameters (and no later!) you may call into ICorProfilerFunctionControl to provide the instrumented IL and codegen flags that the CLR should use during the ReJIT + - Therefore it is here where you may: + - Call GetILFunctionBody + - Add any new LocalVarSigTokens to the function’s module’s metadata. (You may not do any other metadata modifications here, though!) + - Rewrite the IL to your specifications, passing it to ICorProfilerFunctionControl::SetILFunctionBody. + - You may NOT call ICorProfilerInfo::SetILFunctionBody for a ReJIT! This API still exists if you want to do classic first-JIT IL rewriting only. + - Note that GetReJITParameters expresses the function getting compiled in terms of the ModuleID + mdMethodDef pair you previously specified to RequestReJIT, and _not_ in terms of a FunctionID. As mentioned before, you may not provide instantiation-specific IL! And then, for all ReJITs (regardless of whether they are for the first generic instantiation or not): -- CLR calls **ReJITCompilationStarted** -- CLR calls **ReJITCompilationFinished** +- CLR calls **ReJITCompilationStarted** +- CLR calls **ReJITCompilationFinished** These callbacks express the function getting compiled in terms of FunctionID + ReJITID. (ReJITID is simply a disambiguating value so that each ReJITted version of a function instantiation can be uniquely identified via FunctionID + ReJITID.) Your profiler doesn’t need to do anything in the above callbacks if it doesn’t want to. They just notify you that the ReJIT is occurring, and get called for each generic instantiation (or non-generic) that gets ReJITted. @@ -114,12 +104,12 @@ Note that RequestRevert allows you to revert back to the original JITted IL, and If there are any errors with performing the ReJIT, you will be notified by the dedicated callback ICorProfilerCallback4::ReJITError(). Errors can happen at a couple times: -- RequestReJIT Time: These are fundamental errors with the request itself. This can include bad parameter values, requesting to ReJIT dynamic (Ref.Emit) code, out of memory, etc. If errors occur here, you’ll get a callback to your implementation of ReJITError(), sandwiched inside your call to RequestReJIT on your ReJIT Thread. -- Actual ReJIT Time: These are errors we don’t encounter until actually trying to ReJIT the function itself. When these later errors occur, your implementation of ReJITError() is called on whatever CLR thread encountered the error. +- RequestReJIT Time: These are fundamental errors with the request itself. This can include bad parameter values, requesting to ReJIT dynamic (Ref.Emit) code, out of memory, etc. If errors occur here, you’ll get a callback to your implementation of ReJITError(), sandwiched inside your call to RequestReJIT on your ReJIT Thread. +- Actual ReJIT Time: These are errors we don’t encounter until actually trying to ReJIT the function itself. When these later errors occur, your implementation of ReJITError() is called on whatever CLR thread encountered the error. You’ll note that ReJITError can provide you not only the ModuleID + mdMethodDef pair that caused the error, but optionally a FunctionID as well. 
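For illustration only, an implementation of this callback might look something like the sketch below; `MyProfiler` and `LogError` are invented names, not part of the API.

```cpp
// Sketch of a profiler's ICorProfilerCallback4::ReJITError implementation.
HRESULT STDMETHODCALLTYPE MyProfiler::ReJITError(ModuleID moduleId,
                                                 mdMethodDef methodId,
                                                 FunctionID functionId,
                                                 HRESULT hrStatus)
{
    if (functionId != 0)
    {
        // Failure was specific to one instantiation of the method.
        LogError("ReJIT failed for one instantiation", moduleId, methodId, hrStatus);
    }
    else
    {
        // Failure was fundamental to the method itself.
        LogError("ReJIT failed for all instantiations", moduleId, methodId, hrStatus);
    }
    return S_OK;
}
```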
Depending on the nature of the error that occurred, the FunctionID may be available, so that your profiler may know the exact generic instantiation involved with the error. If FunctionID is null, then the error was fundamental to the generic function itself (and thus occurred for all instantiations). - + Ok, that about covers it on how your profiler is expected to use ReJIT. As you can see, there are several different tasks your profiler needs to do at different times to get everything right. But I trust you, you’re smart. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Sample A Signature Blob Parser for your Profiler.md index 34be8594412d..88ab78844b3d 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Sample A Signature Blob Parser for your Profiler.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Sample A Signature Blob Parser for your Profiler.md @@ -3,43 +3,46 @@ If your profiler plays with metadata, you've undoubtedly come across signature blobs. They’re used to encode type information for method definitions & references, local variables, and a whole lot more. They’re wonderfully compact, recursively versatile, and sometimes, well, challenging to parse. Fortunately, [Rico Mariani](https://docs.microsoft.com/en-us/archive/blogs/ricom/) was feeling generous one day, and churned out a simple parser that can read these types of signatures: -MethodDefSig -MethodRefSig -StandAloneMethodSig -FieldSig -PropertySig -LocalVarSig - -Here are the files: -[sigparse.cpp](samples/sigparse.cpp) (Rico's signature parser) -[sigformat.cpp](samples/sigformat.cpp) (An example extension to the parser) -[PlugInToYourProfiler.cpp](samples/PlugInToYourProfiler.cpp) (Example code to plug the extension into your profiler) +- MethodDefSig +- MethodRefSig +- StandAloneMethodSig +- FieldSig +- PropertySig +- LocalVarSig + +Here are the files: + +- [sigparse.cpp](samples/sigparse.cpp) (Rico's signature parser) +- [sigformat.cpp](samples/sigformat.cpp) (An example extension to the parser) +- [PlugInToYourProfiler.cpp](samples/PlugInToYourProfiler.cpp) (Example code to plug the extension into your profiler) Open up **sigparse.cpp** in your favorite editor and take a look at the grammar at the top. The grammar comes from the ECMA CLI spec. Jonathan Keljo has a [link](http://blogs.msdn.com/jkeljo/archive/2005/08/04/447726.aspx) to it from his blog. This tells you the types of signature blobs the parser can handle. Sigparse.cpp is structured without any dependencies on any headers, so you can easily absorb it into your profiler project. There are two things you will need to do to make use of the code. I provided examples of each of these in the download above to help you out: -1. You will **extend the code** to make use of the parsed components of the signature however you like. Perhaps you’ll build up your own internal structures based on what you find. Or maybe you’ll build a pretty-printer that displays method prototypes in the managed language of your choice. -2. You will then **call the code** to perform the parse on signature blobs you encounter while profiling. +1. You will **extend the code** to make use of the parsed components of the signature however you like. Perhaps you’ll build up your own internal structures based on what you find. Or maybe you’ll build a pretty-printer that displays method prototypes in the managed language of your choice. +2.
You will then **call the code** to perform the parse on signature blobs you encounter while profiling. ## Extending the code Simply derive a new class from SigParser, and override the virtual functions. The functions you override are events to be handled as the parser traverses the signature in top-down fashion. For example, when the parser encounters a MethodDef, you might see calls to your overrides of: -NotifyBeginMethod() - NotifyParamCount() - NotifyBeginRetType() - NotifyBeginType() - NotifyTypeSimple() - NotifyEndType() - NotifyEndRetType() - NotifyBeginParam() - NotifyBeginType() - NotifyTypeSimple() - NotifyEndType() - NotifyEndParam() - _… (more parameter notifications occur here if more parameters exist)_ +``` +NotifyBeginMethod() + NotifyParamCount() + NotifyBeginRetType() + NotifyBeginType() + NotifyTypeSimple() + NotifyEndType() + NotifyEndRetType() + NotifyBeginParam() + NotifyBeginType() + NotifyTypeSimple() + NotifyEndType() + NotifyEndParam() + _… (more parameter notifications occur here if more parameters exist)_ NotifyEndMethod() +``` And yes, generics are handled as well. @@ -60,4 +63,3 @@ Don't worry, it's optional. I mentioned above that only signatures whose grammar The only gotcha is that TypeSpecs & MethodSpecs don’t have a unique byte that introduces them. For example, GENERICINST could indicate the beginning of a TypeSpec or a MethodSpec. You’ll see that SigParser::Parse() switches on the intro byte to determine what it’s looking at. So to keep things simple, you’ll want to add a couple more top-level functions to SigParser to parse TypeSpecs & MethodSpecs (say, ParseTypeSpec() & ParseMethodSpec()). You’d then call those functions instead of Parse() when you have a TypeSpec or MethodSpec on your hands. Of course, if you don’t care about TypeSpecs and MethodSpecs, you can use the code as is and not worry. But this stuff is so much fun, you’ll probably want to add the capability anyway. Hope you find this useful. And thanks again to Rico Mariani for sigparse.cpp! - diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Tail call JIT conditions.md b/docs/design/coreclr/profiling/davbr-blog-archive/Tail call JIT conditions.md index 194c0ba517aa..ad3e937e8e3a 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Tail call JIT conditions.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Tail call JIT conditions.md @@ -8,28 +8,28 @@ _First, Grant talked about the 64-bit JITs (one for x64, one for ia64):_ For the 64-bit JIT, we tail call whenever we’re allowed to. Here’s what prevents us from tail calling (in no particular order): - We inline the call instead (we never inline recursive calls to the same method, but we will tail call them) -- The call/callvirt/calli is followed by something other than nop or ret IL instructions. -- The caller or callee return a value type. -- The caller and callee return different types. -- The caller is synchronized (MethodImplOptions.Synchronized). -- The caller is a shared generic method. -- The caller has imperative security (a call to Assert, Demand, Deny, etc.). -- The caller has declarative security (custom attributes). +- The call/callvirt/calli is followed by something other than nop or ret IL instructions. +- The caller or callee return a value type. +- The caller and callee return different types. +- The caller is synchronized (MethodImplOptions.Synchronized). +- The caller is a shared generic method. +- The caller has imperative security (a call to Assert, Demand, Deny, etc.). 
+- The caller has declarative security (custom attributes). - The caller is varargs -- The callee is varargs. +- The callee is varargs. - The runtime forbids the JIT to tail call. (_There are various reasons the runtime may disallow tail calling, such as caller / callee being in different assemblies, the call going to the application's entrypoint, any conflicts with usage of security features, and other esoteric cases._) -- The il did not have the tail. prefix and we are not optimizing (the profiler and debugger control this) -- The il did not have the tail. prefix and the caller had a localloc instruction (think alloca or dynamic stack allocation) -- The caller is getting some GS security cookie checks -- The il did not have the tail. prefix and a local or parameter has had its address taken (ldarga, or ldloca) +- The il did not have the tail. prefix and we are not optimizing (the profiler and debugger control this) +- The il did not have the tail. prefix and the caller had a localloc instruction (think alloca or dynamic stack allocation) +- The caller is getting some GS security cookie checks +- The il did not have the tail. prefix and a local or parameter has had its address taken (ldarga, or ldloca) - The caller is the same as the callee and the runtime disallows inlining - The callee is invoked via stub dispatch (_i.e., via intermediate code that's generated at runtime to optimize certain types of calls_). -- For x64 we have these additional restrictions: +- For x64 we have these additional restrictions: - - The callee has one or more parameters that are valuetypes of size 3,5,6,7 or \>8 bytes - - The callee has more than 4 arguments (don’t forget to count the this pointer, generics, etc.) and more than the caller + - The callee has one or more parameters that are valuetypes of size 3,5,6,7 or \>8 bytes + - The callee has more than 4 arguments (don’t forget to count the this pointer, generics, etc.) and more than the caller - For all of the parameters passed on the stack the GC-ness must match between the caller and callee. (_"GC-ness" means the state of being a pointer to the beginning of an object managed by the GC, or a pointer to the interior of an object managed by the GC (e.g., a byref field), or neither (e.g., an integer or struct)._) -- For ia64 we have this additional restriction: +- For ia64 we have this additional restriction: - Any of the callee arguments do not get passed in a register. diff --git a/docs/design/coreclr/profiling/davbr-blog-archive/Type Forwarding.md b/docs/design/coreclr/profiling/davbr-blog-archive/Type Forwarding.md index f503b612cda3..aae3f7d87402 100644 --- a/docs/design/coreclr/profiling/davbr-blog-archive/Type Forwarding.md +++ b/docs/design/coreclr/profiling/davbr-blog-archive/Type Forwarding.md @@ -9,14 +9,12 @@ Type forwarding is nothing new. However, in CLR V4, we are enabling type forwar The example I’ll use where the .NET Framework uses type forwarding is the TimeZoneInfo class. In CLR V4, TimeZoneInfo is now forwarded from System.Core.dll to mscorlib.dll. If you open the CLR V4 copy of System.Core.dll in ildasm and choose Dump, you'll see the following: -| ``` .class extern /*27000004*/ forwarder System.TimeZoneInfo { - .assembly extern mscorlib /*23000001*/ + .assembly extern mscorlib /*23000001*/ } ``` - | In each assembly’s metadata is an exported types table. The above means that System.Core.dll's exported types table includes an entry for System.TimeZoneInfo (indexed by token 27000004). 
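If your profiler wants to spot such entries itself, the exported types table is reachable through the unmanaged metadata API. Below is a minimal sketch, assuming you have already obtained an `IMetaDataAssemblyImport` for the module (for example via `ICorProfilerInfo::GetModuleMetaData` with `IID_IMetaDataAssemblyImport`); error handling is omitted.

```cpp
#include <cor.h>

// Enumerate a module's exported types table and inspect each entry.
void DumpExportedTypes(IMetaDataAssemblyImport* pAsmImport)
{
    HCORENUM hEnum = nullptr;
    mdExportedType types[32];
    ULONG count = 0;

    while (SUCCEEDED(pAsmImport->EnumExportedTypes(&hEnum, types, 32, &count)) &&
           count > 0)
    {
        for (ULONG i = 0; i < count; i++)
        {
            WCHAR name[1024];
            ULONG nameLen = 0;
            mdToken implementation = mdTokenNil;
            DWORD flags = 0;

            pAsmImport->GetExportedTypeProps(types[i], name, 1024, &nameLen,
                                             &implementation, nullptr, &flags);
            // For a forwarded type such as System.TimeZoneInfo,
            // 'implementation' is the AssemblyRef of the assembly
            // that really defines the type.
        }
    }
    pAsmImport->CloseEnum(hEnum);
}
```

The tokens returned here are mdtExportedType tokens rather than typeDefs, which is exactly the distinction discussed next.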
What's significant is that System.Core.dll no longer has a typeDef for System.TimeZoneInfo, only an exported type. The fact that the token begins at the left with 0x27 tells you that it's an mdtExportedType (not a mdtTypeDef, which begins at the left with 0x02). @@ -28,15 +26,15 @@ This walkthrough assumes you have .NET 4.0 or later installed **and** an older r Code up a simple C# app that uses System.TimeZoneInfo: ``` -namespace test -{ - class Class1 - { - static void Main(string[] args) - { - System.TimeZoneInfo ti = null; - } - } +namespace test +{ + class Class1 + { + static void Main(string[] args) + { + System.TimeZoneInfo ti = null; + } + } } ``` @@ -49,7 +47,7 @@ csc /debug+ /o- /r:"C:\Program Files (x86)\Reference Assemblies\Microsoft\Framew Again, be sure you’re using an old csc.exe from, say, a .NET 3.5 installation. To verify, open up Class1.exe in ildasm, and take a look at Main(). It should look something like this: ``` -.method /*06000001*/ private hidebysig static +.method /*06000001*/ private hidebysig static void Main(string[] args) cil managed { .entrypoint @@ -70,11 +68,11 @@ Note that, if you were to build the above C# code using the .NET 4.0 C# compiler Ok, so how do we run this pre-.NET 4.0 executable against .NET 4.0? A config file, of course. Paste the following into a file named Class1.exe.config that sits next to Class1.exe: ``` - - - - - + + + + + ``` The above will force Class1.exe to bind against .NET 4.0 Beta 1. And when it comes time to look for TimeZoneInfo, the CLR will first look in System.Core.dll, find the exported types table entry, and then hop over to mscorlib.dll to load the type. What does that look like to your profiler? Make your guess and hold that thought. First, another walkthrough… @@ -83,14 +81,14 @@ The above will force Class1.exe to bind against .NET 4.0 Beta 1. And when it co To experiment with forwarding your own types, the process is: -- Create Version 1 of your library - - - Create version 1 of your library assembly that defines your type (MyLibAssemblyA.dll) - - Create an app that references your type in MyLibAssemblyA.dll (MyClient.exe) -- Create version 2 of your library - - - Recompile MyLibAssemblyA.dll to forward your type elsewhere (MyLibAssemblyB.dll) - - Don’t recompile MyClient.exe. Let it still think the type is defined in MyLibAssemblyA.dll. +- Create Version 1 of your library + + - Create version 1 of your library assembly that defines your type (MyLibAssemblyA.dll) + - Create an app that references your type in MyLibAssemblyA.dll (MyClient.exe) +- Create version 2 of your library + + - Recompile MyLibAssemblyA.dll to forward your type elsewhere (MyLibAssemblyB.dll) + - Don’t recompile MyClient.exe. Let it still think the type is defined in MyLibAssemblyA.dll. ### Version 1 @@ -140,9 +138,9 @@ Ok, time to upgrade! ### Version 2 Time goes by, your library is growing, and it’s time to split it into two DLLs.
Save this into MyLibAssemblyB.cs ``` -using System; -public class Foo -{ +using System; +public class Foo +{ } ``` @@ -170,7 +168,7 @@ Foo, MyLibAssemblyB, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null And this all despite the fact that MyClient.exe still believes that Foo lives in MyLibAssemblyA: ``` -.method /*06000001*/ public hidebysig static +.method /*06000001*/ public hidebysig static void Main() cil managed { .entrypoint @@ -188,7 +186,6 @@ And this all despite the fact that MyClient.exe still believes that Foo lives in IL\_001c: ret } // end of method Test::Main ``` - | ## Profilers @@ -199,5 +196,3 @@ This should make life easy for profilers, since they generally expect to be able However, type forwarding is important to understand if your profiler needs to follow metadata references directly. More generally, if your profiler is reading through metadata and expects to come across a typeDef (e.g., perhaps a metadata reference points to a type in that module, or perhaps your profiler expects certain known types to be in certain modules), then your profiler should be prepared to find an mdtExportedType instead, and to deal gracefully with it rather than doing something silly like crashing. In any case, whether you think your profiler will be affected by type forwarding, be sure to test, test, test! - - \ No newline at end of file diff --git a/docs/design/features/COM-activation.md index c4302e53f2b1..409d950f045a 100644 --- a/docs/design/features/COM-activation.md +++ b/docs/design/features/COM-activation.md @@ -105,7 +105,7 @@ When `DllGetClassObject()` is called in a COM activation scenario, the following ``` Note this API is not exposed outside of `System.Private.CoreLib` and is subject to change at any time. * The loading of the assembly will take place in a new [`AssemblyLoadContext`](https://docs.microsoft.com/dotnet/api/system.runtime.loader.assemblyloadcontext) for dependency isolation. Each assembly path will get a separate `AssemblyLoadContext`. This means that if an assembly provides multiple COM servers all of the servers from that assembly will reside in the same `AssemblyLoadContext`. - * The created `AssemblyLoadContext` will use an [`AssemblyDependencyResolver`](https://github.com/dotnet/corefx/issues/33165) that was supplied with the path to the assembly to load assemblies. + * The created `AssemblyLoadContext` will use an [`AssemblyDependencyResolver`](https://github.com/dotnet/runtime/issues/27787) that was supplied with the path to the assembly to load assemblies. 1) The `IClassFactory` instance is returned to the caller of `DllGetClassObject()` to attempt class activation. The `DllCanUnloadNow()` function will always return `S_FALSE` indicating the shim is never able to be unloaded. This matches .NET Framework semantics but may be adjusted in the future if needed. diff --git a/docs/design/features/DotNetCore-SharedPackageStore.md index 45b7aaa01828..ad60f8e8a5dd 100644 --- a/docs/design/features/DotNetCore-SharedPackageStore.md +++ b/docs/design/features/DotNetCore-SharedPackageStore.md @@ -24,7 +24,7 @@ The package store can be either a global system-wide folder or a dotnet.exe rela + netcoreapp2.1 + refs + netcoreapp2.0 - + netcoreapp2.1 + + netcoreapp2.1 ``` The layout within `netcoreapp*` folders is a NuGet cache layout. @@ -34,7 +34,7 @@
To compose the layout of the shared package store, we will use a dotnet command called `dotnet store`. We expect the *hosting providers* (ex: Antares) to use the command to prime their machines and framework authors who want to provide *pre-optimized package archives* to create the compressed archive layouts. -The layout is composed from a list of package names and versions specified as xml: +The layout is composed from a list of package names and versions specified as xml: **Roslyn Example** ```xml @@ -72,7 +72,7 @@ The output folder will be consumed by the runtime by adding to the `DOTNET_SHARE # Building apps with shared packages -The current mechanism to build applications that share assemblies is by not specifying a RID in the project file. Then, a portable app model is assumed and assemblies that are part of Microsoft.NETCore.App are found under the `dotnet` install root. With shared package store, applications have the ability to filter any set of packages from their publish output. Thus the decision of a portable or a standalone application is not made at the time of project authoring but is instead done at publish time. +The current mechanism to build applications that share assemblies is by not specifying a RID in the project file. Then, a portable app model is assumed and assemblies that are part of Microsoft.NETCore.App are found under the `dotnet` install root. With shared package store, applications have the ability to filter any set of packages from their publish output. Thus the decision of a portable or a standalone application is not made at the time of project authoring but is instead done at publish time. ## Project Authoring We will by default treat `Microsoft.NETCore.App` as though `type: platform` is always specified, thus requiring no explicit RID specification by the user. It will be an `ERROR` to specify a RID in the csproj file using the `` tag. diff --git a/docs/design/features/IJW-activation.md index 4f63f1327418..4d0fe9c17375 100644 --- a/docs/design/features/IJW-activation.md +++ b/docs/design/features/IJW-activation.md @@ -8,7 +8,7 @@ To support any C++/CLI users that wish to use .NET Core, the runtime and hosting * Load the appropriate version of .NET Core for the assembly if a .NET Core instance is not running, or validate that the currently running .NET Core instance can satisfy the assembly's requirements. * Load the (already-in-memory) assembly into the runtime. * Patch the vtfixup table tokens to point to JIT stubs. - + ## Design IJW activation has a variety of hard problems associated with it, mainly with loading in mixed mode assemblies that are not the application. diff --git a/docs/design/features/Linux-Hugepage-Crossgen2.md index dcc37774b398..32083cc39eba 100644 --- a/docs/design/features/Linux-Hugepage-Crossgen2.md +++ b/docs/design/features/Linux-Hugepage-Crossgen2.md @@ -1,7 +1,7 @@ Configuring Huge Pages for loading composite binaries using CoreCLR on Linux ---- -Huge pages can provide performance benefits to reduce the cost of TLB cache misses when +Huge pages can provide performance benefits to reduce the cost of TLB cache misses when executing code. In general, the largest available wins may be achieved by enabling huge pages for use by the GC, which will dominate the memory use in the process, but in some circumstances, if the application is sufficiently large, there may be a benefit to using @@ -16,7 +16,7 @@ images using the hugetlbfs.
Doing so requires several steps. 2. The composite image must be copied into a hugetlbfs filesystem which is visible to the .NET process instead of the composite image being loaded from the normal path. - IMPORTANT: The composite image must NOT be located in the normal path next to the application binary, or that file will be used instead of the huge page version. - The environment variable `COMPlus_NativeImageSearchPaths` must be set to point at the location of the hugetlbfs in use. For instance, `COMPlus_NativeImageSearchPaths` might be set to `/var/lib/hugetlbfs/user/USER/pagesize-2MB` - - As the cp command does not support copying into a hugetlbfs due to lack of support for the write syscall in that file system, a custom copy application must be used. A sample application that may be used to perform this task has a source listing in Appendix A. + - As the cp command does not support copying into a hugetlbfs due to lack of support for the write syscall in that file system, a custom copy application must be used. A sample application that may be used to perform this task has a source listing in Appendix A. 3. The machine must be configured to have sufficient huge pages available in the appropriate huge page pool. The memory requirements of huge page PE loading are as follows. - Sufficient pages to hold the unmodified copy of the composite image in the hugetlbfs. These pages will be used by the initial copy which emplaces the composite image into huge pages. - By default the runtime will map each page of the composite image using a MAP_PRIVATE mapping. This will require that the maximum number of huge pages is large enough to hold a completely separate copy of the image as loaded. @@ -62,7 +62,7 @@ int main(int argc, char** argv) printf("fdSrc fstat failed\n"); return 1; } - + addrSrc = mmap(0, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fdSrc, 0); if (addrSrc == MAP_FAILED) { diff --git a/docs/design/features/OnStackReplacement.md index 905696875046..757eb05dcba0 100644 --- a/docs/design/features/OnStackReplacement.md +++ b/docs/design/features/OnStackReplacement.md @@ -120,7 +120,7 @@ while the old code is active in some stack frames. An implementation must come up with solutions to several related sub problems, which we describe briefly here, and in more detail below. -* **Patchpoints** : Identify where in the original method OSR is possible. +* **Patchpoints** : Identify where in the original method OSR is possible. We will use the term _patchpoint_ to describe a particular location in a method's code that supports OSR transitions. * **Triggers** : Determine what will trigger an OSR transition @@ -258,13 +258,13 @@ PatchpointHelper(int ppID, int* counter) switch (s) { - case Unknown: - *counter = initialThreshold; + case Unknown: + *counter = initialThreshold; SetState(s, Active); return; - case Active: - *counter = checkThreshold; + case Active: + *counter = checkThreshold; SetState(s, Pending); RequestAlternative(ppID); return; @@ -273,7 +273,7 @@ PatchpointHelper(int ppID, int* counter) *counter = checkThreshold; return; - case Ready: + case Ready: Transition(...); // does not return } } @@ -483,7 +483,7 @@ this is to just leave the original frame in place, and have the OSR frame #### 3.4.1 The Prototype The original method conditionally calls to the patchpoint helper at -patchpoints. The helper will return if there is no transition. +patchpoints. The helper will return if there is no transition.
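In C-like terms, the guarded call the original method executes at each patchpoint amounts to the following sketch; `PatchpointHelper` and `ppID` come from the pseudocode above, and the counter is the local shown spilled to the frame in the disassembly later in this document.

```cpp
void PatchpointHelper(int ppID, int* counter);  // see pseudocode above

// Shape of the check at a patchpoint: the fast path is just a
// decrement and a not-taken branch.
void AtPatchpoint(int ppID, int* patchpointCounter)
{
    if (--(*patchpointCounter) <= 0)
    {
        // Returns unless a transition is ready; on a transition, control
        // resumes in the OSR variant and this call never returns.
        PatchpointHelper(ppID, patchpointCounter);
    }
}
```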
For a transition, the helper will capture context and virtually unwind itself and the original method from the stack to recover callee-save register values @@ -554,7 +554,7 @@ frame pointers. When control is executing in a funclet there are effectively two activation records on the stack that share a single frame: the parent frame and the funclet frame. The funclet frame is largely a stub frame and most of the frame -state is kept in the parent frame. +state is kept in the parent frame. These two frames are not adjacent; they are separated by some number of runtime frames. This means it is going to be difficult for our system to handle @@ -799,7 +799,7 @@ G_M6138_IG03: FFC9 dec ecx 894DF0 mov dword ptr [rbp-10H], ecx 837DF000 cmp dword ptr [rbp-10H], 0 // ... > 0 ? - 7F0E jg SHORT G_M6138_IG05 + 7F0E jg SHORT G_M6138_IG05 G_M6138_IG04: ;; bbWeight=0.01 488D4DF0 lea rcx, bword ptr [rbp-10H] // &patchpointCounter @@ -910,7 +910,7 @@ For example: 5F pop rdi 4883C448 add rsp, 72 5D pop rbp - C3 ret + C3 ret ``` with unwind info: ``` diff --git a/docs/design/features/PinnedHeap.md b/docs/design/features/PinnedHeap.md index d9d2ffa30047..81c76e620804 100644 --- a/docs/design/features/PinnedHeap.md +++ b/docs/design/features/PinnedHeap.md @@ -42,7 +42,7 @@ An object allocated on the pinned heap can be referenced by other objects normal ## API - allocating an array on the pinned heap -For users who want to allocate their objects pinned, we provide [a new API](https://github.com/dotnet/corefx/issues/31787) to allocate such an object. The API declaration is the following: +For users who want to allocate their objects pinned, we provide [a new API](https://github.com/dotnet/runtime/issues/27146) to allocate such an object. The API declaration is the following: ```csharp class GC @@ -154,7 +154,7 @@ It might make sense to provide an additional API to allocate an array of these o **Alignment support** -However, there is another scenario for high perf that could warrant a generational pinned heap which is objects with a specified alignment mostly [for SIMD operations](https://github.com/dotnet/corefx/issues/22790) or aligning on cache lines to avoid false sharing. This scenario doesn’t imply the object always needs to be pinned, however being pinned does mean they would be convenient for interop. For example, matrix multiplication with SIMD where the matrix is also used in native code. It also isn’t clear the object is necessarily long lived, however we do make the assumption that the amount of memory occupied by pinned objects should be small compared to non pinned objects so treating all of them as part of gen2 is acceptable. A different design option would be to allocate these on the normal heap but keep their alignment when compacting but this makes compaction slower. +However, there is another scenario for high perf that could warrant a generational pinned heap which is objects with a specified alignment mostly [for SIMD operations](https://github.com/dotnet/runtime/issues/22990) or aligning on cache lines to avoid false sharing. This scenario doesn’t imply the object always needs to be pinned, however being pinned does mean they would be convenient for interop. For example, matrix multiplication with SIMD where the matrix is also used in native code. 
It also isn’t clear the object is necessarily long lived, however we do make the assumption that the amount of memory occupied by pinned objects should be small compared to non pinned objects so treating all of them as part of gen2 is acceptable. A different design option would be to allocate these on the normal heap but keep their alignment when compacting but this makes compaction slower. For alignment support, we can also limit it to a few specific alignments if it makes the implementation noticeably easier – 16, 32, 64, 128 bytes, and possibly page size (unclear if this is really needed). diff --git a/docs/design/features/StringDeduplication.md index edd3d0abd9a7..fa69d2c21b88 100644 --- a/docs/design/features/StringDeduplication.md +++ b/docs/design/features/StringDeduplication.md @@ -14,18 +14,18 @@ Dedup - string deduplication is often shortened to dedup in this document. This is an opt-in feature and should have no performance penalty when it’s off. And by default it’s off. -When it’s on, we aim to – +When it’s on, we aim to – - Only deduplicate strings in old generations of the GC heap. - Not increase the STW pauses for ephemeral GCs. - Not regress string allocation speed. -- Provide static analysis and runtime checks to detect patterns incompatible with string deduping. This is required to enable customers to opt-in into this feature with confidence. +- Provide static analysis and runtime checks to detect patterns incompatible with string deduping. This is required to enable customers to opt into this feature with confidence. ## Details -#### **History** +#### **History** -The string deduplication feature has been brought up before. See [runtime issue #9022](https://github.com/dotnet/runtime/issues/9022) for discussion. +The string deduplication feature has been brought up before. See [runtime issue #9022](https://github.com/dotnet/runtime/issues/9022) for discussion. And a proof of concept was gracefully [attempted](https://github.com/dotnet/coreclr/pull/15135) by [@Rattenkrieg](https://github.com/Rattenkrieg) before. But it was incomplete and the design didn’t have the kind of perf characteristics desired – it had most of the logic in GC vs outside GC. @@ -33,9 +33,9 @@ An example of a user implemented string deduplication is Roslyn’s [StringTable #### **Customer impact estimation and validation** -As a general rule we want to have this for all features we add to the runtime. +As a general rule we want to have this for all features we add to the runtime. -Issue #[9022](https://github.com/dotnet/runtime/issues/9022) It mentioned some general data: +Issue #[9022](https://github.com/dotnet/runtime/issues/9022) mentioned some general data: “The expectation is that typical apps have 20% of their GC heap be strings. Some measurements we have seen is that for at least some applications, 10-30% of strings all may be duplicated, so this might save 2-3% of the GC heap. Not huge, but the feature is not that difficult either.” @@ -48,9 +48,9 @@ There are 2 sources of data we could get – #### **Design outline** -This is an opt in feature. When the runtime detects it’s turned on, it creates a dedup thread to do the work. +This is an opt in feature. When the runtime detects it’s turned on, it creates a dedup thread to do the work. -Detection of duplicated strings is done by looking into a hash table.
The key into this hash table is the hash code of the content of a string. Detailed description of this detection is later in this doc. +Detection of duplicated strings is done by looking into a hash table. The key into this hash table is the hash code of the content of a string. Detailed description of this detection is later in this doc. As the dedup thread goes through the old generations linearly, it looks for references to a string object (denoted by the method table) and either calculates or looks up the hash code of that string to see if it already exists in the hash table. If so it will attempt to change the reference to point to that string with a CAS operation. If this fails, which means some other thread changed the reference in the meantime, we simply ignore this and move on. We expect the CAS failure rate to be very low. @@ -58,17 +58,17 @@ Since the new string reference we will write to the heap has the exact same type The dedup hash table acts as weak references to the strings. Depending on the scenario we might choose to null out these weak references or not (if it’s more performant to rebuild the hash table). If we do the former these weak references would be treated as short weak handles so the following will happen before we scan for finalization - -- During BGC final mark phase we will need to null out the strings that are not marked in the hash table. This can be made concurrent. +- During BGC final mark phase we will need to null out the strings that are not marked in the hash table. This can be made concurrent. - During a full blocking GC we will need to null out the strings that are not marked in the hash table, and relocate the ones that got promoted if we are doing a compacting GC. **Alternate design points** -- Should we create multiple threads to do the work? +- Should we create multiple threads to do the work? -Deduping can be done leisurely, so it doesn’t merit having multiple threads. +Deduping can be done leisurely, so it doesn’t merit having multiple threads. -- Can we use an existing thread to do the work on? +- Can we use an existing thread to do the work on? The finalizer thread is something that’s idling most of the time. However there are already plenty of types of work scheduled to potentially run on the finalizer thread so adding yet another thing, especially an opt in feature, can get messy. @@ -80,9 +80,9 @@ Only strings allocated on the managed heap will be considered for deduplication. Currently calling GetHashCode of a string calculates a 32-bit hash code. This is not stored anywhere, unlike the default hash code that’s stored either in the syncblk or a syncblk entry, depending whether the syncblk is also used by something else like locking. As the deduping thread goes through the heap it will calculate the 32-bit hash code and actually install it. -However, a 32-bit hash code means we always need to check for collision by actually comparing the string content if the hash code is the same. And for large strings having to compare the string content could be very costly. For LOH compaction we already allocate a padding object for each large object (which currently takes up at most 0.4% of LOH space on 64-bit). We could make this padding object 1-ptr size larger and store the address of the string it’s deduped too. Likewise we can also use this to store the fact “this is the copy the hash table keeps track of so no need to dedup”. This way we can avoid having to do the detection multiple times for the same string.
Below illustrates a scenario where large strings are deduplicated. +However, a 32-bit hash code means we always need to check for collision by actually comparing the string content if the hash code is the same. And for large strings having to compare the string content could be very costly. For LOH compaction we already allocate a padding object for each large object (which currently takes up at most 0.4% of LOH space on 64-bit). We could make this padding object 1-ptr size larger and store the address of the string it’s deduped to. Likewise we can also use this to store the fact “this is the copy the hash table keeps track of so no need to dedup”. This way we can avoid having to do the detection multiple times for the same string. Below illustrates a scenario where large strings are deduplicated. -`pad | s0 | pad | s1 | pad | s0_1` +`pad | s0 | pad | s1 | pad | s0_1` `obj0 (-> s0) | obj1 (-> s0_1) | obj2 (->s1) | obj3 (->s0_1) | obj4 (->s1) ` @@ -90,12 +90,12 @@ Each string obj, ie, s*, is a string on LOH and has a padding object in front of s0_1 has the same content as s0. s1 has the same hash code but not the same content. -"obj->s" means obj points to a string object s, or has a ref to s. So obj0 has a ref to s0, obj1 has a ref to s0_1, obj2 has a ref to s1 and so on. +"obj->s" means obj points to a string object s, or has a ref to s. So obj0 has a ref to s0, obj1 has a ref to s0_1, obj2 has a ref to s1 and so on. 1. As we go through the heap, we see obj0 which points to s0. 2. s0’s hash is calculated which we use to look into the hash table. 3. We see that no entries exist for that hash so we create an entry for it and in s0’s padding indicates that it’s stored in the hash table, ie, it’s the copy we keep. -4. Then we see obj1 which points to s0_1 whose hash doesn’t exist yet. We calculate the hash for s0_1, and see that there’s an entry for this hash already in the hash table, now we compare the content and see that it’s the same, now we store s0 in the padding object before s0_1 and change obj1’s ref to point to s0. +4. Then we see obj1 which points to s0_1 whose hash doesn’t exist yet. We calculate the hash for s0_1, and see that there’s an entry for this hash already in the hash table, now we compare the content and see that it’s the same, now we store s0 in the padding object before s0_1 and change obj1’s ref to point to s0. 5. Then we see obj2 and calculate s1’s hash. We notice an entry already exists for that hash so we compare the content and the content is not the same as s0’s. So we enter s1 into the hash table and indicate that it’s stored in the hash table. 6. Then we see obj3, and s0_1 indicates that it should be deduped to s0 so we change obj3’s ref to point to s0 right away. 7. Then we see obj4 which points to s1 and s1 indicates it’s stored in the hash table so we don’t need to dedup. @@ -106,11 +106,11 @@ Since we know the size of a string object trivially, we know which strings are o - If `InterlockedCompareExchangePointer` fails because the ref was modified while we were finding a copy to dedup to (or insert into the hash table), we skip this ref. - If too many collisions exist for a hash code, we skip deduping for strings with that hash code. This avoids the DoS attack by creating too many strings for the same hash. -- If the string is too large.
At some point going through a very large string to calculate its hash code will become simply not worth the effort. We'll need to do some perf investigation to figure out a good limit. **Alternate design points** -- Should we calculate the hash codes for SOH strings as gen1 GCs promote them into gen2? +- Should we calculate the hash codes for SOH strings as gen1 GCs promote them into gen2? This would increase gen1 pause. @@ -132,7 +132,7 @@ The following scenarios become problematic or more problematic when deduping is - Mutating the string content -Strings are supposed to be immutable. However in unsafe code you can change the string content after it’s created. Changing string content already asking for trouble without deduping – you could be changing the interned copy which means you are modifying someone else’s string which could cause completely unpredictable results for them. +Strings are supposed to be immutable. However in unsafe code you can change the string content after it’s created. Changing string content is already asking for trouble without deduping – you could be changing the interned copy which means you are modifying someone else’s string which could cause completely unpredictable results for them. The most common way is to use the fixed keyword: @@ -143,7 +143,7 @@ fixed (char* p = str) } ``` -There are other ways such as +There are other ways such as `*((char*)(gcHandlePointingToString.AddrOfPinnedObject())) = 'c';` Or @@ -153,7 +153,7 @@ Or - Locking on a string -Locking on a string object is already discouraged due to a string can be interned. Having string dedup on can make this problematic more often if the string you called lock on is now deduped to a different string object. +Locking on a string object is already discouraged because a string can be interned. Having string dedup on can make this problematic more often if the string you called lock on is now deduped to a different string object. - Reference equality @@ -187,7 +187,7 @@ To start with we will provide analysis for the following – I’m seeing that there are almost 600 places in libraries that do `fixed (char*` but hopefully most of them do not actually modify the string content. We should definitely be encouraging folks to switch to using `string.Create` like what [PR#31700](https://github.com/dotnet/runtime/pull/31700) did. -2. Using lock on a string object. +2. Using lock on a string object. - Reference equality checks on strings @@ -195,13 +195,13 @@ Since `ReferenceEquals` is performance critical API, we cannot do checks in its We do have some libraries that rely on `ReferenceEquals`. We need to figure out what to do about them. See discussion [here](https://github.com/dotnet/runtime/pull/31971#pullrequestreview-355531406). -- Additional checks in heap verification +- Additional checks in heap verification Heap verification will now include checks to verify that no one changes the string content after its hash is computed. This can be turned on when a certain level of COMPlus_HeapVerify is specified. - Stress mode -Instead of waiting till the productive moment to start the next deduping cycle, we can have a stress mode where we dedup randomly to catch problems sooner, same idea as GC stress to detect GC holes sooner.
We could even artificially create duplicates in this stress mode to find places that depend on object identity. @@ -219,8 +219,8 @@ We might see some performance gains using RTM (Restricted Transactional Memory) **Deduping other types of objects** -We might consider to not limit deduping to just strings. There was a discussion in [runtime issue #12628](https://github.com/dotnet/runtime/issues/12628). +We might consider not limiting deduping to just strings. There was a discussion in [runtime issue #12628](https://github.com/dotnet/runtime/issues/12628). -**Deduping long lived references on stack** +**Deduping long lived references on stack** -There might be merit to look into deduping long lived refs on the stack. The amount of work it requires and the return makes it low priority but it may help with some corner cases. \ No newline at end of file +There might be merit to look into deduping long lived refs on the stack. The amount of work it requires and the return makes it low priority but it may help with some corner cases. diff --git a/docs/design/features/additional-deps.md index b7046ea7067c..2bc3e59492c8 100644 --- a/docs/design/features/additional-deps.md +++ b/docs/design/features/additional-deps.md @@ -52,9 +52,7 @@ The proposal for this is to "roll-backwards" starting with the "found" version. #### Roll-forward uses app's TFM -A secondary issue with with the store's naming convention for framework. It contains a path such as: - `\dotnet\store\x64\netcoreapp2.0\microsoft.applicationinsights\2.4.0` -where 'netcoreapp2.0' is a "tfm" (target framework moniker). During roll-forward cases, the tfm is still the value specified in the app's runtimeconfig. The host only includes store folders that match that tfm, so it may not find packages from other deps files that were generated off a different tfm. In addition, with the advent of multiple frameworks, it makes it cumbersome to be forced to install to every tfm because multiple frameworks may use the same package, and because each package is still identified by an exact version. +A secondary issue is with the store's naming convention for frameworks. It contains a path such as: `\dotnet\store\x64\netcoreapp2.0\microsoft.applicationinsights\2.4.0` where 'netcoreapp2.0' is a "tfm" (target framework moniker). During roll-forward cases, the tfm is still the value specified in the app's runtimeconfig. The host only includes store folders that match that tfm, so it may not find packages from other deps files that were generated off a different tfm. In addition, with the advent of multiple frameworks, it makes it cumbersome to be forced to install to every tfm because multiple frameworks may use the same package, and because each package is still identified by an exact version. The proposal for this is to add an "any" tfm. @@ -80,9 +78,8 @@ Where "found" means the version that is being used at run time including roll-fo ## 2.1 proposal (add an "any" tfm to store) For example, `\dotnet\store\x64\any\microsoft.applicationinsights\2.4.0` - -The `any` tfm would be used if the specified tfm (e.g. netcoreapp2.0) is not found: - `\dotnet\store\x64\netcoreapp2.0\microsoft.applicationinsights\2.4.0` + +The `any` tfm would be used if the specified tfm (e.g. netcoreapp2.0) is not found: `\dotnet\store\x64\netcoreapp2.0\microsoft.applicationinsights\2.4.0` _Possible risk: doesn't this make "uninstall" more difficult?
Because multiple installs may write the same packages and try to remove packages that another installer created?_ @@ -95,7 +92,7 @@ The current ordering for resolving deps files is: 1) The app's deps file 2) The additional-deps file(s) 3) The framework(s) deps file(s) - + The order is important because "first-in" wins. Since the additional-deps is before the framework, the additional-deps will "win" in all cases except during a minor\major roll-forward. The reason minor\major roll-forward is different is because the framework has special logic (new in 2.1) to compare assembly and file version numbers from the deps files, and pick the newest. The proposed ordering change for 2.1 is: @@ -108,7 +105,7 @@ In addition, the additional-deps will always look for assembly and file version ## 2.1 proposal (add runtimeconfig knob to disable `%DOTNET_ADDITIONAL_DEPS%`) Add an `additionalDepsLookup` option to the runtimeconfig with these values: - + 0) The `%DOTNET_ADDITIONAL_DEPS%` is not used 1) `DOTNET_ADDITIONAL_DEPS` is used (the default) diff --git a/docs/design/features/arm64-intrinsics.md index 67aff026878d..e814833ad126 100644 --- a/docs/design/features/arm64-intrinsics.md +++ b/docs/design/features/arm64-intrinsics.md @@ -289,7 +289,7 @@ This document will refer to half precision floating point as `Half`. + CoreCLR and `CIL` in general do not have general support for a `Half` type + There is an open request to expose `Half` intrinsics + There is an outstanding proposal to add `System.Half` to support this request -https://github.com/dotnet/corefx/issues/25702 +https://github.com/dotnet/runtime/issues/936 + Implementation of `Half` features will be adjusted based on + Implementation of the `System.Half` proposal + Availability of supporting hardware (extensions) diff --git a/docs/design/features/code-versioning-profiler-breaking-changes.md index 7a7add6323b0..760623741cb6 100644 --- a/docs/design/features/code-versioning-profiler-breaking-changes.md +++ b/docs/design/features/code-versioning-profiler-breaking-changes.md @@ -26,4 +26,4 @@ Code versioning, and in particular its use for tiered compilation means that the 2. The timing of ReJITCompilationFinished has been adjusted to be slightly earlier (after the new code body is generated, but prior to updating the previous jitted code to modify control flow). This raises a slim possibility for a ReJIT error to be reported after ReJITCompilationFinished in the case of OOM or process memory corruption. -There are likely some other variations of the changed behavior I haven't thought of yet, but if further testing, code review, or discussion brings it to the surface I'll add it here.
Feel free to get in touch on github (@noahfalk), or if you have anything you want to discuss in private you can email me at noahfalk AT microsoft.com diff --git a/docs/design/features/covariant-return-methods.md b/docs/design/features/covariant-return-methods.md index 3c9ac8c4db89..9416253ebf06 100644 --- a/docs/design/features/covariant-return-methods.md +++ b/docs/design/features/covariant-return-methods.md @@ -2,7 +2,7 @@ Covariant return methods is a runtime feature designed to support the [covariant return types](https://github.com/dotnet/csharplang/blob/master/proposals/covariant-returns.md) and [records](https://github.com/dotnet/csharplang/blob/master/proposals/records.md) C# language features proposed for C# 9.0. -This feature allows an overriding method to have a return type that is different than the one on the method it overrides, but compatible with it. The type compability rules are defined in ECMA I.8.7.1. Example: using a more derived return type. +This feature allows an overriding method to have a return type that is different than the one on the method it overrides, but compatible with it. The type compatibility rules are defined in ECMA I.8.7.1. Example: using a more derived return type. Covariant return methods can only be described through MethodImpl records, and as an initial implementation will only be applicable to methods on reference types. Methods on interfaces and value types will not be supported (may be supported later in the future). @@ -24,9 +24,9 @@ During enumeration of MethodImpls on a type (`MethodTableBuilder::EnumerateMetho + Load the `TypeHandle` of the return type of the method on base type. + Load the `TypeHandle` of the return type of the method on the current type being validated. + Verify that the second `TypeHandle` is compatible with the first `TypeHandle` using the `MethodTable::CanCastTo()` API. If they are not compatible, a TypeLoadException is thrown. - + The only exception where `CanCastTo()` will return true for an incompatible type according to the ECMA rules is for structs implementing interfaces, so we explicitly check for that case and throw a TypeLoadException if we hit it. - + Once a method is flagged for return type checking, every time the vtable slot containing that method gets overridden on a derived type, the new override will also be checked for compatibility. This is to ensure that no derived type can implicitly override some virtual method that has already been overridden by some MethodImpl with a covariant return type. ### VTable Slot Unification @@ -64,7 +64,7 @@ This slot unification step will also take place during the last step of type loa An interface method may be both non-final and have a MethodImpl that declares that it overrides another interface method. If it does, NO other interface method may .override it. Instead further overrides must override the method that it overrode. Also the overriding method may only override 1 method. The default interface method resolution algorithm shall change from: - + ``` console Given interface method M and type T.
Let MSearch = M diff --git a/docs/design/features/crossgen2-compilation-structure-enhancements.md b/docs/design/features/crossgen2-compilation-structure-enhancements.md index bf164d89fd0b..7f993df95e88 100644 --- a/docs/design/features/crossgen2-compilation-structure-enhancements.md +++ b/docs/design/features/crossgen2-compilation-structure-enhancements.md @@ -59,7 +59,7 @@ Note, this approach is probably more complete than we will finish in one release For non-generic code this is straightforward. Either compile all the non-generic code in the binary, or compile only that which is specified via a profile guided optimization step. This choice shall be driven by a per "input assembly" switch as in the presence of a composite R2R image we likely will want to have different policy for different assemblies, as has proven valuable in the past. Until proven otherwise, per assembly specification of this behavior shall be considered to be sufficient. -We shall set a guideline for how much generic code to generate, and the amount of generic code to generate shall be gated as a multiplier of the amount of non-generic code generated. +We shall set a guideline for how much generic code to generate, and the amount of generic code to generate shall be gated as a multiplier of the amount of non-generic code generated. For generic code we also need a per assembly switch to adjust between various behaviors, but the proposal is as follows: @@ -87,7 +87,7 @@ Runtime Layer Each layer in this stack will be compiled as a consistent set of crossgen2 compilations. -I propose to reduce the generics duplication problem to allow duplication between layers, but not within a layer. There are two ways to do this. The first of which is to produce composite R2R images for a layer. Within a single composite R2R image generation, running heuristics and generating generics eagerly should be straightforward. This composite R2R image would have all instantiations statically computed that are local to that particular layer of compilation, and also any instantiations from other layers. The duplication problem would be reduced in that a single analysis would trigger these multi-layer dependent compilations, and so which there may be duplication between layers, there wouldn't be duplication within a layer. And given that the count of layers is not expected to exceed 3 or 4, that duplication will not be a major concern. +I propose to reduce the generics duplication problem to allow duplication between layers, but not within a layer. There are two ways to do this. The first of which is to produce composite R2R images for a layer. Within a single composite R2R image generation, running heuristics and generating generics eagerly should be straightforward. This composite R2R image would have all instantiations statically computed that are local to that particular layer of compilation, and also any instantiations from other layers. The duplication problem would be reduced in that a single analysis would trigger these multi-layer dependent compilations, and so while there may be duplication between layers, there wouldn't be duplication within a layer. And given that the count of layers is not expected to exceed 3 or 4, that duplication will not be a major concern.
The second approach is to split compilation up into assembly level units, run the heuristics per assembly, generate the completely local generics in the individual assemblies, and then nominate a final mop up assembly that consumes a series of data files produced by the individual assembly compilations and holds all of the stuff that didn't make sense in the individual assemblies. In my opinion this second approach would be better for debug builds, but the first approach is strictly better for release builds, and really shouldn't be terribly slow. diff --git a/docs/design/features/event-counter.md b/docs/design/features/event-counter.md index 29fa231f03ae..935a42b47f86 100644 --- a/docs/design/features/event-counter.md +++ b/docs/design/features/event-counter.md @@ -108,4 +108,4 @@ For EventCounter and PollingCounter we expect simple viewers to use the display ### Metadata -To add any optional metadata about the counters that we do not already provide a way of encoding, users can call the `AddMetaData(string key, string value)` API. This API exists on all variants of the Counter APIs, and allows users to add one or many key-value pairs of metadata, which is dumped to the Payload as a comma-separated string value. This API exists so that users can add any metadata about their Counter that is not known to us and is different from the ones we provide by default (i.e. `DisplayName`, `CounterType`, `DisplayRateTimeScale`). \ No newline at end of file +To add any optional metadata about the counters that we do not already provide a way of encoding, users can call the `AddMetaData(string key, string value)` API. This API exists on all variants of the Counter APIs, and allows users to add one or many key-value pairs of metadata, which is dumped to the Payload as a comma-separated string value. This API exists so that users can add any metadata about their Counter that is not known to us and is different from the ones we provide by default (i.e. `DisplayName`, `CounterType`, `DisplayRateTimeScale`). diff --git a/docs/design/features/framework-version-resolution.md b/docs/design/features/framework-version-resolution.md index cd20470ce34f..349d0f250191 100644 --- a/docs/design/features/framework-version-resolution.md +++ b/docs/design/features/framework-version-resolution.md @@ -30,8 +30,7 @@ In the `.runtimeconfig.json` these values are defined like this: ``` #### Framework name -Each framework reference identifies the framework by its name. -Framework names are case sensitive (since they're used as folder names even on Linux systems). +Each framework reference identifies the framework by its name. Framework names are case sensitive (since they're used as folder names even on Linux systems). #### Version Framework version must be a [SemVer V2](https://semver.org) valid version. @@ -146,13 +145,15 @@ Pros Cons * Testing behavior of new releases with pre-release versions is not fully possible (see below). -* Some special cases don't work: -One special case which would not work: -*Component A which asks for `2.0.0 LatestMajor` is loaded first on a machine which has `3.0.0` and also `3.1.0-preview` installed. Because it's the first in the process it will resolve the runtime according to the above rules - that is prefer release version - and thus will select `3.0.0`. -Later on component B is loaded which asks for `3.1.0-preview LatestMajor` (for example the one in active development). This load will fail since `3.0.0` is not enough to run this component. 
-Loading the components in reverse order (B first and then A) will work since the `3.1.0-preview` runtime will be selected.* +* Some special cases don't work. -Modification to automatic roll forward to latest patch: + One special case which would not work: + *Component A which asks for `2.0.0 LatestMajor` is loaded first on a machine which has `3.0.0` and also `3.1.0-preview` installed. Because it's the first in the process it will resolve the runtime according to the above rules - that is prefer release version - and thus will select `3.0.0`.* + + *Later on component B is loaded which asks for `3.1.0-preview LatestMajor` (for example the one in active development). This load will fail since `3.0.0` is not enough to run this component.* + *Loading the components in reverse order (B first and then A) will work since the `3.1.0-preview` runtime will be selected.* + +Modification to automatic roll forward to latest patch: Existing behavior is to find a matching framework based on the above rules and then apply roll forward to latest patch (except if `Disable` is specified). The new behavior should be: * If the above rules find a matching pre-release version of a framework, then automatic roll forward to latest patch is not applied. * If the above rules find a matching release version of a framework, automatic roll forward to latest patch is applied. @@ -218,12 +219,12 @@ Items lower in the list override those higher in the list. At each precedence sc This setting is also described in [roll-forward-on-no-candidate-fx](roll-forward-on-no-candidate-fx.md). It can be specified as a property either for the entire `.runtimeconfig.json` or per framework reference (it has no environment variable or command line argument). It disables rolling forward to the latest patch. The host will compute the effective value of `applyPatches` for each framework reference. -The `applyPatches` value is only considered if the effective `rollForward` value for a given framework reference is +The `applyPatches` value is only considered if the effective `rollForward` value for a given framework reference is * `LatestPatch` * `Minor` * `Major` -For the other values `applyPatches` is ignored. +For the other values `applyPatches` is ignored. *This is to maintain backward compatibility with `rollForwardOnNoCandidateFx`. `applyPatches` is now considered obsolete.* If `applyPatches` is set to `true` (the default), then the roll-forward rules described above apply fully. @@ -259,7 +260,7 @@ There's a direct mapping from the `rollForward` setting to the internal represen | `rollForward` | `version_compatibility_range` | `roll_to_highest_version` | | --------------------- | ----------------------------- | ------------------------------------------ | | `Disable` | `exact` | `false` | -| `LatestPatch` | `patch` | `false` (always picks latest patch anyway) | +| `LatestPatch` | `patch` | `false` (always picks latest patch anyway) | | `Minor` | `minor` | `false` | | `LatestMinor` | `minor` | `true` | | `Major` | `major` | `false` | @@ -306,7 +307,7 @@ Steps * By doing this for all `framework references` here, before the next loop, we minimize the number of re-try attempts. 4. For each `framework reference` in `config fx references`: 5.
--> If the framework's `name` is not in `resolved frameworks` Then resolve the `framework reference` to the actual framework on disk: - * If the framework `name` already exists in the `effective fx references` reconcile the currently processed `framework reference` with the one from the `effective fx references` (see above for the algorithm). + * If the framework `name` already exists in the `effective fx references` reconcile the currently processed `framework reference` with the one from the `effective fx references` (see above for the algorithm). *The term "reconcile framework references" is used for this in the code; this used to be called "soft-roll-forward" as well.* * The reconciliation will always pick the higher `version` and will merge the `rollForward` and `applyPatches` settings. * The reconciliation may fail if it's not possible to roll forward from one `framework reference` to the other. @@ -368,7 +369,7 @@ This might be more of an issue for components (COM and such), which we will reco The above proposal will impact behavior of existing apps (because framework resolution is in `hostfxr` which is global on the machine for all frameworks). This is a description of the changes as they apply to apps using either default settings, `rollForwardOnNoCandidateFx` or `applyPatches`. ### Fixing ordering issues -In 2.* the algorithm had a bug in it which caused it to resolve different version depending solely on the order of framework references. Consider this example: +In 2.* the algorithm had a bug in it which caused it to resolve a different version depending solely on the order of framework references. Consider this example: `Microsoft.NETCore.App` is available on the machine with versions `2.1.1` and `2.1.2`. diff --git a/docs/design/features/globalization-invariant-mode.md b/docs/design/features/globalization-invariant-mode.md index 3112ce7dd42a..f3c4132fcc0d 100644 --- a/docs/design/features/globalization-invariant-mode.md +++ b/docs/design/features/globalization-invariant-mode.md @@ -1,5 +1,5 @@ # .NET Core Globalization Invariant Mode - + Author: [Tarek Mahmoud Sayed](https://github.com/tarekgh) The globalization invariant mode - new in .NET Core 2.0 - enables you to remove application dependencies on globalization data and [globalization behavior](https://docs.microsoft.com/en-us/dotnet/standard/globalization-localization/). This mode is an opt-in feature that provides more flexibility if you care more about reducing dependencies and the size of distribution than globalization functionality or globalization-correctness. @@ -17,7 +17,7 @@ The following scenarios are affected when the invariant mode is enabled. Their i - Time Zone display name on Linux ## Background - + Globalization rules and the data that represents those rules frequently change, often due to country-specific policy changes (for example, changes in currency symbol, sorting behavior or time zones). Developers expect globalization behavior to always be current and for their applications to adapt to new data over time. In order to keep up with those changes, .NET Core (and the .NET Framework, too) depends on the underlying OS to keep up with these changes. Relying on the underlying OS for globalization data has the following benefits: @@ -32,11 +32,11 @@ Globalization support has the following potential challenges for applications: * Installing/carrying the [ICU](http://icu-project.org) package on Linux (~28 MB). Note: On Linux, .NET Core relies on globalization data from ICU.
For example, [.NET Core Linux Docker images](https://github.com/dotnet/dotnet-docker/blob/master/2.0/runtime-deps/stretch/amd64/Dockerfile) install this component. Globalization data is available on Windows and macOS as part of their base installs. - + ## Cultures and culture data - -When enabling the invariant mode, all cultures behave like the invariant culture. The invariant culture has the following characteristics: - + +When enabling the invariant mode, all cultures behave like the invariant culture. The invariant culture has the following characteristics: + * Culture names (English, native display, ISO, language names) will return invariant names. For instance, when requesting a culture's native name, you will get "Invariant Language (Invariant Country)". * All cultures' LCID will have the value 0x1000 (which means Custom Locale ID). The exception is the invariant culture, which will still have 0x7F. * All culture parents will be invariant. In other words, there will not be any neutral cultures by default but the apps can still create a culture like "en". @@ -45,45 +45,45 @@ When enabling the invariant mode, all cultures behave like the invariant culture * Numbers will always be formatted as in the invariant culture. For example, decimal point will always be formatted as ".". Number strings previously formatted with cultures that have different symbols will fail parsing. * All cultures will have "¤" as the currency symbol * Culture enumeration will always return a list with one culture which is the invariant culture. - + ## String casing - + String casing (ToUpper and ToLower) will be performed for the ASCII range only. Requests to case code points outside that range will not be performed; however, no exception will be thrown. In other words, casing will only be performed for character range ['a'..'z']. - + Turkish I casing will not be supported when using Turkish cultures. - + ## String sorting and searching String operations like [Compare](https://docs.microsoft.com/dotnet/api/?term=string.compare), [IndexOf](https://docs.microsoft.com/dotnet/api/?term=string.indexof) and [LastIndexOf](https://docs.microsoft.com/dotnet/api/?term=string.lastindexof) are always performed as [ordinal](https://en.wikipedia.org/wiki/Ordinal_number) and not linguistic operations regardless of the string comparison options passed to the APIs. - + The [ignore case](https://docs.microsoft.com/dotnet/api/system.globalization.compareoptions.ignorecase) string sorting option is supported but only for the ASCII range as mentioned previously. - + For example, the following comparison will resolve to being unequal: * 'i', compared to * Turkish I '\u0130', given -* Turkish culture, using +* Turkish culture, using * CompareOptions.Ignorecase However, the following comparison will resolve to being equal: * 'i', compared to -* 'I', using +* 'I', using * CompareOptions.Ignorecase - + It is worth noting that all other [sort comparison options](https://docs.microsoft.com/dotnet/api/system.globalization.compareoptions) (for example, ignore symbols, ignore space, Katakana, Hiragana) will have no effect in the invariant mode (they are ignored). - + ## Sort keys - + Sort keys are used mostly when indexing some data (for example, database indexing). When generating sort keys for 2 strings, comparing the sort keys should yield the exact same result as comparing the original 2 strings. In the invariant mode, sort keys will be generated according to ordinal comparison while respecting ignore casing options.
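To make the rules above concrete, here is a minimal C# sketch (illustrative only, not part of the original document; it assumes the process runs with the invariant mode switch described below enabled):

```csharp
using System;
using System.Globalization;

class InvariantModeComparisonDemo
{
    static void Main()
    {
        // Assumes DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 is set for the process.
        CompareInfo turkish = new CultureInfo("tr-TR").CompareInfo;

        // ASCII-only ignore-case is honored: 'i' and 'I' compare equal.
        Console.WriteLine(turkish.Compare("i", "I", CompareOptions.IgnoreCase)); // 0

        // Turkish dotted capital I (U+0130) is outside the ASCII range, so the
        // comparison stays ordinal and the strings compare unequal.
        Console.WriteLine(turkish.Compare("i", "\u0130", CompareOptions.IgnoreCase)); // non-zero

        // Sort keys follow the same rules: ordinal comparison respecting ignore-case.
        SortKey a = turkish.GetSortKey("i", CompareOptions.IgnoreCase);
        SortKey b = turkish.GetSortKey("I", CompareOptions.IgnoreCase);
        Console.WriteLine(SortKey.Compare(a, b)); // 0 (equal)
    }
}
```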
- + ## String normalization - -String normalization normalizes a string into some form (for example, composed, decomposed forms). Normalization data is required to perform these operations, which isn't available in invariant mode. In this mode, all strings are considered as already normalized, per the following behavior: -* If the app requested to normalize any string, the original string is returned without modification. +String normalization normalizes a string into some form (for example, composed, decomposed forms). Normalization data is required to perform these operations, which isn't available in invariant mode. In this mode, all strings are considered as already normalized, per the following behavior: + +* If the app requested to normalize any string, the original string is returned without modification. * If the app asked if any string is normalized, the return value will always be `true`. - + ## Internationalized Domain Names (IDN) support [Internationalized Domain Names](https://en.wikipedia.org/wiki/Internationalized_domain_name) require globalization data to perform conversion to ASCII or Unicode forms, which isn't available in the invariant mode. In this mode, IDN functionality has the following behavior: @@ -91,13 +91,13 @@ String normalization normalizes a string into some form (for example, composed, * IDN support doesn't conform to the latest standard. * IDN support will be incorrect if the input IDN string is not normalized since normalization is not supported in invariant mode. * Some basic IDN strings will still produce correct values. - + ## Time zone display name in Linux - + When running on Linux, ICU is used to get the time zone display name. In invariant mode, the standard time zone names are returned instead. - + ## Enabling the invariant mode - + Applications can enable the invariant mode by either of the following: 1. in project file: @@ -119,13 +119,13 @@ Applications can enable the invariant mode by either of the following: } } ``` - + 3. setting the environment variable `DOTNET_SYSTEM_GLOBALIZATION_INVARIANT` to `true` or `1`. Note: a value set in the project file or `runtimeconfig.json` has higher priority than the environment variable. ## APP behavior with and without the invariant config switch - + - If the invariant config switch is not set or it is set false - The framework will depend on the OS for the globalization support. - On Linux, if the ICU package is not installed, the application will fail to start. diff --git a/docs/design/features/host-component-dependencies-resolution.md b/docs/design/features/host-component-dependencies-resolution.md index 094f3d082e94..f107961714e5 100644 --- a/docs/design/features/host-component-dependencies-resolution.md +++ b/docs/design/features/host-component-dependencies-resolution.md @@ -40,4 +40,4 @@ This feature certainly provides a somewhat duplicate functionality to the existi * Currently we don't consider frameworks for the app when computing probing paths for resolving assets from the component's `.deps.json`. This is a different behavior from the app startup where these are considered. Is it important - needed? * Add ability to correlate tracing with the runtime - probably some kind of activity ID * Handling of native assets - currently returning just probing paths. Would be cleaner to return full resolved paths. But we would have to keep some probing paths.
In the case of a missing `.deps.json` the native library should be looked for in the component directory - thus requiring probing - since the hosts can't figure out which of the files in the folder are native libraries. -* Handling of satellite assemblies (resource assets) - currently returning just probing paths which exclude the culture. So from a resolved asset `./foo/en-us/resource.dll` we only take `./foo` as the probing path. Consider using full paths instead - probably would require more parsing as we would have to be able to figure out the culture ID somewhere to build the true map AssemblyName->path in the managed class. Just like for native assets, if there's no `.deps.json` the only possible solution is to use probing, so the probing semantics would have to be supported anyway. \ No newline at end of file +* Handling of satellite assemblies (resource assets) - currently returning just probing paths which exclude the culture. So from a resolved asset `./foo/en-us/resource.dll` we only take `./foo` as the probing path. Consider using full paths instead - probably would require more parsing as we would have to be able to figure out the culture ID somewhere to build the true map AssemblyName->path in the managed class. Just like for native assets, if there's no `.deps.json` the only possible solution is to use probing, so the probing semantics would have to be supported anyway. diff --git a/docs/design/features/host-error-codes.md b/docs/design/features/host-error-codes.md index 941c2a92a001..21ea9c8bc711 100644 --- a/docs/design/features/host-error-codes.md +++ b/docs/design/features/host-error-codes.md @@ -7,25 +7,23 @@ Note that the exit code returned by running an application via `dotnet.exe` or ` * `Success` (`0`) - Operation was successful. -* `Success_HostAlreadyInitialized` (`0x00000001`) - Initialization was successful, but another host context is already initialized, so the returned context is "secondary". The requested context was otherwise fully compatible with the already initialized context. -This is returned by `hostfxr_initialize_for_runtime_config` if it's called when the host is already initialized in the process. Comes from `corehost_initialize` in `hostpolicy`. +* `Success_HostAlreadyInitialized` (`0x00000001`) - Initialization was successful, but another host context is already initialized, so the returned context is "secondary". The requested context was otherwise fully compatible with the already initialized context. This is returned by `hostfxr_initialize_for_runtime_config` if it's called when the host is already initialized in the process. Comes from `corehost_initialize` in `hostpolicy`. -* `Success_DifferentRuntimeProperties` (`0x00000002`) - Initialization was successful, but another host context is already initialized and the requested context specified some runtime properties which are not the same (either in value or in presence) to the already initialized context. -This is returned by `hostfxr_initialize_for_runtime_config` if it's called when the host is already initialized in the process. Comes from `corehost_initialize` in `hostpolicy`. +* `Success_DifferentRuntimeProperties` (`0x00000002`) - Initialization was successful, but another host context is already initialized and the requested context specified some runtime properties which are not the same (either in value or in presence) as the already initialized context. This is returned by `hostfxr_initialize_for_runtime_config` if it's called when the host is already initialized in the process.
Comes from `corehost_initialize` in `hostpolicy`. ### Failure error/exit codes * `InvalidArgFailure` (`0x80008081`) - One of the specified arguments for the operation is invalid. -* `CoreHostLibLoadFailure` (`0x80008082`) - There was a failure loading a dependent library. If any of the hosting components calls `LoadLibrary`/`dlopen` on a dependent library and the call fails, this error code is returned. The most common case for this failure is if the dependent library is missing some of its dependencies (for example the necessary CRT is missing on the machine), likely corrupt or incomplete install. +* `CoreHostLibLoadFailure` (`0x80008082`) - There was a failure loading a dependent library. If any of the hosting components calls `LoadLibrary`/`dlopen` on a dependent library and the call fails, this error code is returned. The most common case for this failure is if the dependent library is missing some of its dependencies (for example the necessary CRT is missing on the machine), which likely indicates a corrupt or incomplete install. This error code is also returned from `corehost_resolve_component_dependencies` if it's called on a `hostpolicy` which has not been initialized via the hosting layer. This would typically happen if `coreclr` is loaded directly without the hosting layer and then `AssemblyDependencyResolver` is used (which is an unsupported scenario). * `CoreHostLibMissingFailure` (`0x80008083`) - One of the dependent libraries is missing. Typically when the `hostfxr`, `hostpolicy` or `coreclr` dynamic libraries are not present in the expected locations. Probably means a corrupted or incomplete installation. * `CoreHostEntryPointFailure` (`0x80008084`) - One of the dependent libraries is missing a required entry point. -* `CoreHostCurHostFindFailure` (`0x80008085`) - If the hosting component is trying to use the path to the current module (the hosting component itself) and from it deduce the location of the installation. Either the location of the current module could not be determined (some weird OS call failure) or the location is not in the right place relative to other expected components. +* `CoreHostCurHostFindFailure` (`0x80008085`) - Returned when the hosting component tries to use the path to the current module (the hosting component itself) to deduce the location of the installation. Either the location of the current module could not be determined (some weird OS call failure) or the location is not in the right place relative to other expected components. For example the `hostfxr` may look at its location and try to deduce the location of the `shared` folder with the framework from it. It assumes the typical install layout on disk. If this doesn't work, this error will be returned. * `CoreClrResolveFailure` (`0x80008087`) - If the `coreclr` library could not be found. The hosting layer (`hostpolicy`) looks for the `coreclr` library either next to the app itself (for self-contained) or in the root framework (for framework-dependent). This search can be done purely by looking at disk or more commonly by looking into the respective `.deps.json`. If the `coreclr` library is missing in `.deps.json` or it's there but doesn't exist on disk, this error is returned.
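Since these values surface as raw process exit codes, a small lookup helper can make failures easier to diagnose. The sketch below is a hypothetical illustration (the helper is not part of the hosting layer) using a subset of the codes documented here:

```csharp
using System.Collections.Generic;

static class HostErrorCode
{
    // Subset of the documented host error codes; extend as needed.
    static readonly Dictionary<uint, string> Names = new()
    {
        [0x80008081] = "InvalidArgFailure",
        [0x80008082] = "CoreHostLibLoadFailure",
        [0x80008083] = "CoreHostLibMissingFailure",
        [0x80008084] = "CoreHostEntryPointFailure",
        [0x80008085] = "CoreHostCurHostFindFailure",
        [0x80008087] = "CoreClrResolveFailure",
    };

    // Note: Unix shells only surface the low 8 bits of an exit code, so the
    // full 32-bit value is mainly available on Windows or via diagnostics APIs.
    public static string Describe(int exitCode) =>
        Names.TryGetValue(unchecked((uint)exitCode), out string? name)
            ? name
            : $"Unknown host error code 0x{exitCode:X8}";
}
```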
@@ -61,7 +59,7 @@ For example the `hostfxr` may look at its location and try to deduce the locatio * Other inconsistencies (for example `rollForward` and `applyPatches` are not allowed to be specified in the same config file) * Any of the above failures reading the `.runtimeconfig.dev.json` file * Self-contained `.runtimeconfig.json` used in `hostfxr_initialize_for_runtime_config` -Note that missing `.runtimconfig.json` is not an error (means self-contained app). +Note that a missing `.runtimeconfig.json` is not an error (it means a self-contained app). This error code is also used when there is a problem reading the CLSID map file in `comhost`. * `AppArgNotRunnable` (`0x80008094`) - Used internally when the command line for `dotnet.exe` doesn't contain the path to the application to run. In such a case the command line is considered to be a CLI/SDK command. This error code should never be returned to an external caller. @@ -70,8 +68,8 @@ This error code is also used when there is a problem reading the CLSID map file * The `apphost` binary has not been imprinted with the path to the app to run (so freshly built `apphost.exe` from the branch will fail to run like this) * The `apphost` is a bundle (single-file exe) and it failed to extract correctly. -* `FrameworkMissingFailure` (`0x80008096`) - It was not possible to find a compatible framework version. This originates in `hostfxr` (`resolve_framework_reference`) and means that the app specified a reference to a framework in its `.runtimeconfig.json` which could not be resolved. The failure to resolve can mean that no such framework is available on the disk, or that the available frameworks don't match the minimum version specified or that the roll forward options specified excluded all available frameworks. -Typically this would be used if a 3.0 app is trying to run on a machine which has no 3.0 installed. +* `FrameworkMissingFailure` (`0x80008096`) - It was not possible to find a compatible framework version. This originates in `hostfxr` (`resolve_framework_reference`) and means that the app specified a reference to a framework in its `.runtimeconfig.json` which could not be resolved. The failure to resolve can mean that no such framework is available on the disk, or that the available frameworks don't match the minimum version specified or that the roll forward options specified excluded all available frameworks. +Typically this would be used if a 3.0 app is trying to run on a machine which has no 3.0 installed. It would also be used for example if a 32bit 3.0 app is running on a machine which has 3.0 installed but only for 64bit. * `HostApiFailed` (`0x80008097`) - Returned by `hostfxr_get_native_search_directories` if the `hostpolicy` could not calculate the `NATIVE_DLL_SEARCH_DIRECTORIES`. @@ -87,7 +85,7 @@ It would also be used for example if a 32bit 3.0 app is running on a machine whi * `SdkResolverResolveFailure` (`0x8000809b`) - Returned from `hostfxr_resolve_sdk2` when it fails to find a matching SDK. Similar to `LibHostSdkFindFailure` but only used in the `hostfxr_resolve_sdk2`. -* `FrameworkCompatFailure` (`0x8000809c`) - During processing of `.runtimeconfig.json` there were two framework references to the same framework which were not compatible. This can happen if the app specified a framework reference to a lower-level framework which is also specified by a higher-level framework which is also used by the app.
+* `FrameworkCompatFailure` (`0x8000809c`) - During processing of `.runtimeconfig.json` there were two framework references to the same framework which were not compatible. This can happen if the app specified a framework reference to a lower-level framework which is also specified by a higher-level framework which is also used by the app. For example, this would happen if the app referenced `Microsoft.AspNet.App` version 2.0 and `Microsoft.NETCore.App` version 3.0. In such case the `Microsoft.AspNet.App` has `.runtimeconfig.json` which also references `Microsoft.NETCore.App` but it only allows versions 2.0 up to 2.9 (via roll forward options). So the version 3.0 requested by the app is incompatible. * `FrameworkCompatRetry` (`0x8000809d`) - Error used internally if the processing of framework references from `.runtimeconfig.json` reached a point where it needs to reprocess another already processed framework reference. If this error is returned to the external caller, it would mean there's a bug in the framework resolution algorithm. @@ -100,10 +98,10 @@ For example, this would happen if the app referenced `Microsoft.AspNet.App` vers * `LibHostDuplicateProperty` (`0x800080a1`) - The `.runtimeconfig.json` specified by the app contains a runtime property which is also produced by the hosting layer. For example if the `.runtimeconfig.json` would specify a property `TRUSTED_PLATFORM_ROOTS`, this error code would be returned. It is not allowed to specify properties which are otherwise populated by the hosting layer (`hostpolicy`) as there is not good way to resolve such conflicts. -* `HostApiUnsupportedVersion` (`0x800080a2`) - Feature which requires certain version of the hosting layer binaries was used on a version which doesn't support it. +* `HostApiUnsupportedVersion` (`0x800080a2`) - Feature which requires certain version of the hosting layer binaries was used on a version which doesn't support it. For example if COM component specified to run on 2.0 `Microsoft.NETCore.App` - as that contains older version of `hostpolicy` which doesn't support the necessary features to provide COM services. -* `HostInvalidState` (`0x800080a3`) - Error code returned by the hosting APIs in `hostfxr` if the current state is incompatible with the requested operation. There are many such cases, please refer to the documentation of the hosting APIs for details. +* `HostInvalidState` (`0x800080a3`) - Error code returned by the hosting APIs in `hostfxr` if the current state is incompatible with the requested operation. There are many such cases, please refer to the documentation of the hosting APIs for details. For example if `hostfxr_get_runtime_property_value` is called with the `host_context_handle` `nullptr` (meaning get property from the active runtime) but there's no active runtime in the process. * `HostPropertyNotFound` (`0x800080a4`) - property requested by `hostfxr_get_runtime_property_value` doesn't exist. diff --git a/docs/design/features/host-probing.md b/docs/design/features/host-probing.md index 9883c676ca08..38df2347406b 100644 --- a/docs/design/features/host-probing.md +++ b/docs/design/features/host-probing.md @@ -25,29 +25,29 @@ The dotnet host uses probing when it searches for actual file on disk for a give The library relative path in this case is `newtonsoft.json/11.0.2` and the asset relative path is `lib/netstandard2.0/Newtonsoft.Json.dll`. So the goal of the probing logic is to find the `Newtonsoft.Json.dll` file using the above relative paths. 
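The probing described in the next section is essentially a first-match-wins lookup over an ordered list of root paths. As a rough C# sketch (a hypothetical helper for illustration; the real logic lives in the native host components):

```csharp
using System.Collections.Generic;
using System.IO;

static class ProbingSketch
{
    // probingPaths must be ordered by priority (see "Probing paths" below).
    public static string? ResolveAsset(
        IEnumerable<string> probingPaths,
        string libraryRelativePath,   // e.g. "newtonsoft.json/11.0.2"
        string assetRelativePath)     // e.g. "lib/netstandard2.0/Newtonsoft.Json.dll"
    {
        foreach (string root in probingPaths)
        {
            string candidate = Path.Combine(root, libraryRelativePath, assetRelativePath);
            if (File.Exists(candidate))
                return candidate;     // first path containing the file wins
        }
        return null;                  // not found; the host reports an error
    }
}
```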
## Probing -The probing itself is done by going over a list of probing paths, which are ordered according to their priority. For each path, the host will append the relative parts of the path as per above and see if the file actually exists on the disk. -If the file is found, the probing is done, and the full path just resolved is stored. +The probing itself is done by going over a list of probing paths, which are ordered according to their priority. For each path, the host will append the relative parts of the path as per above and see if the file actually exists on the disk. +If the file is found, the probing is done, and the full path just resolved is stored. If the file is not found, the probing continues with the next path on the list. If all paths are tried and the asset is still not found, this is reported as an error (with the exception of the app's `.deps.json` asset, in which case it's ignored). ## Probing paths The list of probing paths ordered according to their priority. First path in the list below is tried first and so on. -* Servicing paths - Servicing paths are only used for serviceable assets, that is the corresponding library record must specify `serviceable: true`. +* Servicing paths + Servicing paths are only used for serviceable assets, that is, the corresponding library record must specify `serviceable: true`. The base servicing path is * On Windows x64 `%ProgramFiles(x86)%\coreservicing` * On Windows x86 `%ProgramFiles%\coreservicing` - * Otherwise (Linux/Mac) `$CORE_SERVICING` + * Otherwise (Linux/Mac) `$CORE_SERVICING` Given the base servicing path, the probing paths are * Servicing NI probe path `/|arch|` - this is used only for `runtime` assets * Servicing normal probe path `/pkgs` - this is used for all assets * The application (or framework if we're resolving framework assets) directory -* Framework directories - If the app (or framework) has dependencies on frameworks, these frameworks are used as probing paths. - The order is from the higher level framework to lower level framework. The app is considered the highest level, it direct dependencies are next and so on. - For assets from frameworks, only that framework and lower level frameworks are considered. +* Framework directories + If the app (or framework) has dependencies on frameworks, these frameworks are used as probing paths. + The order is from the higher level framework to lower level framework. The app is considered the highest level, its direct dependencies are next, and so on. + For assets from frameworks, only that framework and lower level frameworks are considered. Note: These directories come directly out of the framework resolution process. Special note on Windows where global locations are always considered even if the app is not executed via the shared `dotnet.exe`. More details can be found in [Multi-level Shared FX Lookup](multilevel-sharedfx-lookup.md). * Shared store paths * `$DOTNET_SHARED_STORE/|arch|/|tfm|` - The environment variable `DOTNET_SHARED_STORE` can contain multiple paths, in which case each is appended with `|arch|/|tfm|` and used as a probing path. @@ -56,10 +56,10 @@ The list of probing paths ordered according to their priority.
First path in the * On Windows, the global shared store is used * If running in WOW64 mode - `%ProgramFiles(x86)%\dotnet\store\|arch|\|tfm|` * Otherwise - `%ProgramFiles%\dotnet\store\|arch|\|tfm|` -* Additional probing paths In these paths the `|arch|/|tfm|` string can be used and will be replaced with the actual values before using the path. * `--additionalprobingpath` command line arguments * `additionalProbingPaths` specified in `.runtimeconfig.json` and `.runtimeconfig.dev.json` for the app and each framework (highest to lowest) - Note about framework-dependent and self-contained apps. With regard to probing the main difference is that self-contained apps don't have any framework dependencies, so all assets (including assemblies which normally come from a framework) are probed for in the app's directory. \ No newline at end of file + A note about framework-dependent and self-contained apps: with regard to probing, the main difference is that self-contained apps don't have any framework dependencies, so all assets (including assemblies which normally come from a framework) are probed for in the app's directory. diff --git a/docs/design/features/host-startup-hook.md b/docs/design/features/host-startup-hook.md index b5c29871a282..e2b4623fab03 100644 --- a/docs/design/features/host-startup-hook.md +++ b/docs/design/features/host-startup-hook.md @@ -57,8 +57,8 @@ Each part may be either * the assembly name must be considered a valid assembly name as specified by the `AssemblyName` class. -Note that white-spaces are preserved and considered part of the specified -path/name. So for example path separator followed by a white-space and +Note that white-spaces are preserved and considered part of the specified +path/name. So for example a path separator followed by a white-space and another path separator is invalid, since the white-space-only string in between the path separators will be considered as the assembly name. @@ -90,10 +90,10 @@ centralized, while still allowing user code to do its own thing if it so desires. The producer of `StartupHook.dll` needs to ensure that -`StartupHook.dll` is compatible with the dependencies specified in the -main application's deps.json, since those dependencies are put on the -Trusted Platform Assemblies (TPA) list during the runtime startup, -before `StartupHook.dll` is loaded. This means that `StartupHook.dll` +`StartupHook.dll` is compatible with the dependencies specified in the +main application's deps.json, since those dependencies are put on the +Trusted Platform Assemblies (TPA) list during the runtime startup, +before `StartupHook.dll` is loaded. This means that `StartupHook.dll` needs to be built against the same or lower version of .NET Core than the app. ## Example diff --git a/docs/design/features/host-tracing.md b/docs/design/features/host-tracing.md index be60e9e5db2c..78f4b3ca4e75 100644 --- a/docs/design/features/host-tracing.md +++ b/docs/design/features/host-tracing.md @@ -1,6 +1,6 @@ # Host tracing -The various .NET Core host components provide detailed tracing of diagnostic information which can help solve issues around runtime, framework and assembly resolution and others. +The various .NET Core host components provide detailed tracing of diagnostic information which can help solve issues with runtime, framework, and assembly resolution, among others. ## Existing support Currently (as of .NET Core 2.1) the host tracing is only written to the `stderr` output of the process.
It can be turned on by setting `COREHOST_TRACE=1`. @@ -47,13 +47,13 @@ The functions behave exactly the same in both components. The `listener` paramet * a pointer to an implementation of `host_trace_listener` which is then registered as the only listener for all tracing. * `NULL` value which unregisters any previously registered listener. After this call tracing is disabled. -Custom host can and should register the trace listener as the first thing it does with the respective host component to ensure that all tracing is routed to it. +A custom host can and should register the trace listener as the first thing it does with the respective host component, to ensure that all tracing is routed to it. -Only one trace listener can be registered at any given time. +Only one trace listener can be registered at any given time. Registering a custom trace listener or setting it to `NULL` doesn't override the tracing enabled by environment variables. If a trace listener is registered and `COREHOST_TRACE=1` is set as well, the traces will be routed to both `stderr` and the registered listener. -The `hostfxr` component will propagate the trace listener to the `hostpolicy` component before it calls into it. So custom host only needs to register its trace listener with the `hostfxr` component and not both. The propagation of the trace listener is only done for the duration necessary after which it will be unregistered again. So custom host might need to register its own listener if it makes calls directly to `hostpolicy` on top of the calls to `hostfxr`. +The `hostfxr` component will propagate the trace listener to the `hostpolicy` component before it calls into it. So a custom host only needs to register its trace listener with the `hostfxr` component and not with both. The propagation of the trace listener is only done for the duration necessary, after which it will be unregistered again. So a custom host might need to register its own listener if it makes calls directly to `hostpolicy` on top of the calls to `hostfxr`. In the case of a new (.NET Core 3) `hostfxr` component which would call into an old (.NET Core 2.1) `hostpolicy` component, the `hostfxr` will not perform the propagation in any way since the older `hostpolicy` doesn't support this mechanism. The trace listener interface looks like this: @@ -68,14 +68,14 @@ struct host_trace_listener } ``` -The `message` parameter is a standard `NUL` terminated string and it's the message to trace with the respective verbosity level. +The `message` parameter is a standard `NUL`-terminated string; it's the message to trace with the respective verbosity level. The `activityId` parameter is a standard `NUL`-terminated string. It's used to correlate traces for a given binding event. The content of the string is not yet defined, but the trace listeners should consider it opaque. Trace listeners should include this string in the trace of the message in some form. The parameter may be `NULL` in which case the trace doesn't really belong to any specific binding event. Methods on the trace listener interface can be called from any thread in the app, and should be able to handle multiple calls at the same time from different threads. ## Future investments ### Trace content -Currently the host components tend to trace a lot. The trace contains lot of interesting information but it's done in a very verbose way which is sometimes hard to navigate.
Future investment should look at the common scenarios which are using the host tracing and optimize the trace output for those scenarios. This doesn't necessarily mean decrease the amount of tracing, but possibly introduce "summary sections" which would describe the end result decisions for certain scenarios. +Currently the host components tend to trace a lot. The trace contains a lot of interesting information but it's done in a very verbose way which is sometimes hard to navigate. Future investment should look at the common scenarios which are using the host tracing and optimize the trace output for those scenarios. This doesn't necessarily mean decreasing the amount of tracing, but possibly introducing "summary sections" which would describe the end result decisions for certain scenarios. It would also be good to review the usage of verbose versus info tracing and make it consistent. ### Interaction with other diagnostics in the .NET Core diff --git a/docs/design/features/hw-intrinsics.md b/docs/design/features/hw-intrinsics.md index fb1287c42185..0abddc147cae 100644 --- a/docs/design/features/hw-intrinsics.md +++ b/docs/design/features/hw-intrinsics.md @@ -30,7 +30,7 @@ The vector types supported by one or more target ISAs are supported across platf * `Vector256<T>` - A 256-bit vector of type `T` * `Vector256<T>` intrinsics are supported only on x86 (and x64). -Note that these are generic types, which distinguishes these from native intrinsic vector types. It also somewhat complicates interop, as the runtime currently doesn't support interop for generic types. See https://github.com/dotnet/coreclr/issues/1685 +Note that these are generic types, which distinguishes them from native intrinsic vector types. It also somewhat complicates interop, as the runtime currently doesn't support interop for generic types. See https://github.com/dotnet/runtime/issues/4547 Not all intrinsics defined on these types support all primitive type parameters. When not supported, they are expected to throw `NotSupportedException`. This is generally handled by the C# implementation code, though for the most part this is a non-issue, as the ISA-specific intrinsics are declared over all supported concrete types (e.g. `Vector128<float>` rather than `Vector128<T>`). @@ -69,7 +69,7 @@ Currently, the JIT determines in the importer whether it will: * Generate a call (e.g. if it is a recognized intrinsic but an operand is not immediate as it is expected to be). The `mustExpand` option, which is returned by the VM as an "out" parameter to the `getIntrinsicID` method, must be false in this case. * Throw `PlatformNotSupportedException` if it is not a recognized and supported intrinsic for the current platform. -There is some room for improvement here. For example, it may be that an argument that appears to be non-constant could later be determined to be a constant value (https://github.com/dotnet/coreclr/issues/17108). +There is some room for improvement here. For example, it may be that an argument that appears to be non-constant could later be determined to be a constant value (https://github.com/dotnet/runtime/issues/9989). ### Hardware Intrinsics Table @@ -114,7 +114,7 @@ The only thing that makes the hardware intrinsics different in the area of instr The encodings are largely specified by `coreclr\src\jit\instrs{arch}.h`, and most of the target-specific code is in the `emit{arch}.*` files.
-This is an area of the JIT that could use some redesign and refactoring (https://github.com/dotnet/coreclr/issues/23006 and https://github.com/dotnet/coreclr/issues/21441 among others). +This is an area of the JIT that could use some redesign and refactoring (https://github.com/dotnet/runtime/issues/12178 and https://github.com/dotnet/runtime/issues/11631 among others). ## Testing diff --git a/docs/design/features/raw-eventlistener.md b/docs/design/features/raw-eventlistener.md index f13b6a6096e0..aaf562ff3b72 100644 --- a/docs/design/features/raw-eventlistener.md +++ b/docs/design/features/raw-eventlistener.md @@ -23,7 +23,7 @@ public enum EventListenerSettings None, RawEventDispatch } -``` +``` This parameter is used to specify the desired dispatch behavior (in this case, do not deserialize event payloads). @@ -33,7 +33,7 @@ The new raw dispatch API will be: ``` public void OnEventWrittenRaw(RawEventWrittenEventArgs args); - + public sealed class RawEventWrittenEventArgs { @@ -52,9 +52,9 @@ public sealed class RawEventWrittenEventArgs public EventLevel Level { get; } public long OSThreadId { get; } public DateTime TimeStamp { get; } - + // Replacement properties for Payload and PayloadNames. - public ReadOnlySpan<byte> Metadata { get; } + public ReadOnlySpan<byte> Metadata { get; } public ReadOnlySpan<byte> Payload { get; } } ``` diff --git a/docs/design/features/readytorun-composite-format-design.md b/docs/design/features/readytorun-composite-format-design.md index b0fcf288874b..afa7a4ed1574 100644 --- a/docs/design/features/readytorun-composite-format-design.md +++ b/docs/design/features/readytorun-composite-format-design.md @@ -50,7 +50,7 @@ we propose using two complementary strategies: * In the composite R2R file with embedded metadata, there must be a new table of COR headers and metadata blobs representing the MSIL metadata from all the input assemblies. The table -must be indexable by simple assembly name for fast lookup. +must be indexable by simple assembly name for fast lookup. * in contrast to managed assemblies and single-input R2R executables, composite R2R files don't expose any COR header (it's not meaningful as the file potentially contains a larger @@ -98,7 +98,7 @@ this encoding are still work in progress and likely to further evolve. version bubble is represented by an arbitrary mixture of single-input and composite R2R files. If that is the case, manifest metadata would need to be decoupled from the index to `READYTORUN_SECTION_ASSEMBLIES`. - + Alternatively we could make it such that `READYTORUN_SECTION_MANIFEST_METADATA` holds all component assemblies of the current composite image at the beginning of the AssemblyRef table followed by the other needed assemblies *within the version bubble outside of the current @@ -157,7 +157,7 @@ that could be subsequently opened by ILDASM or ILSpy. Ideally we should patch ILDASM / ILSpy to cleanly handle the composite R2R file format; sadly this may end up being a relatively complex change due to the presence of multiple MSIL metadata blocks in the -file. +file.
# Required diagnostic changes diff --git a/docs/design/features/roll-forward-on-no-candidate-fx.md b/docs/design/features/roll-forward-on-no-candidate-fx.md index dbfc04b632cb..7417498675d4 100644 --- a/docs/design/features/roll-forward-on-no-candidate-fx.md +++ b/docs/design/features/roll-forward-on-no-candidate-fx.md @@ -11,19 +11,19 @@ If the version specified is a _production_ version, the default behavior is: ``` For instance: - + Desired version: 1.0.1 Available versions: 1.0.0, 1.0.1, 1.0.2, 1.0.3, 1.1.0, 1.1.1, 2.0.1 Chosen version: 1.0.3 - + Desired version: 1.0.1 Available versions: 1.0.0, 1.1.0-preview1-x, 1.1.0-preview2-x, 1.2.0-preview1-x Chosen version: 1.1.0-preview2-x - + Desired version: 1.0.1 Available versions: 1.0.0, 1.1.0-preview1-x, 1.2.0, 1.2.1-preview1-x Chosen version: 1.2.0 - + Desired version: 1.0.1 Available versions: 1.0.0, 2.0.0 Chosen version: there is no compatible version available @@ -40,14 +40,14 @@ This means _preview_ is never rolled forward to _production_. Desired version: 1.0.1-preview2-x Available versions: 1.0.1-preview2-x, 1.0.1-preview3-x Chosen version: 1.0.1-preview2-x - + Desired version: 1.0.1-preview2-x Available versions: 1.0.1-preview3-x Chosen version: 1.0.1-preview3-x - + Desired version: 1.0.1-preview2-x Available versions: 1.0.1, 1.0.2-preview3-x - Chosen version: there is no compatible version available + Chosen version: there is no compatible version available ## Settings to control behavior ### applyPatches @@ -59,7 +59,7 @@ Once a compatible framework version is chosen as explained above, the latest pat Desired version: 1.0.1 Available versions: 1.0.1, 1.0.2 Chosen version: 1.0.2 - + Patch roll forward: disabled Desired version: 1.0.1 Available versions: 1.0.1, 1.0.2 @@ -79,7 +79,7 @@ To specify the exact desired framework version, use the command line argument '- - Command line argument ('--roll-forward-on-no-candidate-fx' argument) - Runtime configuration file ('rollForwardOnNoCandidateFx' property) - DOTNET_ROLL_FORWARD_ON_NO_CANDIDATE_FX environment variable - + The valid values: 0) Off (_do not roll forward_) @@ -101,7 +101,7 @@ If this feature is enabled and no compatible framework version is found, we'll s Desired Version: 1.0.0 Available versions: 1.1.1, 1.1.3, 1.2.0 Chosen version: 1.1.1 - + Patch roll forward: enabled Roll Forward On No Candidate Fx: 0 (disabled) Desired Version: 1.0.0 @@ -111,7 +111,7 @@ If this feature is enabled and no compatible framework version is found, we'll s It's important to notice that, even if "Roll Forward On No Candidate Fx" is enabled, only the specified framework version will be accepted if the '--fx-version' argument is used. -Since there are three ways to specify the values, conflicts will be resolved by the order listed above (command line has priority over config, which has priority over the environment variable). +Since there are three ways to specify the values, conflicts will be resolved by the order listed above (command line has priority over config, which has priority over the environment variable). ``` For instance: @@ -123,8 +123,8 @@ Since there are three ways to specify the values, conflicts will be resolved by 'rollForwardOnNoCandidateFx' property is set to '1' DOTNET_ROLL_FORWARD_ON_NO_CANDIDATE_FX env var is set to '1' The feature is DISABLED. -``` - +``` + There is no inheritance when there are chained framework references. 
If the app references FX1, and FX1 references FX2, then the resolution of FX2 only takes into account settings from `.runtimeconfig.json` in FX1, CLI and env. variable. The settings in the app's `.runtimeconfig.json` have no effect on resolution of FX2. ## Multilevel SharedFx Lookup diff --git a/docs/design/features/source-generator-pinvokes.md b/docs/design/features/source-generator-pinvokes.md index 1efec51604bc..fbd588950df9 100644 --- a/docs/design/features/source-generator-pinvokes.md +++ b/docs/design/features/source-generator-pinvokes.md @@ -2,7 +2,7 @@ ## Purpose -The CLR possesses a rich built-in marshaling mechanism for interoperability with native code that is handled at runtime. This system was designed to free .NET developers from having to author complex and potentially ABI sensitive [type conversion code][typemarshal_link] from a managed to an unmanaged environment. The built-in system works with both [P/Invoke][pinvoke_link] (i.e. `DllImportAttribute`) and [COM interop](https://docs.microsoft.com/dotnet/standard/native-interop/cominterop). The generated portion is typically called an ["IL Stub"][il_stub_link] since the stub is generated by inserting IL instructions into a stream and then passing that stream to the JIT for compilation. +The CLR possesses a rich built-in marshaling mechanism for interoperability with native code that is handled at runtime. This system was designed to free .NET developers from having to author complex and potentially ABI sensitive [type conversion code][typemarshal_link] from a managed to an unmanaged environment. The built-in system works with both [P/Invoke][pinvoke_link] (i.e. `DllImportAttribute`) and [COM interop](https://docs.microsoft.com/dotnet/standard/native-interop/cominterop). The generated portion is typically called an ["IL Stub"][il_stub_link] since the stub is generated by inserting IL instructions into a stream and then passing that stream to the JIT for compilation. A consequence of this approach is that marshaling code is not immediately available post-link for AOT scenarios (e.g. [`crossgen`](../../workflow/building/coreclr/crossgen.md) and [`crossgen2`](crossgen2-compilation-structure-enhancements.md)). The immediate unavailability of this code has been mitigated by a complex mechanism to have marshalling code generated during AOT compilation. The [IL Linker][ilinker_link] is another tool that struggles with runtime generated code since it is unable to understand all potentially used types without seeing what is generated. diff --git a/docs/design/features/tiered-compilation.md b/docs/design/features/tiered-compilation.md index 242dbc94b3ac..caedd79f94f1 100644 --- a/docs/design/features/tiered-compilation.md +++ b/docs/design/features/tiered-compilation.md @@ -92,4 +92,4 @@ Most of the implementation is relatively straightforward given the design and be 1. The current call counter implementation is utterly naive and using the PreStub has a high per-invocation cost relative to other more sophisticated implementation options. We expected it would need to change sooner, but so far, despite having some measurable cost, it hasn't reached the top of the priority list for performance gain vs. work necessary. Part of what makes it not as bad as it looks is that there is a bound on the number of times it can be called for any one method, and relative to typical 100,000 cycle costs for jitting a method, even an expensive call counter doesn't make a huge impact. -2.
Right now background compilation is limited to a single thread taken from the threadpool and used for up to 10ms. If we need more time than that we return the thread and request another. The goal is to be a good citizen in the threadpool's overall workload while still doing enough work in chunks that we get decent cache and thread quantum utilization. It's possible we could do better as the policy here hasn't been profiled much. Thus far we haven't profiled any performance issues that suggested we should be handling this differently. \ No newline at end of file +2. Right now background compilation is limited to a single thread taken from the threadpool and used for up to 10ms. If we need more time than that we return the thread and request another. The goal is to be a good citizen in the threadpool's overall workload while still doing enough work in chunks that we get decent cache and thread quantum utilization. It's possible we could do better as the policy here hasn't been profiled much. Thus far we haven't profiled any performance issues that suggested we should be handling this differently. diff --git a/docs/design/features/unloadability.md b/docs/design/features/unloadability.md index 4b61b0f8b3ce..0d1cddd0f096 100644 --- a/docs/design/features/unloadability.md +++ b/docs/design/features/unloadability.md @@ -1,4 +1,4 @@ -# `AssemblyLoadContext` unloadability +# `AssemblyLoadContext` unloadability ## Goals * Provide a building block for unloadable plug-ins * Users can load an assembly and its dependencies into an unloadable `AssemblyLoadContext`. @@ -100,7 +100,7 @@ Unloading is initialized by the user code calling `AssemblyLoadContext.Unload` m * The `AssemblyLoadContext` fires the `Unloading` event to allow the user code to perform cleanup if required (e.g. stop threads running inside of the context, remove references and destroy handles, etc.) * The `AssemblyLoadContext.InitiateUnload` method is called. It creates a strong GC handle referring to the `AssemblyLoadContext` to keep it around until the unload is complete. For example, finalizers of types that are loaded into the `AssemblyLoadContext` may need access to the `AssemblyLoadContext`. * Then it calls `AssemblyNative::PrepareForAssemblyLoadContextRelease` method with that strong handle as an argument, which in turn calls `CLRPrivBinderAssemblyLoadContext::PrepareForLoadContextRelease` -* That method stores the passed in strong GC handle in `CLRPrivBinderAssemblyLoadContext::m_ptrManagedStrongAssemblyLoadContext`. +* That method stores the passed in strong GC handle in `CLRPrivBinderAssemblyLoadContext::m_ptrManagedStrongAssemblyLoadContext`. * Then it decrements refcount of the `AssemblyLoaderAllocator` the `CLRPrivBinderAssemblyLoadContext` points to. * Finally, it destroys the strong handle to the managed `LoaderAllocator`. That allows the `LoaderAllocator` to be collected. ### Second phase of unloading diff --git a/docs/design/specs/Ecma-335-Augments.md b/docs/design/specs/Ecma-335-Augments.md index 1d42a5e4c909..df4e429912a1 100644 --- a/docs/design/specs/Ecma-335-Augments.md +++ b/docs/design/specs/Ecma-335-Augments.md @@ -147,7 +147,7 @@ error with a suitable message rather than a stack overflow. 
Related issues: * https://github.com/dotnet/roslyn/issues/7971 -* https://github.com/dotnet/coreclr/issues/2674 +* https://github.com/dotnet/runtime/issues/4945 #### Proposed specification change @@ -195,13 +195,13 @@ c) In section II.23.2.6 LocalVarSig, replace the diagram with production rules: ```ebnf LocalVarSig ::= LOCAL_SIG Count LocalVarType+ - + LocalVarType ::= Type CustomMod* Constraint BYREF? Type CustomMod* BYREF Type CustomMod* TYPEDBYREF - + ``` d) In section II.23.2.10 Param, replace the diagram with production rules: @@ -227,7 +227,7 @@ f) In section II.23.2.12 Type, add a production rule to the definition of `Type` ```ebnf Type ::= CustomMod* Type - + ``` g) In sections II.23.2.12 Type and II.23.2.14 TypeSpec replace production rule diff --git a/docs/design/specs/PE-COFF.md b/docs/design/specs/PE-COFF.md index 69074307155d..dff7248a326c 100644 --- a/docs/design/specs/PE-COFF.md +++ b/docs/design/specs/PE-COFF.md @@ -19,11 +19,11 @@ PE/COFF Specification defines the structure of Debug Directory in section 5.1.1. | Offset | Size | Field | Description | |:-------|:-----|:---------------|----------------------------------------------------------------| | 0 | 4 | Signature | 0x52 0x53 0x44 0x53 (ASCII string: "RSDS") | -| 4 | 16 | Guid | GUID (Globally Unique Identifier) of the associated PDB. +| 4 | 16 | Guid | GUID (Globally Unique Identifier) of the associated PDB. | 20 | 4 | Age | Iteration of the PDB. The first iteration is 1. The iteration is incremented each time the PDB content is augmented. | 24 | | Path | UTF-8 NUL-terminated path to the associated .pdb file | -Guid and Age are used to match PE/COFF image with the associated PDB. +Guid and Age are used to match PE/COFF image with the associated PDB. The associated .pdb file may not exist at the path indicated by the Path field. If it doesn't, the Path, Guid and Age can be used to find the corresponding PDB file locally or on a symbol server. The exact search algorithm used by tools to locate the PDB depends on the tool and its configuration. @@ -37,11 +37,11 @@ If the containing PE/COFF file is deterministic the Guid field above and DateTim The entry doesn't have any data associated with it. All fields of the entry except Type shall be zero. -Presence of this entry indicates that the containing PE/COFF file is deterministic. +Presence of this entry indicates that the containing PE/COFF file is deterministic. ### Embedded Portable PDB Debug Directory Entry (type 17) -Declares that debugging information is embedded in the PE file at the location specified by PointerToRawData. +Declares that debugging information is embedded in the PE file at the location specified by PointerToRawData. *Version Major=any, Minor=0x0100* of the data format: | Offset | Size | Field | Description | |:-------|:---------------|:-----------------|-------------------------------------------------------| | 0 | 4 | Signature | 0x4D 0x50 0x44 0x42 | | 4 | 4 | UncompressedSize | The size of decompressed Portable PDB image | -| 8 | SizeOfData - 8 | PortablePdbImage | Portable PDB image compressed using Deflate algorithm | +| 8 | SizeOfData - 8 | PortablePdbImage | Portable PDB image compressed using Deflate algorithm | If both CodeView and Embedded Portable PDB entries are present then they shall represent the same data. @@ -70,9 +70,9 @@ The value of Stamp field in the entry shall be 0.
Stores crypto hash of the content of the symbol file the PE/COFF file was built with. -The hash can be used to validate that a given PDB file was built with the PE/COFF file and not altered in any way. +The hash can be used to validate that a given PDB file was built with the PE/COFF file and not altered in any way. -More than one entry can be present, in case multiple PDBs were produced during the build of the PE/COFF file (e.g. private and public symbols). +More than one entry can be present, in case multiple PDBs were produced during the build of the PE/COFF file (e.g. private and public symbols). *Version Major=0x0001, Minor=0x0000* of the entry data format is as follows: @@ -103,7 +103,7 @@ When validating that Portable PDB matches the debug directory record check that If the symbol format is Windows PDB the checksum is calculated by hashing the entire content of the PDB file with the PDB signature (the 16B GUID and 4B timestamp) zeroed. -When validating that Windows PDB matches the debug directory record check that the checksums match and that the PDB signature (both GUID and timestamp values) match the data in the corresponding [CodeView record](#WindowsCodeViewEntry). +When validating that Windows PDB matches the debug directory record check that the checksums match and that the PDB signature (both GUID and timestamp values) match the data in the corresponding [CodeView record](#WindowsCodeViewEntry). > Note that when the debugger (or other tool) searches for the PDB only GUID and Age fields are used to match the PDB, but the timestamp of the CodeView debug directory entry does not need to match the timestamp stored in the PDB. Therefore, to verify byte-for-byte identity of the PDB, the timestamp field should also be checked. diff --git a/docs/design/specs/PortablePdb-Metadata.md b/docs/design/specs/PortablePdb-Metadata.md index d3d9b1277abd..d642bc0a6523 100644 --- a/docs/design/specs/PortablePdb-Metadata.md +++ b/docs/design/specs/PortablePdb-Metadata.md @@ -34,7 +34,7 @@ When debugging metadata is generated to a separate data blob "#Pdb" and "#~" str #### #Pdb stream The #Pdb stream has the following structure: - + | Offset | Size | Field | Description | |:-------|:-----|:---------------|----------------------------------------------------------------| | 0 | 20 | PDB id | A byte sequence uniquely representing the debugging metadata blob content. | | 20 | 4 | EntryPoint | Entry point MethodDef token, or 0 if not applicable. | | 24 | 8 | ReferencedTypeSystemTables | Bit vector of referenced type system metadata tables, let n be the number of bits that are 1. | | 32 | 4*n | TypeSystemTableRows | Array of n 4-byte unsigned integers indicating the number of rows for each referenced type system metadata table. | -#### #~ stream +#### #~ stream "#~" stream shall only contain debugging information tables defined above. - + References to heaps (strings, blobs, guids) are references to heaps of the debugging metadata. The sizes of references to type system tables are determined using the algorithm described in ECMA-335-II Chapter 24.2.6, except their respective row counts are found in _TypeSystemTableRows_ field of the #Pdb stream. ### Document Table: 0x30 @@ -58,7 +58,7 @@ The Document table has the following columns: The table is not required to be sorted. -There shall be no duplicate rows in the _Document_ table, based upon document name. +There shall be no duplicate rows in the _Document_ table, based upon document name. _Name_ shall not be nil. It can, however, encode an empty name string.
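For readers who want to poke at these tables, the `System.Reflection.Metadata` reader implements this format. The following is a minimal editorial sketch (not part of the specification) that dumps the Document table of a standalone portable PDB:

```csharp
using System;
using System.IO;
using System.Reflection.Metadata;

class DumpDocuments
{
    static void Main(string[] args)
    {
        // args[0]: path to a standalone .pdb in the portable format.
        using FileStream stream = File.OpenRead(args[0]);
        using MetadataReaderProvider provider = MetadataReaderProvider.FromPortablePdbStream(stream);
        MetadataReader reader = provider.GetMetadataReader();

        foreach (DocumentHandle handle in reader.Documents)
        {
            Document doc = reader.GetDocument(handle);
            // Document names use the blob encoding described above; GetString decodes it.
            string name = reader.GetString(doc.Name);
            Guid language = reader.GetGuid(doc.Language);
            Console.WriteLine($"{name} (language {language})");
        }
    }
}
```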
@@ -170,7 +170,7 @@ _InitialDocument_ is only present if the _Document_ field of the _MethodDebugInf | _δILOffset_ | 0 | unsigned compressed | | _Document_ | Document row id | unsigned compressed | -Each _SequencePointRecord_ represents a single sequence point. The sequence point inherits the value of _Document_ property from the previous record (_SequencePointRecord_ or _document-record_), from the _Document_ field of the _MethodDebugInformation_ table if it's the first sequence point of a method body that spans a single document, or from _InitialDocument_ if it's the first sequence point of a method body that spans multiple documents. The value of _IL Offset_ is calculated using the value of the previous sequence point (if any) and the value stored in the record. +Each _SequencePointRecord_ represents a single sequence point. The sequence point inherits the value of _Document_ property from the previous record (_SequencePointRecord_ or _document-record_), from the _Document_ field of the _MethodDebugInformation_ table if it's the first sequence point of a method body that spans a single document, or from _InitialDocument_ if it's the first sequence point of a method body that spans multiple documents. The value of _IL Offset_ is calculated using the value of the previous sequence point (if any) and the value stored in the record. The values of _Start Line_, _Start Column_, _End Line_ and _End Column_ of a non-hidden sequence point are calculated based upon the values of the previous non-hidden sequence point (if any) and the data stored in the record. @@ -253,14 +253,14 @@ There shall be no duplicate rows in the LocalConstant table, based upon owner an The structure of the blob is Blob ::= CustomMod* (PrimitiveConstant | EnumConstant | GeneralConstant) - - PrimitiveConstant ::= PrimitiveTypeCode PrimitiveValue + + PrimitiveConstant ::= PrimitiveTypeCode PrimitiveValue PrimitiveTypeCode ::= BOOLEAN | CHAR | I1 | U1 | I2 | U2 | I4 | U4 | I8 | U8 | R4 | R8 | STRING - - EnumConstant ::= EnumTypeCode EnumValue EnumType + + EnumConstant ::= EnumTypeCode EnumValue EnumType EnumTypeCode ::= BOOLEAN | CHAR | I1 | U1 | I2 | U2 | I4 | U4 | I8 | U8 EnumType ::= TypeDefOrRefOrSpecEncoded - + GeneralConstant ::= (CLASS | VALUETYPE) TypeDefOrRefOrSpecEncoded GeneralValue? | OBJECT @@ -289,18 +289,18 @@ The encoding of the _PrimitiveValue_ and _EnumValue_ is determined based upon th | ```U8``` | uint64 | | ```R4``` | float32 | | ```R8``` | float64 | -| ```STRING``` | A single byte 0xff (represents a null string reference), or a UTF-16 little-endian encoded string (possibly empty). | +| ```STRING``` | A single byte 0xff (represents a null string reference), or a UTF-16 little-endian encoded string (possibly empty). | The numeric values of the type codes are defined by ECMA-335 §II.23.1.16. _EnumType_ must be an enum type as defined in ECMA-335 §II.14.3. The value of _EnumTypeCode_ must match the underlying type of the _EnumType_. -The encoding of the _GeneralValue_ is determined based upon the type expressed by _TypeDefOrRefOrSpecEncoded_ specified in _GeneralConstant_. _GeneralValue_ for special types listed in the table below has to be present and is encoded as specified. If the _GeneralValue_ is not present the value of the constant is the default value of the type. If the type is a reference type the value is a null reference, if the type is a pointer type the value is a null pointer, etc. 
+The encoding of the _GeneralValue_ is determined based upon the type expressed by _TypeDefOrRefOrSpecEncoded_ specified in _GeneralConstant_. _GeneralValue_ for special types listed in the table below has to be present and is encoded as specified. If the _GeneralValue_ is not present the value of the constant is the default value of the type. If the type is a reference type the value is a null reference, if the type is a pointer type the value is a null pointer, etc. | Namespace | Name | _GeneralValue_ encoding | |:--------------|:---------|:-------------------------| | System | Decimal | sign (highest bit), scale (bits 0..7), low (uint32), mid (uint32), high (uint32) | -| System | DateTime | int64: ticks | +| System | DateTime | int64: ticks | ### ImportScope Table: 0x35 The ImportScope table has the following columns: diff --git a/docs/issues-pr-management.md b/docs/issues-pr-management.md index 05090b2332e5..503de72b452d 100644 --- a/docs/issues-pr-management.md +++ b/docs/issues-pr-management.md @@ -12,7 +12,7 @@ dotnet/runtime issues and pull requests are a shared resource. As such, it will Here are a few of the most salient components of working well together, and the FAQ has much more detail. ## Scenarios where we all have to work together: -- All incoming issues and pull requests will be automatically labeled with an `area-*` label. The bot will also assign the `untriaged` label to issues (but not pull requests) when they are created. +- All incoming issues and pull requests will be automatically labeled with an `area-*` label. The bot will also assign the `untriaged` label to issues (but not pull requests) when they are created. - All issues and pull requests should have exactly 1 `area-*` label. - Issues are considered triaged when the `untriaged` label has been removed. - When issues have `area-*` labels switched, the `untriaged` label must be added. This prevents issues being lost in a `triaged` state when they have not actually been triaged by the area owner. In the future, a bot may automatically ensure this happens. diff --git a/docs/project/dogfooding.md b/docs/project/dogfooding.md index 6b564f7ec239..b584ada81b85 100644 --- a/docs/project/dogfooding.md +++ b/docs/project/dogfooding.md @@ -73,7 +73,7 @@ To install additional .NET Core runtimes or SDKs: ... - + ``` (Documentation for configuring feeds is [here](https://docs.microsoft.com/en-us/nuget/consume-packages/configuring-nuget-behavior).) diff --git a/docs/project/glossary.md b/docs/project/glossary.md index f69d3749938b..c1b6b687354b 100644 --- a/docs/project/glossary.md +++ b/docs/project/glossary.md @@ -24,7 +24,7 @@ terminology. | JIT | [Just-in-Time](https://github.com/dotnet/runtime/blob/master/docs/design/coreclr/jit/ryujit-overview.md) compiler. RyuJIT is the code name for the next generation Just-in-Time (aka "JIT") compiler for the .NET runtime. | | LCG | Lightweight Code Generation. An early name for [dynamic methods](https://github.com/dotnet/runtime/blob/master/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/DynamicMethod.cs). | | MD | MetaData. | -| MDA | Managed Debugging Assistant - see [details](https://docs.microsoft.com/en-us/dotnet/framework/debug-trace-profile/diagnosing-errors-with-managed-debugging-assistants) (Note: Not in .NET Core, equivalent diagnostic functionality is made available on a case-by-case basis, e.g.
[#15465](https://github.com/dotnet/coreclr/issues/15465)) | +| MDA | Managed Debugging Assistant - see [details](https://docs.microsoft.com/en-us/dotnet/framework/debug-trace-profile/diagnosing-errors-with-managed-debugging-assistants) (Note: Not in .NET Core, equivalent diagnostic functionality is made available on a case-by-case basis, e.g. [#9418](https://github.com/dotnet/runtime/issues/9418)) | | NGen | Native Image Generator. | | NYI | Not Yet Implemented. | | PAL | [Platform Adaptation Layer](http://archive.oreilly.com/pub/a/dotnet/2002/03/04/rotor.html). Provides an abstraction layer between the runtime and the operating system. | diff --git a/docs/project/library-servicing.md b/docs/project/library-servicing.md index 29934d1606f1..3dc16895951d 100644 --- a/docs/project/library-servicing.md +++ b/docs/project/library-servicing.md @@ -4,7 +4,7 @@ This document provides the steps necessary after modifying a CoreFx library in a ## Check for existence of a .pkgproj -Most CoreFx libraries are not packaged by default. Some libraries have their output packaged in `Microsoft.Private.CoreFx.NetCoreApp`, which is always built, while other libraries have their own specific packages, which are only built on-demand. Your first step is to determine whether or not your library has its own package. To do this, go into the root folder for the library you've made changes to. If there is a `pkg` folder there (which should have a `.pkgproj` file inside of it), your library does have its own package. If there is no `pkg` folder there, the library should be built as part of `Microsoft.Private.CoreFx.NetCoreApp` and shipped as part of `Microsoft.NetCore.App`. To confirm this, check for the `IsNETCoreApp` property being set to `true` in the library's `Directory.Build.props` (or dir.props). If it is, then there is nothing that needs to be done. If it's not, contact a member of the servicing team for guidance, as this situation goes against our convention. +Most libraries are not packaged by default. Some libraries have their output packaged in `Microsoft.Private.CoreFx.NetCoreApp`, which is always built, while other libraries have their own specific packages, which are only built on-demand. Your first step is to determine whether or not your library has its own package. To do this, go into the root folder for the library you've made changes to. If there is a `pkg` folder there (which should have a `.pkgproj` file inside of it), your library does have its own package. If there is no `pkg` folder there, the library should be built as part of `Microsoft.Private.CoreFx.NetCoreApp` and shipped as part of `Microsoft.NetCore.App`. To confirm this, check if the library is listed in NetCoreAppLibrary.props. If it is, then there is nothing that needs to be done. If it's not, contact a member of the servicing team for guidance, as this situation goes against our convention. For example, if you made changes to [System.Data.SqlClient](https://github.com/dotnet/runtime/tree/master/src/libraries/System.Data.SqlClient), then you have a .pkgproj, and will have to follow the steps in this document. However, if you made changes to [System.Collections](https://github.com/dotnet/runtime/tree/master/src/libraries/System.Collections), then you don't have a .pkgproj, and you do not need to do any further work for servicing.
diff --git a/docs/project/linux-performance-tracing.md b/docs/project/linux-performance-tracing.md index 2899d7e683a2..bb39bf904629 100644 --- a/docs/project/linux-performance-tracing.md +++ b/docs/project/linux-performance-tracing.md @@ -1,4 +1,4 @@ -Performance Tracing on Linux +Performance Tracing on Linux ============================ When a performance problem is encountered on Linux, these instructions can be used to gather detailed information about what was happening on the machine at the time of the performance problem. diff --git a/docs/project/list-of-obsoletions.md b/docs/project/list-of-obsoletions.md index 3b7d6696c62d..0b23c097979f 100644 --- a/docs/project/list-of-obsoletions.md +++ b/docs/project/list-of-obsoletions.md @@ -24,3 +24,4 @@ Currently the identifiers `SYSLIB0001` through `SYSLIB0999` are carved out for o | __`SYSLIB0009`__ | The AuthenticationManager Authenticate and PreAuthenticate methods are not supported and throw PlatformNotSupportedException. | | __`SYSLIB0010`__ | This Remoting API is not supported and throws PlatformNotSupportedException. | | __`SYSLIB0011`__ | `BinaryFormatter` serialization is obsolete and should not be used. See https://aka.ms/binaryformatter for recommended alternatives. | +| __`SYSLIB0012`__ | Assembly.CodeBase and Assembly.EscapedCodeBase are only included for .NET Framework compatibility. Use Assembly.Location instead. | diff --git a/docs/project/profiling-api-status.md b/docs/project/profiling-api-status.md index a9279b69d428..5e649414cea8 100644 --- a/docs/project/profiling-api-status.md +++ b/docs/project/profiling-api-status.md @@ -20,7 +20,7 @@ ReJIT feature is only available on x86/x64 for now. ### Profiler Attach/Detach -We only support launch at the moment, see https://github.com/dotnet/coreclr/issues/16796 +We only support launch at the moment, see https://github.com/dotnet/runtime/issues/9886 ### Any issues we missed? diff --git a/docs/project/strong-name-signing.md b/docs/project/strong-name-signing.md index c082df374305..269bb102c4ad 100644 --- a/docs/project/strong-name-signing.md +++ b/docs/project/strong-name-signing.md @@ -10,7 +10,7 @@ All .NET Core assemblies are [strong-named](https://docs.microsoft.com/en-us/dot ## FAQ ### 1. Microsoft strong-names their assemblies, should I? -For the most part, the majority of applications do not need strong-names. Strong-names are left over from previous eras of .NET where [sandboxing](https://en.wikipedia.org/wiki/Sandbox_(computer_security)) needed to differentiate between code that was trusted, versus code that was untrusted. However in recent years, sandboxing via AppDomains, especially to [isolate ASP.NET web applications](https://support.microsoft.com/en-us/help/2698981/asp-net-partial-trust-does-not-guarantee-application-isolation), is no longer guaranteed and is not recommended. +For the most part, the majority of applications do not need strong-names. Strong-names are left over from previous eras of .NET where [sandboxing](https://en.wikipedia.org/wiki/Sandbox_(computer_security)) needed to differentiate between code that was trusted, versus code that was untrusted. However in recent years, sandboxing via AppDomains, especially to [isolate ASP.NET web applications](https://support.microsoft.com/en-us/help/2698981/asp-net-partial-trust-does-not-guarantee-application-isolation), is no longer guaranteed and is not recommended. 
However, strong-names are still required in applications in some rare situations, most of which are called out on this page: [Strong-Named Assemblies](https://docs.microsoft.com/en-us/dotnet/framework/app-domains/strong-named-assemblies). @@ -21,6 +21,6 @@ There are three major problems that developers run into after strong naming thei 1. _Binding Policy_. When developers talk about strong-names, they are usually conflating it with the strict binding policy of the .NET Framework that kicks in _when_ you strong-name. This binding policy is problematic because it forces, by default, an exact match between reference and version, and requires developers to author complex [binding redirects](https://docs.microsoft.com/en-us/dotnet/framework/configure-apps/file-schema/runtime/bindingredirect-element) when they don't. In recent versions of Visual Studio, however, we've added [Automatic Binding Redirection](https://docs.microsoft.com/en-us/dotnet/framework/configure-apps/how-to-enable-and-disable-automatic-binding-redirection) as an attempt to reduce the pain of this policy on developers. On top of this, on all newer platforms, including _Silverlight_, _WinRT-based platforms_ (Phone and Store), _.NET Native_ and _ASP.NET 5_, this policy has been loosened, allowing later versions of an assembly to satisfy earlier references, thereby completely removing the need to ever write binding redirects on those platforms. -2. _Virality_. Once you've strong-named an assembly, you can only statically reference other strong-named assemblies. +2. _Virality_. Once you've strong-named an assembly, you can only statically reference other strong-named assemblies. 3. _No drop-in replacement_. This is a problem for open source libraries where the strong-name private key is not checked into the repository. This means that developers are unable to build to their own version of the library and then use it as a drop-in replacement without recompiling _all_ consuming libraries up the stack to pick up the new identity. This is extremely problematic for libraries, such as Json.NET, which have large incoming dependencies. Firstly, we would recommend that these open source projects check-in their private key (remember, [strong-names are used for identity, and not for security](https://docs.microsoft.com/en-us/dotnet/framework/app-domains/strong-named-assemblies)). Failing that, however, we've introduced a new concept called [Public Signing](public-signing.md) that enables developers to build drop-in replacements without needing access to the strong-name private key. This is the mechanism that .NET Core libraries use by default. diff --git a/docs/project/versioning.md b/docs/project/versioning.md index ab5b027397e2..728195b10845 100644 --- a/docs/project/versioning.md +++ b/docs/project/versioning.md @@ -57,4 +57,4 @@ The version we produce by our calculations is mainly used in two places: - As the [Assembly File Version](https://msdn.microsoft.com/en-us/library/51ket42z(v=vs.110).aspx) - As the packages version number -To get more information on where we are doing the calculations for the versioning, you can [click here](https://github.com/dotnet/buildtools/blob/master/src/Microsoft.DotNet.Build.Tasks/PackageFiles/versioning.targets) to find the targets file where we create the versioning assets, and [here](https://github.com/dotnet/buildtools/blob/master/src/Microsoft.DotNet.Build.Tasks/GenerateCurrentVersion.cs) to see the code where we calculate BuildNumberMajor and BuildNumberMinor.
\ No newline at end of file +To get more information on where we are doing the calculations for the versioning, you can [click here](https://github.com/dotnet/buildtools/blob/master/src/Microsoft.DotNet.Build.Tasks/PackageFiles/versioning.targets) to find the targets file where we create the versioning assets, and [here](https://github.com/dotnet/buildtools/blob/master/src/Microsoft.DotNet.Build.Tasks/GenerateCurrentVersion.cs) to see the code where we calculate BuildNumberMajor and BuildNumberMinor. diff --git a/docs/project/windows-performance-tracing.md b/docs/project/windows-performance-tracing.md index 04fab29feca9..d23fc61f092a 100644 --- a/docs/project/windows-performance-tracing.md +++ b/docs/project/windows-performance-tracing.md @@ -11,4 +11,4 @@ PerfView has significant documentation built-in, which includes: To get started, download PerfView and use the links on the main screen to get help. -If you have specific questions, please post them in an issue here. \ No newline at end of file +If you have specific questions, please post them in an issue here. diff --git a/docs/project/writing-tests.md b/docs/project/writing-tests.md index f2984f020b7b..dd66fc96cd2b 100644 --- a/docs/project/writing-tests.md +++ b/docs/project/writing-tests.md @@ -56,7 +56,7 @@ public async Task Headers_SetAfterRequestSubmitted_ThrowsInvalidOperationExcepti ``` # OuterLoop -This one is fairly simple but often used incorrectly. When running tests which depend on outside influences, e.g. hardware (Internet, SerialPort, ...), and you can't mitigate these dependencies, you might consider using the `[OuterLoop]` attribute for your test. +This one is fairly simple but often used incorrectly. When running tests which depend on outside influences, e.g. hardware (Internet, SerialPort, ...), and you can't mitigate these dependencies, you might consider using the `[OuterLoop]` attribute for your test. With this attribute, tests are executed in a dedicated CI loop and won't break the default CI loops which get created when you submit a PR. To run OuterLoop tests locally you need to set the msbuild property "OuterLoop" to true: `/p:OuterLoop=true`. To run OuterLoop tests in CI you need to mention dotnet-bot and identify the tests you want to run. See `@dotnet-bot help` for the exact loop names. diff --git a/docs/workflow/README.md b/docs/workflow/README.md index 7a17fecd50ef..cc08240c14aa 100644 --- a/docs/workflow/README.md +++ b/docs/workflow/README.md @@ -42,7 +42,7 @@ To build just one part you use the root build script (build.cmd/sh), and you add ## Configurations -You may need to build the tree in a combination of configurations. This section explains why. +You may need to build the tree in a combination of configurations. This section explains why. A quick reminder of some concepts -- see the [glossary](../project/glossary.md) for more on these: @@ -51,7 +51,7 @@ A quick reminder of some concepts -- see the [glossary](../project/glossary.md) * **Release Configuration** -- Optimized code. Asserts are disabled. Runs at the best speed, and suitable for performance profiling. You will have limited debugging experience. When we talk about mixing configurations, we're discussing the following sub-components: - + * **Runtime** is the execution engine for managed code and there are two different implementations available. Both are written in C/C++ and are therefore easier to debug when built in a Debug configuration.
* CoreCLR is the comprehensive execution engine which, if built in the Debug configuration, executes managed code very slowly. For example, it will take a long time to run the managed code unit tests. The code lives under [src/coreclr](../../src/coreclr). * Mono is a portable and slimmer runtime that is not as sensitive to the Debug configuration when running managed code. You will still need to build it without optimizations to have a good runtime debugging experience, though. The code lives under [src/mono](../../src/mono). diff --git a/docs/workflow/building/coreclr/README.md b/docs/workflow/building/coreclr/README.md index c00d90a9799a..520ecd98a4fc 100644 --- a/docs/workflow/building/coreclr/README.md +++ b/docs/workflow/building/coreclr/README.md @@ -22,6 +22,8 @@ CoreCLR also supports a 'checked' build type which has asserts enabled like 'deb ./build.sh -subset clr -configuration checked ``` +To pass extra compiler/linker flags to the coreclr build, set the environment variables `EXTRA_CFLAGS`, `EXTRA_CXXFLAGS` and `EXTRA_LDFLAGS` as needed. Don't set `CFLAGS`/`CXXFLAGS`/`LDFLAGS` directly as that might lead to configure-time tests failing. + This will produce outputs as follows: - Product binaries will be dropped in `artifacts\bin\coreclr\..` folder. diff --git a/docs/workflow/building/coreclr/freebsd-instructions.md b/docs/workflow/building/coreclr/freebsd-instructions.md index 49c29f49deff..ef31a0b4d215 100644 --- a/docs/workflow/building/coreclr/freebsd-instructions.md +++ b/docs/workflow/building/coreclr/freebsd-instructions.md @@ -10,7 +10,7 @@ These instructions are written assuming FreeBSD 10.1-RELEASE, since that's the r These instructions assume you use the binary package tool `pkg` (analog to `apt-get` or `yum` on Linux) to install the environment. Compiling the dependencies from source using the ports tree might work too, but is untested. -Minimum RAM required to build is 1GB. The build is known to fail on 512 MB VMs ([Issue 536](https://github.com/dotnet/coreclr/issues/536)). +Minimum RAM required to build is 1GB. The build is known to fail on 512 MB VMs ([Issue 4069](https://github.com/dotnet/runtime/issues/4069)). Toolchain Setup --------------- @@ -166,7 +166,7 @@ Download NuGet Packages With Mono and NuGet in hand, you can use NuGet to get the required dependencies. -Make a `packages.config` file with the following text. These are the required dependencies of this particular app. Different apps will have different dependencies and require a different `packages.config` - see [Issue #480](https://github.com/dotnet/coreclr/issues/480). +Make a `packages.config` file with the following text. These are the required dependencies of this particular app. Different apps will have different dependencies and require a different `packages.config` - see [Issue #4053](https://github.com/dotnet/runtime/issues/4053). ```xml @@ -198,7 +198,7 @@ And restore your packages.config file: janhenke@freebsd-frankfurt:~/coreclr-demo/packages % mono nuget.exe restore -Source https://www.myget.org/F/dotnet-corefx/ -PackagesDirectory . ``` -NOTE: This assumes you already installed the default CA certs.
If you have problems downloading the packages please see [Issue #4089](https://github.com/dotnet/runtime/issues/4089#issuecomment-88203778). The command for FreeBSD is: ```sh janhenke@freebsd-frankfurt:~/coreclr-demo/packages % mozroots --import --sync diff --git a/docs/workflow/building/libraries/README.md b/docs/workflow/building/libraries/README.md index 44eb0eb209ee..de6969f28658 100644 --- a/docs/workflow/building/libraries/README.md +++ b/docs/workflow/building/libraries/README.md @@ -9,11 +9,11 @@ Here is one example of a daily workflow for a developer working mainly on the li git clean -xdf git pull upstream master & git push origin master :: Build Debug libraries on top of Release runtime: -build -subset clr+libs -runtimeConfiguration Release +build.cmd clr+libs -rc Release :: The above you may only perform once in a day, or when you pull down significant new changes. :: If you use Visual Studio, you might open System.Text.RegularExpressions.sln here. -build -vs System.Text.RegularExpressions +build.cmd -vs System.Text.RegularExpressions :: Switch to working on a given library (RegularExpressions in this case) cd src\libraries\System.Text.RegularExpressions @@ -33,7 +33,7 @@ The instructions for Linux and macOS are essentially the same: git clean -xdf git pull upstream master & git push origin master # Build Debug libraries on top of Release runtime: -./build.sh -subset clr+libs -runtimeconfiguration Release +./build.sh clr+libs -rc Release # The above you may only perform once in a day, or when you pull down significant new changes. # Switch to working on a given library (RegularExpressions in this case) @@ -56,12 +56,12 @@ These example commands will build a release CoreCLR (and CoreLib), debug librari For Linux: ```bash -./build.sh -runtimeConfiguration Release +./build.sh -rc Release ``` For Windows: ```bat -./build.cmd -runtimeConfiguration Release +./build.cmd -rc Release ``` Detailed information about building and testing runtimes and the libraries is in the documents linked below. @@ -74,7 +74,7 @@ The libraries build has two logical components, the native build which produces The build settings (BuildTargetFramework, TargetOS, Configuration, Architecture) are generally defaulted based on where you are building (i.e. which OS or which architecture) but we have a few shortcuts for the individual properties that can be passed to the build scripts: -- `-framework|-f` identifies the target framework for the build. Possible values include `net5.0` (currently the latest .NET version) or `net472`. (msbuild property `BuildTargetFramework`) +- `-framework|-f` identifies the target framework for the build. Possible values include `net5.0` (currently the latest .NET version) or `net48` (the latest .NETFramework version). (msbuild property `BuildTargetFramework`) - `-os` identifies the OS for the build. It defaults to the OS you are running on but possible values include `Windows_NT`, `Unix`, `Linux`, or `OSX`. (msbuild property `TargetOS`) - `-configuration|-c Debug|Release` controls the optimization level the compilers use for the build. It defaults to `Debug`. (msbuild property `Configuration`) - `-arch` identifies the architecture for the build. It defaults to `x64` but possible values include `x64`, `x86`, `arm`, or `arm64`. 
(msbuild property `TargetArchitecture`) @@ -83,26 +83,20 @@ For more details on the build settings see [project-guidelines](../../../coding- If you invoke the `build` script without any actions, the default action chain `-restore -build` is executed. -By default the `build` script only builds the product libraries and none of the tests. If you want to include tests, you want to add the subset `-subset libtests`. If you want to run the tests you want to use the `-test` action instead of the `-build`, e.g. `build.cmd/sh -subset libs.tests -test`. To specify just the libraries, use `-subset libs`. +By default the `build` script only builds the product libraries and none of the tests. If you want to include tests, you want to add the subset `libs.tests`. If you want to run the tests you want to use the `-test` action instead of the `-build`, e.g. `build.cmd/sh libs.tests -test`. To specify just the libraries, use `libs`. **Examples** - Building in release mode for platform x64 (restore and build are implicit here as no actions are passed in) ```bash -./build.sh -subset libs -c Release -arch x64 +./build.sh libs -c Release -arch x64 ``` - Building the src assemblies and build and run tests (running all tests takes a considerable amount of time!) ```bash -./build.sh -subset libs -test +./build.sh libs -test ``` -- Building for different target frameworks (restore and build are implicit again as no action is passed in) -```bash -./build.sh -subset libs -framework net5.0 -./build.sh -subset libs -framework net472 -``` - -- Clean the entire solution +- Clean the entire artifacts folder ```bash ./build.sh -clean ``` @@ -191,7 +185,7 @@ You can iterate on `System.Private.CoreLib` by running: build.cmd clr.corelib+clr.nativecorelib+libs.pretest -rc Release ``` -When this `System.Private.CoreLib` will be built in Release mode, then it will be crossgen'd and we will update the testhost to the latest version of corelib. +When this `System.Private.CoreLib` will be built in Release mode, then it will be crossgen'd and we will update the testhost to the latest version of corelib. You can use the same workflow for mono runtime by using `mono.corelib+libs.pretest` subsets. @@ -199,24 +193,24 @@ You can use the same workflow for mono runtime by using `mono.corelib+libs.prete By default the libraries will attempt to build using the CoreCLR version of `System.Private.CoreLib.dll`. In order to build against the Mono version you need to use the `/p:RuntimeFlavor=Mono` argument. ``` -.\build.cmd -subset libs /p:RuntimeFlavor=Mono +.\build.cmd libs /p:RuntimeFlavor=Mono ``` ### Building all for other OSes By default, building from the root will only build the libraries for the OS you are running on. One can -build for another OS by specifying `./build.sh -subset libs -os [value]`. +build for another OS by specifying `./build.sh libs -os [value]`. Note that you cannot generally build native components for another OS but you can for managed components so if you need to do that you can do it at the individual project level or build all via passing `/p:BuildNative=false`. ### Building in Release or Debug By default, building from the root or within a project will build the libraries in Debug mode. -One can build in Debug or Release mode from the root by doing `./build.sh -subset libs -c Release` or `./build.sh -subset libs -c Debug`. +One can build in Debug or Release mode from the root by doing `./build.sh libs -c Release` or `./build.sh libs`. 
### Building other Architectures -One can build 32- or 64-bit binaries, or binaries for any architecture, by specifying in the root `./build.sh -subset libs -arch [value]` or in a project `/p:TargetArchitecture=[value]` after the `dotnet build` command. +One can build 32- or 64-bit binaries, or binaries for any architecture, by specifying in the root `./build.sh libs -arch [value]` or in a project `/p:TargetArchitecture=[value]` after the `dotnet build` command. ## Working in Visual Studio @@ -227,3 +221,17 @@ If you are working on Windows, and use Visual Studio, you can open individual li For more details about running tests inside Visual Studio, [go here](../../testing/visualstudio.md). For more about running tests, read the [running tests](../../testing/libraries/testing.md) document. + +## Build packages +To build a library's package, simply invoke `dotnet pack` on the src project after you successfully built the .NETCoreApp vertical from root: + +``` +build libs +dotnet pack src\libraries\System.Text.Json\src\ +``` + +Same as for `dotnet build` or `dotnet publish`, you can specify the desired configuration via the `-c` flag: + +``` +dotnet pack src\libraries\System.Text.Json\src\ -c Release +``` diff --git a/docs/workflow/building/libraries/cross-building.md b/docs/workflow/building/libraries/cross-building.md index c7b0fe1fda42..24393ba5cf73 100644 --- a/docs/workflow/building/libraries/cross-building.md +++ b/docs/workflow/building/libraries/cross-building.md @@ -90,7 +90,7 @@ The output is at `artifacts/bin/[BuildSettings]` where `BuildSettings` looks som Building corefx for Linux ARM Emulator ======================================= -It is possible to build corefx binaries (native and managed) for the Linux ARM Emulator (latest version provided here: [#3805](https://github.com/dotnet/coreclr/issues/3805)). +It is possible to build corefx binaries (native and managed) for the Linux ARM Emulator (latest version provided here: [#5394](https://github.com/dotnet/runtime/issues/5394)). The `scripts/arm32_ci_script.sh` script does this. The following instructions assume that: diff --git a/docs/workflow/building/libraries/freebsd-instructions.md b/docs/workflow/building/libraries/freebsd-instructions.md index 09008a4f49c4..d62686b60839 100644 --- a/docs/workflow/building/libraries/freebsd-instructions.md +++ b/docs/workflow/building/libraries/freebsd-instructions.md @@ -20,18 +20,18 @@ This is certainly undesirable and it should be avoided if possible. ``` mkdir ~/dotnet cd ~/dotnet -curl https://dotnetcli.blob.core.windows.net/dotnet/Sdk/master/dotnet-sdk-latest-freebsd-x64.tar.gz | tar xfz - +curl https://dotnetcli.blob.core.windows.net/dotnet/Sdk/master/dotnet-sdk-latest-freebsd-x64.tar.gz | tar xfz - ``` -If on 12.x you may also need to set `LD_PRELOAD` to `/usr/lib/libpthread.so` to avoid an issue where the CLI freezes. +If on 12.x you may also need to set `LD_PRELOAD` to `/usr/lib/libpthread.so` to avoid an issue where the CLI freezes. As of summer 2019 this CLI is no longer good enough to build all repos. If that is your case, jump to the section [Updating CLI](#updating--bootstrap-cli) A binary snapshot can be obtained from https://github.com/wfurt/blob as dotnet-sdk-freebsd-x64-latest.tgz ## Getting sources -master of source-build pulls in the source code of a specific snapshot instead of the tip of the master branches. -That is generally OK, but in the case of FreeBSD it may miss some changes crucial for the build.
-(or a pending un-submitted change) +master of source-build pulls in the source code of a specific snapshot instead of the tip of the master branches. +That is generally OK, but in the case of FreeBSD it may miss some changes crucial for the build. +(or a pending un-submitted change) ``` git clone https://github.com/dotnet/source-build @@ -44,9 +44,9 @@ git submodule update (cd src/coreclr ; git checkout master) ``` -port change from +port change from ```https://github.com/dotnet/corefx/commit/037859ac403ef17879655bb2f2e821d52e6eb4f3``` -In the ideal case we could sync up to **master**, but that brings Arcade changes and **breaks** the build. +In the ideal case we could sync up to **master**, but that brings Arcade changes and **breaks** the build. Bootstrap Arcade ``` @@ -86,8 +86,8 @@ index 81b8c7b..bb26868 100644 $(BuildArguments) -PortableBuild=$(PortableBuild) ``` -Depending on the day and moon phase you may need to get some updates as well. -If the build breaks, look for pending PRs with the FreeBSD tag or label and pull the pending changes. +Depending on the day and moon phase you may need to get some updates as well. +If the build breaks, look for pending PRs with the FreeBSD tag or label and pull the pending changes. ## Building @@ -106,7 +106,7 @@ export DOTNET_CLI_TELEMETRY_OPTOUT=1 ``` In an ideal situation this will build the whole SDK. Right now it fails somewhere in the CLI. -There is a problem with rebuilds: the build will attempt to patch files again and/or make git updates. +There is a problem with rebuilds: the build will attempt to patch files again and/or make git updates. ```export SOURCE_BUILD_SKIP_SUBMODULE_CHECK=1``` @@ -114,23 +114,23 @@ To build single repo again one can do: ```./build.sh /p:RootRepo=corefx /p:SkipRepoReferences=true ``` ## Resolving issues -Rebuilds of source-build can hit issues. -Often running ```clean.sh``` from the top helps. Be careful: that may undo any local pending changes. +Rebuilds of source-build can hit issues. +Often running ```clean.sh``` from the top helps. Be careful: that may undo any local pending changes. Sometimes it would try to apply patches and fail. -You can pass -```/p:SkipPatches=true``` to the top level build.sh script. +You can pass +```/p:SkipPatches=true``` to the top level build.sh script. ## Running CoreFX tests -Follow the steps above to build at least corefx and its dependencies. +Follow the steps above to build at least corefx and its dependencies. TBD ## Updating bootstrap CLI. -As the build changes, previous versions of the CLI may not be good enough any more. Changes in the runtime or the build dependency on 3.0 JSON are some examples of breaking changes. The following steps outline how to update the published CLI to what the build needs. It will require another system where the build is supported. Given its similarity and availability, Linux is used in the examples below, but Windows or macOS should yield the same result. +As the build changes, previous versions of the CLI may not be good enough any more. Changes in the runtime or the build dependency on 3.0 JSON are some examples of breaking changes. The following steps outline how to update the published CLI to what the build needs. It will require another system where the build is supported. Given its similarity and availability, Linux is used in the examples below, but Windows or macOS should yield the same result. Often the build will ask for a slightly different version without actually having a real dependency on it (that is part of rolling updates across repos).
One can cheat in this case and simply: @@ -138,12 +138,12 @@ One can cheat in this case and simply: ln -s ~/dotnet/sdk/old_version ~/dotnet/sdk/new_version ``` - + ### Finding versions and commit hashes -First we need to find what version we are trying to recreate. That is the 'sdk' section in global.json in each repo. As of preview9, this is set to 3.0.100-preview6-012264 and that version will be used in the examples. One advantage of using release branches is that they are in a coherent state, e.g. all repos should need exactly the same version. +First we need to find what version we are trying to recreate. That is the 'sdk' section in global.json in each repo. As of preview9, this is set to 3.0.100-preview6-012264 and that version will be used in the examples. One advantage of using release branches is that they are in a coherent state, e.g. all repos should need exactly the same version. -Let's get the SDK for a supported OS. Sync the code base to the same version you are trying to build on FreeBSD. +Let's get the SDK for a supported OS. Sync the code base to the same version you are trying to build on FreeBSD. ``` ./eng/common/build.sh --restore Downloading 'https://dot.net/v1/dotnet-install.sh' @@ -186,7 +186,7 @@ cd core-sdk git checkout be3f0c1a03f80492d45396c9f5b855b10a8a0b79 ``` -Set variables and assemble the SDK without crossgen (set DropSuffix=true to strip `preview6` from the version). +Set variables and assemble the SDK without crossgen (set DropSuffix=true to strip `preview6` from the version). ``` export DISABLE_CROSSGEN=true export CLIBUILD_SKIP_TESTS=true @@ -212,7 +212,7 @@ cd coreclr git checkout 7ec87b0097fdd4400a8632a2eae56612914579ef ``` -and build +and build ``` mkdir -p .dotnet curl https://dotnetcli.blob.core.windows.net/dotnet/Sdk/master/dotnet-sdk-latest-freebsd-x64.tar.gz | tar xfz - -C .dotnet @@ -247,7 +247,7 @@ git checkout d47cae744ddfb625db8e391cecb261e4c3d7bb1c ``` #### Building core-setup -As this has very little platform dependency, it is unlikely this needs to be touched. +As this has very little platform dependency, it is unlikely this needs to be touched. If we want to do this to pick up a fix or for consistency then ... TBD ``` diff --git a/docs/workflow/building/libraries/webassembly-instructions.md b/docs/workflow/building/libraries/webassembly-instructions.md index c3195a30665e..973047b2d54c 100644 --- a/docs/workflow/building/libraries/webassembly-instructions.md +++ b/docs/workflow/building/libraries/webassembly-instructions.md @@ -14,7 +14,7 @@ export EMSDK_PATH=PATH_TO_SDK_INSTALL/emsdk ## Building everything -At this time no other build configurations are necessary to start building for WebAssembly. The CoreLib for WebAssembly build configurations will be built by default using the WebAssembly configuration shown below. +At this time no other build configurations are necessary to start building for WebAssembly. The CoreLib for WebAssembly build configurations will be built by default using the WebAssembly configuration shown below. This document explains how to work on libraries. In order to work on library projects or run library tests it is necessary to have built the runtime to give the libraries something to run on. If you haven't already done so, please read [this document](../../README.md#Configurations) to understand configurations. @@ -34,12 +34,12 @@ The libraries build contains some native code.
This includes shims over libc, op - Building in debug mode for platform wasm and Browser operating system ```bash -./build.sh --arch wasm --os Browser --subset Libs.Native --configuration Debug +./build.sh libs.native --arch wasm --os Browser ``` - Building in release mode for platform wasm and Browser operating system ```bash -./build.sh --arch wasm --os Browser --subset Libs.Native --configuration Release +./build.sh libs.native --arch wasm --os Browser -c Release ``` ## How to build mono System.Private.CoreLib @@ -48,20 +48,19 @@ If you are working on core parts of mono libraries you will probably need to bui ```bash -./build.sh --arch wasm --os Browser --configuration release --subset Mono +./build.sh mono --arch wasm --os Browser -c Release ``` To build just SPC without mono you can use the Mono.CoreLib subset. ```bash -./build.sh --arch wasm --os Browser --configuration release --subset Mono.CoreLib +./build.sh mono.corelib --arch wasm --os Browser -c Release ``` - Building the managed libraries as well: ```bash -./build.sh --arch wasm --os Browser --configuration release --subset Mono+Libs +./build.sh mono+libs --arch wasm --os Browser -c Release ``` ## Building individual libraries @@ -71,16 +70,16 @@ Individual projects and libraries can be build by specifying the build configura Building individual libraries **Examples** -- Build all projects for a given library (e.g.: System.Net.Http) including running the tests +- Build all projects for a given library (e.g.: System.Net.Http) including the tests ```bash - ./build.sh --arch wasm --os Browser --configuration release --projects src/libraries/System.Net.Http/System.Net.Http.sln + ./build.sh --arch wasm --os Browser -c Release --projects src/libraries/System.Net.Http/System.Net.Http.sln ``` - Build only the source project of a given library (e.g.: System.Net.Http) ```bash - ./build.sh --arch wasm --os Browser --configuration release --projects src/libraries/System.Net.Http/src/System.Net.Http.csproj + ./build.sh --arch wasm --os Browser -c Release --projects src/libraries/System.Net.Http/src/System.Net.Http.csproj ``` More information and examples can be found in the [libraries](./README.md#building-individual-libraries) document. @@ -91,7 +90,7 @@ The WebAssembly implementation files are built and made available in the artifac For Linux and MacOSX: ```bash -./dotnet.sh build /p:Configuration=Debug|Release /p:TargetArchitecture=wasm /p:TargetOS=Browser src/libraries/src.proj /t:NativeBinPlace +./dotnet.sh build /p:Configuration=Debug|Release /p:TargetArchitecture=wasm /p:TargetOS=Browser src/libraries/src.proj /t:BuildWasmRuntimes ``` __Note__: A `Debug` build sets the following environment variables by default. When built from the command line this way the `Configuration` value is case sensitive. @@ -105,7 +104,7 @@ __Note__: A `Debug` build sets the following environment variables by default. #### Example: ``` -L: GC_MAJOR_SWEEP: major size: 752K in use: 39K +L: GC_MAJOR_SWEEP: major size: 752K in use: 39K L: GC_MAJOR: (user request) time 3.00ms, stw 3.00ms los size: 0K in use: 0K ``` @@ -126,7 +125,7 @@ First update emscripten version in the [webassembly Dockerfile](https://github.c ENV EMSCRIPTEN_VERSION=1.39.16 ``` -Submit a PR request with the updated version, wait for all checks to pass and for the request to be merged. 
A [master.json file](https://github.com/dotnet/versions/blob/master/build-info/docker/image-info.dotnet-dotnet-buildtools-prereqs-docker-master.json#L1126) will be updated with the a new docker image.
+Submit a PR with the updated version, wait for all checks to pass and for the request to be merged. A [master.json file](https://github.com/dotnet/versions/blob/master/build-info/docker/image-info.dotnet-dotnet-buildtools-prereqs-docker-master.json#L1126) will be updated with the new docker image.

 ```
 {
@@ -155,4 +154,4 @@ container:
   registry: mcr
 ```

-Open a PR request with the new image.
\ No newline at end of file
+Open a PR with the new image.
diff --git a/docs/workflow/building/mono/README.md b/docs/workflow/building/mono/README.md
index 276ae512ceda..0ac8cf2e6bcf 100644
--- a/docs/workflow/building/mono/README.md
+++ b/docs/workflow/building/mono/README.md
@@ -10,19 +10,19 @@ Before proceeding further, please click on the link above that matches your mach

 ## Concept

-To build the Mono runtime, you must first do a complete runtime build (coreclr, libraries, and then mono). At the repo root, simply execute:
+To build a complete runtime environment, you need to build both the Mono runtime and the libraries. At the repo root, simply execute:

 ```bash
-./build.sh
+./build.sh --subset mono+libs
 ```
 or on Windows,
 ```bat
-build.cmd
+build.cmd -subset mono+libs
 ```
 Note that the debug configuration is the default option. It generates 'debug' output, which includes asserts, has fewer code optimizations, and is easier for debugging. If you want to make performance measurements, or just want tests to execute more quickly, you can also build the 'release' version which does not have these checks by adding the flag `-configuration release` (or `-c release`).
-
-Once you've built the whole runtime and assuming you want to work with just mono, you want to use the following command:
+
+Once you've built the complete runtime and assuming you want to work with just mono, you can use the following command:

 ```bash
 ./build.sh --subset mono
@@ -40,7 +40,7 @@ Here is a list of build arguments that may be of use:

 `/p:MonoEnableLlvm=true /p:MonoLLVMDir=path/to/llvm` - Builds mono w/ LLVM from a custom path

-`/p:MonoEnableLlvm=true /p:MonoLLVMDir=path/to/llvm /p:MonoLLVMUseCxx11Abi=true` - Builds mono w/ LLVM
+`/p:MonoEnableLlvm=true /p:MonoLLVMDir=path/to/llvm /p:MonoLLVMUseCxx11Abi=true` - Builds mono w/ LLVM from a custom path (and that LLVM was built with the C++11 ABI)

 For `build.sh`

@@ -49,6 +49,34 @@ The build has a number of options that you can learn about using build -?.

+### WebAssembly
+
+In addition to the normal build requirements, WebAssembly builds require a local emsdk to be downloaded. This can either be external or acquired via a make target.
+
+To acquire it externally, move to a directory outside of the runtime repository and run:
+```bash
+git clone https://github.com/emscripten-core/emsdk.git
+```
+
+To use the make target, from the root of the runtime repo:
+```bash
+cd src/mono/wasm
+make provision-wasm
+cd ../../..
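+# NOTE: provision-wasm is assumed to place the emsdk under src/mono/wasm/emsdk,
+# matching the locally provisioned EMSDK_PATH example below.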
+```
+
+When building for WebAssembly, regardless of the machine architecture, you must set the `EMSDK_PATH` environment variable and the architecture/OS, calling build.sh like so:
+```bash
+EMSDK_PATH={path to emsdk repo} ./build.sh --subset mono+libs --arch wasm --os browser -c release
+```
+
+If using the locally provisioned emsdk, this will be:
+```bash
+EMSDK_PATH={path to runtime repo}/src/mono/wasm/emsdk ./build.sh --subset mono+libs --arch wasm --os browser -c release
+```
+
+Artifacts will be placed in `artifacts/bin/microsoft.netcore.app.runtime.browser-wasm/Release/`. When rebuilding with `build.sh`, you _must_ rebuild with `mono+libs` even for mono-only changes, or this directory will not be updated. Alternatively, you can rebuild just the runtime-specific bits from the `src/mono/wasm` directory by running either `make runtime` or `make corlib` when modifying Mono or System.Private.CoreLib respectively.
+
 ## Packages

 To generate nuget packages:
@@ -68,7 +96,7 @@ The following packages will be created under `artifacts\packages\

 - `transport.Microsoft.NETCore.Runtime.Mono.-dev..1.nupkg`
 - `transport.runtime..Microsoft.NETCore.Runtime.Mono.-dev..1.nupkg`

-## Important Notes
+## Important Notes

 Test binaries are not yet available for mono.
diff --git a/docs/workflow/ci/coreclr-ci-health.md b/docs/workflow/ci/coreclr-ci-health.md
index 5e095b6af444..8aa9c5b0e7e9 100644
--- a/docs/workflow/ci/coreclr-ci-health.md
+++ b/docs/workflow/ci/coreclr-ci-health.md
@@ -4,7 +4,7 @@

 Note that this document focuses on coreclr testing in `dotnet/runtime`.

-https://github.com/dotnet/coreclr/issues/27231 was opened as a way to simply view in one place all issues that are affecting `dotnet/runtime`'s CI.
+https://github.com/dotnet/runtime/issues/702 was opened as a way to simply view in one place all issues that are affecting `dotnet/runtime`'s CI.

 ## TOC

@@ -15,7 +15,7 @@ https://github.com/dotnet/coreclr/issues/27231 was opened as a way to simply vie

 #### Terminology

-In order to follow some of the terminology used, there is an expected familiarity of Azure DevOps required. For an in depth guide with Azure DevOps pipeline definitions, please see: https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=azure-devops&tabs=schema.
+In order to follow some of the terminology used, some familiarity with Azure DevOps is expected. For an in-depth guide to Azure DevOps pipeline definitions, please see: https://docs.microsoft.com/en-us/azure/devops/pipelines/yaml-schema?view=azure-devops&tabs=schema.

 The most common and most important terms are the different containers that work happens in.

@@ -25,7 +25,7 @@ The most common terminology and most important are the different containers work

 `Job`: Jobs are the smallest unit of work which happen on a unique machine. Jobs by default run in parallel, but may be set to depend on another job. **Every job executes its work on a unique machine**.

-`Steps`: Steps are the smallest unit of work, they generally correspond to one command that will happen in a job. Normally a job contains steps, which execute serially.
+`Steps`: Steps are the smallest unit of work; they generally correspond to one command that will happen in a job. Normally a job contains steps, which execute serially.

 ## CI Overview

@@ -157,7 +157,7 @@ This tracks the overall end to end run time of a pipeline.
This graph is useful Specifically the query is useful for finding out whether a specific Helix Queue (a group of machines) is overloaded or not. This is useful for diagnosing arm hardware issues, because we have a fixed amount that is easily overloaded. ``` -WorkItems +WorkItems | where QueueName == "ubuntu.1804.armarch.open" | extend DaysAgo = datetime_diff('Day', now(), Queued) | extend QueueTimeInSeconds = datetime_diff('Second', Started, Queued) diff --git a/docs/workflow/debugging/libraries/debugging-packages.md b/docs/workflow/debugging/libraries/debugging-packages.md index 5b25a1ffb14f..f72887149b7f 100644 --- a/docs/workflow/debugging/libraries/debugging-packages.md +++ b/docs/workflow/debugging/libraries/debugging-packages.md @@ -14,13 +14,13 @@ Debugging CoreFX build issues (This documentation is work in progress.) -I found the following process to help when investigating some of the build issues caused by incorrect packaging. +I found the following process to help when investigating some of the build issues caused by incorrect packaging. -To quickly validate if a given project compiles on all supported configurations use `dotnet build /t:RebuildAll`. This applies for running tests as well. For more information, see [Building individual libraries](../../building/libraries/README.md#building-individual-libraries) +To quickly validate if a given project compiles on all supported configurations use `dotnet build /t:RebuildAll`. This applies for running tests as well. For more information, see [Building individual libraries](../../building/libraries/README.md#building-individual-libraries) Assuming the current directory is `\src\contractname\`: -1. Build the `\ref` folder: `dotnet build` +1. Build the `\ref` folder: `dotnet build` Check the logs for output such as: @@ -58,7 +58,7 @@ Use the same technique above to ensure that the binaries include the correct imp Ensure that all Build Pivots are actually being built. This should build all .\ref and .\src variations as well as actually creating the NuGet packages. -Verify that the contents of the nuspec as well as the actual package is correct. You can find the packages by searching for the following pattern in the msbuild output: +Verify that the contents of the nuspec as well as the actual package is correct. You can find the packages by searching for the following pattern in the msbuild output: ``` GetPkgProjPackageDependencies: diff --git a/docs/workflow/debugging/libraries/windows-instructions.md b/docs/workflow/debugging/libraries/windows-instructions.md index ec075d8b10bd..46d916e74da8 100644 --- a/docs/workflow/debugging/libraries/windows-instructions.md +++ b/docs/workflow/debugging/libraries/windows-instructions.md @@ -25,16 +25,16 @@ As Administrator: windbg -I ``` -You may need to do this for both x64 and x86 versions. +You may need to do this for both x64 and x86 versions. Any application that crashes should now automatically start a WinDBG session. ## Debugging tests To run a single test from command line: -* Locate the test binary folder based on the CSPROJ name. +* Locate the test binary folder based on the CSPROJ name. For example: `src\System.Net.Sockets\tests\Functional\System.Net.Sockets.Tests.csproj` will build and output binaries at `bin\tests\Windows_NT.AnyCPU.Debug\System.Net.Sockets.Tests\netcoreapp1.0`. 
- + * Execute the test Assuming that your repo is at `C:\corefx`: @@ -44,7 +44,7 @@ cd C:\corefx\bin\tests\Windows_NT.AnyCPU.Debug\System.Net.Sockets.Tests\netcorea C:\corefx\bin\tests\Windows_NT.AnyCPU.Debug\System.Net.Sockets.Tests\netcoreapp1.0\CoreRun.exe xunit.console.dll System.Net.Sockets.Tests.dll -xml testResults.xml -notrait category=nonwindowstests -notrait category=OuterLoop -notrait category=failing ``` -* If the test crashes or encounters a `Debugger.Launch()` method call, WinDBG will automatically start and attach to the `CoreRun.exe` process +* If the test crashes or encounters a `Debugger.Launch()` method call, WinDBG will automatically start and attach to the `CoreRun.exe` process The following commands will properly configure the debugging extension and fix symbol and source-code references: @@ -129,7 +129,7 @@ Logs are going to be placed in %SYSTEMDRIVE%\sockets.etl. 1. Install [PerfView](https://github.com/Microsoft/perfview/blob/master/documentation/Downloading.md) 2. Run PerfView as Administrator -3. Press Alt+C to collect events +3. Press Alt+C to collect events 4. Disable all other collection parameters 5. Add Additional Providers (see below - Important: keep the "*" wildcard before the names.) @@ -137,7 +137,7 @@ Logs are going to be placed in %SYSTEMDRIVE%\sockets.etl. ### Built-in EventSource tracing -The following EventSources are built-in to CoreFX. The ones that are not marked as [__TestCode__] can be enabled in production scenarios for log collection. +The following EventSources are built-in to CoreFX. The ones that are not marked as [__TestCode__] can be enabled in production scenarios for log collection. #### Global * `*System.Diagnostics.Eventing.FrameworkEventSource {8E9F5090-2D75-4d03-8A81-E5AFBF85DAF1}`: Global EventSource used by multiple namespaces. @@ -169,5 +169,5 @@ Helper scripts are available at https://github.com/dotnet/runtime/tree/master/sr * `*System.Threading.Tasks.Parallel.EventSource`: Provides an event source for tracing TPL information. * `*System.Threading.Tasks.Dataflow.DataflowEventSource {16F53577-E41D-43D4-B47E-C17025BF4025}`: Provides an event source for tracing Dataflow information. -## Notes +## Notes * You can find the test invocation command-line by looking at the logs generated after the `dotnet build /t:test` within the test folder. diff --git a/docs/workflow/requirements/freebsd-requirements.md b/docs/workflow/requirements/freebsd-requirements.md index 59b3c64d928f..66e50ec878b3 100644 --- a/docs/workflow/requirements/freebsd-requirements.md +++ b/docs/workflow/requirements/freebsd-requirements.md @@ -21,7 +21,7 @@ with all needed prerequisites to build. 
As the example below may become stale,
 ```sh
 TAG=mcr.microsoft.com/dotnet-buildtools/prereqs:ubuntu-18.04-cross-freebsd-11-20200430154008-a84b0d2
-docker run --rm --volume $(pwd):$(pwd) --workdir $(pwd) --env ROOTFS_DIR=/crossrootfs/x64 -ti $TAG ./build.sh -cross -FreeBSD
+docker run --rm --volume $(pwd):$(pwd) --workdir $(pwd) --env ROOTFS_DIR=/crossrootfs/x64 -ti $TAG ./build.sh -cross -os FreeBSD
 ```

 Build using Toolchain Setup
diff --git a/docs/workflow/requirements/linux-requirements.md b/docs/workflow/requirements/linux-requirements.md
index 0cdd9cc39978..b6dee2b8baf2 100644
--- a/docs/workflow/requirements/linux-requirements.md
+++ b/docs/workflow/requirements/linux-requirements.md
@@ -55,8 +55,13 @@ The following dependencies are needed if Mono Runtime is enabled (default behavi

 - autoconf
 - automake
-- libtool
-~$ sudo apt-get install cmake llvm-9 clang-9 autoconf automake libtool build-essential python curl git lldb-6.0 liblldb-6.0-dev libunwind8 libunwind8-dev gettext libicu-dev liblttng-ust-dev libssl-dev libnuma-dev libkrb5-dev zlib1g-dev
+```
+sudo apt-get install -y cmake llvm-9 clang-9 autoconf automake \
+libtool build-essential python curl git lldb-6.0 liblldb-6.0-dev \
+libunwind8 libunwind8-dev gettext libicu-dev liblttng-ust-dev \
+libssl-dev libnuma-dev libkrb5-dev zlib1g-dev
+```

 You now have all the required components.
diff --git a/docs/workflow/testing/coreclr/running-aspnet-benchmarks-with-crossgen2.md b/docs/workflow/testing/coreclr/running-aspnet-benchmarks-with-crossgen2.md
index 55ffc5f7459a..4d5c20b50f08 100644
--- a/docs/workflow/testing/coreclr/running-aspnet-benchmarks-with-crossgen2.md
+++ b/docs/workflow/testing/coreclr/running-aspnet-benchmarks-with-crossgen2.md
@@ -84,10 +84,10 @@ profiles:
     cores: 12
     jobs:
       application:
-        endpoints:
+        endpoints:
           - http://asp-perf-win:5001
       load:
-        endpoints:
+        endpoints:
           - http://asp-perf-load:5001

   aspnet-physical-lin:
@@ -96,18 +96,18 @@ profiles:
     cores: 12
     jobs:
       application:
-        endpoints:
+        endpoints:
           - http://asp-perf-lin:5001
       load:
-        endpoints:
+        endpoints:
           - http://asp-perf-load:5001
 ```

 Now, what does this configuration mean and how is it applied? Let's go over
 the most important fields to understand its main functionality.

-* **Imports**: These are external tools hosted in the Benchmarks repo.
-In this case, we only need `wrk`, which is a tool that loads and tests
-performance in Web applications.
+* **Imports**: These are external tools hosted in the Benchmarks repo.
+In this case, we only need `wrk`, which is a load-testing tool for
+web applications.

 * **Jobs**: Here go the job descriptions. A job in this context is the set of
diff --git a/docs/workflow/testing/libraries/testing-android.md b/docs/workflow/testing/libraries/testing-android.md
index 7ac22f84062f..5ad1b27f7900 100644
--- a/docs/workflow/testing/libraries/testing-android.md
+++ b/docs/workflow/testing/libraries/testing-android.md
@@ -88,4 +88,4 @@ Or simply open the `logcat` window in Android Studio or Visual Studio.
### Existing Limitations - `-os Android` is not supported for Windows yet (`WSL` can be used instead) - XHarness.CLI is not able to boot emulators yet (so you need to boot via `AVD Manager` or IDE) -- AOT and Interpreter modes are not supported yet \ No newline at end of file +- AOT and Interpreter modes are not supported yet diff --git a/docs/workflow/testing/libraries/testing-wasm.md b/docs/workflow/testing/libraries/testing-wasm.md index 8e067e9f6e49..8c08a729074f 100644 --- a/docs/workflow/testing/libraries/testing-wasm.md +++ b/docs/workflow/testing/libraries/testing-wasm.md @@ -7,14 +7,14 @@ In order to be able to run tests, the following JavaScript engines should be ins They can be installed as a part of [jsvu](https://github.com/GoogleChromeLabs/jsvu). -Please make sure that a JavaScript engine binary is available via command line, +Please make sure that a JavaScript engine binary is available via command line, e.g. for V8: ```bash $ v8 V8 version 8.5.62 ``` -If you use `jsvu`, first add its location to PATH variable +If you use `jsvu`, first add its location to PATH variable e.g. for V8 ```bash @@ -35,14 +35,14 @@ and even run tests one by one for each library: ### Running individual test suites The following shows how to run tests for a specific library ``` -./dotnet.sh build /t:Test src/libraries/System.AppContext/tests /p:TargetOS=Browser /p:TargetArchitecture=wasm /p:Configuration=release +./dotnet.sh build /t:Test src/libraries/System.AppContext/tests /p:TargetOS=Browser /p:TargetArchitecture=wasm /p:Configuration=Release ``` ### Running tests using different JavaScript engines -It's possible to set a JavaScript engine explicitly by adding `/p:JSEngine` property: +It's possible to set a JavaScript engine explicitly by adding `/p:JSEngine` property: ``` -./dotnet.sh build /t:Test src/libraries/System.AppContext/tests /p:TargetOS=Browser /p:TargetArchitecture=wasm /p:Configuration=release /p:JSEngine=SpiderMonkey +./dotnet.sh build /t:Test src/libraries/System.AppContext/tests /p:TargetOS=Browser /p:TargetArchitecture=wasm /p:Configuration=Release /p:JSEngine=SpiderMonkey ``` At the moment supported values are: diff --git a/docs/workflow/testing/mono/testing.md b/docs/workflow/testing/mono/testing.md index e710b77e00b4..1bffa1de3389 100644 --- a/docs/workflow/testing/mono/testing.md +++ b/docs/workflow/testing/mono/testing.md @@ -1,10 +1,10 @@ # Running Tests using Mono Runtime ## Running Runtime Tests -We currently only support running tests against coreclr. There are additional mono runtime tests in mono/mono, but they +We currently only support running tests against coreclr. There are additional mono runtime tests in mono/mono, but they have not been moved over yet. Simply run the following command: -``` +``` dotnet build /t:RunCoreClrTests $(REPO_ROOT)/src/mono/mono.proj ``` @@ -36,8 +36,8 @@ dotnet build /t:Test /p:RuntimeFlavor=mono ``` # Patching Local dotnet (.dotnet-mono) -Another way to test mono out is by 'patching' a local dotnet with our runtime bits. This is a good way to write simple -test programs and get a glimpse of how mono will work with the dotnet tooling. +Another way to test mono out is by 'patching' a local dotnet with our runtime bits. This is a good way to write simple +test programs and get a glimpse of how mono will work with the dotnet tooling. 
To generate a local .dotnet-mono, execute this command: @@ -51,4 +51,4 @@ You can then, for example, run our HelloWorld sample via: dotnet build -c Release $(REPO_ROOT)/src/mono/netcore/sample/HelloWorld MONO_ENV_OPTIONS="" COMPlus_DebugWriteToStdErr=1 \ $(REPO_ROOT)/.dotnet-mono/dotnet $(REPO_ROOT)/src/mono/netcore/sample/HelloWorld/bin/HelloWorld.dll -``` \ No newline at end of file +``` diff --git a/docs/workflow/testing/using-your-build.md b/docs/workflow/testing/using-your-build.md index 31a7f6bc8d3f..33e5ef19e174 100644 --- a/docs/workflow/testing/using-your-build.md +++ b/docs/workflow/testing/using-your-build.md @@ -1,7 +1,7 @@ # Using your .NET Runtime Build -We assume that you have successfully built CoreCLR repository and thus have files of the form +We assume that you have successfully built the repository and thus have files of the form ``` ~/runtime/artifacts/bin/coreclr/../ ``` @@ -11,7 +11,7 @@ a 'host' program that will load the Runtime as well as all the other .NET librar code that your application needs. The easiest way to get all this other stuff is to simply use the standard 'dotnet' host that installs with .NET SDK. -The released version of 'dotnet' tool may not be compatible with the live CoreCLR repository. The following steps +The released version of 'dotnet' tool may not be compatible with the live repository. The following steps assume use of a dogfood build of the .NET SDK. ## Acquire the latest nightly .NET SDK @@ -157,7 +157,7 @@ Assert failure(PID 13452 [0x0000348c], Thread: 10784 [0x2a20]): Consistency chec ## Using .NET SDK to run your .NET Application If you don't like the idea of copying files manually you can follow [these instructions](../using-dotnet-cli.md) to use dotnet cli to do this for you. -However the steps described here are the simplest and most commonly used by CoreCLR developers for ad-hoc testing. +However the steps described here are the simplest and most commonly used by runtime developers for ad-hoc testing. ## Using CoreRun to run your .NET Application diff --git a/docs/workflow/testing/visualstudio.md b/docs/workflow/testing/visualstudio.md index c8c20b9cfc7f..b519aa3a400a 100644 --- a/docs/workflow/testing/visualstudio.md +++ b/docs/workflow/testing/visualstudio.md @@ -1,9 +1,25 @@ -# Visual Studio Test Explorer support -For Visual Studio Test Explorer to work in dotnet/runtime, the following test settings need to be enabled: -- Test parameters (like which `dotnet` host to use) are persisted in an auto-generated .runsettings file. For that to work, make sure that the "Auto detect runsettings Files" (`Options -> Test`) option is enabled. -- Make sure that the "Processor Architecture for AnyCPU project" (`Test Explore pane -> Test Explorer toolbar options --> Settings`) value is set to `auto`. +# Working in dotnet/runtime using Visual Studio + +Visual Studio is a great tool to use when working in the dotnet/runtime repo. + +Almost all its features should work well, but there are a few special considerations to bear in mind: + +## Test Explorer + +You can run tests from the Visual Studio Test Explorer, but there are a few settings you need: +- Enable `Auto detect runsettings Files` (`Test Explorer window -> Settings button -> Options`). Test parameters (like which `dotnet` host to use) are persisted in an auto-generated .runsettings file, and it's important that Visual Studio knows to use it. +- Set `Processor Architecture for AnyCPU project` to `auto` (`Test Explorer window -> Settings button`). 
+- Consider whether to disable `Discover tests in real time from C# and Visual Basic .NET source files` (`Test explorer window -> Settings button -> Options`). + - You may want it enabled if you're actively writing new tests and want them to show up in Test Explorer without building first. + - You may want it disabled if you're mostly running existing tests, and some of them have conditional attributes. Many of our unit tests have attributes, like `[SkipOnTargetFramework]`, to indicate that they're only valid in certain configurations. Because the real-time discovery feature does not currently recognize these attributes the tests will show up in Test Explorer as well, and fail or possibly hang when you try to run them. +- Consider whether to enable `Run tests in Parallel` (`Test Explorer window -> Settings button`). + - You may want it enabled if some of the unit tests you're working with run slowly or there's many of them. + - You may want it disabled if you want to simplify debugging or viewing debug output. + +If you encounter puzzling behavior while running tests within Visual Studio, first check the settings above, verify they run correctly from the command line, and also make sure you're using the latest Visual Studio. It can be helpful to enable detailed logging of the test runner (`Test explorer window -> Settings button -> Options > Logging Level: Trace`) - it may suggest the problem, or at least provide more information to share. + +## Start with Debugging (F5) -# Visual Studio F5 Debugging support dotnet/runtime uses `dotnet test` ([VSTest](https://github.com/Microsoft/vstest)) which spawns child processes during test execution. Visual Studio by default doesn't automatically debug child processes, therefore preliminary steps need to be done to enable Debugging "F5" support. Note that these steps aren't necessary for Visual Studio Test Explorer support. @@ -14,3 +30,4 @@ Note that these steps aren't necessary for Visual Studio Test Explorer support. ## References - https://github.com/dotnet/project-system/issues/6176 tracks enabling the native code debugging functionality for multiple projects without user interaction. - https://github.com/dotnet/sdk/issues/7419#issuecomment-298261617 explains the necessary steps to install and enable the mentioned extension in more detail. +- https://github.com/microsoft/vstest/ is the repo for issues with the Visual Studio test execution features. diff --git a/docs/workflow/using-dotnet-cli.md b/docs/workflow/using-dotnet-cli.md index 7c9efa28c829..8090f0eddc2c 100644 --- a/docs/workflow/using-dotnet-cli.md +++ b/docs/workflow/using-dotnet-cli.md @@ -1,23 +1,23 @@ # Using your .NET Runtime build with .NET SDK -This walkthrough explains how to run against your local CoreCLR build using .NET SDK only. +This walkthrough explains how to run your own app against your local build using only the .NET SDK. For other walkthroughs see: -- [Using Your Build - Update CoreCLR from raw binary output](./testing/using-your-build.md) +- [Using Your Build - Update from raw build output](./testing/using-your-build.md) - [Using CoreRun To Run .NET Application](./testing/using-corerun.md) - [Dogfooding .NET SDK](https://github.com/dotnet/runtime/blob/master/docs/project/dogfooding.md). ## Prerequisites -1. Successfully built CoreCLR repository and thus have files of the form shown below. From now on we call this folder NuGet package folder. +1. 
Successfully built this repository and thus have files of the form shown below. From now on we call this folder the NuGet package folder.

 ```
-    artifacts\bin\coreclr\..\.nuget\pkg\runtime.-.Microsoft.NETCore.Runtime.CoreCLR..nupkg
+    artifacts\packages\\Shipping\
 ```

-2. Acquired the latest nightly .NET SDK from [here](https://github.com/dotnet/cli/blob/master/README.md#installers-and-binaries) and added it's root folder to your [path](requirements/windows-requirements.md#adding-to-the-default-path-variable)
+2. Acquired the latest nightly .NET SDK from [here](https://github.com/dotnet/installer) and added its root folder to your [path](requirements/windows-requirements.md#adding-to-the-default-path-variable)

 ## First Run

@@ -29,13 +29,11 @@ From now on all instructions relate to this folder as "app folder".

 ### 2. Create NuGet.Config file

-The build script creates NuGet packages and puts them to `artifacts\bin\coreclr\..\.nuget\pkg\`. .NET SDK has no idea about its existence and we need to tell it where to search for the packages.
+The build script creates NuGet packages and puts them in `artifacts\packages\\Shipping\`. The .NET SDK has no idea of their existence, so we need to tell it where to search for the packages.

 Please run `dotnet new nugetconfig` in the app folder and update the created `NuGet.Config` file:

-* **set path to local CoreCLR NuGet folder!!**
-* add address to dotnet core tools NuGet feed
-
+* **adjust the path below to point to your in-repo NuGet folder**

 ```xml
-
-
+
-
 ```

 ### 3. Create and update the Project file

@@ -60,39 +56,37 @@ Please run `dotnet new console` in the app folder and update the created `.cspro

     Exe
-    netcoreapp3.0
+    net5.0
     win-x64
-    3.0.0-preview1-26210-0
-
-
+
``` -**You have to set the correct values for `RuntimeIdentifier` (RI), `RuntimeFrameworkVersion` and versions of both packages.** +**You have to set the correct values for `RuntimeIdentifier` (RI) and `RuntimeFrameworkVersion`.** You can generally figure that out by looking at the packages you found in your output. -In our example you will see there is a package with the name `runtime.win-x64.Microsoft.NETCore.Runtime.CoreCLR.3.0.0-preview1-26210-0.nupkg` +In our example you will see there is a package with the name `Microsoft.NETCore.App.Runtime.win-x64.5.0.0-dev.nupkg` ``` -runtime.win-x64.Microsoft.NETCore.Runtime.CoreCLR.3.0.0-preview1-26210-0.nupkg - ^--RI---^ ^--------version-------^ +Microsoft.NETCore.App.Runtime.win-x64.5.0.0-dev.nupkg + ^-RI--^ ^version^ ``` ### 4. Change Program.cs -To make sure that you run against your local coreclr build please change your `Main` method in `Program.cs` file to: +To make sure that you run against your local build of this repo please change your `Main` method in `Program.cs` file to: ```cs static void Main(string[] args) { - var coreAssemblyInfo = System.Diagnostics.FileVersionInfo.GetVersionInfo(typeof(object).Assembly.Location); - Console.WriteLine($"Hello World from Core {coreAssemblyInfo.ProductVersion}"); - Console.WriteLine($"The location is {typeof(object).Assembly.Location}"); + var coreAssemblyInfo = System.Diagnostics.FileVersionInfo.GetVersionInfo(typeof(object).Assembly.Location); + Console.WriteLine($"Hello World from .NET {coreAssemblyInfo.ProductVersion}"); + Console.WriteLine($"The location is {typeof(object).Assembly.Location}"); } ``` @@ -108,40 +102,56 @@ dotnet publish Make sure that restoring done by `dotnet publish` installed the explicit version of the Runtime that you have specified: ``` -PS C:\coreclr\helloWorld> dotnet publish - Restoring packages for C:\coreclr\helloWorld\helloWorld.csproj... - Installing runtime.win-x64.Microsoft.NETCore.Runtime.CoreCLR 3.0.0-preview1-26210- +c:\runtime\helloworld>dotnet publish +Microsoft (R) Build Engine version 16.7.0-preview-20360-03+188921e2f for .NET +Copyright (C) Microsoft Corporation. All rights reserved. + + Determining projects to restore... + Restored c:\runtime\helloworld\helloworld.csproj (in 114 ms). + You are using a preview version of .NET. See: https://aka.ms/dotnet-core-preview + helloworld -> c:\runtime\helloworld\bin\Debug\net5.0\win-x64\helloworld.dll + helloworld -> c:\runtime\helloworld\bin\Debug\net5.0\win-x64\publish\ ``` If you see something like the message below it means that it has failed to restore your local runtime packages. In such case double check your `NuGet.config` file and paths used in it. ``` -C:\coreclr\helloWorld\helloWorld.csproj : warning NU1603: helloWorld depends on runtime.win-x64.Microsoft.NETCore.Runtime.CoreCLR (>= 3.0.0-preview1-26210-0) but runtime.win-x64.Microsoft.NETCore.Runtime.CoreCLR 3.0.0-preview1-26210-0 was not found. An approximate best match of runtime.win-x64.Microsoft.NETCore.Runtime.CoreCLR 3.0.0-preview2-25501-02 was resolved. +c:\runtime\helloworld>dotnet publish +Microsoft (R) Build Engine version 16.7.0-preview-20360-03+188921e2f for .NET +Copyright (C) Microsoft Corporation. All rights reserved. + + Determining projects to restore... 
+c:\runtime\helloworld\helloworld.csproj : error NU1102: Unable to find package Microsoft.NETCore.App.Runtime.win-x64 with version (= 5.0.0-does-not-exist)
+c:\runtime\helloworld\helloworld.csproj : error NU1102:   - Found 25 version(s) in nuget [ Nearest version: 5.0.0-preview.1.20120.5 ]
+c:\runtime\helloworld\helloworld.csproj : error NU1102:   - Found 1 version(s) in local runtime [ Nearest version: 5.0.0-dev ]
+c:\runtime\helloworld\helloworld.csproj : error NU1102: Unable to find package Microsoft.NETCore.App.Host.win-x64 with version (= 5.0.0-does-not-exist)
+c:\runtime\helloworld\helloworld.csproj : error NU1102:   - Found 27 version(s) in nuget [ Nearest version: 5.0.0-preview.1.20120.5 ]
+c:\runtime\helloworld\helloworld.csproj : error NU1102:   - Found 1 version(s) in local runtime [ Nearest version: 5.0.0-dev ]
+  Failed to restore c:\runtime\helloworld\helloworld.csproj (in 519 ms).
 ```

 ### 6. Run the app

-After you publish you will find all the binaries needed to run your application under `bin\Debug\netcoreapp3.0\win-x64\publish\`.
+After you publish you will find all the binaries needed to run your application under `bin\Debug\net5.0\win-x64\publish\`.
 To run the application simply run the EXE that is in this publish directory (it is named after the app, or as specified in the project file).

 ```
-.\bin\Debug\netcoreapp3.0\win-x64\publish\HelloWorld.exe
+.\bin\Debug\net5.0\win-x64\publish\HelloWorld.exe
 ```

-Running the app should tell you the version and which user and machine build the assembly as well as the commit hash of the code
-at the time of building:
+Running the app should tell you the version and the location of System.Private.CoreLib in the publish directory:

 ```
-Hello World from Core 4.6.26210.0 @BuiltBy: adsitnik-MININT-O513E3V @SrcCode: https://github.com/dotnet/runtime/tree/3d6da797d1f7dc47d5934189787a4e8006ab3a04
-The location is C:\coreclr\helloWorld\bin\Debug\netcoreapp3.0\win-x64\publish\System.Private.CoreLib.dll
+Hello World from .NET 5.0.0-dev
+The location is c:\runtime\helloworld\bin\Debug\net5.0\win-x64\publish\System.Private.CoreLib.dll
 ```

-**Congratulations! You have just run your first app against local CoreCLR build!**
+**Congratulations! You have just run your first app against your local build of this repo!**

-## Update CoreCLR using runtime nuget package
+## Update using the runtime nuget package

-Updating CoreCLR from raw binary output is easier for quick one-off testing but using the nuget package is better
-for referencing your CoreCLR build in your actual application because of it does not require manual copying of files
+Updating the runtime from raw binary output is easier for quick one-off testing, but using the nuget package is better
+for referencing your build in your actual application because it does not require manual copying of files
 around each time the application is built and plugs into the rest of the tool chain.
 This set of instructions will cover the further steps needed to consume the runtime nuget package.
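For reference, here is a minimal sketch of what the `NuGet.Config` and project file from the walkthrough above might look like. The feed path and version are illustrative only (a `win-x64` Release build producing `5.0.0-dev` packages is assumed) and must be adjusted to match your own build output:

```xml
<?xml version="1.0" encoding="utf-8"?>
<configuration>
  <packageSources>
    <clear />
    <!-- assumed location of the in-repo NuGet folder; adjust to your build output -->
    <add key="local runtime" value="C:\runtime\artifacts\packages\Release\Shipping" />
    <add key="nuget" value="https://api.nuget.org/v3/index.json" />
  </packageSources>
</configuration>
```

And a matching project file, where `RuntimeFrameworkVersion` must equal the version of the `Microsoft.NETCore.App.Runtime.<rid>` package you found in your output:

```xml
<Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net5.0</TargetFramework>
    <RuntimeIdentifier>win-x64</RuntimeIdentifier>
    <!-- assumed version; must match the locally built package, e.g. 5.0.0-dev -->
    <RuntimeFrameworkVersion>5.0.0-dev</RuntimeFrameworkVersion>
  </PropertyGroup>
</Project>
```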
diff --git a/eng/Analyzers.props b/eng/Analyzers.props index 2c9b1c8bee44..ff442658c331 100644 --- a/eng/Analyzers.props +++ b/eng/Analyzers.props @@ -6,7 +6,7 @@ - + diff --git a/eng/AvoidRestoreCycleOnSelfReference.targets b/eng/AvoidRestoreCycleOnSelfReference.targets new file mode 100644 index 000000000000..cb665cb070d9 --- /dev/null +++ b/eng/AvoidRestoreCycleOnSelfReference.targets @@ -0,0 +1,14 @@ + + + + <_PackageIdTemp>$(PackageId) + $(PackageId)_temp + + + + + $(_PackageIdTemp) + + + \ No newline at end of file diff --git a/eng/BeforeTargetFrameworkInference.targets b/eng/BeforeTargetFrameworkInference.targets index 92adb5df6353..abef4c8981a2 100644 --- a/eng/BeforeTargetFrameworkInference.targets +++ b/eng/BeforeTargetFrameworkInference.targets @@ -6,10 +6,6 @@ $(TargetFramework.SubString(0, $(TargetFramework.IndexOf('-')))) - - $([MSBuild]::NormalizeDirectory('$(RefRootPath)', '$(TargetFramework)')) - - diff --git a/eng/CodeAnalysis.ruleset b/eng/CodeAnalysis.ruleset index 42ea385a079d..3560577d8f3c 100644 --- a/eng/CodeAnalysis.ruleset +++ b/eng/CodeAnalysis.ruleset @@ -64,6 +64,8 @@ + + @@ -111,7 +113,9 @@ - + + + @@ -174,6 +178,15 @@ + + + + + + + + + @@ -239,6 +252,8 @@ + + diff --git a/eng/Configurations.props b/eng/Configurations.props index aa8e9add17dd..3d123f81972a 100644 --- a/eng/Configurations.props +++ b/eng/Configurations.props @@ -29,9 +29,6 @@ $(NetCoreAppCurrent) Microsoft.NETCore.App .NET $(NetCoreAppCurrentVersion) - - net472 - WINDOWS7.0 diff --git a/eng/DefaultGenApiDocIds.txt b/eng/DefaultGenApiDocIds.txt index 5576d9fad6ae..8b2af83b92d4 100644 --- a/eng/DefaultGenApiDocIds.txt +++ b/eng/DefaultGenApiDocIds.txt @@ -1,11 +1,6 @@ // These attributes should be excluded from reference assemblies. -T:System.ComponentModel.DesignerAttribute -T:System.ComponentModel.Design.Serialization.DesignerSerializerAttribute T:System.ComponentModel.Design.Serialization.RootDesignerSerializerAttribute -T:System.ComponentModel.EditorAttribute -T:System.ComponentModel.ToolboxItemAttribute -T:System.ComponentModel.TypeDescriptionProviderAttribute T:System.Configuration.ConfigurationPropertyAttribute T:System.Diagnostics.CodeAnalysis.DynamicDependencyAttribute T:System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverageAttribute diff --git a/eng/SignCheckExclusionsFile.txt b/eng/SignCheckExclusionsFile.txt index dc77ca3e4603..b45e2daaceb8 100644 --- a/eng/SignCheckExclusionsFile.txt +++ b/eng/SignCheckExclusionsFile.txt @@ -6,10 +6,10 @@ ;; The apphost and comhost are template files, modified by the SDK to produce the executable for FDE ;; and SCD apps. If they are signed, the file that the SDK produces has an invalid signature and ;; can't be signed again. More info at https://github.com/dotnet/core-setup/pull/7549. 
-*apphost.exe;;Template, https://github.com/dotnet/core-setup/pull/7549 -*singlefilehost.exe;;Template, https://github.com/dotnet/core-setup/pull/7549 -*comhost.dll;;Template, https://github.com/dotnet/core-setup/pull/7549 -*apphosttemplateapphostexe.exe;;Template, https://github.com/dotnet/core-setup/pull/7549 -*comhosttemplatecomhostdll.dll;;Template, https://github.com/dotnet/core-setup/pull/7549 -*staticapphosttemplateapphostexe.exe;;Template, https://github.com/dotnet/core-setup/pull/7549 +*apphost.exe;;Template, DO-NOT-SIGN, https://github.com/dotnet/core-setup/pull/7549 +*singlefilehost.exe;;Template, DO-NOT-SIGN, https://github.com/dotnet/core-setup/pull/7549 +*comhost.dll;;Template, DO-NOT-SIGN, https://github.com/dotnet/core-setup/pull/7549 +*apphosttemplateapphostexe.exe;;Template, DO-NOT-SIGN, https://github.com/dotnet/core-setup/pull/7549 +*comhosttemplatecomhostdll.dll;;Template, DO-NOT-SIGN, https://github.com/dotnet/core-setup/pull/7549 +*staticapphosttemplateapphostexe.exe;;Template, DO-NOT-SIGN, https://github.com/dotnet/core-setup/pull/7549 *dotnet.js;;Workaround, https://github.com/dotnet/core-eng/issues/9933 diff --git a/eng/Signing.props b/eng/Signing.props index 34821db7fc4e..1ca34c107da6 100644 --- a/eng/Signing.props +++ b/eng/Signing.props @@ -28,6 +28,13 @@ + + + + + + + @@ -108,11 +115,6 @@ - - - - - diff --git a/eng/Subsets.props b/eng/Subsets.props index 958389db150f..f7f61f98f6e9 100644 --- a/eng/Subsets.props +++ b/eng/Subsets.props @@ -57,9 +57,9 @@ clr.runtime+linuxdac+clr.corelib+clr.nativecorelib+clr.tools+clr.packages mono.llvm+ - $(DefaultMonoSubsets)mono.runtime+mono.corelib + $(DefaultMonoSubsets)mono.runtime+mono.corelib+mono.packages - libs.depprojs+libs.native+libs.ref+libs.src+libs.pretest+libs.packages + libs.native+libs.ref+libs.src+libs.pretest+libs.packages corehost+installer.managed+installer.depprojs+installer.pkgprojs+bundles+installers+installer.tests installer.pkgprojs @@ -70,6 +70,7 @@ <_subset>$(_subset.Replace('+mono+', '+$(DefaultMonoSubsets)+')) <_subset>$(_subset.Replace('+libs+', '+$(DefaultLibrariesSubsets)+')) <_subset>$(_subset.Replace('+installer+', '+$(DefaultInstallerSubsets)+')) + <_subset>$(_subset.Replace('+installer.nocorehost+', '+$(DefaultInstallerSubsets.Replace('corehost+', ''))+')) <_subset>+$(_subset.Trim('+'))+ @@ -92,10 +93,10 @@ + - @@ -106,6 +107,7 @@ + @@ -168,6 +170,10 @@ + + + + @@ -177,12 +183,6 @@ - - - Configuration=$(LibrariesConfiguration) - - - @@ -222,15 +222,11 @@ - - - - diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index 2940b3c13c76..733f23eec608 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -4,155 +4,155 @@ https://github.com/dotnet/standard cfe95a23647c7de1fe1a349343115bd7720d6949 - + https://github.com/dotnet/icu - 797c523dd8d75096319f3591958f703b8d74d04b + 8025faea58fb28c51ddaf3484a7bf506eaa7897b - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 
56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://github.com/dotnet/arcade - ff5d4b6c8dbdaeacb6e6159d3f8185118dffd915 + 56a95cc477558c1ccdf16d7abe962849ea970ba4 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - d0bb63d2ec7060714e63ee4082fac48f2e57f3e2 + f69d7fc09c4fdb9e9427741b9a176e867dab577f - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - d0bb63d2ec7060714e63ee4082fac48f2e57f3e2 + f69d7fc09c4fdb9e9427741b9a176e867dab577f - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - d0bb63d2ec7060714e63ee4082fac48f2e57f3e2 + f69d7fc09c4fdb9e9427741b9a176e867dab577f - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - d0bb63d2ec7060714e63ee4082fac48f2e57f3e2 + f69d7fc09c4fdb9e9427741b9a176e867dab577f - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - d0bb63d2ec7060714e63ee4082fac48f2e57f3e2 + f69d7fc09c4fdb9e9427741b9a176e867dab577f - + https://github.com/microsoft/vstest - 069d8bd6357e2dbc260a35016ddbefe5dfec4102 + f5e870bd4f3402c9e6e71679f8d0b716653143f5 - + https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + 
https://github.com/dotnet/runtime-assets - 5a041ae14a25fe6e5db62666778a7adb59d5a056 + 3697381858b21a189224431a11850e2a4733a131 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 - + https://github.com/dotnet/llvm-project - 4e6a09468cb4e4e1be38ac25fcf866ca8136638b + 365591200ce46a48a3a00ef0ca0d0b33ea6bd4b4 https://github.com/dotnet/runtime @@ -182,17 +182,17 @@ https://github.com/dotnet/runtime 0375524a91a47ca4db3ee1be548f74bab7e26e76 - + https://github.com/mono/linker - f7c8a2a9e5aa47718169140db23c42f3439e6660 + cefde0af8d4714c59b32eec1075f3e68a9666419 - + https://github.com/dotnet/xharness - 5c95b40b725e1aa9d596411c453900385cf6f84c + 1ff8172e92ae0bfb1d998993f34e8685568807c1 - + https://github.com/dotnet/xharness - 5c95b40b725e1aa9d596411c453900385cf6f84c + 1ff8172e92ae0bfb1d998993f34e8685568807c1 diff --git a/eng/Versions.props b/eng/Versions.props index 0c899b297886..57f439d5fc99 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -1,16 +1,18 @@ - 5.0.0 + 6.0.0 - 5 + 6 0 0 Utf8String 1 - $(MajorVersion).$(MinorVersion).0.0 + + 5.0.0.0 false release @@ -18,6 +20,7 @@ true true false + true dotnet $(ContainerName) @@ -45,22 +48,21 @@ - - - + + 3.8.0-2.20403.2 - 5.0.0-beta.20364.3 - 5.0.0-beta.20364.3 - 5.0.0-beta.20364.3 - 5.0.0-beta.20364.3 - 5.0.0-beta.20364.3 - 5.0.0-beta.20364.3 - 2.5.1-beta.20364.3 - 5.0.0-beta.20364.3 - 5.0.0-beta.20364.3 - 5.0.0-beta.20364.3 + 5.0.0-beta.20419.21 + 5.0.0-beta.20419.21 + 5.0.0-beta.20419.21 + 5.0.0-beta.20419.21 + 5.0.0-beta.20419.21 + 5.0.0-beta.20419.21 + 2.5.1-beta.20419.21 + 5.0.0-beta.20419.21 + 5.0.0-beta.20419.21 + 5.0.0-beta.20419.21 5.0.0-preview.4.20202.18 5.0.0-preview.4.20202.18 @@ -69,25 +71,55 @@ 5.0.0-preview.8.20359.4 + 4.5.1 + 4.3.0 + 4.3.0 + 4.7.0 + 4.8.1 + 4.3.0 + 4.3.0 + 4.3.0 + 4.3.0 + 4.3.0 + 4.5.4 + 4.3.4 + 4.3.1 + 4.5.0 + 4.3.0 + 4.3.1 + 4.3.1 + 4.3.0 + 4.3.0 + 4.3.0 + 4.3.1 + 4.7.0 + 4.7.0 + 4.7.0 5.0.0-preview.4.20202.18 + 4.3.0 + 4.5.4 + 4.5.0 + 1.1.1 + 4.3.0 5.0.0-alpha.1.19563.3 - 5.0.0-beta.20364.1 - 5.0.0-beta.20364.1 - 5.0.0-beta.20364.1 - 5.0.0-beta.20364.1 - 5.0.0-beta.20364.1 - 5.0.0-beta.20364.1 - 5.0.0-beta.20364.1 - 5.0.0-beta.20364.1 + 5.0.0-beta.20420.1 + 5.0.0-beta.20420.1 + 5.0.0-beta.20420.1 + 5.0.0-beta.20420.1 + 5.0.0-beta.20420.1 + 5.0.0-beta.20420.1 + 5.0.0-beta.20420.1 + 5.0.0-beta.20420.1 2.2.0-prerelease.19564.1 + 2.0.3 - 99.99.99-master-20200228.3 - 99.99.99-master-20200228.3 - 99.99.99-master-20200228.3 - 
99.99.99-master-20200228.3 - 99.99.99-master-20200228.3 + 99.99.99-master-20200806.6 + 99.99.99-master-20200806.6 + 99.99.99-master-20200806.6 + 99.99.99-master-20200806.6 + 99.99.99-master-20200806.6 1.7.0 2.0.0-beta1.20253.1 @@ -101,33 +133,32 @@ $(RefOnlyMicrosoftBuildVersion) 4.9.4 4.9.4 - - 4.8.0 - 16.8.0-preview-20200716-03 - 1.0.0-prerelease.20352.3 - 1.0.0-prerelease.20352.3 + 16.8.0-release-20200821-04 + 1.0.0-prerelease.20424.1 + 1.0.0-prerelease.20424.1 2.4.1 2.4.2 1.3.0 2.0.5 12.0.3 4.12.0 + 2.14.3 3.0.0-preview-20200715.1 - 5.0.0-preview.3.20366.2 + 6.0.0-alpha.1.20424.3 - 5.0.0-preview.8.20370.1 + 5.0.0-preview.8.20421.1 - 9.0.1-alpha.1.20365.1 - 9.0.1-alpha.1.20365.1 - 9.0.1-alpha.1.20365.1 - 9.0.1-alpha.1.20365.1 - 9.0.1-alpha.1.20365.1 - 9.0.1-alpha.1.20365.1 - 9.0.1-alpha.1.20365.1 - 9.0.1-alpha.1.20365.1 + 9.0.1-alpha.1.20418.1 + 9.0.1-alpha.1.20418.1 + 9.0.1-alpha.1.20418.1 + 9.0.1-alpha.1.20418.1 + 9.0.1-alpha.1.20418.1 + 9.0.1-alpha.1.20418.1 + 9.0.1-alpha.1.20418.1 + 9.0.1-alpha.1.20418.1 @@ -146,7 +177,7 @@ Microsoft.NETCore.Runtime.ICU.Transport $([MSBuild]::NormalizeDirectory('$(NuGetPackageRoot)', '$(MicrosoftPrivateIntellisensePackage)', '$(MicrosoftPrivateIntellisenseVersion)', 'IntellisenseFiles', 'net')) diff --git a/eng/actions/backport/action.yml b/eng/actions/backport/action.yml new file mode 100644 index 000000000000..c136dd5ff6d4 --- /dev/null +++ b/eng/actions/backport/action.yml @@ -0,0 +1,18 @@ +name: 'PR Backporter' +description: 'Backports a pull request to a branch using the "/backport to " comment' +inputs: + auth_token: + description: 'The token used to authenticate to GitHub.' + pr_title_template: + description: 'The template used for the PR title. Special placeholder tokens that will be replaced with a value: %target_branch%, %source_pr_title%, %source_pr_number%, %cc_users%.' + default: '[%target_branch%] %source_pr_title%' + pr_description_template: + description: 'The template used for the PR description. Special placeholder tokens that will be replaced with a value: %target_branch%, %source_pr_title%, %source_pr_number%, %cc_users%.' + default: | + Backport of #%source_pr_number% to %target_branch% + + /cc %cc_users% + +runs: + using: 'node12' + main: 'index.js' diff --git a/eng/actions/backport/index.js b/eng/actions/backport/index.js new file mode 100644 index 000000000000..076196100f37 --- /dev/null +++ b/eng/actions/backport/index.js @@ -0,0 +1,174 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
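+// This action listens for "/backport to <target-branch>" comments on pull requests,
+// verifies the commenter is a repo collaborator, applies the PR's patch to the target
+// branch on a temporary branch, and opens (or force-updates) a backport PR.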
+
+function BackportException(message, postToGitHub = true) {
+    this.message = message;
+    this.postToGitHub = postToGitHub;
+}
+
+async function run() {
+    const util = require("util");
+    const jsExec = util.promisify(require("child_process").exec);
+
+    console.log("Installing npm dependencies");
+    const { stdout, stderr } = await jsExec("npm install @actions/core @actions/github @actions/exec");
+    console.log("npm-install stderr:\n\n" + stderr);
+    console.log("npm-install stdout:\n\n" + stdout);
+    console.log("Finished installing npm dependencies");
+
+    const core = require("@actions/core");
+    const github = require("@actions/github");
+    const exec = require("@actions/exec");
+
+    if (github.context.eventName !== "issue_comment") throw "Error: This action only works on issue_comment events.";
+
+    const run_id = process.env.GITHUB_RUN_ID;
+    const repo_owner = github.context.payload.repository.owner.login;
+    const repo_name = github.context.payload.repository.name;
+    const pr_number = github.context.payload.issue.number;
+    const pr_source_ref = process.env.GITHUB_REF;
+    const comment_user = github.context.payload.comment.user.login;
+
+    let octokit = github.getOctokit(core.getInput("auth_token"));
+    let target_branch = "";
+
+    try {
+        // verify the comment user is a repo collaborator
+        try {
+            await octokit.repos.checkCollaborator({
+                owner: repo_owner,
+                repo: repo_name,
+                username: comment_user
+            });
+            console.log(`Verified ${comment_user} is a repo collaborator.`);
+        } catch {
+            throw new BackportException(`Error: @${comment_user} is not a repo collaborator, backporting is not allowed.`);
+        }
+
+        // extract the target branch name from the trigger phrase containing these characters: a-z, A-Z, digits, forward slash, dot, hyphen, underscore
+        console.log(`Extracting target branch`);
+        const regex = /\/backport to ([a-zA-Z\d\/\.\-\_]+)/;
+        // regex.exec returns null when the trigger phrase has no match, so check the
+        // match object before indexing into it; indexing into null would throw a
+        // TypeError and mask the intended error message.
+        const match = regex.exec(github.context.payload.comment.body);
+        if (match == null) throw new BackportException("Error: No backport branch found in the trigger phrase.");
+        target_branch = match[1];
+        try { await exec.exec(`git ls-remote --exit-code --heads origin ${target_branch}`) } catch { throw new BackportException(`Error: The specified backport target branch ${target_branch} wasn't found in the repo.`); }
+        console.log(`Backport target branch: ${target_branch}`);
+
+        // Post backport started comment to pull request
+        const backport_start_body = `Started backporting to ${target_branch}: https://github.com/${repo_owner}/${repo_name}/actions/runs/${run_id}`;
+        await octokit.issues.createComment({
+            owner: repo_owner,
+            repo: repo_name,
+            issue_number: pr_number,
+            body: backport_start_body
+        });
+
+        console.log("Applying backport patch");
+
+        await exec.exec(`git -c protocol.version=2 fetch --no-tags --progress --no-recurse-submodules origin ${target_branch} ${pr_source_ref}`);
+        await exec.exec(`git checkout ${target_branch}`);
+        await exec.exec(`git clean -xdff`);
+
+        // configure git
+        await exec.exec(`git config user.name "github-actions"`);
+        await exec.exec(`git config user.email "github-actions@github.com"`);
+
+        // create temporary backport branch
+        const temp_branch = `backport/pr-${pr_number}-to-${target_branch}`;
+        await exec.exec(`git checkout -b ${temp_branch}`);
+
+        // skip opening a PR if the branch already exists on the origin remote, since that means it was opened
+        // by an earlier backport, and force pushing to the branch updates the existing PR
+        let should_open_pull_request = true;
+        try {
+            await exec.exec(`git ls-remote --exit-code --heads origin ${temp_branch}`);
+            should_open_pull_request = false;
+        } catch { }
+
+        // download and apply patch
+        await exec.exec(`curl -sSL "${github.context.payload.issue.pull_request.patch_url}" --output changes.patch`);
+
+        const git_am_command = "git am --3way --ignore-whitespace --keep-non-patch changes.patch";
+        let git_am_output = `$ ${git_am_command}\n\n`;
+        let git_am_failed = false;
+        try {
+            await exec.exec(git_am_command, [], {
+                listeners: {
+                    stdout: function stdout(data) { git_am_output += data; },
+                    stderr: function stderr(data) { git_am_output += data; }
+                }
+            });
+        } catch (error) {
+            git_am_output += error;
+            git_am_failed = true;
+        }
+
+        if (git_am_failed) {
+            const git_am_failed_body = `@${github.context.payload.comment.user.login} backporting to ${target_branch} failed, the patch most likely resulted in conflicts:\n\n\`\`\`shell\n${git_am_output}\n\`\`\`\n\nPlease backport manually!`;
+            await octokit.issues.createComment({
+                owner: repo_owner,
+                repo: repo_name,
+                issue_number: pr_number,
+                body: git_am_failed_body
+            });
+            throw new BackportException("Error: git am failed, most likely due to a merge conflict.", false);
+        }
+        else {
+            // push the temp branch to the repository
+            await exec.exec(`git push --force --set-upstream origin HEAD:${temp_branch}`);
+        }
+
+        if (!should_open_pull_request) {
+            console.log("Backport temp branch already exists, skipping opening a PR.");
+            return;
+        }
+
+        // prepare the GitHub PR details
+        let backport_pr_title = core.getInput("pr_title_template");
+        let backport_pr_description = core.getInput("pr_description_template");
+
+        // get users to cc (append PR author if different from user who issued the backport command)
+        let cc_users = `@${comment_user}`;
+        if (comment_user != github.context.payload.issue.user.login) cc_users += ` @${github.context.payload.issue.user.login}`;
+
+        // replace the special placeholder tokens with values
+        backport_pr_title = backport_pr_title
+            .replace(/%target_branch%/g, target_branch)
+            .replace(/%source_pr_title%/g, github.context.payload.issue.title)
+            .replace(/%source_pr_number%/g, github.context.payload.issue.number)
+            .replace(/%cc_users%/g, cc_users);
+
+        backport_pr_description = backport_pr_description
+            .replace(/%target_branch%/g, target_branch)
+            .replace(/%source_pr_title%/g, github.context.payload.issue.title)
+            .replace(/%source_pr_number%/g, github.context.payload.issue.number)
+            .replace(/%cc_users%/g, cc_users);
+
+        // open the GitHub PR
+        await octokit.pulls.create({
+            owner: repo_owner,
+            repo: repo_name,
+            title: backport_pr_title,
+            body: backport_pr_description,
+            head: temp_branch,
+            base: target_branch
+        });
+
+        console.log("Successfully opened the GitHub PR.");
+    } catch (error) {
+
+        core.setFailed(error);
+
+        if (error.postToGitHub === undefined || error.postToGitHub == true) {
+            // post failure to GitHub comment
+            const unknown_error_body = `@${comment_user} an error occurred while backporting to ${target_branch}, please check the run log for details!\n\n${error.message}`;
+            await octokit.issues.createComment({
+                owner: repo_owner,
+                repo: repo_name,
+                issue_number: pr_number,
+                body: unknown_error_body
+            });
+        }
+    }
+}
+
+run();
diff --git a/eng/build.ps1 b/eng/build.ps1
index f8b205bd3930..016ed15d2035 100644
--- a/eng/build.ps1
+++ b/eng/build.ps1
@@ -65,7 +65,7 @@ function Get-Help() {
   Write-Host "Libraries settings:"
   Write-Host "  -allconfigurations      Build packages for all build configurations."
 Write-Host " -coverage Collect code coverage when testing."
- Write-Host " -framework (-f) Build framework: net5.0 or net472."
+ Write-Host " -framework (-f) Build framework: net5.0 or net48."
 Write-Host " [Default: net5.0]"
 Write-Host " -testnobuild Skip building tests when invoking -test."
 Write-Host " -testscope Scope tests, allowed values: innerloop, outerloop, all."
diff --git a/eng/build.sh b/eng/build.sh
index eb0cb586ebf3..afa16a69447a 100755
--- a/eng/build.sh
+++ b/eng/build.sh
@@ -60,7 +60,7 @@ usage()
 echo "Libraries settings:"
 echo " --allconfigurations Build packages for all build configurations."
 echo " --coverage Collect code coverage when testing."
- echo " --framework (-f) Build framework: net5.0 or net472."
+ echo " --framework (-f) Build framework: net5.0 or net48."
 echo " [Default: net5.0]"
 echo " --testnobuild Skip building tests when invoking -test."
 echo " --testscope Test scope, allowed values: innerloop, outerloop, all."
@@ -97,6 +97,9 @@ usage()
 echo "* Build CoreCLR for Linux x64 on Debug configuration using GCC 8.4."
 echo "./build.sh clr -gcc8.4"
 echo ""
+ echo "* Build CoreCLR for Linux x64 using extra compiler flags (-fstack-clash-protection)."
+ echo "EXTRA_CFLAGS=-fstack-clash-protection EXTRA_CXXFLAGS=-fstack-clash-protection ./build.sh clr"
+ echo ""
 echo "* Cross-compile CoreCLR runtime for Linux ARM64 on Release configuration."
 echo "./build.sh clr.runtime -arch arm64 -c release -cross"
 echo ""
diff --git a/eng/codeOptimization.targets b/eng/codeOptimization.targets
index 6ed7e997487b..73e19c956055 100644
--- a/eng/codeOptimization.targets
+++ b/eng/codeOptimization.targets
@@ -4,12 +4,7 @@
 true
 false
 false
-
- false
+ false
... section.
@@ -74,7 +76,7 @@ if [ "$?" != "0" ]; then
 PackageSourcesNodeFooter="</packageSources>"
 PackageSourceCredentialsTemplate="${TB}<packageSourceCredentials>${NL}${TB}</packageSourceCredentials>"
- sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourcesNodeFooter${NL}$PackageSourceCredentialsTemplate|" NuGet.config
+ sed -i.bak "s|$PackageSourcesNodeFooter|$PackageSourcesNodeFooter${NL}$PackageSourceCredentialsTemplate|" $ConfigFile
 fi
 
 PackageSources=()
@@ -146,3 +148,20 @@
 for FeedName in ${PackageSources[@]} ; do
 sed -i.bak "s|$PackageSourceCredentialsNodeFooter|$NewCredential${NL}$PackageSourceCredentialsNodeFooter|" $ConfigFile
 fi
 done
+
+# Re-enable any entries in disabledPackageSources where the feed name contains darc-int
+grep -i "<disabledPackageSources>" $ConfigFile
+if [ "$?"
== "0" ]; then + DisabledDarcIntSources=() + echo "Re-enabling any disabled \"darc-int\" package sources in $ConfigFile" + DisabledDarcIntSources+=$(grep -oh '"darc-int-[^"]*" value="true"' $ConfigFile | tr -d '"') + for DisabledSourceName in ${DisabledDarcIntSources[@]} ; do + if [[ $DisabledSourceName == darc-int* ]] + then + OldDisableValue="add key=\"$DisabledSourceName\" value=\"true\"" + NewDisableValue="add key=\"$DisabledSourceName\" value=\"false\"" + sed -i.bak "s|$OldDisableValue|$NewDisableValue|" $ConfigFile + echo "Neutralized disablePackageSources entry for '$DisabledSourceName'" + fi + done +fi diff --git a/eng/common/cross/arm64/tizen-fetch.sh b/eng/common/cross/arm64/tizen-fetch.sh index 338d1c3bf3c7..a48a6f51c49d 100644 --- a/eng/common/cross/arm64/tizen-fetch.sh +++ b/eng/common/cross/arm64/tizen-fetch.sh @@ -161,7 +161,7 @@ fetch_tizen_pkgs aarch64 gcc glibc glibc-devel libicu libicu-devel libatomic lin Inform "fetch coreclr packages" fetch_tizen_pkgs aarch64 lldb lldb-devel libgcc libstdc++ libstdc++-devel libunwind libunwind-devel lttng-ust-devel lttng-ust userspace-rcu-devel userspace-rcu Inform "fetch corefx packages" -fetch_tizen_pkgs aarch64 libcom_err libcom_err-devel zlib zlib-devel libopenssl libopenssl1.1-devel krb5 krb5-devel +fetch_tizen_pkgs aarch64 libcom_err libcom_err-devel zlib zlib-devel libopenssl11 libopenssl1.1-devel krb5 krb5-devel Inform "Initialize standard unified" fetch_tizen_pkgs_init standard unified diff --git a/eng/common/cross/armel/tizen-fetch.sh b/eng/common/cross/armel/tizen-fetch.sh index ed70e0a86ebd..2776cbba4e46 100755 --- a/eng/common/cross/armel/tizen-fetch.sh +++ b/eng/common/cross/armel/tizen-fetch.sh @@ -51,7 +51,7 @@ if [ ! -d $TMPDIR ]; then mkdir -p $TMPDIR fi -TIZEN_URL=http://download.tizen.org/releases/milestone/tizen +TIZEN_URL=http://download.tizen.org/snapshots/tizen BUILD_XML=build.xml REPOMD_XML=repomd.xml PRIMARY_XML=primary.xml @@ -157,12 +157,11 @@ fetch_tizen_pkgs() Inform "Initialize arm base" fetch_tizen_pkgs_init standard base Inform "fetch common packages" -fetch_tizen_pkgs armv7l gcc glibc glibc-devel libicu libicu-devel libatomic -fetch_tizen_pkgs noarch linux-glibc-devel +fetch_tizen_pkgs armv7l gcc glibc glibc-devel libicu libicu-devel libatomic linux-glibc-devel Inform "fetch coreclr packages" fetch_tizen_pkgs armv7l lldb lldb-devel libgcc libstdc++ libstdc++-devel libunwind libunwind-devel lttng-ust-devel lttng-ust userspace-rcu-devel userspace-rcu Inform "fetch corefx packages" -fetch_tizen_pkgs armv7l libcom_err libcom_err-devel zlib zlib-devel libopenssl libopenssl-devel krb5 krb5-devel libcurl libcurl-devel +fetch_tizen_pkgs armv7l libcom_err libcom_err-devel zlib zlib-devel libopenssl11 libopenssl1.1-devel krb5 krb5-devel Inform "Initialize standard unified" fetch_tizen_pkgs_init standard unified diff --git a/eng/common/cross/toolchain.cmake b/eng/common/cross/toolchain.cmake index 88a758afb19c..137736c0a272 100644 --- a/eng/common/cross/toolchain.cmake +++ b/eng/common/cross/toolchain.cmake @@ -15,11 +15,13 @@ if(TARGET_ARCH_NAME STREQUAL "armel") set(CMAKE_SYSTEM_PROCESSOR armv7l) set(TOOLCHAIN "arm-linux-gnueabi") if("$ENV{__DistroRid}" MATCHES "tizen.*") - set(TIZEN_TOOLCHAIN "armv7l-tizen-linux-gnueabi/6.2.1") + set(TIZEN_TOOLCHAIN "armv7l-tizen-linux-gnueabi/9.2.0") endif() elseif(TARGET_ARCH_NAME STREQUAL "arm") set(CMAKE_SYSTEM_PROCESSOR armv7l) - if(EXISTS ${CROSS_ROOTFS}/usr/lib/gcc/armv6-alpine-linux-musleabihf) + if(EXISTS ${CROSS_ROOTFS}/usr/lib/gcc/armv7-alpine-linux-musleabihf) + 
set(TOOLCHAIN "armv7-alpine-linux-musleabihf") + elseif(EXISTS ${CROSS_ROOTFS}/usr/lib/gcc/armv6-alpine-linux-musleabihf) set(TOOLCHAIN "armv6-alpine-linux-musleabihf") else() set(TOOLCHAIN "arm-linux-gnueabihf") @@ -127,29 +129,40 @@ endif() # Specify link flags +function(add_toolchain_linker_flag Flag) + set(Config "${ARGV1}") + set(CONFIG_SUFFIX "") + if (NOT Config STREQUAL "") + set(CONFIG_SUFFIX "_${Config}") + endif() + set("CMAKE_EXE_LINKER_FLAGS${CONFIG_SUFFIX}" "${CMAKE_EXE_LINKER_FLAGS${CONFIG_SUFFIX}} ${Flag}" PARENT_SCOPE) + set("CMAKE_SHARED_LINKER_FLAGS${CONFIG_SUFFIX}" "${CMAKE_SHARED_LINKER_FLAGS${CONFIG_SUFFIX}} ${Flag}" PARENT_SCOPE) +endfunction() + + if(TARGET_ARCH_NAME STREQUAL "armel") if(DEFINED TIZEN_TOOLCHAIN) # For Tizen only - add_link_options("-B${CROSS_ROOTFS}/usr/lib/gcc/${TIZEN_TOOLCHAIN}") - add_link_options("-L${CROSS_ROOTFS}/lib") - add_link_options("-L${CROSS_ROOTFS}/usr/lib") - add_link_options("-L${CROSS_ROOTFS}/usr/lib/gcc/${TIZEN_TOOLCHAIN}") + add_toolchain_linker_flag("-B${CROSS_ROOTFS}/usr/lib/gcc/${TIZEN_TOOLCHAIN}") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/lib") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/usr/lib") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/usr/lib/gcc/${TIZEN_TOOLCHAIN}") endif() elseif(TARGET_ARCH_NAME STREQUAL "arm64") if(DEFINED TIZEN_TOOLCHAIN) # For Tizen only - add_link_options("-B${CROSS_ROOTFS}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}") - add_link_options("-L${CROSS_ROOTFS}/lib64") - add_link_options("-L${CROSS_ROOTFS}/usr/lib64") - add_link_options("-L${CROSS_ROOTFS}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}") - - add_link_options("-Wl,--rpath-link=${CROSS_ROOTFS}/lib64") - add_link_options("-Wl,--rpath-link=${CROSS_ROOTFS}/usr/lib64") - add_link_options("-Wl,--rpath-link=${CROSS_ROOTFS}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}") + add_toolchain_linker_flag("-B${CROSS_ROOTFS}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/lib64") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/usr/lib64") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}") + + add_toolchain_linker_flag("-Wl,--rpath-link=${CROSS_ROOTFS}/lib64") + add_toolchain_linker_flag("-Wl,--rpath-link=${CROSS_ROOTFS}/usr/lib64") + add_toolchain_linker_flag("-Wl,--rpath-link=${CROSS_ROOTFS}/usr/lib64/gcc/${TIZEN_TOOLCHAIN}") endif() elseif(TARGET_ARCH_NAME STREQUAL "x86") - add_link_options(-m32) + add_toolchain_linker_flag(-m32) elseif(ILLUMOS) - add_link_options("-L${CROSS_ROOTFS}/lib/amd64") - add_link_options("-L${CROSS_ROOTFS}/usr/amd64/lib") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/lib/amd64") + add_toolchain_linker_flag("-L${CROSS_ROOTFS}/usr/amd64/lib") endif() # Specify compile options diff --git a/eng/common/performance/blazor_perf.proj b/eng/common/performance/blazor_perf.proj new file mode 100644 index 000000000000..3b25359c4380 --- /dev/null +++ b/eng/common/performance/blazor_perf.proj @@ -0,0 +1,30 @@ + + + python3 + $(HelixPreCommands);chmod +x $HELIX_WORKITEM_PAYLOAD/SOD/SizeOnDisk + + + + + %(Identity) + + + + + %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\ + $(ScenarioDirectory)blazor\ + + + $HELIX_CORRELATION_PAYLOAD/performance/src/scenarios/ + $(ScenarioDirectory)blazor/ + + + + + $(WorkItemDirectory) + cd $(BlazorDirectory);$(Python) pre.py publish --msbuild %27/p:_TrimmerDumpDependencies=true%27 --msbuild-static AdditionalMonoLinkerOptions=%27"%24(AdditionalMonoLinkerOptions) --dump-dependencies"%27 --binlog %27./traces/blazor_publish.binlog%27 + $(Python) test.py sod 
--scenario-name "%(Identity)" + $(Python) post.py + + + \ No newline at end of file diff --git a/eng/common/performance/crossgen_perf.proj b/eng/common/performance/crossgen_perf.proj new file mode 100644 index 000000000000..4264920382e1 --- /dev/null +++ b/eng/common/performance/crossgen_perf.proj @@ -0,0 +1,69 @@ + + + + + %(Identity) + + + + + + py -3 + $(HelixPreCommands) + %HELIX_CORRELATION_PAYLOAD%\Core_Root + %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\ + $(ScenarioDirectory)crossgen\ + $(ScenarioDirectory)crossgen2\ + + + python3 + $(HelixPreCommands);chmod +x $HELIX_WORKITEM_PAYLOAD/startup/Startup;chmod +x $HELIX_WORKITEM_PAYLOAD/startup/perfcollect;sudo apt update + $HELIX_CORRELATION_PAYLOAD/Core_Root + $HELIX_CORRELATION_PAYLOAD/performance/src/scenarios/ + $(ScenarioDirectory)crossgen/ + $(ScenarioDirectory)crossgen2/ + + + + + + + + + + + + + + + + $(WorkItemDirectory) + $(Python) $(CrossgenDirectory)test.py crossgen --core-root $(CoreRoot) --test-name %(Identity) + + + + + + $(WorkItemDirectory) + $(Python) $(Crossgen2Directory)test.py crossgen2 --core-root $(CoreRoot) --single %(Identity) + + + + + + + 4:00 + + + + 4:00 + + + $(WorkItemDirectory) + $(Python) $(Crossgen2Directory)test.py crossgen2 --core-root $(CoreRoot) --composite $(Crossgen2Directory)framework-r2r.dll.rsp + 1:00 + + + \ No newline at end of file diff --git a/eng/common/performance/perfhelixpublish.proj b/eng/common/performance/microbenchmarks.proj similarity index 66% rename from eng/common/performance/perfhelixpublish.proj rename to eng/common/performance/microbenchmarks.proj index 272366da95fc..94b6efbc9297 100644 --- a/eng/common/performance/perfhelixpublish.proj +++ b/eng/common/performance/microbenchmarks.proj @@ -41,11 +41,15 @@ $HELIX_WORKITEM_ROOT/testResults.xml + + $(CliArguments) --wasm + + - --corerun %HELIX_CORRELATION_PAYLOAD%\dotnet-mono\shared\Microsoft.NETCore.App\5.0.0\corerun.exe + --corerun %HELIX_CORRELATION_PAYLOAD%\dotnet-mono\shared\Microsoft.NETCore.App\6.0.0\corerun.exe - --corerun $(BaseDirectory)/dotnet-mono/shared/Microsoft.NETCore.App/5.0.0/corerun + --corerun $(BaseDirectory)/dotnet-mono/shared/Microsoft.NETCore.App/6.0.0/corerun @@ -137,56 +141,4 @@ 4:00 - - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen\test.py crossgen --test-name System.Private.Xml.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen\test.py crossgen --test-name System.Linq.Expressions.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen\test.py crossgen --test-name Microsoft.CodeAnalysis.VisualBasic.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen\test.py crossgen --test-name Microsoft.CodeAnalysis.CSharp.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen\test.py crossgen --test-name System.Private.CoreLib.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen2\test.py crossgen2 --single 
System.Private.Xml.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen2\test.py crossgen2 --single System.Linq.Expressions.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen2\test.py crossgen2 --single Microsoft.CodeAnalysis.VisualBasic.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen2\test.py crossgen2 --single Microsoft.CodeAnalysis.CSharp.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen2\test.py crossgen2 --single System.Private.CoreLib.dll --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - - - $(WorkItemDirectory)\ScenarioCorrelation - $(Python) %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen2\test.py crossgen2 --composite %HELIX_CORRELATION_PAYLOAD%\performance\src\scenarios\crossgen2\framework-r2r.dll.rsp --core-root %HELIX_CORRELATION_PAYLOAD%\Core_Root - 1:00 - - - \ No newline at end of file diff --git a/eng/common/performance/performance-setup.ps1 b/eng/common/performance/performance-setup.ps1 index 656c0bd9022c..8518ba733bf0 100644 --- a/eng/common/performance/performance-setup.ps1 +++ b/eng/common/performance/performance-setup.ps1 @@ -54,6 +54,11 @@ if ($Internal) { $HelixSourcePrefix = "official" } +if($Architecture -eq "arm64") +{ + $ExtraBenchmarkDotNetArguments = "--exclusion-filter *FannkuchRedux_9.FannkuchRedux_9*" +} + if($MonoInterpreter) { $ExtraBenchmarkDotNetArguments = "--category-exclusion-filter NoInterpreter" diff --git a/eng/common/performance/performance-setup.sh b/eng/common/performance/performance-setup.sh index c87cbf0fc232..94c2570646bf 100755 --- a/eng/common/performance/performance-setup.sh +++ b/eng/common/performance/performance-setup.sh @@ -24,6 +24,9 @@ run_from_perf_repo=false use_core_run=true use_baseline_core_run=true using_mono=false +wasm_runtime_loc= +using_wasm=false +use_latest_dotnet=false while (($# > 0)); do lowerI="$(echo $1 | awk '{print tolower($0)}')" @@ -70,7 +73,7 @@ while (($# > 0)); do ;; --kind) kind=$2 - configurations="CompliationMode=$compilation_mode RunKind=$kind" + configurations="CompilationMode=$compilation_mode RunKind=$kind" shift 2 ;; --runcategories) @@ -101,6 +104,10 @@ while (($# > 0)); do mono_dotnet=$2 shift 2 ;; + --wasm) + wasm_runtime_loc=$2 + shift 2 + ;; --compare) compare=true shift 1 @@ -109,7 +116,11 @@ while (($# > 0)); do configurations=$2 shift 2 ;; - --help) + --latestdotnet) + use_latest_dotnet=true + shift 1 + ;; + *) echo "Common settings:" echo " --corerootdirectory Directory where Core_Root exists, if running perf testing with --corerun" echo " --architecture Architecture of the testing being run" @@ -130,6 +141,8 @@ while (($# > 0)); do echo " --runcategories Related to csproj. Categories of benchmarks to run. Defaults to \"coreclr corefx\"" echo " --internal If the benchmarks are running as an official job." echo " --monodotnet Pass the path to the mono dotnet for mono performance testing." + echo " --wasm Path to the unpacked wasm runtime pack." + echo " --latestdotnet --dotnet-versions will not be specified. 
--dotnet-versions defaults to LKG version in global.json " echo "" exit 0 ;; @@ -141,7 +154,7 @@ if [ "$repository" == "dotnet/performance" ] || [ "$repository" == "dotnet-perfo fi if [ -z "$configurations" ]; then - configurations="CompliationMode=$compilation_mode" + configurations="CompilationMode=$compilation_mode" fi if [ -z "$core_root_directory" ]; then @@ -182,28 +195,36 @@ if [[ "$internal" == true ]]; then if [[ "$architecture" = "arm64" ]]; then queue=Ubuntu.1804.Arm64.Perf + extra_benchmark_dotnet_arguments="--exclusion-filter *FannkuchRedux_9.FannkuchRedux_9*" else queue=Ubuntu.1804.Amd64.Tiger.Perf fi fi -if [[ "$mono_dotnet" != "" ]]; then +if [[ "$mono_dotnet" != "" ]] && [[ "$monointerpreter" == "false" ]]; then configurations="$configurations LLVM=$llvm MonoInterpreter=$monointerpreter MonoAOT=$monoaot" + extra_benchmark_dotnet_arguments="$extra_benchmark_dotnet_arguments --category-exclusion-filter NoMono" fi -if [[ "$monointerpreter" == "true" ]]; then - extra_benchmark_dotnet_arguments="--category-exclusion-filter NoInterpreter" +if [[ "$wasm_runtime_loc" != "" ]]; then + configurations="CompilationMode=wasm RunKind=$kind" + extra_benchmark_dotnet_arguments="$extra_benchmark_dotnet_arguments --category-exclusion-filter NoInterpreter NoWASM NoMono" +fi + +if [[ "$mono_dotnet" != "" ]] && [[ "$monointerpreter" == "true" ]]; then + extra_benchmark_dotnet_arguments="$extra_benchmark_dotnet_arguments --category-exclusion-filter NoInterpreter NoMono" fi common_setup_arguments="--channel master --queue $queue --build-number $build_number --build-configs $configurations --architecture $architecture" setup_arguments="--repository https://github.com/$repository --branch $branch --get-perf-hash --commit-sha $commit_sha $common_setup_arguments" -# Get the tools section from the global.json. -# This grabs the LKG version number of dotnet and passes it to our scripts -dotnet_version=`cat global.json | python3 -c 'import json,sys;obj=json.load(sys.stdin);print(obj["tools"]["dotnet"])'` -setup_arguments="--dotnet-versions $dotnet_version $setup_arguments" - +if [[ "$use_latest_dotnet" = false ]]; then + # Get the tools section from the global.json. 
+ # This grabs the LKG version number of dotnet and passes it to our scripts + dotnet_version=`cat global.json | python3 -c 'import json,sys;obj=json.load(sys.stdin);print(obj["tools"]["dotnet"])'` + setup_arguments="--dotnet-versions $dotnet_version $setup_arguments" +fi if [[ "$run_from_perf_repo" = true ]]; then payload_directory= @@ -217,6 +238,13 @@ else mv $docs_directory $workitem_directory fi +if [[ "$wasm_runtime_loc" != "" ]]; then + using_wasm=true + wasm_dotnet_path=$payload_directory/dotnet-wasm + mv $wasm_runtime_loc $wasm_dotnet_path + extra_benchmark_dotnet_arguments="$extra_benchmark_dotnet_arguments --wasmMainJS \$HELIX_CORRELATION_PAYLOAD/dotnet-wasm/runtime-test.js --wasmEngine /home/helixbot/.jsvu/v8 --customRuntimePack \$HELIX_CORRELATION_PAYLOAD/dotnet-wasm" +fi + if [[ "$mono_dotnet" != "" ]]; then using_mono=true mono_dotnet_path=$payload_directory/dotnet-mono @@ -247,7 +275,7 @@ Write-PipelineSetVariable -name "PerformanceDirectory" -value "$performance_dire Write-PipelineSetVariable -name "WorkItemDirectory" -value "$workitem_directory" -is_multi_job_variable false Write-PipelineSetVariable -name "Queue" -value "$queue" -is_multi_job_variable false Write-PipelineSetVariable -name "SetupArguments" -value "$setup_arguments" -is_multi_job_variable false -Write-PipelineSetVariable -name "Python" -value "$python3" -is_multi_job_variable false +Write-PipelineSetVariable -name "Python" -value "python3" -is_multi_job_variable false Write-PipelineSetVariable -name "PerfLabArguments" -value "$perflab_arguments" -is_multi_job_variable false Write-PipelineSetVariable -name "ExtraBenchmarkDotNetArguments" -value "$extra_benchmark_dotnet_arguments" -is_multi_job_variable false Write-PipelineSetVariable -name "BDNCategories" -value "$run_categories" -is_multi_job_variable false @@ -259,3 +287,4 @@ Write-PipelineSetVariable -name "Kind" -value "$kind" -is_multi_job_variable fal Write-PipelineSetVariable -name "_BuildConfig" -value "$architecture.$kind.$framework" -is_multi_job_variable false Write-PipelineSetVariable -name "Compare" -value "$compare" -is_multi_job_variable false Write-PipelineSetVariable -name "MonoDotnet" -value "$using_mono" -is_multi_job_variable false +Write-PipelineSetVariable -name "WasmDotnet" -value "$using_wasm" -is_multi_job_variable false diff --git a/eng/common/post-build/publish-using-darc.ps1 b/eng/common/post-build/publish-using-darc.ps1 index d22eb439c486..f044afe8fc63 100644 --- a/eng/common/post-build/publish-using-darc.ps1 +++ b/eng/common/post-build/publish-using-darc.ps1 @@ -1,12 +1,13 @@ param( [Parameter(Mandatory=$true)][int] $BuildId, + [Parameter(Mandatory=$true)][int] $PublishingInfraVersion, [Parameter(Mandatory=$true)][string] $AzdoToken, [Parameter(Mandatory=$true)][string] $MaestroToken, [Parameter(Mandatory=$false)][string] $MaestroApiEndPoint = 'https://maestro-prod.westus2.cloudapp.azure.com', [Parameter(Mandatory=$true)][string] $WaitPublishingFinish, - [Parameter(Mandatory=$true)][string] $EnableSourceLinkValidation, - [Parameter(Mandatory=$true)][string] $EnableSigningValidation, - [Parameter(Mandatory=$true)][string] $EnableNugetValidation, + [Parameter(Mandatory=$false)][string] $EnableSourceLinkValidation, + [Parameter(Mandatory=$false)][string] $EnableSigningValidation, + [Parameter(Mandatory=$false)][string] $EnableNugetValidation, [Parameter(Mandatory=$true)][string] $PublishInstallersAndChecksums, [Parameter(Mandatory=$false)][string] $ArtifactsPublishingAdditionalParameters, [Parameter(Mandatory=$false)][string] 
$SigningValidationAdditionalParameters
@@ -14,7 +15,8 @@ param(
 try {
 . $PSScriptRoot\post-build-utils.ps1
- . $PSScriptRoot\..\darc-init.ps1
+ # Hard-coding the darc version until the next arcade-services rollout, as this version has required API changes for darc add-build-to-channel
+ . $PSScriptRoot\..\darc-init.ps1 -darcVersion "1.1.0-beta.20418.1"
 
 $optionalParams = [System.Collections.ArrayList]::new()
@@ -49,12 +51,13 @@ try {
 }
 
 & darc add-build-to-channel `
- --id $buildId `
- --default-channels `
- --source-branch master `
- --azdev-pat $AzdoToken `
- --bar-uri $MaestroApiEndPoint `
- --password $MaestroToken `
+ --id $buildId `
+ --publishing-infra-version $PublishingInfraVersion `
+ --default-channels `
+ --source-branch master `
+ --azdev-pat $AzdoToken `
+ --bar-uri $MaestroApiEndPoint `
+ --password $MaestroToken `
 @optionalParams
 
 if ($LastExitCode -ne 0) {
diff --git a/eng/common/sdk-task.ps1 b/eng/common/sdk-task.ps1
index 32ad5c7e91cb..e159c6f1848d 100644
--- a/eng/common/sdk-task.ps1
+++ b/eng/common/sdk-task.ps1
@@ -42,6 +42,7 @@ function Build([string]$target) {
 /p:Configuration=$configuration `
 /p:RepoRoot=$RepoRoot `
 /p:BaseIntermediateOutputPath=$outputPath `
+ /v:$verbosity `
 @properties
 }
diff --git a/eng/common/templates/post-build/channels/generic-internal-channel.yml b/eng/common/templates/post-build/channels/generic-internal-channel.yml
index 12db36ebe74f..59eb93a407cb 100644
--- a/eng/common/templates/post-build/channels/generic-internal-channel.yml
+++ b/eng/common/templates/post-build/channels/generic-internal-channel.yml
@@ -1,4 +1,6 @@
 parameters:
+ BARBuildId: ''
+ PromoteToChannelIds: ''
 artifactsPublishingAdditionalParameters: ''
 dependsOn:
 - Validate
@@ -19,6 +21,9 @@ stages:
 displayName: ${{ parameters.channelName }} Publishing
 jobs:
 - template: ../setup-maestro-vars.yml
+ parameters:
+ BARBuildId: ${{ parameters.BARBuildId }}
+ PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }}
 
 - job: publish_symbols
 displayName: Symbol Publishing
@@ -138,6 +143,7 @@ stages:
 inputs:
 filePath: eng\common\sdk-task.ps1
 arguments: -task PublishArtifactsInManifest -restore -msbuildEngine dotnet
+ /p:PublishingInfraVersion=2
 /p:IsStableBuild=$(IsStableBuild)
 /p:IsInternalBuild=$(IsInternalBuild)
 /p:RepositoryName=$(Build.Repository.Name)
diff --git a/eng/common/templates/post-build/channels/generic-public-channel.yml b/eng/common/templates/post-build/channels/generic-public-channel.yml
index bf98d990e88e..7e80a621a303 100644
--- a/eng/common/templates/post-build/channels/generic-public-channel.yml
+++ b/eng/common/templates/post-build/channels/generic-public-channel.yml
@@ -1,4 +1,6 @@
 parameters:
+ BARBuildId: ''
+ PromoteToChannelIds: ''
 artifactsPublishingAdditionalParameters: ''
 dependsOn:
 - Validate
@@ -21,6 +23,9 @@ stages:
 displayName: ${{ parameters.channelName }} Publishing
 jobs:
 - template: ../setup-maestro-vars.yml
+ parameters:
+ BARBuildId: ${{ parameters.BARBuildId }}
+ PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }}
 
 - job: publish_symbols
 displayName: Symbol Publishing
@@ -137,6 +142,7 @@ stages:
 inputs:
 filePath: eng\common\sdk-task.ps1
 arguments: -task PublishArtifactsInManifest -restore -msbuildEngine dotnet
+ /p:PublishingInfraVersion=2
 /p:ArtifactsCategory=$(ArtifactsCategory)
 /p:IsStableBuild=$(IsStableBuild)
 /p:IsInternalBuild=$(IsInternalBuild)
diff --git a/eng/common/templates/post-build/post-build.yml b/eng/common/templates/post-build/post-build.yml
index 0e79a546b553..030808632f05 100644
---
a/eng/common/templates/post-build/post-build.yml +++ b/eng/common/templates/post-build/post-build.yml @@ -1,13 +1,20 @@ parameters: + # Which publishing infra should be used. THIS SHOULD MATCH THE VERSION ON THE BUILD MANIFEST. + # Publishing V2 accepts optionally outlining the publishing stages - default is inline. + # Publishing V3 DOES NOT accept inlining the publishing stages. + publishingInfraVersion: 2 # When set to true the publishing templates from the repo will be used # otherwise Darc add-build-to-channel will be used to trigger the promotion pipeline inline: true # Only used if inline==false. When set to true will stall the current build until - # the Promotion Pipeline build finishes. Otherwise, the current build continue + # the Promotion Pipeline build finishes. Otherwise, the current build will continue # execution concurrently with the promotion build. waitPublishingFinish: true + BARBuildId: '' + PromoteToChannelIds: '' + enableSourceLinkValidation: false enableSigningValidation: true enableSymbolValidation: false @@ -39,6 +46,7 @@ parameters: NetEngLatestChannelId: 2 NetEngValidationChannelId: 9 NetDev5ChannelId: 131 + NetDev6ChannelId: 1296 GeneralTestingChannelId: 529 NETCoreToolingDevChannelId: 548 NETCoreToolingReleaseChannelId: 549 @@ -46,7 +54,6 @@ parameters: NETCoreExperimentalChannelId: 562 NetEngServicesIntChannelId: 678 NetEngServicesProdChannelId: 679 - Net5Preview7ChannelId: 1065 Net5Preview8ChannelId: 1155 Net5RC1ChannelId: 1157 NetCoreSDK313xxChannelId: 759 @@ -59,14 +66,183 @@ parameters: VSMasterChannelId: 1012 stages: -- ${{ if ne(parameters.inline, 'true') }}: +- stage: Validate + dependsOn: ${{ parameters.validateDependsOn }} + displayName: Validate Build Assets + variables: + - template: common-variables.yml + jobs: + - template: setup-maestro-vars.yml + parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} + + - job: + displayName: Post-build Checks + dependsOn: setupMaestroVars + variables: + - name: TargetChannels + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.TargetChannels'] ] + pool: + vmImage: 'windows-2019' + steps: + - task: PowerShell@2 + displayName: Maestro Channels Consistency + inputs: + filePath: $(Build.SourcesDirectory)/eng/common/post-build/check-channel-consistency.ps1 + arguments: -PromoteToChannels "$(TargetChannels)" + -AvailableChannelIds ${{parameters.NetEngLatestChannelId}},${{parameters.NetEngValidationChannelId}},${{parameters.NetDev5ChannelId}},${{parameters.NetDev6ChannelId}},${{parameters.GeneralTestingChannelId}},${{parameters.NETCoreToolingDevChannelId}},${{parameters.NETCoreToolingReleaseChannelId}},${{parameters.NETInternalToolingChannelId}},${{parameters.NETCoreExperimentalChannelId}},${{parameters.NetEngServicesIntChannelId}},${{parameters.NetEngServicesProdChannelId}},${{parameters.Net5Preview8ChannelId}},${{parameters.Net5RC1ChannelId}},${{parameters.NetCoreSDK313xxChannelId}},${{parameters.NetCoreSDK313xxInternalChannelId}},${{parameters.NetCoreSDK314xxChannelId}},${{parameters.NetCoreSDK314xxInternalChannelId}},${{parameters.VS166ChannelId}},${{parameters.VS167ChannelId}},${{parameters.VS168ChannelId}},${{parameters.VSMasterChannelId}} + + - job: + displayName: NuGet Validation + dependsOn: setupMaestroVars + condition: eq( ${{ parameters.enableNugetValidation }}, 'true') + pool: + vmImage: 'windows-2019' + variables: + - name: AzDOProjectName + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOProjectName'] ] + - 
name: AzDOPipelineId + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOPipelineId'] ] + - name: AzDOBuildId + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOBuildId'] ] + steps: + - task: DownloadBuildArtifacts@0 + displayName: Download Package Artifacts + inputs: + buildType: specific + buildVersionToDownload: specific + project: $(AzDOProjectName) + pipeline: $(AzDOPipelineId) + buildId: $(AzDOBuildId) + artifactName: PackageArtifacts + + - task: PowerShell@2 + displayName: Validate + inputs: + filePath: $(Build.SourcesDirectory)/eng/common/post-build/nuget-validation.ps1 + arguments: -PackagesPath $(Build.ArtifactStagingDirectory)/PackageArtifacts/ + -ToolDestinationPath $(Agent.BuildDirectory)/Extract/ + + - job: + displayName: Signing Validation + dependsOn: setupMaestroVars + condition: eq( ${{ parameters.enableSigningValidation }}, 'true') + variables: + - template: common-variables.yml + - name: AzDOProjectName + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOProjectName'] ] + - name: AzDOPipelineId + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOPipelineId'] ] + - name: AzDOBuildId + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOBuildId'] ] + pool: + vmImage: 'windows-2019' + steps: + - ${{ if eq(parameters.useBuildManifest, true) }}: + - task: DownloadBuildArtifacts@0 + displayName: Download build manifest + inputs: + buildType: specific + buildVersionToDownload: specific + project: $(AzDOProjectName) + pipeline: $(AzDOPipelineId) + buildId: $(AzDOBuildId) + artifactName: BuildManifests + - task: DownloadBuildArtifacts@0 + displayName: Download Package Artifacts + inputs: + buildType: specific + buildVersionToDownload: specific + project: $(AzDOProjectName) + pipeline: $(AzDOPipelineId) + buildId: $(AzDOBuildId) + artifactName: PackageArtifacts + + # This is necessary whenever we want to publish/restore to an AzDO private feed + # Since sdk-task.ps1 tries to restore packages we need to do this authentication here + # otherwise it'll complain about accessing a private feed. + - task: NuGetAuthenticate@0 + displayName: 'Authenticate to AzDO Feeds' + + - task: PowerShell@2 + displayName: Enable cross-org publishing + inputs: + filePath: eng\common\enable-cross-org-publishing.ps1 + arguments: -token $(dn-bot-dnceng-artifact-feeds-rw) + + # Signing validation will optionally work with the buildmanifest file which is downloaded from + # Azure DevOps above. 
+ - task: PowerShell@2 + displayName: Validate + inputs: + filePath: eng\common\sdk-task.ps1 + arguments: -task SigningValidation -restore -msbuildEngine vs + /p:PackageBasePath='$(Build.ArtifactStagingDirectory)/PackageArtifacts' + /p:SignCheckExclusionsFile='$(Build.SourcesDirectory)/eng/SignCheckExclusionsFile.txt' + ${{ parameters.signingValidationAdditionalParameters }} + + - template: ../steps/publish-logs.yml + parameters: + StageLabel: 'Validation' + JobLabel: 'Signing' + + - job: + displayName: SourceLink Validation + dependsOn: setupMaestroVars + condition: eq( ${{ parameters.enableSourceLinkValidation }}, 'true') + variables: + - template: common-variables.yml + - name: AzDOProjectName + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOProjectName'] ] + - name: AzDOPipelineId + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOPipelineId'] ] + - name: AzDOBuildId + value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOBuildId'] ] + pool: + vmImage: 'windows-2019' + steps: + - task: DownloadBuildArtifacts@0 + displayName: Download Blob Artifacts + inputs: + buildType: specific + buildVersionToDownload: specific + project: $(AzDOProjectName) + pipeline: $(AzDOPipelineId) + buildId: $(AzDOBuildId) + artifactName: BlobArtifacts + + - task: PowerShell@2 + displayName: Validate + inputs: + filePath: $(Build.SourcesDirectory)/eng/common/post-build/sourcelink-validation.ps1 + arguments: -InputPath $(Build.ArtifactStagingDirectory)/BlobArtifacts/ + -ExtractPath $(Agent.BuildDirectory)/Extract/ + -GHRepoName $(Build.Repository.Name) + -GHCommit $(Build.SourceVersion) + -SourcelinkCliVersion $(SourceLinkCLIVersion) + continueOnError: true + + - template: /eng/common/templates/job/execute-sdl.yml + parameters: + enable: ${{ parameters.SDLValidationParameters.enable }} + dependsOn: setupMaestroVars + additionalParameters: ${{ parameters.SDLValidationParameters.params }} + continueOnError: ${{ parameters.SDLValidationParameters.continueOnError }} + artifactNames: ${{ parameters.SDLValidationParameters.artifactNames }} + downloadArtifacts: ${{ parameters.SDLValidationParameters.downloadArtifacts }} + +- ${{ if or(ge(parameters.publishingInfraVersion, 3), eq(parameters.inline, 'false')) }}: - stage: publish_using_darc - dependsOn: ${{ parameters.validateDependsOn }} + dependsOn: Validate displayName: Publish using Darc variables: - template: common-variables.yml jobs: - template: setup-maestro-vars.yml + parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} - job: displayName: Publish Using Darc @@ -82,182 +258,17 @@ stages: inputs: filePath: $(Build.SourcesDirectory)/eng/common/post-build/publish-using-darc.ps1 arguments: -BuildId $(BARBuildId) + -PublishingInfraVersion ${{ parameters.PublishingInfraVersion }} -AzdoToken '$(publishing-dnceng-devdiv-code-r-build-re)' -MaestroToken '$(MaestroApiAccessToken)' -WaitPublishingFinish ${{ parameters.waitPublishingFinish }} - -EnableSourceLinkValidation ${{ parameters.enableSourceLinkValidation }} - -EnableSigningValidation ${{ parameters.enableSourceLinkValidation }} - -EnableNugetValidation ${{ parameters.enableSourceLinkValidation }} -PublishInstallersAndChecksums ${{ parameters.publishInstallersAndChecksums }} - -ArtifactsPublishingAdditionalParameters '${{ parameters.artifactsPublishingAdditionalParameters }}' - -SigningValidationAdditionalParameters '${{ parameters.signingValidationAdditionalParameters }}' - -- ${{ if 
eq(parameters.inline, 'true') }}: - - stage: Validate - dependsOn: ${{ parameters.validateDependsOn }} - displayName: Validate Build Assets - variables: - - template: common-variables.yml - jobs: - - template: setup-maestro-vars.yml - - - job: - displayName: Post-build Checks - dependsOn: setupMaestroVars - variables: - - name: TargetChannels - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.TargetChannels'] ] - pool: - vmImage: 'windows-2019' - steps: - - task: PowerShell@2 - displayName: Maestro Channels Consistency - inputs: - filePath: $(Build.SourcesDirectory)/eng/common/post-build/check-channel-consistency.ps1 - arguments: -PromoteToChannels "$(TargetChannels)" - -AvailableChannelIds ${{parameters.NetEngLatestChannelId}},${{parameters.NetEngValidationChannelId}},${{parameters.NetDev5ChannelId}},${{parameters.GeneralTestingChannelId}},${{parameters.NETCoreToolingDevChannelId}},${{parameters.NETCoreToolingReleaseChannelId}},${{parameters.NETInternalToolingChannelId}},${{parameters.NETCoreExperimentalChannelId}},${{parameters.NetEngServicesIntChannelId}},${{parameters.NetEngServicesProdChannelId}},${{parameters.Net5Preview7ChannelId}},${{parameters.Net5Preview8ChannelId}},${{parameters.Net5RC1ChannelId}},${{parameters.NetCoreSDK313xxChannelId}},${{parameters.NetCoreSDK313xxInternalChannelId}},${{parameters.NetCoreSDK314xxChannelId}},${{parameters.NetCoreSDK314xxInternalChannelId}},${{parameters.VS166ChannelId}},${{parameters.VS167ChannelId}},${{parameters.VS168ChannelId}},${{parameters.VSMasterChannelId}} - - - job: - displayName: NuGet Validation - dependsOn: setupMaestroVars - condition: eq( ${{ parameters.enableNugetValidation }}, 'true') - pool: - vmImage: 'windows-2019' - variables: - - name: AzDOProjectName - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOProjectName'] ] - - name: AzDOPipelineId - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOPipelineId'] ] - - name: AzDOBuildId - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOBuildId'] ] - steps: - - task: DownloadBuildArtifacts@0 - displayName: Download Package Artifacts - inputs: - buildType: specific - buildVersionToDownload: specific - project: $(AzDOProjectName) - pipeline: $(AzDOPipelineId) - buildId: $(AzDOBuildId) - artifactName: PackageArtifacts - - - task: PowerShell@2 - displayName: Validate - inputs: - filePath: $(Build.SourcesDirectory)/eng/common/post-build/nuget-validation.ps1 - arguments: -PackagesPath $(Build.ArtifactStagingDirectory)/PackageArtifacts/ - -ToolDestinationPath $(Agent.BuildDirectory)/Extract/ - - - job: - displayName: Signing Validation - dependsOn: setupMaestroVars - condition: eq( ${{ parameters.enableSigningValidation }}, 'true') - variables: - - template: common-variables.yml - - name: AzDOProjectName - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOProjectName'] ] - - name: AzDOPipelineId - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOPipelineId'] ] - - name: AzDOBuildId - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOBuildId'] ] - pool: - vmImage: 'windows-2019' - steps: - - ${{ if eq(parameters.useBuildManifest, true) }}: - - task: DownloadBuildArtifacts@0 - displayName: Download build manifest - inputs: - buildType: specific - buildVersionToDownload: specific - project: $(AzDOProjectName) - pipeline: $(AzDOPipelineId) - buildId: $(AzDOBuildId) - artifactName: BuildManifests - - task: DownloadBuildArtifacts@0 - displayName: Download Package 
Artifacts - inputs: - buildType: specific - buildVersionToDownload: specific - project: $(AzDOProjectName) - pipeline: $(AzDOPipelineId) - buildId: $(AzDOBuildId) - artifactName: PackageArtifacts - - # This is necessary whenever we want to publish/restore to an AzDO private feed - # Since sdk-task.ps1 tries to restore packages we need to do this authentication here - # otherwise it'll complain about accessing a private feed. - - task: NuGetAuthenticate@0 - displayName: 'Authenticate to AzDO Feeds' - - - task: PowerShell@2 - displayName: Enable cross-org publishing - inputs: - filePath: eng\common\enable-cross-org-publishing.ps1 - arguments: -token $(dn-bot-dnceng-artifact-feeds-rw) - - # Signing validation will optionally work with the buildmanifest file which is downloaded from - # Azure DevOps above. - - task: PowerShell@2 - displayName: Validate - inputs: - filePath: eng\common\sdk-task.ps1 - arguments: -task SigningValidation -restore -msbuildEngine vs - /p:PackageBasePath='$(Build.ArtifactStagingDirectory)/PackageArtifacts' - /p:SignCheckExclusionsFile='$(Build.SourcesDirectory)/eng/SignCheckExclusionsFile.txt' - ${{ parameters.signingValidationAdditionalParameters }} - - - template: ../steps/publish-logs.yml - parameters: - StageLabel: 'Validation' - JobLabel: 'Signing' - - - job: - displayName: SourceLink Validation - dependsOn: setupMaestroVars - condition: eq( ${{ parameters.enableSourceLinkValidation }}, 'true') - variables: - - template: common-variables.yml - - name: AzDOProjectName - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOProjectName'] ] - - name: AzDOPipelineId - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOPipelineId'] ] - - name: AzDOBuildId - value: $[ dependencies.setupMaestroVars.outputs['setReleaseVars.AzDOBuildId'] ] - pool: - vmImage: 'windows-2019' - steps: - - task: DownloadBuildArtifacts@0 - displayName: Download Blob Artifacts - inputs: - buildType: specific - buildVersionToDownload: specific - project: $(AzDOProjectName) - pipeline: $(AzDOPipelineId) - buildId: $(AzDOBuildId) - artifactName: BlobArtifacts - - - task: PowerShell@2 - displayName: Validate - inputs: - filePath: $(Build.SourcesDirectory)/eng/common/post-build/sourcelink-validation.ps1 - arguments: -InputPath $(Build.ArtifactStagingDirectory)/BlobArtifacts/ - -ExtractPath $(Agent.BuildDirectory)/Extract/ - -GHRepoName $(Build.Repository.Name) - -GHCommit $(Build.SourceVersion) - -SourcelinkCliVersion $(SourceLinkCLIVersion) - continueOnError: true - - - template: /eng/common/templates/job/execute-sdl.yml - parameters: - enable: ${{ parameters.SDLValidationParameters.enable }} - dependsOn: setupMaestroVars - additionalParameters: ${{ parameters.SDLValidationParameters.params }} - continueOnError: ${{ parameters.SDLValidationParameters.continueOnError }} - artifactNames: ${{ parameters.SDLValidationParameters.artifactNames }} - downloadArtifacts: ${{ parameters.SDLValidationParameters.downloadArtifacts }} +- ${{ if and(le(parameters.publishingInfraVersion, 2), eq(parameters.inline, 'true')) }}: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -272,20 +283,24 @@ stages: - template: 
\eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} symbolPublishingAdditionalParameters: ${{ parameters.symbolPublishingAdditionalParameters }} - stageName: 'Net5_Preview7_Publish' - channelName: '.NET 5 Preview 7' - akaMSChannelName: 'net5/preview7' - channelId: ${{ parameters.Net5Preview7ChannelId }} - transportFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5-transport/nuget/v3/index.json' - shippingFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json' - symbolsFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5-symbols/nuget/v3/index.json' + stageName: 'NetCore_Dev6_Publish' + channelName: '.NET 6 Dev' + akaMSChannelName: 'net6/dev' + channelId: ${{ parameters.NetDev6ChannelId }} + transportFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet6-transport/nuget/v3/index.json' + shippingFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet6/nuget/v3/index.json' + symbolsFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet6-symbols/nuget/v3/index.json' - template: \eng\common\templates\post-build\channels\generic-internal-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -298,8 +313,10 @@ stages: shippingFeed: 'https://pkgs.dev.azure.com/dnceng/internal/_packaging/dotnet5-internal/nuget/v3/index.json' symbolsFeed: 'https://pkgs.dev.azure.com/dnceng/internal/_packaging/dotnet5-internal-symbols/nuget/v3/index.json' - - template: \eng\common\templates\post-build\channels\generic-internal-channel.yml + - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -308,12 +325,14 @@ stages: channelName: '.NET 5 RC 1' akaMSChannelName: 'net5/rc1' channelId: ${{ parameters.Net5RC1ChannelId }} - transportFeed: 'https://pkgs.dev.azure.com/dnceng/internal/_packaging/dotnet5-internal-transport/nuget/v3/index.json' - shippingFeed: 'https://pkgs.dev.azure.com/dnceng/internal/_packaging/dotnet5-internal/nuget/v3/index.json' - symbolsFeed: 'https://pkgs.dev.azure.com/dnceng/internal/_packaging/dotnet5-internal-symbols/nuget/v3/index.json' + transportFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5-transport/nuget/v3/index.json' + shippingFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json' + symbolsFeed: 'https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5-symbols/nuget/v3/index.json' - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + 
PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -328,6 +347,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -342,6 +363,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -356,6 +379,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -369,6 +394,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -382,6 +409,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-internal-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -395,6 +424,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -408,6 +439,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -421,6 +454,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + 
BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -434,6 +469,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -447,6 +484,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-internal-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -460,6 +499,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -473,6 +514,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-internal-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -486,6 +529,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -499,6 +544,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -512,6 +559,8 @@ stages: - template: \eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} @@ -525,6 +574,8 @@ stages: - template: 
\eng\common\templates\post-build\channels\generic-public-channel.yml parameters: + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToChannelIds: ${{ parameters.PromoteToChannelIds }} artifactsPublishingAdditionalParameters: ${{ parameters.artifactsPublishingAdditionalParameters }} dependsOn: ${{ parameters.publishDependsOn }} publishInstallersAndChecksums: ${{ parameters.publishInstallersAndChecksums }} diff --git a/eng/common/templates/post-build/setup-maestro-vars.yml b/eng/common/templates/post-build/setup-maestro-vars.yml index b3d29d4498e3..d0cbfb6c6ffd 100644 --- a/eng/common/templates/post-build/setup-maestro-vars.yml +++ b/eng/common/templates/post-build/setup-maestro-vars.yml @@ -1,25 +1,23 @@ +parameters: + BARBuildId: '' + PromoteToChannelIds: '' + jobs: - job: setupMaestroVars displayName: Setup Maestro Vars variables: - template: common-variables.yml - - name: BuildId - value: $[ coalesce(variables.BARBuildId, 0) ] - - name: PromoteToMaestroChannels - value: $[ coalesce(variables.PromoteToChannelIds, 0) ] - - name: PromoteToMaestroChannel - value: $[ coalesce(variables.PromoteToMaestroChannelId, 0) ] pool: vmImage: 'windows-2019' steps: - checkout: none - - task: DownloadBuildArtifacts@0 - displayName: Download Release Configs - condition: and(eq(variables.PromoteToMaestroChannels, 0), eq(variables.PromoteToMaestroChannel, 0)) - inputs: - buildType: current - artifactName: ReleaseConfigs + - ${{ if eq(coalesce(parameters.PromoteToChannelIds, 0), 0) }}: + - task: DownloadBuildArtifacts@0 + displayName: Download Release Configs + inputs: + buildType: current + artifactName: ReleaseConfigs - task: PowerShell@2 name: setReleaseVars @@ -28,7 +26,7 @@ jobs: targetType: inline script: | try { - if ($Env:PromoteToMaestroChannels -eq 0 -and $Env:PromoteToMaestroChannel -eq 0) { + if (!$Env:PromoteToMaestroChannels -or $Env:PromoteToMaestroChannels.Trim() -eq '') { $Content = Get-Content $(Build.StagingDirectory)/ReleaseConfigs/ReleaseConfigs.txt $BarId = $Content | Select -Index 0 @@ -51,7 +49,7 @@ jobs: $BarId = $Env:BARBuildId $Channels = $Env:PromoteToMaestroChannels -split "," $Channels = $Channels -join "][" - $Channels = "[$Channels][$Env:PromoteToMaestroChannel]" + $Channels = "[$Channels]" $IsStableBuild = $buildInfo.stable $AzureDevOpsProject = $buildInfo.azureDevOpsProject @@ -75,3 +73,5 @@ jobs: } env: MAESTRO_API_TOKEN: $(MaestroApiAccessToken) + BARBuildId: ${{ parameters.BARBuildId }} + PromoteToMaestroChannels: ${{ parameters.PromoteToChannelIds }} diff --git a/eng/common/templates/steps/perf-send-to-helix.yml b/eng/common/templates/steps/perf-send-to-helix.yml index b3ea9acf1f16..e003fe2ef29e 100644 --- a/eng/common/templates/steps/perf-send-to-helix.yml +++ b/eng/common/templates/steps/perf-send-to-helix.yml @@ -1,5 +1,6 @@ # Please remember to update the documentation if you make changes to these parameters! 
parameters:
+ ProjectFile: '' # required -- project file that specifies the helix workitems
 HelixSource: 'pr/default' # required -- sources must start with pr/, official/, prodcon/, or agent/
 HelixType: 'tests/default/' # required -- Helix telemetry which identifies what type of data this is; should include "test" for clarity and must end in '/'
 HelixBuild: $(Build.BuildNumber) # required -- the build number Helix will use to identify this -- automatically set to the AzDO build number
@@ -18,9 +19,10 @@ parameters:
 DisplayNamePrefix: 'Send job to Helix' # optional -- rename the beginning of the displayName of the steps in AzDO
 condition: succeeded() # optional -- condition for step to execute; defaults to succeeded()
 continueOnError: false # optional -- determines whether to continue the build if the step errors; defaults to false
+
 steps:
- - powershell: $(Build.SourcesDirectory)\eng\common\msbuild.ps1 $(Build.SourcesDirectory)\eng\common\performance\perfhelixpublish.proj /restore /t:Test /bl:$(Build.SourcesDirectory)\artifacts\log\$env:BuildConfig\SendToHelix.binlog
+ - powershell: $(Build.SourcesDirectory)\eng\common\msbuild.ps1 $(Build.SourcesDirectory)\eng\common\performance\${{ parameters.ProjectFile }} /restore /t:Test /bl:$(Build.SourcesDirectory)\artifacts\log\$env:BuildConfig\SendToHelix.binlog
 displayName: ${{ parameters.DisplayNamePrefix }} (Windows)
 env:
 BuildConfig: $(_BuildConfig)
@@ -42,7 +44,7 @@
 SYSTEM_ACCESSTOKEN: $(System.AccessToken)
 condition: and(${{ parameters.condition }}, eq(variables['Agent.Os'], 'Windows_NT'))
 continueOnError: ${{ parameters.continueOnError }}
- - script: $BUILD_SOURCESDIRECTORY/eng/common/msbuild.sh $BUILD_SOURCESDIRECTORY/eng/common/performance/perfhelixpublish.proj /restore /t:Test /bl:$BUILD_SOURCESDIRECTORY/artifacts/log/$BuildConfig/SendToHelix.binlog
+ - script: $BUILD_SOURCESDIRECTORY/eng/common/msbuild.sh $BUILD_SOURCESDIRECTORY/eng/common/performance/${{ parameters.ProjectFile }} /restore /t:Test /bl:$BUILD_SOURCESDIRECTORY/artifacts/log/$BuildConfig/SendToHelix.binlog
 displayName: ${{ parameters.DisplayNamePrefix }} (Unix)
 env:
 BuildConfig: $(_BuildConfig)
diff --git a/eng/docker/Readme.md b/eng/docker/Readme.md
index 0f73be1740e7..d2482bb08bf0 100644
--- a/eng/docker/Readme.md
+++ b/eng/docker/Readme.md
@@ -5,7 +5,7 @@ Provides reusable docker build infrastructure for the dotnet/runtime repo.
 ## libraries-sdk Dockerfiles
 
 The `libraries-sdk` Dockerfiles can be used to build dotnet sdk docker images
-that contain the current libraries built from source. 
+that contain the current libraries built from source.
 These images can be used to build dockerized dotnet services that target the current libraries.
 Currently, Debian and Windows Nano Server SDKs are supported.
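As a quick illustration of how the `libraries-sdk` images described in the Readme above are typically consumed: build the image from a repo root where the libraries have already been built locally, then layer a service image on top. The sketch below is a hedged example, not part of this patch; the `runtime-libs-sdk` tag and the direct `docker build` invocation are assumptions (the scripts under `eng/docker` are the supported entry point).

```sh
# Minimal sketch, assuming the libraries were already built in this checkout
# and that the Dockerfile's build args keep their defaults.
docker build \
  --file eng/docker/libraries-sdk.linux.Dockerfile \
  --tag runtime-libs-sdk \
  .

# A dockerized service can then base itself on the image, e.g. "FROM runtime-libs-sdk";
# here we just sanity-check that the SDK inside the image runs:
docker run --rm runtime-libs-sdk dotnet --info
```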
diff --git a/eng/docker/libraries-sdk-aspnetcore.linux.Dockerfile b/eng/docker/libraries-sdk-aspnetcore.linux.Dockerfile index 04974370ebac..b0bf080d8ff3 100644 --- a/eng/docker/libraries-sdk-aspnetcore.linux.Dockerfile +++ b/eng/docker/libraries-sdk-aspnetcore.linux.Dockerfile @@ -21,9 +21,9 @@ ARG CONFIGURATION=Release ARG COREFX_SHARED_FRAMEWORK_NAME=Microsoft.NETCore.App ARG ASPNETCORE_SHARED_NAME=Microsoft.AspNetCore.App -ARG SOURCE_COREFX_VERSION=5.0.0 +ARG SOURCE_COREFX_VERSION=6.0.0 ARG TARGET_SHARED_FRAMEWORK=/usr/share/dotnet/shared -ARG TARGET_COREFX_VERSION=3.0.0 +ARG TARGET_COREFX_VERSION=$DOTNET_VERSION COPY --from=corefxbuild \ $TESTHOST_LOCATION/$TFM-$OS-$CONFIGURATION-$ARCH/shared/$COREFX_SHARED_FRAMEWORK_NAME/$SOURCE_COREFX_VERSION/* \ diff --git a/eng/docker/libraries-sdk-aspnetcore.windows.Dockerfile b/eng/docker/libraries-sdk-aspnetcore.windows.Dockerfile index 32f91f5d8595..6e860466a1b4 100644 --- a/eng/docker/libraries-sdk-aspnetcore.windows.Dockerfile +++ b/eng/docker/libraries-sdk-aspnetcore.windows.Dockerfile @@ -11,9 +11,9 @@ ARG CONFIGURATION=Release ARG COREFX_SHARED_FRAMEWORK_NAME=Microsoft.NETCore.App ARG ASPNETCORE_SHARED_NAME=Microsoft.AspNetCore.App -ARG SOURCE_COREFX_VERSION=5.0.0 +ARG SOURCE_COREFX_VERSION=6.0.0 ARG TARGET_SHARED_FRAMEWORK="C:\\Program Files\\dotnet\\shared" -ARG TARGET_COREFX_VERSION=3.0.0 +ARG TARGET_COREFX_VERSION=$DOTNET_VERSION COPY ` $TESTHOST_LOCATION\$TFM-$OS-$CONFIGURATION-$ARCH\shared\$COREFX_SHARED_FRAMEWORK_NAME\$SOURCE_COREFX_VERSION\ ` diff --git a/eng/docker/libraries-sdk.linux.Dockerfile b/eng/docker/libraries-sdk.linux.Dockerfile index 2cc979dba6a0..4f1cb5185a35 100644 --- a/eng/docker/libraries-sdk.linux.Dockerfile +++ b/eng/docker/libraries-sdk.linux.Dockerfile @@ -19,9 +19,9 @@ ARG ARCH=x64 ARG CONFIGURATION=Release ARG COREFX_SHARED_FRAMEWORK_NAME=Microsoft.NETCore.App -ARG SOURCE_COREFX_VERSION=5.0.0 +ARG SOURCE_COREFX_VERSION=6.0.0 ARG TARGET_SHARED_FRAMEWORK=/usr/share/dotnet/shared -ARG TARGET_COREFX_VERSION=3.0.0 +ARG TARGET_COREFX_VERSION=$DOTNET_VERSION COPY --from=corefxbuild \ $TESTHOST_LOCATION/$TFM-$OS-$CONFIGURATION-$ARCH/shared/$COREFX_SHARED_FRAMEWORK_NAME/$SOURCE_COREFX_VERSION/* \ diff --git a/eng/docker/libraries-sdk.windows.Dockerfile b/eng/docker/libraries-sdk.windows.Dockerfile index 50c3f357c9b7..e88f52d7ce7e 100644 --- a/eng/docker/libraries-sdk.windows.Dockerfile +++ b/eng/docker/libraries-sdk.windows.Dockerfile @@ -10,9 +10,9 @@ ARG ARCH=x64 ARG CONFIGURATION=Release ARG COREFX_SHARED_FRAMEWORK_NAME=Microsoft.NETCore.App -ARG SOURCE_COREFX_VERSION=5.0.0 +ARG SOURCE_COREFX_VERSION=6.0.0 ARG TARGET_SHARED_FRAMEWORK="C:\\Program Files\\dotnet\\shared" -ARG TARGET_COREFX_VERSION=3.0.0 +ARG TARGET_COREFX_VERSION=$DOTNET_VERSION COPY ` $TESTHOST_LOCATION\$TFM-$OS-$CONFIGURATION-$ARCH\shared\$COREFX_SHARED_FRAMEWORK_NAME\$SOURCE_COREFX_VERSION\ ` diff --git a/eng/illink.targets b/eng/illink.targets index 601b0b798fbc..928e99348585 100644 --- a/eng/illink.targets +++ b/eng/illink.targets @@ -27,7 +27,7 @@ $([MSBuild]::NormalizeDirectory('$(PkgMicrosoft_NET_ILLink_Tasks)', 'tools')) $(ILLinkTasksDir)netcoreapp3.0/ILLink.Tasks.dll - $(ILLinkTasksDir)$(NetFrameworkCurrent)/ILLink.Tasks.dll + $(ILLinkTasksDir)net472/ILLink.Tasks.dll $(IntermediateOutputPath)$(TargetName)$(TargetExt) $(IntermediateOutputPath)$(TargetName).pdb $(IntermediateOutputPath)PreTrim/ @@ -184,22 +184,37 @@ + + + + + $(ILLinkArgs) -t + + + $(ILLinkArgs) --strip-link-attributes false --ignore-link-attributes true + + $(ILLinkArgs) 
--skip-unresolved true + + $(ILLinkArgs) --disable-opt unusedinterfaces + + + - + - $(ILLinkArgs)-r $(TargetName) + $(ILLinkArgs) -r $(TargetName) $(ILLinkArgs) -c skip $(ILLinkArgs) -u skip $(ILLinkArgs) -p link $(TargetName) - - $(ILLinkArgs) -t $(ILLinkArgs) -b true $(ILLinkArgs) --strip-descriptors false @@ -207,15 +222,26 @@ $(ILLinkArgs) -x "$(ILLinkTrimXmlLibraryBuild)" $(ILLinkArgs) --strip-substitutions false - - - $(ILLinkArgs) --strip-link-attributes false --ignore-link-attributes true - - $(ILLinkArgs) --skip-unresolved true - - $(ILLinkArgs) --disable-opt unusedinterfaces $(ILLinkArgs) --keep-dep-attributes true + + IL2008;IL2009;IL2012;IL2025;IL2026;IL2035;IL2050 + + $(LinkerNoWarn);IL2032;IL2055;IL2057;IL2058;IL2059;IL2060;IL2061 + + $(LinkerNoWarn);IL2062;IL2063;IL2064;IL2065;IL2066 + + $(LinkerNoWarn);IL2067;IL2068;IL2069;IL2070;IL2071;IL2072;IL2073;IL2074;IL2075;IL2076;IL2077;IL2078;IL2079;IL2080;IL2081;IL2082;IL2083;IL2084;IL2085;IL2086;IL2087;IL2088;IL2089;IL2090;IL2091 + $(ILLinkArgs) --nowarn $(LinkerNoWarn) @@ -255,9 +281,8 @@ - <_DotNetHostDirectory>$(NetCoreRoot) - <_DotNetHostFileName>dotnet - <_DotNetHostFileName Condition=" '$(OS)' == 'Windows_NT' ">dotnet.exe + <_DotNetHostDirectory>$(DotNetRoot) + <_DotNetHostFileName>$([System.IO.Path]::GetFileName('$(DotNetTool)')) $([MSBuild]::NormalizeDirectory('$(CoreCLRArtifactsPath)')) + $([MSBuild]::NormalizeDirectory('$(CoreCLRArtifactsPath)','PDB')) $([MSBuild]::NormalizeDirectory('$(CoreCLRArtifactsPath)','sharedFramework')) + $([MSBuild]::NormalizeDirectory('$(CoreCLRSharedFrameworkDir)','PDB')) $([MSBuild]::NormalizeDirectory('$(CoreCLRArtifactsPath)','$(CoreCLRCrossTargetComponentDirName)','sharedFramework')) @@ -112,18 +114,19 @@ + $(CoreCLRSharedFrameworkPdbDir)*.pdb; + $(CoreCLRSharedFrameworkPdbDir)*.dbg; + $(CoreCLRSharedFrameworkPdbDir)*.dwarf" /> - + Include="$(CoreCLRArtifactsPdbDir)System.Private.CoreLib.pdb" /> + + + $(CoreCLRSharedFrameworkPdbDir)*.pdb; + $(CoreCLRSharedFrameworkPdbDir)*.dbg; + $(CoreCLRSharedFrameworkPdbDir)*.dwarf" /> runtime/$(CoreCLRCrossTargetComponentDirName)_$(TargetArchitecture)/native @@ -192,7 +195,7 @@ $(LibrariesNativeArtifactsPath)dotnet.js; $(LibrariesNativeArtifactsPath)dotnet.wasm; $(LibrariesNativeArtifactsPath)dotnet.timezones.blat; - $(LibrariesNativeArtifactsPath)icudt.dat;" + $(LibrariesNativeArtifactsPath)*.dat;" IsNative="true" /> diff --git a/eng/native/build-commons.sh b/eng/native/build-commons.sh index b976f5fdc6cf..7315e43f2761 100755 --- a/eng/native/build-commons.sh +++ b/eng/native/build-commons.sh @@ -48,11 +48,6 @@ check_prereqs() { echo "Checking prerequisites..." - if ! cmake --help 2>&1 | grep -q \\-B; then - echo "Please install cmake v3.14.5 or newer from https://www.cmake.org/download/." - exit 1 - fi - if [[ "$__HostOS" == "OSX" ]]; then # Check presence of pkg-config on the path command -v pkg-config 2>/dev/null || { echo >&2 "Please install pkg-config before running this script, see https://github.com/dotnet/runtime/blob/master/docs/workflow/requirements/macos-requirements.md"; exit 1; } @@ -163,6 +158,17 @@ EOF return fi + SAVED_CFLAGS="${CFLAGS}" + SAVED_CXXFLAGS="${CXXFLAGS}" + SAVED_LDFLAGS="${LDFLAGS}" + + # Let users provide additional compiler/linker flags via EXTRA_CFLAGS/EXTRA_CXXFLAGS/EXTRA_LDFLAGS. + # If users directly override CFLAGS/CXXFLAGS/LDFLAGS, that may lead to some configure tests working incorrectly. + # See https://github.com/dotnet/runtime/issues/35727 for more information.
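+ # For example (flag values are illustrative; the build invocation is a placeholder): + #   EXTRA_CFLAGS="-g3" EXTRA_CXXFLAGS="-g3" EXTRA_LDFLAGS="-fuse-ld=lld" ./build.sh <args>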
+ export CFLAGS="${CFLAGS} ${EXTRA_CFLAGS}" + export CXXFLAGS="${CXXFLAGS} ${EXTRA_CXXFLAGS}" + export LDFLAGS="${LDFLAGS} ${EXTRA_LDFLAGS}" + if [[ "$__StaticAnalyzer" == 1 ]]; then pushd "$intermediatesDir" @@ -177,10 +183,14 @@ EOF cmake_command="emcmake $cmake_command" fi - echo "Executing $cmake_command --build \"$intermediatesDir\" --target install -j $__NumProc" - $cmake_command --build "$intermediatesDir" --target install -j "$__NumProc" + echo "Executing $cmake_command --build \"$intermediatesDir\" --target install -- -j $__NumProc" + $cmake_command --build "$intermediatesDir" --target install -- -j "$__NumProc" fi + CFLAGS="${SAVED_CFLAGS}" + CXXFLAGS="${SAVED_CXXFLAGS}" + LDFLAGS="${SAVED_LDFLAGS}" + local exit_code="$?" if [[ "$exit_code" != 0 ]]; then echo "${__ErrMsgPrefix}Failed to build \"$message\"." @@ -234,6 +244,20 @@ __BuildOS=$os __msbuildonunsupportedplatform=0 +# Get the number of processors available to the scheduler +# Other techniques such as `nproc` only get the number of +# processors available to a single process. +platform="$(uname)" +if [[ "$platform" == "FreeBSD" ]]; then + __NumProc=$(sysctl hw.ncpu | awk '{ print $2+1 }') +elif [[ "$platform" == "NetBSD" || "$platform" == "SunOS" ]]; then + __NumProc=$(($(getconf NPROCESSORS_ONLN)+1)) +elif [[ "$platform" == "Darwin" ]]; then + __NumProc=$(($(getconf _NPROCESSORS_ONLN)+1)) +else + __NumProc=$(nproc --all) +fi + while :; do if [[ "$#" -le 0 ]]; then break @@ -400,20 +424,6 @@ while :; do shift done -# Get the number of processors available to the scheduler -# Other techniques such as `nproc` only get the number of -# processors available to a single process. -platform="$(uname)" -if [[ "$platform" == "FreeBSD" ]]; then - __NumProc=$(sysctl hw.ncpu | awk '{ print $2+1 }') -elif [[ "$platform" == "NetBSD" || "$platform" == "SunOS" ]]; then - __NumProc=$(($(getconf NPROCESSORS_ONLN)+1)) -elif [[ "$platform" == "Darwin" ]]; then - __NumProc=$(($(getconf _NPROCESSORS_ONLN)+1)) -else - __NumProc=$(nproc --all) -fi - __CommonMSBuildArgs="/p:TargetArchitecture=$__BuildArch /p:Configuration=$__BuildType /p:TargetOS=$__TargetOS /nodeReuse:false $__OfficialBuildIdArg $__SignTypeArg $__SkipRestoreArg" # Configure environment if we are doing a verbose build diff --git a/eng/native/configurecompiler.cmake b/eng/native/configurecompiler.cmake index 9d1b88f08011..d2cb169cb0e7 100644 --- a/eng/native/configurecompiler.cmake +++ b/eng/native/configurecompiler.cmake @@ -8,8 +8,6 @@ set(CMAKE_C_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) -cmake_policy(SET CMP0083 NEW) - include(CheckCXXCompilerFlag) # "configureoptimization.cmake" must be included after CLR_CMAKE_HOST_UNIX has been set. 
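The configurecompiler.cmake hunks below replace add_link_options generator expressions with the add_linker_flag helper that this diff adds to eng/native/functions.cmake; a minimal sketch of its semantics, inferred from that definition:

    # add_linker_flag(<flag> [<CONFIG> ...])
    add_linker_flag(-Wl,--build-id=sha1)  # no configs: appends to CMAKE_EXE_LINKER_FLAGS and CMAKE_SHARED_LINKER_FLAGS
    add_linker_flag(/LTCG RELEASE)        # configs given: appends only to the per-config CMAKE_EXE/SHARED_LINKER_FLAGS_RELEASE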
@@ -19,9 +17,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/configureoptimization.cmake) # Initialize Cmake compiler flags and other variables #----------------------------------------------------- -if(MSVC) - add_compile_options(/Zi /FC /Zc:strictStrings) -elseif (CLR_CMAKE_HOST_UNIX) +if (CLR_CMAKE_HOST_UNIX) add_compile_options(-g) add_compile_options(-Wall) if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") @@ -40,11 +36,18 @@ set(CMAKE_CXX_FLAGS_CHECKED "") set(CMAKE_EXE_LINKER_FLAGS_CHECKED "") set(CMAKE_SHARED_LINKER_FLAGS_CHECKED "") +set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "") +set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "") +set(CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO "") +set(CMAKE_EXE_LINKER_FLAGS_DEBUG "") +set(CMAKE_EXE_LINKER_FLAGS_DEBUG "") +set(CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "") + add_compile_definitions("$<$,$>:DEBUG;_DEBUG;_DBG;URTBLDENV_FRIENDLY=Checked;BUILDENV_CHECKED=1>") add_compile_definitions("$<$,$>:NDEBUG;URTBLDENV_FRIENDLY=Retail>") if (MSVC) - add_link_options(/GUARD:CF) + add_linker_flag(/GUARD:CF) # Linker flags # @@ -57,48 +60,51 @@ if (MSVC) endif () #Do not create Side-by-Side Assembly Manifest - add_link_options($<$,SHARED_LIBRARY>:/MANIFEST:NO>) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /MANIFEST:NO") # can handle addresses larger than 2 gigabytes - add_link_options($<$,SHARED_LIBRARY>:/LARGEADDRESSAWARE>) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /LARGEADDRESSAWARE") #Compatible with Data Execution Prevention - add_link_options($<$,SHARED_LIBRARY>:/NXCOMPAT>) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /NXCOMPAT") #Use address space layout randomization - add_link_options($<$,SHARED_LIBRARY>:/DYNAMICBASE>) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DYNAMICBASE") #shrink pdb size - add_link_options($<$,SHARED_LIBRARY>:/PDBCOMPRESS>) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /PDBCOMPRESS") - add_link_options($<$,SHARED_LIBRARY>:/DEBUG>) - add_link_options($<$,SHARED_LIBRARY>:/IGNORE:4197,4013,4254,4070,4221>) - add_link_options($<$,SHARED_LIBRARY>:/SUBSYSTEM:WINDOWS,${WINDOWS_SUBSYSTEM_VERSION}>) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DEBUG") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /IGNORE:4197,4013,4254,4070,4221") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /SUBSYSTEM:WINDOWS,${WINDOWS_SUBSYSTEM_VERSION}") set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /IGNORE:4221") - add_link_options($<$,EXECUTABLE>:/DEBUG>) - add_link_options($<$,EXECUTABLE>:/PDBCOMPRESS>) - add_link_options($<$,EXECUTABLE>:/STACK:1572864>) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /PDBCOMPRESS") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /STACK:1572864") # Debug build specific flags - add_link_options($<$,$>,$,SHARED_LIBRARY>>:/NOVCFEATURE>) + set(CMAKE_SHARED_LINKER_FLAGS_DEBUG "${CMAKE_SHARED_LINKER_FLAGS_DEBUG} /NOVCFEATURE") + set(CMAKE_SHARED_LINKER_FLAGS_CHECKED "${CMAKE_SHARED_LINKER_FLAGS_CHECKED} /NOVCFEATURE") # Checked build specific flags - add_link_options($<$:/INCREMENTAL:NO>) # prevent "warning LNK4075: ignoring '/INCREMENTAL' due to '/OPT:REF' specification" - add_link_options($<$:/OPT:REF>) - add_link_options($<$:/OPT:NOICF>) + add_linker_flag(/INCREMENTAL:NO CHECKED) # prevent "warning LNK4075: ignoring '/INCREMENTAL' due to '/OPT:REF' specification" + add_linker_flag(/OPT:REF CHECKED) + add_linker_flag(/OPT:NOICF 
CHECKED) # Release build specific flags - add_link_options($<$:/LTCG>) - add_link_options($<$:/OPT:REF>) - add_link_options($<$:/OPT:ICF>) + add_linker_flag(/LTCG RELEASE) + add_linker_flag(/OPT:REF RELEASE) + add_linker_flag(/OPT:ICF RELEASE) + add_linker_flag(/INCREMENTAL:NO RELEASE) set(CMAKE_STATIC_LINKER_FLAGS_RELEASE "${CMAKE_STATIC_LINKER_FLAGS_RELEASE} /LTCG") # ReleaseWithDebugInfo build specific flags - add_link_options($<$:/LTCG>) - add_link_options($<$:/OPT:REF>) - add_link_options($<$:/OPT:ICF>) + add_linker_flag(/LTCG RELWITHDEBINFO) + add_linker_flag(/OPT:REF RELWITHDEBINFO) + add_linker_flag(/OPT:ICF RELWITHDEBINFO) set(CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") # Force uCRT to be dynamically linked for Release build - add_link_options("$<$:/NODEFAULTLIB:libucrt.lib;/DEFAULTLIB:ucrt.lib>") + add_linker_flag(/NODEFAULTLIB:libucrt.lib RELEASE) + add_linker_flag(/DEFAULTLIB:ucrt.lib RELEASE) elseif (CLR_CMAKE_HOST_UNIX) # Set the values to display when interactively configuring CMAKE_BUILD_TYPE @@ -157,11 +163,10 @@ elseif (CLR_CMAKE_HOST_UNIX) # -fdata-sections -ffunction-sections: each function has own section instead of one per .o file (needed for --gc-sections) # -O1: optimization level used instead of -O0 to avoid compile error "invalid operand for inline asm constraint" - add_compile_definitions("$<$,$>:${CLR_SANITIZE_CXX_OPTIONS};-fdata-sections;--ffunction-sections;-O1>") - add_link_options($<$,$>,$,EXECUTABLE>>:${CLR_SANITIZE_LINK_OPTIONS}>) - + add_compile_options("$<$,$>:${CLR_SANITIZE_CXX_OPTIONS};-fdata-sections;--ffunction-sections;-O1>") + add_linker_flag("${CLR_SANITIZE_LINK_OPTIONS}" DEBUG CHECKED) # -Wl and --gc-sections: drop unused sections\functions (similar to Windows /Gy function-level-linking) - add_link_options("$<$,$>,$,SHARED_LIBRARY>>:${CLR_SANITIZE_LINK_OPTIONS};-Wl,--gc-sections>") + add_linker_flag("-Wl,--gc-sections" DEBUG CHECKED) endif () endif(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL DEBUG OR UPPERCASE_CMAKE_BUILD_TYPE STREQUAL CHECKED) endif(MSVC) @@ -173,15 +178,18 @@ endif(MSVC) # ./build-native.sh cmakeargs "-DCLR_ADDITIONAL_COMPILER_OPTIONS=<...>" cmakeargs "-DCLR_ADDITIONAL_LINKER_FLAGS=<...>" # if(CLR_CMAKE_HOST_UNIX) - add_link_options(${CLR_ADDITIONAL_LINKER_FLAGS}) + foreach(ADDTL_LINKER_FLAG ${CLR_ADDITIONAL_LINKER_FLAGS}) + add_linker_flag(${ADDTL_LINKER_FLAG}) + endforeach() endif(CLR_CMAKE_HOST_UNIX) if(CLR_CMAKE_HOST_LINUX) add_compile_options($<$:-Wa,--noexecstack>) - add_link_options(-Wl,--build-id=sha1 -Wl,-z,relro,-z,now) + add_linker_flag(-Wl,--build-id=sha1) + add_linker_flag(-Wl,-z,relro,-z,now) elseif(CLR_CMAKE_HOST_FREEBSD) add_compile_options($<$:-Wa,--noexecstack>) - add_link_options(LINKER:--build-id=sha1) + add_linker_flag("-Wl,--build-id=sha1") elseif(CLR_CMAKE_HOST_SUNOS) add_compile_options($<$:-Wa,--noexecstack>) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstack-protector") @@ -351,7 +359,7 @@ if (CLR_CMAKE_HOST_UNIX) endif() # Some architectures (e.g., ARM) assume char type is unsigned while CoreCLR assumes char is signed - # as x64 does. It has been causing issues in ARM (https://github.com/dotnet/coreclr/issues/4746) + # as x64 does. 
This has been causing issues on ARM (https://github.com/dotnet/runtime/issues/5778) add_compile_options(-fsigned-char) # We mark the function which needs exporting with DLLEXPORT @@ -359,9 +367,9 @@ # Specify the minimum supported version of macOS if(CLR_CMAKE_HOST_OSX) - set(MACOS_VERSION_MIN_FLAGS -mmacosx-version-min=10.12) + set(MACOS_VERSION_MIN_FLAGS -mmacosx-version-min=10.13) add_compile_options(${MACOS_VERSION_MIN_FLAGS}) - add_link_options(${MACOS_VERSION_MIN_FLAGS}) + add_linker_flag(${MACOS_VERSION_MIN_FLAGS}) endif(CLR_CMAKE_HOST_OSX) endif(CLR_CMAKE_HOST_UNIX) @@ -417,25 +425,20 @@ endif(CLR_CMAKE_HOST_UNIX) if (MSVC) # Compile options for targeting windows - # The following options are set by the razzle build add_compile_options(/TP) # compile all files as C++ add_compile_options(/nologo) # Suppress Startup Banner add_compile_options(/W3) # set warning level to 3 add_compile_options(/WX) # treat warnings as errors add_compile_options(/Oi) # enable intrinsics add_compile_options(/Oy-) # disable frame-pointer omission (omitting frame pointers would make function calls quicker) - add_compile_options(/U_MT) # undefine the predefined _MT macro - add_compile_options(/GF) # enable read-only string pooling add_compile_options(/Gm-) # disable minimal rebuild add_compile_options(/Zp8) # pack structs on 8-byte boundary add_compile_options(/Gy) # separate functions for linker - add_compile_options(/Zc:wchar_t-) # C++ language conformance: wchar_t is NOT the native type, but a typedef - add_compile_options(/Zc:forScope) # C++ language conformance: enforce Standard C++ for scoping rules set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GR-") # disable C++ RTTI add_compile_options(/FC) # use full pathnames in diagnostics add_compile_options(/MP) # Build with Multiple Processes (number of processes equal to the number of processors) - add_compile_options(/GS) # Buffer Security Check add_compile_options(/Zm200) # Specify Precompiled Header Memory Allocation Limit (200% of the compiler default) + add_compile_options(/Zc:strictStrings) # Disable string-literal to char* or wchar_t* conversion add_compile_options(/wd4960 /wd4961 /wd4603 /wd4627 /wd4838 /wd4456 /wd4457 /wd4458 /wd4459 /wd4091 /we4640) @@ -517,7 +520,7 @@ if(CLR_CMAKE_ENABLE_CODE_COVERAGE) add_compile_options(-fprofile-arcs) add_compile_options(-ftest-coverage) - add_link_options(--coverage) + add_linker_flag(--coverage) else() message(FATAL_ERROR "Code coverage builds not supported on current platform") endif(CLR_CMAKE_HOST_UNIX) diff --git a/eng/native/configureplatform.cmake b/eng/native/configureplatform.cmake index c65274141dd4..dbfadfda5991 100644 --- a/eng/native/configureplatform.cmake +++ b/eng/native/configureplatform.cmake @@ -1,4 +1,3 @@ -include(CheckPIESupported) include(${CMAKE_CURRENT_LIST_DIR}/functions.cmake) # If set, indicates that this is not an officially supported release @@ -382,24 +381,11 @@ else() endif() if(NOT CLR_CMAKE_TARGET_BROWSER) - # Skip check_pie_supported call on Android as ld from llvm toolchain with NDK API level 21 - # complains about missing linker flag `-no-pie` (while level 28's ld does support this flag, - # but since we know that PIE is supported, we can safely skip this redundant check). - # # The default linker on Solaris also does not support PIE. 
- if(NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_TARGET_SUNOS) - # All code we build should be compiled as position independent - get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) - if("CXX" IN_LIST languages) - set(CLR_PIE_LANGUAGE CXX) - else() - set(CLR_PIE_LANGUAGE C) - endif() - check_pie_supported(OUTPUT_VARIABLE PIE_SUPPORT_OUTPUT LANGUAGES ${CLR_PIE_LANGUAGE}) - if(NOT MSVC AND NOT CMAKE_${CLR_PIE_LANGUAGE}_LINK_PIE_SUPPORTED) - message(WARNING "PIE is not supported at link time: ${PIE_SUPPORT_OUTPUT}.\n" - "PIE link options will not be passed to linker.") - endif() + if(NOT CLR_CMAKE_TARGET_ANDROID AND NOT CLR_CMAKE_TARGET_SUNOS AND NOT CLR_CMAKE_TARGET_OSX AND NOT CLR_CMAKE_HOST_TVOS AND NOT CLR_CMAKE_HOST_IOS AND NOT MSVC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") + add_compile_options($<$,EXECUTABLE>:-fPIE>) + add_compile_options($<$,SHARED_LIBRARY>:-fPIC>) endif() set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/eng/native/functions.cmake b/eng/native/functions.cmake index 8b73581ed142..1509a17fa59b 100644 --- a/eng/native/functions.cmake +++ b/eng/native/functions.cmake @@ -148,7 +148,7 @@ function(preprocess_compile_asm) set(options "") set(oneValueArgs TARGET OUTPUT_OBJECTS) set(multiValueArgs ASM_FILES) - cmake_parse_arguments(PARSE_ARGV 0 COMPILE_ASM "${options}" "${oneValueArgs}" "${multiValueArgs}") + cmake_parse_arguments(COMPILE_ASM "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGV}) get_include_directories_asm(ASM_INCLUDE_DIRECTORIES) @@ -241,7 +241,7 @@ function(target_precompile_header) set(options "") set(oneValueArgs TARGET HEADER) set(multiValueArgs ADDITIONAL_INCLUDE_DIRECTORIES) - cmake_parse_arguments(PARSE_ARGV 0 PRECOMPILE_HEADERS "${options}" "${oneValueArgs}" "${multiValueArgs}") + cmake_parse_arguments(PRECOMPILE_HEADERS "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGV}) if ("${PRECOMPILE_HEADERS_TARGET}" STREQUAL "") message(SEND_ERROR "No target supplied to target_precompile_header.") @@ -360,7 +360,7 @@ endfunction() function(install_clr) set(oneValueArgs ADDITIONAL_DESTINATION) set(multiValueArgs TARGETS) - cmake_parse_arguments(PARSE_ARGV 0 INSTALL_CLR "${options}" "${oneValueArgs}" "${multiValueArgs}") + cmake_parse_arguments(INSTALL_CLR "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGV}) if ("${INSTALL_CLR_TARGETS}" STREQUAL "") message(FATAL_ERROR "At least one target must be passed to install_clr(TARGETS )") @@ -416,6 +416,15 @@ function(disable_pax_mprotect targetName) endif() endfunction() +if (CMAKE_VERSION VERSION_LESS "3.12") + # Polyfill add_compile_definitions when it is unavailable + function(add_compile_definitions) + get_directory_property(DIR_COMPILE_DEFINITIONS COMPILE_DEFINITIONS) + list(APPEND DIR_COMPILE_DEFINITIONS ${ARGV}) + set_directory_properties(PROPERTIES COMPILE_DEFINITIONS "${DIR_COMPILE_DEFINITIONS}") + endfunction() +endif() + function(_add_executable) if(NOT WIN32) add_executable(${ARGV} ${VERSION_FILE_PATH}) @@ -479,3 +488,16 @@ function(generate_module_index Target ModuleIndexFile) DEPENDS ${ModuleIndexFile} ) endfunction(generate_module_index) + +# add_linker_flag(Flag [Config1 Config2 ...]) +function(add_linker_flag Flag) + if (ARGN STREQUAL "") + set("CMAKE_EXE_LINKER_FLAGS" "${CMAKE_EXE_LINKER_FLAGS} ${Flag}" PARENT_SCOPE) + set("CMAKE_SHARED_LINKER_FLAGS" "${CMAKE_SHARED_LINKER_FLAGS} ${Flag}" PARENT_SCOPE) + else() + foreach(Config ${ARGN}) + set("CMAKE_EXE_LINKER_FLAGS_${Config}" "${CMAKE_EXE_LINKER_FLAGS_${Config}} ${Flag}" PARENT_SCOPE) + 
set("CMAKE_SHARED_LINKER_FLAGS_${Config}" "${CMAKE_SHARED_LINKER_FLAGS_${Config}} ${Flag}" PARENT_SCOPE) + endforeach() + endif() +endfunction() diff --git a/eng/native/gen-buildsys.sh b/eng/native/gen-buildsys.sh index f27bb33e3577..1b4c2e02c597 100755 --- a/eng/native/gen-buildsys.sh +++ b/eng/native/gen-buildsys.sh @@ -91,6 +91,9 @@ if [[ "$build_arch" == "wasm" ]]; then cmake_command="emcmake $cmake_command" fi +# We have to be able to build with CMake 3.6.2, so we can't use the -S or -B options +pushd "$3" + # Include CMAKE_USER_MAKE_RULES_OVERRIDE as uninitialized since it will hold its value in the CMake cache otherwise can cause issues when branch switching $cmake_command \ -G "$generator" \ @@ -98,5 +101,6 @@ $cmake_command \ "-DCMAKE_INSTALL_PREFIX=$__CMakeBinDir" \ $cmake_extra_defines \ $__UnprocessedCMakeArgs \ - -S "$1" \ - -B "$3" + "$1" + +popd diff --git a/eng/native/init-distro-rid.sh b/eng/native/init-distro-rid.sh index b6921a903469..f5d14f2f7cd5 100644 --- a/eng/native/init-distro-rid.sh +++ b/eng/native/init-distro-rid.sh @@ -146,13 +146,6 @@ initDistroRidGlobal() fi fi - if [ "$buildArch" = "armel" ]; then - # Armel cross build is Tizen specific and does not support Portable RID build - __PortableBuild=0 - export __PortableBuild - isPortable=0 - fi - initNonPortableDistroRid "${targetOs}" "${buildArch}" "${isPortable}" "${rootfsDir}" if [ "$buildArch" = "wasm" ]; then diff --git a/eng/notSupported.SourceBuild.targets b/eng/notSupported.SourceBuild.targets deleted file mode 100644 index 743c4a3ace00..000000000000 --- a/eng/notSupported.SourceBuild.targets +++ /dev/null @@ -1,29 +0,0 @@ - - - - - $(IntermediateOutputPath) - - - - - - <_notSupportedSourceDirectory>$([MSBuild]::NormalizeDirectory('$(DotNetSourceBuildIntermediatePath)', '$(MSBuildProjectName)', '$(TargetFramework)-$(TargetOS)')) - $(_notSupportedSourceDirectory)$(TargetName).notsupported.cs - - - - - - - - - - - - \ No newline at end of file diff --git a/eng/packaging.props b/eng/packaging.props index 9db71c397b49..06c250ee1b99 100644 --- a/eng/packaging.props +++ b/eng/packaging.props @@ -20,7 +20,7 @@ true - 5.0.0 + 6.0.0 $(ProductVersion) @@ -72,5 +72,8 @@ Include="$(PkgDir)useSharedDesignerContext.txt"> true + + + diff --git a/eng/packaging.targets b/eng/packaging.targets index e0f7edaeac7f..47484ae1f4e8 100644 --- a/eng/packaging.targets +++ b/eng/packaging.targets @@ -2,9 +2,9 @@ + - https://github.com/dotnet/runtime/issues/27470 -> Why reference assets were removed from the package + - https://github.com/dotnet/aspnetcore/issues/11206 -> Why ASP.NET required a ref to be added back for netcoreapp + - https://github.com/dotnet/runtime/issues/29966 -> Issue tracking to work of readding a ref to netcoreapp --> @@ -26,4 +26,16 @@ '$(_excludeCompile)' == 'true' and '%(Dependency.Identity)' != '_._'" /> - \ No newline at end of file + + + $(MSBuildProjectDirectory)\..\pkg\$(MSBuildProjectName).pkgproj + + _BuildPkgProj + + + + + + diff --git a/eng/pipelines/common/createdump-entitlements.plist b/eng/pipelines/common/createdump-entitlements.plist new file mode 100644 index 000000000000..1f2d3798ee2f --- /dev/null +++ b/eng/pipelines/common/createdump-entitlements.plist @@ -0,0 +1,12 @@ + + + + + com.apple.security.cs.allow-dyld-environment-variables + + com.apple.security.cs.disable-library-validation + + com.apple.security.cs.debugger + + + diff --git a/eng/pipelines/common/entitlements.plist b/eng/pipelines/common/entitlements.plist new file mode 100644 
index 000000000000..f4ea418fb45a --- /dev/null +++ b/eng/pipelines/common/entitlements.plist @@ -0,0 +1,18 @@ + + + + + com.apple.security.cs.allow-jit + + com.apple.security.cs.allow-unsigned-executable-memory + + com.apple.security.cs.allow-dyld-environment-variables + + com.apple.security.cs.disable-library-validation + + com.apple.security.cs.debugger + + com.apple.security.get-task-allow + + + diff --git a/eng/pipelines/common/global-build-job.yml b/eng/pipelines/common/global-build-job.yml index dcbba9fb3654..221f8d366262 100644 --- a/eng/pipelines/common/global-build-job.yml +++ b/eng/pipelines/common/global-build-job.yml @@ -25,8 +25,8 @@ jobs: dependsOn: checkout pool: ${{ parameters.pool }} container: ${{ parameters.container }} - timeoutInMinutes: ${{ parameters.timeoutInMinutes }} condition: and(succeeded(), ${{ parameters.condition }}) + timeoutInMinutes: ${{ parameters.timeoutInMinutes }} workspace: clean: all diff --git a/eng/pipelines/common/macos-sign-with-entitlements.yml b/eng/pipelines/common/macos-sign-with-entitlements.yml new file mode 100644 index 000000000000..6c65193845d7 --- /dev/null +++ b/eng/pipelines/common/macos-sign-with-entitlements.yml @@ -0,0 +1,65 @@ +parameters: + filesToSign: [] + +steps: + - task: UseDotNet@2 + displayName: 'Use .NET Core SDK 2.1.808' + inputs: + packageType: sdk + version: 2.1.808 + + - ${{ each file in parameters.filesToSign }}: + - script: codesign -s - -f --entitlements ${{ file.entitlementsFile }} ${{ file.path }}/${{ file.name }} + displayName: 'Add entitlements to ${{ file.name }}' + + - task: CopyFiles@2 + displayName: 'Copy entitled file ${{ file.name }}' + inputs: + contents: '${{ file.path }}/${{ file.name }}' + targetFolder: '$(Build.ArtifactStagingDirectory)/mac_entitled' + overWrite: true + + - task: ArchiveFiles@2 + displayName: 'Zip MacOS files for signing' + inputs: + rootFolderOrFile: '$(Build.ArtifactStagingDirectory)/mac_entitled' + archiveFile: '$(Build.ArtifactStagingDirectory)/mac_entitled_to_sign.zip' + archiveType: zip + includeRootFolder: true + replaceExistingArchive: true + + - task: SFP.build-tasks.custom-build-task-1.EsrpCodeSigning@1 + displayName: 'ESRP CodeSigning' + inputs: + ConnectedServiceName: 'ESRP CodeSigning' + FolderPath: '$(Build.ArtifactStagingDirectory)/' + Pattern: 'mac_entitled_to_sign.zip' + UseMinimatch: true + signConfigType: inlineSignParams + inlineOperation: | + [ + { + "keyCode": "CP-401337-Apple", + "operationCode": "MacAppDeveloperSign", + "parameters" : { + "hardening": "Enable" + }, + "toolName": "sign", + "toolVersion": "1.0" + } + ] + + - task: ExtractFiles@1 + displayName: 'Extract MacOS after signing' + inputs: + archiveFilePatterns: '$(Build.ArtifactStagingDirectory)/mac_entitled_to_sign.zip' + destinationFolder: '$(Build.ArtifactStagingDirectory)/mac_entitled_signed' + + - ${{ each file in parameters.filesToSign }}: + - task: CopyFiles@2 + displayName: 'Copy ${{ file.name }} to destination' + inputs: + contents: ${{ file.name }} + sourceFolder: '$(Build.ArtifactStagingDirectory)/mac_entitled_signed' + targetFolder: '${{ file.path }}' + overWrite: true diff --git a/eng/pipelines/common/platform-matrix.yml b/eng/pipelines/common/platform-matrix.yml index b4f802024006..cfec86006703 100644 --- a/eng/pipelines/common/platform-matrix.yml +++ b/eng/pipelines/common/platform-matrix.yml @@ -86,11 +86,7 @@ jobs: archType: x64 platform: Linux_musl_x64 container: - # alpine coreclr cmake errors on newer builds - ${{ if eq(parameters.runtimeFlavor, 'mono') }}: - image: 
alpine-3.9-WithNode-0fc54a3-20200131134036 - ${{ if eq(parameters.runtimeFlavor, 'coreclr') }}: - image: alpine-3.9-WithNode-0fc54a3-20190918214015 + image: alpine-3.9-WithNode-20200602002639-0fc54a3 registry: mcr jobParameters: runtimeFlavor: ${{ parameters.runtimeFlavor }} @@ -128,7 +124,7 @@ jobs: # Linux x64 -- ${{ if or(containsValue(parameters.platforms, 'Linux_x64'), in(parameters.platformGroup, 'all', 'gcstress')) }}: +- ${{ if or(containsValue(parameters.platforms, 'Linux_x64'), containsValue(parameters.platforms, 'CoreClrTestBuildHost'), in(parameters.platformGroup, 'all', 'gcstress')) }}: - template: xplat-setup.yml parameters: jobTemplate: ${{ parameters.jobTemplate }} @@ -161,7 +157,7 @@ jobs: archType: wasm platform: Browser_wasm container: - image: ubuntu-18.04-webassembly-20200529220811-6a6da63 + image: ubuntu-18.04-webassembly-20200827125937-9740252 registry: mcr jobParameters: runtimeFlavor: ${{ parameters.runtimeFlavor }} @@ -408,7 +404,7 @@ jobs: # macOS x64 -- ${{ if or(containsValue(parameters.platforms, 'OSX_x64'), containsValue(parameters.platforms, 'CoreClrTestBuildHost'), eq(parameters.platformGroup, 'all')) }}: +- ${{ if or(containsValue(parameters.platforms, 'OSX_x64'), eq(parameters.platformGroup, 'all')) }}: - template: xplat-setup.yml parameters: jobTemplate: ${{ parameters.jobTemplate }} diff --git a/eng/pipelines/common/templates/runtimes/build-test-job.yml b/eng/pipelines/common/templates/runtimes/build-test-job.yml index 56fa8dffddc1..c4eabe0eaa42 100644 --- a/eng/pipelines/common/templates/runtimes/build-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/build-test-job.yml @@ -120,7 +120,7 @@ jobs: displayName: Disk Usage before Build # Build managed test components - - script: $(coreClrRepoRootDir)build-test$(scriptExt) allTargets skipstressdependencies skipnative skipgeneratelayout skiptestwrappers $(buildConfig) $(archType) $(crossArg) $(priorityArg) ci $(librariesOverrideArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) allTargets skipstressdependencies skipnative skipgeneratelayout skiptestwrappers $(buildConfig) $(archType) $(crossArg) $(priorityArg) ci $(librariesOverrideArg) displayName: Build managed test components - ${{ if in(parameters.osGroup, 'OSX', 'iOS','tvOS') }}: diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 4d93701584fe..f5fdcfb895ed 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -244,24 +244,24 @@ jobs: # and directly unzip them there after download). Unfortunately the logic to copy # the native artifacts to the final test folders is dependent on availability of the # managed test artifacts. - - script: $(coreClrRepoRootDir)build-test$(scriptExt) skipstressdependencies copynativeonly $(crossgenArg) $(buildConfig) $(archType) $(priorityArg) $(librariesOverrideArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) skipstressdependencies copynativeonly $(crossgenArg) $(buildConfig) $(archType) $(priorityArg) $(librariesOverrideArg) displayName: Copy native test components to test output folder # Generate test wrappers. This is the step that examines issues.targets to exclude tests. 
- - script: $(coreClrRepoRootDir)build-test$(scriptExt) buildtestwrappersonly $(runtimeFlavorArgs) $(crossgenArg) $(buildConfig) $(archType) $(crossArg) $(priorityArg) $(librariesOverrideArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) buildtestwrappersonly $(runtimeFlavorArgs) $(crossgenArg) $(buildConfig) $(archType) $(crossArg) $(priorityArg) $(librariesOverrideArg) displayName: Generate test wrappers # Compose the Core_Root folder containing all artifacts needed for running # CoreCLR tests. - - script: $(coreClrRepoRootDir)build-test$(scriptExt) generatelayoutonly $(runtimeFlavorArgs) $(crossgenArg) $(buildConfig) $(archType) $(crossArg) $(priorityArg) $(librariesOverrideArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) generatelayoutonly $(runtimeFlavorArgs) $(crossgenArg) $(buildConfig) $(archType) $(crossArg) $(priorityArg) $(librariesOverrideArg) displayName: Generate CORE_ROOT # Crossgen framework assemblies prior to triggering readyToRun execution runs. - ${{ if eq(parameters.readyToRun, true) }}: - - script: $(coreClrRepoRootDir)build-test$(scriptExt) crossgenframeworkonly $(crossgenArg) $(buildConfig) $(archType) $(crossArg) $(priorityArg) $(librariesOverrideArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) crossgenframeworkonly $(crossgenArg) $(buildConfig) $(archType) $(crossArg) $(priorityArg) $(librariesOverrideArg) displayName: Crossgen framework assemblies # Overwrite coreclr runtime binaries with mono ones @@ -274,7 +274,7 @@ jobs: displayName: "Patch dotnet with mono" - ${{ if and(eq(parameters.runtimeFlavor, 'mono'), eq(parameters.runtimeVariant, 'llvmaot')) }}: - - script: $(coreClrRepoRootDir)build-test$(scriptExt) mono_aot $(buildConfig) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) mono_aot $(buildConfig) displayName: "LLVM AOT compile CoreCLR tests" # Send tests to Helix @@ -285,7 +285,6 @@ jobs: archType: ${{ parameters.archType }} osGroup: ${{ parameters.osGroup }} osSubgroup: ${{ parameters.osSubgroup}} - coreClrRepoRoot: $(coreClrRepoRoot) runtimeFlavorDisplayName: ${{ parameters.runtimeFlavorDisplayName }} ${{ if eq(variables['System.TeamProject'], 'public') }}: @@ -322,7 +321,7 @@ jobs: # DotNet-HelixApi-Access variable group helixAccessToken: $(HelixApiAccessToken) - helixProjectArguments: '$(coreClrRepoRoot)/tests/helixpublishwitharcade.proj' + helixProjectArguments: '$(Build.SourcesDirectory)/src/coreclr/tests/helixpublishwitharcade.proj' ${{ if in(parameters.testGroup, 'innerloop', 'outerloop') }}: scenarios: diff --git a/eng/pipelines/common/templates/runtimes/send-to-helix-inner-step.yml b/eng/pipelines/common/templates/runtimes/send-to-helix-inner-step.yml new file mode 100644 index 000000000000..28600d9e331b --- /dev/null +++ b/eng/pipelines/common/templates/runtimes/send-to-helix-inner-step.yml @@ -0,0 +1,34 @@ +parameters: + osGroup: '' + restoreParams: '' + sendParams: '' + condition: '' + displayName: '' + environment: {} + +steps: +- ${{ if eq(parameters.osGroup, 'Windows_NT') }}: + # TODO: Remove and consolidate this when we move to arcade via init-tools.cmd. 
+ - powershell: $(Build.SourcesDirectory)\eng\common\build.ps1 -ci ${{ parameters.restoreParams }} + displayName: Restore blob feed tasks + condition: ${{ parameters.condition }} + + - powershell: $(Build.SourcesDirectory)\eng\common\msbuild.ps1 -ci ${{ parameters.sendParams }} + displayName: ${{ parameters.displayName }} + condition: ${{ parameters.condition }} + env: ${{ parameters.environment }} + +- ${{ if ne(parameters.osGroup, 'Windows_NT') }}: + # TODO: Remove and consolidate this when we move to arcade via init-tools.sh. + - script: $(Build.SourcesDirectory)/eng/common/build.sh --ci ${{ parameters.restoreParams }} + displayName: Restore blob feed tasks + condition: ${{ parameters.condition }} + ${{ if eq(parameters.osGroup, 'FreeBSD') }}: + env: + # Arcade uses this SDK instead of trying to restore one. + DotNetCoreSdkDir: /usr/local/dotnet + + - script: $(Build.SourcesDirectory)/eng/common/msbuild.sh --ci ${{ parameters.sendParams }} + displayName: ${{ parameters.displayName }} + condition: ${{ parameters.condition }} + env: ${{ parameters.environment }} diff --git a/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml b/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml index 3602929653dc..975179232eb7 100644 --- a/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml +++ b/eng/pipelines/common/templates/runtimes/send-to-helix-step.yml @@ -1,6 +1,6 @@ parameters: displayName: '' - condition: '' + condition: true archType: '' osGroup: '' osSubgroup: '' @@ -22,68 +22,18 @@ parameters: runInUnloadableContext: '' longRunningGcTests: '' gcSimulatorTests: '' - coreClrRepoRoot: '' runtimeFlavorDisplayName: 'CoreCLR' runtimeVariant: '' steps: -- ${{ if eq(parameters.osGroup, 'Windows_NT') }}: - # TODO: Remove and consolidate this when we move to arcade via init-tools.cmd. 
- - powershell: $(Build.SourcesDirectory)\eng\common\build.ps1 /p:DotNetPublishToBlobFeed=true -ci -restore -projects $(Build.SourcesDirectory)\eng\empty.csproj - displayName: Restore blob feed tasks - ${{ if ne(parameters.condition, '') }}: - condition: ${{ parameters.condition }} - - - powershell: eng\common\msbuild.ps1 -ci ${{ parameters.helixProjectArguments }} /maxcpucount /bl:$(Build.SourcesDirectory)/artifacts/log/SendToHelix.binlog - displayName: ${{ parameters.displayName }} - ${{ if ne(parameters.condition, '') }}: - condition: ${{ parameters.condition }} - env: - __BuildArch: ${{ parameters.archType }} - __TargetOS: ${{ parameters.osGroup }}${{ parameters.osSubgroup }} - __BuildType: ${{ parameters.buildConfig }} - _Creator: ${{ parameters.creator }} - _PublishTestResults: ${{ parameters.publishTestResults }} - _HelixAccessToken: ${{ parameters.helixAccessToken }} - _HelixBuild: ${{ parameters.helixBuild }} - _HelixSource: ${{ parameters.helixSource }} - _HelixTargetQueues: ${{ join(',', parameters.helixQueues) }} - _HelixType: ${{ parameters.helixType }} - _RunCrossGen: ${{ parameters.runCrossGen }} - _RunCrossGen2: ${{ parameters.runCrossGen2 }} - _CompositeBuildMode: ${{ parameters.compositeBuildMode }} - _RunInUnloadableContext: ${{ parameters.runInUnloadableContext }} - _LongRunningGcTests: ${{ parameters.longRunningGcTests }} - _GcSimulatorTests: ${{ parameters.gcSimulatorTests }} - _Scenarios: ${{ join(',', parameters.scenarios) }} - _TimeoutPerTestCollectionInMinutes: ${{ parameters.timeoutPerTestCollectionInMinutes }} - _TimeoutPerTestInMinutes: ${{ parameters.timeoutPerTestInMinutes }} - runtimeFlavorDisplayName: ${{ parameters.runtimeFlavorDisplayName }} - _RuntimeVariant: ${{ parameters.runtimeVariant }} - ${{ if eq(parameters.publishTestResults, 'true') }}: - SYSTEM_ACCESSTOKEN: $(System.AccessToken) - # TODO: remove NUGET_PACKAGES once https://github.com/dotnet/arcade/issues/1578 is fixed - NUGET_PACKAGES: $(Build.SourcesDirectory)\.packages - -- ${{ if ne(parameters.osGroup, 'Windows_NT') }}: - # TODO: Remove and consolidate this when we move to arcade via init-tools.sh. - - script: $(Build.SourcesDirectory)/eng/common/build.sh /p:DotNetPublishToBlobFeed=true --ci --restore --projects $(Build.SourcesDirectory)/eng/empty.csproj - displayName: Restore blob feed tasks - ${{ if ne(parameters.condition, '') }}: - condition: ${{ parameters.condition }} - ${{ if eq(parameters.osGroup, 'FreeBSD') }}: - env: - # Arcade uses this SDK instead of trying to restore one. 
- DotNetCoreSdkDir: /usr/local/dotnet - - - script: $(Build.SourcesDirectory)/eng/common/msbuild.sh --ci ${{ parameters.helixProjectArguments }} /maxcpucount /bl:$(Build.SourcesDirectory)/artifacts/log/SendToHelix.binlog +- template: send-to-helix-inner-step.yml + parameters: + osGroup: ${{ parameters.osGroup }} + restoreParams: /p:DotNetPublishToBlobFeed=true -restore -projects $(Build.SourcesDirectory)$(dir)eng$(dir)empty.csproj + sendParams: ${{ parameters.helixProjectArguments }} /maxcpucount /bl:$(Build.SourcesDirectory)/artifacts/log/SendToHelix.binlog /p:TargetArchitecture=${{ parameters.archType }} /p:TargetOS=${{ parameters.osGroup }} /p:TargetOSSubgroup=${{ parameters.osSubgroup }} /p:Configuration=${{ parameters.buildConfig }} + condition: and(succeeded(), ${{ parameters.condition }}) displayName: ${{ parameters.displayName }} - ${{ if ne(parameters.condition, '') }}: - condition: ${{ parameters.condition }} - env: - __BuildArch: ${{ parameters.archType }} - __TargetOS: ${{ parameters.osGroup }}${{ parameters.osSubgroup }} - __BuildType: ${{ parameters.buildConfig }} + environment: _Creator: ${{ parameters.creator }} _PublishTestResults: ${{ parameters.publishTestResults }} _HelixAccessToken: ${{ parameters.helixAccessToken }} @@ -105,4 +55,4 @@ steps: ${{ if eq(parameters.publishTestResults, 'true') }}: SYSTEM_ACCESSTOKEN: $(System.AccessToken) # TODO: remove NUGET_PACKAGES once https://github.com/dotnet/arcade/issues/1578 is fixed - NUGET_PACKAGES: $(Build.SourcesDirectory)/.packages + NUGET_PACKAGES: $(Build.SourcesDirectory)$(dir).packages diff --git a/eng/pipelines/common/templates/runtimes/wasm-runtime-and-send-to-helix.yml b/eng/pipelines/common/templates/runtimes/wasm-runtime-and-send-to-helix.yml new file mode 100644 index 000000000000..d5224b82b8e3 --- /dev/null +++ b/eng/pipelines/common/templates/runtimes/wasm-runtime-and-send-to-helix.yml @@ -0,0 +1,83 @@ +parameters: + buildConfig: '' + archType: 'wasm' + osGroup: 'Browser' + osSubgroup: '' + container: '' + testGroup: '' + crossrootfsDir: '' + readyToRun: false + liveLibrariesBuildConfig: '' + crossgen2: false + compositeBuildMode: false + helixQueues: '' + stagedBuild: false + displayNameArgs: '' + runInUnloadableContext: false + runtimeVariant: '' + variables: {} + pool: '' + runtimeFlavor: 'mono' + runtimeFlavorDisplayName: 'Mono' + dependsOn: [] + #arcade-specific parameters + condition: always() + continueOnError: false + displayName: '' + timeoutInMinutes: '' + enableMicrobuild: '' + gatherAssetManifests: false + + +steps: + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) /p:LibrariesConfiguration=${{ parameters.buildConfig }} -ci -skipstressdependencies -excludemonofailures os Browser wasm $(buildConfigUpper) + displayName: Build Tests + + # Send tests to Helix + - template: /eng/pipelines/common/templates/runtimes/send-to-helix-step.yml + parameters: + displayName: Send tests to Helix + buildConfig: $(buildConfigUpper) + archType: ${{ parameters.archType }} + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup}} + coreClrRepoRoot: $(Build.SourcesDirectory)/src/coreclr + runtimeFlavorDisplayName: ${{ parameters.runtimeFlavorDisplayName }} + + ${{ if eq(variables['System.TeamProject'], 'public') }}: + creator: $(Build.DefinitionName) + + helixBuild: $(Build.BuildNumber) + helixSource: $(_HelixSource) + + # REVIEW: not sure why "cli" is part of the names here. Leave it for the ones that already had it, + # but don't add it to new ones. 
+ ${{ if eq(parameters.readyToRun, true) }}: + helixType: 'test/functional/r2r/cli/' + ${{ if ne(parameters.readyToRun, true) }}: + helixType: 'test/functional/cli/' + + helixQueues: ${{ parameters.helixQueues }} + + # This tests whether an array is empty + ${{ if eq(join('', parameters.helixQueues), '') }}: + condition: false + + publishTestResults: true + + timeoutPerTestInMinutes: $(timeoutPerTestInMinutes) + timeoutPerTestCollectionInMinutes: $(timeoutPerTestCollectionInMinutes) + + runCrossGen: ${{ and(eq(parameters.readyToRun, true), ne(parameters.crossgen2, true)) }} + runCrossGen2: ${{ and(eq(parameters.readyToRun, true), eq(parameters.crossgen2, true)) }} + compositeBuildMode: ${{ parameters.compositeBuildMode }} + runInUnloadableContext: ${{ parameters.runInUnloadableContext }} + + ${{ if eq(variables['System.TeamProject'], 'internal') }}: + # Access token variable for internal project from the + # DotNet-HelixApi-Access variable group + helixAccessToken: $(HelixApiAccessToken) + + helixProjectArguments: '$(Build.SourcesDirectory)/src/coreclr/tests/helixpublishwitharcade.proj' + + scenarios: normal \ No newline at end of file diff --git a/eng/pipelines/common/xplat-setup.yml b/eng/pipelines/common/xplat-setup.yml index f6a5ee11f4ec..9c025864515f 100644 --- a/eng/pipelines/common/xplat-setup.yml +++ b/eng/pipelines/common/xplat-setup.yml @@ -136,6 +136,9 @@ jobs: ${{ if eq(parameters.helixQueuesTemplate, '') }}: + # macOS hosted pool machines are slower, so we need to give a greater timeout than the default 60 minutes. + ${{ if and(eq(parameters.jobParameters.timeoutInMinutes, ''), in(parameters.osGroup, 'OSX', 'iOS', 'tvOS')) }}: + timeoutInMinutes: 120 ${{ insert }}: ${{ parameters.jobParameters }} ${{ if ne(parameters.helixQueuesTemplate, '') }}: jobTemplate: ${{ parameters.jobTemplate }} diff --git a/eng/pipelines/coreclr/ci.yml b/eng/pipelines/coreclr/ci.yml index 7e72c87e16b8..c58c4804174b 100644 --- a/eng/pipelines/coreclr/ci.yml +++ b/eng/pipelines/coreclr/ci.yml @@ -164,4 +164,5 @@ jobs: jobTemplate: /eng/pipelines/coreclr/templates/format-job.yml platforms: - Linux_x64 - - Windows_NT_x64 + # Issue: https://github.com/dotnet/runtime/issues/40034 + #- Windows_NT_x64 diff --git a/eng/pipelines/coreclr/crossgen2-gcstress.yml b/eng/pipelines/coreclr/crossgen2-gcstress.yml new file mode 100644 index 000000000000..7942e747e26f --- /dev/null +++ b/eng/pipelines/coreclr/crossgen2-gcstress.yml @@ -0,0 +1,60 @@ +trigger: none + +pr: none + +schedules: +- cron: "0 6 * * 0,1" + displayName: Sat and Sun at 10:00 PM (UTC-8:00) + branches: + include: + - master + always: true + +jobs: +# +# Checkout repository +# +- template: /eng/pipelines/common/checkout-job.yml + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml + buildConfig: checked + platforms: + - Linux_x64 + - Linux_arm64 + - OSX_x64 + - Windows_NT_x64 + - Windows_NT_arm64 + - CoreClrTestBuildHost # Either OSX_x64 or Linux_x64 + jobParameters: + testGroup: gcstress-extra + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml + buildConfig: checked + platforms: + - CoreClrTestBuildHost # Either OSX_x64 or Linux_x64 + jobParameters: + testGroup: gcstress-extra + liveLibrariesBuildConfig: Release + +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml 
+ helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + buildConfig: checked + platforms: + - Linux_x64 + - Linux_arm64 + - OSX_x64 + - Windows_NT_x64 + - Windows_NT_arm64 + jobParameters: + testGroup: gcstress-extra + readyToRun: true + crossgen2: true + compositeBuildMode: true + displayNameArgs: Composite + liveLibrariesBuildConfig: Release diff --git a/eng/pipelines/coreclr/perf.yml b/eng/pipelines/coreclr/perf.yml index 149eb1aed6fb..a3fd8bb532b6 100644 --- a/eng/pipelines/coreclr/perf.yml +++ b/eng/pipelines/coreclr/perf.yml @@ -17,25 +17,8 @@ trigger: - README.md - SECURITY.md - THIRD-PARTY-NOTICES.TXT - - -pr: - branches: - include: - - master - paths: - include: - - '*' - - src/libraries/System.Private.CoreLib/* - exclude: - - docs/* - - CODE-OF-CONDUCT.md - - CONTRIBUTING.md - - LICENSE.TXT - - PATENTS.TXT - - README.md - - SECURITY.md - - THIRD-PARTY-NOTICES.TXT + +pr: none jobs: # @@ -43,6 +26,7 @@ jobs: # - template: /eng/pipelines/common/checkout-job.yml +# build coreclr and libraries - template: /eng/pipelines/common/platform-matrix.yml parameters: jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml @@ -54,6 +38,28 @@ jobs: jobParameters: testGroup: perf +# build mono on wasm +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + buildConfig: release + runtimeFlavor: mono + platforms: + - Browser_wasm + jobParameters: + buildArgs: -s mono+libs+installer -c $(_BuildConfig) + nameSuffix: wasm + isOfficialBuild: ${{ variables.isOfficialBuild }} + extraStepsTemplate: /eng/pipelines/common/upload-artifact-step.yml + extraStepsParameters: + rootFolder: '$(Build.SourcesDirectory)/artifacts/' + includeRootFolder: true + displayName: Browser Wasm Artifacts + artifactName: BrowserWasm + archiveType: zip + archiveExtension: .zip + +# build mono - template: /eng/pipelines/common/platform-matrix.yml parameters: jobTemplate: /eng/pipelines/mono/templates/build-job.yml @@ -62,6 +68,23 @@ jobs: platforms: - Linux_x64 +# run mono microbenchmarks perf job +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml + buildConfig: release + runtimeFlavor: mono + platforms: + - Linux_x64 + jobParameters: + testGroup: perf + liveLibrariesBuildConfig: Release + runtimeType: mono + projectFile: microbenchmarks.proj + runKind: micro_mono + runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml + +# run mono interpreter perf job - template: /eng/pipelines/common/platform-matrix.yml parameters: jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml @@ -73,21 +96,30 @@ testGroup: perf liveLibrariesBuildConfig: Release runtimeType: mono + codeGenType: 'Interpreter' + projectFile: microbenchmarks.proj + runKind: micro_mono + runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml + +# run mono wasm microbenchmarks perf job +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml # NOTE: should we move this file out of coreclr templates because it contains mono jobs? 
+ buildConfig: release + runtimeFlavor: wasm + platforms: + - Linux_x64 + jobParameters: + testGroup: perf + liveLibrariesBuildConfig: Release + runtimeType: wasm + codeGenType: 'wasm' + projectFile: microbenchmarks.proj + runKind: micro + runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml -- ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: - - template: /eng/pipelines/common/platform-matrix.yml - parameters: - jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml - buildConfig: release - runtimeFlavor: mono - platforms: - - Linux_x64 - jobParameters: - testGroup: perf - liveLibrariesBuildConfig: Release - runtimeType: mono - codeGenType: 'Interpreter' +# run coreclr microbenchmarks perf job - template: /eng/pipelines/common/platform-matrix.yml parameters: jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml @@ -100,3 +132,41 @@ jobs: jobParameters: testGroup: perf liveLibrariesBuildConfig: Release + projectFile: microbenchmarks.proj + runKind: micro + runJobTemplate: /eng/pipelines/coreclr/templates/run-performance-job.yml + +# run coreclr crossgen perf job +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml + buildConfig: release + runtimeFlavor: coreclr + platforms: + - Linux_x64 + - Windows_NT_x64 + - Windows_NT_x86 + jobParameters: + testGroup: perf + liveLibrariesBuildConfig: Release + projectFile: crossgen_perf.proj + runKind: crossgen_scenarios + runJobTemplate: /eng/pipelines/coreclr/templates/run-scenarios-job.yml + +# run mono wasm blazor perf job +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/coreclr/templates/perf-job.yml + buildConfig: release + runtimeFlavor: wasm + platforms: + - Linux_x64 + jobParameters: + testGroup: perf + liveLibrariesBuildConfig: Release + runtimeType: wasm + projectFile: blazor_perf.proj + runKind: blazor_scenarios + runJobTemplate: /eng/pipelines/coreclr/templates/run-scenarios-job.yml + additionalSetupParameters: '--latestdotnet' + diff --git a/eng/pipelines/coreclr/readme.md b/eng/pipelines/coreclr/readme.md index 71c391dcc908..0969c41af214 100644 --- a/eng/pipelines/coreclr/readme.md +++ b/eng/pipelines/coreclr/readme.md @@ -56,4 +56,4 @@ internal.yml -> platform-matrix.yml -------> build-job.yml -------> xplat-job.ym | (passed-in jobTemplate) | (arcade) \------> test-job.yml ------/ \------> format-job.yml ----/ -``` \ No newline at end of file +``` diff --git a/eng/pipelines/coreclr/templates/build-job.yml b/eng/pipelines/coreclr/templates/build-job.yml index f2b45e8072cc..91a5cb062475 100644 --- a/eng/pipelines/coreclr/templates/build-job.yml +++ b/eng/pipelines/coreclr/templates/build-job.yml @@ -141,10 +141,10 @@ jobs: # Build CoreCLR Runtime - ${{ if ne(parameters.osGroup, 'Windows_NT') }}: - - script: $(coreClrRepoRootDir)build-runtime$(scriptExt) $(buildConfig) $(archType) $(crossArg) $(osArg) -ci $(compilerArg) $(officialBuildIdArg) $(clrInterpreterBuildArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) $(crossArg) $(osArg) -ci $(compilerArg) $(officialBuildIdArg) $(clrInterpreterBuildArg) displayName: Build CoreCLR Runtime - ${{ if eq(parameters.osGroup, 'Windows_NT') }}: - - script: set __TestIntermediateDir=int&&$(coreClrRepoRootDir)build-runtime$(scriptExt) $(buildConfig) $(archType) -ci $(enforcePgoArg) $(officialBuildIdArg) $(clrInterpreterBuildArg) + - 
script: set __TestIntermediateDir=int&&$(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) -ci $(enforcePgoArg) $(officialBuildIdArg) $(clrInterpreterBuildArg) displayName: Build CoreCLR Runtime - ${{ if in(parameters.osGroup, 'OSX', 'iOS','tvOS') }}: @@ -164,9 +164,31 @@ jobs: # Build native test components - ${{ if ne(parameters.isOfficialBuild, true) }}: - - script: $(coreClrRepoRootDir)build-test$(scriptExt) skipstressdependencies skipmanaged skipgeneratelayout $(buildConfig) $(archType) $(crossArg) $(osArg) $(priorityArg) $(compilerArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) skipstressdependencies skipmanaged skipgeneratelayout $(buildConfig) $(archType) $(crossArg) $(osArg) $(priorityArg) $(compilerArg) displayName: Build native test components + # Sign and add entitlements to these MacOS binaries + - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + - ${{ if eq(parameters.osGroup, 'OSX') }}: + + - template: /eng/pipelines/common/macos-sign-with-entitlements.yml + parameters: + filesToSign: + - name: createdump + path: $(buildProductRootFolderPath) + entitlementsFile: $(Build.SourcesDirectory)/eng/pipelines/common/createdump-entitlements.plist + - name: corerun + path: $(buildProductRootFolderPath) + entitlementsFile: $(Build.SourcesDirectory)/eng/pipelines/common/entitlements.plist + + - task: CopyFiles@2 + displayName: 'Copy signed createdump to sharedFramework' + inputs: + contents: createdump + sourceFolder: $(buildProductRootFolderPath) + targetFolder: $(buildProductRootFolderPath)/sharedFramework + overWrite: true + # Sign on Windows - ${{ if and(eq(parameters.osGroup, 'Windows_NT'), eq(parameters.signBinaries, 'true'), ne(parameters.testGroup, 'clrTools')) }}: - powershell: eng\common\build.ps1 -ci -sign -restore -configuration:$(buildConfig) -warnaserror:0 /p:ArcadeBuild=true /p:OfficialBuild=true /p:TargetOS=$(osGroup) /p:TargetArchitecture=$(archType) /p:Configuration=$(_BuildConfig) /p:DotNetSignType=$env:_SignType -projects $(Build.SourcesDirectory)\eng\empty.csproj diff --git a/eng/pipelines/coreclr/templates/crossdac-build.yml b/eng/pipelines/coreclr/templates/crossdac-build.yml index b17b72af8325..16f16d3c9882 100644 --- a/eng/pipelines/coreclr/templates/crossdac-build.yml +++ b/eng/pipelines/coreclr/templates/crossdac-build.yml @@ -7,7 +7,7 @@ steps: # Always build the crossdac, that way we know in CI/PR if things break to build. 
- ${{ if eq(parameters.osGroup, 'Windows_NT') }}: - ${{ if notin(parameters.archType, 'x86') }}: - - script: set __TestIntermediateDir=int&&$(coreClrRepoRootDir)build-runtime$(scriptExt) $(buildConfig) $(archType) -ci -linuxdac $(officialBuildIdArg) + - script: set __TestIntermediateDir=int&&$(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) -ci -linuxdac $(officialBuildIdArg) displayName: Build Cross OS Linux DAC for Windows @@ -15,7 +15,7 @@ steps: - ${{ if and(ne(variables['System.TeamProject'], 'public'), ne(variables['Build.Reason'], 'PullRequest')) }}: - ${{ if eq(parameters.osGroup, 'Windows_NT') }}: - ${{ if notin(parameters.archType, 'x86', 'arm') }}: - - script: set __TestIntermediateDir=int&&$(coreClrRepoRootDir)build-runtime$(scriptExt) $(buildConfig) $(archType) -ci -alpinedac $(officialBuildIdArg) + - script: set __TestIntermediateDir=int&&$(Build.SourcesDirectory)/src/coreclr/build-runtime$(scriptExt) $(buildConfig) $(archType) -ci -alpinedac $(officialBuildIdArg) displayName: Build Cross OS Linux-musl DAC for Windows - task: CopyFiles@2 diff --git a/eng/pipelines/coreclr/templates/crossgen-comparison-job.yml b/eng/pipelines/coreclr/templates/crossgen-comparison-job.yml index 0e537cb6c7e1..bc1c6b7d3d56 100644 --- a/eng/pipelines/coreclr/templates/crossgen-comparison-job.yml +++ b/eng/pipelines/coreclr/templates/crossgen-comparison-job.yml @@ -104,7 +104,7 @@ jobs: displayName: 'live-built libraries' # Populate Core_Root - - script: $(coreClrRepoRootDir)build-test$(scriptExt) $(buildConfig) $(archType) $(crossArg) generatelayoutonly + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) $(buildConfig) $(archType) $(crossArg) generatelayoutonly displayName: Populate Core_Root # Create directories and ensure crossgen is executable @@ -123,7 +123,7 @@ jobs: displayName: Create cross-platform crossgen baseline inputs: scriptSource: 'filePath' - scriptPath: $(coreClrRepoRoot)/tests/scripts/crossgen_comparison.py + scriptPath: $(Build.SourcesDirectory)/src/coreclr/tests/scripts/crossgen_comparison.py pythonInterpreter: /usr/bin/python3 ${{ if ne(parameters.osGroup, 'Windows_NT') }}: arguments: @@ -164,7 +164,7 @@ jobs: Creator: $(Creator) WorkItemTimeout: 3:00 # 3 hours WorkItemDirectory: '$(workItemDirectory)' - CorrelationPayloadDirectory: '$(coreClrRepoRoot)/tests/scripts' + CorrelationPayloadDirectory: '$(Build.SourcesDirectory)/src/coreclr/tests/scripts' ${{ if ne(parameters.osName, 'Windows_NT') }}: WorkItemCommand: chmod +x $HELIX_WORKITEM_PAYLOAD/crossgen; diff --git a/eng/pipelines/coreclr/templates/format-job.yml b/eng/pipelines/coreclr/templates/format-job.yml index 7e0921823b1d..c8871536bae5 100644 --- a/eng/pipelines/coreclr/templates/format-job.yml +++ b/eng/pipelines/coreclr/templates/format-job.yml @@ -52,12 +52,12 @@ jobs: displayName: Run tests/scripts/format.py inputs: scriptSource: 'filePath' - scriptPath: $(coreClrRepoRoot)/tests/scripts/format.py - arguments: '-c $(coreClrRepoRoot) -o $(osGroup) -a $(archType)' + scriptPath: $(Build.SourcesDirectory)/src/coreclr/tests/scripts/format.py + arguments: '-c $(Build.SourcesDirectory)/src/coreclr -o $(osGroup) -a $(archType)' - task: PublishBuildArtifacts@1 displayName: Publish format.patch inputs: - PathtoPublish: '$(coreClrRepoRoot)/format.patch' + PathtoPublish: '$(Build.SourcesDirectory)/src/coreclr/format.patch' ArtifactName: format.$(osGroup).$(archType).patch continueOnError: true condition: failed() diff --git 
a/eng/pipelines/coreclr/templates/helix-queues-setup.yml b/eng/pipelines/coreclr/templates/helix-queues-setup.yml index c7c62451b779..bb6789a277c4 100644 --- a/eng/pipelines/coreclr/templates/helix-queues-setup.yml +++ b/eng/pipelines/coreclr/templates/helix-queues-setup.yml @@ -124,8 +124,12 @@ jobs: # Windows_NT arm64 - ${{ if eq(parameters.platform, 'Windows_NT_arm64') }}: - ${{ if and(eq(variables['System.TeamProject'], 'public'), in(parameters.jobParameters.helixQueueGroup, 'pr', 'ci', 'libraries')) }}: - - Windows.10.Arm64.Open + - Windows.10.Arm64v8.Open - ${{ if eq(variables['System.TeamProject'], 'internal') }}: - Windows.10.Arm64 + # WebAssembly + - ${{ if eq(parameters.platform, 'Browser_wasm') }}: + - Ubuntu.1804.Amd64.Open + ${{ insert }}: ${{ parameters.jobParameters }} diff --git a/eng/pipelines/coreclr/templates/perf-job.yml b/eng/pipelines/coreclr/templates/perf-job.yml index 4284a79368f9..ea1ff3b89255 100644 --- a/eng/pipelines/coreclr/templates/perf-job.yml +++ b/eng/pipelines/coreclr/templates/perf-job.yml @@ -11,6 +11,10 @@ parameters: runtimeType: 'coreclr' pool: '' codeGenType: 'JIT' + projectFile: '' + runKind: '' + runJobTemplate: '/eng/pipelines/coreclr/templates/run-performance-job.yml' + additionalSetupParameters: '' ### Perf job @@ -18,11 +22,11 @@ parameters: ### buildConfig and archType. jobs: -- template: run-performance-job.yml +- template: ${{ parameters.runJobTemplate }} parameters: # Compute job name from template parameters - jobName: ${{ format('perfbuild_{0}{1}_{2}_{3}_{4}_{5}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.runtimeType, parameters.codeGenType) }} - displayName: ${{ format('Performance {0}{1} {2} {3} {4} {5}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.runtimeType, parameters.codeGenType) }} + jobName: ${{ format('perfbuild_{0}{1}_{2}_{3}_{4}_{5}_{6}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.runtimeType, parameters.codeGenType, parameters.runKind) }} + displayName: ${{ format('Performance {0}{1} {2} {3} {4} {5} {6}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig, parameters.runtimeType, parameters.codeGenType, parameters.runKind) }} pool: ${{ parameters.pool }} buildConfig: ${{ parameters.buildConfig }} archType: ${{ parameters.archType }} @@ -32,6 +36,9 @@ jobs: liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }} runtimeType: ${{ parameters.runtimeType }} codeGenType: ${{ parameters.codeGenType }} + projectFile: ${{ parameters.projectFile }} + runKind: ${{ parameters.runKind }} + additionalSetupParameters: ${{ parameters.additionalSetupParameters }} # Test job depends on the corresponding build job dependsOn: - ${{ format('coreclr_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} @@ -39,18 +46,22 @@ jobs: - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.liveLibrariesBuildConfig) }} - ${{ if eq(parameters.runtimeType, 'mono') }}: - ${{ format('mono_{0}_product_build_{1}{2}_{3}_{4}', parameters.runtimeVariant, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} + - ${{ if eq(parameters.runtimeType, 'wasm') }}: + - ${{ format('build_{0}{1}_{2}_{3}_{4}', 'Browser', '', 'wasm', parameters.buildConfig,
parameters.runtimeType) }} ${{ if eq(parameters.osGroup, 'Windows_NT') }}: ${{ if eq(parameters.runtimeType, 'mono') }}: - extraSetupParameters: -Architecture ${{ parameters.archType }} -MonoDotnet $(Build.SourcesDirectory)\.dotnet-mono -Kind micro_mono - ${{ if ne(parameters.runtimeType, 'mono') }}: + extraSetupParameters: -Architecture ${{ parameters.archType }} -MonoDotnet $(Build.SourcesDirectory)\.dotnet-mono + ${{ if eq(parameters.runtimeType, 'coreclr') }}: extraSetupParameters: -CoreRootDirectory $(Build.SourcesDirectory)\artifacts\tests\coreclr\${{ parameters.osGroup }}.${{ parameters.archType }}.Release\Tests\Core_Root -Architecture ${{ parameters.archType }} ${{ if ne(parameters.osGroup, 'Windows_NT') }}: ${{ if eq(parameters.runtimeType, 'mono') }}: - extraSetupParameters: --architecture ${{ parameters.archType }} --monodotnet $(Build.SourcesDirectory)/.dotnet-mono --kind micro_mono - ${{ if ne(parameters.runtimeType, 'mono') }}: + extraSetupParameters: --architecture ${{ parameters.archType }} --monodotnet $(Build.SourcesDirectory)/.dotnet-mono + ${{ if eq(parameters.runtimeType, 'wasm') }}: + extraSetupParameters: --architecture ${{ parameters.archType }} --wasm $(librariesDownloadDir)/bin/wasm + ${{ if eq(parameters.runtimeType, 'coreclr') }}: extraSetupParameters: --corerootdirectory $(Build.SourcesDirectory)/artifacts/tests/coreclr/${{ parameters.osGroup }}.${{ parameters.archType }}.Release/Tests/Core_Root --architecture ${{ parameters.archType }} - + variables: ${{ parameters.variables }} frameworks: @@ -68,15 +79,15 @@ jobs: artifactName: '$(librariesBuildArtifactName)' displayName: 'live-built libraries' - - # Download product binaries directory + # Download coreclr - template: /eng/pipelines/common/download-artifact-step.yml parameters: unpackFolder: $(buildProductRootFolderPath) artifactFileName: '$(buildProductArtifactName)$(archiveExtension)' artifactName: '$(buildProductArtifactName)' - displayName: 'product build' + displayName: 'Coreclr product build' + # Download mono - ${{ if eq(parameters.runtimeType, 'mono') }}: - template: /eng/pipelines/common/download-artifact-step.yml parameters: @@ -86,15 +97,28 @@ jobs: artifactName: 'MonoProduct_${{ parameters.runtimeVariant }}_$(osGroup)_$(archType)_$(buildConfig)' displayName: 'Mono runtime' + # Download wasm + - ${{ if eq(parameters.runtimeType, 'wasm') }}: + - template: /eng/pipelines/common/download-artifact-step.yml + parameters: + unpackFolder: $(librariesDownloadDir)/BrowserWasm + artifactFileName: BrowserWasm.zip + artifactName: BrowserWasm + displayName: BrowserWasm + + - script: "mkdir $(librariesDownloadDir)/bin/wasm;unzip -o $(librariesDownloadDir)/BrowserWasm/artifacts/packages/Release/Shipping/Microsoft.NETCore.App.Runtime.browser-wasm.6.0.0-ci.nupkg data/* runtimes/* -d $(librariesDownloadDir)/bin/wasm;cp src/mono/wasm/runtime-test.js $(librariesDownloadDir)/bin/wasm/runtime-test.js;find $(librariesDownloadDir)/bin/wasm -type f -exec chmod 664 {} \\;" + displayName: "Create wasm directory (Linux)" + # Create Core_Root - - script: $(coreClrRepoRootDir)build-test$(scriptExt) $(buildConfig) $(archType) generatelayoutonly $(librariesOverrideArg) + - script: $(Build.SourcesDirectory)/src/coreclr/build-test$(scriptExt) $(buildConfig) $(archType) generatelayoutonly $(librariesOverrideArg) displayName: Create Core_Root condition: and(succeeded(), ne(variables.runtimeFlavorName, 'Mono')) - - script: "build.cmd -subset libs.pretest -configuration release -ci -arch $(archType) -testscope innerloop 
/p:RuntimeArtifactsPath=$(librariesDownloadDir)\\bin\\mono\\$(osGroup).$(archType).$(buildConfigUpper) /p:RuntimeFlavor=mono;xcopy $(Build.SourcesDirectory)\\artifacts\\bin\\testhost\\$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)\\* $(Build.SourcesDirectory)\\.dotnet-mono /E /I /Y;copy $(Build.SourcesDirectory)\\artifacts\\bin\\coreclr\\$(osGroup).$(archType).$(buildConfigUpper)\\corerun.exe $(Build.SourcesDirectory)\\.dotnet-mono\\shared\\Microsoft.NETCore.App\\5.0.0\\corerun.exe" + # Copy the runtime directory into the testhost folder to include OOBs. + - script: "build.cmd -subset libs.pretest -configuration release -ci -arch $(archType) -testscope innerloop /p:RuntimeArtifactsPath=$(librariesDownloadDir)\\bin\\mono\\$(osGroup).$(archType).$(buildConfigUpper) /p:RuntimeFlavor=mono;xcopy $(Build.SourcesDirectory)\\artifacts\\bin\\runtime\\$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)\\* $(Build.SourcesDirectory)\\artifacts\\bin\\testhost\\$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)\\shared\\Microsoft.NETCore.App\\6.0.0 /E /I /Y;xcopy $(Build.SourcesDirectory)\\artifacts\\bin\\testhost\\$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)\\* $(Build.SourcesDirectory)\\.dotnet-mono /E /I /Y;copy $(Build.SourcesDirectory)\\artifacts\\bin\\coreclr\\$(osGroup).$(archType).$(buildConfigUpper)\\corerun.exe $(Build.SourcesDirectory)\\.dotnet-mono\\shared\\Microsoft.NETCore.App\\6.0.0\\corerun.exe" displayName: "Create mono dotnet (Windows)" condition: and(and(succeeded(), eq(variables.runtimeFlavorName, 'Mono')), eq(variables.osGroup, 'Windows_NT')) - - script: "mkdir $(Build.SourcesDirectory)/.dotnet-mono;./build.sh -subset libs.pretest -configuration release -ci -arch $(archType) -testscope innerloop /p:RuntimeArtifactsPath=$(librariesDownloadDir)/bin/mono/$(osGroup).$(archType).$(buildConfigUpper) /p:RuntimeFlavor=mono;cp $(Build.SourcesDirectory)/artifacts/bin/testhost/$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)/* $(Build.SourcesDirectory)/.dotnet-mono -r;cp $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(buildConfigUpper)/corerun $(Build.SourcesDirectory)/.dotnet-mono/shared/Microsoft.NETCore.App/5.0.0/corerun" + - script: "mkdir $(Build.SourcesDirectory)/.dotnet-mono;./build.sh -subset libs.pretest -configuration release -ci -arch $(archType) -testscope innerloop /p:RuntimeArtifactsPath=$(librariesDownloadDir)/bin/mono/$(osGroup).$(archType).$(buildConfigUpper) /p:RuntimeFlavor=mono;cp $(Build.SourcesDirectory)/artifacts/bin/runtime/$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)/* $(Build.SourcesDirectory)/artifacts/bin/testhost/$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)/shared/Microsoft.NETCore.App/6.0.0 -rf;cp $(Build.SourcesDirectory)/artifacts/bin/testhost/$(_Framework)-$(osGroup)-$(buildConfigUpper)-$(archType)/* $(Build.SourcesDirectory)/.dotnet-mono -r;cp $(Build.SourcesDirectory)/artifacts/bin/coreclr/$(osGroup).$(archType).$(buildConfigUpper)/corerun $(Build.SourcesDirectory)/.dotnet-mono/shared/Microsoft.NETCore.App/6.0.0/corerun" displayName: "Create mono dotnet (Linux)" condition: and(and(succeeded(), eq(variables.runtimeFlavorName, 'Mono')), ne(variables.osGroup, 'Windows_NT')) diff --git a/eng/pipelines/coreclr/templates/run-performance-job.yml b/eng/pipelines/coreclr/templates/run-performance-job.yml index 7dadb9bd5eee..a3790ae6c381 100644 --- a/eng/pipelines/coreclr/templates/run-performance-job.yml +++ b/eng/pipelines/coreclr/templates/run-performance-job.yml @@ -18,6 +18,8 
@@ parameters: liveLibrariesBuildConfig: '' # optional -- live-live libraries configuration to use for the run runtimeType: 'coreclr' # optional -- Sets the runtime as coreclr or mono codeGenType: 'JIT' # optional -- Decides on the codegen technology if running on mono + projectFile: 'microbenchmarks.proj' # optional -- project file to build helix workitems + runKind: '' # required -- test category jobs: - template: xplat-pipeline-job.yml @@ -51,7 +53,7 @@ jobs: - IsInternal: '' - HelixApiAccessToken: '' - HelixPreCommandStemWindows: 'py -3 -m venv %HELIX_WORKITEM_PAYLOAD%\.venv;call %HELIX_WORKITEM_PAYLOAD%\.venv\Scripts\activate.bat;set PYTHONPATH=;py -3 -m pip install --user azure.storage.blob==12.0.0 --force-reinstall;py -3 -m pip install --user azure.storage.queue==12.0.0 --force-reinstall;set "PERFLAB_UPLOAD_TOKEN=$(PerfCommandUploadToken)"' - - HelixPreCommandStemLinux: 'sudo apt-get -y install python3-venv;python3 -m venv $HELIX_WORKITEM_PAYLOAD/.venv;source $HELIX_WORKITEM_PAYLOAD/.venv/Scripts/activate;export PYTHONPATH=;pip3 install --user azure.storage.blob==12.0.0 --force-reinstall;pip3 install --user azure.storage.queue==12.0.0 --force-reinstall;export PERFLAB_UPLOAD_TOKEN="$(PerfCommandUploadTokenLinux)"' + - HelixPreCommandStemLinux: 'sudo apt-get -y install python3-venv;python3 -m venv $HELIX_WORKITEM_PAYLOAD/.venv;source $HELIX_WORKITEM_PAYLOAD/.venv/Scripts/activate;export PYTHONPATH=;pip3 install --user azure.storage.blob==12.0.0 --force-reinstall;pip3 install --user azure.storage.queue==12.0.0 --force-reinstall;sudo apt-get update;sudo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates;curl -sL https://deb.nodesource.com/setup_12.x | sudo -E bash -;sudo apt-get -y install nodejs;sudo apt-get -y install npm;npm install --prefix $HELIX_WORKITEM_PAYLOAD jsvu -g;$HELIX_WORKITEM_PAYLOAD/bin/jsvu --os=linux64 --engines=v8;export PERFLAB_UPLOAD_TOKEN="$(PerfCommandUploadTokenLinux)"' - ExtraMSBuildLogsWindows: 'set MSBUILDDEBUGCOMM=1;set "MSBUILDDEBUGPATH=%HELIX_WORKITEM_UPLOAD_ROOT%"' - ExtraMSBuildLogsLinux: 'export MSBUILDDEBUGCOMM=1;export "MSBUILDDEBUGPATH=$HELIX_WORKITEM_UPLOAD_ROOT"' - HelixPreCommand: '' @@ -71,7 +73,7 @@ jobs: - ${{ if eq( parameters.osGroup, 'Windows_NT') }}: - HelixPreCommand: $(ExtraMSBuildLogsWindows) - ${{ if ne(parameters.osGroup, 'Windows_NT') }}: - - HelixPreCommand: $(ExtraMSBuildLogsLinux) + - HelixPreCommand: $(ExtraMSBuildLogsLinux);npm install --prefix $HELIX_WORKITEM_PAYLOAD jsvu -g;$HELIX_WORKITEM_PAYLOAD/bin/jsvu --os=linux64 --engines=v8 - ${{ if and(eq(parameters.codeGenType, 'Interpreter'), eq(parameters.runtimeType, 'mono')) }}: @@ -90,6 +92,7 @@ jobs: - HelixPreCommand: 'export MONO_ENV_OPTIONS="--interpreter";$(ExtraMSBuildLogsLinux)' - Interpreter: ' --monointerpreter' + workspace: clean: all pool: @@ -102,24 +105,16 @@ jobs: _Framework: ${{ framework }} steps: - ${{ parameters.steps }} - - powershell: $(Build.SourcesDirectory)\eng\common\performance\performance-setup.ps1 $(IsInternal)$(Interpreter) -Framework $(_Framework) ${{ parameters.extraSetupParameters }} + - powershell: $(Build.SourcesDirectory)\eng\common\performance\performance-setup.ps1 $(IsInternal)$(Interpreter) -Framework $(_Framework) -Kind ${{ parameters.runKind }} ${{ parameters.extraSetupParameters }} displayName: Performance Setup (Windows) condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) continueOnError: ${{ parameters.continueOnError }} - - script: 
$(Build.SourcesDirectory)/eng/common/performance/performance-setup.sh $(IsInternal)$(Interpreter) --framework $(_Framework) ${{ parameters.extraSetupParameters }} + - script: $(Build.SourcesDirectory)/eng/common/performance/performance-setup.sh $(IsInternal)$(Interpreter) --framework $(_Framework) --kind ${{ parameters.runKind }} ${{ parameters.extraSetupParameters }} displayName: Performance Setup (Unix) condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) continueOnError: ${{ parameters.continueOnError }} - script: $(Python) $(PerformanceDirectory)/scripts/ci_setup.py $(SetupArguments) displayName: Run ci setup script - - script: xcopy $(PerformanceDirectory)\scripts $(WorkItemDirectory)\ScenarioCorrelation\scripts\/e && xcopy $(PerformanceDirectory)\src\scenarios\shared $(WorkItemDirectory)\ScenarioCorrelation\shared\/e && xcopy $(PerformanceDirectory)\src\scenarios\staticdeps $(WorkItemDirectory)\ScenarioCorrelation\staticdeps\/e - displayName: Copy scenario support files - condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) - - script: $(PerformanceDirectory)\tools\dotnet\$(Architecture)\dotnet publish -c Release -o $(WorkItemDirectory)\ScenarioCorrelation\Startup -f netcoreapp3.1 -r win-$(Architecture) $(PerformanceDirectory)\src\tools\ScenarioMeasurement\Startup\Startup.csproj - displayName: Build scenario tools - env: - PERFLAB_TARGET_FRAMEWORKS: netcoreapp3.1 - condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) # Run perf testing in helix - template: /eng/common/templates/steps/perf-send-to-helix.yml parameters: @@ -132,10 +127,11 @@ jobs: WorkItemTimeout: 4:00 # 4 hours WorkItemDirectory: '$(WorkItemDirectory)' # WorkItemDirectory can not be empty, so we send it some docs to keep it happy CorrelationPayloadDirectory: '$(PayloadDirectory)' # it gets checked out to a folder with shorter path than WorkItemDirectory so we can avoid file name too long exceptions + ProjectFile: ${{ parameters.projectFile }} - task: PublishPipelineArtifact@1 displayName: Publish Logs inputs: targetPath: $(Build.SourcesDirectory)/artifacts/log - artifactName: 'Performance_Run_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.runtimeType }}_${{ parameters.codeGenType }}' + artifactName: 'Performance_Run_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.runtimeType }}_${{ parameters.codeGenType }}_${{ parameters.runKind }}' continueOnError: true condition: always() diff --git a/eng/pipelines/coreclr/templates/run-scenarios-job.yml b/eng/pipelines/coreclr/templates/run-scenarios-job.yml new file mode 100644 index 000000000000..cd34ba2a405a --- /dev/null +++ b/eng/pipelines/coreclr/templates/run-scenarios-job.yml @@ -0,0 +1,162 @@ +parameters: + steps: [] # optional -- any additional steps that need to happen before pulling down the performance repo and sending the performance benchmarks to helix (ie building your repo) + variables: [] # optional -- list of additional variables to send to the template + jobName: '' # required -- job name + displayName: '' # optional -- display name for the job. 
Will use jobName if not passed + pool: '' # required -- name of the Build pool + container: '' # required -- name of the container + buildConfig: '' # required -- build configuration + archType: '' # required -- targeting CPU architecture + osGroup: '' # required -- operating system for the job + osSubgroup: '' # optional -- operating system subgroup + extraSetupParameters: '' # optional -- extra arguments to pass to the setup script + frameworks: ['netcoreapp3.0'] # optional -- list of frameworks to run against + continueOnError: 'false' # optional -- determines whether to continue the build if the step errors + dependsOn: '' # optional -- dependencies of the job + timeoutInMinutes: 320 # optional -- timeout for the job + enableTelemetry: false # optional -- enable for telemetry + liveLibrariesBuildConfig: '' # optional -- live-live libraries configuration to use for the run + runtimeType: 'coreclr' # optional -- Sets the runtime as coreclr or mono + codeGenType: 'JIT' # optional -- Decides on the codegen technology if running on mono + projectFile: '' # required -- project file to build helix workitems + runKind: '' # required -- test category + additionalSetupParameters: '' # optional -- additional setup parameters that are job-specific + +jobs: +- template: xplat-pipeline-job.yml + parameters: + dependsOn: ${{ parameters.dependsOn }} + buildConfig: ${{ parameters.buildConfig }} + archType: ${{ parameters.archType }} + osGroup: ${{ parameters.osGroup }} + osSubgroup: ${{ parameters.osSubgroup }} + liveLibrariesBuildConfig: ${{ parameters.liveLibrariesBuildConfig }} + enableTelemetry: ${{ parameters.enableTelemetry }} + enablePublishBuildArtifacts: true + continueOnError: ${{ parameters.continueOnError }} + + ${{ if ne(parameters.displayName, '') }}: + displayName: '${{ parameters.displayName }}' + ${{ if eq(parameters.displayName, '') }}: + displayName: '${{ parameters.jobName }}' + + timeoutInMinutes: ${{ parameters.timeoutInMinutes }} + + variables: + - ${{ each variable in parameters.variables }}: + - ${{ if ne(variable.name, '') }}: + - name: ${{ variable.name }} + value: ${{ variable.value }} + - ${{ if ne(variable.group, '') }}: + - group: ${{ variable.group }} + + - IsInternal: '' + - HelixApiAccessToken: '' + - SharedHelixPreCommands: '' + - AdditionalHelixPreCommands: '' + # run machine-setup and set PYTHONPATH for both public and private jobs + - ${{ if eq(parameters.osGroup, 'Windows_NT') }}: + - SharedHelixPreCommands: 'call %HELIX_WORKITEM_PAYLOAD%\machine-setup.cmd;set PYTHONPATH=%HELIX_WORKITEM_PAYLOAD%\scripts%3B%HELIX_WORKITEM_PAYLOAD%' + - ${{ if ne(parameters.osGroup, 'Windows_NT') }}: + - SharedHelixPreCommands: 'chmod +x $HELIX_WORKITEM_PAYLOAD/machine-setup.sh;. 
$HELIX_WORKITEM_PAYLOAD/machine-setup.sh;export PYTHONPATH=$HELIX_WORKITEM_PAYLOAD/scripts:$HELIX_WORKITEM_PAYLOAD' + + # extra private job settings + - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + - ${{ if eq(parameters.osGroup, 'Windows_NT') }}: + - AdditionalHelixPreCommands: 'py -3 -m venv %HELIX_WORKITEM_PAYLOAD%\.venv;call %HELIX_WORKITEM_PAYLOAD%\.venv\Scripts\activate.bat;set PYTHONPATH=;py -3 -m pip install --user azure.storage.blob==12.0.0 --force-reinstall;py -3 -m pip install --user azure.storage.queue==12.0.0 --force-reinstall;set "PERFLAB_UPLOAD_TOKEN=$(PerfCommandUploadToken)"' + - IsInternal: -Internal + - ${{ if ne(parameters.osGroup, 'Windows_NT') }}: + - AdditionalHelixPreCommands: 'sudo apt-get -y install python3-venv;python3 -m venv $HELIX_WORKITEM_PAYLOAD/.venv;source $HELIX_WORKITEM_PAYLOAD/.venv/Scripts/activate;export PYTHONPATH=;pip3 install --user azure.storage.blob==12.0.0 --force-reinstall;pip3 install --user azure.storage.queue==12.0.0 --force-reinstall;export PERFLAB_UPLOAD_TOKEN="$(PerfCommandUploadTokenLinux)"' + - IsInternal: --internal + - group: DotNet-HelixApi-Access + - group: dotnet-benchview + + workspace: + clean: all + pool: + ${{ parameters.pool }} + container: ${{ parameters.container }} + strategy: + matrix: + ${{ each framework in parameters.frameworks }}: + ${{ framework }}: + _Framework: ${{ framework }} + steps: + - ${{ parameters.steps }} + # run performance-setup + - powershell: $(Build.SourcesDirectory)\eng\common\performance\performance-setup.ps1 $(IsInternal) -Framework $(_Framework) -Kind ${{ parameters.runKind }} ${{ parameters.extraSetupParameters }} ${{ parameters.additionalSetupParameters }} + displayName: Performance Setup (Windows) + condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) + continueOnError: ${{ parameters.continueOnError }} + - script: $(Build.SourcesDirectory)/eng/common/performance/performance-setup.sh $(IsInternal) --framework $(_Framework) --kind ${{ parameters.runKind }} ${{ parameters.extraSetupParameters }} ${{ parameters.additionalSetupParameters }} + displayName: Performance Setup (Linux) + condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) + continueOnError: ${{ parameters.continueOnError }} + # parse Version.props to get the latest stable sdk version on master + - script: $(Python) scripts/parse_props.py --branch-name master # defaults to master branch of sdk so we can get the latest + displayName: Parse Version.props + workingDirectory: $(PerformanceDirectory) + continueOnError: ${{ parameters.continueOnError }} + # run ci-setup + - script: $(Python) $(PerformanceDirectory)\scripts\ci_setup.py $(DotnetVersion) $(SetupArguments) --install-dir $(PayloadDirectory)\dotnet --output-file $(WorkItemDirectory)\machine-setup.cmd + displayName: Run ci setup script (Windows) + condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) + - script: $(Python) $(PerformanceDirectory)/scripts/ci_setup.py $(DotnetVersion) $(SetupArguments) --install-dir $(PayloadDirectory)/dotnet --output-file $(WorkItemDirectory)/machine-setup.sh + displayName: Run ci setup script (Linux) + condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) + # copy wasm packs if running on wasm + - script: cp -r $(librariesDownloadDir)/BrowserWasm/artifacts/bin/microsoft.netcore.app.runtime.browser-wasm $(PayloadDirectory);cp -r $(librariesDownloadDir)/BrowserWasm/artifacts/bin/microsoft.netcore.app.ref $(PayloadDirectory) + 
displayName: Copy browserwasm and runtime ref packs + condition: and(succeeded(), eq('${{ parameters.runtimeType }}', 'wasm')) + # copy scenario support files + - script: xcopy $(PerformanceDirectory)\scripts $(WorkItemDirectory)\scripts\/e && xcopy $(PerformanceDirectory)\src\scenarios\shared $(WorkItemDirectory)\shared\/e && xcopy $(PerformanceDirectory)\src\scenarios\staticdeps $(WorkItemDirectory)\staticdeps\/e + displayName: Copy scenario support files (Windows) + condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) + - script: cp -r $(PerformanceDirectory)/scripts $(WorkItemDirectory)/scripts/ && cp -r $(PerformanceDirectory)/src/scenarios/shared $(WorkItemDirectory)/shared/ && cp -r $(PerformanceDirectory)/src/scenarios/staticdeps/ $(WorkItemDirectory)/staticdeps/ + displayName: Copy scenario support files (Linux) + condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) + # build Startup + - script: $(PayloadDirectory)\dotnet\dotnet.exe publish -c Release -o $(WorkItemDirectory)\Startup -f netcoreapp3.1 -r win-$(Architecture) $(PerformanceDirectory)\src\tools\ScenarioMeasurement\Startup\Startup.csproj + displayName: Build Startup tool (Windows) + env: + PERFLAB_TARGET_FRAMEWORKS: netcoreapp3.1 + condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) + - script: $(PayloadDirectory)/dotnet/dotnet publish -c Release -o $(WorkItemDirectory)/startup -f netcoreapp3.1 -r linux-$(Architecture) $(PerformanceDirectory)/src/tools/ScenarioMeasurement/Startup/Startup.csproj + displayName: Build Startup tool (Linux) + env: + PERFLAB_TARGET_FRAMEWORKS: netcoreapp3.1 + condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) + # build SizeOnDisk + - script: $(PayloadDirectory)\dotnet\dotnet.exe publish -c Release -o $(WorkItemDirectory)\SOD -f netcoreapp3.1 -r win-$(Architecture) $(PerformanceDirectory)\src\tools\ScenarioMeasurement\SizeOnDisk\SizeOnDisk.csproj + displayName: Build SizeOnDisk tool (Windows) + env: + PERFLAB_TARGET_FRAMEWORKS: netcoreapp3.1 + condition: and(succeeded(), eq(variables['Agent.Os'], 'Windows_NT')) + - script: $(PayloadDirectory)/dotnet/dotnet publish -c Release -o $(WorkItemDirectory)/SOD -f netcoreapp3.1 -r linux-$(Architecture) $(PerformanceDirectory)/src/tools/ScenarioMeasurement/SizeOnDisk/SizeOnDisk.csproj + displayName: Build SizeOnDisk tool (Linux) + env: + PERFLAB_TARGET_FRAMEWORKS: netcoreapp3.1 + condition: and(succeeded(), ne(variables['Agent.Os'], 'Windows_NT')) + + # run perf testing in helix + - template: /eng/common/templates/steps/perf-send-to-helix.yml + parameters: + HelixSource: '$(HelixSourcePrefix)/$(Build.Repository.Name)/$(Build.SourceBranch)' # sources must start with pr/, official/, prodcon/, or agent/ + HelixType: 'test/performance/$(Kind)/$(_Framework)/$(Architecture)' + HelixAccessToken: $(HelixApiAccessToken) + HelixTargetQueues: $(Queue) + HelixPreCommands: '$(AdditionalHelixPreCommands);$(SharedHelixPreCommands)' # $(SharedHelixPreCommands) must follow $(AdditionalHelixPreCommands) because the latter clears PYTHONPATH + Creator: $(Creator) + WorkItemTimeout: 4:00 # 4 hours + WorkItemDirectory: '$(WorkItemDirectory)' # contains scenario tools, shared python scripts, dotnet tool + CorrelationPayloadDirectory: '$(PayloadDirectory)' # contains performance repo and built product + ProjectFile: ${{ parameters.projectFile }} + + # publish logs + - task: PublishPipelineArtifact@1 + displayName: Publish Logs + inputs: + targetPath: $(Build.SourcesDirectory)/artifacts/log + artifactName:
'Performance_Run_$(osGroup)$(osSubgroup)_$(archType)_$(buildConfig)_${{ parameters.runtimeType }}_${{ parameters.codeGenType }}_${{ parameters.runKind }}' + continueOnError: true + condition: always() diff --git a/eng/pipelines/coreclr/templates/xplat-pipeline-job.yml b/eng/pipelines/coreclr/templates/xplat-pipeline-job.yml index 0e789b84e919..2e4e5122c94a 100644 --- a/eng/pipelines/coreclr/templates/xplat-pipeline-job.yml +++ b/eng/pipelines/coreclr/templates/xplat-pipeline-job.yml @@ -57,12 +57,6 @@ jobs: - name: testArtifactRootName value: ${{ parameters.Group }}${{ parameters.Subgroup }}_${{ parameters.archType }}_${{ parameters.buildConfig }} - - name: coreClrRepoRoot - value: '$(Build.SourcesDirectory)/src/coreclr' - - - name: coreClrRepoRootDir - value: '$(coreClrRepoRoot)$(dir)' - - name: binTestsPath value: '$(Build.SourcesDirectory)/artifacts/tests/coreclr' diff --git a/eng/pipelines/installer/jobs/base-job.yml b/eng/pipelines/installer/jobs/base-job.yml index b74bf18fb60c..9ad565067c74 100644 --- a/eng/pipelines/installer/jobs/base-job.yml +++ b/eng/pipelines/installer/jobs/base-job.yml @@ -132,7 +132,7 @@ jobs: - name: BaseJobBuildCommand value: >- - $(Build.SourcesDirectory)/build.sh -subset installer -ci + $(Build.SourcesDirectory)/build.sh -ci $(BuildAction) -configuration $(_BuildConfig) $(LiveOverridePathArgs) @@ -456,8 +456,29 @@ jobs: df -h displayName: Disk Usage before Build - - script: $(BaseJobBuildCommand) - displayName: Build + # Build the default subset on non-MacOS platforms + - ${{ if ne(parameters.osGroup, 'OSX') }}: + - script: $(BaseJobBuildCommand) + displayName: Build + + # Build corehost, sign and add entitlements to MacOS binaries + - ${{ if eq(parameters.osGroup, 'OSX') }}: + - script: $(BaseJobBuildCommand) -subset corehost + displayName: Build CoreHost + + - ${{ if and(ne(variables['System.TeamProject'], 'public'), notin(variables['Build.Reason'], 'PullRequest')) }}: + - template: /eng/pipelines/common/macos-sign-with-entitlements.yml + parameters: + filesToSign: + - name: dotnet + path: $(Build.SourcesDirectory)/artifacts/bin/osx-${{ parameters.archType }}.$(_BuildConfig)/corehost + entitlementsFile: $(Build.SourcesDirectory)/eng/pipelines/common/entitlements.plist + - name: apphost + path: $(Build.SourcesDirectory)/artifacts/bin/osx-${{ parameters.archType }}.$(_BuildConfig)/corehost + entitlementsFile: $(Build.SourcesDirectory)/eng/pipelines/common/entitlements.plist + + - script: $(BaseJobBuildCommand) -subset installer.nocorehost + displayName: Build and Package - ${{ if in(parameters.osGroup, 'OSX', 'iOS','tvOS') }}: - script: | diff --git a/eng/pipelines/libraries/base-job.yml b/eng/pipelines/libraries/base-job.yml index bf5a9a2321fd..2ae778e89393 100644 --- a/eng/pipelines/libraries/base-job.yml +++ b/eng/pipelines/libraries/base-job.yml @@ -4,7 +4,7 @@ parameters: archType: '' osSubgroup: '' crossrootfsDir: '' - framework: '' + framework: 'net5.0' isOfficialAllConfigurations: false isSourceBuild: false liveRuntimeBuildConfig: '' @@ -25,10 +25,10 @@ parameters: jobs: - template: /eng/common/templates/job/job.yml parameters: - ${{ if notIn(parameters.framework, 'allConfigurations', 'net472') }}: + ${{ if notIn(parameters.framework, 'allConfigurations', 'net48') }}: displayName: ${{ format('Libraries {0} {1}{2} {3} {4}', parameters.displayName, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} name: ${{ format('libraries_{0}_{1}{2}_{3}_{4}', parameters.name, parameters.osGroup, parameters.osSubgroup,
parameters.archType, parameters.buildConfig) }} - ${{ if in(parameters.framework, 'allConfigurations', 'net472') }}: + ${{ if in(parameters.framework, 'allConfigurations', 'net48') }}: displayName: ${{ format('Libraries {0} {1} {2} {3} {4}', parameters.displayName, parameters.osGroup, parameters.framework, parameters.archType, parameters.buildConfig) }} name: ${{ format('libraries_{0}_{1}_{2}{3}_{4}_{5}', parameters.name, parameters.framework, parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} diff --git a/eng/pipelines/libraries/build-job.yml b/eng/pipelines/libraries/build-job.yml index f4b02cf06215..6a70952a3a22 100644 --- a/eng/pipelines/libraries/build-job.yml +++ b/eng/pipelines/libraries/build-job.yml @@ -4,7 +4,7 @@ parameters: osSubgroup: '' archType: '' crossrootfsDir: '' - framework: '' + framework: 'net5.0' isOfficialBuild: false isOfficialAllConfigurations: false runtimeVariant: '' @@ -62,10 +62,8 @@ jobs: - _subset: libs - _additionalBuildArguments: '' - ${{ parameters.variables }} - - ${{ if eq(parameters.osGroup, 'Browser') }}: - - EMSDK_PATH: /usr/local/emscripten - # Tests only run for 'allConfiguration' and 'net472' build-jobs + # Tests only run for 'allConfigurations' and 'net48' build-jobs # If platform is in testBuildPlatforms we build tests as well. - ${{ if or(eq(parameters.runTests, true), containsValue(parameters.testBuildPlatforms, parameters.platform)) }}: - _subset: libs+libs.tests diff --git a/eng/pipelines/libraries/helix-queues-setup.yml b/eng/pipelines/libraries/helix-queues-setup.yml index 58d1f89fe3c0..20ca92542d92 100644 --- a/eng/pipelines/libraries/helix-queues-setup.yml +++ b/eng/pipelines/libraries/helix-queues-setup.yml @@ -64,7 +64,7 @@ jobs: - ${{ if eq(parameters.jobParameters.isFullMatrix, false) }}: - Centos.7.Amd64.Open - RedHat.7.Amd64.Open - - Debian.9.Amd64.Open + - (Debian.10.Amd64.Open)ubuntu.1604.amd64.open@mcr.microsoft.com/dotnet-buildtools/prereqs:debian-10-helix-amd64-bfcd90a-20200121150006 - Ubuntu.1604.Amd64.Open - Ubuntu.1804.Amd64.Open - SLES.15.Amd64.Open @@ -78,17 +78,15 @@ jobs: - ${{ if eq(parameters.jobParameters.isFullMatrix, true) }}: - OSX.1013.Amd64.Open - OSX.1014.Amd64.Open - # The 10.15 machines aren't in the same configuration, see - # https://github.com/dotnet/runtime/issues/24736 - #- OSX.1015.Amd64.Open + - OSX.1015.Amd64.Open - ${{ if eq(parameters.jobParameters.isFullMatrix, false) }}: - - OSX.1013.Amd64.Open - OSX.1014.Amd64.Open + - OSX.1015.Amd64.Open # Windows_NT x64 - ${{ if eq(parameters.platform, 'Windows_NT_x64') }}: # netcoreapp - - ${{ if notIn(parameters.jobParameters.framework, 'allConfigurations', 'net472') }}: + - ${{ if notIn(parameters.jobParameters.framework, 'allConfigurations', 'net48') }}: - ${{ if eq(parameters.jobParameters.isFullMatrix, true) }}: - Windows.81.Amd64.Open - Windows.10.Amd64.ServerRS5.Open @@ -101,8 +99,8 @@ jobs: - ${{ if ne(parameters.jobParameters.runtimeFlavor, 'mono') }}: - (Windows.Nano.1809.Amd64.Open)windows.10.amd64.serverrs5.open@mcr.microsoft.com/dotnet-buildtools/prereqs:nanoserver-1809-helix-amd64-08e8e40-20200107182504 - # NET472 - - ${{ if eq(parameters.jobParameters.framework, 'net472') }}: + # .NETFramework + - ${{ if eq(parameters.jobParameters.framework, 'net48') }}: - Windows.10.Amd64.Client19H1.Open # AllConfigurations @@ -112,7 +110,7 @@ jobs: # Windows_NT x86 - ${{ if eq(parameters.platform, 'Windows_NT_x86') }}: # netcoreapp - - ${{ if notIn(parameters.jobParameters.framework, 'allConfigurations',
'net472') }}: + - ${{ if notIn(parameters.jobParameters.framework, 'allConfigurations', 'net48') }}: - ${{ if eq(parameters.jobParameters.isFullMatrix, true) }}: - Windows.7.Amd64.Open - Windows.10.Amd64.ServerRS5.Open @@ -124,8 +122,8 @@ jobs: - Windows.7.Amd64.Open - Windows.10.Amd64.Server19H1.Open - # NET472 - - ${{ if eq(parameters.jobParameters.framework, 'net472') }}: + # .NETFramework + - ${{ if eq(parameters.jobParameters.framework, 'net48') }}: - Windows.10.Amd64.Client19H1.Open # Windows_NT arm diff --git a/eng/pipelines/libraries/outerloop.yml b/eng/pipelines/libraries/outerloop.yml index 9afbf206e5f3..d88481ea37dd 100644 --- a/eng/pipelines/libraries/outerloop.yml +++ b/eng/pipelines/libraries/outerloop.yml @@ -99,6 +99,6 @@ jobs: jobParameters: isOfficialBuild: ${{ variables['isOfficialBuild'] }} isFullMatrix: ${{ variables['isFullMatrix'] }} - framework: net472 + framework: net48 runTests: true testScope: outerloop \ No newline at end of file diff --git a/eng/pipelines/libraries/run-test-job.yml b/eng/pipelines/libraries/run-test-job.yml index 6c5e397d4dd0..58bcd933f3da 100644 --- a/eng/pipelines/libraries/run-test-job.yml +++ b/eng/pipelines/libraries/run-test-job.yml @@ -3,7 +3,7 @@ parameters: osGroup: '' osSubgroup: '' archType: '' - framework: '' + framework: 'net5.0' isOfficialBuild: false liveRuntimeBuildConfig: '' runtimeFlavor: 'coreclr' @@ -55,7 +55,7 @@ jobs: - ${{ if ne(parameters.dependsOn[0], '') }}: - ${{ parameters.dependsOn }} - ${{ if eq(parameters.dependsOn[0], '') }}: - - ${{ if notIn(parameters.framework, 'allConfigurations', 'net472') }}: + - ${{ if notIn(parameters.framework, 'allConfigurations', 'net48') }}: - ${{ format('libraries_build_{0}{1}_{2}_{3}', parameters.osGroup, parameters.osSubgroup, parameters.archType, parameters.buildConfig) }} # tests are built as part of product build - ${{ if or(ne(parameters.archType, parameters.dependsOnTestArchitecture), ne(parameters.buildConfig, parameters.dependsOnTestBuildConfiguration)) }}: @@ -143,10 +143,7 @@ jobs: - jitstress2 - jitstress2_tiered - zapdisable - # tailcallstress currently has hundreds of failures on Linux/arm32, so disable it. - # Tracked by https://github.com/dotnet/runtime/issues/38892. 
- - ${{ if or(eq(parameters.osGroup, 'Windows_NT'), ne(parameters.archType, 'arm')) }}: - - tailcallstress + - tailcallstress ${{ if in(parameters.coreclrTestGroup, 'jitstressregs' ) }}: scenarios: - jitstressregs1 diff --git a/eng/pipelines/mono/templates/build-job.yml b/eng/pipelines/mono/templates/build-job.yml index 901bab9a0f66..e46ca5ee005f 100644 --- a/eng/pipelines/mono/templates/build-job.yml +++ b/eng/pipelines/mono/templates/build-job.yml @@ -68,8 +68,6 @@ jobs: - name: osOverride value: -os Android - ${{ if eq(parameters.osGroup, 'Browser') }}: - - name: EMSDK_PATH - value: /usr/local/emscripten - name: archType value: wasm - name: osOverride diff --git a/eng/pipelines/mono/templates/xplat-job.yml b/eng/pipelines/mono/templates/xplat-job.yml index 28971ca101ba..0cb275692c5c 100644 --- a/eng/pipelines/mono/templates/xplat-job.yml +++ b/eng/pipelines/mono/templates/xplat-job.yml @@ -79,12 +79,6 @@ jobs: - name: osSubgroup value: ${{ parameters.osSubgroup }} - - name: coreClrRepoRoot - value: '$(Build.SourcesDirectory)/src/coreclr' - - - name: coreClrRepoRootDir - value: '$(coreClrRepoRoot)$(dir)' - - ${{ if and(eq(variables['System.TeamProject'], 'internal'), ne(variables['Build.Reason'], 'PullRequest')) }}: - name: _HelixSource value: official/dotnet/runtime/$(Build.SourceBranch) diff --git a/eng/pipelines/runtime-linker-tests.yml b/eng/pipelines/runtime-linker-tests.yml index ceffb1e237e7..bb51f3c0c736 100644 --- a/eng/pipelines/runtime-linker-tests.yml +++ b/eng/pipelines/runtime-linker-tests.yml @@ -64,6 +64,7 @@ jobs: - Linux_x64 jobParameters: testGroup: innerloop + timeoutInMinutes: 120 nameSuffix: Runtime_Release buildArgs: -s clr+libs -c $(_BuildConfig) extraStepsTemplate: /eng/pipelines/libraries/execute-trimming-tests-steps.yml diff --git a/eng/pipelines/runtime-official.yml b/eng/pipelines/runtime-official.yml index e1a3f8e7eefb..619db5a33c99 100644 --- a/eng/pipelines/runtime-official.yml +++ b/eng/pipelines/runtime-official.yml @@ -139,7 +139,8 @@ stages: buildConfig: release runtimeFlavor: mono jobParameters: - buildArgs: -s mono+libs+installer -c $(_BuildConfig) /p:MonoEnableLLVM=true + buildArgs: -s mono+libs+installer -c $(_BuildConfig) + /p:MonoEnableLLVM=true /p:MonoBundleLLVMOptimizer=false nameSuffix: AllSubsets_Mono_LLVMJIT runtimeVariant: LLVMJIT isOfficialBuild: ${{ variables.isOfficialBuild }} diff --git a/eng/pipelines/runtime.yml b/eng/pipelines/runtime.yml index 831fa959900c..55911d981203 100644 --- a/eng/pipelines/runtime.yml +++ b/eng/pipelines/runtime.yml @@ -226,7 +226,8 @@ jobs: jobTemplate: /eng/pipelines/coreclr/templates/format-job.yml platforms: - Linux_x64 - - Windows_NT_x64 + # Issue: https://github.com/dotnet/runtime/issues/40034 + #- Windows_NT_x64 jobParameters: condition: >- and( @@ -339,6 +340,44 @@ jobs: eq(variables['monoContainsChange'], true), eq(variables['isFullMatrix'], true)) +# +# Build the whole product using Mono and run runtime tests +# +- template: /eng/pipelines/common/platform-matrix.yml + parameters: + jobTemplate: /eng/pipelines/common/global-build-job.yml + helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml + buildConfig: Release + runtimeFlavor: mono + platforms: + - Browser_wasm + variables: + - ${{ if and(eq(variables['System.TeamProject'], 'public'), eq(variables['Build.Reason'], 'PullRequest')) }}: + - name: _HelixSource + value: pr/dotnet/runtime/$(Build.SourceBranch) + - ${{ if and(eq(variables['System.TeamProject'], 'public'), ne(variables['Build.Reason'],
'PullRequest')) }}: + - name: _HelixSource + value: ci/dotnet/runtime/$(Build.SourceBranch) + - name: timeoutPerTestInMinutes + value: 10 + - name: timeoutPerTestCollectionInMinutes + value: 200 + jobParameters: + testGroup: innerloop + nameSuffix: AllSubsets_Mono_RuntimeTests + buildArgs: -s mono+libs -c $(_BuildConfig) + timeoutInMinutes: 180 + condition: >- + or( + eq(dependencies.checkout.outputs['SetPathVars_runtimetests.containsChange'], true), + eq(dependencies.checkout.outputs['SetPathVars_mono.containsChange'], true), + eq(variables['isFullMatrix'], true)) + # extra steps, run tests + extraStepsTemplate: /eng/pipelines/common/templates/runtimes/wasm-runtime-and-send-to-helix.yml + extraStepsParameters: + creator: dotnet-bot + testRunNamePrefixSuffix: Mono_$(_BuildConfig) + # # Build Mono and Installer on LLVMJIT mode # @@ -576,7 +615,7 @@ jobs: helixQueuesTemplate: /eng/pipelines/libraries/helix-queues-setup.yml jobParameters: isFullMatrix: ${{ variables.isFullMatrix }} - framework: net472 + framework: net48 runTests: true testScope: innerloop condition: >- diff --git a/eng/pipelines/runtimelab.yml b/eng/pipelines/runtimelab.yml index a58307c71263..04d9e8ad34c1 100644 --- a/eng/pipelines/runtimelab.yml +++ b/eng/pipelines/runtimelab.yml @@ -106,7 +106,7 @@ jobs: jobTemplate: /eng/pipelines/common/templates/runtimes/build-test-job.yml buildConfig: Checked platforms: - - Linux_x64 + - CoreClrTestBuildHost # Either OSX_x64 or Linux_x64 jobParameters: testGroup: innerloop liveLibrariesBuildConfig: Release diff --git a/eng/referenceAssemblies.props b/eng/referenceAssemblies.props index a5e0e553f12f..4aabb96b8999 100644 --- a/eng/referenceAssemblies.props +++ b/eng/referenceAssemblies.props @@ -1,9 +1,12 @@ - $(AdditionalBuildTargetFrameworks);netstandard2.0 - $(AdditionalBuildTargetFrameworks);netstandard2.1 + !$(BuildTargetFramework.StartsWith('netstandard')) and + !$(BuildTargetFramework.StartsWith('net4'))"> + $(AdditionalBuildTargetFrameworks);netstandard2.0 + $(AdditionalBuildTargetFrameworks);netstandard2.1 + + + false diff --git a/eng/referenceFromRuntime.targets b/eng/referenceFromRuntime.targets deleted file mode 100644 index 34d0eb49199c..000000000000 --- a/eng/referenceFromRuntime.targets +++ /dev/null @@ -1,124 +0,0 @@ - - - - AddRuntimeProjectReference; - $(PrepareProjectReferencesDependsOn); - - - AddRuntimeProjectReference; - $(ResolveReferencesDependsOn); - - - AddRuntimeProjectReference; - $(CleanDependsOn) - - - - - $([MSBuild]::NormalizePath('$(LibrariesProjectRoot)', 'restore', 'runtime', 'runtime.depproj')) - - - - - - - - false - _referencePathFromRestoredRuntime - - - - - - - - - - - - - - - - - - - - - - - - - <_referencePathFromRuntime Include="@(RuntimeFiles)" Private="false" /> - <_referencePathFromRuntime Include="@(_referencePathFromRestoredRuntime)" Private="false" /> - - <_referencePathFromRuntime Include="@(ReferenceFromRuntime->'$(RuntimePath)%(Identity).dll')" Condition="'$(IsTestProject)' == 'true' or '$(IsTestSupportProject)' == 'true'" /> - - <_referencePathFromRuntimeByFileName Include="@(_referencePathFromRuntime->'%(FileName)')" Condition="'%(_referencePathFromRuntime.Extension)' == '.dll'" > - %(Identity) - - - - - - - - - - - <_filteredReferencePathFromRuntimeByFileName Include="@(_referencePathFromRuntimeByFileNameFiltered)" - Condition="'@(_referencePathFromRuntimeByFileNameFiltered)' == '@(ReferenceFromRuntime)' and '%(Identity)' != ''"> - @(ReferenceFromRuntime->'%(Aliases)') - - - <_remainingReferenceFromRuntime 
Include="@(ReferenceFromRuntime)" Exclude="@(_filteredReferencePathFromRuntimeByFileName)" /> - - - <_remainingReferenceFromRuntimeWithNI Include="@(_remainingReferenceFromRuntime->'%(Identity).ni')"> - %(Identity) - - - <_filteredReferencePathFromRuntimeByFileName Include="@(_referencePathFromRuntimeByFileNameFiltered)" - Condition="'@(_referencePathFromRuntimeByFileNameFiltered)' == '@(_remainingReferenceFromRuntimeWithNI)' and '%(Identity)' != ''"> - @(_remainingReferenceFromRuntimeWithNI->'%(Aliases)') - - - <_missingReferenceFromRuntime Include="@(_remainingReferenceFromRuntimeWithNI)" Exclude="@(_filteredReferencePathFromRuntimeByFileName)" /> - - - - - - - - <_aliasedReferencePathFromRuntime Include="@(_filteredReferencePathFromRuntimeByFileName->'%(ReferencePath)')" Condition="'%(_filteredReferencePathFromRuntimeByFileName.Aliases)' != ''" /> - - - - - - - - - - - - - - diff --git a/eng/references.targets b/eng/references.targets index efcac7d05152..38037cf78ab0 100644 --- a/eng/references.targets +++ b/eng/references.targets @@ -1,67 +1,56 @@ - $(RefPath) - $(RefPath) - $(AssemblySearchPaths);$(RefPath);{RawFileName} <_FindDependencies>false - - <_TargetFrameworkDirectories>$(MSBuildThisFileDirectory) - <_FullFrameworkReferenceAssemblyPaths>$(MSBuildThisFileDirectory) - - - true - true - - - - - - - - - - - - - - false - - - + + + + - - - + + + + - - - + + + + false + + - - %(DefaultReferenceDirs.Identity) - - <_defaultReferenceExclusionsFullPath Include="%(DefaultReferenceExclusions.RefDir)%(DefaultReferenceExclusions.Identity).dll" /> - - - + + + + <_transitiveProjectReferenceWithExclusion Include="@(ProjectReference)"> + %(DefaultReferenceExclusion.Identity) + + - + + - + - - \ No newline at end of file + diff --git a/eng/resolveContract.targets b/eng/resolveContract.targets index 2f59f5d9a517..6f4bb0e253ee 100644 --- a/eng/resolveContract.targets +++ b/eng/resolveContract.targets @@ -1,8 +1,18 @@ + + $(MicrosoftNetCoreAppRefPackRefDir) + + $(ContractDependencyPaths);@(ReferencePath->'%(RelativeDir)'->Distinct()) + + - $(LibrariesProjectRoot)$(MSBuildProjectName)/ref/$(MSBuildProjectName).csproj + $(LibrariesProjectRoot)$(MSBuildProjectName)\ref\$(MSBuildProjectName).csproj true - $(RefPath)/$(MSBuildProjectName).dll + $(NetCoreAppCurrentRefPath)$(TargetFileName) + $([MSBuild]::NormalizePath('$(BaseOutputPath)', 'ref', '$(TargetFramework)-$(Configuration)', '$(TargetFileName)')) false @@ -10,8 +20,43 @@ + + + false + ResolvedMatchingContract + + + + + + + + + + + + + false + + + + <_resolvedP2PFiltered Include="@(ProjectReference)"> + $([System.IO.Path]::GetFullPath('%(ProjectReference.Identity)')) + %(ProjectReference.SkipUseReferenceAssembly) + + <_ResolvedProjectReferencePaths Update="@(_resolvedProjectReferenceFiltred)" + Condition="'%(_resolvedP2PFiltered.ProjectReferenceItemSpec)' == '%(_resolvedP2PFiltered.MSBuildSourceProjectFile)' and + '%(_resolvedP2PFiltered.SkipUseReferenceAssembly)' == 'true'" + ReferenceAssembly="" /> + + \ No newline at end of file diff --git a/eng/restore/repoRestore.props b/eng/restore/repoRestore.props index 300c542e8cbe..d2ca92d6db01 100644 --- a/eng/restore/repoRestore.props +++ b/eng/restore/repoRestore.props @@ -1,5 +1,6 @@ + $(RepoRoot)artifacts\toolset\Common\ false unused diff --git a/eng/targetingpacks.targets b/eng/targetingpacks.targets new file mode 100644 index 000000000000..404f9552d073 --- /dev/null +++ b/eng/targetingpacks.targets @@ -0,0 +1,111 @@ + + + <_UseLocalTargetingRuntimePack>true + false + + + + 
<_ShortFrameworkIdentifier>$(TargetFramework.TrimEnd('.0123456789')) + <_ShortFrameworkVersion>$(TargetFramework.Substring($(_ShortFrameworkIdentifier.Length))) + + + + + + + $(PkgMicrosoft_NETCore_App)\ref\$(_ShortFrameworkIdentifier)$(_ShortFrameworkVersion)\ + + + + + + + + + + + false + + + + + + + %(ResolvedFrameworkReference.TargetingPackPath)\ref\$(_ShortFrameworkIdentifier)$(_ShortFrameworkVersion)\ + + + + + + + + + + $(AssemblySearchPaths);$(MicrosoftNetCoreAppRefPackRefDir.TrimEnd('/\')) + $(DesignTimeAssemblySearchPaths);$(MicrosoftNetCoreAppRefPackRefDir.TrimEnd('/\')) + + + + + + + + + + + + + + + + + + + + + + + <_targetingPackReferenceExclusion Include="$(TargetName)" /> + <_targetingPackReferenceExclusion Include="@(_ResolvedProjectReferencePaths->'%(Filename)')" /> + <_targetingPackReferenceExclusion Include="@(DefaultReferenceExclusion)" /> + + + + <_targetingPackReferenceWithExclusion Include="@(Reference)"> + %(_targetingPackReferenceExclusion.Identity) + + + + + \ No newline at end of file diff --git a/eng/testing/.runsettings b/eng/testing/.runsettings index fabc0310a7aa..cf00c4a46737 100644 --- a/eng/testing/.runsettings +++ b/eng/testing/.runsettings @@ -18,10 +18,6 @@ $$TESTCASEFILTER$$ $$DOTNETHOSTPATH$$ - - - $$DEVPATH$$ - diff --git a/eng/testing/RunnerTemplate.cmd b/eng/testing/RunnerTemplate.cmd index fe28e9157a6e..10737eabe709 100644 --- a/eng/testing/RunnerTemplate.cmd +++ b/eng/testing/RunnerTemplate.cmd @@ -41,12 +41,6 @@ set EXECUTION_DIR=%~dp0 :argparser_end -if not defined RUNTIME_PATH ( - echo error: -r^|--runtime-path argument is required. - call :usage - exit /b -1 -) - :: Don't use a globally installed SDK. set DOTNET_MULTILEVEL_LOOKUP=0 diff --git a/eng/testing/linker/SupportFiles/Directory.Build.targets b/eng/testing/linker/SupportFiles/Directory.Build.targets index 49422a33b9be..a9dfda8012d9 100644 --- a/eng/testing/linker/SupportFiles/Directory.Build.targets +++ b/eng/testing/linker/SupportFiles/Directory.Build.targets @@ -20,11 +20,14 @@ + BeforeTargets="PrepareForILLink"> link + + + diff --git a/eng/testing/linker/project.csproj.template b/eng/testing/linker/project.csproj.template index ef63096e8019..9e1501f1b98d 100644 --- a/eng/testing/linker/project.csproj.template +++ b/eng/testing/linker/project.csproj.template @@ -6,9 +6,24 @@ {RuntimeIdentifier} {RuntimePackDir} {TargetingPackDir} - <_ExtraTrimmerArgs>{ExtraTrimmerArgs} $(_ExtraTrimmerArgs) + + + IL2026 + + $(LinkerNoWarn);IL2032;IL2055;IL2057;IL2058;IL2059;IL2060;IL2061 + + $(LinkerNoWarn);IL2062;IL2063;IL2064;IL2065;IL2066 + + $(LinkerNoWarn);IL2067;IL2068;IL2069;IL2070;IL2071;IL2072;IL2073;IL2074;IL2075;IL2076;IL2077;IL2078;IL2079;IL2080;IL2081;IL2082;IL2083;IL2084;IL2085;IL2086;IL2087;IL2088;IL2089;IL2090;IL2091 + + $(LinkerNoWarn);IL2008;IL2009;IL2037 + <_ExtraTrimmerArgs>{ExtraTrimmerArgs} $(_ExtraTrimmerArgs) --nowarn $(LinkerNoWarn) + + {AdditionalProjectReferences} + + @@ -28,4 +43,4 @@ GeneratePathProperty="true" /> - \ No newline at end of file + diff --git a/eng/testing/linker/trimmingTests.targets b/eng/testing/linker/trimmingTests.targets index 3729bd306bca..6d89002d9e0c 100644 --- a/eng/testing/linker/trimmingTests.targets +++ b/eng/testing/linker/trimmingTests.targets @@ -57,6 +57,15 @@ <_projectSourceFile>%(TestConsoleApps.ProjectCompileItems) + + <_additionalProjectReferenceTemp Include="$(AdditionalProjectReferences)" /> + <_additionalProjectReference Include="<ProjectReference 
Include="$(LibrariesProjectRoot)%(_additionalProjectReferenceTemp.Identity)\src\%(_additionalProjectReferenceTemp.Identity).csproj" SkipUseReferenceAssembly="true" />" /> + + + + <_additionalProjectReferencesString>@(_additionalProjectReference, '%0a') + + <_additionalProjectSourceFiles Include="%(TestConsoleApps.AdditionalSourceFiles)" /> @@ -69,7 +78,8 @@ .Replace('{TargetingPackDir}','$(MicrosoftNetCoreAppRefPackDir)') .Replace('{RuntimeIdentifier}','%(TestConsoleApps.TestRuntimeIdentifier)') .Replace('{MicrosoftNETILLinkTasksVersion}', '$(MicrosoftNETILLinkTasksVersion)') - .Replace('{ExtraTrimmerArgs}', '%(TestConsoleApps.ExtraTrimmerArgs)'))" + .Replace('{ExtraTrimmerArgs}', '%(TestConsoleApps.ExtraTrimmerArgs)') + .Replace('{AdditionalProjectReferences}', '$(_additionalProjectReferencesString)'))" Overwrite="true" /> diff --git a/eng/testing/netfx.exe.config b/eng/testing/netfx.exe.config index ed7d7d082438..e131497fc761 100644 --- a/eng/testing/netfx.exe.config +++ b/eng/testing/netfx.exe.config @@ -1,7 +1,6 @@ - \ No newline at end of file diff --git a/eng/testing/outerBuild.targets b/eng/testing/outerBuild.targets index 4623b0e54d25..c071944c21d9 100644 --- a/eng/testing/outerBuild.targets +++ b/eng/testing/outerBuild.targets @@ -1,8 +1,12 @@ + Targets="Test"> + + + + \ No newline at end of file diff --git a/eng/testing/runsettings.targets b/eng/testing/runsettings.targets index 10496127b8a9..5a2c9a84d689 100644 --- a/eng/testing/runsettings.targets +++ b/eng/testing/runsettings.targets @@ -36,7 +36,6 @@ .Replace('$$DISABLEPARALLELIZATION$$', '$([MSBuild]::ValueOrDefault('$(TestDisableParallelization)', 'false'))') .Replace('$$DISABLEAPPDOMAIN$$', '$([MSBuild]::ValueOrDefault('$(TestDisableAppDomain)', 'false'))') .Replace('$$TESTCASEFILTER$$', '$(_testFilter)') - .Replace('$$DEVPATH$$', '$(TestHostRootPath)') .Replace('$$DOTNETHOSTPATH$$', '$(TestHostRootPath)$([System.IO.Path]::GetFileName('$(DotNetTool)'))')) diff --git a/eng/testing/runtimeConfiguration.targets b/eng/testing/runtimeConfiguration.targets index f687adfad40f..d69182992c7b 100644 --- a/eng/testing/runtimeConfiguration.targets +++ b/eng/testing/runtimeConfiguration.targets @@ -1,45 +1,31 @@ - - $(MSBuildThisFileDirectory)netfx.exe.config - - true + true + true + true + $(MSBuildThisFileDirectory)netfx.exe.config + + $(TargetPath).config - - + - - - - - - - \ No newline at end of file diff --git a/eng/testing/tests.mobile.targets b/eng/testing/tests.mobile.targets index 5622b19e249b..039b0d9a39b3 100644 --- a/eng/testing/tests.mobile.targets +++ b/eng/testing/tests.mobile.targets @@ -132,43 +132,43 @@ + + - + - - - - - - - - - - - - - - - - - - - - - - - + AssemblySearchPaths="@(AssemblySearchPaths)" /> + + + + + + 2020a + + + + + + + - - - - - - - - - - - - + DependsOnTargets="Publish;BundleTestAppleApp;BundleTestAndroidApp;BundleTestWasmApp;BundleWasmTestData;ArchiveTests" /> diff --git a/eng/testing/tests.props b/eng/testing/tests.props index bf2f93fcceb3..65664f7f7499 100644 --- a/eng/testing/tests.props +++ b/eng/testing/tests.props @@ -29,8 +29,6 @@ $(PackageRID) true - false - diff --git a/eng/testing/tests.targets b/eng/testing/tests.targets index 757ff0163600..2fe669a10ba0 100644 --- a/eng/testing/tests.targets +++ b/eng/testing/tests.targets @@ -19,11 +19,6 @@ $(RunScriptHostDir)dotnet - - - - - PrepareForRun @@ -96,7 +91,10 @@ - "$(RunScriptOutputPath)" --runtime-path "$(TestHostRootPath.TrimEnd('\/'))" + "$(RunScriptOutputPath)" + + $(RunTestsCommand) --runtime-path "$(TestHostRootPath.TrimEnd('\/'))" 
$(RunTestsCommand) --rsp-file "$(TestRspFile)" "$(RunScriptOutputPath)" $(AssemblyName) $(TargetArchitecture) "$(RunScriptOutputPath)" $(JSEngine) $(AssemblyName).dll $(_withoutCategories.Replace(';', ' -notrait category=')) diff --git a/eng/testing/xunit/xunit.console.targets b/eng/testing/xunit/xunit.console.targets index 6364d461124c..5b71327e9645 100644 --- a/eng/testing/xunit/xunit.console.targets +++ b/eng/testing/xunit/xunit.console.targets @@ -43,19 +43,17 @@ - - <_testRunnerConfigSourceFile Include="$(TargetDir)$(TargetName).exe.config" /> <_testRunnerConfigDestFile Include="$(TargetDir)xunit.console.exe.config" /> - - diff --git a/eng/testing/xunit/xunit.targets b/eng/testing/xunit/xunit.targets index b14d3b7cbf22..feb429aab6bf 100644 --- a/eng/testing/xunit/xunit.targets +++ b/eng/testing/xunit/xunit.targets @@ -1,4 +1,11 @@ + + + + + $(OutDir) diff --git a/eng/versioning.targets b/eng/versioning.targets index 1eba158bb6be..efb7af48b76c 100644 --- a/eng/versioning.targets +++ b/eng/versioning.targets @@ -22,13 +22,25 @@ - + - - <_Parameter1>$(MinimiumSupportedWindowsPlatform) + + <_Parameter1>windows + + <_unsupportedOSPlatforms Include="$(UnsupportedOSPlatforms)" /> + + + + + + <_Parameter1>%(_unsupportedOSPlatforms.Identity) + + + + diff --git a/global.json b/global.json index 64916dd7429c..30a1da02a1ea 100644 --- a/global.json +++ b/global.json @@ -12,10 +12,10 @@ "python3": "3.7.1" }, "msbuild-sdks": { - "Microsoft.DotNet.Build.Tasks.TargetFramework.Sdk": "5.0.0-beta.20364.3", - "Microsoft.DotNet.Arcade.Sdk": "5.0.0-beta.20364.3", - "Microsoft.DotNet.Build.Tasks.SharedFramework.Sdk": "5.0.0-beta.20364.3", - "Microsoft.DotNet.Helix.Sdk": "5.0.0-beta.20364.3", + "Microsoft.DotNet.Build.Tasks.TargetFramework.Sdk": "5.0.0-beta.20419.21", + "Microsoft.DotNet.Arcade.Sdk": "5.0.0-beta.20419.21", + "Microsoft.DotNet.Build.Tasks.SharedFramework.Sdk": "5.0.0-beta.20419.21", + "Microsoft.DotNet.Helix.Sdk": "5.0.0-beta.20419.21", "Microsoft.NET.Sdk.IL": "5.0.0-preview.8.20359.4", "Microsoft.Build.NoTargets": "1.0.53", diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 0d60cf16fc10..b94eb54a6715 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.14.5) +cmake_minimum_required(VERSION 3.6.2) cmake_policy(SET CMP0042 NEW) diff --git a/src/coreclr/build-runtime.cmd b/src/coreclr/build-runtime.cmd index aa9573d790cb..628f4eefbff2 100644 --- a/src/coreclr/build-runtime.cmd +++ b/src/coreclr/build-runtime.cmd @@ -550,7 +550,7 @@ if %__BuildNative% EQU 1 ( if /i "%__BuildArch%" == "arm64" goto SkipCopyUcrt - set "__UCRTDir=%UniversalCRTSDKDIR%Redist\ucrt\DLLs\%__BuildArch%\" + set "__UCRTDir=%UniversalCRTSDKDIR%Redist\ucrt\DLLs\%__BuildArch%\" if not exist "!__UCRTDir!" set "__UCRTDir=%UniversalCRTSDKDIR%Redist\%UCRTVersion%\ucrt\DLLs\%__BuildArch%\" if not exist "!__UCRTDir!" ( echo %__ErrMsgPrefix%%__MsgPrefix%Error: Please install the Redistributable Universal C Runtime. 
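A note on the eng/versioning.targets hunk above: the new _unsupportedOSPlatforms item list fans the semicolon-separated UnsupportedOSPlatforms property out into one assembly-level attribute per platform. The XML element names did not survive extraction here, so the sketch below assumes the attribute types are System.Runtime.Versioning.SupportedOSPlatformAttribute and UnsupportedOSPlatformAttribute (matching the [SupportedOSPlatform("windows")] annotations added elsewhere in this diff); the "browser" value is a hypothetical project setting, not taken from this change.

    // Minimal sketch of what the generated AssemblyAttribute items amount to.
    // Assumptions: attribute type names and the "browser" value are illustrative.
    using System;
    using System.Reflection;
    using System.Runtime.Versioning;

    [assembly: SupportedOSPlatform("windows")]   // windows-specific assemblies
    [assembly: UnsupportedOSPlatform("browser")] // one per _unsupportedOSPlatforms item

    internal static class PlatformAnnotationDump
    {
        private static void Main()
        {
            // Enumerate the emitted platform annotations to confirm the fan-out.
            foreach (CustomAttributeData cad in
                     typeof(PlatformAnnotationDump).Assembly.GetCustomAttributesData())
            {
                if (cad.AttributeType == typeof(SupportedOSPlatformAttribute) ||
                    cad.AttributeType == typeof(UnsupportedOSPlatformAttribute))
                {
                    Console.WriteLine($"{cad.AttributeType.Name}(\"{cad.ConstructorArguments[0].Value}\")");
                }
            }
        }
    }
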
diff --git a/src/coreclr/build-test.sh b/src/coreclr/build-test.sh index b67fbef45f92..9a8a471a5655 100755 --- a/src/coreclr/build-test.sh +++ b/src/coreclr/build-test.sh @@ -34,7 +34,6 @@ build_test_wrappers() nextCommand="\"${__DotNetCli}\" msbuild \"${__ProjectDir}/tests/src/runtest.proj\" /nodereuse:false /p:BuildWrappers=true /p:TestBuildMode=$__TestBuildMode /p:TargetsWindows=false $__Logging /p:TargetOS=$__TargetOS /p:Configuration=$__BuildType /p:TargetArchitecture=$__BuildArch /p:RuntimeFlavor=$__RuntimeFlavor \"/bl:${__RepoRootDir}/artifacts/log/${__BuildType}/build_test_wrappers_${__RuntimeFlavor}.binlog\" ${__UnprocessedBuildArgs[@]}" eval $nextCommand - local exitCode="$?" if [[ "$exitCode" -ne 0 ]]; then echo "${__ErrMsgPrefix}${__MsgPrefix}Error: XUnit wrapper build failed. Refer to the build log files for details (above)" @@ -368,7 +367,7 @@ build_Tests() fi fi - if [[ "$__SkipNative" != 1 ]]; then + if [[ "$__SkipNative" != 1 && "$__BuildArch" != "wasm" ]]; then build_native "$__BuildArch" "$__TestDir" "$__ProjectRoot" "$__NativeTestIntermediatesDir" "CoreCLR test component" if [[ "$?" -ne 0 ]]; then @@ -404,7 +403,7 @@ build_Tests() if [[ "$__CopyNativeTestBinaries" == 1 ]]; then echo "Copying native test binaries to output..." - build_MSBuild_projects "Tests_Managed" "$__ProjectDir/tests/build.proj" "Managed tests build (build tests)" "/t:CopyAllNativeProjectReferenceBinaries" + build_MSBuild_projects "Tests_Managed" "$__ProjectDir/tests/build.proj" "Managed tests build (build tests)" "/t:CopyAllNativeProjectReferenceBinaries" "/bl:${__RepoRootDir}/artifacts/log/${__BuildType}/copy_native_test_binaries${__RuntimeFlavor}.binlog" if [[ "$?" -ne 0 ]]; then echo "${__ErrMsgPrefix}${__MsgPrefix}Error: copying native test binaries failed. 
Refer to the build log files for details (above)" diff --git a/src/coreclr/clr.featuredefines.props b/src/coreclr/clr.featuredefines.props index b5979c8d2390..c5119c80424d 100644 --- a/src/coreclr/clr.featuredefines.props +++ b/src/coreclr/clr.featuredefines.props @@ -66,9 +66,5 @@ $(DefineConstants);PROFILING_SUPPORTED $(DefineConstants);FEATURE_PROFAPI_ATTACH_DETACH - - $(DefineConstants);TARGET_UNIX - $(DefineConstants);TARGET_WINDOWS - $(DefineConstants);TARGET_OSX diff --git a/src/coreclr/clrdefinitions.cmake b/src/coreclr/clrdefinitions.cmake index 7be0a6915051..b717c10d840a 100644 --- a/src/coreclr/clrdefinitions.cmake +++ b/src/coreclr/clrdefinitions.cmake @@ -30,7 +30,6 @@ if (CLR_CMAKE_TARGET_UNIX) if(CLR_CMAKE_TARGET_OSX) add_definitions(-D_XOPEN_SOURCE) - add_definitions(-DFEATURE_DATATARGET4) endif(CLR_CMAKE_TARGET_OSX) if (CLR_CMAKE_TARGET_ARCH_AMD64) @@ -173,9 +172,9 @@ set(FEATURE_READYTORUN 1) add_compile_definitions($<$>>:FEATURE_REJIT>) -if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_OSX) +if (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_TARGET_UNIX) add_definitions(-DFEATURE_REMOTE_PROC_MEM) -endif (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_TARGET_UNIX AND NOT CLR_CMAKE_TARGET_OSX) +endif (CLR_CMAKE_HOST_UNIX AND CLR_CMAKE_TARGET_UNIX) if (CLR_CMAKE_TARGET_UNIX OR CLR_CMAKE_TARGET_ARCH_ARM64) add_definitions(-DFEATURE_STUBS_AS_IL) diff --git a/src/coreclr/crossgen-corelib.cmd b/src/coreclr/crossgen-corelib.cmd index f47aa1799093..64957d7757e8 100644 --- a/src/coreclr/crossgen-corelib.cmd +++ b/src/coreclr/crossgen-corelib.cmd @@ -182,7 +182,7 @@ if %__PgoInstrument% EQU 1 ( goto ExitWithError ) - REM HACK: Workaround for https://github.com/dotnet/coreclr/issues/13970 + REM HACK: Workaround for https://github.com/dotnet/runtime/issues/8929 set __PgoRtPath= for /f "tokens=*" %%f in ('where pgort*.dll') do ( if not defined __PgoRtPath set "__PgoRtPath=%%~f" diff --git a/src/coreclr/dir.common.props b/src/coreclr/dir.common.props index f45eb84e8f27..9dca67abfb99 100644 --- a/src/coreclr/dir.common.props +++ b/src/coreclr/dir.common.props @@ -43,7 +43,7 @@ - 5.0.0 + 6.0.0 false diff --git a/src/coreclr/pgosupport.cmake b/src/coreclr/pgosupport.cmake index 4b1198090171..04bde2bc20bc 100644 --- a/src/coreclr/pgosupport.cmake +++ b/src/coreclr/pgosupport.cmake @@ -1,5 +1,18 @@ -include(CheckIPOSupported) -check_ipo_supported(RESULT HAVE_LTO) +include(CheckCXXSourceCompiles) +include(CheckCXXCompilerFlag) + +# VC++ guarantees support for LTCG (LTO's equivalent) +if(NOT WIN32) + # Function required to give CMAKE_REQUIRED_* local scope + function(check_have_lto) + set(CMAKE_REQUIRED_FLAGS -flto) + set(CMAKE_REQUIRED_LIBRARIES -flto -fuse-ld=gold) + check_cxx_source_compiles("int main() { return 0; }" HAVE_LTO) + endfunction(check_have_lto) + check_have_lto() + + check_cxx_compiler_flag(-faligned-new COMPILER_SUPPORTS_F_ALIGNED_NEW) +endif(NOT WIN32) # Adds Profile Guided Optimization (PGO) flags to the current target function(add_pgo TargetName) diff --git a/src/coreclr/scripts/coreclr_arguments.py b/src/coreclr/scripts/coreclr_arguments.py index 6806446128c9..df777d1a6b8f 100644 --- a/src/coreclr/scripts/coreclr_arguments.py +++ b/src/coreclr/scripts/coreclr_arguments.py @@ -71,9 +71,9 @@ def __init__(self, self.require_built_core_root = require_built_core_root self.require_built_test_dir = require_built_test_dir - self.valid_arches = 
["x64", "x86", "arm", "arm64", "wasm"] self.valid_build_types = ["Debug", "Checked", "Release"] - self.valid_host_os = ["Windows_NT", "OSX", "Linux", "illumos", "Solaris"] + self.valid_host_os = ["Windows_NT", "OSX", "Linux", "illumos", "Solaris", "Browser"] self.__initialize__(args) diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index fa0f3e2f165d..12be260c67a9 100755 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -860,7 +860,8 @@ def replay(self): altjit_string = "*" if self.coreclr_args.altjit else "" altjit_flags = [ "-jitoption", "force", "AltJit=" + altjit_string, - "-jitoption", "force", "AltJitNgen=" + altjit_string + "-jitoption", "force", "AltJitNgen=" + altjit_string, + "-jitoption", "force", "EnableExtraSuperPmiQueries=0" ] flags += altjit_flags @@ -1032,8 +1033,10 @@ def replay_with_asm_diffs(self): altjit_flags = [ "-jitoption", "force", "AltJit=" + altjit_string, "-jitoption", "force", "AltJitNgen=" + altjit_string, + "-jitoption", "force", "EnableExtraSuperPmiQueries=0", "-jit2option", "force", "AltJit=" + altjit_string, - "-jit2option", "force", "AltJitNgen=" + altjit_string + "-jit2option", "force", "AltJitNgen=" + altjit_string, + "-jit2option", "force", "EnableExtraSuperPmiQueries=0" ] flags += altjit_flags @@ -1208,7 +1211,8 @@ def replay_with_asm_diffs(self): altjit_string = "*" if self.coreclr_args.altjit else "" altjit_flags = [ "-jitoption", "force", "AltJit=" + altjit_string, - "-jitoption", "force", "AltJitNgen=" + altjit_string + "-jitoption", "force", "AltJitNgen=" + altjit_string, + "-jitoption", "force", "EnableExtraSuperPmiQueries=0" ] async def create_asm(print_prefix, item, self, text_differences, base_asm_location, diff_asm_location): @@ -2128,7 +2132,7 @@ def verify_superpmi_common_args(): # yielding # [0]: "" # [1]: "\Windows_NT.x64.Checked" - standard_location_split = os.path.dirname(coreclr_args.jit_path).split(os.path.dirname(coreclr_args.product_location)) + standard_location_split = os.path.dirname(coreclr_args.base_jit_path).split(os.path.dirname(coreclr_args.product_location)) assert(coreclr_args.host_os in standard_location_split[1]) # Get arch/flavor. Remove leading slash. diff --git a/src/coreclr/src/.nuget/Directory.Build.props b/src/coreclr/src/.nuget/Directory.Build.props index 1a8f5c0d7414..257da02a7ffc 100644 --- a/src/coreclr/src/.nuget/Directory.Build.props +++ b/src/coreclr/src/.nuget/Directory.Build.props @@ -3,14 +3,19 @@ + + + AnyCPU - - true + + false true diff --git a/src/coreclr/src/.nuget/Microsoft.NET.Sdk.IL/targets/Microsoft.NET.Sdk.IL.targets b/src/coreclr/src/.nuget/Microsoft.NET.Sdk.IL/targets/Microsoft.NET.Sdk.IL.targets index 9a4272c39479..3389fc581498 100644 --- a/src/coreclr/src/.nuget/Microsoft.NET.Sdk.IL/targets/Microsoft.NET.Sdk.IL.targets +++ b/src/coreclr/src/.nuget/Microsoft.NET.Sdk.IL/targets/Microsoft.NET.Sdk.IL.targets @@ -30,7 +30,7 @@ Copyright (c) .NET Foundation. All rights reserved. 
<_OSArchitecture>$([System.Runtime.InteropServices.RuntimeInformation]::OSArchitecture) $(_OSPlatform)-$(_OSArchitecture.ToLower()) - 5.0.0 + 6.0.0 runtime.$(MicrosoftNetCoreIlasmPackageRuntimeId).microsoft.netcore.ilasm runtime.$(MicrosoftNetCoreIlasmPackageRuntimeId).microsoft.netcore.ildasm diff --git a/src/coreclr/src/System.Private.CoreLib/CreateRuntimeRootILLinkDescriptorFile.targets b/src/coreclr/src/System.Private.CoreLib/CreateRuntimeRootILLinkDescriptorFile.targets index 18a6d21e57dc..4b232bbfdc21 100644 --- a/src/coreclr/src/System.Private.CoreLib/CreateRuntimeRootILLinkDescriptorFile.targets +++ b/src/coreclr/src/System.Private.CoreLib/CreateRuntimeRootILLinkDescriptorFile.targets @@ -7,12 +7,12 @@ <_ILLinkRuntimeRootDescriptorFilePath>$(ILLinkTrimXml) <_NamespaceFilePath Condition=" '$(_NamespaceFilePath)' == '' ">$(MSBuildThisFileDirectory)..\vm\namespace.h - <_MscorlibFilePath Condition=" '$(_MscorlibFilePath)' == '' ">$(MSBuildThisFileDirectory)..\vm\mscorlib.h + <_MscorlibFilePath Condition=" '$(_MscorlibFilePath)' == '' ">$(MSBuildThisFileDirectory)..\vm\corelib.h <_CortypeFilePath Condition=" '$(_CortypeFilePath)' == '' ">$(MSBuildThisFileDirectory)..\inc\cortypeinfo.h <_RexcepFilePath Condition=" '$(_RexcepFilePath)' == '' ">$(MSBuildThisFileDirectory)..\vm\rexcep.h <_ILLinkDescriptorsIntermediatePath>$(IntermediateOutputPath)ILLink.Descriptors.Combined.xml <_ILLinkTasksToolsDir>$(PkgMicrosoft_NET_ILLink_Tasks)/tools - <_ILLinkTasksDir>$(_ILLinkTasksToolsDir)/$(NetFrameworkCurrent)/ + <_ILLinkTasksDir>$(_ILLinkTasksToolsDir)/net472/ <_ILLinkTasksDir Condition="'$(MSBuildRuntimeType)' == 'Core'">$(_ILLinkTasksToolsDir)/netcoreapp3.0/ <_ILLinkTasksPath>$(_ILLinkTasksDir)ILLink.Tasks.dll diff --git a/src/coreclr/src/System.Private.CoreLib/ILLinkTrim.xml b/src/coreclr/src/System.Private.CoreLib/ILLinkTrim.xml index 11a67fdac313..8031688ca0b9 100644 --- a/src/coreclr/src/System.Private.CoreLib/ILLinkTrim.xml +++ b/src/coreclr/src/System.Private.CoreLib/ILLinkTrim.xml @@ -17,5 +17,12 @@ + + + + + + + diff --git a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj index 501487c6a111..faf3af6a526e 100644 --- a/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj +++ b/src/coreclr/src/System.Private.CoreLib/System.Private.CoreLib.csproj @@ -37,14 +37,7 @@ false true - true - true - 6.00 v4.0.30319 - true - true - prompt - 4 true $(NoWarn),0419,0649,CA2249,CA1830 enable @@ -53,10 +46,6 @@ CORECLR;NETCOREAPP;SYSTEM_PRIVATE_CORELIB true - - <_TargetFrameworkDirectories>$(MSBuildThisFileDirectory)/Documentation - <_FullFrameworkReferenceAssemblyPaths>$(MSBuildThisFileDirectory)/Documentation true $(OutputPath)$(MSBuildProjectName).xml true @@ -224,6 +213,7 @@ + diff --git a/src/coreclr/src/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs b/src/coreclr/src/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs index 45f4d4839fb8..c41d4dd96616 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/Internal/Runtime/InteropServices/ComActivator.cs @@ -25,7 +25,7 @@ public interface IClassFactory void CreateInstance( [MarshalAs(UnmanagedType.Interface)] object? pUnkOuter, ref Guid riid, - [MarshalAs(UnmanagedType.Interface)] out object? 
ppvObject); + out IntPtr ppvObject); void LockServer([MarshalAs(UnmanagedType.Bool)] bool fLock); } @@ -51,7 +51,7 @@ internal interface IClassFactory2 : IClassFactory new void CreateInstance( [MarshalAs(UnmanagedType.Interface)] object? pUnkOuter, ref Guid riid, - [MarshalAs(UnmanagedType.Interface)] out object? ppvObject); + out IntPtr ppvObject); new void LockServer([MarshalAs(UnmanagedType.Bool)] bool fLock); @@ -66,7 +66,7 @@ void CreateInstanceLic( [MarshalAs(UnmanagedType.Interface)] object? pUnkReserved, ref Guid riid, [MarshalAs(UnmanagedType.BStr)] string bstrKey, - [MarshalAs(UnmanagedType.Interface)] out object ppvObject); + out IntPtr ppvObject); } [StructLayout(LayoutKind.Sequential)] @@ -493,28 +493,32 @@ public static Type GetValidatedInterfaceType(Type classType, ref Guid riid, obje #endif } - public static void ValidateObjectIsMarshallableAsInterface(object obj, Type interfaceType) + public static IntPtr GetObjectAsInterface(object obj, Type interfaceType) { #if FEATURE_COMINTEROP_UNMANAGED_ACTIVATION - // If the requested "interface type" is type object then return - // because type object is always marshallable. + // If the requested "interface type" is type object then return as IUnknown if (interfaceType == typeof(object)) { - return; + return Marshal.GetIUnknownForObject(obj); } Debug.Assert(interfaceType.IsInterface); - // The intent of this call is to validate the interface can be + // The intent of this call is to get AND validate the interface can be // marshalled to native code. An exception will be thrown if the // type is unable to be marshalled to native code. // Scenarios where this is relevant: // - Interfaces that use Generics // - Interfaces that define implementation - IntPtr ptr = Marshal.GetComInterfaceForObject(obj, interfaceType, CustomQueryInterfaceMode.Ignore); + IntPtr interfaceMaybe = Marshal.GetComInterfaceForObject(obj, interfaceType, CustomQueryInterfaceMode.Ignore); - // Decrement the above 'Marshal.GetComInterfaceForObject()' - Marshal.Release(ptr); + if (interfaceMaybe == IntPtr.Zero) + { + // E_NOINTERFACE + throw new InvalidCastException(); + } + + return interfaceMaybe; #else throw new PlatformNotSupportedException(); #endif @@ -544,18 +548,18 @@ public static object CreateAggregatedObject(object pUnkOuter, object comObject) public void CreateInstance( [MarshalAs(UnmanagedType.Interface)] object? pUnkOuter, ref Guid riid, - [MarshalAs(UnmanagedType.Interface)] out object? ppvObject) + out IntPtr ppvObject) { #if FEATURE_COMINTEROP_UNMANAGED_ACTIVATION Type interfaceType = BasicClassFactory.GetValidatedInterfaceType(_classType, ref riid, pUnkOuter); - ppvObject = Activator.CreateInstance(_classType)!; + object obj = Activator.CreateInstance(_classType)!; if (pUnkOuter != null) { - ppvObject = BasicClassFactory.CreateAggregatedObject(pUnkOuter, ppvObject); + obj = BasicClassFactory.CreateAggregatedObject(pUnkOuter, obj); } - BasicClassFactory.ValidateObjectIsMarshallableAsInterface(ppvObject, interfaceType); + ppvObject = BasicClassFactory.GetObjectAsInterface(obj, interfaceType); #else throw new PlatformNotSupportedException(); #endif @@ -593,7 +597,7 @@ public LicenseClassFactory(Guid clsid, Type classType) public void CreateInstance( [MarshalAs(UnmanagedType.Interface)] object? pUnkOuter, ref Guid riid, - [MarshalAs(UnmanagedType.Interface)] out object? 
ppvObject) + out IntPtr ppvObject) { #if FEATURE_COMINTEROP_UNMANAGED_ACTIVATION CreateInstanceInner(pUnkOuter, ref riid, key: null, isDesignTime: true, out ppvObject); @@ -640,7 +644,7 @@ public void CreateInstanceLic( [MarshalAs(UnmanagedType.Interface)] object? pUnkReserved, ref Guid riid, [MarshalAs(UnmanagedType.BStr)] string bstrKey, - [MarshalAs(UnmanagedType.Interface)] out object ppvObject) + out IntPtr ppvObject) { #if FEATURE_COMINTEROP_UNMANAGED_ACTIVATION Debug.Assert(pUnkReserved == null); @@ -655,18 +659,18 @@ private void CreateInstanceInner( ref Guid riid, string? key, bool isDesignTime, - out object ppvObject) + out IntPtr ppvObject) { #if FEATURE_COMINTEROP_UNMANAGED_ACTIVATION Type interfaceType = BasicClassFactory.GetValidatedInterfaceType(_classType, ref riid, pUnkOuter); - ppvObject = _licenseProxy.AllocateAndValidateLicense(_classType, key, isDesignTime); + object obj = _licenseProxy.AllocateAndValidateLicense(_classType, key, isDesignTime); if (pUnkOuter != null) { - ppvObject = BasicClassFactory.CreateAggregatedObject(pUnkOuter, ppvObject); + obj = BasicClassFactory.CreateAggregatedObject(pUnkOuter, obj); } - BasicClassFactory.ValidateObjectIsMarshallableAsInterface(ppvObject, interfaceType); + ppvObject = BasicClassFactory.GetObjectAsInterface(obj, interfaceType); #else throw new PlatformNotSupportedException(); #endif diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/ArgIterator.cs b/src/coreclr/src/System.Private.CoreLib/src/System/ArgIterator.cs index d9ed61c6c103..10ede24a6573 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/ArgIterator.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/ArgIterator.cs @@ -134,50 +134,50 @@ public override bool Equals(object? o) #else public ArgIterator(RuntimeArgumentHandle arglist) { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } [CLSCompliant(false)] public unsafe ArgIterator(RuntimeArgumentHandle arglist, void* ptr) { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } public void End() { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } public override bool Equals(object? 
o) { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } public override int GetHashCode() { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } [CLSCompliant(false)] public System.TypedReference GetNextArg() { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } [CLSCompliant(false)] public System.TypedReference GetNextArg(System.RuntimeTypeHandle rth) { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } public unsafe System.RuntimeTypeHandle GetNextArgType() { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } public int GetRemainingCount() { - throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/coreclr/issues/9204 + throw new PlatformNotSupportedException(SR.PlatformNotSupported_ArgIterator); // https://github.com/dotnet/runtime/issues/7317 } #endif // TARGET_WINDOWS } diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Collections/Generic/ComparerHelpers.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Collections/Generic/ComparerHelpers.cs index 7a49fad02133..1fb54677d0c5 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Collections/Generic/ComparerHelpers.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Collections/Generic/ComparerHelpers.cs @@ -126,7 +126,7 @@ internal static object CreateDefaultEqualityComparer(Type type) result = new ByteEqualityComparer(); } // If T implements IEquatable return a GenericEqualityComparer - else if (typeof(IEquatable<>).MakeGenericType(type).IsAssignableFrom(type)) + else if (type.IsAssignableTo(typeof(IEquatable<>).MakeGenericType(type))) { result = CreateInstanceForAnotherGenericParameter((RuntimeType)typeof(GenericEqualityComparer), runtimeType); } diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs index 5e51ac062923..1d026a643233 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Diagnostics/Debugger.cs @@ -16,9 +16,6 @@ public static class Debugger [MethodImpl(MethodImplOptions.NoInlining)] public static void Break() => BreakInternal(); - // The VM depends on this private method. 
- private static void BreakCanThrow() => BreakInternal(); - [MethodImpl(MethodImplOptions.InternalCall)] private static extern void BreakInternal(); diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Environment.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Environment.CoreCLR.cs index d2ea1e3fbdd0..b375cfddc7ba 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Environment.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Environment.CoreCLR.cs @@ -11,7 +11,11 @@ namespace System { public static partial class Environment { - public static int CurrentManagedThreadId => Thread.CurrentThread.ManagedThreadId; + public static extern int CurrentManagedThreadId + { + [MethodImpl(MethodImplOptions.InternalCall)] + get; + } // Terminates this process with the given exit code. [DllImport(RuntimeHelpers.QCall, CharSet = CharSet.Unicode)] @@ -83,9 +87,7 @@ public static string[] GetCommandLineArgs() [DllImport(RuntimeHelpers.QCall, CharSet = CharSet.Unicode)] private static extern int GetProcessorCount(); - // If you change this method's signature then you must change the code that calls it - // in excep.cpp and probably you will have to visit mscorlib.h to add the new signature - // as well as metasig.h to create the new signature type + // Used by VM internal static string? GetResourceStringLocal(string key) => SR.GetResourceString(key); public static string StackTrace diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Exception.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Exception.CoreCLR.cs index c3f1e5b4fd43..d6bb435387b0 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Exception.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Exception.CoreCLR.cs @@ -266,8 +266,8 @@ private string? SerializationStackTraceString } // This piece of infrastructure exists to help avoid deadlocks - // between parts of mscorlib that might throw an exception while - // holding a lock that are also used by mscorlib's ResourceManager + // between parts of CoreLib that might throw an exception while + // holding a lock that are also used by CoreLib's ResourceManager // instance. As a special case of code that may throw while holding // a lock, we also need to fix our asynchronous exceptions to use // Win32 resources as well (assuming we ever call a managed diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/GC.cs b/src/coreclr/src/System.Private.CoreLib/src/System/GC.cs index 865600fe8d2d..d932d18d9fc9 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/GC.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/GC.cs @@ -294,7 +294,7 @@ public static int GetGeneration(WeakReference wo) public static void WaitForPendingFinalizers() { - // QCalls can not be exposed from mscorlib directly, need to wrap it. + // QCalls can not be exposed directly, need to wrap it. _WaitForPendingFinalizers(); } @@ -659,7 +659,7 @@ internal static void UnregisterMemoryLoadChangeNotification(Action notification) /// If pinned is set to true, must not be a reference type or a type that contains object references. 
/// [MethodImpl(MethodImplOptions.AggressiveInlining)] // forced to ensure no perf drop for small memory buffers (hot path) - public static T[] AllocateUninitializedArray(int length, bool pinned = false) + public static T[] AllocateUninitializedArray(int length, bool pinned = false) // T[] rather than T?[] to match `new T[length]` behavior { if (!pinned) { @@ -685,7 +685,7 @@ public static T[] AllocateUninitializedArray(int length, bool pinned = false) // kept outside of the small arrays hot path to have inlining without big size growth return AllocateNewUninitializedArray(length, pinned); - // remove the local function when https://github.com/dotnet/coreclr/issues/5329 is implemented + // remove the local function when https://github.com/dotnet/runtime/issues/5973 is implemented static T[] AllocateNewUninitializedArray(int length, bool pinned) { GC_ALLOC_FLAGS flags = GC_ALLOC_FLAGS.GC_ALLOC_ZEROING_OPTIONAL; @@ -705,7 +705,7 @@ static T[] AllocateNewUninitializedArray(int length, bool pinned) /// /// If pinned is set to true, must not be a reference type or a type that contains object references. /// - public static T[] AllocateArray(int length, bool pinned = false) + public static T[] AllocateArray(int length, bool pinned = false) // T[] rather than T?[] to match `new T[length]` behavior { GC_ALLOC_FLAGS flags = GC_ALLOC_FLAGS.GC_ALLOC_NO_FLAGS; diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Math.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Math.CoreCLR.cs index 74aca4342ee3..beeb322b52c0 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Math.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Math.CoreCLR.cs @@ -56,6 +56,7 @@ public static partial class Math [MethodImpl(MethodImplOptions.InternalCall)] public static extern double Cbrt(double d); + [Intrinsic] [MethodImpl(MethodImplOptions.InternalCall)] public static extern double Ceiling(double a); @@ -71,6 +72,7 @@ public static partial class Math [MethodImpl(MethodImplOptions.InternalCall)] public static extern double Exp(double d); + [Intrinsic] [MethodImpl(MethodImplOptions.InternalCall)] public static extern double Floor(double d); diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/MathF.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/MathF.CoreCLR.cs index 371780f6abe0..f3dd3289c1c2 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/MathF.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/MathF.CoreCLR.cs @@ -45,6 +45,7 @@ public static partial class MathF [MethodImpl(MethodImplOptions.InternalCall)] public static extern float Cbrt(float x); + [Intrinsic] [MethodImpl(MethodImplOptions.InternalCall)] public static extern float Ceiling(float x); @@ -60,6 +61,7 @@ public static partial class MathF [MethodImpl(MethodImplOptions.InternalCall)] public static extern float Exp(float x); + [Intrinsic] [MethodImpl(MethodImplOptions.InternalCall)] public static extern float Floor(float x); diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/MulticastDelegate.cs b/src/coreclr/src/System.Private.CoreLib/src/System/MulticastDelegate.cs index a2598bd7779a..fc3ef65f74b5 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/MulticastDelegate.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/MulticastDelegate.cs @@ -437,7 +437,7 @@ public sealed override Delegate[] GetInvocationList() // so it can become a simple test if (d2 is null) { - // return true/false not the 
test result https://github.com/dotnet/coreclr/issues/914 + // return true/false not the test result https://github.com/dotnet/runtime/issues/4207 return (d1 is null) ? true : false; } @@ -454,7 +454,7 @@ public sealed override Delegate[] GetInvocationList() // so it can become a simple test if (d2 is null) { - // return true/false not the test result https://github.com/dotnet/coreclr/issues/914 + // return true/false not the test result https://github.com/dotnet/runtime/issues/4207 return (d1 is null) ? false : true; } diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/CustomAttribute.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/CustomAttribute.cs index a7625465c0e8..aa21a11492e6 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/CustomAttribute.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/CustomAttribute.cs @@ -1336,7 +1336,7 @@ private static bool FilterCustomAttributeRecord( if (ctorHasParameters) { // Resolve method ctor token found in decorated decoratedModule scope - // See https://github.com/dotnet/coreclr/issues/21456 for why we fast-path non-generics here (fewer allocations) + // See https://github.com/dotnet/runtime/issues/11637 for why we fast-path non-generics here (fewer allocations) if (attributeType.IsGenericType) { ctorWithParameters = decoratedModule.ResolveMethod(caCtorToken, attributeType.GenericTypeArguments, null)!.MethodHandle.GetMethodInfo(); diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs index 60d6791a0572..7313b405bf0e 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/AssemblyBuilder.cs @@ -497,7 +497,7 @@ public override FileStream[] GetFiles(bool getResourceModules) public override Module? 
GetModule(string name) => InternalAssembly.GetModule(name); - [RequiresUnreferencedCode("Types might be removed")] + [RequiresUnreferencedCode("Assembly references might be removed")] public override AssemblyName[] GetReferencedAssemblies() { return InternalAssembly.GetReferencedAssemblies(); diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ConstructorBuilder.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ConstructorBuilder.cs index de668d586341..f8405246e7b3 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ConstructorBuilder.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ConstructorBuilder.cs @@ -45,6 +45,10 @@ private TypeBuilder GetTypeBuilder() { return m_methodBuilder.GetTypeBuilder(); } + internal SignatureHelper GetMethodSignature() + { + return m_methodBuilder.GetMethodSignature(); + } #endregion #region Object Overrides diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs index 34d501c1363d..693a2937c152 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/DynamicILGenerator.cs @@ -428,6 +428,8 @@ public override void EndScope() private int GetMemberRefToken(MethodBase methodInfo, Type[]? optionalParameterTypes) { Type[]? parameterTypes; + Type[][]? requiredCustomModifiers; + Type[][]? optionalCustomModifiers; if (optionalParameterTypes != null && (methodInfo.CallingConvention & CallingConventions.VarArgs) == 0) throw new InvalidOperationException(SR.InvalidOperation_NotAVarArgCallingConvention); @@ -442,17 +444,28 @@ private int GetMemberRefToken(MethodBase methodInfo, Type[]? optionalParameterTy if (paramInfo != null && paramInfo.Length != 0) { parameterTypes = new Type[paramInfo.Length]; + requiredCustomModifiers = new Type[parameterTypes.Length][]; + optionalCustomModifiers = new Type[parameterTypes.Length][]; + for (int i = 0; i < paramInfo.Length; i++) + { parameterTypes[i] = paramInfo[i].ParameterType; + requiredCustomModifiers[i] = paramInfo[i].GetRequiredCustomModifiers(); + optionalCustomModifiers[i] = paramInfo[i].GetOptionalCustomModifiers(); + } } else { parameterTypes = null; + requiredCustomModifiers = null; + optionalCustomModifiers = null; } SignatureHelper sig = GetMemberRefSignature(methodInfo.CallingConvention, MethodBuilder.GetMethodBaseReturnType(methodInfo), parameterTypes, + requiredCustomModifiers, + optionalCustomModifiers, optionalParameterTypes); if (rtMeth != null) @@ -465,13 +478,17 @@ internal override SignatureHelper GetMemberRefSignature( CallingConventions call, Type? returnType, Type[]? parameterTypes, + Type[][]? requiredCustomModifiers, + Type[][]? optionalCustomModifiers, Type[]? 
optionalParameterTypes) { SignatureHelper sig = SignatureHelper.GetMethodSigHelper(call, returnType); if (parameterTypes != null) { - foreach (Type t in parameterTypes) - sig.AddArgument(t); + for (int i = 0; i < parameterTypes.Length; i++) + { + sig.AddArgument(parameterTypes[i], requiredCustomModifiers![i], optionalCustomModifiers![i]); + } } if (optionalParameterTypes != null && optionalParameterTypes.Length != 0) { diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ILGenerator.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ILGenerator.cs index c587b18c62ac..43ecc3adcd5a 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ILGenerator.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ILGenerator.cs @@ -162,16 +162,24 @@ private int GetMethodToken(MethodBase method, Type[]? optionalParameterTypes, bo return ((ModuleBuilder)m_methodBuilder.Module).GetMethodTokenInternal(method, optionalParameterTypes, useMethodDef); } + internal SignatureHelper GetMemberRefSignature( + CallingConventions call, + Type? returnType, + Type[]? parameterTypes, + Type[]? optionalParameterTypes) + { + return GetMemberRefSignature(call, returnType, parameterTypes, null, null, optionalParameterTypes); + } internal virtual SignatureHelper GetMemberRefSignature(CallingConventions call, Type? returnType, - Type[]? parameterTypes, Type[]? optionalParameterTypes) + Type[]? parameterTypes, Type[][]? requiredCustomModifiers, Type[][]? optionalCustomModifiers, Type[]? optionalParameterTypes) { - return GetMemberRefSignature(call, returnType, parameterTypes, optionalParameterTypes, 0); + return GetMemberRefSignature(call, returnType, parameterTypes, requiredCustomModifiers, optionalCustomModifiers, optionalParameterTypes, 0); } private SignatureHelper GetMemberRefSignature(CallingConventions call, Type? returnType, - Type[]? parameterTypes, Type[]? optionalParameterTypes, int cGenericParameters) + Type[]? parameterTypes, Type[][]? requiredCustomModifiers, Type[][]? optionalCustomModifiers, Type[]? optionalParameterTypes, int cGenericParameters) { - return ((ModuleBuilder)m_methodBuilder.Module).GetMemberRefSignature(call, returnType, parameterTypes, optionalParameterTypes, cGenericParameters); + return ((ModuleBuilder)m_methodBuilder.Module).GetMemberRefSignature(call, returnType, parameterTypes, requiredCustomModifiers, optionalCustomModifiers, optionalParameterTypes, cGenericParameters); } internal byte[]? BakeByteArray() diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ModuleBuilder.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ModuleBuilder.cs index c1ed6b887a02..c390d486dd3b 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ModuleBuilder.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/Emit/ModuleBuilder.cs @@ -357,12 +357,11 @@ private static RuntimeModule GetRuntimeModuleFromModule(Module? m) return (m as RuntimeModule)!; } - private int GetMemberRefToken(MethodBase method, IEnumerable? optionalParameterTypes) + private int GetMemberRefToken(MethodBase method, Type[]? optionalParameterTypes) { - Type[] parameterTypes; - Type? returnType; int tkParent; int cGenericParameters = 0; + SignatureHelper sigHelp; if (method.IsGenericMethod) { @@ -387,55 +386,22 @@ private int GetMemberRefToken(MethodBase method, IEnumerable? 
optionalPara if (method.DeclaringType!.IsGenericType) { - MethodBase methDef; // methodInfo = G.M ==> methDef = G.M + MethodBase methDef = GetGenericMethodBaseDefinition(method); - if (method is MethodOnTypeBuilderInstantiation motbi) - { - methDef = motbi.m_method; - } - else if (method is ConstructorOnTypeBuilderInstantiation cotbi) - { - methDef = cotbi.m_ctor; - } - else if (method is MethodBuilder || method is ConstructorBuilder) - { - // methodInfo must be GenericMethodDefinition; trying to emit G.M - methDef = method; - } - else - { - Debug.Assert(method is RuntimeMethodInfo || method is RuntimeConstructorInfo); - - if (method.IsGenericMethod) - { - Debug.Assert(masmi != null); - - methDef = masmi.GetGenericMethodDefinition()!; - methDef = methDef.Module.ResolveMethod( - method.MetadataToken, - methDef.DeclaringType?.GetGenericArguments(), - methDef.GetGenericArguments())!; - } - else - { - methDef = method.Module.ResolveMethod( - method.MetadataToken, - method.DeclaringType?.GetGenericArguments(), - null)!; - } - } - - parameterTypes = methDef.GetParameterTypes(); - returnType = MethodBuilder.GetMethodBaseReturnType(methDef); + sigHelp = GetMemberRefSignature(methDef, cGenericParameters); } else { - parameterTypes = method.GetParameterTypes(); - returnType = MethodBuilder.GetMethodBaseReturnType(method); + sigHelp = GetMemberRefSignature(method, cGenericParameters); } - byte[] sigBytes = GetMemberRefSignature(method.CallingConvention, returnType, parameterTypes, - optionalParameterTypes, cGenericParameters).InternalGetSignature(out int sigLength); + if (optionalParameterTypes?.Length > 0) + { + sigHelp.AddSentinel(); + sigHelp.AddArguments(optionalParameterTypes, null, null); + } + + byte[] sigBytes = sigHelp.InternalGetSignature(out int sigLength); if (method.DeclaringType!.IsGenericType) { @@ -460,15 +426,16 @@ private int GetMemberRefToken(MethodBase method, IEnumerable? optionalPara } internal SignatureHelper GetMemberRefSignature(CallingConventions call, Type? returnType, - Type[]? parameterTypes, IEnumerable? optionalParameterTypes, int cGenericParameters) + Type[]? parameterTypes, Type[][]? requiredCustomModifiers, Type[][]? optionalCustomModifiers, + IEnumerable? optionalParameterTypes, int cGenericParameters) { SignatureHelper sig = SignatureHelper.GetMethodSigHelper(this, call, returnType, cGenericParameters); if (parameterTypes != null) { - foreach (Type t in parameterTypes) + for (int i = 0; i < parameterTypes.Length; i++) { - sig.AddArgument(t); + sig.AddArgument(parameterTypes[i], requiredCustomModifiers![i], optionalCustomModifiers![i]); } } @@ -491,6 +458,90 @@ internal SignatureHelper GetMemberRefSignature(CallingConventions call, Type? re return sig; } + private MethodBase GetGenericMethodBaseDefinition(MethodBase methodBase) + { + // methodInfo = G.M ==> methDef = G.M + MethodInfo? 
masmi = methodBase as MethodInfo; + MethodBase methDef; + + if (methodBase is MethodOnTypeBuilderInstantiation motbi) + { + methDef = motbi.m_method; + } + else if (methodBase is ConstructorOnTypeBuilderInstantiation cotbi) + { + methDef = cotbi.m_ctor; + } + else if (methodBase is MethodBuilder || methodBase is ConstructorBuilder) + { + // methodInfo must be GenericMethodDefinition; trying to emit G.M + methDef = methodBase; + } + else + { + Debug.Assert(methodBase is RuntimeMethodInfo || methodBase is RuntimeConstructorInfo); + + if (methodBase.IsGenericMethod) + { + Debug.Assert(masmi != null); + + methDef = masmi.GetGenericMethodDefinition()!; + methDef = methDef.Module.ResolveMethod( + methodBase.MetadataToken, + methDef.DeclaringType?.GetGenericArguments(), + methDef.GetGenericArguments())!; + } + else + { + methDef = methodBase.Module.ResolveMethod( + methodBase.MetadataToken, + methodBase.DeclaringType?.GetGenericArguments(), + null)!; + } + } + + return methDef; + } + + internal SignatureHelper GetMemberRefSignature(MethodBase? method, int cGenericParameters) + { + switch (method) + { + case MethodBuilder methodBuilder: + return methodBuilder.GetMethodSignature(); + case ConstructorBuilder constructorBuilder: + return constructorBuilder.GetMethodSignature(); + case MethodOnTypeBuilderInstantiation motbi when motbi.m_method is MethodBuilder methodBuilder: + return methodBuilder.GetMethodSignature(); + case MethodOnTypeBuilderInstantiation motbi: + method = motbi.m_method; + break; + case ConstructorOnTypeBuilderInstantiation cotbi when cotbi.m_ctor is ConstructorBuilder constructorBuilder: + return constructorBuilder.GetMethodSignature(); + case ConstructorOnTypeBuilderInstantiation cotbi: + method = cotbi.m_ctor; + break; + } + + Debug.Assert(method is RuntimeMethodInfo || method is RuntimeConstructorInfo); + ParameterInfo[] parameters = method.GetParametersNoCopy(); + + Type[] parameterTypes = new Type[parameters.Length]; + Type[][] requiredCustomModifiers = new Type[parameterTypes.Length][]; + Type[][] optionalCustomModifiers = new Type[parameterTypes.Length][]; + + for (int i = 0; i < parameters.Length; i++) + { + parameterTypes[i] = parameters[i].ParameterType; + requiredCustomModifiers[i] = parameters[i].GetRequiredCustomModifiers(); + optionalCustomModifiers[i] = parameters[i].GetOptionalCustomModifiers(); + } + + ParameterInfo? returnParameter = method is MethodInfo mi ? mi.ReturnParameter : null; + SignatureHelper sigHelp = SignatureHelper.GetMethodSigHelper(this, method.CallingConvention, cGenericParameters, returnParameter?.ParameterType, returnParameter?.GetRequiredCustomModifiers(), returnParameter?.GetOptionalCustomModifiers(), parameterTypes, requiredCustomModifiers, optionalCustomModifiers); + return sigHelp; + } + #endregion public override bool Equals(object? obj) => InternalModule.Equals(obj); @@ -742,13 +793,13 @@ public override FieldInfo[] GetFields(BindingFlags bindingFlags) return InternalModule.GetField(name, bindingAttr); } - [RequiresUnreferencedCode("Fields might be removed")] + [RequiresUnreferencedCode("Methods might be removed")] public override MethodInfo[] GetMethods(BindingFlags bindingFlags) { return InternalModule.GetMethods(bindingFlags); } - [RequiresUnreferencedCode("Fields might be removed")] + [RequiresUnreferencedCode("Methods might be removed")] protected override MethodInfo? GetMethodImpl(string name, BindingFlags bindingAttr, Binder? binder, CallingConventions callConvention, Type[]? types, ParameterModifier[]? 
modifiers) { @@ -1261,7 +1312,7 @@ private MethodToken GetMethodTokenNoLock(MethodInfo method, bool getGenericTypeD return new MethodToken(mr); } - internal int GetMethodTokenInternal(MethodBase method, IEnumerable? optionalParameterTypes, bool useMethodDef) + internal int GetMethodTokenInternal(MethodBase method, Type[]? optionalParameterTypes, bool useMethodDef) { int tk; MethodInfo? methodInfo = method as MethodInfo; diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/INVOCATION_FLAGS.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/INVOCATION_FLAGS.cs index e7d9b97ac5c6..b6a55518914a 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/INVOCATION_FLAGS.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/INVOCATION_FLAGS.cs @@ -14,7 +14,8 @@ internal enum INVOCATION_FLAGS : uint INVOCATION_FLAGS_INITIALIZED = 0x00000001, // it's used for both method and field to signify that no access is allowed INVOCATION_FLAGS_NO_INVOKE = 0x00000002, - /* unused 0x00000004 */ + // Set for static ctors, to ensure that the static ctor is run as a static ctor before it is explicitly executed via reflection + INVOCATION_FLAGS_RUN_CLASS_CONSTRUCTOR = 0x00000004, // Set for static ctors and ctors on abstract types, which // can be invoked only if the "this" object is provided (even if it's null). INVOCATION_FLAGS_NO_CTOR_INVOKE = 0x00000008, diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs index d2476660973f..20a72464b3ac 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeAssembly.cs @@ -69,19 +69,33 @@ public override event ModuleResolveEventHandler? ModuleResolve } [DllImport(RuntimeHelpers.QCall, CharSet = CharSet.Unicode)] - private static extern void GetCodeBase(QCallAssembly assembly, - bool copiedName, + private static extern bool GetCodeBase(QCallAssembly assembly, StringHandleOnStack retString); - internal string? GetCodeBase(bool copiedName) + internal string? GetCodeBase() { string? codeBase = null; RuntimeAssembly runtimeAssembly = this; - GetCodeBase(new QCallAssembly(ref runtimeAssembly), copiedName, new StringHandleOnStack(ref codeBase)); - return codeBase; + if (GetCodeBase(new QCallAssembly(ref runtimeAssembly), new StringHandleOnStack(ref codeBase))) + { + return codeBase; + } + return null; } - public override string? CodeBase => GetCodeBase(false); + public override string? CodeBase + { + get + { + var codeBase = GetCodeBase(); + if (codeBase is null) + { + // Not supported if the assembly was loaded from memory + throw new NotSupportedException(SR.NotSupported_CodeBase); + } + return codeBase; + } + } internal RuntimeAssembly GetNativeHandle() => this; @@ -90,7 +104,7 @@ private static extern void GetCodeBase(QCallAssembly assembly, // is returned. public override AssemblyName GetName(bool copiedName) { - string? codeBase = GetCodeBase(copiedName); + string? codeBase = GetCodeBase(); var an = new AssemblyName(GetSimpleName(), GetPublicKey(), @@ -360,6 +374,12 @@ private static extern void InternalLoad(ObjectHandleOnStack assemblyName, // given name. (Name should not include path.) public override FileStream? 
GetFile(string name) { + if (Location.Length == 0) + { + // Throw if the assembly was loaded from memory, indicated by Location returning an empty string + throw new FileNotFoundException(SR.IO_NoFileTableInInMemoryAssemblies); + } + RuntimeModule? m = (RuntimeModule?)GetModule(name); if (m == null) return null; @@ -371,6 +391,12 @@ private static extern void InternalLoad(ObjectHandleOnStack assemblyName, public override FileStream[] GetFiles(bool getResourceModules) { + if (Location.Length == 0) + { + // Throw if the assembly was loaded from memory, indicated by Location returning an empty string + throw new FileNotFoundException(SR.IO_NoFileTableInInMemoryAssemblies); + } + Module[] m = GetModules(getResourceModules); FileStream[] fs = new FileStream[m.Length]; diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.cs index 9d0086ca4425..e06251e6ad85 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Reflection/RuntimeConstructorInfo.cs @@ -5,6 +5,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Globalization; +using System.Runtime.CompilerServices; using System.Text; using RuntimeTypeCache = System.RuntimeType.RuntimeTypeCache; @@ -47,7 +48,12 @@ internal INVOCATION_FLAGS InvocationFlags // We don't need other flags if this method cannot be invoked invocationFlags |= INVOCATION_FLAGS.INVOCATION_FLAGS_NO_INVOKE; } - else if (IsStatic || declaringType != null && declaringType.IsAbstract) + else if (IsStatic) + { + invocationFlags |= INVOCATION_FLAGS.INVOCATION_FLAGS_RUN_CLASS_CONSTRUCTOR | + INVOCATION_FLAGS.INVOCATION_FLAGS_NO_CTOR_INVOKE; + } + else if (declaringType != null && declaringType.IsAbstract) { invocationFlags |= INVOCATION_FLAGS.INVOCATION_FLAGS_NO_CTOR_INVOKE; } @@ -280,6 +286,21 @@ internal void ThrowNoInvokeException() // check basic method consistency. This call will throw if there are problems in the target/method relationship CheckConsistency(obj); + if ((invocationFlags & INVOCATION_FLAGS.INVOCATION_FLAGS_RUN_CLASS_CONSTRUCTOR) != 0) + { + // Run the class constructor through the class constructor mechanism instead of the Invoke path. + // This avoids allowing mutation of readonly static fields, and initializes the type correctly. 
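+ // Illustrative note (SomeType is a placeholder, not from this change): a
+ // reflective call such as typeof(SomeType).TypeInitializer?.Invoke(null, null)
+ // now routes through RuntimeHelpers.RunClassConstructor(typeof(SomeType).TypeHandle),
+ // so the class constructor runs at most once and repeated reflective invocations
+ // can no longer replay it against already-initialized readonly static fields.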
+ + var declaringType = DeclaringType; + + if (declaringType != null) + RuntimeHelpers.RunClassConstructor(declaringType.TypeHandle); + else + RuntimeHelpers.RunModuleConstructor(Module.ModuleHandle); + + return null; + } + Signature sig = Signature; // get the signature diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs index 5c9a38e2a6f7..6b6e4639bfec 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.CoreCLR.cs @@ -286,9 +286,6 @@ public static IntPtr AllocateTypeAssociatedMemory(Type type, int size) [MethodImpl(MethodImplOptions.InternalCall)] private static extern IntPtr AllocTailCallArgBuffer(int size, IntPtr gcDesc); - [MethodImpl(MethodImplOptions.InternalCall)] - private static extern void FreeTailCallArgBuffer(); - [MethodImpl(MethodImplOptions.InternalCall)] private static unsafe extern TailCallTls* GetTailCallInfo(IntPtr retAddrSlot, IntPtr* retAddr); @@ -323,6 +320,12 @@ private static unsafe void DispatchTailCalls( finally { tls->Frame = prevFrame; + + // If the arg buffer is reporting inst argument, it is safe to abandon it now + if (tls->ArgBuffer != IntPtr.Zero && *(int*)tls->ArgBuffer == 1 /* TAILCALLARGBUFFER_INSTARG_ONLY */) + { + *(int*)tls->ArgBuffer = 2 /* TAILCALLARGBUFFER_ABANDONED */; + } } } @@ -481,9 +484,6 @@ internal unsafe struct TailCallTls { public PortableTailCallFrame* Frame; public IntPtr ArgBuffer; - private IntPtr _argBufferSize; - private IntPtr _argBufferGCDesc; - private fixed byte _argBufferInline[64]; } } diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComEventsHelper.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComEventsHelper.cs index 368d942f403a..93fd276b9d26 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComEventsHelper.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComEventsHelper.cs @@ -81,12 +81,15 @@ // means that the problem is already quite complex and we should not be dealing with it - see // ComEventsMethod.Invoke +using System.Runtime.Versioning; + namespace System.Runtime.InteropServices { /// /// The static methods provided in ComEventsHelper allow using .NET delegates to subscribe to events /// raised COM objects. /// + [SupportedOSPlatform("windows")] public static class ComEventsHelper { /// diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.cs index 1b0c1a717487..1e377a9978f4 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/ComWrappers.cs @@ -5,6 +5,7 @@ using System.Collections; using System.Threading; using System.Runtime.CompilerServices; +using System.Runtime.Versioning; using Internal.Runtime.CompilerServices; namespace System.Runtime.InteropServices @@ -68,6 +69,7 @@ internal enum ComWrappersScenario /// /// Class for managing wrappers of COM IUnknown types. 
/// + [SupportedOSPlatform("windows")] [CLSCompliant(false)] public abstract partial class ComWrappers { diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs index 33d43881adb3..e81335a3d05c 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs @@ -5,6 +5,7 @@ using System.Reflection; using System.Runtime.CompilerServices; using System.Runtime.InteropServices.ComTypes; +using System.Runtime.Versioning; using System.StubHelpers; namespace System.Runtime.InteropServices @@ -25,7 +26,7 @@ public static partial class Marshal [MethodImpl(MethodImplOptions.InternalCall)] internal static extern int SizeOfHelper(Type t, bool throwIfNotMarshalable); - [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2006:UnrecognizedReflectionPattern", + [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2070:UnrecognizedReflectionPattern", Justification = "Trimming doesn't affect types eligible for marshalling. Different exception for invalid inputs doesn't matter.")] public static IntPtr OffsetOf(Type t, string fieldName) { @@ -306,6 +307,7 @@ public static IntPtr ReAllocHGlobal(IntPtr pv, IntPtr cb) /// /// Given a managed object that wraps an ITypeInfo, return its name. /// + [SupportedOSPlatform("windows")] public static string GetTypeInfoName(ITypeInfo typeInfo) { if (typeInfo is null) @@ -319,12 +321,14 @@ public static string GetTypeInfoName(ITypeInfo typeInfo) // This method is identical to Type.GetTypeFromCLSID. Since it's interop specific, we expose it // on Marshal for more consistent API surface. + [SupportedOSPlatform("windows")] public static Type? GetTypeFromCLSID(Guid clsid) => RuntimeType.GetTypeFromCLSIDImpl(clsid, null, throwOnError: false); /// /// Return the IUnknown* for an Object if the current context is the one /// where the RCW was first seen. Will return null otherwise. /// + [SupportedOSPlatform("windows")] public static IntPtr /* IUnknown* */ GetIUnknownForObject(object o) { if (o is null) @@ -348,6 +352,7 @@ public static string GetTypeInfoName(ITypeInfo typeInfo) /// /// Return the IDispatch* for an Object. /// + [SupportedOSPlatform("windows")] public static IntPtr /* IDispatch */ GetIDispatchForObject(object o) { if (o is null) @@ -365,6 +370,7 @@ public static string GetTypeInfoName(ITypeInfo typeInfo) /// Return the IUnknown* representing the interface for the Object. /// Object o should support Type T /// + [SupportedOSPlatform("windows")] public static IntPtr /* IUnknown* */ GetComInterfaceForObject(object o, Type T) { if (o is null) @@ -380,6 +386,7 @@ public static string GetTypeInfoName(ITypeInfo typeInfo) return GetComInterfaceForObjectNative(o, T, false, true); } + [SupportedOSPlatform("windows")] public static IntPtr GetComInterfaceForObject([DisallowNull] T o) => GetComInterfaceForObject(o!, typeof(TInterface)); /// @@ -387,6 +394,7 @@ public static string GetTypeInfoName(ITypeInfo typeInfo) /// Object o should support Type T, it refer the value of mode to /// invoke customized QueryInterface or not. 
/// </summary> + [SupportedOSPlatform("windows")] public static IntPtr /* IUnknown* */ GetComInterfaceForObject(object o, Type T, CustomQueryInterfaceMode mode) { if (o is null) @@ -409,6 +417,7 @@ public static string GetTypeInfoName(ITypeInfo typeInfo) /// <summary> /// Return the managed object representing the IUnknown* /// </summary> + [SupportedOSPlatform("windows")] public static object GetObjectForIUnknown(IntPtr /* IUnknown* */ pUnk) { if (pUnk == IntPtr.Zero) @@ -422,6 +431,7 @@ public static object GetObjectForIUnknown(IntPtr /* IUnknown* */ pUnk) [MethodImpl(MethodImplOptions.InternalCall)] private static extern object GetObjectForIUnknownNative(IntPtr /* IUnknown* */ pUnk); + [SupportedOSPlatform("windows")] public static object GetUniqueObjectForIUnknown(IntPtr unknown) { if (unknown == IntPtr.Zero) @@ -445,12 +455,15 @@ public static object GetUniqueObjectForIUnknown(IntPtr unknown) /// Return an Object for IUnknown, using the Type T. /// Type T should be either a COM imported Type or a sub-type of COM imported Type /// </summary> + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern object GetTypedObjectForIUnknown(IntPtr /* IUnknown* */ pUnk, Type t); + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern IntPtr CreateAggregatedObject(IntPtr pOuter, object o); + [SupportedOSPlatform("windows")] public static IntPtr CreateAggregatedObject<T>(IntPtr pOuter, T o) where T : notnull { return CreateAggregatedObject(pOuter, (object)o); @@ -549,6 +562,7 @@ public static string PtrToStringBSTR(IntPtr ptr) /// Release the COM component and if the reference hits 0 zombie this object. /// Further usage of this Object might throw an exception /// </summary> + [SupportedOSPlatform("windows")] public static int ReleaseComObject(object o) { if (o is null) @@ -571,6 +585,7 @@ public static int ReleaseComObject(object o) /// Release the COM component and zombie this object. /// Further usage of this Object might throw an exception /// </summary> + [SupportedOSPlatform("windows")] public static int FinalReleaseComObject(object o) { if (o is null) @@ -589,6 +604,7 @@ public static int FinalReleaseComObject(object o) [MethodImpl(MethodImplOptions.InternalCall)] internal static extern void InternalFinalReleaseComObject(object o); + [SupportedOSPlatform("windows")] public static object? GetComObjectData(object obj, object key) { if (obj is null) @@ -614,6 +630,7 @@ public static int FinalReleaseComObject(object o) /// false if the data could not be added because there already was data for the /// specified key. /// </summary> + [SupportedOSPlatform("windows")] public static bool SetComObjectData(object obj, object key, object? data) { if (obj is null) @@ -637,6 +654,7 @@ public static bool SetComObjectData(object obj, object key, object? data) /// This method takes the given COM object and wraps it in an object /// of the specified type. The type must be derived from __ComObject. /// </summary> + [SupportedOSPlatform("windows")] [return: NotNullIfNotNull("o")] public static object? CreateWrapperOfType(object? o, Type t) { @@ -687,7 +705,8 @@ public static bool SetComObjectData(object obj, object key, object? data) return Wrapper; } - public static TWrapper CreateWrapperOfType<T, TWrapper>([AllowNull] T o) + [SupportedOSPlatform("windows")] + public static TWrapper CreateWrapperOfType<T, TWrapper>(T? o) { return (TWrapper)CreateWrapperOfType(o, typeof(TWrapper))!; } @@ -701,6 +720,7 @@ public static TWrapper CreateWrapperOfType<T, TWrapper>([AllowNull] T o) [MethodImpl(MethodImplOptions.InternalCall)] public static extern bool IsTypeVisibleFromCom(Type t); + [SupportedOSPlatform("windows")] public static unsafe int QueryInterface(IntPtr pUnk, ref Guid iid, out IntPtr ppv) { if (pUnk == IntPtr.Zero) @@ -713,6 +733,7 @@ } + [SupportedOSPlatform("windows")] public static unsafe int AddRef(IntPtr pUnk) { if (pUnk == IntPtr.Zero) @@ -721,6 +742,7 @@ public static unsafe int AddRef(IntPtr pUnk) return ((delegate* stdcall<IntPtr, int>)(*(*(void***)pUnk + 1 /* IUnknown.AddRef slot */)))(pUnk); } + [SupportedOSPlatform("windows")] public static unsafe int Release(IntPtr pUnk) { if (pUnk == IntPtr.Zero) @@ -729,26 +751,31 @@ public static unsafe int Release(IntPtr pUnk) return ((delegate* stdcall<IntPtr, int>)(*(*(void***)pUnk + 2 /* IUnknown.Release slot */)))(pUnk); } + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern void GetNativeVariantForObject(object? obj, /* VARIANT * */ IntPtr pDstNativeVariant); - public static void GetNativeVariantForObject<T>([AllowNull] T obj, IntPtr pDstNativeVariant) + [SupportedOSPlatform("windows")] + public static void GetNativeVariantForObject<T>(T? obj, IntPtr pDstNativeVariant) { GetNativeVariantForObject((object?)obj, pDstNativeVariant); } + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern object? GetObjectForNativeVariant(/* VARIANT * */ IntPtr pSrcNativeVariant); - [return: MaybeNull] - public static T GetObjectForNativeVariant<T>(IntPtr pSrcNativeVariant) + [SupportedOSPlatform("windows")] + public static T? GetObjectForNativeVariant<T>(IntPtr pSrcNativeVariant) { - return (T)GetObjectForNativeVariant(pSrcNativeVariant)!; + return (T?)GetObjectForNativeVariant(pSrcNativeVariant); } + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern object?[] GetObjectsForNativeVariants(/* VARIANT * */ IntPtr aSrcNativeVariant, int cVars); + [SupportedOSPlatform("windows")] public static T[] GetObjectsForNativeVariants<T>(IntPtr aSrcNativeVariant, int cVars) { object?[] objects = GetObjectsForNativeVariants(aSrcNativeVariant, cVars); @@ -763,15 +790,18 @@ public static T[] GetObjectsForNativeVariants<T>(IntPtr aSrcNativeVariant, int cVars) /// Returns the first valid COM slot that GetMethodInfoForSlot will work on /// This will be 3 for IUnknown based interfaces and 7 for IDispatch based interfaces. /// </summary> + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern int GetStartComSlot(Type t); /// <summary> /// Returns the last valid COM slot that GetMethodInfoForSlot will work on.
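A brief aside on the [SupportedOSPlatform("windows")] annotations added throughout Marshal.CoreCLR.cs above: the attribute drives the CA1416 platform-compatibility analyzer rather than inserting any runtime check, so callers targeting multiple platforms are expected to guard the call site themselves. A minimal caller-side sketch (the helper is hypothetical, not part of this PR; OperatingSystem.IsWindows() ships in the same .NET 5 wave):

```csharp
using System;
using System.Runtime.InteropServices;

static class ComInteropUsage
{
    // Hypothetical helper: guards a windows-only Marshal API so the
    // CA1416 analyzer can prove it is unreachable on other platforms.
    public static int ReleaseIfWindows(object rcw)
    {
        if (OperatingSystem.IsWindows())
        {
            return Marshal.ReleaseComObject(rcw);
        }
        return -1; // assumption: caller treats -1 as "not applicable"
    }
}
```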
/// + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern int GetEndComSlot(Type t); + [SupportedOSPlatform("windows")] public static object BindToMoniker(string monikerName) { CreateBindCtx(0, out IBindCtx bindctx); @@ -791,6 +821,7 @@ public static object BindToMoniker(string monikerName) [DllImport(Interop.Libraries.Ole32, PreserveSig = false)] private static extern void BindMoniker(IMoniker pmk, uint grfOpt, ref Guid iidResult, [MarshalAs(UnmanagedType.Interface)] out object ppvResult); + [SupportedOSPlatform("windows")] [MethodImpl(MethodImplOptions.InternalCall)] public static extern void ChangeWrapperHandleStrength(object otp, bool fIsWeak); #endif // FEATURE_COMINTEROP diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.CoreCLR.cs new file mode 100644 index 000000000000..e523487f0d6b --- /dev/null +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/X86Base.CoreCLR.cs @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace System.Runtime.Intrinsics.X86 +{ + public abstract partial class X86Base + { + [DllImport(RuntimeHelpers.QCall)] + private static extern unsafe void __cpuidex(int* cpuInfo, int functionId, int subFunctionId); + } +} diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/Versioning/CompatibilitySwitch.cs b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/Versioning/CompatibilitySwitch.cs index 3b69f789e61c..d90f81d48e98 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/Versioning/CompatibilitySwitch.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/Runtime/Versioning/CompatibilitySwitch.cs @@ -7,24 +7,7 @@ namespace System.Runtime.Versioning { internal static class CompatibilitySwitch { - /* This class contains 3 sets of api: - * 1. internal apis : These apis are supposed to be used by mscorlib.dll and other assemblies which use the section in config - * These apis query for the value of quirk not only in windows quirk DB but also in runtime section of config files, - * registry and environment vars. - * 2. public apis : These apis are supposed to be used by FX assemblies which do not read the runtime section of config files and have - * have their own section in config files or do not use configs at all. - * - * 3. specialized apis: These apis are defined in order to retrieve a specific value defined in CLR Config. That value can have specific look-up rules - * for the order and location of the config sources used. - * - * These apis are for internal use only for FX assemblies. It has not been decided if they can be used by OOB components due to EULA restrictions - */ - internal static string? GetValueInternal(string compatibilitySwitchName) - { - return GetValueInternalCall(compatibilitySwitchName, false); - } - [MethodImpl(MethodImplOptions.InternalCall)] - private static extern string? GetValueInternalCall(string compatibilitySwitchName, bool onlyDB); + internal static extern string? 
GetValueInternal(string compatibilitySwitchName); } } diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs b/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs index a883bad112b3..f0d6247ea6bf 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/RuntimeType.CoreCLR.cs @@ -234,6 +234,48 @@ internal MemberInfoCache(RuntimeTypeCache runtimeTypeCache) internal MethodBase AddMethod(RuntimeType declaringType, RuntimeMethodHandleInternal method, CacheType cacheType) { + // First, see if we've already cached a RuntimeMethodInfo or + // RuntimeConstructorInfo that corresponds to this member. Since another + // thread could be updating the backing store at the same time, it's + // possible that the check below will result in a false negative. That's + // ok; we'll handle any concurrency issues in the later call to Insert. + + T?[]? allMembersLocal = m_allMembers; + if (allMembersLocal != null) + { + // if not a Method or a Constructor, fall through + if (cacheType == CacheType.Method) + { + foreach (T? candidate in allMembersLocal) + { + if (candidate is null) + { + break; // end of list; stop iteration and fall through to slower path + } + + if (candidate is RuntimeMethodInfo candidateRMI && candidateRMI.MethodHandle.Value == method.Value) + { + return candidateRMI; // match! + } + } + } + else if (cacheType == CacheType.Constructor) + { + foreach (T? candidate in allMembersLocal) + { + if (candidate is null) + { + break; // end of list; stop iteration and fall through to slower path + } + + if (candidate is RuntimeConstructorInfo candidateRCI && candidateRCI.MethodHandle.Value == method.Value) + { + return candidateRCI; // match! + } + } + } + } + T[] list = null!; MethodAttributes methodAttributes = RuntimeMethodHandle.GetAttributes(method); bool isPublic = (methodAttributes & MethodAttributes.MemberAccessMask) == MethodAttributes.Public; @@ -264,6 +306,29 @@ internal MethodBase AddMethod(RuntimeType declaringType, RuntimeMethodHandleInte internal FieldInfo AddField(RuntimeFieldHandleInternal field) { + // First, see if we've already cached an RtFieldInfo that corresponds + // to this field. Since another thread could be updating the backing + // store at the same time, it's possible that the check below will + // result in a false negative. That's ok; we'll handle any concurrency + // issues in the later call to Insert. + + T?[]? allMembersLocal = m_allMembers; + if (allMembersLocal != null) + { + foreach (T? candidate in allMembersLocal) + { + if (candidate is null) + { + break; // end of list; stop iteration and fall through to slower path + } + + if (candidate is RtFieldInfo candidateRtFI && candidateRtFI.GetFieldHandle() == field.Value) + { + return candidateRtFI; // match!
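(The AddField hunk continues right after this aside.) Both caches above use the same discipline: readers scan the array without a lock and treat a miss as "fall through to the slow path", while the writer publishes each slot with a Volatile.Write (see the MergeWithGlobalList change just below). A self-contained C# sketch of that pattern, with illustrative names only, not the runtime's types:

```csharp
using System;
using System.Threading;

// Sketch of the publish/lock-free-read pattern used by the hunks above.
// Readers may race with the writer; they can only observe null (not yet
// published) or a fully constructed item, and a false negative simply
// sends the caller to the locked slow path.
sealed class LockFreeReadCache<T> where T : class
{
    private readonly T?[] _items = new T?[64]; // fixed capacity for brevity
    private readonly object _writeLock = new object();
    private int _count;

    public T? Find(Func<T, bool> match)
    {
        foreach (T? item in _items)
        {
            if (item is null)
                break;       // end of published entries; fall through to slower path
            if (match(item))
                return item; // match!
        }
        return null;         // possible false negative under concurrency; that's ok
    }

    public T GetOrAdd(T candidate, Func<T, bool> match)
    {
        lock (_writeLock)
        {
            T? existing = Find(match); // re-check under the lock
            if (existing != null)
                return existing;
            Volatile.Write(ref _items[_count], candidate); // value may be read outside of lock
            _count++;
            return candidate;
        }
    }
}
```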
+ } + } + } + // create the runtime field info FieldAttributes fieldAttributes = RuntimeFieldHandle.GetAttributes(field); bool isPublic = (fieldAttributes & FieldAttributes.FieldAccessMask) == FieldAttributes.Public; @@ -507,7 +572,7 @@ private void MergeWithGlobalList(T[] list) } Debug.Assert(cachedMembers![freeSlotIndex] == null); - cachedMembers[freeSlotIndex] = newMemberInfo; + Volatile.Write(ref cachedMembers[freeSlotIndex], newMemberInfo); // value may be read outside of lock freeSlotIndex++; } } @@ -3798,7 +3863,9 @@ private void CreateInstanceCheckThis() throw new NotSupportedException(SR.Acc_CreateVoid); } - [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2006:UnrecognizedReflectionPattern", + [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2082:UnrecognizedReflectionPattern", + Justification = "Implementation detail of Activator that linker intrinsically recognizes")] + [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2085:UnrecognizedReflectionPattern", Justification = "Implementation detail of Activator that linker intrinsically recognizes")] internal object? CreateInstanceImpl( BindingFlags bindingAttr, Binder? binder, object?[]? args, CultureInfo? culture) diff --git a/src/coreclr/src/System.Private.CoreLib/src/System/ValueType.cs b/src/coreclr/src/System.Private.CoreLib/src/System/ValueType.cs index 646a4792e9ab..72ffecfe902d 100644 --- a/src/coreclr/src/System.Private.CoreLib/src/System/ValueType.cs +++ b/src/coreclr/src/System.Private.CoreLib/src/System/ValueType.cs @@ -20,7 +20,7 @@ namespace System [System.Runtime.CompilerServices.TypeForwardedFrom("mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089")] public abstract class ValueType { - [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2006:UnrecognizedReflectionPattern", + [UnconditionalSuppressMessage("ReflectionAnalysis", "IL2075:UnrecognizedReflectionPattern", Justification = "Trimmed fields don't make a difference for equality")] public override bool Equals(object? obj) { diff --git a/src/coreclr/src/ToolBox/SOS/DacTableGen/CMakeLists.txt b/src/coreclr/src/ToolBox/SOS/DacTableGen/CMakeLists.txt index e3fc3ed9e3a5..98cc178b8a30 100644 --- a/src/coreclr/src/ToolBox/SOS/DacTableGen/CMakeLists.txt +++ b/src/coreclr/src/ToolBox/SOS/DacTableGen/CMakeLists.txt @@ -1,3 +1,4 @@ +cmake_minimum_required(VERSION 3.8) # Quick note: The CMake C# support is using the CSC bundled with the MSBuild that the native build runs on, not the one supplied by the local .NET SDK. project(DacTableGen LANGUAGES CSharp) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.cpp index e27491bbd5ee..086d4c446f2e 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.cpp @@ -796,6 +796,16 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr; if ((section_begin <= address) && (address < section_end)) // A reloc for our section? { +#if defined(TARGET_AMD64) + // During an actual compile, recordRelocation() will be called before the compile + // is actually finished, and it will write the relative offset into the fixupLocation. + // Then, emitEndCodeGen() will patch forward jumps by subtracting any adjustment due + // to overestimation of instruction sizes. 
Because we're applying the relocs after the + // compile has finished, we need to reverse that: i.e. add in the (negative) adjustment + // that's now in the fixupLocation. + INT32 adjustment = *(INT32*)address; + delta += adjustment; +#endif LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address, delta); *(DWORD*)address = (DWORD)delta; @@ -885,9 +895,17 @@ void* CompileResult::repAddressMap(void* replayAddress) { if (AddressMap == nullptr) return nullptr; - Agnostic_AddressMap value; - value = AddressMap->Get((DWORDLONG)replayAddress); - return (void*)value.Address; + + int index = AddressMap->GetIndex((DWORDLONG)replayAddress); + + if (index != -1) + { + Agnostic_AddressMap value; + value = AddressMap->Get((DWORDLONG)replayAddress); + return (void*)value.Address; + } + + return nullptr; } void* CompileResult::searchAddressMap(void* newAddress) { @@ -952,39 +970,6 @@ void CompileResult::dmpAllocUnwindInfo(DWORD key, const Agnostic_AllocUnwindInfo value.pUnwindBlock_index, value.funcKind); } -void CompileResult::recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result) -{ - if (AllocMethodBlockCounts == nullptr) - AllocMethodBlockCounts = new LightWeightMap(); - - Agnostic_AllocMethodBlockCounts value; - - value.count = (DWORD)count; - value.result = (DWORD)result; - value.pBlockCounts_index = - AllocMethodBlockCounts->AddBuffer((unsigned char*)*pBlockCounts, count * sizeof(ICorJitInfo::BlockCounts)); - - AllocMethodBlockCounts->Add((DWORD)0, value); -} -void CompileResult::dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value) -{ - printf("AllocMethodBlockCounts key %u, value cnt-%u ind-%u res-%08X", key, value.count, value.pBlockCounts_index, - value.result); -} -HRESULT CompileResult::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts) -{ - Agnostic_AllocMethodBlockCounts value; - value = AllocMethodBlockCounts->Get((DWORD)0); - - if (count != value.count) - __debugbreak(); - - HRESULT result = (HRESULT)value.result; - *pBlockCounts = (ICorJitInfo::BlockCounts*)AllocMethodBlockCounts->GetBuffer(value.pBlockCounts_index); - recAddressMap((void*)0x4242, (void*)*pBlockCounts, count * (sizeof(ICorJitInfo::BlockCounts))); - return result; -} - void CompileResult::recRecordCallSite(ULONG instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_HANDLE methodHandle) { repRecordCallSite(instrOffset, callSig, methodHandle); diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.h index 09ec839c4b52..125d9feb9250 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/compileresult.h @@ -166,12 +166,6 @@ class CompileResult DWORD HandlerLength; DWORD ClassToken; // one view of symetric union }; - struct Agnostic_AllocMethodBlockCounts - { - DWORD count; - DWORD pBlockCounts_index; - DWORD result; - }; struct Agnostic_CORINFO_SIG_INFO2 { DWORD callConv; @@ -328,10 +322,6 @@ class CompileResult CorJitFuncKind funcKind); void dmpAllocUnwindInfo(DWORD key, const Agnostic_AllocUnwindInfo& value); - void recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result); - void dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value); - HRESULT repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts); - void recRecordCallSite(ULONG 
instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_HANDLE methodHandle); void dmpRecordCallSite(DWORD key, const Agnostic_RecordCallSite& value); void repRecordCallSite(ULONG instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_HANDLE methodHandle); diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/crlwmlist.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/crlwmlist.h index b42077de7abe..12b473a10a64 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/crlwmlist.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/crlwmlist.h @@ -19,7 +19,6 @@ #endif LWM(AddressMap, DWORDLONG, CompileResult::Agnostic_AddressMap) -LWM(AllocMethodBlockCounts, DWORD, CompileResult::Agnostic_AllocMethodBlockCounts) LWM(AllocGCInfo, DWORD, CompileResult::Agnostic_AllocGCInfo) LWM(AllocMem, DWORD, CompileResult::Agnostic_AllocMemDetails) DENSELWM(AllocUnwindInfo, CompileResult::Agnostic_AllocUnwindInfo) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lightweightmap.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lightweightmap.h index 2a5594f81243..adce2c12d259 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lightweightmap.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lightweightmap.h @@ -78,6 +78,33 @@ class LightWeightMapBuffer return newOffset + sizeof(unsigned int); } + const unsigned char* CreateBuffer(unsigned int len) + { + if (len == 0) + { + return nullptr; + } + + if (locked) + { + LogError("Added item that extended the buffer after it was locked by a call to GetBuffer()"); + __debugbreak(); + } + + unsigned int newbuffsize = bufferLength + sizeof(unsigned int) + len; + unsigned char* newbuffer = new unsigned char[newbuffsize]; + unsigned int newOffset = bufferLength; + if (bufferLength > 0) + memcpy(newbuffer, buffer, bufferLength); + memset(newbuffer + bufferLength + sizeof(unsigned int), 0, len); + *((unsigned int*)(newbuffer + bufferLength)) = len; + bufferLength += sizeof(unsigned int) + len; + if (buffer != nullptr) + delete[] buffer; + buffer = newbuffer; + return buffer + newOffset + sizeof(unsigned int); + } + unsigned char* GetBuffer(unsigned int offset) { if (offset == (unsigned int)-1) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h index 1922e7d0f5df..b9731198754a 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h @@ -18,6 +18,7 @@ #define DENSELWM(map, value) LWM(map, this_is_an_error, value) #endif +LWM(AllocMethodBlockCounts, DWORD, Agnostic_AllocMethodBlockCounts) LWM(AppendClassName, Agnostic_AppendClassName, DWORD) LWM(AreTypesEquivalent, DLDL, DWORD) LWM(AsCorInfoType, DWORDLONG, DWORD) diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index d45c5e07942d..4a134fe42f7a 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -5133,6 +5133,50 @@ DWORD MethodContext::repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, v return (DWORD)value.B; } + +void MethodContext::recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result) +{ + if (AllocMethodBlockCounts == nullptr) + AllocMethodBlockCounts = new LightWeightMap<DWORD, Agnostic_AllocMethodBlockCounts>(); + + Agnostic_AllocMethodBlockCounts value; + + value.address = (DWORDLONG)*pBlockCounts;
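(The recAllocMethodBlockCounts hunk continues right below.) The CreateBuffer helper added to LightWeightMapBuffer above appends a zero-initialized, length-prefixed region to the backing store and returns a pointer to the payload. A small C# sketch of the same layout, using a managed byte array in place of the raw buffer (illustrative only, not the superpmi code):

```csharp
using System;

// Sketch of the length-prefixed append CreateBuffer performs:
// [existing bytes][4-byte length][len zero bytes of payload].
// Returns the offset of the payload, mirroring the returned pointer.
static class LengthPrefixedBuffer
{
    public static int Append(ref byte[] buffer, int len)
    {
        int oldLength = buffer.Length;
        byte[] grown = new byte[oldLength + sizeof(int) + len];
        Buffer.BlockCopy(buffer, 0, grown, 0, oldLength);
        BitConverter.GetBytes(len).CopyTo(grown, oldLength); // length prefix
        // payload bytes are already zero-initialized by the CLR
        buffer = grown;
        return oldLength + sizeof(int); // start of the writable payload
    }
}
```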
+ value.count = (DWORD)count; + value.result = (DWORD)result; + + AllocMethodBlockCounts->Add((DWORD)0, value); +} +void MethodContext::dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value) +{ + printf("AllocMethodBlockCounts key %u, value addr-%016llX cnt-%u res-%08X", key, value.address, value.count, value.result); +} +HRESULT MethodContext::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts) +{ + Agnostic_AllocMethodBlockCounts value; + value = AllocMethodBlockCounts->Get((DWORD)0); + + if (count != value.count) + { + LogWarning("AllocMethodBlockCount mismatch: record %d, replay %d", value.count, count); + } + + HRESULT result = (HRESULT)value.result; + + // Allocate a scratch buffer, linked to method context via AllocMethodBlockCounts, so it gets + // cleaned up when the method context does. + // + // We won't bother recording this via AddBuffer because currently SPMI will never look at it. + // But we need a writeable buffer because the jit will store IL offsets inside. + // + // Todo, perhaps: record the buffer as a compile result instead, and defer copying until + // jit completion so we can snapshot the offsets the jit writes. + // + *pBlockCounts = (ICorJitInfo::BlockCounts*)AllocMethodBlockCounts->CreateBuffer(count * sizeof(ICorJitInfo::BlockCounts)); + cr->recAddressMap((void*)value.address, (void*)*pBlockCounts, count * (sizeof(ICorJitInfo::BlockCounts))); + return result; +} + void MethodContext::recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, UINT32 * pCount, ICorJitInfo::BlockCounts** pBlockCounts, diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h index 36c0fbb478bb..00618039c9ba 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h @@ -422,6 +422,12 @@ class MethodContext DWORDLONG method; DWORDLONG delegateCls; }; + struct Agnostic_AllocMethodBlockCounts + { + DWORDLONG address; + DWORD count; + DWORD result; + }; struct Agnostic_GetMethodBlockCounts { DWORD count; @@ -1168,6 +1174,10 @@ class MethodContext void dmpGetFieldThreadLocalStoreID(DWORDLONG key, DLD value); DWORD repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, void** ppIndirection); + void recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result); + void dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value); + HRESULT repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts); + void recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, UINT32 * pCount, ICorJitInfo::BlockCounts** pBlockCounts, @@ -1338,6 +1348,7 @@ class MethodContext // ************************************************************************************* enum mcPackets { + Packet_AllocMethodBlockCounts = 131, Packet_AppendClassName = 149, // Added 8/6/2014 - needed for SIMD Packet_AreTypesEquivalent = 1, Packet_AsCorInfoType = 2, @@ -1493,7 +1504,6 @@ enum mcPackets Packet_ShouldEnforceCallvirtRestriction = 112, // Retired 2/18/2020 PacketCR_AddressMap = 113, - PacketCR_AllocMethodBlockCounts = 131, PacketCR_AllocGCInfo = 114, PacketCR_AllocMem = 115, PacketCR_AllocUnwindInfo = 132, diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp index f5b69299113e..bb1bba81f01d 100644 --- 
a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp @@ -2030,7 +2030,7 @@ HRESULT interceptor_ICJI::allocMethodBlockCounts(UINT32 count, // The n { mc->cr->AddCall("allocMethodBlockCounts"); HRESULT result = original_ICorJitInfo->allocMethodBlockCounts(count, pBlockCounts); - mc->cr->recAllocMethodBlockCounts(count, pBlockCounts, result); + mc->recAllocMethodBlockCounts(count, pBlockCounts, result); return result; } diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp index f0e2114d992c..6b8f4540a522 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp @@ -1784,7 +1784,7 @@ HRESULT MyICJI::allocMethodBlockCounts(UINT32 count, // The number of b BlockCounts** pBlockCounts) { jitInstance->mc->cr->AddCall("allocMethodBlockCounts"); - return jitInstance->mc->cr->repAllocMethodBlockCounts(count, pBlockCounts); + return jitInstance->mc->repAllocMethodBlockCounts(count, pBlockCounts); } // get profile information to be used for optimizing the current method. The format diff --git a/src/coreclr/src/binder/CMakeLists.txt b/src/coreclr/src/binder/CMakeLists.txt index 9c242ed1518d..208f1214dd0d 100644 --- a/src/coreclr/src/binder/CMakeLists.txt +++ b/src/coreclr/src/binder/CMakeLists.txt @@ -82,11 +82,13 @@ endif(CLR_CMAKE_TARGET_WIN32) convert_to_absolute_path(BINDER_SOURCES ${BINDER_SOURCES}) convert_to_absolute_path(BINDER_CROSSGEN_SOURCES ${BINDER_CROSSGEN_SOURCES}) -add_library_clr(v3binder +add_library_clr(v3binder_obj OBJECT ${BINDER_SOURCES} ) -add_dependencies(v3binder eventing_headers) +add_dependencies(v3binder_obj eventing_headers) +add_library(v3binder INTERFACE) +target_sources(v3binder INTERFACE $<TARGET_OBJECTS:v3binder_obj>) add_library_clr(v3binder_crossgen STATIC diff --git a/src/coreclr/src/binder/assemblybinder.cpp b/src/coreclr/src/binder/assemblybinder.cpp index 5f13adc7b2c8..02a3646dab4a 100644 --- a/src/coreclr/src/binder/assemblybinder.cpp +++ b/src/coreclr/src/binder/assemblybinder.cpp @@ -447,25 +447,25 @@ namespace BINDER_SPACE // Satellite assembly's path: // * Absolute path when looking for a file on disk // * Bundle-relative path when looking within the single-file bundle.
- StackSString sMscorlibSatellite; + StackSString sCoreLibSatellite; BinderTracing::PathSource pathSource = BinderTracing::PathSource::Bundle; BundleFileLocation bundleFileLocation = Bundle::ProbeAppBundle(relativePath, /*pathIsBundleRelative */ true); if (!bundleFileLocation.IsValid()) { - sMscorlibSatellite.Set(systemDirectory); + sCoreLibSatellite.Set(systemDirectory); pathSource = BinderTracing::PathSource::ApplicationAssemblies; } - CombinePath(sMscorlibSatellite, relativePath, sMscorlibSatellite); + CombinePath(sCoreLibSatellite, relativePath, sCoreLibSatellite); ReleaseHolder pSystemAssembly; - IF_FAIL_GO(AssemblyBinder::GetAssembly(sMscorlibSatellite, + IF_FAIL_GO(AssemblyBinder::GetAssembly(sCoreLibSatellite, TRUE /* fIsInGAC */, FALSE /* fExplicitBindToNativeImage */, &pSystemAssembly, NULL /* szMDAssemblyPath */, bundleFileLocation)); - BinderTracing::PathProbed(sMscorlibSatellite, pathSource, hr); + BinderTracing::PathProbed(sCoreLibSatellite, pathSource, hr); *ppSystemAssembly = pSystemAssembly.Extract(); diff --git a/src/coreclr/src/binder/assemblyname.cpp b/src/coreclr/src/binder/assemblyname.cpp index b073dc66f88b..71e70283c0b3 100644 --- a/src/coreclr/src/binder/assemblyname.cpp +++ b/src/coreclr/src/binder/assemblyname.cpp @@ -315,10 +315,10 @@ namespace BINDER_SPACE return ulRef; } - BOOL AssemblyName::IsMscorlib() + BOOL AssemblyName::IsCoreLib() { // TODO: Is this simple comparison enough? - return EqualsCaseInsensitive(GetSimpleName(), g_BinderVariables->mscorlib); + return EqualsCaseInsensitive(GetSimpleName(), g_BinderVariables->corelib); } ULONG AssemblyName::Hash(DWORD dwIncludeFlags) diff --git a/src/coreclr/src/binder/bindertracing.cpp b/src/coreclr/src/binder/bindertracing.cpp index 2e5c83d7b09b..29ba819ee6c9 100644 --- a/src/coreclr/src/binder/bindertracing.cpp +++ b/src/coreclr/src/binder/bindertracing.cpp @@ -216,14 +216,14 @@ namespace BinderTracing AssemblyBindOperation::~AssemblyBindOperation() { - if (!BinderTracing::IsEnabled() || ShouldIgnoreBind()) - return; - - // Make sure the bind request is populated. Tracing may have been enabled mid-bind. - if (!m_populatedBindRequest) - PopulateBindRequest(m_bindRequest); + if (BinderTracing::IsEnabled() && !ShouldIgnoreBind()) + { + // Make sure the bind request is populated. Tracing may have been enabled mid-bind. + if (!m_populatedBindRequest) + PopulateBindRequest(m_bindRequest); - FireAssemblyLoadStop(m_bindRequest, m_resultAssembly, m_cached); + FireAssemblyLoadStop(m_bindRequest, m_resultAssembly, m_cached); + } if (m_resultAssembly != nullptr) m_resultAssembly->Release(); @@ -246,7 +246,7 @@ namespace BinderTracing // ActivityTracker or EventSource may have triggered the system satellite load. // Don't track system satellite binding to avoid potential infinite recursion. 
- m_ignoreBind = m_bindRequest.AssemblySpec->IsMscorlibSatellite(); + m_ignoreBind = m_bindRequest.AssemblySpec->IsCoreLibSatellite(); m_checkedIgnoreBind = true; return m_ignoreBind; } diff --git a/src/coreclr/src/binder/clrprivbinderassemblyloadcontext.cpp b/src/coreclr/src/binder/clrprivbinderassemblyloadcontext.cpp index f492adfab28b..69d8d8337289 100644 --- a/src/coreclr/src/binder/clrprivbinderassemblyloadcontext.cpp +++ b/src/coreclr/src/binder/clrprivbinderassemblyloadcontext.cpp @@ -20,8 +20,8 @@ HRESULT CLRPrivBinderAssemblyLoadContext::BindAssemblyByNameWorker(BINDER_SPACE: HRESULT hr = S_OK; #ifdef _DEBUG - // MSCORLIB should be bound using BindToSystem - _ASSERTE(!pAssemblyName->IsMscorlib()); + // CoreLib should be bound using BindToSystem + _ASSERTE(!pAssemblyName->IsCoreLib()); #endif // Do we have the assembly already loaded in the context of the current binder? @@ -145,7 +145,7 @@ HRESULT CLRPrivBinderAssemblyLoadContext::BindUsingPEImage( /* in */ PEImage *pP // Disallow attempt to bind to the core library. Aside from that, // the LoadContext can load any assembly (even if it was in a different LoadContext like TPA). - if (pAssemblyName->IsMscorlib()) + if (pAssemblyName->IsCoreLib()) { IF_FAIL_GO(HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND)); } diff --git a/src/coreclr/src/binder/clrprivbindercoreclr.cpp b/src/coreclr/src/binder/clrprivbindercoreclr.cpp index 1045cb93e76d..292ad99fd855 100644 --- a/src/coreclr/src/binder/clrprivbindercoreclr.cpp +++ b/src/coreclr/src/binder/clrprivbindercoreclr.cpp @@ -20,8 +20,8 @@ HRESULT CLRPrivBinderCoreCLR::BindAssemblyByNameWorker(BINDER_SPACE::AssemblyNam HRESULT hr = S_OK; #ifdef _DEBUG - // MSCORLIB should be bound using BindToSystem - _ASSERTE(!pAssemblyName->IsMscorlib()); + // CoreLib should be bound using BindToSystem + _ASSERTE(!pAssemblyName->IsCoreLib()); #endif hr = AssemblyBinder::BindAssembly(&m_appContext, @@ -155,8 +155,8 @@ HRESULT CLRPrivBinderCoreCLR::BindUsingPEImage( /* in */ PEImage *pPEImage, IF_FAIL_GO(HRESULT_FROM_WIN32(ERROR_BAD_FORMAT)); } - // Easy out for mscorlib - if (pAssemblyName->IsMscorlib()) + // Easy out for CoreLib + if (pAssemblyName->IsCoreLib()) { IF_FAIL_GO(HRESULT_FROM_WIN32(ERROR_FILE_NOT_FOUND)); } diff --git a/src/coreclr/src/binder/inc/assemblyname.hpp b/src/coreclr/src/binder/inc/assemblyname.hpp index 38590e09f78a..fb66d9830e0a 100644 --- a/src/coreclr/src/binder/inc/assemblyname.hpp +++ b/src/coreclr/src/binder/inc/assemblyname.hpp @@ -65,7 +65,7 @@ namespace BINDER_SPACE inline void SetHave(DWORD dwIdentityFlags); - BOOL IsMscorlib(); + BOOL IsCoreLib(); ULONG Hash(/* in */ DWORD dwIncludeFlags); BOOL Equals(/* in */ AssemblyName *pAssemblyName, diff --git a/src/coreclr/src/binder/inc/variables.hpp b/src/coreclr/src/binder/inc/variables.hpp index 50f392d0366e..d060a691bbec 100644 --- a/src/coreclr/src/binder/inc/variables.hpp +++ b/src/coreclr/src/binder/inc/variables.hpp @@ -31,7 +31,7 @@ namespace BINDER_SPACE // AssemblyName string constants SString cultureNeutral; - SString mscorlib; + SString corelib; }; extern Variables *g_BinderVariables; diff --git a/src/coreclr/src/binder/variables.cpp b/src/coreclr/src/binder/variables.cpp index d9c3f672657d..fbdd106b4dd2 100644 --- a/src/coreclr/src/binder/variables.cpp +++ b/src/coreclr/src/binder/variables.cpp @@ -40,7 +40,7 @@ namespace BINDER_SPACE // AssemblyName string constants cultureNeutral.SetLiteral(W("neutral")); - mscorlib.SetLiteral(CoreLibName_W); + corelib.SetLiteral(CoreLibName_W); } EX_CATCH_HRESULT(hr); diff --git 
a/src/coreclr/src/classlibnative/bcltype/CMakeLists.txt b/src/coreclr/src/classlibnative/bcltype/CMakeLists.txt index c3122ec12ec3..fdcf344c16ac 100644 --- a/src/coreclr/src/classlibnative/bcltype/CMakeLists.txt +++ b/src/coreclr/src/classlibnative/bcltype/CMakeLists.txt @@ -10,9 +10,11 @@ set(BCLTYPE_SOURCES variant.cpp ) -add_library_clr(bcltype +add_library_clr(bcltype_obj OBJECT ${BCLTYPE_SOURCES} ) -add_dependencies(bcltype eventing_headers) +add_dependencies(bcltype_obj eventing_headers) +add_library(bcltype INTERFACE) +target_sources(bcltype INTERFACE $<TARGET_OBJECTS:bcltype_obj>) diff --git a/src/coreclr/src/classlibnative/bcltype/system.cpp b/src/coreclr/src/classlibnative/bcltype/system.cpp index c037236f6959..74623125fdf4 100644 --- a/src/coreclr/src/classlibnative/bcltype/system.cpp +++ b/src/coreclr/src/classlibnative/bcltype/system.cpp @@ -46,7 +46,7 @@ void WINAPI InitializeGetSystemTimeAsFileTime(LPFILETIME lpSystemTimeAsFileTime) { // GetSystemTimePreciseAsFileTime exists and we'd like to use it. However, on // misconfigured systems, it's possible for the "precise" time to be inaccurate: - // https://github.com/dotnet/coreclr/issues/14187 + // https://github.com/dotnet/runtime/issues/9014 // If it's inaccurate, though, we expect it to be wildly inaccurate, so as a // workaround/heuristic, we get both the "normal" and "precise" times, and as // long as they're close, we use the precise one. This workaround can be removed @@ -607,9 +607,17 @@ BOOL QCALLTYPE SystemNative::WinRTSupported() #endif // FEATURE_COMINTEROP +#if defined(TARGET_X86) || defined(TARGET_AMD64) +void QCALLTYPE SystemNative::X86BaseCpuId(int cpuInfo[4], int functionId, int subFunctionId) +{ + QCALL_CONTRACT; + BEGIN_QCALL; + __cpuidex(cpuInfo, functionId, subFunctionId); + END_QCALL; +} - +#endif // defined(TARGET_X86) || defined(TARGET_AMD64) diff --git a/src/coreclr/src/classlibnative/bcltype/system.h b/src/coreclr/src/classlibnative/bcltype/system.h index 20d357c17302..ff6720f0a8c0 100644 --- a/src/coreclr/src/classlibnative/bcltype/system.h +++ b/src/coreclr/src/classlibnative/bcltype/system.h @@ -81,6 +81,10 @@ class SystemNative // Return a method info for the method were the exception was thrown static FCDECL1(ReflectMethodObject*, GetMethodFromStackTrace, ArrayBase* pStackTraceUNSAFE); +#if defined(TARGET_X86) || defined(TARGET_AMD64) + static void QCALLTYPE X86BaseCpuId(int cpuInfo[4], int functionId, int subFunctionId); +#endif // defined(TARGET_X86) || defined(TARGET_AMD64) + private: // Common processing code for FailFast static void GenericFailFast(STRINGREF refMesgString, EXCEPTIONREF refExceptionForWatsonBucketing, UINT_PTR retAddress, UINT exitCode, STRINGREF errorSource); diff --git a/src/coreclr/src/classlibnative/bcltype/varargsnative.cpp b/src/coreclr/src/classlibnative/bcltype/varargsnative.cpp index fb2c1aefe3c7..3a9ed227cfdd 100644 --- a/src/coreclr/src/classlibnative/bcltype/varargsnative.cpp +++ b/src/coreclr/src/classlibnative/bcltype/varargsnative.cpp @@ -502,7 +502,7 @@ VarArgsNative::GetNextArgHelper( value->data = (BYTE*)origArgPtr + (sizeof(void*)-1); } #endif - value->type = MscorlibBinder::GetElementType(elemType); + value->type = CoreLibBinder::GetElementType(elemType); break; case ELEMENT_TYPE_I2: @@ -513,7 +513,7 @@ VarArgsNative::GetNextArgHelper( value->data = (BYTE*)origArgPtr + (sizeof(void*)-2); } #endif - value->type = MscorlibBinder::GetElementType(elemType); + value->type = CoreLibBinder::GetElementType(elemType); break; case ELEMENT_TYPE_I4: @@ -522,13
+522,13 @@ VarArgsNative::GetNextArgHelper( case ELEMENT_TYPE_STRING: case ELEMENT_TYPE_I: case ELEMENT_TYPE_U: - value->type = MscorlibBinder::GetElementType(elemType); + value->type = CoreLibBinder::GetElementType(elemType); break; case ELEMENT_TYPE_I8: case ELEMENT_TYPE_U8: case ELEMENT_TYPE_R8: - value->type = MscorlibBinder::GetElementType(elemType); + value->type = CoreLibBinder::GetElementType(elemType); #if !defined(HOST_64BIT) && (DATA_ALIGNMENT > 4) if ( fData && origArgPtr == value->data ) { // allocate an aligned copy of the value diff --git a/src/coreclr/src/classlibnative/float/CMakeLists.txt b/src/coreclr/src/classlibnative/float/CMakeLists.txt index 2345ad0b9135..b2c47ea39b65 100644 --- a/src/coreclr/src/classlibnative/float/CMakeLists.txt +++ b/src/coreclr/src/classlibnative/float/CMakeLists.txt @@ -7,9 +7,12 @@ set(FLOAT_SOURCES floatsingle.cpp ) -add_library_clr(comfloat_wks +add_library_clr(comfloat_wks_obj OBJECT ${FLOAT_SOURCES} ) -add_dependencies(comfloat_wks eventing_headers) +add_dependencies(comfloat_wks_obj eventing_headers) + +add_library(comfloat_wks INTERFACE) +target_sources(comfloat_wks INTERFACE $<TARGET_OBJECTS:comfloat_wks_obj>) \ No newline at end of file diff --git a/src/coreclr/src/classlibnative/float/floatdouble.cpp b/src/coreclr/src/classlibnative/float/floatdouble.cpp index d3ef36c66a61..a2a00b0628bd 100644 --- a/src/coreclr/src/classlibnative/float/floatdouble.cpp +++ b/src/coreclr/src/classlibnative/float/floatdouble.cpp @@ -125,7 +125,7 @@ FCIMPLEND #if defined(_MSC_VER) && defined(TARGET_AMD64) // The /fp:fast form of `ceil` for AMD64 does not correctly handle: `-1.0 < value <= -0.0` -// https://github.com/dotnet/coreclr/issues/19739 +// https://github.com/dotnet/runtime/issues/11003 #pragma float_control(push) #pragma float_control(precise, on) #endif @@ -172,7 +172,7 @@ FCIMPLEND #if defined(_MSC_VER) && defined(TARGET_X86) // The /fp:fast form of `floor` for x86 does not correctly handle: `-0.0` -// https://github.com/dotnet/coreclr/issues/19739 +// https://github.com/dotnet/runtime/issues/11003 #pragma float_control(push) #pragma float_control(precise, on) #endif diff --git a/src/coreclr/src/classlibnative/float/floatsingle.cpp b/src/coreclr/src/classlibnative/float/floatsingle.cpp index 781badfc1f8a..9972e17c6901 100644 --- a/src/coreclr/src/classlibnative/float/floatsingle.cpp +++ b/src/coreclr/src/classlibnative/float/floatsingle.cpp @@ -123,7 +123,7 @@ FCIMPLEND #if defined(_MSC_VER) && defined(TARGET_AMD64) // The /fp:fast form of `ceilf` for AMD64 does not correctly handle: `-1.0 < value <= -0.0` -// https://github.com/dotnet/coreclr/issues/19739 +// https://github.com/dotnet/runtime/issues/11003 #pragma float_control(push) #pragma float_control(precise, on) #endif diff --git a/src/coreclr/src/debug/createdump/CMakeLists.txt b/src/coreclr/src/debug/createdump/CMakeLists.txt index d5137b0cf5f1..80f9487dfc09 100644 --- a/src/coreclr/src/debug/createdump/CMakeLists.txt +++ b/src/coreclr/src/debug/createdump/CMakeLists.txt @@ -14,6 +14,7 @@ if(CLR_CMAKE_HOST_WIN32) set(CREATEDUMP_SOURCES main.cpp + dumpname.cpp createdumpwindows.cpp createdump.rc ) @@ -28,6 +29,7 @@ if(CLR_CMAKE_HOST_WIN32) advapi32.lib version.lib dbghelp.lib + ws2_32.lib ) else(CLR_CMAKE_HOST_WIN32) @@ -49,6 +51,7 @@ else(CLR_CMAKE_HOST_WIN32) set(CREATEDUMP_SOURCES main.cpp + dumpname.cpp createdumpunix.cpp crashinfo.cpp threadinfo.cpp diff --git a/src/coreclr/src/debug/createdump/createdump.h
b/src/coreclr/src/debug/createdump/createdump.h index 234cf09033fb..41cb93414374 100644 --- a/src/coreclr/src/debug/createdump/createdump.h +++ b/src/coreclr/src/debug/createdump/createdump.h @@ -95,4 +95,6 @@ typedef int T_CONTEXT; #define MAX_LONGPATH 1024 #endif -bool CreateDump(const char* dumpPathTemplate, int pid, MINIDUMP_TYPE minidumpType); +bool FormatDumpName(std::string& name, const char* pattern, const char* exename, int pid); +bool CreateDump(const char* dumpPathTemplate, int pid, const char* dumpType, MINIDUMP_TYPE minidumpType); + diff --git a/src/coreclr/src/debug/createdump/createdumpunix.cpp b/src/coreclr/src/debug/createdump/createdumpunix.cpp index 156b58da6a46..6ddc21a75c2a 100644 --- a/src/coreclr/src/debug/createdump/createdumpunix.cpp +++ b/src/coreclr/src/debug/createdump/createdumpunix.cpp @@ -7,10 +7,11 @@ // The Linux/MacOS create dump code // bool -CreateDump(const char* dumpPath, int pid, MINIDUMP_TYPE minidumpType) +CreateDump(const char* dumpPathTemplate, int pid, const char* dumpType, MINIDUMP_TYPE minidumpType) { ReleaseHolder<CrashInfo> crashInfo = new CrashInfo(pid); DumpWriter dumpWriter(*crashInfo); + std::string dumpPath; bool result = false; // Initialize the crash info @@ -18,7 +19,7 @@ CreateDump(const char* dumpPath, int pid, MINIDUMP_TYPE minidumpType) { goto exit; } - printf("Process %d %s\n", crashInfo->Pid(), crashInfo->Name().c_str()); + printf("Gathering state for process %d %s\n", pid, crashInfo->Name().c_str()); // Suspend all the threads in the target process and build the list of threads if (!crashInfo->EnumerateAndSuspendThreads()) @@ -30,7 +31,15 @@ CreateDump(const char* dumpPath, int pid, MINIDUMP_TYPE minidumpType) { goto exit; } - if (!dumpWriter.OpenDump(dumpPath)) + // Format the dump pattern template now that the process name on MacOS has been obtained + if (!FormatDumpName(dumpPath, dumpPathTemplate, crashInfo->Name().c_str(), pid)) + { + goto exit; + } + printf("Writing %s to file %s\n", dumpType, dumpPath.c_str()); + + // Write the actual dump file + if (!dumpWriter.OpenDump(dumpPath.c_str())) { goto exit; } diff --git a/src/coreclr/src/debug/createdump/createdumpwindows.cpp b/src/coreclr/src/debug/createdump/createdumpwindows.cpp index 51c6dbe87a91..1eed949603ed 100644 --- a/src/coreclr/src/debug/createdump/createdumpwindows.cpp +++ b/src/coreclr/src/debug/createdump/createdumpwindows.cpp @@ -2,28 +2,42 @@ // The .NET Foundation licenses this file to you under the MIT license.
#include "createdump.h" +#include "psapi.h" // // The Windows create dump code // bool -CreateDump(const char* dumpPath, int pid, MINIDUMP_TYPE minidumpType) +CreateDump(const char* dumpPathTemplate, int pid, const char* dumpType, MINIDUMP_TYPE minidumpType) { HANDLE hFile = INVALID_HANDLE_VALUE; HANDLE hProcess = NULL; bool result = false; + ArrayHolder pszName = new char[MAX_LONGPATH + 1]; + std::string dumpPath; + hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid); if (hProcess == NULL) { fprintf(stderr, "Invalid process id '%d' error %d\n", pid, GetLastError()); goto exit; } + if (GetModuleBaseNameA(hProcess, NULL, pszName, MAX_LONGPATH) <= 0) + { + fprintf(stderr, "Get process name FAILED %d\n", GetLastError()); + goto exit; + } + if (!FormatDumpName(dumpPath, dumpPathTemplate, pszName, pid)) + { + goto exit; + } + printf("Writing %s to file %s\n", dumpType, dumpPath.c_str()); - hFile = CreateFileA(dumpPath, GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); + hFile = CreateFileA(dumpPath.c_str(), GENERIC_READ | GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { - fprintf(stderr, "Invalid dump path '%s' error %d\n", dumpPath, GetLastError()); + fprintf(stderr, "Invalid dump path '%s' error %d\n", dumpPath.c_str(), GetLastError()); goto exit; } diff --git a/src/coreclr/src/debug/createdump/dumpname.cpp b/src/coreclr/src/debug/createdump/dumpname.cpp new file mode 100644 index 000000000000..751997c6755b --- /dev/null +++ b/src/coreclr/src/debug/createdump/dumpname.cpp @@ -0,0 +1,124 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "createdump.h" +#include +#ifdef HOST_WINDOWS +#include +#endif + +// +// Format the core dump name using a subset of the standard coredump pattern +// defined here: https://man7.org/linux/man-pages/man5/core.5.html. +// +// Supported: +// +// %% A single % character. +// %d PID of dumped process (for backwards createdump compatibility). +// %p PID of dumped process. +// %e The process executable filename. +// %h Hostname return by gethostname(). +// %t Time of dump, expressed as seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC). +// +// Unsupported: +// +// %c Core file size soft resource limit of crashing process. +// %E Pathname of executable, with slashes ('/') replaced by exclamation marks ('!'). +// %g Numeric real GID of dumped process. +// %i TID of thread that triggered core dump, as seen in the PID namespace in which the thread resides. +// %I TID of thread that triggered core dump, as seen in the initial PID namespace. +// %P PID of dumped process, as seen in the initial PID namespace. +// %s Number of signal causing dump. +// %u Numeric real UID of dumped process. 
+// +bool +FormatDumpName(std::string& name, const char* pattern, const char* exename, int pid) +{ + const char* p = pattern; + if (*p == '|') + { + fprintf(stderr, "Pipe syntax in dump name not supported\n"); + return false; + } + +#ifdef HOST_WINDOWS + WSAData wsadata; + int wsaerr = WSAStartup(1, &wsadata); +#endif + + while (*p) + { + if (*p != '%') + { + name.append(1, *p); + } + else + { + switch (*++p) + { + case '\0': + return true; + + case '%': + name.append(1, '%'); + break; + + // process Id + case 'd': + case 'p': + name.append(std::to_string(pid)); + break; + + // time of dump + case 't': + time_t dumptime; + time(&dumptime); + name.append(std::to_string(dumptime)); + break; + + // hostname + case 'h': { + ArrayHolder<char> buffer = new char[MAX_LONGPATH + 1]; + if (gethostname(buffer, MAX_LONGPATH) != 0) + { + fprintf(stderr, "Could not get the host name for dump name: %d\n", +#ifdef HOST_WINDOWS + WSAGetLastError()); +#else + errno); +#endif + return false; + } + name.append(buffer); + break; + } + + // executable file name + case 'e': + name.append(exename); + break; + + // executable file path with / replaced with ! + case 'E': + // signal number that caused the dump + case 's': + // gid + case 'g': + // coredump size limit + case 'c': + // the numeric real UID of dumped process + case 'u': + // thread id that triggered the dump + case 'i': + case 'I': + // pid of dumped process + case 'P': + default: + fprintf(stderr, "Invalid dump name format char '%c'\n", *p); + return false; + } + } + ++p; + } + return true; +} diff --git a/src/coreclr/src/debug/createdump/main.cpp index 626175c2903c..cb4352cda83e 100644 --- a/src/coreclr/src/debug/createdump/main.cpp +++ b/src/coreclr/src/debug/createdump/main.cpp @@ -5,14 +5,18 @@ #ifdef HOST_WINDOWS #define DEFAULT_DUMP_PATH "%TEMP%\\" -#define DEFAULT_DUMP_TEMPLATE "dump.%d.dmp" +#define DEFAULT_DUMP_TEMPLATE "dump.%p.dmp" #else #define DEFAULT_DUMP_PATH "/tmp/" -#define DEFAULT_DUMP_TEMPLATE "coredump.%d" +#define DEFAULT_DUMP_TEMPLATE "coredump.%p" #endif const char* g_help = "createdump [options] pid\n" -"-f, --name - dump path and file name. The pid can be placed in the name with %d. The default is '" DEFAULT_DUMP_PATH DEFAULT_DUMP_TEMPLATE "'\n" +"-f, --name - dump path and file name. The default is '" DEFAULT_DUMP_PATH DEFAULT_DUMP_TEMPLATE "'. These specifiers are substituted with the following values:\n" +" %p PID of dumped process.\n" +" %e The process executable filename.\n" +" %h Hostname returned by gethostname().\n" +" %t Time of dump, expressed as seconds since the Epoch, 1970-01-01 00:00:00 +0000 (UTC).\n" "-n, --normal - create minidump.\n" "-h, --withheap - create minidump with heap (default).\n" "-t, --triage - create triage minidump.\n" @@ -21,8 +25,6 @@ const char* g_help = "createdump [options] pid\n" bool g_diagnostics = false; -bool CreateDump(const char* dumpPathTemplate, int pid, MINIDUMP_TYPE minidumpType); - // // Main entry point // @@ -35,10 +37,20 @@ int __cdecl main(const int argc, const char* argv[]) MiniDumpWithFullMemoryInfo | MiniDumpWithThreadInfo | MiniDumpWithTokenInformation); + const char* dumpType = "minidump with heap"; const char* dumpPathTemplate = nullptr; int exitCode = 0; int pid = 0; +#ifdef __APPLE__ + char* enabled = getenv("COMPlus_DbgEnableElfDumpOnMacOS"); + if (enabled == nullptr || strcmp(enabled, "1") != 0) + { + fprintf(stderr, "MachO coredumps are not supported. 
To enable ELF coredumps on MacOS, set the COMPlus_DbgEnableElfDumpOnMacOS environment variable to 1.\n"); + return -1; + } +#endif + #ifdef HOST_UNIX exitCode = PAL_InitializeDLL(); if (exitCode != 0) @@ -60,11 +72,13 @@ int __cdecl main(const int argc, const char* argv[]) } else if ((strcmp(*argv, "-n") == 0) || (strcmp(*argv, "--normal") == 0)) { + dumpType = "minidump"; minidumpType = (MINIDUMP_TYPE)(MiniDumpNormal | MiniDumpWithThreadInfo); } else if ((strcmp(*argv, "-h") == 0) || (strcmp(*argv, "--withheap") == 0)) { + dumpType = "minidump with heap"; minidumpType = (MINIDUMP_TYPE)(MiniDumpWithPrivateReadWriteMemory | MiniDumpWithDataSegs | MiniDumpWithHandleData | @@ -75,11 +89,13 @@ int __cdecl main(const int argc, const char* argv[]) } else if ((strcmp(*argv, "-t") == 0) || (strcmp(*argv, "--triage") == 0)) { + dumpType = "triage minidump"; minidumpType = (MINIDUMP_TYPE)(MiniDumpFilterTriage | MiniDumpWithThreadInfo); } else if ((strcmp(*argv, "-u") == 0) || (strcmp(*argv, "--full") == 0)) { + dumpType = "full dump"; minidumpType = (MINIDUMP_TYPE)(MiniDumpWithFullMemory | MiniDumpWithDataSegs | MiniDumpWithHandleData | @@ -102,7 +118,6 @@ int __cdecl main(const int argc, const char* argv[]) if (pid != 0) { ArrayHolder<char> tmpPath = new char[MAX_LONGPATH]; - ArrayHolder<char> dumpPath = new char[MAX_LONGPATH]; if (dumpPathTemplate == nullptr) { @@ -120,29 +135,7 @@ int __cdecl main(const int argc, const char* argv[]) dumpPathTemplate = tmpPath; } - snprintf(dumpPath, MAX_LONGPATH, dumpPathTemplate, pid); - - const char* dumpType = "minidump"; - switch (minidumpType) - { - case MiniDumpWithPrivateReadWriteMemory: - dumpType = "minidump with heap"; - break; - - case MiniDumpFilterTriage: - dumpType = "triage minidump"; - break; - - case MiniDumpWithFullMemory: - dumpType = "full dump"; - break; - - default: - break; - } - printf("Writing %s to file %s\n", dumpType, (char*)dumpPath); - - if (CreateDump(dumpPath, pid, minidumpType)) + if (CreateDump(dumpPathTemplate, pid, dumpType, minidumpType)) { printf("Dump successfully written\n"); } @@ -150,6 +143,7 @@ int __cdecl main(const int argc, const char* argv[]) { exitCode = -1; } + fflush(stdout); fflush(stderr); } diff --git a/src/coreclr/src/debug/daccess/dacdbiimpl.cpp b/src/coreclr/src/debug/daccess/dacdbiimpl.cpp index 4bcd6f2ff4cb..c0e0a23f53a3 100644 --- a/src/coreclr/src/debug/daccess/dacdbiimpl.cpp +++ b/src/coreclr/src/debug/daccess/dacdbiimpl.cpp @@ -2399,7 +2399,7 @@ TypeHandle DacDbiInterfaceImpl::FindLoadedElementType(CorElementType elementType // Lookup operations run the class loader in non-load mode.
ENABLE_FORBID_GC_LOADER_USE_IN_THIS_SCOPE(); - MethodTable * pMethodTable = (&g_Mscorlib)->GetElementType(elementType); + MethodTable * pMethodTable = (&g_CoreLib)->GetElementType(elementType); return TypeHandle(pMethodTable); } // DacDbiInterfaceImpl::FindLoadedElementType @@ -4272,6 +4272,30 @@ void DacDbiInterfaceImpl::GetModuleSimpleName(VMPTR_Module vmModule, IStringHold IfFailThrow(pStrFilename->AssignCopy(convert.GetUnicode())); } +HRESULT DacDbiInterfaceImpl::IsModuleMapped(VMPTR_Module pModule, OUT BOOL *isModuleMapped) +{ + LOG((LF_CORDB, LL_INFO10000, "DDBII::IMM - TADDR 0x%x\n", pModule)); + DD_ENTER_MAY_THROW; + + HRESULT hr = S_FALSE; + PTR_Module pTargetModule = pModule.GetDacPtr(); + + EX_TRY + { + PTR_PEFile pPEFile = pTargetModule->GetFile(); + _ASSERTE(pPEFile != NULL); + + if (pPEFile->HasLoadedIL()) + { + *isModuleMapped = pPEFile->GetLoadedIL()->IsMapped(); + hr = S_OK; + } + } + EX_CATCH_HRESULT(hr); + + return hr; +} + // Helper to intialize a TargetBuffer from a MemoryRange // // Arguments: @@ -7240,7 +7264,7 @@ HRESULT DacDbiInterfaceImpl::GetArrayLayout(COR_TYPEID id, COR_ARRAY_LAYOUT *pLa if (mt->IsString()) { COR_TYPEID token; - token.token1 = MscorlibBinder::GetElementType(ELEMENT_TYPE_CHAR).GetAddr(); + token.token1 = CoreLibBinder::GetElementType(ELEMENT_TYPE_CHAR).GetAddr(); token.token2 = 0; pLayout->componentID = token; diff --git a/src/coreclr/src/debug/daccess/dacdbiimpl.h b/src/coreclr/src/debug/daccess/dacdbiimpl.h index 9178e7173626..219b51dc77af 100644 --- a/src/coreclr/src/debug/daccess/dacdbiimpl.h +++ b/src/coreclr/src/debug/daccess/dacdbiimpl.h @@ -363,6 +363,8 @@ class DacDbiInterfaceImpl : HRESULT GetLoaderHeapMemoryRanges(OUT DacDbiArrayList * pRanges); + HRESULT IsModuleMapped(VMPTR_Module pModule, OUT BOOL *isModuleMapped); + // retrieves the list of COM interfaces implemented by vmObject, as it is known at // the time of the call (the list may change as new interface types become available // in the runtime) diff --git a/src/coreclr/src/debug/daccess/enummem.cpp b/src/coreclr/src/debug/daccess/enummem.cpp index 95909b89e3f8..a746aa46b3d1 100644 --- a/src/coreclr/src/debug/daccess/enummem.cpp +++ b/src/coreclr/src/debug/daccess/enummem.cpp @@ -276,7 +276,7 @@ HRESULT ClrDataAccess::EnumMemCLRStatic(IN CLRDataEnumMemoryFlags flags) } CATCH_ALL_EXCEPT_RETHROW_COR_E_OPERATIONCANCELLED( g_pEEDbgInterfaceImpl.EnumMem(); ) CATCH_ALL_EXCEPT_RETHROW_COR_E_OPERATIONCANCELLED( g_CORDebuggerControlFlags.EnumMem(); ) - CATCH_ALL_EXCEPT_RETHROW_COR_E_OPERATIONCANCELLED( g_Mscorlib.EnumMem(); ) + CATCH_ALL_EXCEPT_RETHROW_COR_E_OPERATIONCANCELLED( g_CoreLib.EnumMem(); ) CATCH_ALL_EXCEPT_RETHROW_COR_E_OPERATIONCANCELLED( g_pPredefinedArrayTypes[ELEMENT_TYPE_OBJECT].EnumMemoryRegions(flags); ) CATCH_ALL_EXCEPT_RETHROW_COR_E_OPERATIONCANCELLED( StubManager::EnumMemoryRegions(flags); ) CATCH_ALL_EXCEPT_RETHROW_COR_E_OPERATIONCANCELLED( g_pFinalizerThread.EnumMem(); ) diff --git a/src/coreclr/src/debug/daccess/inspect.cpp b/src/coreclr/src/debug/daccess/inspect.cpp index 75cfcc3cadce..28375862bd38 100644 --- a/src/coreclr/src/debug/daccess/inspect.cpp +++ b/src/coreclr/src/debug/daccess/inspect.cpp @@ -136,7 +136,7 @@ GetTypeFieldValueFlags(TypeHandle typeHandle, // Perform extra checks to identify well-known classes. 
// - if ((&g_Mscorlib)->IsClass(typeHandle.GetMethodTable(), CLASS__STRING)) + if ((&g_CoreLib)->IsClass(typeHandle.GetMethodTable(), CLASS__STRING)) { otherFlags |= CLRDATA_VALUE_IS_STRING; } diff --git a/src/coreclr/src/debug/daccess/nidump.cpp b/src/coreclr/src/debug/daccess/nidump.cpp index 5a0bbfd81006..b385e278d698 100644 --- a/src/coreclr/src/debug/daccess/nidump.cpp +++ b/src/coreclr/src/debug/daccess/nidump.cpp @@ -477,7 +477,7 @@ NativeImageDumper::NativeImageDumper(PTR_VOID loadedBase, m_dis(dis), m_MetadataSize(0), m_ILHostCopy(NULL), - m_isMscorlibHardBound(false), + m_isCoreLibHardBound(false), m_sectionAlignment(0) { IfFailThrow(m_display->GetDumpOptions(&m_dumpOptions)); @@ -1132,54 +1132,54 @@ NativeImageDumper::DumpNativeImage() /* XXX Wed 12/14/2005 * Now for the real insanity. I need to initialize static classes in - * the DAC. First I need to find mscorlib's dependency entry. Search + * the DAC. First I need to find CoreLib's dependency entry. Search * through all of the dependencies to find the one marked as - * fIsMscorlib. If I don't find anything marked that way, then "self" - * is mscorlib. + * fIsCoreLib. If I don't find anything marked that way, then "self" + * is CoreLib. */ - Dependency * mscorlib = NULL; + Dependency * corelib = NULL; for( COUNT_T i = 0; i < m_numDependencies; ++i ) { - if( m_dependencies[i].fIsMscorlib ) + if( m_dependencies[i].fIsCoreLib ) { - mscorlib = &m_dependencies[i]; + corelib = &m_dependencies[i]; break; } } - //If we're actually dumping mscorlib, remap the mscorlib dependency to our own native image. - if( (mscorlib == NULL) || !wcscmp(m_name, CoreLibName_W)) + //If we're actually dumping CoreLib, remap the CoreLib dependency to our own native image. + if( (corelib == NULL) || !wcscmp(m_name, CoreLibName_W)) { - mscorlib = GetDependency(0); - mscorlib->fIsMscorlib = TRUE; - _ASSERTE(mscorlib->fIsHardbound); + corelib = GetDependency(0); + corelib->fIsCoreLib = TRUE; + _ASSERTE(corelib->fIsHardbound); } - _ASSERTE(mscorlib != NULL); - if( mscorlib->fIsHardbound ) + _ASSERTE(corelib != NULL); + if( corelib->fIsHardbound ) { - m_isMscorlibHardBound = true; + m_isCoreLibHardBound = true; } - if( m_isMscorlibHardBound ) + if( m_isCoreLibHardBound ) { //go through the module to the binder. - PTR_Module mscorlibModule = mscorlib->pModule; + PTR_Module corelibModule = corelib->pModule; - PTR_MscorlibBinder binder = mscorlibModule->m_pBinder; - g_Mscorlib = *binder; + PTR_CoreLibBinder binder = corelibModule->m_pBinder; + g_CoreLib = *binder; - PTR_MethodTable mt = MscorlibBinder::GetExistingClass(CLASS__OBJECT); + PTR_MethodTable mt = CoreLibBinder::GetExistingClass(CLASS__OBJECT); g_pObjectClass = mt; } if (g_pObjectClass == NULL) { - //if mscorlib is not hard bound, then warn the user (many features of nidump are shut off) - m_display->ErrorPrintF( "Assembly %S is soft bound to mscorlib. nidump cannot dump MethodTables completely.\n", m_name ); + //if CoreLib is not hard bound, then warn the user (many features of nidump are shut off) + m_display->ErrorPrintF( "Assembly %S is soft bound to CoreLib. nidump cannot dump MethodTables completely.\n", m_name ); // TritonTODO: reason? 
// reset "hard bound state" - m_isMscorlibHardBound = false; + m_isCoreLibHardBound = false; } } @@ -1267,8 +1267,8 @@ void NativeImageDumper::TraceDumpDependency(int idx, NativeImageDumper::Dependen m_display->ErrorPrintF("\tSize: %x (%d)\n", dependency->size, dependency->size); m_display->ErrorPrintF("\tModule: P=%p, L=%p\n", DataPtrToDisplay(dac_cast(dependency->pModule)), PTR_TO_TADDR(dependency->pModule)); - m_display->ErrorPrintF("Mscorlib=%s, Hardbound=%s\n", - (dependency->fIsMscorlib ? "true" : "false"), + m_display->ErrorPrintF("CoreLib=%s, Hardbound=%s\n", + (dependency->fIsCoreLib ? "true" : "false"), (dependency->fIsHardbound ? "true" : "false")); m_display->ErrorPrintF("Name: %S\n", dependency->name); } @@ -2391,7 +2391,7 @@ mdAssemblyRef NativeImageDumper::MapAssemblyRefToManifest(mdAssemblyRef token, I } else if (wcscmp(szAssemblyName, CoreLibName_W) == 0) { - // Mscorlib is special - version number and public key token are ignored. + // CoreLib is special - version number and public key token are ignored. ret = currentRef; break; } @@ -2400,7 +2400,7 @@ mdAssemblyRef NativeImageDumper::MapAssemblyRefToManifest(mdAssemblyRef token, I metadata.usBuildNumber == 255 && metadata.usRevisionNumber == 255) { - // WinMDs encode all assemblyrefs with version 255.255.255.255 including CLR assembly dependencies (mscorlib, System). + // WinMDs encode all assemblyrefs with version 255.255.255.255 including CLR assembly dependencies (corelib, System). ret = currentRef; } else @@ -2602,8 +2602,8 @@ NativeImageDumper::Dependency *NativeImageDumper::OpenDependency(int index) Dependency& dependency = m_dependencies[index]; AppendTokenName(entry->dwAssemblyRef, buf, m_manifestImport, true); bool isHardBound = !!(entry->signNativeImage != INVALID_NGEN_SIGNATURE); - SString mscorlibStr(SString::Literal, CoreLibName_W); - bool isMscorlib = (0 == buf.Compare( mscorlibStr )); + SString corelibStr(SString::Literal, CoreLibName_W); + bool isCoreLib = (0 == buf.Compare( corelibStr )); dependency.fIsHardbound = isHardBound; wcscpy_s(dependency.name, _countof(dependency.name), (const WCHAR*)buf); @@ -2703,7 +2703,7 @@ NativeImageDumper::Dependency *NativeImageDumper::OpenDependency(int index) ofRead, IID_IMetaDataImport2, (IUnknown **) &dependency.pImport)); - dependency.fIsMscorlib = isMscorlib; + dependency.fIsCoreLib = isCoreLib; } m_dependencies[index].entry = entry; @@ -3716,7 +3716,7 @@ void NativeImageDumper::DumpModule( PTR_Module module ) /* REVISIT_TODO Fri 10/14/2005 * Dump the binder */ - PTR_MscorlibBinder binder = module->m_pBinder; + PTR_CoreLibBinder binder = module->m_pBinder; if( NULL != binder ) { DisplayStartStructureWithOffset( m_pBinder, DPtrToPreferredAddr(binder), @@ -3726,38 +3726,38 @@ void NativeImageDumper::DumpModule( PTR_Module module ) //these four fields don't have anything useful in ngen images. 
DisplayWriteFieldPointer( m_classDescriptions, DPtrToPreferredAddr(binder->m_classDescriptions), - MscorlibBinder, MODULE ); + CoreLibBinder, MODULE ); DisplayWriteFieldPointer( m_methodDescriptions, DPtrToPreferredAddr(binder->m_methodDescriptions), - MscorlibBinder, MODULE ); + CoreLibBinder, MODULE ); DisplayWriteFieldPointer( m_fieldDescriptions, DPtrToPreferredAddr(binder->m_fieldDescriptions), - MscorlibBinder, MODULE ); + CoreLibBinder, MODULE ); DisplayWriteFieldPointer( m_pModule, DPtrToPreferredAddr(binder->m_pModule), - MscorlibBinder, MODULE ); + CoreLibBinder, MODULE ); - DisplayWriteFieldInt( m_cClasses, binder->m_cClasses, MscorlibBinder, + DisplayWriteFieldInt( m_cClasses, binder->m_cClasses, CoreLibBinder, MODULE ); DisplayWriteFieldAddress( m_pClasses, DPtrToPreferredAddr(binder->m_pClasses), sizeof(*binder->m_pClasses) * binder->m_cClasses, - MscorlibBinder, MODULE ); - DisplayWriteFieldInt( m_cFields, binder->m_cFields, MscorlibBinder, + CoreLibBinder, MODULE ); + DisplayWriteFieldInt( m_cFields, binder->m_cFields, CoreLibBinder, MODULE ); DisplayWriteFieldAddress( m_pFields, DPtrToPreferredAddr(binder->m_pFields), sizeof(*binder->m_pFields) * binder->m_cFields, - MscorlibBinder, MODULE ); - DisplayWriteFieldInt( m_cMethods, binder->m_cMethods, MscorlibBinder, + CoreLibBinder, MODULE ); + DisplayWriteFieldInt( m_cMethods, binder->m_cMethods, CoreLibBinder, MODULE ); DisplayWriteFieldAddress( m_pMethods, DPtrToPreferredAddr(binder->m_pMethods), sizeof(*binder->m_pMethods) * binder->m_cMethods, - MscorlibBinder, MODULE ); + CoreLibBinder, MODULE ); DisplayEndStructure( MODULE ); //m_pBinder } @@ -6766,11 +6766,11 @@ NativeImageDumper::DumpMethodTable( PTR_MethodTable mt, const char * name, MethodTableToString( mt, buf ); m_display->ErrorPrintF( "WARNING! MethodTable %S is generic but is not hard bound to its EEClass. Cannot compute generic dictionary sizes.\n", (const WCHAR *)buf ); } - else if( !m_isMscorlibHardBound ) + else if( !m_isCoreLibHardBound ) { /* REVISIT_TODO Mon 8/20/2007 - * If we're not hard bound to mscorlib, most things don't work. They depend on knowing what - * g_pObjectClass is. Without the hard binding to mscorlib, I can't figure that out. + * If we're not hard bound to CoreLib, most things don't work. They depend on knowing what + * g_pObjectClass is. Without the hard binding to CoreLib, I can't figure that out. */ haveCompleteExtents = false; } @@ -7996,7 +7996,7 @@ void NativeImageDumper::DumpMethodDesc( PTR_MethodDesc md, PTR_Module module ) InstantiatedMethodDesc, METHODDESCS ); #ifdef FEATURE_COMINTEROP - if (imd->IMD_HasComPlusCallInfo()) + if (imd->IsGenericComPlusCall()) { PTR_ComPlusCallInfo compluscall = imd->IMD_GetComPlusCallInfo(); DumpComPlusCallInfo( compluscall, METHODDESCS ); diff --git a/src/coreclr/src/debug/daccess/nidump.h b/src/coreclr/src/debug/daccess/nidump.h index b8c0a04ddb41..a6e9461a475a 100644 --- a/src/coreclr/src/debug/daccess/nidump.h +++ b/src/coreclr/src/debug/daccess/nidump.h @@ -305,7 +305,7 @@ class NativeImageDumper TADDR pMetadataStartTarget; TADDR pMetadataStartHost; SIZE_T MetadataSize; - bool fIsMscorlib; + bool fIsCoreLib; bool fIsHardbound; WCHAR name[128]; }; @@ -548,9 +548,9 @@ class NativeImageDumper COUNT_T m_ILSectionSize; #endif - //This is true if we are hard bound to mscorlib. This enables various forms of generics dumping and MT + //This is true if we are hard bound to corelib. This enables various forms of generics dumping and MT //dumping that require g_pObjectClass to be set. 
- bool m_isMscorlibHardBound; + bool m_isCoreLibHardBound; #if 0 PTR_CCOR_SIGNATURE metadataToHostDAC( PCCOR_SIGNATURE pSig,
diff --git a/src/coreclr/src/debug/daccess/request.cpp b/src/coreclr/src/debug/daccess/request.cpp index 4c6671149bee..5047421d5959 100644 --- a/src/coreclr/src/debug/daccess/request.cpp +++ b/src/coreclr/src/debug/daccess/request.cpp @@ -1058,7 +1058,7 @@ HRESULT ClrDataAccess::GetMethodDescData( OBJECTREF value = pResolver->GetManagedResolver(); if (value) { - FieldDesc *pField = (&g_Mscorlib)->GetField(FIELD__DYNAMICRESOLVER__DYNAMIC_METHOD); + FieldDesc *pField = (&g_CoreLib)->GetField(FIELD__DYNAMICRESOLVER__DYNAMIC_METHOD); _ASSERTE(pField); value = pField->GetRefValue(value); if (value)
diff --git a/src/coreclr/src/debug/daccess/stack.cpp b/src/coreclr/src/debug/daccess/stack.cpp index 09e16b4979ab..94ef49b972ad 100644 --- a/src/coreclr/src/debug/daccess/stack.cpp +++ b/src/coreclr/src/debug/daccess/stack.cpp @@ -1391,7 +1391,7 @@ ClrDataFrame::ValueFromDebugInfo(MetaSig* sig, // XXX Microsoft - Sometimes types can't be looked // up and this at least allows the value to be used, // but is it the right behavior? - argType = TypeHandle(MscorlibBinder::GetElementType(ELEMENT_TYPE_U8)); + argType = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_U8)); valueFlags = 0; } else
diff --git a/src/coreclr/src/debug/dbgutil/machoreader.cpp b/src/coreclr/src/debug/dbgutil/machoreader.cpp index 155ec15a3af0..fa94b49b8d4a 100644 --- a/src/coreclr/src/debug/dbgutil/machoreader.cpp +++ b/src/coreclr/src/debug/dbgutil/machoreader.cpp @@ -401,6 +401,7 @@ MachOReader::ReadString(const char* address, std::string& str) char c = 0; if (!ReadMemory((void*)(address + i), &c, sizeof(char))) { + Trace("ERROR: Failed to read string at %p\n", (void*)(address + i)); return false; } if (c == '\0')
diff --git a/src/coreclr/src/debug/debug-pal/CMakeLists.txt b/src/coreclr/src/debug/debug-pal/CMakeLists.txt index 12a0005c0532..213fa59e784c 100644 --- a/src/coreclr/src/debug/debug-pal/CMakeLists.txt +++ b/src/coreclr/src/debug/debug-pal/CMakeLists.txt @@ -34,4 +34,6 @@ if(CLR_CMAKE_HOST_UNIX) endif(CLR_CMAKE_HOST_UNIX) -_add_library(debug-pal OBJECT ${TWO_WAY_PIPE_SOURCES}) +_add_library(debug-pal_obj OBJECT ${TWO_WAY_PIPE_SOURCES}) +add_library(debug-pal INTERFACE) +target_sources(debug-pal INTERFACE $<TARGET_OBJECTS:debug-pal_obj>)
diff --git a/src/coreclr/src/debug/debug-pal/unix/diagnosticsipc.cpp b/src/coreclr/src/debug/debug-pal/unix/diagnosticsipc.cpp index 632ac03a44c2..2dc6c8807a65 100644 --- a/src/coreclr/src/debug/debug-pal/unix/diagnosticsipc.cpp +++ b/src/coreclr/src/debug/debug-pal/unix/diagnosticsipc.cpp @@ -61,8 +61,8 @@ IpcStream::DiagnosticsIpc *IpcStream::DiagnosticsIpc::Create(const char *const p "socket"); } - if (mode == ConnectionMode::CLIENT) - return new IpcStream::DiagnosticsIpc(-1, &serverAddress, ConnectionMode::CLIENT); + if (mode == ConnectionMode::CONNECT) + return new IpcStream::DiagnosticsIpc(-1, &serverAddress, ConnectionMode::CONNECT); #ifdef __APPLE__ mode_t prev_mask = umask(~(S_IRUSR | S_IWUSR)); // This will set the default permission bit to 600 @@ -116,8 +116,8 @@ IpcStream::DiagnosticsIpc *IpcStream::DiagnosticsIpc::Create(const char *const p bool IpcStream::DiagnosticsIpc::Listen(ErrorCallback callback) { - _ASSERTE(mode == ConnectionMode::SERVER); - if (mode != ConnectionMode::SERVER) + _ASSERTE(mode == ConnectionMode::LISTEN); + if (mode != ConnectionMode::LISTEN) { if (callback != nullptr) callback("Cannot call Listen on a client connection", -1); @@ -150,7 +150,7 @@ bool
IpcStream::DiagnosticsIpc::Listen(ErrorCallback callback) IpcStream *IpcStream::DiagnosticsIpc::Accept(ErrorCallback callback) { - _ASSERTE(mode == ConnectionMode::SERVER); + _ASSERTE(mode == ConnectionMode::LISTEN); _ASSERTE(_isListening); sockaddr_un from; @@ -168,7 +168,7 @@ IpcStream *IpcStream::DiagnosticsIpc::Accept(ErrorCallback callback) IpcStream *IpcStream::DiagnosticsIpc::Connect(ErrorCallback callback) { - _ASSERTE(mode == ConnectionMode::CLIENT); + _ASSERTE(mode == ConnectionMode::CONNECT); sockaddr_un clientAddress{}; clientAddress.sun_family = AF_UNIX; @@ -194,7 +194,7 @@ IpcStream *IpcStream::DiagnosticsIpc::Connect(ErrorCallback callback) return nullptr; } - return new IpcStream(clientSocket, ConnectionMode::CLIENT); + return new IpcStream(clientSocket, ConnectionMode::CONNECT); } int32_t IpcStream::DiagnosticsIpc::Poll(IpcPollHandle *rgIpcPollHandles, uint32_t nHandles, int32_t timeoutMs, ErrorCallback callback) @@ -208,7 +208,7 @@ int32_t IpcStream::DiagnosticsIpc::Poll(IpcPollHandle *rgIpcPollHandles, uint32_ if (rgIpcPollHandles[i].pIpc != nullptr) { // SERVER - _ASSERTE(rgIpcPollHandles[i].pIpc->mode == ConnectionMode::SERVER); + _ASSERTE(rgIpcPollHandles[i].pIpc->mode == ConnectionMode::LISTEN); fd = rgIpcPollHandles[i].pIpc->_serverSocket; } else @@ -246,6 +246,8 @@ int32_t IpcStream::DiagnosticsIpc::Poll(IpcPollHandle *rgIpcPollHandles, uint32_ { if (pollfds[i].revents != 0) { + if (callback != nullptr) + callback("IpcStream::DiagnosticsIpc::Poll - poll revents", (uint32_t)pollfds[i].revents); // error check FIRST if (pollfds[i].revents & POLLHUP) { @@ -253,21 +255,22 @@ int32_t IpcStream::DiagnosticsIpc::Poll(IpcPollHandle *rgIpcPollHandles, uint32_ // will technically meet the requirements for POLLIN // i.e., a call to recv/read won't block rgIpcPollHandles[i].revents = (uint8_t)PollEvents::HANGUP; - delete[] pollfds; - return -1; } else if ((pollfds[i].revents & (POLLERR|POLLNVAL))) { if (callback != nullptr) callback("Poll error", (uint32_t)pollfds[i].revents); rgIpcPollHandles[i].revents = (uint8_t)PollEvents::ERR; - delete[] pollfds; - return -1; } else if (pollfds[i].revents & (POLLIN|POLLPRI)) { rgIpcPollHandles[i].revents = (uint8_t)PollEvents::SIGNALED; - break; + } + else + { + rgIpcPollHandles[i].revents = (uint8_t)PollEvents::UNKNOWN; + if (callback != nullptr) + callback("unknown poll response", (uint32_t)pollfds[i].revents); } } } @@ -341,7 +344,7 @@ bool IpcStream::Read(void *lpBuffer, const uint32_t nBytesToRead, uint32_t &nByt pfd.fd = _clientSocket; pfd.events = POLLIN; int retval = poll(&pfd, 1, timeoutMs); - if (retval <= 0 || pfd.revents != POLLIN) + if (retval <= 0 || !(pfd.revents & POLLIN)) { // timeout or error return false; @@ -382,7 +385,7 @@ bool IpcStream::Write(const void *lpBuffer, const uint32_t nBytesToWrite, uint32 pfd.fd = _clientSocket; pfd.events = POLLOUT; int retval = poll(&pfd, 1, timeoutMs); - if (retval <= 0 || pfd.revents != POLLOUT) + if (retval <= 0 || !(pfd.revents & POLLOUT)) { // timeout or error return false;
diff --git a/src/coreclr/src/debug/debug-pal/win/diagnosticsipc.cpp b/src/coreclr/src/debug/debug-pal/win/diagnosticsipc.cpp index 6c1b55e31118..9a28e482342f 100644 --- a/src/coreclr/src/debug/debug-pal/win/diagnosticsipc.cpp +++ b/src/coreclr/src/debug/debug-pal/win/diagnosticsipc.cpp @@ -56,8 +56,8 @@ IpcStream::DiagnosticsIpc *IpcStream::DiagnosticsIpc::Create(const char *const p bool IpcStream::DiagnosticsIpc::Listen(ErrorCallback callback) { - _ASSERTE(mode == ConnectionMode::SERVER); - if (mode != 
ConnectionMode::SERVER) + _ASSERTE(mode == ConnectionMode::LISTEN); + if (mode != ConnectionMode::LISTEN) { if (callback != nullptr) callback("Cannot call Listen on a client connection", -1); @@ -119,6 +119,7 @@ bool IpcStream::DiagnosticsIpc::Listen(ErrorCallback callback) _hPipe = INVALID_HANDLE_VALUE; ::CloseHandle(_oOverlap.hEvent); _oOverlap.hEvent = INVALID_HANDLE_VALUE; + memset(&_oOverlap, 0, sizeof(OVERLAPPED)); // clear the overlapped object's state return false; } } @@ -130,7 +131,7 @@ bool IpcStream::DiagnosticsIpc::Listen(ErrorCallback callback) IpcStream *IpcStream::DiagnosticsIpc::Accept(ErrorCallback callback) { _ASSERTE(_isListening); - _ASSERTE(mode == ConnectionMode::SERVER); + _ASSERTE(mode == ConnectionMode::LISTEN); DWORD dwDummy = 0; bool fSuccess = GetOverlappedResult( @@ -147,7 +148,7 @@ IpcStream *IpcStream::DiagnosticsIpc::Accept(ErrorCallback callback) } // create new IpcStream using handle and reset the Server object so it can listen again - IpcStream *pStream = new IpcStream(_hPipe, ConnectionMode::SERVER); + IpcStream *pStream = new IpcStream(_hPipe, ConnectionMode::LISTEN); // reset the server _hPipe = INVALID_HANDLE_VALUE; @@ -166,8 +167,8 @@ IpcStream *IpcStream::DiagnosticsIpc::Accept(ErrorCallback callback) IpcStream *IpcStream::DiagnosticsIpc::Connect(ErrorCallback callback) { - _ASSERTE(mode == ConnectionMode::CLIENT); - if (mode != ConnectionMode::CLIENT) + _ASSERTE(mode == ConnectionMode::CONNECT); + if (mode != ConnectionMode::CONNECT) { if (callback != nullptr) callback("Cannot call connect on a server connection", 0); @@ -193,27 +194,38 @@ IpcStream *IpcStream::DiagnosticsIpc::Connect(ErrorCallback callback) return new IpcStream(hPipe, mode); } -void IpcStream::DiagnosticsIpc::Close(bool isShutdown, ErrorCallback) +void IpcStream::DiagnosticsIpc::Close(bool isShutdown, ErrorCallback callback) { // don't attempt cleanup on shutdown and let the OS handle it if (isShutdown) + { + if (callback != nullptr) + callback("Closing without cleaning underlying handles", 100); return; + } if (_hPipe != INVALID_HANDLE_VALUE) { - if (mode == DiagnosticsIpc::ConnectionMode::SERVER) + if (mode == DiagnosticsIpc::ConnectionMode::LISTEN) { const BOOL fSuccessDisconnectNamedPipe = ::DisconnectNamedPipe(_hPipe); _ASSERTE(fSuccessDisconnectNamedPipe != 0); + if (fSuccessDisconnectNamedPipe == 0 && callback != nullptr) + callback("Failed to disconnect NamedPipe", ::GetLastError()); } const BOOL fSuccessCloseHandle = ::CloseHandle(_hPipe); _ASSERTE(fSuccessCloseHandle != 0); + if (fSuccessCloseHandle == 0 && callback != nullptr) + callback("Failed to close pipe handle", ::GetLastError()); } if (_oOverlap.hEvent != INVALID_HANDLE_VALUE) { - ::CloseHandle(_oOverlap.hEvent); + const BOOL fSuccessCloseEvent = ::CloseHandle(_oOverlap.hEvent); + _ASSERTE(fSuccessCloseEvent != 0); + if (fSuccessCloseEvent == 0 && callback != nullptr) + callback("Failed to close overlap event handle", ::GetLastError()); } } @@ -230,25 +242,32 @@ IpcStream::~IpcStream() Close(); } -void IpcStream::Close(ErrorCallback) +void IpcStream::Close(ErrorCallback callback) { if (_hPipe != INVALID_HANDLE_VALUE) { Flush(); - if (_mode == DiagnosticsIpc::ConnectionMode::SERVER) + if (_mode == DiagnosticsIpc::ConnectionMode::LISTEN) { const BOOL fSuccessDisconnectNamedPipe = ::DisconnectNamedPipe(_hPipe); _ASSERTE(fSuccessDisconnectNamedPipe != 0); + if (fSuccessDisconnectNamedPipe == 0 && callback != nullptr) + callback("Failed to disconnect NamedPipe", ::GetLastError()); } const BOOL fSuccessCloseHandle = ::CloseHandle(_hPipe); _ASSERTE(fSuccessCloseHandle != 0); + if (fSuccessCloseHandle == 0 && callback != nullptr) + callback("Failed to close pipe handle", ::GetLastError()); } if (_oOverlap.hEvent != INVALID_HANDLE_VALUE) { - ::CloseHandle(_oOverlap.hEvent); + const BOOL fSuccessCloseEvent = ::CloseHandle(_oOverlap.hEvent); + _ASSERTE(fSuccessCloseEvent != 0); + if (fSuccessCloseEvent == 0 && callback != nullptr) + callback("Failed to close overlapped event handle", ::GetLastError()); } } @@ -262,7 +281,7 @@ int32_t IpcStream::DiagnosticsIpc::Poll(IpcPollHandle *rgIpcPollHandles, uint32_ if (rgIpcPollHandles[i].pIpc != nullptr) { // SERVER - _ASSERTE(rgIpcPollHandles[i].pIpc->mode == DiagnosticsIpc::ConnectionMode::SERVER); + _ASSERTE(rgIpcPollHandles[i].pIpc->mode == DiagnosticsIpc::ConnectionMode::LISTEN); pHandles[i] = rgIpcPollHandles[i].pIpc->_oOverlap.hEvent; } else @@ -302,6 +321,11 @@ int32_t IpcStream::DiagnosticsIpc::Poll(IpcPollHandle *rgIpcPollHandles, uint32_ return -1; } } + else + { + // there's already data to be read + pHandles[i] = rgIpcPollHandles[i].pStream->_oOverlap.hEvent; + } } else { @@ -369,12 +393,12 @@ int32_t IpcStream::DiagnosticsIpc::Poll(IpcPollHandle *rgIpcPollHandles, uint32_ if (!fSuccess) { DWORD error = ::GetLastError(); - if (error == ERROR_PIPE_NOT_CONNECTED) + if (error == ERROR_PIPE_NOT_CONNECTED || error == ERROR_BROKEN_PIPE) rgIpcPollHandles[index].revents = (uint8_t)IpcStream::DiagnosticsIpc::PollEvents::HANGUP; else { if (callback != nullptr) - callback("Client connection error", -1); + callback("Client connection error", error); rgIpcPollHandles[index].revents = (uint8_t)IpcStream::DiagnosticsIpc::PollEvents::ERR; delete[] pHandles; return -1; @@ -410,39 +434,43 @@ bool IpcStream::Read(void *lpBuffer, const uint32_t nBytesToRead, uint32_t &nByt if (!fSuccess) { + // if we're waiting infinitely, only make one syscall if (timeoutMs == InfiniteTimeout) { - fSuccess = GetOverlappedResult(_hPipe, - overlap, - &nNumberOfBytesRead, - true) != 0; + fSuccess = GetOverlappedResult(_hPipe, // pipe + overlap, // overlapped + &nNumberOfBytesRead, // out actual number of bytes read + true) != 0; // block until async IO completes } else { DWORD dwError = GetLastError(); if (dwError == ERROR_IO_PENDING) { + // Wait on overlapped IO event (triggers when async IO is complete regardless of success) + // or timeout DWORD dwWait = WaitForSingleObject(_oOverlap.hEvent, (DWORD)timeoutMs); if (dwWait == WAIT_OBJECT_0) { - // get the result - fSuccess = GetOverlappedResult(_hPipe, - overlap, - &nNumberOfBytesRead, - true) != 0; + // async IO completed, get the result + fSuccess = GetOverlappedResult(_hPipe, // pipe + overlap, // overlapped + &nNumberOfBytesRead, // out actual number of bytes read + true) != 0; // block until async IO completes } else { - // cancel IO and ensure the cancel happened - if (CancelIo(_hPipe)) + // We either timed out or something else went wrong. + // For any error, attempt to cancel IO and ensure the cancel happened + if (CancelIoEx(_hPipe, overlap) != 0) { // check if the async write beat the cancellation fSuccess = GetOverlappedResult(_hPipe, overlap, &nNumberOfBytesRead, true) != 0; + // Failure here isn't recoverable, so return as such } } } } - // TODO: Add error handling.
} nBytesRead = static_cast<uint32_t>(nNumberOfBytesRead); @@ -464,40 +492,43 @@ bool IpcStream::Write(const void *lpBuffer, const uint32_t nBytesToWrite, uint32 if (!fSuccess) { - DWORD dwError = GetLastError(); - if (dwError == ERROR_IO_PENDING) + // if we're waiting infinitely, only make one syscall + if (timeoutMs == InfiniteTimeout) { - if (timeoutMs == InfiniteTimeout) - { - // if we're waiting infinitely, don't bother with extra kernel call - fSuccess = GetOverlappedResult(_hPipe, - overlap, - &nNumberOfBytesWritten, - true) != 0; - } - else + fSuccess = GetOverlappedResult(_hPipe, // pipe + overlap, // overlapped + &nNumberOfBytesWritten, // out actual number of bytes written + true) != 0; // block until async IO completes + } + else + { + DWORD dwError = GetLastError(); + if (dwError == ERROR_IO_PENDING) { + // Wait on overlapped IO event (triggers when async IO is complete regardless of success) + // or timeout DWORD dwWait = WaitForSingleObject(_oOverlap.hEvent, (DWORD)timeoutMs); if (dwWait == WAIT_OBJECT_0) { - // get the result - fSuccess = GetOverlappedResult(_hPipe, - overlap, - &nNumberOfBytesWritten, - true) != 0; + // async IO completed, get the result + fSuccess = GetOverlappedResult(_hPipe, // pipe + overlap, // overlapped + &nNumberOfBytesWritten, // out actual number of bytes written + true) != 0; // block until async IO completes } else { - // cancel IO and ensure the cancel happened - if (CancelIo(_hPipe)) + // We either timed out or something else went wrong. + // For any error, attempt to cancel IO and ensure the cancel happened + if (CancelIoEx(_hPipe, overlap) != 0) { // check if the async write beat the cancellation fSuccess = GetOverlappedResult(_hPipe, overlap, &nNumberOfBytesWritten, true) != 0; + // Failure here isn't recoverable, so return as such } } } } - // TODO: Add error handling.
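// --- Editor's sketch (hedged, not part of the patch): a condensed, standalone
// version of the timed overlapped-IO pattern the rewritten Read and Write above
// both use. Kick off an async ReadFile, wait on the event with a timeout, and on
// timeout cancel with CancelIoEx -- but still call GetOverlappedResult, because
// the IO may complete before the cancel lands. CancelIoEx cancels just this one
// operation from any thread, whereas the old CancelIo cancelled every IO issued
// by the calling thread on that handle.
#include <windows.h>

bool TimedRead(HANDLE hPipe, void* buffer, DWORD cb, DWORD timeoutMs, DWORD* pcbRead)
{
    OVERLAPPED ov = {};
    ov.hEvent = CreateEvent(nullptr, TRUE, FALSE, nullptr); // manual-reset, unsignaled
    if (ov.hEvent == nullptr)
        return false;

    bool fSuccess = ReadFile(hPipe, buffer, cb, pcbRead, &ov) != 0;
    if (!fSuccess && GetLastError() == ERROR_IO_PENDING)
    {
        if (WaitForSingleObject(ov.hEvent, timeoutMs) == WAIT_OBJECT_0)
        {
            // async IO completed; harvest the result
            fSuccess = GetOverlappedResult(hPipe, &ov, pcbRead, TRUE) != 0;
        }
        else if (CancelIoEx(hPipe, &ov) != 0)
        {
            // the read may have beaten the cancellation; check before giving up
            fSuccess = GetOverlappedResult(hPipe, &ov, pcbRead, TRUE) != 0;
        }
    }
    CloseHandle(ov.hEvent);
    return fSuccess;
}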
} nBytesWritten = static_cast<uint32_t>(nNumberOfBytesWritten);
diff --git a/src/coreclr/src/debug/di/module.cpp b/src/coreclr/src/debug/di/module.cpp index d02dfdd08146..9554f61dc860 100644 --- a/src/coreclr/src/debug/di/module.cpp +++ b/src/coreclr/src/debug/di/module.cpp @@ -1200,6 +1200,10 @@ HRESULT CordbModule::QueryInterface(REFIID id, void **pInterface) { *pInterface = static_cast<ICorDebugModule3*>(this); } + else if (id == IID_ICorDebugModule4) + { + *pInterface = static_cast<ICorDebugModule4*>(this); + } else if (id == IID_IUnknown) { *pInterface = static_cast<IUnknown*>(static_cast<ICorDebugModule*>(this)); @@ -2752,6 +2756,24 @@ HRESULT CordbModule::GetJITCompilerFlags(DWORD *pdwFlags ) return hr; } +HRESULT CordbModule::IsMappedLayout(BOOL *isMapped) +{ + VALIDATE_POINTER_TO_OBJECT(isMapped, BOOL*); + FAIL_IF_NEUTERED(this); + + HRESULT hr = S_OK; + CordbProcess *pProcess = GetProcess(); + + ATT_REQUIRE_STOPPED_MAY_FAIL(pProcess); + PUBLIC_API_BEGIN(pProcess); + { + hr = pProcess->GetDAC()->IsModuleMapped(m_vmModule, isMapped); + } + PUBLIC_API_END(hr); + + return hr; +} + /* ------------------------------------------------------------------------- * * CordbCode class * ------------------------------------------------------------------------- */
diff --git a/src/coreclr/src/debug/di/rspriv.h b/src/coreclr/src/debug/di/rspriv.h index 1bb48df2356c..4dc93ebf7317 100644 --- a/src/coreclr/src/debug/di/rspriv.h +++ b/src/coreclr/src/debug/di/rspriv.h @@ -4139,7 +4139,8 @@ class CordbProcess : class CordbModule : public CordbBase, public ICorDebugModule, public ICorDebugModule2, - public ICorDebugModule3 + public ICorDebugModule3, + public ICorDebugModule4 { public: CordbModule(CordbProcess * process, @@ -4234,6 +4235,11 @@ class CordbModule : public CordbBase, COM_METHOD CreateReaderForInMemorySymbols(REFIID riid, void** ppObj); + //----------------------------------------------------------- + // ICorDebugModule4 + //----------------------------------------------------------- + COM_METHOD IsMappedLayout(BOOL *isMapped); + //----------------------------------------------------------- // Internal members //-----------------------------------------------------------
diff --git a/src/coreclr/src/debug/di/rstype.cpp b/src/coreclr/src/debug/di/rstype.cpp index 36b137949664..a7412dea5713 100644 --- a/src/coreclr/src/debug/di/rstype.cpp +++ b/src/coreclr/src/debug/di/rstype.cpp @@ -683,7 +683,7 @@ HRESULT CordbType::GetType(CorElementType *pType) // Determining if something is a VC or not can involve asking the EE. // We could do it ourselves based on the metadata but it's non-trivial // determining if a class has System.ValueType as a parent (we have - // to find and OpenScope the mscorlib.dll which we don't currently do + // to find and OpenScope the System.Private.CoreLib.dll which we don't currently do // on the right-side). But the IsValueClass call can fail if the // class is not yet loaded on the right side.
In that case we // ignore the failure and return ELEMENT_TYPE_CLASS diff --git a/src/coreclr/src/debug/di/shimremotedatatarget.cpp b/src/coreclr/src/debug/di/shimremotedatatarget.cpp index 38bf162e430f..9a01508e2115 100644 --- a/src/coreclr/src/debug/di/shimremotedatatarget.cpp +++ b/src/coreclr/src/debug/di/shimremotedatatarget.cpp @@ -68,7 +68,7 @@ class ShimRemoteDataTarget : public ShimDataTarget DbgTransportTarget * m_pProxy; DbgTransportSession * m_pTransport; #ifdef FEATURE_REMOTE_PROC_MEM - int m_fd; // /proc//mem handle + DWORD m_memoryHandle; // PAL_ReadProcessMemory handle or UINT32_MAX if fallback #endif }; @@ -106,9 +106,7 @@ ShimRemoteDataTarget::ShimRemoteDataTarget(DWORD processId, m_pContinueStatusChangedUserData = NULL; #ifdef FEATURE_REMOTE_PROC_MEM - char memPath[128]; - _snprintf_s(memPath, sizeof(memPath), sizeof(memPath), "/proc/%lu/mem", m_processId); - m_fd = _open(memPath, 0); // O_RDONLY + PAL_OpenProcessMemory(m_processId, &m_memoryHandle); #endif } @@ -135,11 +133,8 @@ ShimRemoteDataTarget::~ShimRemoteDataTarget() void ShimRemoteDataTarget::Dispose() { #ifdef FEATURE_REMOTE_PROC_MEM - if (m_fd != -1) - { - _close(m_fd); - m_fd = -1; - } + PAL_CloseProcessMemory(m_memoryHandle); + m_memoryHandle = UINT32_MAX; #endif if (m_pTransport != NULL) { @@ -269,10 +264,9 @@ ShimRemoteDataTarget::ReadVirtual( HRESULT hr = S_OK; #ifdef FEATURE_REMOTE_PROC_MEM - if (m_fd != -1) + if (m_memoryHandle != UINT32_MAX) { - read = _pread(m_fd, pBuffer, cbRequestSize, (ULONG64)address); - if (read == (size_t)-1) + if (!PAL_ReadProcessMemory(m_memoryHandle, (ULONG64)address, pBuffer, cbRequestSize, &read)) { hr = E_FAIL; } diff --git a/src/coreclr/src/debug/ee/arm64/arm64walker.cpp b/src/coreclr/src/debug/ee/arm64/arm64walker.cpp index d46b07958b72..ae6e8c1fc293 100644 --- a/src/coreclr/src/debug/ee/arm64/arm64walker.cpp +++ b/src/coreclr/src/debug/ee/arm64/arm64walker.cpp @@ -102,6 +102,7 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, /* + Based off ARM DDI 0487F.c (C.4.1) Modify the patchBypass if the opcode is IP-relative, otherwise return it The following are the instructions that are IP-relative : . ADR and ADRP. 
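// --- Editor's sketch (hedged): the ADR-family immediate decoding the walker
// below performs, per ARM DDI 0487 (ADR/ADRP): immlo is opcode bits [30:29],
// immhi is bits [23:5]; ADR offsets are byte offsets from the PC, while ADRP
// offsets are in 4KiB pages relative to PC & ~0xFFF. This is a self-contained
// illustration, not the walker's code.
#include <cstdint>
#include <cstdio>

int64_t DecodeAdrOffset(uint32_t opcode)
{
    uint64_t immlo = (opcode >> 29) & 0x3;     // bits [30:29]
    uint64_t immhi = (opcode >> 5) & 0x7FFFF;  // bits [23:5], 19 bits
    uint64_t imm21 = (immhi << 2) | immlo;     // 21-bit signed immediate
    int64_t offset = (int64_t)(imm21 << 43) >> 43;   // sign-extend from bit 20
    bool isAdrp = (opcode & 0x80000000) != 0;  // top bit selects ADRP vs ADR
    return isAdrp ? (offset << 12) : offset;   // ADRP scales by the page size
}

int main()
{
    // 0x10000042 hand-assembles to "adr x2, #+8" (verify before relying on it)
    printf("offset = %lld\n", (long long)DecodeAdrOffset(0x10000042));
    return 0;
}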
@@ -113,9 +114,8 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, _ASSERTE((UINT_PTR)address == context->Pc); - if ((opcode & 0x1F000000) == 0x10000000) //ADR & ADRP + if ((opcode & 0x1F000000) == 0x10000000) //ADR & ADRP (PC-Relative) { - TADDR immhigh = ((opcode >> 5) & 0x007FFFF) << 2; TADDR immlow = (opcode & 0x60000000) >> 29; offset = immhigh | immlow; //ADR @@ -136,13 +136,10 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, { LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x to ADR X%d %p\n", opcode, RegNum, offset)); } - - } else if ((opcode & 0x3B000000) == 0x18000000) //LDR Literal (General or SIMD) - { - + { offset = Expand19bitoffset(opcode); RegNum = (opcode & 0x1F); LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x to LDR[SW] | PRFM X%d %p\n", opcode, RegNum, offset)); @@ -187,81 +184,55 @@ BYTE* NativeWalker::SetupOrSimulateInstructionForPatchSkip(T_CONTEXT * context, { CORDbgSetInstruction((CORDB_ADDRESS_TYPE *)patchBypass, 0xd503201f); //Add Nop in buffer - PCODE RegContents; if ((opcode & 0x3B000000) == 0x18000000) //LDR Literal { - RegContents = (PCODE)GetMem(ip); - if ((opcode & 0x4000000)) //LDR literal for SIMD + bool isSimd = ((opcode & 0x4000000) != 0); //LDR literal for SIMD + NEON128 SimdRegContents = { 0 }; + short opc = (opcode >> 30); + + LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x (opc: %x, isSimd: %x)\n", opcode, opc, isSimd)); + + switch (opc) { - NEON128 SimdRegContents; - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x to LDR V%d %p\n", opcode, RegNum, offset)); - short opc = (opcode >> 30); - switch (opc) - { - case 0: //4byte data into St - SimdRegContents.Low = 0xFFFFFFFF & RegContents; //zero the upper 32bit - SimdRegContents.High = 0; + case 0: //load 4 bytes + SimdRegContents.Low = GetMem(ip, 4, /* signExtend */ false); + SimdRegContents.High = 0; + if (isSimd) //LDR St [imm] SetSimdReg(context, RegNum, SimdRegContents); - break; - case 1: //8byte data into Dt - SimdRegContents.Low = RegContents; - SimdRegContents.High = 0; + else // LDR Wt [imm] + SetReg(context, RegNum, SimdRegContents.Low); + + break; + case 1: //load 8 bytes + SimdRegContents.Low = GetMem(ip, 8, /* signExtend */ false); + SimdRegContents.High = 0; + if (isSimd) //LDR Dt [imm] SetSimdReg(context, RegNum, SimdRegContents); - break; - case 2: //SIMD 16 byte data + else // LDR Xt [imm] + SetReg(context, RegNum, SimdRegContents.Low); + break; + case 2: //SIMD 16 byte data + if (isSimd) //LDR Qt [imm] + { SimdRegContents = GetSimdMem(ip); SetSimdReg(context, RegNum, SimdRegContents); - break; - default: - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate Unknown opcode: %x [LDR(litera,SIMD &FP)] \n", opcode)); - _ASSERTE(!("Arm64Walker::Simulated Unknown opcode")); - } - } - else - { - short opc = (opcode >> 30); - switch (opc) + else //LDR St [imm] (sign extendeded) { - case 0: //4byte data into Wt - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x to LDR W%d %p\n", opcode, RegNum, offset)); - RegContents = 0xFFFFFFFF & RegContents; //zero the upper 32bits - SetReg(context, RegNum, RegContents); - break; - - case 1: //8byte data into Xt - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x to LDR X%d %p\n", opcode, RegNum, offset)); - SetReg(context, RegNum, RegContents); - break; - - case 2: //LDRSW - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x to LDRSW X%d %p\n", opcode, RegNum, offset)); - RegContents = 
0xFFFFFFFF & RegContents; - - if (RegContents & 0x80000000) //Sign extend the Word - { - RegContents = 0xFFFFFFFF00000000 | RegContents; - } - SetReg(context, RegNum, RegContents); - break; - case 3: - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x as PRFM ,but do nothing \n", opcode)); - - break; - default: - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate Unknown opcode: %x [LDR(literal)] \n", opcode)); - _ASSERTE(!("Arm64Walker::Simulated Unknown opcode")); - + SimdRegContents.Low = GetMem(ip, 4, /* signExtend */ true); + SetReg(context, RegNum, SimdRegContents.Low); } + break; + case 3: + LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x as PRFM, but do nothing \n", opcode)); + break; + default: + LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate Unknown opcode: %x [LDR(literal, SIMD & FP)] \n", opcode)); + _ASSERTE(!("Arm64Walker::Simulated Unknown opcode")); } - } - else - { - RegContents = ip; - SetReg(context, RegNum, RegContents); - } + LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate loadedMemory [Hi: %llu, lo: %llu]\n", SimdRegContents.High, SimdRegContents.Low)); - LOG((LF_CORDB, LL_INFO100000, "Arm64Walker::Simulate opcode: %x to update Reg X[V]%d, as %p \n", opcode, RegNum, GetReg(context, RegNum))); + } } //else Just execute the opcodes as IS //{
diff --git a/src/coreclr/src/debug/ee/debugger.cpp b/src/coreclr/src/debug/ee/debugger.cpp index 7b2cd48b22ed..de49c11ee162 100644 --- a/src/coreclr/src/debug/ee/debugger.cpp +++ b/src/coreclr/src/debug/ee/debugger.cpp @@ -3458,14 +3458,14 @@ void Debugger::getBoundaries(MethodDesc * md, if (pModule == SystemDomain::SystemModule()) { - // We don't look up PDBs for mscorlib. This is not quite right, but avoids + // We don't look up PDBs for CoreLib. This is not quite right, but avoids // a bootstrapping problem. When an EXE loads, it has the option of setting // the COM apartment model to STA if we need to. It is important that no // other Coinitialize happens before this. Since loading the PDB reader uses // com we can not come first. However managed code IS run before the COM // apartment model is set, and thus we have a problem since this code is // called for when JITTing managed code. We avoid the problem by just - // bailing for mscorlib. + // bailing for CoreLib. return; } @@ -7953,7 +7953,7 @@ void Debugger::ProcessAnyPendingEvals(Thread *pThread) // Now clear the bit else we'll see it again when we process the Exception notification // from this upcoming UserAbort exception. pThread->ResetThreadStateNC(Thread::TSNC_DebuggerReAbort); - pThread->UserAbort(Thread::TAR_Thread, EEPolicy::TA_Safe, INFINITE, Thread::UAC_Normal); + pThread->UserAbort(Thread::TAR_Thread, EEPolicy::TA_Safe, INFINITE); } #endif @@ -15242,15 +15242,6 @@ HRESULT Debugger::FuncEvalSetup(DebuggerIPCE_FuncEvalInfo *pEvalInfo, return CORDBG_E_FUNC_EVAL_BAD_START_POINT; } - if (MethodDescBackpatchInfoTracker::IsLockOwnedByAnyThread()) - { - // A thread may have suspended for the debugger while holding the slot backpatching lock while trying to enter - // cooperative GC mode. If the FuncEval calls a method that is eligible for slot backpatching (virtual or interface - // methods that are eligible for tiering), the FuncEval may deadlock on trying to acquire the same lock. Fail the - // FuncEval to avoid the issue. - return CORDBG_E_FUNC_EVAL_BAD_START_POINT; - } - // Create a DebuggerEval to hold info about this eval while its in progress. Constructor copies the thread's // CONTEXT.
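// --- Editor's sketch (hedged): the sign extension that the LDRSW path above
// needs, matching the new GetMem(ip, 4, /* signExtend */ true) call. A 32-bit
// value with its high bit set must be widened to 64 bits with ones, exactly as
// the removed manual "0xFFFFFFFF00000000 | RegContents" OR did.
#include <cstdint>
#include <cassert>

uint64_t SignExtend32(uint32_t value)
{
    // casting through int32_t lets the compiler perform the arithmetic widening
    return (uint64_t)(int64_t)(int32_t)value;
}

int main()
{
    assert(SignExtend32(0x7FFFFFFF) == 0x000000007FFFFFFFull); // positive: zero-filled
    assert(SignExtend32(0x80000000) == 0xFFFFFFFF80000000ull); // negative: one-filled
    return 0;
}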
DebuggerEval *pDE = new (interopsafe, nothrow) DebuggerEval(filterContext, pEvalInfo, fInException); @@ -15484,7 +15475,7 @@ Debugger::FuncEvalAbort( // EX_TRY { - hr = pDE->m_thread->UserAbort(Thread::TAR_FuncEval, EEPolicy::TA_Safe, (DWORD)FUNC_EVAL_DEFAULT_TIMEOUT_VALUE, Thread::UAC_Normal); + hr = pDE->m_thread->UserAbort(Thread::TAR_FuncEval, EEPolicy::TA_Safe, (DWORD)FUNC_EVAL_DEFAULT_TIMEOUT_VALUE); if (hr == HRESULT_FROM_WIN32(ERROR_TIMEOUT)) { hr = S_OK; @@ -15550,7 +15541,7 @@ Debugger::FuncEvalRudeAbort( // EX_TRY { - hr = pDE->m_thread->UserAbort(Thread::TAR_FuncEval, EEPolicy::TA_Rude, (DWORD)FUNC_EVAL_DEFAULT_TIMEOUT_VALUE, Thread::UAC_Normal); + hr = pDE->m_thread->UserAbort(Thread::TAR_FuncEval, EEPolicy::TA_Rude, (DWORD)FUNC_EVAL_DEFAULT_TIMEOUT_VALUE); if (hr == HRESULT_FROM_WIN32(ERROR_TIMEOUT)) { hr = S_OK; diff --git a/src/coreclr/src/debug/ee/debugger.h b/src/coreclr/src/debug/ee/debugger.h index 2450ab65d4f9..6c113315852b 100644 --- a/src/coreclr/src/debug/ee/debugger.h +++ b/src/coreclr/src/debug/ee/debugger.h @@ -725,37 +725,6 @@ class DebuggerRCThread return GetRCThreadSendBuffer(); } - DebuggerIPCEvent *GetIPCEventSendBufferContinuation( - DebuggerIPCEvent *eventCur) - { - CONTRACTL - { - NOTHROW; - GC_NOTRIGGER; - PRECONDITION(eventCur != NULL); - PRECONDITION(eventCur->next == NULL); - } - CONTRACTL_END; - - DebuggerIPCEvent *dipce = (DebuggerIPCEvent *) new (nothrow) BYTE [CorDBIPC_BUFFER_SIZE]; - dipce->next = NULL; - - LOG((LF_CORDB,LL_INFO1000000, "About to GIPCESBC 0x%x\n",dipce)); - - if (dipce != NULL) - { - eventCur->next = dipce; - } -#ifdef _DEBUG - else - { - _ASSERTE( !"GetIPCEventSendBufferContinuation failed to allocate mem!" ); - } -#endif //_DEBUG - - return dipce; - } - // Send an IPCEvent once we're ready for sending. This should be done inbetween // SENDIPCEVENT_BEGIN & SENDIPCEVENT_END. 
See definition of SENDIPCEVENT_BEGIN // for usage pattern
diff --git a/src/coreclr/src/debug/ee/funceval.cpp b/src/coreclr/src/debug/ee/funceval.cpp index 0febdcbaef7f..8700b93d1318 100644 --- a/src/coreclr/src/debug/ee/funceval.cpp +++ b/src/coreclr/src/debug/ee/funceval.cpp @@ -3886,7 +3886,7 @@ void * STDCALL FuncEvalHijackWorker(DebuggerEval *pDE) FrameWithCookie<FuncEvalFrame> FEFrame(pDE, GetIP(&pDE->m_context), false); FEFrame.Push(); - pDE->m_thread->UserAbort(pDE->m_requester, EEPolicy::TA_Safe, INFINITE, Thread::UAC_Normal); + pDE->m_thread->UserAbort(pDE->m_requester, EEPolicy::TA_Safe, INFINITE); _ASSERTE(!"Should not return from UserAbort here!"); return NULL; }
diff --git a/src/coreclr/src/debug/ee/wks/CMakeLists.txt b/src/coreclr/src/debug/ee/wks/CMakeLists.txt index a6891ebb052c..3dd5e3612dfc 100644 --- a/src/coreclr/src/debug/ee/wks/CMakeLists.txt +++ b/src/coreclr/src/debug/ee/wks/CMakeLists.txt @@ -9,9 +9,9 @@ if (CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_ARM64) - preprocess_compile_asm(TARGET cordbee_wks ASM_FILES ${ASM_FILE} OUTPUT_OBJECTS ASM_OBJECTS) + preprocess_compile_asm(TARGET cordbee_wks_obj ASM_FILES ${ASM_FILE} OUTPUT_OBJECTS ASM_OBJECTS) - add_library_clr(cordbee_wks OBJECT ${CORDBEE_SOURCES_WKS} ${ASM_FILE} ${ASM_OBJECTS}) + add_library_clr(cordbee_wks_obj OBJECT ${CORDBEE_SOURCES_WKS} ${ASM_FILE} ${ASM_OBJECTS}) else () @@ -23,19 +23,21 @@ if (CLR_CMAKE_TARGET_WIN32) set_source_files_properties(${ASM_FILE} PROPERTIES COMPILE_OPTIONS "${ASM_OPTIONS}") - add_library_clr(cordbee_wks OBJECT ${CORDBEE_SOURCES_WKS} ${ASM_FILE}) + add_library_clr(cordbee_wks_obj OBJECT ${CORDBEE_SOURCES_WKS} ${ASM_FILE}) endif() else () if(CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_I386) - add_library_clr(cordbee_wks OBJECT ${CORDBEE_SOURCES_WKS} ../${ARCH_SOURCES_DIR}/dbghelpers.S) + add_library_clr(cordbee_wks_obj OBJECT ${CORDBEE_SOURCES_WKS} ../${ARCH_SOURCES_DIR}/dbghelpers.S) else() message(FATAL_ERROR "Unknown platform") endif() endif (CLR_CMAKE_TARGET_WIN32) -target_precompile_header(TARGET cordbee_wks HEADER stdafx.h) -add_dependencies(cordbee_wks eventing_headers) +target_precompile_header(TARGET cordbee_wks_obj HEADER stdafx.h) +add_dependencies(cordbee_wks_obj eventing_headers) +add_library(cordbee_wks INTERFACE) +target_sources(cordbee_wks INTERFACE $<TARGET_OBJECTS:cordbee_wks_obj>)
diff --git a/src/coreclr/src/debug/ildbsymlib/CMakeLists.txt b/src/coreclr/src/debug/ildbsymlib/CMakeLists.txt index b5b249228d26..362da1f64830 100644 --- a/src/coreclr/src/debug/ildbsymlib/CMakeLists.txt +++ b/src/coreclr/src/debug/ildbsymlib/CMakeLists.txt @@ -10,5 +10,6 @@ set( ILDBSYMLIB_SOURCES symwrite.cpp ) -add_library_clr(ildbsymlib OBJECT ${ILDBSYMLIB_SOURCES}) - +add_library_clr(ildbsymlib_obj OBJECT ${ILDBSYMLIB_SOURCES}) +add_library(ildbsymlib INTERFACE) +target_sources(ildbsymlib INTERFACE $<TARGET_OBJECTS:ildbsymlib_obj>)
diff --git a/src/coreclr/src/debug/inc/dacdbiinterface.h b/src/coreclr/src/debug/inc/dacdbiinterface.h index 79405d60e3d8..1f99f5f29665 100644 --- a/src/coreclr/src/debug/inc/dacdbiinterface.h +++ b/src/coreclr/src/debug/inc/dacdbiinterface.h @@ -2733,6 +2733,9 @@ class IDacDbiInterface virtual HRESULT GetLoaderHeapMemoryRanges(OUT DacDbiArrayList<COR_MEMORY_RANGE> *pRanges) = 0; + virtual + HRESULT IsModuleMapped(VMPTR_Module pModule, OUT BOOL *isModuleMapped) = 0; + // The following tag tells the DD-marshalling tool to stop scanning.
// END_MARSHAL diff --git a/src/coreclr/src/debug/inc/dbgipcevents.h b/src/coreclr/src/debug/inc/dbgipcevents.h index eda4029c0fb1..6fd2cf197c65 100644 --- a/src/coreclr/src/debug/inc/dbgipcevents.h +++ b/src/coreclr/src/debug/inc/dbgipcevents.h @@ -848,7 +848,7 @@ typedef VMPTR_Base VMPTR_CONTEXT; #endif // DomainFile is a base-class for a CLR module, with app-domain affinity. -// For domain-neutral modules (like mscorlib), there is a DomainFile instance +// For domain-neutral modules (like CoreLib), there is a DomainFile instance // for each appdomain the module lives in. // This is the canonical handle ICorDebug uses to a CLR module. DEFINE_VMPTR(class DomainFile, PTR_DomainFile, VMPTR_DomainFile); @@ -984,20 +984,9 @@ struct MSLAYOUT IPCEventTypeNameMapping const char * eventName; }; -constexpr IPCEventTypeNameMapping DbgIPCEventTypeNames[] = -{ - #define IPC_EVENT_TYPE0(type, val) { type, #type }, - #define IPC_EVENT_TYPE1(type, val) { type, #type }, - #define IPC_EVENT_TYPE2(type, val) { type, #type }, - #include "dbgipceventtypes.h" - #undef IPC_EVENT_TYPE2 - #undef IPC_EVENT_TYPE1 - #undef IPC_EVENT_TYPE0 - { DB_IPCE_INVALID_EVENT, "DB_IPCE_Error" } -}; - -const size_t nameCount = sizeof(DbgIPCEventTypeNames) / sizeof(DbgIPCEventTypeNames[0]); +extern const IPCEventTypeNameMapping DbgIPCEventTypeNames[]; +extern const size_t nameCount; struct MSLAYOUT IPCENames // We use a class/struct so that the function can remain in a shared header file { @@ -1028,7 +1017,7 @@ struct MSLAYOUT IPCENames // We use a class/struct so that the function can rema #undef IPC_EVENT_TYPE0 }; - unsigned int i, lim; + size_t i, lim; if (eventType < DB_IPCE_DEBUGGER_FIRST) { diff --git a/src/coreclr/src/debug/inc/diagnosticsipc.h b/src/coreclr/src/debug/inc/diagnosticsipc.h index 99d670ca6ca5..b5fff7afc65a 100644 --- a/src/coreclr/src/debug/inc/diagnosticsipc.h +++ b/src/coreclr/src/debug/inc/diagnosticsipc.h @@ -16,6 +16,7 @@ typedef void (*ErrorCallback)(const char *szMessage, uint32_t code); class IpcStream final { + friend class IpcStreamFactory; public: static constexpr int32_t InfiniteTimeout = -1; ~IpcStream(); @@ -26,11 +27,12 @@ class IpcStream final class DiagnosticsIpc final { + friend class IpcStreamFactory; public: enum ConnectionMode { - CLIENT, - SERVER + CONNECT, + LISTEN }; enum class PollEvents : uint8_t @@ -38,7 +40,8 @@ class IpcStream final NONE = 0x00, // no events SIGNALED = 0x01, // ready for use HANGUP = 0x02, // connection remotely closed - ERR = 0x04 // other error + ERR = 0x04, // error + UNKNOWN = 0x80 // unknown state }; // The bookeeping struct used for polling on server and client structs @@ -99,7 +102,7 @@ class IpcStream final sockaddr_un *const _pServerAddress; bool _isClosed; - DiagnosticsIpc(const int serverSocket, sockaddr_un *const pServerAddress, ConnectionMode mode = ConnectionMode::SERVER); + DiagnosticsIpc(const int serverSocket, sockaddr_un *const pServerAddress, ConnectionMode mode = ConnectionMode::LISTEN); // Used to unlink the socket so it can be removed from the filesystem // when the last reference to it is closed. 
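// --- Editor's sketch (hedged): consuming the PollEvents bits declared above.
// revents is a bitmask, so callers should test individual flags rather than
// compare for equality, and the new UNKNOWN bit marks revents values the Poll
// translation layer could not classify. HandlePollResult is a hypothetical
// caller; PollEvents and the nested IpcPollHandle come from diagnosticsipc.h.
void HandlePollResult(IpcStream::DiagnosticsIpc::IpcPollHandle& handle)
{
    typedef IpcStream::DiagnosticsIpc::PollEvents PollEvents;
    if (handle.revents & (uint8_t)PollEvents::ERR)
    {
        // unrecoverable: tear the stream down
    }
    else if (handle.revents & (uint8_t)PollEvents::HANGUP)
    {
        // peer went away; a LISTEN-mode handle can go back to waiting for Accept
    }
    else if (handle.revents & (uint8_t)PollEvents::SIGNALED)
    {
        // data (or a pending connection) is ready: Accept() or Read()
    }
    else if (handle.revents & (uint8_t)PollEvents::UNKNOWN)
    {
        // poll reported something the layer didn't map; log it and move on
    }
}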
@@ -110,7 +113,7 @@ class IpcStream final HANDLE _hPipe = INVALID_HANDLE_VALUE; OVERLAPPED _oOverlap = {}; - DiagnosticsIpc(const char(&namedPipeName)[MaxNamedPipeNameLength], ConnectionMode mode = ConnectionMode::SERVER); + DiagnosticsIpc(const char(&namedPipeName)[MaxNamedPipeNameLength], ConnectionMode mode = ConnectionMode::LISTEN); #endif /* TARGET_UNIX */ bool _isListening; @@ -125,13 +128,13 @@ class IpcStream final private: #ifdef TARGET_UNIX int _clientSocket = -1; - IpcStream(int clientSocket, int serverSocket, DiagnosticsIpc::ConnectionMode mode = DiagnosticsIpc::ConnectionMode::SERVER) + IpcStream(int clientSocket, DiagnosticsIpc::ConnectionMode mode = DiagnosticsIpc::ConnectionMode::LISTEN) : _clientSocket(clientSocket), _mode(mode) {} #else HANDLE _hPipe = INVALID_HANDLE_VALUE; OVERLAPPED _oOverlap = {}; BOOL _isTestReading = false; // used to check whether we are already doing a 0-byte read to test for data - IpcStream(HANDLE hPipe, DiagnosticsIpc::ConnectionMode mode = DiagnosticsIpc::ConnectionMode::SERVER); + IpcStream(HANDLE hPipe, DiagnosticsIpc::ConnectionMode mode = DiagnosticsIpc::ConnectionMode::LISTEN); #endif /* TARGET_UNIX */ DiagnosticsIpc::ConnectionMode _mode; diff --git a/src/coreclr/src/debug/shared/utils.cpp b/src/coreclr/src/debug/shared/utils.cpp index 5363e30ee8a8..b9c7d72db3f7 100644 --- a/src/coreclr/src/debug/shared/utils.cpp +++ b/src/coreclr/src/debug/shared/utils.cpp @@ -200,3 +200,17 @@ void ExportILToNativeMap(ULONG32 cMap, // [in] Min size of mapExt, m #endif // _DEBUG } } + +const IPCEventTypeNameMapping DbgIPCEventTypeNames[] = +{ + #define IPC_EVENT_TYPE0(type, val) { type, #type }, + #define IPC_EVENT_TYPE1(type, val) { type, #type }, + #define IPC_EVENT_TYPE2(type, val) { type, #type }, + #include "dbgipceventtypes.h" + #undef IPC_EVENT_TYPE2 + #undef IPC_EVENT_TYPE1 + #undef IPC_EVENT_TYPE0 + { DB_IPCE_INVALID_EVENT, "DB_IPCE_Error" } +}; + +const size_t nameCount = sizeof(DbgIPCEventTypeNames) / sizeof(DbgIPCEventTypeNames[0]); diff --git a/src/coreclr/src/debug/shim/debugshim.cpp b/src/coreclr/src/debug/shim/debugshim.cpp index 25e7a25d6764..b86c8eb0cfcc 100644 --- a/src/coreclr/src/debug/shim/debugshim.cpp +++ b/src/coreclr/src/debug/shim/debugshim.cpp @@ -50,6 +50,31 @@ typedef HRESULT (STDAPICALLTYPE *OpenVirtualProcess2FnPtr)(ULONG64 clrInstanceI typedef HMODULE (STDAPICALLTYPE *LoadLibraryWFnPtr)(LPCWSTR lpLibFileName); +static bool IsTargetWindows(ICorDebugDataTarget* pDataTarget) +{ + CorDebugPlatform targetPlatform; + + HRESULT result = pDataTarget->GetPlatform(&targetPlatform); + + if(FAILED(result)) + { + _ASSERTE(!"Unexpected error"); + return false; + } + + switch (targetPlatform) + { + case CORDB_PLATFORM_WINDOWS_X86: + case CORDB_PLATFORM_WINDOWS_AMD64: + case CORDB_PLATFORM_WINDOWS_IA64: + case CORDB_PLATFORM_WINDOWS_ARM: + case CORDB_PLATFORM_WINDOWS_ARM64: + return true; + default: + return false; + } +} + // Implementation of ICLRDebugging::OpenVirtualProcess // // Arguments: @@ -212,7 +237,7 @@ STDMETHODIMP CLRDebuggingImpl::OpenVirtualProcess( _ASSERTE(pFlags == NULL || *pFlags == 0); } } -#ifdef TARGET_UNIX +#ifdef HOST_UNIX else { // On Linux/MacOS the DAC module handle needs to be re-created using the DAC PAL instance @@ -232,7 +257,7 @@ STDMETHODIMP CLRDebuggingImpl::OpenVirtualProcess( hr = E_HANDLE; } } -#endif // TARGET_UNIX +#endif // HOST_UNIX } // If no errors so far and "OpenVirtualProcessImpl2" doesn't exist @@ -277,7 +302,7 @@ STDMETHODIMP CLRDebuggingImpl::OpenVirtualProcess( if (pDacModulePath != NULL) 
{ -#ifdef TARGET_UNIX +#ifdef HOST_UNIX free(pDacModulePath); #else CoTaskMemFree(pDacModulePath); @@ -286,7 +311,7 @@ STDMETHODIMP CLRDebuggingImpl::OpenVirtualProcess( if (pDbiModulePath != NULL) { -#ifdef TARGET_UNIX +#ifdef HOST_UNIX free(pDbiModulePath); #else CoTaskMemFree(pDbiModulePath); @@ -409,193 +434,198 @@ HRESULT CLRDebuggingImpl::GetCLRInfo(ICorDebugDataTarget* pDataTarget, __out_z __inout_ecount(dwDacNameCharCount) WCHAR* pDacName, DWORD dwDacNameCharCount) { -#ifndef TARGET_UNIX - WORD imageFileMachine = 0; - DWORD resourceSectionRVA = 0; - HRESULT hr = GetMachineAndResourceSectionRVA(pDataTarget, moduleBaseAddress, &imageFileMachine, &resourceSectionRVA); - - // We want the version resource which has type = RT_VERSION = 16, name = 1, language = 0x409 - DWORD versionResourceRVA = 0; - DWORD versionResourceSize = 0; - if(SUCCEEDED(hr)) - { - hr = GetResourceRvaFromResourceSectionRva(pDataTarget, moduleBaseAddress, resourceSectionRVA, 16, 1, 0x409, - &versionResourceRVA, &versionResourceSize); - } - - // At last we get our version info - VS_FIXEDFILEINFO fixedFileInfo = {0}; - if(SUCCEEDED(hr)) - { - // The version resource has 3 words, then the unicode string "VS_VERSION_INFO" - // (16 WCHARS including the null terminator) - // then padding to a 32-bit boundary, then the VS_FIXEDFILEINFO struct - DWORD fixedFileInfoRVA = ((versionResourceRVA + 3*2 + 16*2 + 3)/4)*4; - hr = ReadFromDataTarget(pDataTarget, moduleBaseAddress + fixedFileInfoRVA, (BYTE*)&fixedFileInfo, sizeof(fixedFileInfo)); - } - - //Verify the signature on the version resource - if(SUCCEEDED(hr) && fixedFileInfo.dwSignature != PE_FIXEDFILEINFO_SIGNATURE) - { - hr = CORDBG_E_NOT_CLR; - } - - // Record the version information - if(SUCCEEDED(hr)) - { - pVersion->wMajor = (WORD) (fixedFileInfo.dwProductVersionMS >> 16); - pVersion->wMinor = (WORD) (fixedFileInfo.dwProductVersionMS & 0xFFFF); - pVersion->wBuild = (WORD) (fixedFileInfo.dwProductVersionLS >> 16); - pVersion->wRevision = (WORD) (fixedFileInfo.dwProductVersionLS & 0xFFFF); - } - - // Now grab the special clr debug info resource - // We may need to scan a few different names searching though... - // 1) CLRDEBUGINFO where host_os = 'WINDOWS' or 'CORESYS' and host_arch = 'X86' or 'ARM' or 'AMD64' - // 2) For back-compat if the host os is windows and the host architecture matches the target then CLRDEBUGINFO is used with no suffix. 
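// --- Editor's sketch (hedged): the debug-resource probing order GetCLRInfo uses
// in the surrounding hunks: first the host-specific name (CLRDEBUGINFO + host OS
// + host architecture), then plain CLRDEBUGINFO as a back-compat fallback when
// the host and target architectures match. FindResourceRva is a hypothetical,
// self-contained stand-in for GetResourceRvaFromResourceSectionRvaByName.
#include <cwchar>
#include <cstdio>

static bool FindResourceRva(const wchar_t* name)
{
    // pretend the image only carries the suffixed resource
    return wcscmp(name, L"CLRDEBUGINFOWINDOWSAMD64") == 0;
}

static bool FindClrDebugInfo(bool hostArchMatchesTarget)
{
    if (FindResourceRva(L"CLRDEBUGINFOWINDOWSAMD64")) // assumed Windows x64 host
        return true;
    if (hostArchMatchesTarget && FindResourceRva(L"CLRDEBUGINFO")) // back-compat
        return true;
    return false; // the caller maps this to CORDBG_E_NOT_CLR
}

int main()
{
    printf("recognized as CLR: %d\n", FindClrDebugInfo(false));
    return 0;
}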
- DWORD debugResourceRVA = 0; - DWORD debugResourceSize = 0; - BOOL useCrossPlatformNaming = FALSE; - if(SUCCEEDED(hr)) +#ifdef HOST_WINDOWS + if(IsTargetWindows(pDataTarget)) { - // the initial state is that we haven't found a proper resource - HRESULT hrGetResource = E_FAIL; - - // First check for the resource which has type = RC_DATA = 10, name = "CLRDEBUGINFO", language = 0 -#if defined (HOST_WINDOWS) && defined(HOST_X86) - const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSX86"); -#endif + WORD imageFileMachine = 0; + DWORD resourceSectionRVA = 0; + HRESULT hr = GetMachineAndResourceSectionRVA(pDataTarget, moduleBaseAddress, &imageFileMachine, &resourceSectionRVA); -#if !defined (HOST_WINDOWS) && defined(HOST_X86) - const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSX86"); -#endif - -#if defined (HOST_WINDOWS) && defined(HOST_AMD64) - const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSAMD64"); -#endif - -#if !defined (HOST_WINDOWS) && defined(HOST_AMD64) - const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSAMD64"); -#endif - -#if defined (HOST_WINDOWS) && defined(HOST_ARM64) - const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSARM64"); -#endif - -#if !defined (HOST_WINDOWS) && defined(HOST_ARM64) - const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSARM64"); -#endif + // We want the version resource which has type = RT_VERSION = 16, name = 1, language = 0x409 + DWORD versionResourceRVA = 0; + DWORD versionResourceSize = 0; + if(SUCCEEDED(hr)) + { + hr = GetResourceRvaFromResourceSectionRva(pDataTarget, moduleBaseAddress, resourceSectionRVA, 16, 1, 0x409, + &versionResourceRVA, &versionResourceSize); + } -#if defined (HOST_WINDOWS) && defined(HOST_ARM) - const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSARM"); -#endif + // At last we get our version info + VS_FIXEDFILEINFO fixedFileInfo = {0}; + if(SUCCEEDED(hr)) + { + // The version resource has 3 words, then the unicode string "VS_VERSION_INFO" + // (16 WCHARS including the null terminator) + // then padding to a 32-bit boundary, then the VS_FIXEDFILEINFO struct + DWORD fixedFileInfoRVA = ((versionResourceRVA + 3*2 + 16*2 + 3)/4)*4; + hr = ReadFromDataTarget(pDataTarget, moduleBaseAddress + fixedFileInfoRVA, (BYTE*)&fixedFileInfo, sizeof(fixedFileInfo)); + } -#if !defined (HOST_WINDOWS) && defined(HOST_ARM) - const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSARM"); -#endif + //Verify the signature on the version resource + if(SUCCEEDED(hr) && fixedFileInfo.dwSignature != PE_FIXEDFILEINFO_SIGNATURE) + { + hr = CORDBG_E_NOT_CLR; + } - hrGetResource = GetResourceRvaFromResourceSectionRvaByName(pDataTarget, moduleBaseAddress, resourceSectionRVA, 10, resourceName, 0, - &debugResourceRVA, &debugResourceSize); - useCrossPlatformNaming = SUCCEEDED(hrGetResource); + // Record the version information + if(SUCCEEDED(hr)) + { + pVersion->wMajor = (WORD) (fixedFileInfo.dwProductVersionMS >> 16); + pVersion->wMinor = (WORD) (fixedFileInfo.dwProductVersionMS & 0xFFFF); + pVersion->wBuild = (WORD) (fixedFileInfo.dwProductVersionLS >> 16); + pVersion->wRevision = (WORD) (fixedFileInfo.dwProductVersionLS & 0xFFFF); + } + // Now grab the special clr debug info resource + // We may need to scan a few different names searching though... + // 1) CLRDEBUGINFO where host_os = 'WINDOWS' or 'CORESYS' and host_arch = 'X86' or 'ARM' or 'AMD64' + // 2) For back-compat if the host os is windows and the host architecture matches the target then CLRDEBUGINFO is used with no suffix. 
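// --- Editor's sketch (hedged): a worked version of the fixedFileInfoRVA
// computation in the hunk above. The version resource starts with three WORDs,
// then the UTF-16 string "VS_VERSION_INFO" (16 WCHARs including the terminator),
// and VS_FIXEDFILEINFO begins at the next 32-bit boundary; ((x + 3) / 4) * 4 is
// the usual round-up-to-4 idiom. The RVA value is a made-up example.
#include <cstdio>

int main()
{
    unsigned versionResourceRVA = 0x1E5A8;          // hypothetical resource RVA
    unsigned headerBytes = 3 * 2 + 16 * 2;          // 3 WORDs + 16 WCHARs = 38 bytes
    unsigned fixedFileInfoRVA = ((versionResourceRVA + headerBytes + 3) / 4) * 4;
    // 0x1E5A8 + 38 = 0x1E5CE, which rounds up to 0x1E5D0
    printf("VS_FIXEDFILEINFO at RVA 0x%X\n", fixedFileInfoRVA);
    return 0;
}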
+ DWORD debugResourceRVA = 0; + DWORD debugResourceSize = 0; + BOOL useCrossPlatformNaming = FALSE; + if(SUCCEEDED(hr)) + { + // the initial state is that we haven't found a proper resource + HRESULT hrGetResource = E_FAIL; + + // First check for the resource which has type = RC_DATA = 10, name = "CLRDEBUGINFO", language = 0 + #if defined (HOST_WINDOWS) && defined(HOST_X86) + const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSX86"); + #endif + + #if !defined (HOST_WINDOWS) && defined(HOST_X86) + const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSX86"); + #endif + + #if defined (HOST_WINDOWS) && defined(HOST_AMD64) + const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSAMD64"); + #endif + + #if !defined (HOST_WINDOWS) && defined(HOST_AMD64) + const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSAMD64"); + #endif + + #if defined (HOST_WINDOWS) && defined(HOST_ARM64) + const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSARM64"); + #endif + + #if !defined (HOST_WINDOWS) && defined(HOST_ARM64) + const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSARM64"); + #endif + + #if defined (HOST_WINDOWS) && defined(HOST_ARM) + const WCHAR * resourceName = W("CLRDEBUGINFOWINDOWSARM"); + #endif + + #if !defined (HOST_WINDOWS) && defined(HOST_ARM) + const WCHAR * resourceName = W("CLRDEBUGINFOCORESYSARM"); + #endif + + hrGetResource = GetResourceRvaFromResourceSectionRvaByName(pDataTarget, moduleBaseAddress, resourceSectionRVA, 10, resourceName, 0, + &debugResourceRVA, &debugResourceSize); + useCrossPlatformNaming = SUCCEEDED(hrGetResource); + + + #if defined(HOST_WINDOWS) && (defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM)) + #if defined(HOST_X86) + #define _HOST_MACHINE_TYPE IMAGE_FILE_MACHINE_I386 + #elif defined(HOST_AMD64) + #define _HOST_MACHINE_TYPE IMAGE_FILE_MACHINE_AMD64 + #elif defined(HOST_ARM) + #define _HOST_MACHINE_TYPE IMAGE_FILE_MACHINE_ARMNT + #endif + + // if this is windows, and if host_arch matches target arch then we can fallback to searching for CLRDEBUGINFO on failure + if(FAILED(hrGetResource) && (imageFileMachine == _HOST_MACHINE_TYPE)) + { + hrGetResource = GetResourceRvaFromResourceSectionRvaByName(pDataTarget, moduleBaseAddress, resourceSectionRVA, 10, W("CLRDEBUGINFO"), 0, + &debugResourceRVA, &debugResourceSize); + } -#if defined(HOST_WINDOWS) && (defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM)) - #if defined(HOST_X86) - #define _HOST_MACHINE_TYPE IMAGE_FILE_MACHINE_I386 - #elif defined(HOST_AMD64) - #define _HOST_MACHINE_TYPE IMAGE_FILE_MACHINE_AMD64 - #elif defined(HOST_ARM) - #define _HOST_MACHINE_TYPE IMAGE_FILE_MACHINE_ARMNT - #endif + #undef _HOST_MACHINE_TYPE + #endif + // if the search failed, we don't recognize the CLR + if(FAILED(hrGetResource)) + hr = CORDBG_E_NOT_CLR; + } - // if this is windows, and if host_arch matches target arch then we can fallback to searching for CLRDEBUGINFO on failure - if(FAILED(hrGetResource) && (imageFileMachine == _HOST_MACHINE_TYPE)) + CLR_DEBUG_RESOURCE debugResource; + if(SUCCEEDED(hr) && debugResourceSize != sizeof(debugResource)) { - hrGetResource = GetResourceRvaFromResourceSectionRvaByName(pDataTarget, moduleBaseAddress, resourceSectionRVA, 10, W("CLRDEBUGINFO"), 0, - &debugResourceRVA, &debugResourceSize); + hr = CORDBG_E_NOT_CLR; } - #undef _HOST_MACHINE_TYPE -#endif - // if the search failed, we don't recognize the CLR - if(FAILED(hrGetResource)) + // Get the special debug resource from the image and return the results + if(SUCCEEDED(hr)) + { + hr = ReadFromDataTarget(pDataTarget, moduleBaseAddress + 
debugResourceRVA, (BYTE*)&debugResource, sizeof(debugResource)); + } + if(SUCCEEDED(hr) && (debugResource.dwVersion != 0)) + { hr = CORDBG_E_NOT_CLR; - } - - CLR_DEBUG_RESOURCE debugResource; - if(SUCCEEDED(hr) && debugResourceSize != sizeof(debugResource)) - { - hr = CORDBG_E_NOT_CLR; - } - - // Get the special debug resource from the image and return the results - if(SUCCEEDED(hr)) - { - hr = ReadFromDataTarget(pDataTarget, moduleBaseAddress + debugResourceRVA, (BYTE*)&debugResource, sizeof(debugResource)); - } - if(SUCCEEDED(hr) && (debugResource.dwVersion != 0)) - { - hr = CORDBG_E_NOT_CLR; - } + } - // The signature needs to match m_skuId exactly, except for m_skuId=CLR_ID_ONECORE_CLR which is - // also compatible with the older CLR_ID_PHONE_CLR signature. - if(SUCCEEDED(hr) && - (debugResource.signature != m_skuId) && - !( (debugResource.signature == CLR_ID_PHONE_CLR) && (m_skuId == CLR_ID_ONECORE_CLR) )) - { - hr = CORDBG_E_NOT_CLR; - } + // The signature needs to match m_skuId exactly, except for m_skuId=CLR_ID_ONECORE_CLR which is + // also compatible with the older CLR_ID_PHONE_CLR signature. + if(SUCCEEDED(hr) && + (debugResource.signature != m_skuId) && + !( (debugResource.signature == CLR_ID_PHONE_CLR) && (m_skuId == CLR_ID_ONECORE_CLR) )) + { + hr = CORDBG_E_NOT_CLR; + } - if(SUCCEEDED(hr) && - (debugResource.signature != CLR_ID_ONECORE_CLR) && - useCrossPlatformNaming) - { - FormatLongDacModuleName(pDacName, dwDacNameCharCount, imageFileMachine, &fixedFileInfo); - swprintf_s(pDbiName, dwDbiNameCharCount, W("%s_%s.dll"), MAIN_DBI_MODULE_NAME_W, W("x86")); - } - else - { - if(m_skuId == CLR_ID_V4_DESKTOP) - swprintf_s(pDacName, dwDacNameCharCount, W("%s.dll"), CLR_DAC_MODULE_NAME_W); + if(SUCCEEDED(hr) && + (debugResource.signature != CLR_ID_ONECORE_CLR) && + useCrossPlatformNaming) + { + FormatLongDacModuleName(pDacName, dwDacNameCharCount, imageFileMachine, &fixedFileInfo); + swprintf_s(pDbiName, dwDbiNameCharCount, W("%s_%s.dll"), MAIN_DBI_MODULE_NAME_W, W("x86")); + } else - swprintf_s(pDacName, dwDacNameCharCount, W("%s.dll"), CORECLR_DAC_MODULE_NAME_W); - swprintf_s(pDbiName, dwDbiNameCharCount, W("%s.dll"), MAIN_DBI_MODULE_NAME_W); - } + { + if(m_skuId == CLR_ID_V4_DESKTOP) + swprintf_s(pDacName, dwDacNameCharCount, W("%s.dll"), CLR_DAC_MODULE_NAME_W); + else + swprintf_s(pDacName, dwDacNameCharCount, W("%s.dll"), CORECLR_DAC_MODULE_NAME_W); + swprintf_s(pDbiName, dwDbiNameCharCount, W("%s.dll"), MAIN_DBI_MODULE_NAME_W); + } - if(SUCCEEDED(hr)) - { - *pdwDbiTimeStamp = debugResource.dwDbiTimeStamp; - *pdwDbiSizeOfImage = debugResource.dwDbiSizeOfImage; - *pdwDacTimeStamp = debugResource.dwDacTimeStamp; - *pdwDacSizeOfImage = debugResource.dwDacSizeOfImage; - } + if(SUCCEEDED(hr)) + { + *pdwDbiTimeStamp = debugResource.dwDbiTimeStamp; + *pdwDbiSizeOfImage = debugResource.dwDbiSizeOfImage; + *pdwDacTimeStamp = debugResource.dwDacTimeStamp; + *pdwDacSizeOfImage = debugResource.dwDacSizeOfImage; + } - // any failure should be interpreted as this module not being a CLR - if(FAILED(hr)) - { - return CORDBG_E_NOT_CLR; + // any failure should be interpreted as this module not being a CLR + if(FAILED(hr)) + { + return CORDBG_E_NOT_CLR; + } + else + { + return S_OK; + } } else +#endif // !HOST_WINDOWS { - return S_OK; - } -#else - swprintf_s(pDacName, dwDacNameCharCount, W("%s"), MAKEDLLNAME_W(CORECLR_DAC_MODULE_NAME_W)); - swprintf_s(pDbiName, dwDbiNameCharCount, W("%s"), MAKEDLLNAME_W(MAIN_DBI_MODULE_NAME_W)); + swprintf_s(pDacName, dwDacNameCharCount, W("%s"), 
MAKEDLLNAME_W(CORECLR_DAC_MODULE_NAME_W)); + swprintf_s(pDbiName, dwDbiNameCharCount, W("%s"), MAKEDLLNAME_W(MAIN_DBI_MODULE_NAME_W)); - pVersion->wMajor = 0; - pVersion->wMinor = 0; - pVersion->wBuild = 0; - pVersion->wRevision = 0; + pVersion->wMajor = 0; + pVersion->wMinor = 0; + pVersion->wBuild = 0; + pVersion->wRevision = 0; - *pdwDbiTimeStamp = 0; - *pdwDbiSizeOfImage = 0; - *pdwDacTimeStamp = 0; - *pdwDacSizeOfImage = 0; + *pdwDbiTimeStamp = 0; + *pdwDbiSizeOfImage = 0; + *pdwDacTimeStamp = 0; + *pdwDacSizeOfImage = 0; - return S_OK; -#endif // TARGET_UNIX + return S_OK; + } } // Formats the long name for DAC diff --git a/src/coreclr/src/dlls/mscordac/mscordac_unixexports.src b/src/coreclr/src/dlls/mscordac/mscordac_unixexports.src index 29c010b9e849..31dd9ea4875e 100644 --- a/src/coreclr/src/dlls/mscordac/mscordac_unixexports.src +++ b/src/coreclr/src/dlls/mscordac/mscordac_unixexports.src @@ -43,6 +43,9 @@ nativeStringResourceTable_mscorrc #PAL_InitializeDLL #PAL_TerminateEx #PAL_IsDebuggerPresent +#PAL_OpenProcessMemory +#PAL_CloseProcessMemory +#PAL_ReadProcessMemory #PAL_ProbeMemory #PAL_Random #PAL_memcpy diff --git a/src/coreclr/src/dlls/mscoree/coreclr/CMakeLists.txt b/src/coreclr/src/dlls/mscoree/coreclr/CMakeLists.txt index f01133ce40ff..2a25b2119b36 100644 --- a/src/coreclr/src/dlls/mscoree/coreclr/CMakeLists.txt +++ b/src/coreclr/src/dlls/mscoree/coreclr/CMakeLists.txt @@ -10,17 +10,17 @@ if (CLR_CMAKE_HOST_WIN32) list(APPEND CLR_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/coreclr.def) - add_link_options(/ENTRY:CoreDllMain) + add_linker_flag("/ENTRY:CoreDllMain") # Incremental linking results in the linker inserting extra padding and routing function calls via thunks that can break the # invariants (e.g. size of region between Jit_PatchedCodeLast-Jit_PatchCodeStart needs to fit in a page). - add_link_options(/INCREMENTAL:NO) + add_linker_flag("/INCREMENTAL:NO") # Delay load libraries required for WinRT as that is not supported on all platforms - add_link_options("/DELAYLOAD:api-ms-win-core-winrt-l1-1-0.dll") + add_linker_flag("/DELAYLOAD:api-ms-win-core-winrt-l1-1-0.dll") # Delay load version.dll so that we can specify how to search when loading it as it is not part of Windows' known DLLs - add_link_options("/DELAYLOAD:version.dll") + add_linker_flag("/DELAYLOAD:version.dll") # No library groups for Win32 set(START_LIBRARY_GROUP) @@ -33,7 +33,7 @@ else(CLR_CMAKE_HOST_WIN32) if(CLR_CMAKE_TARGET_LINUX OR CLR_CMAKE_TARGET_FREEBSD OR CLR_CMAKE_TARGET_NETBSD OR CLR_CMAKE_TARGET_SUNOS) # This option is necessary to ensure that the overloaded delete operator defined inside # of the utilcode will be used instead of the standard library delete operator. - add_link_options("LINKER:-Bsymbolic") + add_linker_flag("-Wl,-Bsymbolic") # The following linked options can be inserted into the linker libraries list to # ensure proper resolving of circular references between a subset of the libraries. 
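The -Wl,-Bsymbolic change above is about symbol interposition: utilcode replaces the global delete operator, and without -Bsymbolic a call to operator delete made from inside the shared library can be bound to the standard library's definition at dynamic-link time. A minimal standalone sketch of such a replacement (hypothetical illustration, not code from this repository):

// bsymbolic_sketch.cpp - hypothetical illustration, not repository code.
// Build it into a shared object with and without -Wl,-Bsymbolic to compare binding.
#include <cstdio>
#include <cstdlib>
#include <new>

// Replacement global allocation functions, standing in for the ones utilcode defines.
void* operator new(std::size_t size)
{
    std::printf("replacement operator new(%zu)\n", size);
    if (void* p = std::malloc(size))
        return p;
    throw std::bad_alloc();
}

void operator delete(void* p) noexcept
{
    std::printf("replacement operator delete\n");
    std::free(p);
}

// Sized variant that some compilers emit for delete-expressions.
void operator delete(void* p, std::size_t) noexcept
{
    operator delete(p);
}

int main()
{
    int* p = new int(42); // binds to the replacement above
    delete p;             // -Wl,-Bsymbolic keeps in-library calls bound to it as well
    return 0;
}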
@@ -109,7 +109,7 @@ set(CORECLR_LIBRARIES ildbsymlib utilcode v3binder - System.Globalization.Native-Static + System.Globalization.Native-static interop ) @@ -168,8 +168,10 @@ if(FEATURE_MERGE_JIT_AND_ENGINE) set(CLRJIT_STATIC clrjit_static) endif(FEATURE_MERGE_JIT_AND_ENGINE) -target_link_libraries(coreclr PUBLIC ${CORECLR_LIBRARIES} ${CLRJIT_STATIC} cee_wks cee_wks_core) -target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} clrjit_static cee_wks_mergeable cee_wks_core) +target_sources(coreclr PUBLIC $<TARGET_OBJECTS:cee_wks_core>) +target_link_libraries(coreclr PUBLIC ${CORECLR_LIBRARIES} ${CLRJIT_STATIC} cee_wks) +target_sources(coreclr_static PUBLIC $<TARGET_OBJECTS:cee_wks_core>) +target_link_libraries(coreclr_static PUBLIC ${CORECLR_LIBRARIES} clrjit_static cee_wks_mergeable) # Create the runtime module index header file containing the coreclr build id # for xplat and the timestamp/size on Windows. diff --git a/src/coreclr/src/dlls/mscoree/coreclr/README.md b/src/coreclr/src/dlls/mscoree/coreclr/README.md index 0e291a87a3d4..b4f3e6f8845d 100644 --- a/src/coreclr/src/dlls/mscoree/coreclr/README.md +++ b/src/coreclr/src/dlls/mscoree/coreclr/README.md @@ -4,4 +4,4 @@ but if that changes we can always create a little nicer tooling for it. dump\_helper\_resource.bin is used to populate the DUMP\_HELPER resource inside coreclr.dll on Windows. When an application crashes, Windows MinidumpWriteDump is planning to scan modules looking for this resource. The content of the resource is expected to be the name of a dll in the same folder, encoded in UTF8, null terminated, that implements the CLRDataCreateInterface function. For OS security purposes MinidumpWriteDump will do an authenticode signing check before loading the indicated binary, however if your build isn't -signed you can get around this limitation by registering it at HKLM\Software\Microsoft\WindowsNT\CurrentVersion\MiniDumpAuxilliaryDlls. \ No newline at end of file +signed you can get around this limitation by registering it at HKLM\Software\Microsoft\WindowsNT\CurrentVersion\MiniDumpAuxilliaryDlls. 
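The README above describes the DUMP_HELPER payload: a UTF-8, null-terminated DLL name living next to coreclr.dll. A hedged C++ sketch of how a consumer could read such a resource follows; the resource name and type constants here are illustrative assumptions, since the README does not spell them out.

// dump_helper_read.cpp - illustrative sketch only; the resource name/type below are assumptions.
#include <windows.h>
#include <stdio.h>

int main(void)
{
    // Load the module for data access only.
    HMODULE coreclr = LoadLibraryExW(L"coreclr.dll", NULL, LOAD_LIBRARY_AS_DATAFILE);
    if (coreclr == NULL)
        return 1;

    // Hypothetical identifiers: the actual name/type come from the build files.
    HRSRC res = FindResourceW(coreclr, L"DUMP_HELPER", MAKEINTRESOURCEW(10) /* RT_RCDATA */);
    if (res != NULL)
    {
        HGLOBAL block = LoadResource(coreclr, res);
        DWORD size = SizeofResource(coreclr, res);
        const char* utf8Name = (const char*)LockResource(block);
        if (utf8Name != NULL && size > 0 && utf8Name[size - 1] == '\0')
        {
            // Payload: UTF-8, null-terminated name of a DLL in the same folder
            // that exports CLRDataCreateInterface.
            printf("dump helper dll: %s\n", utf8Name);
        }
    }

    FreeLibrary(coreclr);
    return 0;
}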
diff --git a/src/coreclr/src/dlls/mscoree/mscoree.cpp b/src/coreclr/src/dlls/mscoree/mscoree.cpp index 810a3e88f3fe..f9e1e3d89ef6 100644 --- a/src/coreclr/src/dlls/mscoree/mscoree.cpp +++ b/src/coreclr/src/dlls/mscoree/mscoree.cpp @@ -444,7 +444,7 @@ HRESULT SetInternalSystemDirectory() } #if defined(CROSSGEN_COMPILE) -void SetMscorlibPath(LPCWSTR wzSystemDirectory) +void SetCoreLibPath(LPCWSTR wzSystemDirectory) { DWORD len = (DWORD)wcslen(wzSystemDirectory); bool appendSeparator = wzSystemDirectory[len-1] != DIRECTORY_SEPARATOR_CHAR_W; diff --git a/src/coreclr/src/dlls/mscoree/unixinterface.cpp b/src/coreclr/src/dlls/mscoree/unixinterface.cpp index def877265312..e23ece987224 100644 --- a/src/coreclr/src/dlls/mscoree/unixinterface.cpp +++ b/src/coreclr/src/dlls/mscoree/unixinterface.cpp @@ -27,6 +27,9 @@ typedef NewArrayHolder<const WCHAR> ConstWStringHolder; // Specifies whether coreclr is embedded or standalone extern bool g_coreclr_embedded; +// Specifies whether hostpolicy is embedded in executable or standalone +extern bool g_hostpolicy_embedded; + // Holder for array of wide strings class ConstWStringArrayHolder : public NewArrayHolder<LPCWSTR> { @@ -116,7 +119,8 @@ static void ConvertConfigPropertiesToUnicode( int propertyCount, LPCWSTR** propertyKeysWRef, LPCWSTR** propertyValuesWRef, - BundleProbe** bundleProbe) + BundleProbe** bundleProbe, + bool* hostPolicyEmbedded) { LPCWSTR* propertyKeysW = new (nothrow) LPCWSTR[propertyCount]; ASSERTE_ALL_BUILDS(propertyKeysW != nullptr); @@ -135,6 +139,11 @@ static void ConvertConfigPropertiesToUnicode( // is passed in as the value of "BUNDLE_PROBE" property (encoded as a string). *bundleProbe = (BundleProbe*)_wcstoui64(propertyValuesW[propertyIndex], nullptr, 0); } + else if (strcmp(propertyKeys[propertyIndex], "HOSTPOLICY_EMBEDDED") == 0) + { + // The HOSTPOLICY_EMBEDDED property indicates if the executable has hostpolicy statically linked in + *hostPolicyEmbedded = (wcscmp(propertyValuesW[propertyIndex], W("true")) == 0); + } } *propertyKeysWRef = propertyKeysW; @@ -177,6 +186,7 @@ int coreclr_initialize( LPCWSTR* propertyKeysW; LPCWSTR* propertyValuesW; BundleProbe* bundleProbe = nullptr; + bool hostPolicyEmbedded = false; ConvertConfigPropertiesToUnicode( propertyKeys, @@ -184,7 +194,8 @@ int coreclr_initialize( propertyCount, &propertyKeysW, &propertyValuesW, - &bundleProbe); + &bundleProbe, + &hostPolicyEmbedded); #ifdef TARGET_UNIX DWORD error = PAL_InitializeCoreCLR(exePath, g_coreclr_embedded); @@ -198,6 +209,8 @@ int coreclr_initialize( } #endif + g_hostpolicy_embedded = hostPolicyEmbedded; + ReleaseHolder<ICLRRuntimeHost4> host; hr = CorHost2::CreateObject(IID_ICLRRuntimeHost4, (void**)&host); diff --git a/src/coreclr/src/dlls/mscorpe/ceefilegenwriter.cpp b/src/coreclr/src/dlls/mscorpe/ceefilegenwriter.cpp index 77afa5a83024..0fb18b0629b2 100644 --- a/src/coreclr/src/dlls/mscorpe/ceefilegenwriter.cpp +++ b/src/coreclr/src/dlls/mscorpe/ceefilegenwriter.cpp @@ -12,10 +12,168 @@ #include #include "corerror.h" -#include "stubs.h" #include and #include +// The following block contains a template for the default entry point stubs of a COM+ +// IL only program. One can emit these stubs (with some fix-ups) and make +// the code supplied the entry point value for the image. The fix-ups will +// in turn cause mscoree.dll to be loaded and the correct entry point to be +// called. 
+// +// Note: Although these stubs contain x86 specific code, they are used +// for all platforms + + +//***************************************************************************** +// This stub is designed for an x86 Windows application. It will call the +// _CorExeMain function in mscoree.dll. This entry point will in turn load +// and run the IL program. +// +// jump _CorExeMain(); +// +// The code jumps to the imported function _CorExeMain using the iat. +// The address in the template is the address of the iat entry which is +// fixed up by the loader when the image is paged in. +//***************************************************************************** + +const BYTE ExeMainX86Template[] = +{ + // Jump through IAT to _CorExeMain + 0xFF, 0x25, // jmp [iat:_CorExeMain entry] + 0x00, 0x00, 0x00, 0x00, // address to replace + +}; + +#define ExeMainX86TemplateSize sizeof(ExeMainX86Template) +#define CorExeMainX86IATOffset 2 + +//***************************************************************************** +// This stub is designed for an x86 Windows application. It will call the +// _CorDllMain function in mscoree.dll with the base entry point for +// the loaded DLL. This entry point will in turn load and run the IL program. +// +// jump _CorDllMain +// +// The code jumps to the imported function _CorDllMain using the iat. +// The address in the template is the address of the iat entry which is +// fixed up by the loader when the image is paged in. +//***************************************************************************** + +const BYTE DllMainX86Template[] = +{ + // Jump through IAT to CorDllMain + 0xFF, 0x25, // jmp [iat:_CorDllMain entry] + 0x00, 0x00, 0x00, 0x00, // address to replace +}; + +#define DllMainX86TemplateSize sizeof(DllMainX86Template) +#define CorDllMainX86IATOffset 2 + +//***************************************************************************** +// This stub is designed for an AMD64 Windows application. It will call the +// _CorExeMain function in mscoree.dll. This entry point will in turn load +// and run the IL program. +// +// mov rax, _CorExeMain(); +// jmp [rax] +// +// The code jumps to the imported function _CorExeMain using the iat. +// The address in the template is the address of the iat entry which is +// fixed up by the loader when the image is paged in. +//***************************************************************************** + +const BYTE ExeMainAMD64Template[] = +{ + // Jump through IAT to _CorExeMain + 0x48, 0xA1, // rex.w rex.b mov rax,[following address] + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,//address of iat:_CorExeMain entry + 0xFF, 0xE0 // jmp [rax] +}; + +#define ExeMainAMD64TemplateSize sizeof(ExeMainAMD64Template) +#define CorExeMainAMD64IATOffset 2 + +//***************************************************************************** +// This stub is designed for an AMD64 Windows application. It will call the +// _CorDllMain function in mscoree.dll with the base entry point for +// the loaded DLL. This entry point will in turn load and run the IL program. +// +// mov rax, _CorDllMain(); +// jmp [rax] +// +// The code jumps to the imported function _CorDllMain using the iat. +// The address in the template is the address of the iat entry which is +// fixed up by the loader when the image is paged in. 
+//***************************************************************************** + +const BYTE DllMainAMD64Template[] = +{ + // Jump through IAT to CorDllMain + 0x48, 0xA1, // rex.w rex.b mov rax,[following address] + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,//address of iat:_CorDllMain entry + 0xFF, 0xE0 // jmp [rax] +}; + +#define DllMainAMD64TemplateSize sizeof(DllMainAMD64Template) +#define CorDllMainAMD64IATOffset 2 + +//***************************************************************************** +// This stub is designed for an ia64 Windows application. It will call the +// _CorExeMain function in mscoree.dll. This entry point will in turn load +// and run the IL program. +// +// jump _CorExeMain(); +// +// The code jumps to the imported function _CorExeMain using the iat. +// We set the value of gp to point at the iat table entry for _CorExeMain +//***************************************************************************** + +const BYTE ExeMainIA64Template[] = +{ + // ld8 r9 = [gp] ;; + // ld8 r10 = [r9],8 + // nop.i ;; + // ld8 gp = [r9] + // mov b6 = r10 + // br.cond.sptk.few b6 + // + 0x0B, 0x48, 0x00, 0x02, 0x18, 0x10, 0xA0, 0x40, + 0x24, 0x30, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x10, 0x08, 0x00, 0x12, 0x18, 0x10, 0x60, 0x50, + 0x04, 0x80, 0x03, 0x00, 0x60, 0x00, 0x80, 0x00 +}; + +#define ExeMainIA64TemplateSize sizeof(ExeMainIA64Template) + +//***************************************************************************** +// This stub is designed for an ia64 Windows application. It will call the +// _CorDllMain function in mscoree.dll with the base entry point for +// the loaded DLL. This entry point will in turn load and run the IL program. +// +// jump _CorDllMain +// +// The code jumps to the imported function _CorDllMain using the iat. +// We set the value of gp to point at the iat table entry for _CorDllMain +//***************************************************************************** + +const BYTE DllMainIA64Template[] = +{ + // ld8 r9 = [gp] ;; + // ld8 r10 = [r9],8 + // nop.i ;; + // ld8 gp = [r9] + // mov b6 = r10 + // br.cond.sptk.few b6 + // + 0x0B, 0x48, 0x00, 0x02, 0x18, 0x10, 0xA0, 0x40, + 0x24, 0x30, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, + 0x10, 0x08, 0x00, 0x12, 0x18, 0x10, 0x60, 0x50, + 0x04, 0x80, 0x03, 0x00, 0x60, 0x00, 0x80, 0x00 +}; + +#define DllMainIA64TemplateSize sizeof(DllMainIA64Template) + #ifdef EMIT_FIXUPS // Emitted PEFIXUP structure looks like this diff --git a/src/coreclr/src/dlls/mscorpe/stubs.h b/src/coreclr/src/dlls/mscorpe/stubs.h deleted file mode 100644 index f0e7ce380df3..000000000000 --- a/src/coreclr/src/dlls/mscorpe/stubs.h +++ /dev/null @@ -1,168 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -//***************************************************************************** -// Stubs.h -// -// This file contains a template for the default entry point stubs of a COM+ -// IL only program. One can emit these stubs (with some fix-ups) and make -// the code supplied the entry point value for the image. The fix-ups will -// in turn cause mscoree.dll to be loaded and the correct entry point to be -// called. 
-// -// Note: Although these stubs contain x86 specific code, they are used -// for all platforms -// -//***************************************************************************** -#ifndef __STUBS_H__ -#define __STUBS_H__ - -//***************************************************************************** -// This stub is designed for a x86 Windows application. It will call the -// _CorExeMain function in mscoree.dll. This entry point will in turn load -// and run the IL program. -// -// jump _CorExeMain(); -// -// The code jumps to the imported function _CorExeMain using the iat. -// The address in the template is address of the iat entry which is -// fixed up by the loader when the image is paged in. -//***************************************************************************** - -constexpr BYTE ExeMainX86Template[] = -{ - // Jump through IAT to _CorExeMain - 0xFF, 0x25, // jmp [iat:_CorDllMain entry] - 0x00, 0x00, 0x00, 0x00, // address to replace - -}; - -#define ExeMainX86TemplateSize sizeof(ExeMainX86Template) -#define CorExeMainX86IATOffset 2 - -//***************************************************************************** -// This stub is designed for a x86 Windows application. It will call the -// _CorDllMain function in mscoree.dll with with the base entry point for -// the loaded DLL. This entry point will in turn load and run the IL program. -// -// jump _CorDllMain -// -// The code jumps to the imported function _CorExeMain using the iat. -// The address in the template is address of the iat entry which is -// fixed up by the loader when the image is paged in. -//***************************************************************************** - -constexpr BYTE DllMainX86Template[] = -{ - // Jump through IAT to CorDllMain - 0xFF, 0x25, // jmp [iat:_CorDllMain entry] - 0x00, 0x00, 0x00, 0x00, // address to replace -}; - -#define DllMainX86TemplateSize sizeof(DllMainX86Template) -#define CorDllMainX86IATOffset 2 - -//***************************************************************************** -// This stub is designed for a AMD64 Windows application. It will call the -// _CorExeMain function in mscoree.dll. This entry point will in turn load -// and run the IL program. -// -// mov rax, _CorExeMain(); -// jmp [rax] -// -// The code jumps to the imported function _CorExeMain using the iat. -// The address in the template is address of the iat entry which is -// fixed up by the loader when the image is paged in. -//***************************************************************************** - -constexpr BYTE ExeMainAMD64Template[] = -{ - // Jump through IAT to _CorExeMain - 0x48, 0xA1, // rex.w rex.b mov rax,[following address] - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,//address of iat:_CorExeMain entry - 0xFF, 0xE0 // jmp [rax] -}; - -#define ExeMainAMD64TemplateSize sizeof(ExeMainAMD64Template) -#define CorExeMainAMD64IATOffset 2 - -//***************************************************************************** -// This stub is designed for a AMD64 Windows application. It will call the -// _CorDllMain function in mscoree.dll with with the base entry point for -// the loaded DLL. This entry point will in turn load and run the IL program. -// -// mov rax, _CorDllMain(); -// jmp [rax] -// -// The code jumps to the imported function _CorDllMain using the iat. -// The address in the template is address of the iat entry which is -// fixed up by the loader when the image is paged in. 
-//***************************************************************************** - -constexpr BYTE DllMainAMD64Template[] = -{ - // Jump through IAT to CorDllMain - 0x48, 0xA1, // rex.w rex.b mov rax,[following address] - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,//address of iat:_CorDllMain entry - 0xFF, 0xE0 // jmp [rax] -}; - -#define DllMainAMD64TemplateSize sizeof(DllMainAMD64Template) -#define CorDllMainAMD64IATOffset 2 - -//***************************************************************************** -// This stub is designed for an ia64 Windows application. It will call the -// _CorExeMain function in mscoree.dll. This entry point will in turn load -// and run the IL program. -// -// jump _CorExeMain(); -// -// The code jumps to the imported function _CorExeMain using the iat. -// We set the value of gp to point at the iat table entry for _CorExeMain -//***************************************************************************** - -constexpr BYTE ExeMainIA64Template[] = -{ - // ld8 r9 = [gp] ;; - // ld8 r10 = [r9],8 - // nop.i ;; - // ld8 gp = [r9] - // mov b6 = r10 - // br.cond.sptk.few b6 - // - 0x0B, 0x48, 0x00, 0x02, 0x18, 0x10, 0xA0, 0x40, - 0x24, 0x30, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x10, 0x08, 0x00, 0x12, 0x18, 0x10, 0x60, 0x50, - 0x04, 0x80, 0x03, 0x00, 0x60, 0x00, 0x80, 0x00 -}; - -#define ExeMainIA64TemplateSize sizeof(ExeMainIA64Template) - -//***************************************************************************** -// This stub is designed for an ia64 Windows application. It will call the -// _CorDllMain function in mscoree.dll with with the base entry point for -// the loaded DLL. This entry point will in turn load and run the IL program. -// -// jump _CorDllMain -// -// The code jumps to the imported function _CorExeMain using the iat. -// We set the value of gp to point at the iat table entry for _CorExeMain -//***************************************************************************** - -constexpr BYTE DllMainIA64Template[] = -{ - // ld8 r9 = [gp] ;; - // ld8 r10 = [r9],8 - // nop.i ;; - // ld8 gp = [r9] - // mov b6 = r10 - // br.cond.sptk.few b6 - // - 0x0B, 0x48, 0x00, 0x02, 0x18, 0x10, 0xA0, 0x40, - 0x24, 0x30, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, - 0x10, 0x08, 0x00, 0x12, 0x18, 0x10, 0x60, 0x50, - 0x04, 0x80, 0x03, 0x00, 0x60, 0x00, 0x80, 0x00 -}; - -#define DllMainIA64TemplateSize sizeof(DllMainIA64Template) - -#endif // __STUBS_H__ diff --git a/src/coreclr/src/dlls/mscorrc/CMakeLists.txt b/src/coreclr/src/dlls/mscorrc/CMakeLists.txt index ed5ee8876508..e114ec19cea6 100644 --- a/src/coreclr/src/dlls/mscorrc/CMakeLists.txt +++ b/src/coreclr/src/dlls/mscorrc/CMakeLists.txt @@ -19,7 +19,9 @@ if(CLR_CMAKE_HOST_WIN32) else() build_resources(${CMAKE_CURRENT_SOURCE_DIR}/include.rc mscorrc TARGET_CPP_FILE) - add_library_clr(mscorrc OBJECT + add_library_clr(mscorrc_obj OBJECT ${TARGET_CPP_FILE} ) + add_library(mscorrc INTERFACE) + target_sources(mscorrc INTERFACE $<TARGET_OBJECTS:mscorrc_obj>) endif(CLR_CMAKE_HOST_WIN32) diff --git a/src/coreclr/src/dlls/mscorrc/mscorrc.rc b/src/coreclr/src/dlls/mscorrc/mscorrc.rc index a50f742cef00..360442044ca4 100644 --- a/src/coreclr/src/dlls/mscorrc/mscorrc.rc +++ b/src/coreclr/src/dlls/mscorrc/mscorrc.rc @@ -182,6 +182,7 @@ BEGIN IDS_EE_NDIRECT_UNSUPPORTED_SIG "Method's type signature is not PInvoke compatible." IDS_EE_COM_UNSUPPORTED_SIG "Method's type signature is not Interop compatible." IDS_EE_COM_UNSUPPORTED_TYPE "The method returned a COM Variant type that is not Interop compatible." 
+ IDS_EE_MULTIPLE_CALLCONV_UNSUPPORTED "Multiple unmanaged calling conventions are specified. Only a single calling convention is supported." IDS_EE_NDIRECT_BADNATL "Invalid PInvoke or UnmanagedFunctionPointer metadata format." IDS_EE_NDIRECT_BADNATL_CALLCONV "Invalid PInvoke or UnmanagedFunctionPointer calling convention." IDS_EE_NDIRECT_BADNATL_VARARGS_CALLCONV "Invalid PInvoke calling convention. Vararg functions must use the cdecl calling convention." diff --git a/src/coreclr/src/dlls/mscorrc/resource.h b/src/coreclr/src/dlls/mscorrc/resource.h index 738b751c7fae..b78c5d84c304 100644 --- a/src/coreclr/src/dlls/mscorrc/resource.h +++ b/src/coreclr/src/dlls/mscorrc/resource.h @@ -43,6 +43,7 @@ #define IDS_EE_COM_UNSUPPORTED_SIG 0x170d #define IDS_EE_NOSYNCHRONIZED 0x170f #define IDS_EE_NDIRECT_BADNATL_THISCALL 0x1710 +#define IDS_EE_MULTIPLE_CALLCONV_UNSUPPORTED 0x1711 #define IDS_EE_LOAD_BAD_MAIN_SIG 0x1712 #define IDS_EE_COM_UNSUPPORTED_TYPE 0x1713 diff --git a/src/coreclr/src/gc/CMakeLists.txt b/src/coreclr/src/gc/CMakeLists.txt index c46f46fdfbae..c68bbcefc347 100644 --- a/src/coreclr/src/gc/CMakeLists.txt +++ b/src/coreclr/src/gc/CMakeLists.txt @@ -1,11 +1,11 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) -# Local GC meta-issue: https://github.com/dotnet/coreclr/issues/11518 +# Local GC meta-issue: https://github.com/dotnet/runtime/issues/8061 -# https://github.com/dotnet/coreclr/issues/11516 +# https://github.com/dotnet/runtime/issues/8059 remove_definitions(-DSTRESS_HEAP) -# https://github.com/dotnet/coreclr/issues/11519 +# https://github.com/dotnet/runtime/issues/8062 remove_definitions(-DWRITE_BARRIER_CHECK) set( GC_SOURCES @@ -42,7 +42,7 @@ endif(CLR_CMAKE_HOST_UNIX) if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) set ( GC_SOURCES ${GC_SOURCES} - vxsort/isa_detection_dummy.cpp + vxsort/isa_detection.cpp vxsort/do_vxsort_avx2.cpp vxsort/do_vxsort_avx512.cpp vxsort/machine_traits.avx2.cpp @@ -50,6 +50,7 @@ if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp + vxsort/smallsort/avx2_load_mask_tables.cpp ) endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) @@ -112,7 +113,7 @@ target_link_libraries(clrgc ${GC_LINK_LIBRARIES}) install_clr(TARGETS clrgc) if(CLR_CMAKE_HOST_UNIX) - # dprintf causes many warnings (https://github.com/dotnet/coreclr/issues/13367) + # dprintf causes many warnings (https://github.com/dotnet/runtime/issues/8737) add_compile_options(-Wno-format) endif(CLR_CMAKE_HOST_UNIX) diff --git a/src/coreclr/src/gc/env/gcenv.ee.h b/src/coreclr/src/gc/env/gcenv.ee.h index 986acabacbec..d596575ad2b4 100644 --- a/src/coreclr/src/gc/env/gcenv.ee.h +++ b/src/coreclr/src/gc/env/gcenv.ee.h @@ -86,7 +86,7 @@ class GCToEEInterface static uint32_t GetTotalNumSizedRefHandles(); static bool AnalyzeSurvivorsRequested(int condemnedGeneration); - static void AnalyzeSurvivorsFinished(int condemnedGeneration); + static void AnalyzeSurvivorsFinished(size_t gcIndex, int condemnedGeneration, uint64_t promoted_bytes, void (*reportGenerationBounds)()); static void VerifySyncTableEntry(); static void UpdateGCEventStatus(int publicLevel, int publicKeywords, int privateLevel, int privateKeywords); diff --git a/src/coreclr/src/gc/env/gcenv.os.h 
b/src/coreclr/src/gc/env/gcenv.os.h index 3dee37ad8e54..f8724fc8687a 100644 --- a/src/coreclr/src/gc/env/gcenv.os.h +++ b/src/coreclr/src/gc/env/gcenv.os.h @@ -290,7 +290,7 @@ class GCToOSInterface // size - size of the virtual memory range // Return: // Address of the allocated memory - static void* VirtualReserveAndCommitLargePages(size_t size); + static void* VirtualReserveAndCommitLargePages(size_t size, uint16_t node = NUMA_NODE_UNDEFINED); // Decomit virtual memory range. // Parameters: diff --git a/src/coreclr/src/gc/gc.cpp b/src/coreclr/src/gc/gc.cpp index 6bf8e51a6bce..a678a1b7fdc1 100644 --- a/src/coreclr/src/gc/gc.cpp +++ b/src/coreclr/src/gc/gc.cpp @@ -53,6 +53,9 @@ BOOL bgc_heap_walk_for_etw_p = FALSE; #define MAX_PTR ((uint8_t*)(~(ptrdiff_t)0)) #define commit_min_th (16*OS_PAGE_SIZE) +#define MIN_SOH_CROSS_GEN_REFS (400) +#define MIN_LOH_CROSS_GEN_REFS (800) + static size_t smoothed_desired_per_heap = 0; #ifdef SERVER_GC @@ -445,7 +448,7 @@ void log_va_msg(const char *fmt, va_list args) { gc_log_lock.Enter(); - const int BUFFERSIZE = 512; + const int BUFFERSIZE = 4096; static char rgchBuffer[BUFFERSIZE]; char * pBuffer = &rgchBuffer[0]; @@ -1984,6 +1987,8 @@ void stomp_write_barrier_resize(bool is_runtime_suspended, bool requires_upper_b void stomp_write_barrier_ephemeral(uint8_t* ephemeral_low, uint8_t* ephemeral_high) { + initGCShadow(); + WriteBarrierParameters args = {}; args.operation = WriteBarrierOp::StompEphemeral; args.is_runtime_suspended = true; @@ -2089,7 +2094,7 @@ void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); #endif //USE_INTROSORT void* virtual_alloc (size_t size); -void* virtual_alloc (size_t size, bool use_large_pages_p); +void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node = NUMA_NODE_UNDEFINED); /* per heap static initialization */ #if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) @@ -2182,6 +2187,8 @@ size_t gc_heap::current_total_committed_bookkeeping = 0; double gc_heap::short_plugs_pad_ratio = 0; #endif //SHORT_PLUGS +int gc_heap::generation_skip_ratio_threshold = 0; + uint64_t gc_heap::suspended_start_time = 0; uint64_t gc_heap::end_gc_time = 0; uint64_t gc_heap::total_suspended_time = 0; @@ -2271,6 +2278,12 @@ uint32_t gc_heap::fgn_maxgen_percent = 0; size_t gc_heap::fgn_last_alloc = 0; int gc_heap::generation_skip_ratio = 100; +#ifdef FEATURE_CARD_MARKING_STEALING +VOLATILE(size_t) gc_heap::n_eph_soh = 0; +VOLATILE(size_t) gc_heap::n_gen_soh = 0; +VOLATILE(size_t) gc_heap::n_eph_loh = 0; +VOLATILE(size_t) gc_heap::n_gen_loh = 0; +#endif //FEATURE_CARD_MARKING_STEALING uint64_t gc_heap::loh_alloc_since_cg = 0; @@ -3039,7 +3052,7 @@ gc_heap::dt_low_card_table_efficiency_p (gc_tuning_point tp) /* promote into max-generation if the card table has too many * generation faults besides the n -> 0 */ - ret = (generation_skip_ratio < 30); + ret = (generation_skip_ratio < generation_skip_ratio_threshold); break; } @@ -3914,6 +3927,14 @@ struct imemory_data uint8_t* memory_base; }; +struct numa_reserved_block +{ + uint8_t* memory_base; + size_t block_size; + + numa_reserved_block() : memory_base(nullptr), block_size(0) { } +}; + struct initial_memory_details { imemory_data *initial_memory; @@ -3935,7 +3956,8 @@ struct initial_memory_details ALLATONCE = 1, EACH_GENERATION, EACH_BLOCK, - ALLATONCE_SEPARATED_POH + ALLATONCE_SEPARATED_POH, + EACH_NUMA_NODE }; size_t allocation_pattern; @@ -3977,11 +3999,13 @@ struct initial_memory_details } }; + int numa_reserved_block_count; + numa_reserved_block* 
numa_reserved_block_table; }; initial_memory_details memory_details; -BOOL gc_heap::reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p, bool separated_poh_p) +BOOL gc_heap::reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p, bool separated_poh_p, uint16_t* heap_no_to_numa_node) { BOOL reserve_success = FALSE; @@ -4025,127 +4049,286 @@ BOOL gc_heap::reserve_initial_memory (size_t normal_size, size_t large_size, siz return FALSE; } - size_t temp_pinned_size = (separated_poh_p ? 0 : pinned_size); - size_t separate_pinned_size = memory_details.block_count * pinned_size; - size_t requestedMemory = memory_details.block_count * (normal_size + large_size + temp_pinned_size); - - uint8_t* allatonce_block = (uint8_t*)virtual_alloc (requestedMemory, use_large_pages_p); - uint8_t* separated_poh_block = nullptr; - if (allatonce_block && separated_poh_p) + // figure out number of NUMA nodes and allocate additional table for NUMA local reservation + memory_details.numa_reserved_block_count = 0; + memory_details.numa_reserved_block_table = nullptr; + int numa_node_count = 0; + if (heap_no_to_numa_node != nullptr) { - separated_poh_block = (uint8_t*)virtual_alloc (separate_pinned_size, false); - if (!separated_poh_block) + uint16_t highest_numa_node = 0; + + // figure out the highest NUMA node + for (int heap_no = 0; heap_no < num_heaps; heap_no++) { - virtual_free (allatonce_block, requestedMemory); - allatonce_block = nullptr; + uint16_t heap_numa_node = heap_no_to_numa_node[heap_no]; + highest_numa_node = max (highest_numa_node, heap_numa_node); } - } - if (allatonce_block) - { - if (separated_poh_p) + + assert (highest_numa_node < MAX_SUPPORTED_CPUS); + + numa_node_count = highest_numa_node + 1; + memory_details.numa_reserved_block_count = numa_node_count * (1 + separated_poh_p); + memory_details.numa_reserved_block_table = new (nothrow) numa_reserved_block[memory_details.numa_reserved_block_count]; + if (memory_details.numa_reserved_block_table == nullptr) { - g_gc_lowest_address = min (allatonce_block, separated_poh_block); - g_gc_highest_address = max ((allatonce_block + requestedMemory), (separated_poh_block + separate_pinned_size)); - memory_details.allocation_pattern = initial_memory_details::ALLATONCE_SEPARATED_POH; + // we couldn't get the memory - continue as if doing the non-NUMA case + dprintf(2, ("failed to reserve %Id bytes for numa_reserved_block data", memory_details.numa_reserved_block_count * sizeof(numa_reserved_block))); + memory_details.numa_reserved_block_count = 0; } - else + } + + if (memory_details.numa_reserved_block_table != nullptr) + { + // figure out how much to reserve on each NUMA node + // note this can be very different between NUMA nodes, depending on + // which processors our heaps are associated with + size_t merged_pinned_size = separated_poh_p ? 
0 : pinned_size; + for (int heap_no = 0; heap_no < num_heaps; heap_no++) { - g_gc_lowest_address = allatonce_block; - g_gc_highest_address = allatonce_block + requestedMemory; - memory_details.allocation_pattern = initial_memory_details::ALLATONCE; + uint16_t heap_numa_node = heap_no_to_numa_node[heap_no]; + + numa_reserved_block * block = &memory_details.numa_reserved_block_table[heap_numa_node]; + + // add the size required for this heap + block->block_size += normal_size + large_size + merged_pinned_size; + + if (separated_poh_p) + { + numa_reserved_block* pinned_block = &memory_details.numa_reserved_block_table[numa_node_count + heap_numa_node]; + + // add the pinned size required for this heap + pinned_block->block_size += pinned_size; + } } - for (int i = 0; i < memory_details.block_count; i++) + // reserve the appropriate size on each NUMA node + bool failure = false; + for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++) { - memory_details.initial_normal_heap[i].memory_base = allatonce_block + - (i * normal_size); - memory_details.initial_large_heap[i].memory_base = allatonce_block + - (memory_details.block_count * normal_size) + (i * large_size); - if (separated_poh_p) + numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index]; + + if (block->block_size == 0) + continue; + + int numa_node = block_index % numa_node_count; + bool pinned_block = block_index >= numa_node_count; + block->memory_base = (uint8_t*)virtual_alloc (block->block_size, use_large_pages_p && !pinned_block, numa_node); + if (block->memory_base == nullptr) { - memory_details.initial_pinned_heap[i].memory_base = separated_poh_block + - (i * pinned_size); + dprintf(2, ("failed to reserve %Id bytes on NUMA node %u", block->block_size, numa_node)); + failure = true; + break; } else { - memory_details.initial_pinned_heap[i].memory_base = allatonce_block + - (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size); + g_gc_lowest_address = min(g_gc_lowest_address, block->memory_base); + g_gc_highest_address = max(g_gc_highest_address, block->memory_base + block->block_size); } + } + if (failure) + { + // if we had any failures, undo the work done so far + // we will instead use one of the other allocation patterns + // we could try to use what we did manage to reserve, but that gets complicated + for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++) + { + numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index]; + + if (block->memory_base != nullptr) + { + virtual_free(block->memory_base, block->block_size); + block->memory_base = nullptr; + } + } + delete [] memory_details.numa_reserved_block_table; + memory_details.numa_reserved_block_table = nullptr; + memory_details.numa_reserved_block_count = 0; + } + else + { + // for each NUMA node, give out the memory to its heaps + for (uint16_t numa_node = 0; numa_node < numa_node_count; numa_node++) + { + numa_reserved_block * block = &memory_details.numa_reserved_block_table[numa_node]; + + numa_reserved_block* pinned_block = separated_poh_p ? 
&memory_details.numa_reserved_block_table[numa_node_count + numa_node] : nullptr; + + // if the block's size is 0, there can be no heaps on this NUMA node + if (block->block_size == 0) + { + assert((pinned_block == nullptr) || (pinned_block->block_size == 0)); + continue; + } + + uint8_t* memory_base = block->memory_base; + uint8_t* pinned_memory_base = ((pinned_block == nullptr) ? nullptr : pinned_block->memory_base); + for (int heap_no = 0; heap_no < num_heaps; heap_no++) + { + uint16_t heap_numa_node = heap_no_to_numa_node[heap_no]; + + if (heap_numa_node != numa_node) + { + // this heap is on another NUMA node + continue; + } + + memory_details.initial_normal_heap[heap_no].memory_base = memory_base; + memory_base += normal_size; + + memory_details.initial_large_heap[heap_no].memory_base = memory_base; + memory_base += large_size; + + if (separated_poh_p) + { + memory_details.initial_pinned_heap[heap_no].memory_base = pinned_memory_base; + pinned_memory_base += pinned_size; + } + else + { + memory_details.initial_pinned_heap[heap_no].memory_base = memory_base; + memory_base += pinned_size; + } + } + // sanity check - we should be at the end of the memory block for this NUMA node + assert (memory_base == block->memory_base + block->block_size); + assert ((pinned_block == nullptr) || (pinned_memory_base == pinned_block->memory_base + pinned_block->block_size)); + } + memory_details.allocation_pattern = initial_memory_details::EACH_NUMA_NODE; reserve_success = TRUE; } } - else + + if (!reserve_success) { - // try to allocate 3 blocks - uint8_t* b1 = (uint8_t*)virtual_alloc (memory_details.block_count * normal_size, use_large_pages_p); - uint8_t* b2 = (uint8_t*)virtual_alloc (memory_details.block_count * large_size, use_large_pages_p); - uint8_t* b3 = (uint8_t*)virtual_alloc (memory_details.block_count * pinned_size, use_large_pages_p && !separated_poh_p); + size_t temp_pinned_size = (separated_poh_p ? 
0 : pinned_size); + size_t separate_pinned_size = memory_details.block_count * pinned_size; + size_t requestedMemory = memory_details.block_count * (normal_size + large_size + temp_pinned_size); - if (b1 && b2 && b3) + uint8_t* allatonce_block = (uint8_t*)virtual_alloc(requestedMemory, use_large_pages_p); + uint8_t* separated_poh_block = nullptr; + if (allatonce_block && separated_poh_p) + { + separated_poh_block = (uint8_t*)virtual_alloc(separate_pinned_size, false); + if (!separated_poh_block) + { + virtual_free(allatonce_block, requestedMemory); + allatonce_block = nullptr; + } + } + if (allatonce_block) { - memory_details.allocation_pattern = initial_memory_details::EACH_GENERATION; - g_gc_lowest_address = min (b1, min(b2, b3)); - g_gc_highest_address = max (b1 + memory_details.block_count * normal_size, - max (b2 + memory_details.block_count * large_size, - b3 + memory_details.block_count * pinned_size)); + if (separated_poh_p) + { + g_gc_lowest_address = min(allatonce_block, separated_poh_block); + g_gc_highest_address = max((allatonce_block + requestedMemory), (separated_poh_block + separate_pinned_size)); + memory_details.allocation_pattern = initial_memory_details::ALLATONCE_SEPARATED_POH; + } + else + { + g_gc_lowest_address = allatonce_block; + g_gc_highest_address = allatonce_block + requestedMemory; + memory_details.allocation_pattern = initial_memory_details::ALLATONCE; + } for (int i = 0; i < memory_details.block_count; i++) { - memory_details.initial_normal_heap[i].memory_base = b1 + (i * normal_size); - memory_details.initial_large_heap[i].memory_base = b2 + (i * large_size); - memory_details.initial_pinned_heap[i].memory_base = b3 + (i * pinned_size); + memory_details.initial_normal_heap[i].memory_base = allatonce_block + + (i * normal_size); + memory_details.initial_large_heap[i].memory_base = allatonce_block + + (memory_details.block_count * normal_size) + (i * large_size); + if (separated_poh_p) + { + memory_details.initial_pinned_heap[i].memory_base = separated_poh_block + + (i * pinned_size); + } + else + { + memory_details.initial_pinned_heap[i].memory_base = allatonce_block + + (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size); + } } - reserve_success = TRUE; } else { - // allocation failed, we'll go on to try allocating each block. 
- // We could preserve the b1 alloc, but code complexity increases - if (b1) - virtual_free (b1, memory_details.block_count * normal_size); - if (b2) - virtual_free (b2, memory_details.block_count * large_size); - if (b3) - virtual_free (b3, memory_details.block_count * pinned_size); - } + // try to allocate 3 blocks + uint8_t* b1 = (uint8_t*)virtual_alloc(memory_details.block_count * normal_size, use_large_pages_p); + uint8_t* b2 = (uint8_t*)virtual_alloc(memory_details.block_count * large_size, use_large_pages_p); + uint8_t* b3 = (uint8_t*)virtual_alloc(memory_details.block_count * pinned_size, use_large_pages_p && !separated_poh_p); - if ((b2 == NULL) && (memory_details.block_count > 1)) - { - memory_details.allocation_pattern = initial_memory_details::EACH_BLOCK; + if (b1 && b2 && b3) + { + memory_details.allocation_pattern = initial_memory_details::EACH_GENERATION; + g_gc_lowest_address = min(b1, min(b2, b3)); + g_gc_highest_address = max(b1 + memory_details.block_count * normal_size, + max(b2 + memory_details.block_count * large_size, + b3 + memory_details.block_count * pinned_size)); - imemory_data* current_block = memory_details.initial_memory; - for (int i = 0; i < (memory_details.block_count * (total_generation_count - ephemeral_generation_count)); i++, current_block++) + for (int i = 0; i < memory_details.block_count; i++) + { + memory_details.initial_normal_heap[i].memory_base = b1 + (i * normal_size); + memory_details.initial_large_heap[i].memory_base = b2 + (i * large_size); + memory_details.initial_pinned_heap[i].memory_base = b3 + (i * pinned_size); + } + + reserve_success = TRUE; + } + else { - size_t block_size = memory_details.block_size (i); - current_block->memory_base = - (uint8_t*)virtual_alloc (block_size, use_large_pages_p); - if (current_block->memory_base == 0) - { - // Free the blocks that we've allocated so far - current_block = memory_details.initial_memory; - for (int j = 0; j < i; j++, current_block++) { - if (current_block->memory_base != 0) { - block_size = memory_details.block_size (i); - virtual_free (current_block->memory_base, block_size); + // allocation failed, we'll go on to try allocating each block. 
+ // We could preserve the b1 alloc, but code complexity increases + if (b1) + virtual_free(b1, memory_details.block_count * normal_size); + if (b2) + virtual_free(b2, memory_details.block_count * large_size); + if (b3) + virtual_free(b3, memory_details.block_count * pinned_size); + } + + if ((b2 == NULL) && (memory_details.block_count > 1)) + { + memory_details.allocation_pattern = initial_memory_details::EACH_BLOCK; + + imemory_data* current_block = memory_details.initial_memory; + for (int i = 0; i < (memory_details.block_count * (total_generation_count - ephemeral_generation_count)); i++, current_block++) + { + size_t block_size = memory_details.block_size(i); + uint16_t numa_node = NUMA_NODE_UNDEFINED; + if (heap_no_to_numa_node != nullptr) + { + int heap_no = i % memory_details.block_count; + numa_node = heap_no_to_numa_node[heap_no]; + } + current_block->memory_base = + (uint8_t*)virtual_alloc(block_size, use_large_pages_p, numa_node); + if (current_block->memory_base == 0) + { + // Free the blocks that we've allocated so far, each with its own block size + current_block = memory_details.initial_memory; + for (int j = 0; j < i; j++, current_block++) { + if (current_block->memory_base != 0) { + block_size = memory_details.block_size(j); + virtual_free(current_block->memory_base, block_size); + } } + reserve_success = FALSE; + break; } - else - { - if (current_block->memory_base < g_gc_lowest_address) - g_gc_lowest_address = current_block->memory_base; - if (((uint8_t*)current_block->memory_base + block_size) > g_gc_highest_address) - g_gc_highest_address = (current_block->memory_base + block_size); + else + { + if (current_block->memory_base < g_gc_lowest_address) + g_gc_lowest_address = current_block->memory_base; + if (((uint8_t*)current_block->memory_base + block_size) > g_gc_highest_address) + g_gc_highest_address = (current_block->memory_base + block_size); + } + reserve_success = TRUE; } - reserve_success = TRUE; } } } + return reserve_success; } @@ -4153,36 +4336,37 @@ void gc_heap::destroy_initial_memory() { if (memory_details.initial_memory != NULL) { - if (memory_details.allocation_pattern == initial_memory_details::ALLATONCE) + switch (memory_details.allocation_pattern) { - virtual_free(memory_details.initial_memory[0].memory_base, + case initial_memory_details::ALLATONCE: + virtual_free (memory_details.initial_memory[0].memory_base, memory_details.block_count*(memory_details.block_size_normal + memory_details.block_size_large + memory_details.block_size_pinned)); - } - else if (memory_details.allocation_pattern == initial_memory_details::ALLATONCE_SEPARATED_POH) - { + break; + + case initial_memory_details::ALLATONCE_SEPARATED_POH: virtual_free(memory_details.initial_memory[0].memory_base, - memory_details.block_count*(memory_details.block_size_normal + - memory_details.block_size_large)); + memory_details.block_count * (memory_details.block_size_normal + + memory_details.block_size_large)); virtual_free(memory_details.initial_pinned_heap[0].memory_base, - memory_details.block_count*(memory_details.block_size_pinned)); - } - else if (memory_details.allocation_pattern == initial_memory_details::EACH_GENERATION) - { + memory_details.block_count * (memory_details.block_size_pinned)); + break; + + case initial_memory_details::EACH_GENERATION: virtual_free (memory_details.initial_normal_heap[0].memory_base, memory_details.block_count*memory_details.block_size_normal); virtual_free (memory_details.initial_large_heap[0].memory_base, 
memory_details.block_count*memory_details.block_size_large); - virtual_free (memory_details.initial_pinned_heap[0].memory_base, + virtual_free (memory_details.initial_pinned_heap[0].memory_base, memory_details.block_count*memory_details.block_size_pinned); - } - else + break; + + case initial_memory_details::EACH_BLOCK: { - assert (memory_details.allocation_pattern == initial_memory_details::EACH_BLOCK); - imemory_data *current_block = memory_details.initial_memory; - for (int i = 0; i < (memory_details.block_count*(total_generation_count - ephemeral_generation_count)); i++, current_block++) + imemory_data* current_block = memory_details.initial_memory; + for (int i = 0; i < (memory_details.block_count * (total_generation_count - ephemeral_generation_count)); i++, current_block++) { size_t block_size = memory_details.block_size (i); if (current_block->memory_base != NULL) @@ -4190,6 +4374,24 @@ void gc_heap::destroy_initial_memory() virtual_free (current_block->memory_base, block_size); } } + break; + } + case initial_memory_details::EACH_NUMA_NODE: + for (int block_index = 0; block_index < memory_details.numa_reserved_block_count; block_index++) + { + numa_reserved_block * block = &memory_details.numa_reserved_block_table[block_index]; + + if (block->memory_base != nullptr) + { + virtual_free (block->memory_base, block->block_size); + } + } + delete [] memory_details.numa_reserved_block_table; + break; + + default: + assert (!"unexpected allocation_pattern"); + break; } delete [] memory_details.initial_memory; @@ -4215,7 +4417,7 @@ void* virtual_alloc (size_t size) return virtual_alloc(size, false); } -void* virtual_alloc (size_t size, bool use_large_pages_p) +void* virtual_alloc (size_t size, bool use_large_pages_p, uint16_t numa_node) { size_t requested_size = size; @@ -4238,8 +4440,8 @@ void* virtual_alloc (size_t size, bool use_large_pages_p) #endif // !FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP void* prgmem = use_large_pages_p ? - GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size) : - GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags); + GCToOSInterface::VirtualReserveAndCommitLargePages(requested_size, numa_node) : + GCToOSInterface::VirtualReserve(requested_size, card_size * card_word_width, flags, numa_node); void *aligned_mem = prgmem; // We don't want (prgmem + size) to be right at the end of the address space @@ -4817,10 +5019,45 @@ class heap_select memset(sniff_buffer, 0, sniff_buf_size*sizeof(uint8_t)); } - //can not enable gc numa aware, force all heaps to be in - //one numa node by filling the array with all 0s - if (!GCToOSInterface::CanEnableGCNumaAware()) - memset(heap_no_to_numa_node, 0, sizeof (heap_no_to_numa_node)); + bool do_numa = GCToOSInterface::CanEnableGCNumaAware(); + + // we want to assign heap indices such that there is a contiguous + // range of heap numbers for each numa node + + // we do this in two passes: + // 1. gather processor numbers and numa node numbers for all heaps + // 2. 
assign heap numbers for each numa node + + // Pass 1: gather processor numbers and numa node numbers + uint16_t proc_no[MAX_SUPPORTED_CPUS]; + uint16_t node_no[MAX_SUPPORTED_CPUS]; + uint16_t max_node_no = 0; + for (int i = 0; i < n_heaps; i++) + { + if (!GCToOSInterface::GetProcessorForHeap (i, &proc_no[i], &node_no[i])) + break; + if (!do_numa || node_no[i] == NUMA_NODE_UNDEFINED) + node_no[i] = 0; + max_node_no = max(max_node_no, node_no[i]); + } + + // Pass 2: assign heap numbers by numa node + int cur_heap_no = 0; + for (uint16_t cur_node_no = 0; cur_node_no <= max_node_no; cur_node_no++) + { + for (int i = 0; i < n_heaps; i++) + { + if (node_no[i] != cur_node_no) + continue; + + // we found a heap on cur_node_no + heap_no_to_proc_no[cur_heap_no] = proc_no[i]; + heap_no_to_numa_node[cur_heap_no] = cur_node_no; + proc_no_to_numa_node[proc_no[i]] = cur_node_no; + + cur_heap_no++; + } + } return TRUE; } @@ -5021,6 +5258,9 @@ class heap_select uint16_t numa_node = heap_no_to_numa_node[hn]; *start = (int)numa_node_to_heap_map[numa_node]; *end = (int)(numa_node_to_heap_map[numa_node+1]); +#ifdef HEAP_BALANCE_INSTRUMENTATION + dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPget_heap_range: %d is in numa node %d, start = %d, end = %d", hn, numa_node, *start, *end)); +#endif //HEAP_BALANCE_INSTRUMENTATION } // This gets the next valid numa node index starting at current_index+1. @@ -5206,7 +5446,7 @@ void add_to_hb_numa ( (hb_info_proc->index)++; } -const int hb_log_buffer_size = 1024; +const int hb_log_buffer_size = 4096; static char hb_log_buffer[hb_log_buffer_size]; int last_hb_recorded_gc_index = -1; #endif //HEAP_BALANCE_INSTRUMENTATION @@ -5421,24 +5661,6 @@ void gc_heap::destroy_thread_support () } } -bool get_proc_and_numa_for_heap (int heap_number) -{ - uint16_t proc_no; - uint16_t node_no; - - bool res = GCToOSInterface::GetProcessorForHeap (heap_number, &proc_no, &node_no); - if (res) - { - heap_select::set_proc_no_for_heap (heap_number, proc_no); - if (node_no != NUMA_NODE_UNDEFINED) - { - heap_select::set_numa_node_for_heap_and_proc (heap_number, proc_no, node_no); - } - } - - return res; -} - void set_thread_affinity_for_heap (int heap_number, uint16_t proc_no) { if (!GCToOSInterface::SetThreadAffinity (proc_no)) @@ -5622,7 +5844,7 @@ bool gc_heap::virtual_commit (void* address, size_t size, gc_oh_num oh, int h_nu check_commit_cs.Enter(); bool exceeded_p = false; - if (heap_hard_limit_oh[0] != 0) + if (heap_hard_limit_oh[soh] != 0) { if ((oh != gc_oh_num::none) && (committed_by_oh[oh] + size) > heap_hard_limit_oh[oh]) { @@ -8282,44 +8504,16 @@ static void do_vxsort (uint8_t** item_array, ptrdiff_t item_count, uint8_t* rang if (IsSupportedInstructionSet (InstructionSet::AVX2) && (item_count > AVX2_THRESHOLD_SIZE)) { - // is the range small enough for a 32-bit sort? 
- // the 32-bit sort is almost twice as fast - ptrdiff_t range = range_high - range_low; - assert(sizeof(uint8_t*) == (1 << 3)); - ptrdiff_t scaled_range = range >> 3; - if ((uint32_t)scaled_range == scaled_range) - { - dprintf (3, ("Sorting mark lists as 32-bit offsets")); - - do_pack_avx2 (item_array, item_count, range_low); - - int32_t* item_array_32 = (int32_t*)item_array; - - // use AVX512F only if the list is large enough to pay for downclocking impact - if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) - { - do_vxsort_avx512 (item_array_32, &item_array_32[item_count - 1]); - } - else - { - do_vxsort_avx2 (item_array_32, &item_array_32[item_count - 1]); - } + dprintf(3, ("Sorting mark lists")); - do_unpack_avx2 (item_array_32, item_count, range_low); + // use AVX512F only if the list is large enough to pay for downclocking impact + if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) + { + do_vxsort_avx512 (item_array, &item_array[item_count - 1], range_low, range_high); } else { - dprintf(3, ("Sorting mark lists")); - - // use AVX512F only if the list is large enough to pay for downclocking impact - if (IsSupportedInstructionSet (InstructionSet::AVX512F) && (item_count > AVX512F_THRESHOLD_SIZE)) - { - do_vxsort_avx512 (item_array, &item_array[item_count - 1]); - } - else - { - do_vxsort_avx2 (item_array, &item_array[item_count - 1]); - } + do_vxsort_avx2 (item_array, &item_array[item_count - 1], range_low, range_high); } } else @@ -8347,6 +8541,8 @@ void gc_heap::sort_mark_list() { if (settings.condemned_generation >= max_generation) { + // fake a mark list overflow so merge_mark_lists knows to quit early + mark_list_index = mark_list_end + 1; return; } @@ -8397,7 +8593,7 @@ void gc_heap::sort_mark_list() high = max (high, heap_segment_allocated (hp->ephemeral_heap_segment)); } - // give up if this is not an ephemeral GC or the mark list size is unreasonably large + // give up if the mark list size is unreasonably large if (total_mark_list_size > (total_ephemeral_size / 256)) { mark_list_index = mark_list_end + 1; @@ -10250,8 +10446,13 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, reserved_memory = 0; size_t initial_heap_size = soh_segment_size + loh_segment_size + poh_segment_size; + uint16_t* heap_no_to_numa_node = nullptr; #ifdef MULTIPLE_HEAPS reserved_memory_limit = initial_heap_size * number_of_heaps; + if (!heap_select::init(number_of_heaps)) + return E_OUTOFMEMORY; + if (GCToOSInterface::CanEnableGCNumaAware()) + heap_no_to_numa_node = heap_select::heap_no_to_numa_node; #else //MULTIPLE_HEAPS reserved_memory_limit = initial_heap_size; int number_of_heaps = 1; @@ -10262,8 +10463,8 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, check_commit_cs.Initialize(); } - bool separated_poh_p = use_large_pages_p && heap_hard_limit_oh[0] && (GCConfig::GetGCHeapHardLimitPOH() == 0) && (GCConfig::GetGCHeapHardLimitPOHPercent() == 0); - if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p, separated_poh_p)) + bool separated_poh_p = use_large_pages_p && heap_hard_limit_oh[soh] && (GCConfig::GetGCHeapHardLimitPOH() == 0) && (GCConfig::GetGCHeapHardLimitPOHPercent() == 0); + if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p, separated_poh_p, heap_no_to_numa_node)) return E_OUTOFMEMORY; #ifdef CARD_BUNDLE @@ -10331,9 +10532,6 @@ HRESULT 
gc_heap::initialize_gc (size_t soh_segment_size, if (!create_thread_support (number_of_heaps)) return E_OUTOFMEMORY; - if (!heap_select::init (number_of_heaps)) - return E_OUTOFMEMORY; - #endif //MULTIPLE_HEAPS #ifdef MULTIPLE_HEAPS @@ -10562,6 +10760,8 @@ gc_heap::init_semi_shared() short_plugs_pad_ratio = (double)DESIRED_PLUG_LENGTH / (double)(DESIRED_PLUG_LENGTH - Align (min_obj_size)); #endif //SHORT_PLUGS + generation_skip_ratio_threshold = (int)GCConfig::GetGCLowSkipRatio(); + ret = 1; cleanup: @@ -10803,6 +11003,12 @@ gc_heap::init_gc_heap (int h_number) generation_skip_ratio = 100; +#ifdef FEATURE_CARD_MARKING_STEALING + n_eph_soh = 0; + n_gen_soh = 0; + n_eph_loh = 0; + n_gen_loh = 0; +#endif //FEATURE_CARD_MARKING_STEALING mark_stack_tos = 0; mark_stack_bos = 0; @@ -11030,7 +11236,6 @@ gc_heap::init_gc_heap (int h_number) } #ifdef MULTIPLE_HEAPS - get_proc_and_numa_for_heap (heap_number); if (!create_gc_thread ()) return 0; @@ -13860,7 +14065,7 @@ void gc_heap::balance_heaps (alloc_context* acontext) home_hp = acontext->get_home_heap ()->pGenGCHeap; proc_hp_num = heap_select::select_heap (acontext); - if (acontext->get_home_heap () != GCHeap::GetHeap (proc_hp_num)) + if (home_hp != gc_heap::g_heaps[proc_hp_num]) { #ifdef HEAP_BALANCE_INSTRUMENTATION alloc_count_p = false; @@ -13869,10 +14074,6 @@ void gc_heap::balance_heaps (alloc_context* acontext) } else if ((acontext->alloc_count & 15) == 0) set_home_heap = TRUE; - - if (set_home_heap) - { - } } else { @@ -13924,84 +14125,153 @@ void gc_heap::balance_heaps (alloc_context* acontext) return; } +#ifdef HEAP_BALANCE_INSTRUMENTATION + proc_no = GCToOSInterface::GetCurrentProcessorNumber (); + if (proc_no != last_proc_no) + { + dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSP: %d->%d", last_proc_no, proc_no)); + multiple_procs_p = true; + last_proc_no = proc_no; + } + + int new_home_hp_num = heap_select::proc_no_to_heap_no[proc_no]; +#else + int new_home_hp_num = heap_select::select_heap(acontext); +#endif //HEAP_BALANCE_INSTRUMENTATION + gc_heap* new_home_hp = gc_heap::g_heaps[new_home_hp_num]; + acontext->set_home_heap (new_home_hp->vm_heap); + int start, end, finish; - heap_select::get_heap_range_for_heap (org_hp->heap_number, &start, &end); + heap_select::get_heap_range_for_heap (new_home_hp_num, &start, &end); finish = start + n_heaps; -try_again: - gc_heap* new_home_hp = 0; - do { max_hp = org_hp; max_hp_num = org_hp_num; max_size = org_size + delta; -#ifdef HEAP_BALANCE_INSTRUMENTATION - proc_no = GCToOSInterface::GetCurrentProcessorNumber (); - if (proc_no != last_proc_no) - { - dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPSP: %d->%d", last_proc_no, proc_no)); - multiple_procs_p = true; - last_proc_no = proc_no; - } - - int current_hp_num = heap_select::proc_no_to_heap_no[proc_no]; - acontext->set_home_heap (GCHeap::GetHeap (current_hp_num)); -#else - acontext->set_home_heap (GCHeap::GetHeap (heap_select::select_heap (acontext))); -#endif //HEAP_BALANCE_INSTRUMENTATION - new_home_hp = acontext->get_home_heap ()->pGenGCHeap; + org_alloc_context_count = org_hp->alloc_context_count; + max_alloc_context_count = org_alloc_context_count; if (org_hp == new_home_hp) max_size = max_size + delta; - org_alloc_context_count = org_hp->alloc_context_count; - max_alloc_context_count = org_alloc_context_count; if (max_alloc_context_count > 1) max_size /= max_alloc_context_count; - int actual_start = start; - int actual_end = (end - 1); - - for (int i = start; i < end; i++) + // check if the new home heap has more space + if (org_hp != new_home_hp) { - 
gc_heap* hp = GCHeap::GetHeap (i % n_heaps)->pGenGCHeap; - dd = hp->dynamic_data_of (0); - ptrdiff_t size = dd_new_allocation (dd); + dd = new_home_hp->dynamic_data_of(0); + ptrdiff_t size = dd_new_allocation(dd); - if (hp == new_home_hp) - { - size = size + delta; - } - int hp_alloc_context_count = hp->alloc_context_count; + // favor new home heap over org heap + size += delta * 2; + + int new_home_hp_alloc_context_count = new_home_hp->alloc_context_count; + if (new_home_hp_alloc_context_count > 0) + size /= (new_home_hp_alloc_context_count + 1); - if (hp_alloc_context_count > 0) - { - size /= (hp_alloc_context_count + 1); - } if (size > max_size) { #ifdef HEAP_BALANCE_INSTRUMENTATION - dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMPorg h%d(%dmb), m h%d(%dmb)", + dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPorg h%d(%dmb), m h%d(%dmb)", org_hp_num, (max_size / 1024 / 1024), - hp->heap_number, (size / 1024 / 1024))); + new_home_hp_num, (size / 1024 / 1024))); #endif //HEAP_BALANCE_INSTRUMENTATION - max_hp = hp; + max_hp = new_home_hp; max_size = size; - max_hp_num = max_hp->heap_number; - max_alloc_context_count = hp_alloc_context_count; + max_hp_num = new_home_hp_num; + max_alloc_context_count = new_home_hp_alloc_context_count; } } - } - while (org_alloc_context_count != org_hp->alloc_context_count || - max_alloc_context_count != max_hp->alloc_context_count); - if ((max_hp == org_hp) && (end < finish)) - { - start = end; end = finish; - delta = local_delta * 2; // Make it twice as hard to balance to remote nodes on NUMA. - goto try_again; + // consider heaps both inside our local NUMA node, + // and outside, but with different thresholds + enum + { + LOCAL_NUMA_NODE, + REMOTE_NUMA_NODE + }; + + for (int pass = LOCAL_NUMA_NODE; pass <= REMOTE_NUMA_NODE; pass++) + { + int count = end - start; + int max_tries = min(count, 4); + + // we will consider max_tries consecutive (in a circular sense) + // other heaps from a semi random starting point + + // alloc_count often increases by multiples of 16 (due to logic at top of routine), + // and we want to advance the starting point by 4 between successive calls, + // therefore the shift right by 2 bits + int heap_num = start + ((acontext->alloc_count >> 2) + new_home_hp_num) % count; + +#ifdef HEAP_BALANCE_INSTRUMENTATION + dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMP starting at h%d (home_heap_num = %d, alloc_count = %d)", heap_num, new_home_hp_num, acontext->alloc_count)); +#endif //HEAP_BALANCE_INSTRUMENTATION + + for (int tries = max_tries; --tries >= 0; heap_num++) + { + // wrap around if we hit the end of our range + if (heap_num >= end) + heap_num -= count; + // wrap around if we hit the end of the heap numbers + if (heap_num >= n_heaps) + heap_num -= n_heaps; + + assert (heap_num < n_heaps); + gc_heap* hp = gc_heap::g_heaps[heap_num]; + dd = hp->dynamic_data_of(0); + ptrdiff_t size = dd_new_allocation(dd); + +#ifdef HEAP_BALANCE_INSTRUMENTATION + dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMP looking at h%d(%dmb)", + heap_num, (size / 1024 / 1024))); +#endif //HEAP_BALANCE_INSTRUMENTATION + // if the size is not bigger than what we already have, + // give up immediately, as it can't be a winner... 
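
// Illustrative sketch, not part of the patch: the probe loop below looks at no
// more than four other heaps per call, starting from a point that rotates with
// the allocation count (alloc_count grows in steps of 16, so alloc_count >> 2
// advances the start by 4 between visits). The index arithmetic, isolated into
// hypothetical helpers:

static int probe_start(int start, int count, int alloc_count, int home_heap_num)
{
    // semi-random starting point inside [start, start + count)
    return start + ((alloc_count >> 2) + home_heap_num) % count;
}

static int probe_next(int heap_num, int start, int count, int n_heaps)
{
    heap_num++;
    if (heap_num >= start + count)
        heap_num -= count;      // wrap within the current NUMA node's range
    if (heap_num >= n_heaps)
        heap_num -= n_heaps;    // wrap within the overall heap numbering
    return heap_num;
}
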
+ // this is a micro-optimization to avoid fetching the + // alloc_context_count and possibly dividing by it + if (size <= max_size) + continue; + + int hp_alloc_context_count = hp->alloc_context_count; + + if (hp_alloc_context_count > 0) + { + size /= (hp_alloc_context_count + 1); + } + + if (size > max_size) + { +#ifdef HEAP_BALANCE_INSTRUMENTATION + dprintf(HEAP_BALANCE_TEMP_LOG, ("TEMPorg h%d(%dmb), m h%d(%dmb)", + org_hp_num, (max_size / 1024 / 1024), + hp->heap_number, (size / 1024 / 1024))); +#endif //HEAP_BALANCE_INSTRUMENTATION + + max_hp = hp; + max_size = size; + max_hp_num = max_hp->heap_number; + max_alloc_context_count = hp_alloc_context_count; + } + } + + if ((max_hp == org_hp) && (end < finish)) + { + start = end; end = finish; + delta = local_delta * 2; // Make it twice as hard to balance to remote nodes on NUMA. + } + else + { + // we already found a better heap, or there are no remote NUMA nodes + break; + } + } } + while (org_alloc_context_count != org_hp->alloc_context_count || + max_alloc_context_count != max_hp->alloc_context_count); #ifdef HEAP_BALANCE_INSTRUMENTATION uint16_t ideal_proc_no_before_set_ideal = 0; @@ -18088,12 +18358,12 @@ uint8_t* gc_heap::find_object (uint8_t* interior) { // this is a pointer to a UOH object heap_segment* seg = find_segment (interior, FALSE); - if (seg + if (seg) + { #ifdef FEATURE_CONSERVATIVE_GC - && (GCConfig::GetConservativeGC() || interior <= heap_segment_allocated(seg)) + if (interior >= heap_segment_allocated(seg)) + return 0; #endif - ) - { // If interior falls within the first free object at the beginning of a generation, // we don't have brick entry for it, and we may incorrectly treat it as on large object heap. int align_const = get_alignment_constant (heap_segment_read_only_p (seg) @@ -20512,6 +20782,13 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) if (!full_p) { +#ifdef FEATURE_CARD_MARKING_STEALING + n_eph_soh = 0; + n_gen_soh = 0; + n_eph_loh = 0; + n_gen_loh = 0; +#endif //FEATURE_CARD_MARKING_STEALING + #ifdef CARD_BUNDLE #ifdef MULTIPLE_HEAPS if (gc_t_join.r_join(this, gc_r_join_update_card_bundle)) @@ -20634,9 +20911,19 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) if (gc_t_join.joined()) #endif //MULTIPLE_HEAPS { + uint64_t promoted_bytes_global = 0; #ifdef HEAP_ANALYZE heap_analyze_enabled = FALSE; - GCToEEInterface::AnalyzeSurvivorsFinished(condemned_gen_number); +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < n_heaps; i++) + { + promoted_bytes_global += promoted_bytes (i); + } +#else + promoted_bytes_global = promoted_bytes (0); +#endif //MULTIPLE_HEAPS + + GCToEEInterface::AnalyzeSurvivorsFinished (settings.gc_index, condemned_gen_number, promoted_bytes_global, GCHeap::ReportGenerationBounds); #endif // HEAP_ANALYZE GCToEEInterface::AfterGcScanRoots (condemned_gen_number, max_generation, &sc); @@ -20654,6 +20941,18 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p) #ifdef FEATURE_CARD_MARKING_STEALING reset_card_marking_enumerators(); + + if (!full_p) + { + int generation_skip_ratio_soh = ((n_eph_soh > MIN_SOH_CROSS_GEN_REFS) ? + (int)(((float)n_gen_soh / (float)n_eph_soh) * 100) : 100); + int generation_skip_ratio_loh = ((n_eph_loh > MIN_LOH_CROSS_GEN_REFS) ? 
+ (int)(((float)n_gen_loh / (float)n_eph_loh) * 100) : 100); + + generation_skip_ratio = min (generation_skip_ratio_soh, generation_skip_ratio_loh); + dprintf (2, ("h%d skip ratio soh: %d, loh: %d", heap_number, + generation_skip_ratio_soh, generation_skip_ratio_loh)); + } #endif // FEATURE_CARD_MARKING_STEALING // null out the target of short weakref that were not promoted. @@ -23347,7 +23646,8 @@ void gc_heap::plan_phase (int condemned_gen_number) } } - if (maxgen_size_inc_p && provisional_mode_triggered) + if (maxgen_size_inc_p && provisional_mode_triggered && + !(background_running_p() || (current_bgc_state == bgc_initialized))) { pm_trigger_full_gc = true; dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2")); @@ -23458,7 +23758,8 @@ void gc_heap::plan_phase (int condemned_gen_number) rearrange_uoh_segments (); } - if (maxgen_size_inc_p && provisional_mode_triggered) + if (maxgen_size_inc_p && provisional_mode_triggered && + !(background_running_p() || (current_bgc_state == bgc_initialized))) { pm_trigger_full_gc = true; dprintf (GTC_LOG, ("in PM: maxgen size inc, doing a sweeping gen1 and trigger NGC2")); @@ -23495,7 +23796,8 @@ void gc_heap::plan_phase (int condemned_gen_number) if (!pm_trigger_full_gc && pm_stress_on && provisional_mode_triggered) { if ((settings.condemned_generation == (max_generation - 1)) && - ((settings.gc_index % 5) == 0)) + ((settings.gc_index % 5) == 0) && + !(background_running_p() || (current_bgc_state == bgc_initialized))) { pm_trigger_full_gc = true; } @@ -29468,7 +29770,6 @@ gc_heap::compute_next_boundary (int gen_number, assert (gen_number > settings.condemned_generation); return generation_allocation_start (generation_of (gen_number - 1 )); } - } inline void @@ -29543,7 +29844,6 @@ gc_heap::mark_through_cards_helper (uint8_t** poo, size_t& n_gen, cg_pointers_found ++; dprintf (4, ("cg pointer %Ix found, %Id so far", (size_t)*poo, cg_pointers_found )); - } } @@ -30011,9 +30311,20 @@ void gc_heap::mark_through_cards_for_segments (card_fn fn, BOOL relocating CARD_ // compute the efficiency ratio of the card table if (!relocating) { - generation_skip_ratio = ((n_eph > 400)? (int)(((float)n_gen / (float)n_eph) * 100) : 100); - dprintf (3, ("Msoh: cross: %Id, useful: %Id, cards set: %Id, cards cleared: %Id, ratio: %d", - n_eph, n_gen , n_card_set, total_cards_cleared, generation_skip_ratio)); +#ifdef FEATURE_CARD_MARKING_STEALING + Interlocked::ExchangeAddPtr(&n_eph_soh, n_eph); + Interlocked::ExchangeAddPtr(&n_gen_soh, n_gen); + dprintf (3, ("h%d marking h%d Msoh: cross: %Id, useful: %Id, cards set: %Id, cards cleared: %Id, ratio: %d", + hpt->heap_number, heap_number, n_eph, n_gen, n_card_set, total_cards_cleared, + (n_eph ? (int)(((float)n_gen / (float)n_eph) * 100) : 0))); + dprintf (3, ("h%d marking h%d Msoh: total cross %Id, useful: %Id, running ratio: %d", + hpt->heap_number, heap_number, n_eph_soh, n_gen_soh, + (n_eph_soh ? (int)(((float)n_gen_soh / (float)n_eph_soh) * 100) : 0))); +#else + generation_skip_ratio = ((n_eph > MIN_SOH_CROSS_GEN_REFS) ? 
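
// Illustrative sketch, not part of the patch: the skip ratio is the percentage
// of ephemeral cross-generation references that proved useful, clamped to 100
// when the sample is too small to be meaningful. Factored into a hypothetical
// helper:

#include <stddef.h>

static int compute_skip_ratio(size_t n_eph, size_t n_gen, size_t min_cross_gen_refs)
{
    // too few cross-gen refs observed: assume the card table is fully useful
    if (n_eph <= min_cross_gen_refs)
        return 100;
    return (int)(((float)n_gen / (float)n_eph) * 100);
}

// per heap, the more pessimistic of the SOH and LOH measurements wins:
//   generation_skip_ratio = min (compute_skip_ratio (n_eph_soh, n_gen_soh, MIN_SOH_CROSS_GEN_REFS),
//                                compute_skip_ratio (n_eph_loh, n_gen_loh, MIN_LOH_CROSS_GEN_REFS));
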
(int)(((float)n_gen / (float)n_eph) * 100) : 100); + dprintf (3, ("marking h%d Msoh: cross: %Id, useful: %Id, cards set: %Id, cards cleared: %Id, ratio: %d", + heap_number, n_eph, n_gen, n_card_set, total_cards_cleared, generation_skip_ratio)); +#endif //FEATURE_CARD_MARKING_STEALING } else { @@ -34147,12 +34458,22 @@ void gc_heap::mark_through_cards_for_uoh_objects (card_fn fn, // compute the efficiency ratio of the card table if (!relocating) { - generation_skip_ratio = min (((n_eph > 800) ? - (int)(((float)n_gen / (float)n_eph) * 100) : 100), - generation_skip_ratio); - - dprintf (3, ("Mloh: cross: %Id, useful: %Id, cards cleared: %Id, cards set: %Id, ratio: %d", - n_eph, n_gen, total_cards_cleared, n_card_set, generation_skip_ratio)); +#ifdef FEATURE_CARD_MARKING_STEALING + Interlocked::ExchangeAddPtr(&n_eph_loh, n_eph); + Interlocked::ExchangeAddPtr(&n_gen_loh, n_gen); + dprintf (3, ("h%d marking h%d Mloh: cross: %Id, useful: %Id, cards set: %Id, cards cleared: %Id, ratio: %d", + hpt->heap_number, heap_number, n_eph, n_gen, n_card_set, total_cards_cleared, + (n_eph ? (int)(((float)n_gen / (float)n_eph) * 100) : 0))); + dprintf (3, ("h%d marking h%d Mloh: total cross %Id, useful: %Id, running ratio: %d", + hpt->heap_number, heap_number, n_eph_loh, n_gen_loh, + (n_eph_loh ? (int)(((float)n_gen_loh / (float)n_eph_loh) * 100) : 0))); +#else + generation_skip_ratio = min (((n_eph > MIN_LOH_CROSS_GEN_REFS) ? + (int)(((float)n_gen / (float)n_eph) * 100) : 100), + generation_skip_ratio); + dprintf (3, ("marking h%d Mloh: cross: %Id, useful: %Id, cards cleared: %Id, cards set: %Id, ratio: %d", + heap_number, n_eph, n_gen, total_cards_cleared, n_card_set, generation_skip_ratio)); +#endif //FEATURE_CARD_MARKING_STEALING } else { @@ -35437,25 +35758,21 @@ HRESULT GCHeap::Initialize() #ifdef HOST_64BIT gc_heap::heap_hard_limit = (size_t)GCConfig::GetGCHeapHardLimit(); - gc_heap::heap_hard_limit_oh[0] = (size_t)GCConfig::GetGCHeapHardLimitSOH(); - gc_heap::heap_hard_limit_oh[1] = (size_t)GCConfig::GetGCHeapHardLimitLOH(); - gc_heap::heap_hard_limit_oh[2] = (size_t)GCConfig::GetGCHeapHardLimitPOH(); + gc_heap::heap_hard_limit_oh[soh] = (size_t)GCConfig::GetGCHeapHardLimitSOH(); + gc_heap::heap_hard_limit_oh[loh] = (size_t)GCConfig::GetGCHeapHardLimitLOH(); + gc_heap::heap_hard_limit_oh[poh] = (size_t)GCConfig::GetGCHeapHardLimitPOH(); - if (gc_heap::heap_hard_limit_oh[0] || gc_heap::heap_hard_limit_oh[1] || gc_heap::heap_hard_limit_oh[2]) + if (gc_heap::heap_hard_limit_oh[soh] || gc_heap::heap_hard_limit_oh[loh] || gc_heap::heap_hard_limit_oh[poh]) { - if (!gc_heap::heap_hard_limit_oh[0]) + if (!gc_heap::heap_hard_limit_oh[soh]) { return E_INVALIDARG; } - if (!gc_heap::heap_hard_limit_oh[1]) + if (!gc_heap::heap_hard_limit_oh[loh]) { return E_INVALIDARG; } - if (gc_heap::heap_hard_limit_oh[2] < min_segment_size_hard_limit) - { - gc_heap::heap_hard_limit_oh[2] = min_segment_size_hard_limit; - } - gc_heap::heap_hard_limit = gc_heap::heap_hard_limit_oh[0] + gc_heap::heap_hard_limit_oh[1] + gc_heap::heap_hard_limit_oh[2]; + gc_heap::heap_hard_limit = gc_heap::heap_hard_limit_oh[soh] + gc_heap::heap_hard_limit_oh[loh] + gc_heap::heap_hard_limit_oh[poh]; } else { @@ -35480,17 +35797,10 @@ HRESULT GCHeap::Initialize() { return E_INVALIDARG; } - gc_heap::heap_hard_limit_oh[0] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_soh / (uint64_t)100); - gc_heap::heap_hard_limit_oh[1] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_loh / (uint64_t)100); - if (percent_of_mem_poh 
== 0) - { - gc_heap::heap_hard_limit_oh[2] = min_segment_size_hard_limit; - } - else - { - gc_heap::heap_hard_limit_oh[2] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_poh / (uint64_t)100); - } - gc_heap::heap_hard_limit = gc_heap::heap_hard_limit_oh[0] + gc_heap::heap_hard_limit_oh[1] + gc_heap::heap_hard_limit_oh[2]; + gc_heap::heap_hard_limit_oh[soh] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_soh / (uint64_t)100); + gc_heap::heap_hard_limit_oh[loh] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_loh / (uint64_t)100); + gc_heap::heap_hard_limit_oh[poh] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_poh / (uint64_t)100); + gc_heap::heap_hard_limit = gc_heap::heap_hard_limit_oh[soh] + gc_heap::heap_hard_limit_oh[loh] + gc_heap::heap_hard_limit_oh[poh]; } } @@ -35584,13 +35894,18 @@ HRESULT GCHeap::Initialize() if (gc_heap::heap_hard_limit) { gc_heap::use_large_pages_p = GCConfig::GetGCLargePages(); - if (gc_heap::heap_hard_limit_oh[0]) + if (gc_heap::heap_hard_limit_oh[soh]) { #ifdef MULTIPLE_HEAPS if (nhp_from_config == 0) { for (int i = 0; i < (total_oh_count - 1); i++) { + if (i == poh && gc_heap::heap_hard_limit_oh[poh] == 0) + { + // if size 0 was specified for POH, ignore it for the nhp computation + continue; + } uint32_t nhp_oh = (uint32_t)(gc_heap::heap_hard_limit_oh[i] / min_segment_size_hard_limit); nhp = min (nhp, nhp_oh); } @@ -35600,9 +35915,9 @@ HRESULT GCHeap::Initialize() } } #endif - seg_size = gc_heap::heap_hard_limit_oh[0] / nhp; - large_seg_size = gc_heap::heap_hard_limit_oh[1] / nhp; - pin_seg_size = gc_heap::heap_hard_limit_oh[2] / nhp; + seg_size = gc_heap::heap_hard_limit_oh[soh] / nhp; + large_seg_size = gc_heap::heap_hard_limit_oh[loh] / nhp; + pin_seg_size = (gc_heap::heap_hard_limit_oh[poh] != 0) ? 
(gc_heap::heap_hard_limit_oh[poh] / nhp) : min_segment_size_hard_limit; size_t aligned_seg_size = align_on_segment_hard_limit (seg_size); size_t aligned_large_seg_size = align_on_segment_hard_limit (large_seg_size); @@ -36237,6 +36552,13 @@ bool GCHeap::StressHeap(gc_alloc_context * context) unsigned sizeToNextObj = (unsigned)Align(size(str)); uint8_t* freeObj = ((uint8_t*) str) + sizeToNextObj - sizeOfNewObj; pGenGCHeap->make_unused_array (freeObj, sizeOfNewObj); + +#if !defined(TARGET_AMD64) && !defined(TARGET_X86) + // ensure that the write to the new free object is seen by + // background GC *before* the write to the string length below + MemoryBarrier(); +#endif + str->SetStringLength(str->GetStringLength() - (sizeOfNewObj / sizeof(WCHAR))); } else @@ -37094,11 +37416,6 @@ void gc_heap::update_recorded_gen_data (last_recorded_gc_info* gc_info) void gc_heap::do_post_gc() { - if (!settings.concurrent) - { - initGCShadow(); - } - #ifdef MULTIPLE_HEAPS gc_heap* hp = g_heaps[0]; #else @@ -38607,8 +38924,8 @@ void gc_heap::walk_heap_per_heap (walk_fn fn, void* context, int gen_number, BOO generation_allocation_start (gen)); uint8_t* end = heap_segment_allocated (seg); - BOOL small_object_segments = TRUE; - int align_const = get_alignment_constant (small_object_segments); + int align_const = get_alignment_constant (TRUE); + BOOL walk_pinned_object_heap = walk_large_object_heap_p; while (1) @@ -38623,20 +38940,25 @@ void gc_heap::walk_heap_per_heap (walk_fn fn, void* context, int gen_number, BOO } else { - if (small_object_segments && walk_large_object_heap_p) - + if (walk_large_object_heap_p) { - small_object_segments = FALSE; - align_const = get_alignment_constant (small_object_segments); + walk_large_object_heap_p = FALSE; seg = generation_start_segment (large_object_generation); - x = heap_segment_mem (seg); - end = heap_segment_allocated (seg); - continue; + } + else if (walk_pinned_object_heap) + { + walk_pinned_object_heap = FALSE; + seg = generation_start_segment (pinned_object_generation); } else { break; } + + align_const = get_alignment_constant (FALSE); + x = heap_segment_mem (seg); + end = heap_segment_allocated (seg); + continue; } } diff --git a/src/coreclr/src/gc/gcconfig.h b/src/coreclr/src/gc/gcconfig.h index 085562f56dd4..62ea34f3659e 100644 --- a/src/coreclr/src/gc/gcconfig.h +++ b/src/coreclr/src/gc/gcconfig.h @@ -75,7 +75,7 @@ class GCConfigStringHolder BOOL_CONFIG (LogEnabled, "GCLogEnabled", NULL, false, "Specifies if you want to turn on logging in GC") \ BOOL_CONFIG (ConfigLogEnabled, "GCConfigLogEnabled", NULL, false, "Specifies the name of the GC config log file") \ BOOL_CONFIG (GCNumaAware, "GCNumaAware", NULL, true, "Enables numa allocations in the GC") \ - BOOL_CONFIG (GCCpuGroup, "GCCpuGroup", NULL, false, "Enables CPU groups in the GC") \ + BOOL_CONFIG (GCCpuGroup, "GCCpuGroup", "System.GC.CpuGroup", false, "Enables CPU groups in the GC") \ BOOL_CONFIG (GCLargePages, "GCLargePages", "System.GC.LargePages", false, "Enables using Large Pages in the GC") \ INT_CONFIG (HeapVerifyLevel, "HeapVerify", NULL, HEAPVERIFY_NONE, "When set verifies the integrity of the managed heap on entry and exit of each GC") \ INT_CONFIG (LOHCompactionMode, "GCLOHCompact", NULL, 0, "Specifies the LOH compaction mode") \ @@ -83,7 +83,7 @@ class GCConfigStringHolder INT_CONFIG (BGCSpinCount, "BGCSpinCount", NULL, 140, "Specifies the bgc spin count") \ INT_CONFIG (BGCSpin, "BGCSpin", NULL, 2, "Specifies the bgc spin time") \ INT_CONFIG (HeapCount, "GCHeapCount", "System.GC.HeapCount", 0,
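
// Illustrative sketch, not part of the patch: under per-object-heap hard limits,
// each of the nhp heaps gets limit/nhp as its segment size for that object heap,
// and an unspecified POH limit falls back to the minimum hard-limit segment
// size. Restated with hypothetical names:

#include <stddef.h>

static void hard_limit_seg_sizes(const size_t limit_oh[3],   // indexed by soh/loh/poh
                                 size_t nhp,                 // number of heaps
                                 size_t min_seg_size,        // min_segment_size_hard_limit
                                 size_t* seg, size_t* large_seg, size_t* pin_seg)
{
    *seg       = limit_oh[0] / nhp;   // soh
    *large_seg = limit_oh[1] / nhp;   // loh
    // POH is optional: size 0 means "not configured", not "no space"
    *pin_seg   = (limit_oh[2] != 0) ? (limit_oh[2] / nhp) : min_seg_size;
}
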
"Specifies the number of server GC heaps") \ - INT_CONFIG (Gen0Size, "GCgen0size", NULL, 0, "Specifies the smallest gen0 size") \ + INT_CONFIG (Gen0Size, "GCgen0size", NULL, 0, "Specifies the smallest gen0 budget") \ INT_CONFIG (SegmentSize, "GCSegmentSize", NULL, 0, "Specifies the managed heap segment size") \ INT_CONFIG (LatencyMode, "GCLatencyMode", NULL, -1, "Specifies the GC latency mode - batch, interactive or low latency (note that the same " \ "thing can be specified via API which is the supported way") \ @@ -92,13 +92,14 @@ class GCConfigStringHolder INT_CONFIG (LogFileSize, "GCLogFileSize", NULL, 0, "Specifies the GC log file size") \ INT_CONFIG (CompactRatio, "GCCompactRatio", NULL, 0, "Specifies the ratio compacting GCs vs sweeping") \ INT_CONFIG (GCHeapAffinitizeMask, "GCHeapAffinitizeMask", "System.GC.HeapAffinitizeMask", 0, "Specifies processor mask for Server GC threads") \ - STRING_CONFIG(GCHeapAffinitizeRanges, "GCHeapAffinitizeRanges", NULL, "Specifies list of processors for Server GC threads. The format is a comma separated " \ + STRING_CONFIG(GCHeapAffinitizeRanges, "GCHeapAffinitizeRanges", "System.GC.HeapAffinitizeRanges", "Specifies list of processors for Server GC threads. The format is a comma separated " \ "list of processor numbers or ranges of processor numbers. On Windows, each entry is " \ "prefixed by the CPU group number. Example: Unix - 1,3,5,7-9,12, Windows - 0:1,1:7-9") \ - INT_CONFIG (GCHighMemPercent, "GCHighMemPercent", NULL, 0, "The percent for GC to consider as high memory") \ + INT_CONFIG (GCHighMemPercent, "GCHighMemPercent", "System.GC.HighMemoryPercent", 0, "The percent for GC to consider as high memory") \ INT_CONFIG (GCProvModeStress, "GCProvModeStress", NULL, 0, "Stress the provisional modes") \ INT_CONFIG (GCGen0MaxBudget, "GCGen0MaxBudget", NULL, 0, "Specifies the largest gen0 allocation budget") \ - INT_CONFIG (GCHeapHardLimit, "GCHeapHardLimit", NULL, 0, "Specifies a hard limit for the GC heap") \ + INT_CONFIG (GCLowSkipRatio, "GCLowSkipRatio", NULL, 30, "Specifies the low generation skip ratio") \ + INT_CONFIG (GCHeapHardLimit, "GCHeapHardLimit", "System.GC.HeapHardLimit", 0, "Specifies a hard limit for the GC heap") \ INT_CONFIG (GCHeapHardLimitPercent, "GCHeapHardLimitPercent", "System.GC.HeapHardLimitPercent", 0, "Specifies the GC heap usage as a percentage of the total memory") \ INT_CONFIG (GCTotalPhysicalMemory, "GCTotalPhysicalMemory", NULL, 0, "Specifies what the GC should consider to be total physical memory") \ STRING_CONFIG(LogFile, "GCLogFile", NULL, "Specifies the name of the GC log file") \ @@ -122,13 +123,13 @@ class GCConfigStringHolder INT_CONFIG (BGCFLEnableTBH, "BGCFLEnableTBH", NULL, 0, "Enables TBH") \ INT_CONFIG (BGCFLEnableFF, "BGCFLEnableFF", NULL, 0, "Enables FF") \ INT_CONFIG (BGCG2RatioStep, "BGCG2RatioStep", NULL, 5, "Ratio correction factor for ML loop") \ - INT_CONFIG (GCHeapHardLimitSOH, "GCHeapHardLimitSOH", NULL, 0, "Specifies a hard limit for the GC heap SOH") \ - INT_CONFIG (GCHeapHardLimitLOH, "GCHeapHardLimitLOH", NULL, 0, "Specifies a hard limit for the GC heap LOH") \ - INT_CONFIG (GCHeapHardLimitPOH, "GCHeapHardLimitPOH", NULL, 0, "Specifies a hard limit for the GC heap POH") \ - INT_CONFIG (GCHeapHardLimitSOHPercent, "GCHeapHardLimitSOHPercent", NULL, 0, "Specifies the GC heap SOH usage as a percentage of the total memory") \ - INT_CONFIG (GCHeapHardLimitLOHPercent, "GCHeapHardLimitLOHPercent", NULL, 0, "Specifies the GC heap LOH usage as a percentage of the total memory") \ - INT_CONFIG 
(GCHeapHardLimitPOHPercent, "GCHeapHardLimitPOHPercent", NULL, 0, "Specifies the GC heap POH usage as a percentage of the total memory") \ - INT_CONFIG (GCEnabledInstructionSets, "GCEnabledInstructionSets", NULL, -1, "Specifies whether GC can use AVX2 or AVX512F - 0 for neither, 1 for AVX2, 3 for AVX512F")\ + INT_CONFIG (GCHeapHardLimitSOH, "GCHeapHardLimitSOH", "System.GC.HeapHardLimitSOH", 0, "Specifies a hard limit for the GC heap SOH") \ + INT_CONFIG (GCHeapHardLimitLOH, "GCHeapHardLimitLOH", "System.GC.HeapHardLimitLOH", 0, "Specifies a hard limit for the GC heap LOH") \ + INT_CONFIG (GCHeapHardLimitPOH, "GCHeapHardLimitPOH", "System.GC.HeapHardLimitPOH", 0, "Specifies a hard limit for the GC heap POH") \ + INT_CONFIG (GCHeapHardLimitSOHPercent, "GCHeapHardLimitSOHPercent", "System.GC.HeapHardLimitSOHPercent", 0, "Specifies the GC heap SOH usage as a percentage of the total memory") \ + INT_CONFIG (GCHeapHardLimitLOHPercent, "GCHeapHardLimitLOHPercent", "System.GC.HeapHardLimitLOHPercent", 0, "Specifies the GC heap LOH usage as a percentage of the total memory") \ + INT_CONFIG (GCHeapHardLimitPOHPercent, "GCHeapHardLimitPOHPercent", "System.GC.HeapHardLimitPOHPercent", 0, "Specifies the GC heap POH usage as a percentage of the total memory") \ + INT_CONFIG (GCEnabledInstructionSets, "GCEnabledInstructionSets", NULL, -1, "Specifies whether GC can use AVX2 or AVX512F - 0 for neither, 1 for AVX2, 3 for AVX512F")\ // This class is responsible for retrieving configuration information // for how the GC should operate. diff --git a/src/coreclr/src/gc/gcee.cpp b/src/coreclr/src/gc/gcee.cpp index 2964b14190e9..4912f2e86691 100644 --- a/src/coreclr/src/gc/gcee.cpp +++ b/src/coreclr/src/gc/gcee.cpp @@ -53,6 +53,11 @@ void GCHeap::UpdatePreGCCounters() #endif // BACKGROUND_GC FIRE_EVENT(GCStart_V2, count, depth, reason, static_cast<uint32_t>(type)); + ReportGenerationBounds(); +} + +void GCHeap::ReportGenerationBounds() +{ g_theGCHeap->DiagDescrGenerations([](void*, int generation, uint8_t* rangeStart, uint8_t* rangeEnd, uint8_t* rangeEndReserved) { uint64_t range = static_cast<uint64_t>(rangeEnd - rangeStart); uint64_t rangeReserved = static_cast<uint64_t>(rangeEndReserved - rangeStart); FIRE_EVENT(GCGenerationRange, generation, rangeStart, range, rangeReserved); }, nullptr); @@ -148,12 +153,7 @@ void GCHeap::UpdatePostGCCounters() #endif //FEATURE_EVENT_TRACE #ifdef FEATURE_EVENT_TRACE - g_theGCHeap->DiagDescrGenerations([](void*, int generation, uint8_t* rangeStart, uint8_t* rangeEnd, uint8_t* rangeEndReserved) - { - uint64_t range = static_cast<uint64_t>(rangeEnd - rangeStart); - uint64_t rangeReserved = static_cast<uint64_t>(rangeEndReserved - rangeStart); - FIRE_EVENT(GCGenerationRange, generation, rangeStart, range, rangeReserved); - }, nullptr); + ReportGenerationBounds(); FIRE_EVENT(GCEnd_V1, static_cast<uint32_t>(pSettings->gc_index), condemned_gen); diff --git a/src/coreclr/src/gc/gcenv.ee.standalone.inl b/src/coreclr/src/gc/gcenv.ee.standalone.inl index 650812644b01..f14b327a31e1 100644 --- a/src/coreclr/src/gc/gcenv.ee.standalone.inl +++ b/src/coreclr/src/gc/gcenv.ee.standalone.inl @@ -274,10 +274,10 @@ inline bool GCToEEInterface::AnalyzeSurvivorsRequested(int condemnedGeneration) return g_theGCToCLR->AnalyzeSurvivorsRequested(condemnedGeneration); } -inline void GCToEEInterface::AnalyzeSurvivorsFinished(int condemnedGeneration) +inline void GCToEEInterface::AnalyzeSurvivorsFinished(size_t gcIndex, int condemnedGeneration, uint64_t promoted_bytes, void (*reportGenerationBounds)()) { assert(g_theGCToCLR != nullptr); - g_theGCToCLR->AnalyzeSurvivorsFinished(condemnedGeneration); + g_theGCToCLR->AnalyzeSurvivorsFinished(gcIndex, condemnedGeneration, promoted_bytes, reportGenerationBounds); } inline
void GCToEEInterface::VerifySyncTableEntry() diff --git a/src/coreclr/src/gc/gcimpl.h b/src/coreclr/src/gc/gcimpl.h index d1f062efb44c..b1c8cb91a7b6 100644 --- a/src/coreclr/src/gc/gcimpl.h +++ b/src/coreclr/src/gc/gcimpl.h @@ -31,7 +31,7 @@ inline void deleteGCShadow() {} inline void checkGCWriteBarrier() {} #endif -void GCProfileWalkHeap(); +void GCProfileWalkHeap(bool etwOnly); class gc_heap; class CFinalize; @@ -57,7 +57,7 @@ class GCHeap : public IGCHeapInternal friend void EnterAllocLock(); friend void LeaveAllocLock(); friend void ProfScanRootsHelper(Object** object, ScanContext *pSC, uint32_t dwFlags); - friend void GCProfileWalkHeap(); + friend void GCProfileWalkHeap(bool etwOnly); public: //In order to keep gc.cpp cleaner, ugly EE specific code is relegated to methods. @@ -315,6 +315,8 @@ class GCHeap : public IGCHeapInternal size_t GetLastGCGenerationSize(int gen); virtual void Shutdown(); + + static void ReportGenerationBounds(); }; #endif // GCIMPL_H_ diff --git a/src/coreclr/src/gc/gcinterface.ee.h b/src/coreclr/src/gc/gcinterface.ee.h index 158da1867dbb..f61fdf5c5e65 100644 --- a/src/coreclr/src/gc/gcinterface.ee.h +++ b/src/coreclr/src/gc/gcinterface.ee.h @@ -414,7 +414,7 @@ class IGCToCLR { bool AnalyzeSurvivorsRequested(int condemnedGeneration) = 0; virtual - void AnalyzeSurvivorsFinished(int condemnedGeneration) = 0; + void AnalyzeSurvivorsFinished(size_t gcIndex, int condemnedGeneration, uint64_t promoted_bytes, void (*reportGenerationBounds)()) = 0; virtual void VerifySyncTableEntry() = 0; diff --git a/src/coreclr/src/gc/gcinterface.h b/src/coreclr/src/gc/gcinterface.h index 331f8e122108..bfe02b7db9da 100644 --- a/src/coreclr/src/gc/gcinterface.h +++ b/src/coreclr/src/gc/gcinterface.h @@ -396,7 +396,7 @@ typedef enum * They are currently used for EnC for adding new field members to existing instantiations under EnC modes where * the primary object is the original instantiation and the secondary represents the added field. * - * They are also used to implement the ConditionalWeakTable class in mscorlib.dll. If you want to use + * They are also used to implement the managed ConditionalWeakTable class. If you want to use * these from managed code, they are exposed to BCL through the managed DependentHandle class. * * @@ -587,7 +587,7 @@ class IGCHeap { /* =========================================================================== - BCL routines. These are routines that are directly exposed by mscorlib + BCL routines. These are routines that are directly exposed by CoreLib as a part of the `System.GC` class. These routines behave in the same manner as the functions on `System.GC`. =========================================================================== @@ -640,14 +640,14 @@ class IGCHeap { virtual int GetGcLatencyMode() = 0; // Sets the current GC latency mode. newLatencyMode has already been - // verified by mscorlib to be valid. + // verified by CoreLib to be valid. virtual int SetGcLatencyMode(int newLatencyMode) = 0; // Gets the current LOH compaction mode. virtual int GetLOHCompactionMode() = 0; // Sets the current LOH compaction mode. newLOHCompactionMode has - // already been verified by mscorlib to be valid. + // already been verified by CoreLib to be valid. 
virtual void SetLOHCompactionMode(int newLOHCompactionMode) = 0; // Registers for a full GC notification, raising a notification if the gen 2 or diff --git a/src/coreclr/src/gc/gcpriv.h b/src/coreclr/src/gc/gcpriv.h index 0606b57d7279..fb82ea41d77a 100644 --- a/src/coreclr/src/gc/gcpriv.h +++ b/src/coreclr/src/gc/gcpriv.h @@ -233,7 +233,7 @@ const int policy_expand = 2; #ifdef SIMPLE_DPRINTF void GCLog (const char *fmt, ... ); -#define dprintf(l,x) {if ((l <= 1) || (l == GTC_LOG)) {GCLog x;}} +#define dprintf(l,x) {if ((l == 1) || (l == GTC_LOG)) {GCLog x;}} #else //SIMPLE_DPRINTF // Nobody used the logging mechanism that used to be here. If we find ourselves // wanting to inspect GC logs on unmodified builds, we can use this define here @@ -1318,7 +1318,7 @@ class gc_heap protected: PER_HEAP_ISOLATED - BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p, bool separated_poh_p); + BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p, bool separated_poh_p, uint16_t* heap_no_to_numa_node); PER_HEAP_ISOLATED void destroy_initial_memory(); @@ -4039,6 +4039,20 @@ class gc_heap PER_HEAP int generation_skip_ratio;//in % +#ifdef FEATURE_CARD_MARKING_STEALING + PER_HEAP + VOLATILE(size_t) n_eph_soh; + PER_HEAP + VOLATILE(size_t) n_gen_soh; + PER_HEAP + VOLATILE(size_t) n_eph_loh; + PER_HEAP + VOLATILE(size_t) n_gen_loh; +#endif //FEATURE_CARD_MARKING_STEALING + + PER_HEAP_ISOLATED + int generation_skip_ratio_threshold; + PER_HEAP BOOL gen0_bricks_cleared; PER_HEAP diff --git a/src/coreclr/src/gc/sample/CMakeLists.txt b/src/coreclr/src/gc/sample/CMakeLists.txt index 40bb0b5dcd54..88dae6fc03d7 100644 --- a/src/coreclr/src/gc/sample/CMakeLists.txt +++ b/src/coreclr/src/gc/sample/CMakeLists.txt @@ -27,7 +27,7 @@ set(SOURCES if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) set ( SOURCES ${SOURCES} - ../vxsort/isa_detection_dummy.cpp + ../vxsort/isa_detection.cpp ../vxsort/do_vxsort_avx2.cpp ../vxsort/do_vxsort_avx512.cpp ../vxsort/machine_traits.avx2.cpp @@ -35,6 +35,7 @@ if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) ../vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp ../vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp ../vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp + ../vxsort/smallsort/avx2_load_mask_tables.cpp ) endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/src/gc/sample/gcenv.ee.cpp b/src/coreclr/src/gc/sample/gcenv.ee.cpp index 4ed20f07786e..1480e62c23b4 100644 --- a/src/coreclr/src/gc/sample/gcenv.ee.cpp +++ b/src/coreclr/src/gc/sample/gcenv.ee.cpp @@ -339,7 +339,7 @@ inline bool GCToEEInterface::AnalyzeSurvivorsRequested(int condemnedGeneration) return false; } -inline void GCToEEInterface::AnalyzeSurvivorsFinished(int condemnedGeneration) +inline void GCToEEInterface::AnalyzeSurvivorsFinished(size_t gcIndex, int condemnedGeneration, uint64_t promoted_bytes, void (*reportGenerationBounds)()) { } diff --git a/src/coreclr/src/gc/unix/config.gc.h.in b/src/coreclr/src/gc/unix/config.gc.h.in index 954176f74a34..42b6429be80e 100644 --- a/src/coreclr/src/gc/unix/config.gc.h.in +++ b/src/coreclr/src/gc/unix/config.gc.h.in @@ -18,7 +18,8 @@ #cmakedefine01 HAVE_PTHREAD_CONDATTR_SETCLOCK #cmakedefine01 HAVE_MACH_ABSOLUTE_TIME #cmakedefine01 HAVE_SCHED_GETAFFINITY -#cmakedefine01 HAVE_PTHREAD_GETAFFINITY_NP +#cmakedefine01 HAVE_SCHED_SETAFFINITY +#cmakedefine01 
HAVE_PTHREAD_SETAFFINITY_NP #cmakedefine01 HAVE_PTHREAD_NP_H #cmakedefine01 HAVE_CPUSET_T #cmakedefine01 HAVE__SC_AVPHYS_PAGES diff --git a/src/coreclr/src/gc/unix/configure.cmake b/src/coreclr/src/gc/unix/configure.cmake index cc7fb90265d8..6d190a8c4673 100644 --- a/src/coreclr/src/gc/unix/configure.cmake +++ b/src/coreclr/src/gc/unix/configure.cmake @@ -86,6 +86,7 @@ check_cxx_source_runs(" check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY) +check_library_exists(c sched_setaffinity "" HAVE_SCHED_SETAFFINITY) check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) if (HAVE_LIBPTHREAD) @@ -94,7 +95,7 @@ elseif (HAVE_PTHREAD_IN_LIBC) set(PTHREAD_LIBRARY c) endif() -check_library_exists(${PTHREAD_LIBRARY} pthread_getaffinity_np "" HAVE_PTHREAD_GETAFFINITY_NP) +check_library_exists(${PTHREAD_LIBRARY} pthread_setaffinity_np "" HAVE_PTHREAD_SETAFFINITY_NP) check_cxx_symbol_exists(_SC_PHYS_PAGES unistd.h HAVE__SC_PHYS_PAGES) check_cxx_symbol_exists(_SC_AVPHYS_PAGES unistd.h HAVE__SC_AVPHYS_PAGES) diff --git a/src/coreclr/src/gc/unix/gcenv.unix.cpp b/src/coreclr/src/gc/unix/gcenv.unix.cpp index e7a122498699..8bcc98a3526a 100644 --- a/src/coreclr/src/gc/unix/gcenv.unix.cpp +++ b/src/coreclr/src/gc/unix/gcenv.unix.cpp @@ -646,7 +646,7 @@ bool GCToOSInterface::VirtualRelease(void* address, size_t size) // size - size of the virtual memory range // Return: // Starting virtual address of the committed range -void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size) +void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size, uint16_t node) { #if HAVE_MAP_HUGETLB uint32_t largePagesFlag = MAP_HUGETLB; @@ -657,7 +657,7 @@ void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size) #endif void* pRetVal = VirtualReserveInner(size, OS_PAGE_SIZE, 0, largePagesFlag); - if (VirtualCommit(pRetVal, size, NUMA_NODE_UNDEFINED)) + if (VirtualCommit(pRetVal, size, node)) { return pRetVal; } @@ -985,19 +985,32 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize) // true if setting the affinity was successful, false otherwise. bool GCToOSInterface::SetThreadAffinity(uint16_t procNo) { -#if HAVE_PTHREAD_GETAFFINITY_NP +#if HAVE_SCHED_SETAFFINITY || HAVE_PTHREAD_SETAFFINITY_NP cpu_set_t cpuSet; CPU_ZERO(&cpuSet); CPU_SET((int)procNo, &cpuSet); + // Snap's default strict confinement does not allow sched_setaffinity(<nonzeroPid>, ...) without manually connecting the + // process-control plug. sched_setaffinity(<currentThreadPid>, ...) is also currently not allowed, only + // sched_setaffinity(0, ...). pthread_setaffinity_np(pthread_self(), ...) seems to call + // sched_setaffinity(<currentThreadPid>, ...) in at least one implementation, and does not work. To work around those + // issues, use sched_setaffinity(0, ...) if available and only otherwise fall back to pthread_setaffinity_np().
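
// Illustrative sketch, not part of the patch: on a typical Linux/glibc build both
// configure probes succeed, so the ladder above compiles down to the
// sched_setaffinity(0, ...) form; pthread_setaffinity_np is only reached where
// sched_setaffinity is unavailable. The preferred form in isolation, with a
// hypothetical helper name:

#define _GNU_SOURCE
#include <sched.h>

static bool set_current_thread_affinity(int proc)
{
    cpu_set_t cpuSet;
    CPU_ZERO(&cpuSet);
    CPU_SET(proc, &cpuSet);
    // pid 0 means "the calling thread" - the one form Snap's confinement permits
    return sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet) == 0;
}
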
See the + // following for more information: + // - https://github.com/dotnet/runtime/pull/38795 + // - https://github.com/dotnet/runtime/issues/1634 + // - https://forum.snapcraft.io/t/requesting-autoconnect-for-interfaces-in-pigmeat-process-control-home/17987/13 +#if HAVE_SCHED_SETAFFINITY + int st = sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet); +#else int st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet); +#endif return (st == 0); -#else // HAVE_PTHREAD_GETAFFINITY_NP +#else // !(HAVE_SCHED_SETAFFINITY || HAVE_PTHREAD_SETAFFINITY_NP) // There is no API to manage thread affinity, so let's ignore the request return false; -#endif // HAVE_PTHREAD_GETAFFINITY_NP +#endif // HAVE_SCHED_SETAFFINITY || HAVE_PTHREAD_SETAFFINITY_NP } // Boosts the calling thread's thread priority to a level higher than the default diff --git a/src/coreclr/src/gc/vxsort/alignment.h b/src/coreclr/src/gc/vxsort/alignment.h index df61c3a30f41..a32261be951f 100644 --- a/src/coreclr/src/gc/vxsort/alignment.h +++ b/src/coreclr/src/gc/vxsort/alignment.h @@ -4,8 +4,6 @@ #ifndef VXSORT_ALIGNNMENT_H #define VXSORT_ALIGNNMENT_H -//#include - namespace vxsort { using namespace std; diff --git a/src/coreclr/src/gc/vxsort/defs.h b/src/coreclr/src/gc/vxsort/defs.h index 628315e5110a..0cc72b23fa24 100644 --- a/src/coreclr/src/gc/vxsort/defs.h +++ b/src/coreclr/src/gc/vxsort/defs.h @@ -45,4 +45,45 @@ #define NOINLINE __attribute__((noinline)) #endif +namespace std { +template +class numeric_limits { + public: + static constexpr _Ty Max() { static_assert(sizeof(_Ty) != sizeof(_Ty), "func must be specialized!"); return _Ty(); } + static constexpr _Ty Min() { static_assert(sizeof(_Ty) != sizeof(_Ty), "func must be specialized!"); return _Ty(); } +}; + +template <> +class numeric_limits { +public: + static constexpr int32_t Max() { return 0x7fffffff; } + static constexpr int32_t Min() { return -0x7fffffff - 1; } +}; + +template <> +class numeric_limits { +public: + static constexpr uint32_t Max() { return 0xffffffff; } + static constexpr uint32_t Min() { return 0; } +}; + +template <> +class numeric_limits { + public: + static constexpr int64_t Max() { return 0x7fffffffffffffffi64; } + + static constexpr int64_t Min() { return -0x7fffffffffffffffi64 - 1; } +}; +} // namespace std + +#ifndef max +template +T max(T a, T b) { + if (a > b) + return a; + else + return b; +} +#endif + #endif // VXSORT_DEFS_H diff --git a/src/coreclr/src/gc/vxsort/do_vxsort.h b/src/coreclr/src/gc/vxsort/do_vxsort.h index 50a5e1ef77a7..edd803f310f4 100644 --- a/src/coreclr/src/gc/vxsort/do_vxsort.h +++ b/src/coreclr/src/gc/vxsort/do_vxsort.h @@ -11,14 +11,6 @@ enum class InstructionSet void InitSupportedInstructionSet (int32_t configSetting); bool IsSupportedInstructionSet (InstructionSet instructionSet); -void do_vxsort_avx2 (uint8_t** low, uint8_t** high); -void do_vxsort_avx2 (int32_t* low, int32_t* high); +void do_vxsort_avx2 (uint8_t** low, uint8_t** high, uint8_t *range_low, uint8_t *range_high); -void do_pack_avx2 (uint8_t** mem, size_t len, uint8_t* base); -void do_unpack_avx2 (int32_t* mem, size_t len, uint8_t* base); - -void do_vxsort_avx512 (uint8_t** low, uint8_t** high); -void do_vxsort_avx512 (int32_t* low, int32_t* high); - -void do_pack_avx512 (uint8_t** mem, size_t len, uint8_t* base); -void do_unpack_avx512 (int32_t* mem, size_t len, uint8_t* base); +void do_vxsort_avx512 (uint8_t** low, uint8_t** high, uint8_t* range_low, uint8_t* range_high); diff --git 
a/src/coreclr/src/gc/vxsort/do_vxsort_avx2.cpp b/src/coreclr/src/gc/vxsort/do_vxsort_avx2.cpp index 3e4fd10d15f4..1f097ede355d 100644 --- a/src/coreclr/src/gc/vxsort/do_vxsort_avx2.cpp +++ b/src/coreclr/src/gc/vxsort/do_vxsort_avx2.cpp @@ -5,82 +5,15 @@ #include "vxsort_targets_enable_avx2.h" -namespace std -{ - template - class numeric_limits - { - public: - static _Ty Max() - { - return _Ty(); - } - static _Ty Min() - { - return _Ty(); - } - }; - template <> - class numeric_limits - { - public: - static int32_t Max() - { - return 0x7fffffff; - } - static int32_t Min() - { - return -0x7fffffff-1; - } - }; - template <> - class numeric_limits - { - public: - static int64_t Max() - { - return 0x7fffffffffffffffi64; - } - - static int64_t Min() - { - return -0x7fffffffffffffffi64-1; - } - }; -} - -#ifndef max -template -T max (T a, T b) -{ - if (a > b) return a; else return b; -} -#endif #include "vxsort.h" #include "machine_traits.avx2.h" #include "packer.h" -void do_vxsort_avx2 (uint8_t** low, uint8_t** high) -{ - auto sorter = vxsort::vxsort(); - sorter.sort ((int64_t*)low, (int64_t*)high); -} - -void do_vxsort_avx2 (int32_t* low, int32_t* high) -{ - auto sorter = vxsort::vxsort(); - sorter.sort (low, high); -} - -void do_pack_avx2 (uint8_t** mem, size_t len, uint8_t* base) -{ - auto packer = vxsort::packer(); - packer.pack ((int64_t*)mem, len, (int64_t)base); -} - -void do_unpack_avx2 (int32_t* mem, size_t len, uint8_t* base) +void do_vxsort_avx2 (uint8_t** low, uint8_t** high, uint8_t* range_low, uint8_t* range_high) { - auto packer = vxsort::packer(); - packer.unpack (mem, len, (int64_t)base); + const int shift = 3; + assert((1 << shift) == sizeof(size_t)); + auto sorter = vxsort::vxsort(); + sorter.sort ((int64_t*)low, (int64_t*)high, (int64_t)range_low, (int64_t)(range_high+sizeof(uint8_t*))); } #include "vxsort_targets_disable.h" diff --git a/src/coreclr/src/gc/vxsort/do_vxsort_avx512.cpp b/src/coreclr/src/gc/vxsort/do_vxsort_avx512.cpp index aa0a8f99442e..792492046fd9 100644 --- a/src/coreclr/src/gc/vxsort/do_vxsort_avx512.cpp +++ b/src/coreclr/src/gc/vxsort/do_vxsort_avx512.cpp @@ -5,71 +5,14 @@ #include "vxsort_targets_enable_avx512.h" -namespace std -{ - template - class numeric_limits - { - public: - static _Ty Max() - { - return _Ty(); - } - static _Ty Min() - { - return _Ty(); - } - }; - template <> - class numeric_limits - { - public: - static int32_t Max() - { - return 0x7fffffff; - } - static int32_t Min() - { - return -0x7fffffff - 1; - } - }; - template <> - class numeric_limits - { - public: - static int64_t Max() - { - return 0x7fffffffffffffffi64; - } - - static int64_t Min() - { - return -0x7fffffffffffffffi64 - 1; - } - }; -} - -#ifndef max -template -T max (T a, T b) -{ - if (a > b) return a; else return b; -} -#endif - #include "vxsort.h" #include "machine_traits.avx512.h" -void do_vxsort_avx512 (uint8_t** low, uint8_t** high) +void do_vxsort_avx512 (uint8_t** low, uint8_t** high, uint8_t* range_low, uint8_t* range_high) { - auto sorter = vxsort::vxsort(); - sorter.sort ((int64_t*)low, (int64_t*)high); + const int shift = 3; + assert((1 << shift) == sizeof(size_t)); + auto sorter = vxsort::vxsort(); + sorter.sort ((int64_t*)low, (int64_t*)high, (int64_t)range_low, (int64_t)(range_high+sizeof(uint8_t*))); } - -void do_vxsort_avx512 (int32_t* low, int32_t* high) -{ - auto sorter = vxsort::vxsort(); - sorter.sort (low, high); -} - #include "vxsort_targets_disable.h" diff --git a/src/coreclr/src/gc/vxsort/isa_detection.cpp b/src/coreclr/src/gc/vxsort/isa_detection.cpp 
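
// Illustrative sketch, not part of the patch: the vxsort instantiation above is
// parameterized on element type, vector ISA, unroll factor, and the pointer
// shift; the exact template argument list shown here is an assumption. An
// equivalent AVX2 call would plausibly read:
//
//     const int shift = 3;   // log2(sizeof(uint8_t*)) on 64-bit, per the assert
//     auto sorter = vxsort::vxsort<int64_t, vxsort::vector_machine::AVX2, 8, shift>();
//     sorter.sort ((int64_t*)low, (int64_t*)high,
//                  (int64_t)range_low, (int64_t)(range_high + sizeof(uint8_t*)));
//
// Passing the [range_low, range_high] bounds lets the sorter decide on the fly
// whether the 64-bit pointers fit into shifted 32-bit offsets and pack them
// internally, which is the job the removed explicit do_pack/do_unpack steps
// used to perform up front.
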
index ac469a615dde..2a60ea01207a 100644 --- a/src/coreclr/src/gc/vxsort/isa_detection.cpp +++ b/src/coreclr/src/gc/vxsort/isa_detection.cpp @@ -2,7 +2,11 @@ // The .NET Foundation licenses this file to you under the MIT license. #include "common.h" + +#ifdef TARGET_WINDOWS #include +#include +#endif #include "do_vxsort.h" @@ -17,13 +21,11 @@ enum class SupportedISA static DWORD64 GetEnabledXStateFeaturesHelper() { - LIMITED_METHOD_CONTRACT; - // On Windows we have an api(GetEnabledXStateFeatures) to check if AVX is supported typedef DWORD64(WINAPI* PGETENABLEDXSTATEFEATURES)(); PGETENABLEDXSTATEFEATURES pfnGetEnabledXStateFeatures = NULL; - HMODULE hMod = WszLoadLibraryEx(WINDOWS_KERNEL32_DLLNAME_W, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); + HMODULE hMod = LoadLibraryExW(L"kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (hMod == NULL) return 0; @@ -54,10 +56,11 @@ SupportedISA DetermineSupportedISA() // bit definitions to make code more readable enum bits { - OCXSAVE = 1<<27, - AVX = 1<<28, - AVX2 = 1<<5, - AVX512F=1<<16, + OCXSAVE = 1<<27, + AVX = 1<<28, + AVX2 = 1<< 5, + AVX512F = 1<<16, + AVX512DQ = 1<<17, }; int reg[COUNT]; @@ -80,8 +83,8 @@ SupportedISA DetermineSupportedISA() // get processor extended feature flag info __cpuid(reg, 7); - // check if both AVX2 and AVX512F are supported by both processor and OS - if ((reg[EBX] & (AVX2 | AVX512F)) == (AVX2 | AVX512F) && + // check if all of AVX2, AVX512F and AVX512DQ are supported by both processor and OS + if ((reg[EBX] & (AVX2 | AVX512F | AVX512DQ)) == (AVX2 | AVX512F | AVX512DQ) && (xcr0 & 0xe6) == 0xe6 && (FeatureMask & (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) == (XSTATE_MASK_AVX | XSTATE_MASK_AVX512)) { diff --git a/src/coreclr/src/gc/vxsort/isa_detection_dummy.cpp b/src/coreclr/src/gc/vxsort/isa_detection_dummy.cpp deleted file mode 100644 index e277a7675a9a..000000000000 --- a/src/coreclr/src/gc/vxsort/isa_detection_dummy.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "common.h" - -#include "do_vxsort.h" - -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) - -void InitSupportedInstructionSet (int32_t) -{ -} - -bool IsSupportedInstructionSet (InstructionSet) -{ - return false; -} -#endif // defined(TARGET_AMD64) && defined(TARGET_WINDOWS) - diff --git a/src/coreclr/src/gc/vxsort/machine_traits.avx2.cpp b/src/coreclr/src/gc/vxsort/machine_traits.avx2.cpp index d693d08ea414..e4e86d4b239c 100644 --- a/src/coreclr/src/gc/vxsort/machine_traits.avx2.cpp +++ b/src/coreclr/src/gc/vxsort/machine_traits.avx2.cpp @@ -2,8 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
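
// Illustrative sketch, not part of the patch: DetermineSupportedISA boils down to
// "the CPU advertises the instructions AND the OS saves the wider register
// state". Condensed into one hypothetical predicate using the same MSVC
// intrinsics the file relies on:

#include <intrin.h>
#include <stdint.h>

static bool avx512_usable()
{
    int reg[4];
    __cpuid(reg, 1);
    if (!(reg[2] & (1 << 27)))        // OSXSAVE: XGETBV is available
        return false;
    uint64_t xcr0 = _xgetbv(0);
    if ((xcr0 & 0xe6) != 0xe6)        // OS saves SSE, AVX and AVX-512 state
        return false;
    __cpuid(reg, 7);                  // extended feature flags, EBX in reg[1]
    const int AVX2 = 1 << 5, AVX512F = 1 << 16, AVX512DQ = 1 << 17;
    return (reg[1] & (AVX2 | AVX512F | AVX512DQ)) == (AVX2 | AVX512F | AVX512DQ);
}
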
#include "common.h" -//#include - #include "machine_traits.avx2.h" namespace vxsort { diff --git a/src/coreclr/src/gc/vxsort/machine_traits.avx2.h b/src/coreclr/src/gc/vxsort/machine_traits.avx2.h index 1944b57a18f1..3720e78668e4 100644 --- a/src/coreclr/src/gc/vxsort/machine_traits.avx2.h +++ b/src/coreclr/src/gc/vxsort/machine_traits.avx2.h @@ -11,9 +11,8 @@ #include "vxsort_targets_enable_avx2.h" #include -//#include #include - +#include #include "defs.h" #include "machine_traits.h" @@ -37,16 +36,24 @@ static void not_supported() // in _DEBUG, we #define return to be something more complicated, // containing a statement, so #define away constexpr for _DEBUG #define constexpr -#endif //_DEBUG +#endif //_DEBUG template <> class vxsort_machine_traits { public: + typedef int32_t T; typedef __m256i TV; typedef uint32_t TMASK; + typedef int32_t TPACK; + typedef typename std::make_unsigned::type TU; static constexpr bool supports_compress_writes() { return false; } + static constexpr bool supports_packing() { return false; } + + template + static constexpr bool can_pack(T span) { return false; } + static INLINE TV load_vec(TV* p) { return _mm256_lddqu_si256(p); } static INLINE void store_vec(TV* ptr, TV v) { _mm256_storeu_si256(ptr, v); } @@ -56,7 +63,7 @@ class vxsort_machine_traits { static INLINE TV partition_vector(TV v, int mask) { assert(mask >= 0); assert(mask <= 255); - return s2i(_mm256_permutevar8x32_ps(i2s(v), _mm256_cvtepu8_epi32(_mm_loadu_si128((__m128i*)(perm_table_32 + mask * 8))))); + return s2i(_mm256_permutevar8x32_ps(i2s(v), _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(perm_table_32 + mask * 8))))); } static INLINE TV broadcast(int32_t pivot) { return _mm256_set1_epi32(pivot); } @@ -67,82 +74,47 @@ class vxsort_machine_traits { static INLINE TV add(TV a, TV b) { return _mm256_add_epi32(a, b); } static INLINE TV sub(TV a, TV b) { return _mm256_sub_epi32(a, b); }; -}; - -template <> -class vxsort_machine_traits { - public: - typedef __m256i TV; - typedef uint32_t TMASK; - - static constexpr bool supports_compress_writes() { return false; } - - static INLINE TV load_vec(TV* p) { return _mm256_lddqu_si256(p); } - static INLINE void store_vec(TV* ptr, TV v) { _mm256_storeu_si256(ptr, v); } - - static void store_compress_vec(TV* ptr, TV v, TMASK mask) { not_supported(); } + static INLINE TV pack_ordered(TV a, TV b) { return a; } + static INLINE TV pack_unordered(TV a, TV b) { return a; } + static INLINE void unpack_ordered(TV p, TV& u1, TV& u2) { } - static INLINE TV partition_vector(TV v, int mask) { - assert(mask >= 0); - assert(mask <= 255); - return s2i(_mm256_permutevar8x32_ps(i2s(v), _mm256_cvtepu8_epi32(_mm_loadu_si128((__m128i*)(perm_table_32 + mask * 8))))); + template + static T shift_n_sub(T v, T sub) { + if (Shift > 0) + v >>= Shift; + v -= sub; + return v; } - static INLINE TV broadcast(uint32_t pivot) { return _mm256_set1_epi32(pivot); } - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - __m256i top_bit = _mm256_set1_epi32(1U << 31); - return _mm256_movemask_ps(i2s(_mm256_cmpgt_epi32(_mm256_xor_si256(top_bit, a), _mm256_xor_si256(top_bit, b)))); + template + static T unshift_and_add(TPACK from, T add) { + add += from; + if (Shift > 0) + add = (T) (((TU) add) << Shift); + return add; } - - static TV shift_right(TV v, int i) { return _mm256_srli_epi32(v, i); } - static TV shift_left(TV v, int i) { return _mm256_slli_epi32(v, i); } - - static INLINE TV add(TV a, TV b) { return _mm256_add_epi32(a, b); } - static INLINE TV sub(TV a, TV b) { return _mm256_sub_epi32(a, 
b); }; }; template <> -class vxsort_machine_traits { +class vxsort_machine_traits { public: - typedef __m256 TV; + typedef int64_t T; + typedef __m256i TV; typedef uint32_t TMASK; + typedef int32_t TPACK; + typedef typename std::make_unsigned::type TU; static constexpr bool supports_compress_writes() { return false; } - static INLINE TV load_vec(TV* p) { return _mm256_loadu_ps((float*)p); } + static constexpr bool supports_packing() { return true; } - static INLINE void store_vec(TV* ptr, TV v) { _mm256_storeu_ps((float*)ptr, v); } - - static void store_compress_vec(TV* ptr, TV v, TMASK mask) { not_supported(); } - - static INLINE TV partition_vector(TV v, int mask) { - assert(mask >= 0); - assert(mask <= 255); - return _mm256_permutevar8x32_ps(v, _mm256_cvtepu8_epi32(_mm_loadu_si128((__m128i*)(perm_table_32 + mask * 8)))); - } - - static INLINE TV broadcast(float pivot) { return _mm256_set1_ps(pivot); } - - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - /// 0x0E: Greater-than (ordered, signaling) \n - /// 0x1E: Greater-than (ordered, non-signaling) - return _mm256_movemask_ps(_mm256_cmp_ps(a, b, _CMP_GT_OS)); + template + static constexpr bool can_pack(T span) { + const auto PACK_LIMIT = (((TU) std::numeric_limits::Max() + 1)) << Shift; + return ((TU) span) < PACK_LIMIT; } - static INLINE TV add(TV a, TV b) { return _mm256_add_ps(a, b); } - static INLINE TV sub(TV a, TV b) { return _mm256_sub_ps(a, b); }; - -}; - -template <> -class vxsort_machine_traits { - public: - typedef __m256i TV; - typedef uint32_t TMASK; - - static constexpr bool supports_compress_writes() { return false; } - static INLINE TV load_vec(TV* p) { return _mm256_lddqu_si256(p); } static INLINE void store_vec(TV* ptr, TV v) { _mm256_storeu_si256(ptr, v); } @@ -164,8 +136,6 @@ class vxsort_machine_traits { static INLINE TV add(TV a, TV b) { return _mm256_add_epi64(a, b); } static INLINE TV sub(TV a, TV b) { return _mm256_sub_epi64(a, b); }; - - static INLINE TV pack_ordered(TV a, TV b) { a = _mm256_permute4x64_epi64(_mm256_shuffle_epi32(a, _MM_PERM_DBCA), _MM_PERM_DBCA); b = _mm256_permute4x64_epi64(_mm256_shuffle_epi32(b, _MM_PERM_DBCA), _MM_PERM_CADB); @@ -177,106 +147,31 @@ class vxsort_machine_traits { return _mm256_blend_epi32(a, b, 0b10101010); } - static INLINE void unpack_ordered_signed(TV p, TV& u1, TV& u2) { + static INLINE void unpack_ordered(TV p, TV& u1, TV& u2) { auto p01 = _mm256_extracti128_si256(p, 0); auto p02 = _mm256_extracti128_si256(p, 1); u1 = _mm256_cvtepi32_epi64(p01); u2 = _mm256_cvtepi32_epi64(p02); - - } - - static INLINE void unpack_ordered_unsigned(TV p, TV& u1, TV& u2) { - auto p01 = _mm256_extracti128_si256(p, 0); - auto p02 = _mm256_extracti128_si256(p, 1); - - u1 = _mm256_cvtepu32_epi64(p01); - u2 = _mm256_cvtepu32_epi64(p02); - } -/* - template<> - static INLINE TV pack_ordered(TV a, TV b) { - a = _mm256_permute4x64_epi64(_mm256_shuffle_epi32(a, _MM_PERM_DBCA), _MM_PERM_DBCA); - b = _mm256_permute4x64_epi64(_mm256_shuffle_epi32(b, _MM_PERM_DBCA), _MM_PERM_CADB); - return _mm256_blend_epi32(a, b, 0b11110000); + template + static T shift_n_sub(T v, T sub) { + if (Shift > 0) + v >>= Shift; + v -= sub; + return v; } - template<> - static INLINE typename vxsort_machine_traits::TV pack_unordered(TV a, TV b) { - b = _mm256_shuffle_epi32(b, _MM_PERM_CDAB); - return _mm256_blend_epi32(a, b, 0b10101010); + template + static T unshift_and_add(TPACK from, T add) { + add += from; + if (Shift > 0) + add = (T) (((TU) add) << Shift); + return add; } - - */ - - - }; -template <> -class 
vxsort_machine_traits { - public: - typedef __m256i TV; - typedef uint32_t TMASK; - - static constexpr bool supports_compress_writes() { return false; } - - static INLINE TV load_vec(TV* p) { return _mm256_lddqu_si256(p); } - - static INLINE void store_vec(TV* ptr, TV v) { _mm256_storeu_si256(ptr, v); } - - static void store_compress_vec(TV* ptr, TV v, TMASK mask) { not_supported(); } - - static INLINE TV partition_vector(TV v, int mask) { - assert(mask >= 0); - assert(mask <= 15); - return s2i(_mm256_permutevar8x32_ps(i2s(v), _mm256_cvtepu8_epi32(_mm_loadu_si128((__m128i*)(perm_table_64 + mask * 8))))); - } - static INLINE TV broadcast(int64_t pivot) { return _mm256_set1_epi64x(pivot); } - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - __m256i top_bit = _mm256_set1_epi64x(1LLU << 63); - return _mm256_movemask_pd(i2d(_mm256_cmpgt_epi64(_mm256_xor_si256(top_bit, a), _mm256_xor_si256(top_bit, b)))); - } - - static INLINE TV shift_right(TV v, int i) { return _mm256_srli_epi64(v, i); } - static INLINE TV shift_left(TV v, int i) { return _mm256_slli_epi64(v, i); } - - static INLINE TV add(TV a, TV b) { return _mm256_add_epi64(a, b); } - static INLINE TV sub(TV a, TV b) { return _mm256_sub_epi64(a, b); }; -}; - -template <> -class vxsort_machine_traits { - public: - typedef __m256d TV; - typedef uint32_t TMASK; - - static constexpr bool supports_compress_writes() { return false; } - - static INLINE TV load_vec(TV* p) { return _mm256_loadu_pd((double*)p); } - - static INLINE void store_vec(TV* ptr, TV v) { _mm256_storeu_pd((double*)ptr, v); } - - static void store_compress_vec(TV* ptr, TV v, TMASK mask) { not_supported(); } - - static INLINE TV partition_vector(TV v, int mask) { - assert(mask >= 0); - assert(mask <= 15); - return s2d(_mm256_permutevar8x32_ps(d2s(v), _mm256_cvtepu8_epi32(_mm_loadu_si128((__m128i*)(perm_table_64 + mask * 8))))); - } - - static INLINE TV broadcast(double pivot) { return _mm256_set1_pd(pivot); } - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - /// 0x0E: Greater-than (ordered, signaling) \n - /// 0x1E: Greater-than (ordered, non-signaling) - return _mm256_movemask_pd(_mm256_cmp_pd(a, b, _CMP_GT_OS)); - } - - static INLINE TV add(TV a, TV b) { return _mm256_add_pd(a, b); } - static INLINE TV sub(TV a, TV b) { return _mm256_sub_pd(a, b); }; -}; } diff --git a/src/coreclr/src/gc/vxsort/machine_traits.avx512.h b/src/coreclr/src/gc/vxsort/machine_traits.avx512.h index 443654a39b60..8df8660aa13a 100644 --- a/src/coreclr/src/gc/vxsort/machine_traits.avx512.h +++ b/src/coreclr/src/gc/vxsort/machine_traits.avx512.h @@ -18,205 +18,137 @@ // in _DEBUG, we #define return to be something more complicated, // containing a statement, so #define away constexpr for _DEBUG #define constexpr -#endif //_DEBUG +#endif //_DEBUG namespace vxsort { template <> class vxsort_machine_traits { - public: - typedef __m512i TV; - typedef __mmask16 TMASK; - - static constexpr bool supports_compress_writes() { return true; } + public: + typedef int32_t T; + typedef __m512i TV; + typedef __mmask16 TMASK; + typedef int32_t TPACK; + typedef typename std::make_unsigned::type TU; - static INLINE TV load_vec(TV* p) { - return _mm512_loadu_si512(p); - } + static constexpr bool supports_compress_writes() { return true; } - static INLINE void store_vec(TV* ptr, TV v) { - _mm512_storeu_si512(ptr, v); - } + static constexpr bool supports_packing() { return false; } - // Will never be called - static INLINE TV partition_vector(TV v, int mask) { return v; } + template + static constexpr bool can_pack(T 
span) { return false; } + static INLINE TV load_vec(TV* p) { return _mm512_loadu_si512(p); } - static void store_compress_vec(TV *ptr, TV v, TMASK mask) { - _mm512_mask_compressstoreu_epi32(ptr, mask, v); - } + static INLINE void store_vec(TV* ptr, TV v) { _mm512_storeu_si512(ptr, v); } - static INLINE TV broadcast(int32_t pivot) { - return _mm512_set1_epi32(pivot); - } + // Will never be called + static INLINE TV partition_vector(TV v, int mask) { return v; } - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_GT); - } -}; + static void store_compress_vec(TV* ptr, TV v, TMASK mask) { _mm512_mask_compressstoreu_epi32(ptr, mask, v); } -template <> -class vxsort_machine_traits { - public: - typedef __m512i TV; - typedef __mmask16 TMASK; + static INLINE TV broadcast(int32_t pivot) { return _mm512_set1_epi32(pivot); } - static constexpr bool supports_compress_writes() { return true; } + static INLINE TMASK get_cmpgt_mask(TV a, TV b) { return _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_GT); } - static INLINE TV load_vec(TV* p) { - return _mm512_loadu_si512(p); - } + static TV shift_right(TV v, int i) { return _mm512_srli_epi32(v, i); } + static TV shift_left(TV v, int i) { return _mm512_slli_epi32(v, i); } - static INLINE void store_vec(TV* ptr, TV v) { - _mm512_storeu_si512(ptr, v); - } + static INLINE TV add(TV a, TV b) { return _mm512_add_epi32(a, b); } + static INLINE TV sub(TV a, TV b) { return _mm512_sub_epi32(a, b); }; - // Will never be called - static INLINE TV partition_vector(TV v, int mask) { return v; } + static INLINE TV pack_ordered(TV a, TV b) { return a; } + static INLINE TV pack_unordered(TV a, TV b) { return a; } + static INLINE void unpack_ordered(TV p, TV& u1, TV& u2) { } + template + static T shift_n_sub(T v, T sub) { + if (Shift > 0) + v >>= Shift; + v -= sub; + return v; + } - static void store_compress_vec(TV *ptr, TV v, TMASK mask) { - _mm512_mask_compressstoreu_epi32(ptr, mask, v); - } - - static INLINE TV broadcast(uint32_t pivot) { - return _mm512_set1_epi32(pivot); - } - - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - return _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_GT); - } -}; - -template <> -class vxsort_machine_traits { - public: - typedef __m512 TV; - typedef __mmask16 TMASK; - - static constexpr bool supports_compress_writes() { return true; } - - static INLINE TV load_vec(TV* p) { - return _mm512_loadu_ps(p); - } - - static INLINE void store_vec(TV* ptr, TV v) { - _mm512_storeu_ps(ptr, v); - } - - // Will never be called - static INLINE TV partition_vector(TV v, int mask) { return v; } - - - static void store_compress_vec(TV *ptr, TV v, TMASK mask) { - _mm512_mask_compressstoreu_ps(ptr, mask, v); - } - - static INLINE TV broadcast(float pivot) { - return _mm512_set1_ps(pivot); - } - - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - return _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); - } + template + static T unshift_and_add(TPACK from, T add) { + add += from; + if (Shift > 0) + add = (T) (((TU) add) << Shift); + return add; + } }; template <> class vxsort_machine_traits { - public: - typedef __m512i TV; - typedef __mmask8 TMASK; - - static bool supports_compress_writes() { return true; } + public: + typedef int64_t T; + typedef __m512i TV; + typedef __mmask8 TMASK; + typedef int32_t TPACK; + typedef typename std::make_unsigned::type TU; - static INLINE TV load_vec(TV* p) { - return _mm512_loadu_si512(p); - } + static constexpr bool supports_compress_writes() { return true; } - static INLINE void store_vec(TV* ptr, TV 
v) { - _mm512_storeu_si512(ptr, v); - } + static constexpr bool supports_packing() { return true; } - // Will never be called - static INLINE TV partition_vector(TV v, int mask) { return v; } + template + static constexpr bool can_pack(T span) { + const auto PACK_LIMIT = (((TU) std::numeric_limits::Max() + 1)) << Shift; + return ((TU) span) < PACK_LIMIT; + } + static INLINE TV load_vec(TV* p) { return _mm512_loadu_si512(p); } - static void store_compress_vec(TV *ptr, TV v, TMASK mask) { - _mm512_mask_compressstoreu_epi64(ptr, mask, v); - } + static INLINE void store_vec(TV* ptr, TV v) { _mm512_storeu_si512(ptr, v); } - static INLINE TV broadcast(int64_t pivot) { - return _mm512_set1_epi64(pivot); - } + // Will never be called + static INLINE TV partition_vector(TV v, int mask) { return v; } - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - return _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_GT); - } -}; - -template <> -class vxsort_machine_traits { - public: - typedef __m512i TV; - typedef __mmask8 TMASK; + static void store_compress_vec(TV* ptr, TV v, TMASK mask) { _mm512_mask_compressstoreu_epi64(ptr, mask, v); } - static constexpr bool supports_compress_writes() { return true; } + static INLINE TV broadcast(int64_t pivot) { return _mm512_set1_epi64(pivot); } - static INLINE TV load_vec(TV* p) { - return _mm512_loadu_si512(p); - } + static INLINE TMASK get_cmpgt_mask(TV a, TV b) { return _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_GT); } - static INLINE void store_vec(TV* ptr, TV v) { - _mm512_storeu_si512(ptr, v); - } + static TV shift_right(TV v, int i) { return _mm512_srli_epi64(v, i); } + static TV shift_left(TV v, int i) { return _mm512_slli_epi64(v, i); } - // Will never be called - static INLINE TV partition_vector(TV v, int mask) { return v; } + static INLINE TV add(TV a, TV b) { return _mm512_add_epi64(a, b); } + static INLINE TV sub(TV a, TV b) { return _mm512_sub_epi64(a, b); }; + static INLINE TV pack_ordered(TV a, TV b) { + a = _mm512_permutex_epi64(_mm512_shuffle_epi32(a, _MM_PERM_DBCA), _MM_PERM_DBCA); + b = _mm512_permutex_epi64(_mm512_shuffle_epi32(b, _MM_PERM_DBCA), _MM_PERM_CADB); + return _mm512_shuffle_i64x2(a, b, _MM_PERM_DBCA); + } - static void store_compress_vec(TV *ptr, TV v, TMASK mask) { - _mm512_mask_compressstoreu_epi64(ptr, mask, v); - } - - static INLINE TV broadcast(uint64_t pivot) { - return _mm512_set1_epi64(pivot); - } - - static INLINE TMASK get_cmpgt_mask(TV a, TV b) { - return _mm512_cmp_epu64_mask(a, b, _MM_CMPINT_GT); - } -}; - -template <> -class vxsort_machine_traits { - public: - typedef __m512d TV; - typedef __mmask8 TMASK; - - static constexpr bool supports_compress_writes() { return true; } + static INLINE TV pack_unordered(TV a, TV b) { return _mm512_mask_shuffle_epi32(a, 0b1010101010101010, b, _MM_PERM_CDAB); } - static INLINE TV load_vec(TV* p) { - return _mm512_loadu_pd(p); - } + static INLINE void unpack_ordered(TV p, TV& u1, TV& u2) { + auto p01 = _mm512_extracti32x8_epi32(p, 0); + auto p02 = _mm512_extracti32x8_epi32(p, 1); - static INLINE void store_vec(TV* ptr, TV v) { - _mm512_storeu_pd(ptr, v); - } + u1 = _mm512_cvtepi32_epi64(p01); + u2 = _mm512_cvtepi32_epi64(p02); + } - // Will never be called - static INLINE TV partition_vector(TV v, int mask) { return v; } + template + static T shift_n_sub(T v, T sub) { + if (Shift > 0) + v >>= Shift; + v -= sub; + return v; + } + template + static T unshift_and_add(TPACK from, T add) { + add += from; - static void store_compress_vec(TV *ptr, TV v, TMASK mask) { - _mm512_mask_compressstoreu_pd(ptr, 
mask, v);
-    }
+        if (Shift > 0)
+            add = (T) (((TU) add) << Shift);
-    static INLINE TV broadcast(double pivot) {
-        return _mm512_set1_pd(pivot);
-    }
+        return add;
+    }
-    static INLINE TMASK get_cmpgt_mask(TV a, TV b) {
-        return _mm512_cmp_pd_mask(a, b, _CMP_GT_OS);
-    }
 };
 }
diff --git a/src/coreclr/src/gc/vxsort/machine_traits.h b/src/coreclr/src/gc/vxsort/machine_traits.h
index cd31ed365777..7862d4b0d9ea 100644
--- a/src/coreclr/src/gc/vxsort/machine_traits.h
+++ b/src/coreclr/src/gc/vxsort/machine_traits.h
@@ -8,8 +8,6 @@
 #ifndef VXSORT_MACHINE_TRAITS_H
 #define VXSORT_MACHINE_TRAITS_H
 
-//#include
-
 namespace vxsort {
 
 enum vector_machine {
@@ -22,14 +20,35 @@ enum vector_machine {
 template <typename T, vector_machine M>
 struct vxsort_machine_traits {
  public:
-  typedef int TV;
-  typedef int TMASK;
+  typedef T TV;
+  typedef T TMASK;
+  typedef T TPACK;
+
+  static constexpr bool supports_compress_writes() {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+      return false;
+  }
+
+  static constexpr bool supports_packing() {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+      return false;
+  }
 
-  static constexpr bool supports_compress_writes();
+  template <int Shift>
+  static constexpr bool can_pack(T span) {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+      return false;
+  }
 
-  static TV load_vec(TV* ptr);
-  static void store_vec(TV* ptr, TV v);
-  static void store_compress_vec(TV* ptr, TV v, TMASK mask);
+  static TV load_vec(TV* ptr) {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+  }
+  static void store_vec(TV* ptr, TV v) {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+  }
+  static void store_compress_vec(TV* ptr, TV v, TMASK mask) {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+  }
   static TV partition_vector(TV v, int mask);
   static TV broadcast(T pivot);
   static TMASK get_cmpgt_mask(TV a, TV b);
@@ -43,11 +62,25 @@ struct vxsort_machine_traits {
   static TV pack_ordered(TV a, TV b);
   static TV pack_unordered(TV a, TV b);
 
-  static void unpack_ordered_signed(TV p, TV& u1, TV& u2);
-  static void unpack_ordered_unsigned(TV p, TV& u1, TV& u2);
+  static void unpack_ordered(TV p, TV& u1, TV& u2) {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+  }
+
+  template <int Shift>
+  static T shift_n_sub(T v, T sub) {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+      return v;
+  }
+
+  template <int Shift>
+  static T unshift_and_add(TPACK from, T add) {
+      static_assert(sizeof(TV) != sizeof(TV), "func must be specialized!");
+      return add;
+  }
 };
+
 }
+
+
 #endif // VXSORT_MACHINE_TRAITS_H
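// A minimal, self-contained sketch (illustration only, not part of the patch;
// all names below are made up) of the "func must be specialized!" pattern the
// primary vxsort_machine_traits template above relies on: the condition
// sizeof(TV) != sizeof(TV) is always false, but because it depends on a
// template parameter, compilers in practice only evaluate it when a member of
// the primary template is actually instantiated. Types with a specialization
// compile cleanly; any unsupported type fails loudly at compile time instead
// of silently falling back to a useless default.
#include <cstdint>

template <typename T>
struct machine_traits_sketch {
    static T load(T* p) {
        // Only fires if someone instantiates the unspecialized template.
        static_assert(sizeof(T) != sizeof(T), "must be specialized!");
        return *p;
    }
};

template <>
struct machine_traits_sketch<int32_t> {
    static int32_t load(int32_t* p) { return *p; } // supported type: compiles
};

int main() {
    int32_t x = 42;
    (void) machine_traits_sketch<int32_t>::load(&x); // OK: uses the specialization
    // machine_traits_sketch<double>::load(...) would trip the static_assert.
    return 0;
}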
diff --git a/src/coreclr/src/gc/vxsort/packer.h b/src/coreclr/src/gc/vxsort/packer.h
index 4c7257a58f17..be50b7d5fb41 100644
--- a/src/coreclr/src/gc/vxsort/packer.h
+++ b/src/coreclr/src/gc/vxsort/packer.h
@@ -4,196 +4,259 @@
 #ifndef VXSORT_PACKER_H
 #define VXSORT_PACKER_H
 
-#include "vxsort_targets_enable_avx2.h"
-
-//#include
-//#include
-//#include
-#//include
+#include "defs.h"
 #include "alignment.h"
 #include "machine_traits.h"
-#include "machine_traits.avx2.h"
-#include "machine_traits.avx512.h"
 #include
-//#include
 
 namespace vxsort {
 
-template
+template <typename TFrom, typename TTo, vector_machine M, int Shift = 0, int Unroll = 1, int MinLength = 1, bool RespectPackingOrder = false>
 class packer {
-    static_assert(Shift <= 31, "Shift must be in the range 0..31");
-    using MT = vxsort_machine_traits<TFrom, M>;
-    typedef typename MT::TV TV;
-    typedef typename std::make_unsigned<TFrom>::type TU;
-    static const int N = sizeof(TV) / sizeof(TFrom);
-    typedef alignment_hint<sizeof(TV)> AH;
-
-    static const size_t ALIGN = AH::ALIGN;
-    static const size_t ALIGN_MASK = ALIGN - 1;
-
-    static INLINE void pack_scalar(const TFrom offset, TFrom*& mem_read, TTo*& mem_write) {
-        auto d = *(mem_read++);
-        if (Shift > 0)
-            d >>= Shift;
-        d -= offset;
-        *(mem_write++) = (TTo) d;
-    }
-
-    static INLINE void unpack_scalar(const TFrom offset, TTo*& mem_read, TFrom*& mem_write) {
-        TFrom d = *(--mem_read);
-
-        d += offset;
-
-        if (Shift > 0)
-            d = (TFrom) (((TU) d) << Shift);
-
-        *(--mem_write) = d;
-    }
-
- public:
-
-    static void pack(TFrom *mem, size_t len, TFrom base) {
-        TFrom offset = (base >> Shift) - std::numeric_limits<TTo>::Min();
-        auto baseVec = MT::broadcast(offset);
-
-        auto pre_aligned_mem = reinterpret_cast<TFrom *>(reinterpret_cast<size_t>(mem) & ~ALIGN_MASK);
-
-        auto mem_read = mem;
-        auto mem_write = (TTo *) mem;
-
-        // Include a "special" pass to handle very short scalar
-        // passes
-        if (MinLength < N && len < N) {
-            while (len--) {
-                pack_scalar(offset, mem_read, mem_write);
-            }
-            return;
+    static_assert(Shift <= 31, "Shift must be in the range 0..31");
+    static_assert(Unroll >= 1, "Unroll can be in the range 1..4");
+    static_assert(Unroll <= 4, "Unroll can be in the range 1..4");
+
+    using MT = vxsort_machine_traits<TFrom, M>;
+    typedef typename MT::TV TV;
+    static const int N = sizeof(TV) / sizeof(TFrom);
+    typedef alignment_hint<sizeof(TV)> AH;
+
+    static const size_t ALIGN = AH::ALIGN;
+    static const size_t ALIGN_MASK = ALIGN - 1;
+
+
+    static INLINE TV pack_vectorized(const TV baseVec, TV d01, TV d02) {
+        if (Shift > 0) { // This is statically compiled in/out
+            d01 = MT::shift_right(d01, Shift);
+            d02 = MT::shift_right(d02, Shift);
+        }
+        d01 = MT::sub(d01, baseVec);
+        d02 = MT::sub(d02, baseVec);
+
+        auto packed_data = RespectPackingOrder ?
+            MT::pack_ordered(d01, d02) :
+            MT::pack_unordered(d01, d02);
+        return packed_data;
     }
-        // We have at least
-        // one vector worth of data to handle
-        // Let's try to align to vector size first
-
-        if (pre_aligned_mem < mem) {
-            const auto alignment_point = pre_aligned_mem + N;
-            len -= (alignment_point - mem_read);
-            while (mem_read < alignment_point) {
-                pack_scalar(offset, mem_read, mem_write);
-            }
-        }
-
-        assert(AH::is_aligned(mem_read));
-
-        auto memv_read = (TV *) mem_read;
-        auto memv_write = (TV *) mem_write;
+    static NOINLINE void unpack_vectorized(const TV baseVec, TV d01, TV& u01, TV& u02) {
+        MT::unpack_ordered(d01, u01, u02);
-        auto lenv = len / N;
-        len -= (lenv * N);
+        u01 = MT::add(u01, baseVec);
+        u02 = MT::add(u02, baseVec);
-        while (lenv >= 2) {
-            assert(memv_read >= memv_write);
-
-            auto d01 = MT::load_vec(memv_read);
-            auto d02 = MT::load_vec(memv_read + 1);
-            if (Shift > 0) { // This is statically compiled in/out
-                d01 = MT::shift_right(d01, Shift);
-                d02 = MT::shift_right(d02, Shift);
-            }
-            d01 = MT::sub(d01, baseVec);
-            d02 = MT::sub(d02, baseVec);
-
-            auto packed_data = RespectPackingOrder ?
- MT::pack_ordered(d01, d02) : - MT::pack_unordered(d01, d02); - - MT::store_vec(memv_write, packed_data); - - memv_read += 2; - memv_write++; - lenv -= 2; + if (Shift > 0) { // This is statically compiled in/out + u01 = MT::shift_left(u01, Shift); + u02 = MT::shift_left(u02, Shift); + } } - len += lenv * N; - - mem_read = (TFrom *) memv_read; - mem_write = (TTo *) memv_write; - - while (len-- > 0) { - pack_scalar(offset, mem_read, mem_write); - } - } - - static void unpack(TTo *mem, size_t len, TFrom base) { - TFrom offset = (base >> Shift) - std::numeric_limits::Min(); - auto baseVec = MT::broadcast(offset); - - auto mem_read = mem + len; - auto mem_write = ((TFrom *) mem) + len; - - - // Include a "special" pass to handle very short scalar - // passers - if (MinLength < 2*N && len < 2*N) { - while (len--) { - unpack_scalar(offset, mem_read, mem_write); - } - return; - } - - auto pre_aligned_mem = reinterpret_cast(reinterpret_cast(mem_read) & ~ALIGN_MASK); - - if (pre_aligned_mem < mem_read) { - len -= (mem_read - pre_aligned_mem); - while (mem_read > pre_aligned_mem) { - unpack_scalar(offset, mem_read, mem_write); - } - } - - assert(AH::is_aligned(mem_read)); - - auto lenv = len / (N*2); - auto memv_read = ((TV *) mem_read) - 1; - auto memv_write = ((TV *) mem_write) - 2; - len -= lenv * N * 2; - - while (lenv > 0) { - assert(memv_read <= memv_write); - TV d01, d02; - - if (std::numeric_limits::Min() < 0) - MT::unpack_ordered_signed(MT::load_vec(memv_read), d01, d02); - else - MT::unpack_ordered_unsigned(MT::load_vec(memv_read), d01, d02); - - d01 = MT::add(d01, baseVec); - d02 = MT::add(d02, baseVec); - - if (Shift > 0) { // This is statically compiled in/out - d01 = MT::shift_left(d01, Shift); - d02 = MT::shift_left(d02, Shift); - } - - MT::store_vec(memv_write, d01); - MT::store_vec(memv_write + 1, d02); - - memv_read -= 1; - memv_write -= 2; - lenv--; + public: + + static void pack(TFrom *mem, size_t len, TFrom base) { + TFrom offset = MT::template shift_n_sub(base, (TFrom) std::numeric_limits::Min()); + auto baseVec = MT::broadcast(offset); + + auto pre_aligned_mem = reinterpret_cast(reinterpret_cast(mem) & ~ALIGN_MASK); + + auto mem_read = mem; + auto mem_write = (TTo *) mem; + + // Include a "special" pass to handle very short scalar + // passes + if (MinLength < N && len < N) { + while (len--) { + *(mem_write++) = (TTo) MT::template shift_n_sub(*(mem_read++), offset); + } + return; + } + + // We have at least + // one vector worth of data to handle + // Let's try to align to vector size first + + if (pre_aligned_mem < mem) { + const auto alignment_point = pre_aligned_mem + N; + len -= (alignment_point - mem_read); + while (mem_read < alignment_point) { + *(mem_write++) = (TTo) MT::template shift_n_sub(*(mem_read++), offset); + } + } + + assert(AH::is_aligned(mem_read)); + + auto memv_read = (TV *) mem_read; + auto memv_write = (TV *) mem_write; + + auto lenv = len / N; + len -= (lenv * N); + + while (lenv >= 2 * Unroll) { + assert(memv_read >= memv_write); + + TV d01, d02, d03, d04, d05, d06, d07, d08; + + do { + d01 = MT::load_vec(memv_read + 0); + d02 = MT::load_vec(memv_read + 1); + if (Unroll == 1) break; + d03 = MT::load_vec(memv_read + 2); + d04 = MT::load_vec(memv_read + 3); + if (Unroll == 2) break; + d05 = MT::load_vec(memv_read + 4); + d06 = MT::load_vec(memv_read + 5); + if (Unroll == 3) break; + d07 = MT::load_vec(memv_read + 6); + d08 = MT::load_vec(memv_read + 7); + break; + } while (true); + + do { + MT::store_vec(memv_write + 0, pack_vectorized(baseVec, d01, 
d02)); + if (Unroll == 1) break; + MT::store_vec(memv_write + 1, pack_vectorized(baseVec, d03, d04)); + if (Unroll == 2) break; + MT::store_vec(memv_write + 2, pack_vectorized(baseVec, d05, d06)); + if (Unroll == 3) break; + MT::store_vec(memv_write + 3, pack_vectorized(baseVec, d07, d08)); + break; + } while(true); + + memv_read += 2*Unroll; + memv_write += Unroll; + lenv -= 2*Unroll; + } + + if (Unroll > 1) { + while (lenv >= 2) { + assert(memv_read >= memv_write); + TV d01, d02; + + d01 = MT::load_vec(memv_read + 0); + d02 = MT::load_vec(memv_read + 1); + + MT::store_vec(memv_write + 0, pack_vectorized(baseVec, d01, d02)); + memv_read += 2; + memv_write++; + lenv -= 2; + } + } + + len += lenv * N; + + mem_read = (TFrom *) memv_read; + mem_write = (TTo *) memv_write; + + while (len-- > 0) { + *(mem_write++) = (TTo) MT::template shift_n_sub(*(mem_read++), offset); + } } - mem_read = (TTo *) (memv_read + 1); - mem_write = (TFrom *) (memv_write + 2); - while (len-- > 0) { - unpack_scalar(offset, mem_read, mem_write); + static void unpack(TTo *mem, size_t len, TFrom base) { + TFrom offset = MT::template shift_n_sub(base, (TFrom) std::numeric_limits::Min()); + auto baseVec = MT::broadcast(offset); + + auto mem_read = mem + len; + auto mem_write = ((TFrom *) mem) + len; + + + // Include a "special" pass to handle very short scalar + // passers + if (MinLength < 2 * N && len < 2 * N) { + while (len--) { + *(--mem_write) = MT::template unshift_and_add(*(--mem_read), offset); + } + return; + } + + auto pre_aligned_mem = reinterpret_cast(reinterpret_cast(mem_read) & ~ALIGN_MASK); + + if (pre_aligned_mem < mem_read) { + len -= (mem_read - pre_aligned_mem); + while (mem_read > pre_aligned_mem) { + *(--mem_write) = MT::template unshift_and_add(*(--mem_read), offset); + } + } + + assert(AH::is_aligned(mem_read)); + + auto lenv = len / (N * 2); + auto memv_read = ((TV *) mem_read) - 1; + auto memv_write = ((TV *) mem_write) - 2; + len -= lenv * N * 2; + + while (lenv >= Unroll) { + assert(memv_read <= memv_write); + + TV d01, d02, d03, d04; + TV u01, u02, u03, u04, u05, u06, u07, u08; + + do { + d01 = MT::load_vec(memv_read + 0); + if (Unroll == 1) break; + d02 = MT::load_vec(memv_read - 1); + if (Unroll == 2) break; + d03 = MT::load_vec(memv_read - 2); + if (Unroll == 3) break; + d04 = MT::load_vec(memv_read - 3); + break; + } while(true); + + do { + unpack_vectorized(baseVec, d01, u01, u02); + MT::store_vec(memv_write + 0, u01); + MT::store_vec(memv_write + 1, u02); + if (Unroll == 1) break; + unpack_vectorized(baseVec, d02, u03, u04); + MT::store_vec(memv_write - 2, u03); + MT::store_vec(memv_write - 1, u04); + if (Unroll == 2) break; + unpack_vectorized(baseVec, d03, u05, u06); + MT::store_vec(memv_write - 4, u05); + MT::store_vec(memv_write - 3, u06); + if (Unroll == 3) break; + unpack_vectorized(baseVec, d04, u07, u08); + MT::store_vec(memv_write - 6, u07); + MT::store_vec(memv_write - 5, u08); + break; + } while(true); + + memv_read -= Unroll; + memv_write -= 2 * Unroll; + lenv -= Unroll; + } + + if (Unroll > 1) { + while (lenv >= 1) { + assert(memv_read <= memv_write); + + TV d01; + TV u01, u02; + + d01 = MT::load_vec(memv_read + 0); + + unpack_vectorized(baseVec, d01, u01, u02); + MT::store_vec(memv_write + 0, u01); + MT::store_vec(memv_write + 1, u02); + + memv_read--; + memv_write -= 2; + lenv--; + } + } + + mem_read = (TTo *) (memv_read + 1); + mem_write = (TFrom *) (memv_write + 2); + + while (len-- > 0) { + *(--mem_write) = MT::template unshift_and_add(*(--mem_read), offset); + } } - } }; 
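// A scalar model (illustration only, with made-up numbers) of the round-trip
// that packer::pack/unpack perform through the traits' shift_n_sub and
// unshift_and_add: when every int64_t in a run lies inside a narrow,
// Shift-aligned window above some base, it can be stored losslessly as an
// int32_t by shifting the known-zero low bits away and rebasing against
// offset = (base >> Shift) - INT32_MIN, then restored afterwards.
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
    const int     Shift  = 3;                        // low 3 bits known to be zero
    const int64_t base   = INT64_C(0x12345678) << Shift;
    const int64_t offset = (base >> Shift) - std::numeric_limits<int32_t>::min();

    const int64_t v = base + (INT64_C(42) << Shift); // a value from the run

    // pack step, i.e. MT::template shift_n_sub<Shift>(v, offset)
    const int32_t packed = (int32_t) ((v >> Shift) - offset);

    // unpack step, i.e. MT::template unshift_and_add<Shift>(packed, offset)
    const int64_t unpacked = ((int64_t) packed + offset) << Shift;

    // Lossless as long as the run's span stays under the limit can_pack<Shift>
    // checks: ((uint64_t) max_TPACK + 1) << Shift.
    assert(unpacked == v);
    return 0;
}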
-}
-#include "vxsort_targets_disable.h"
-
 #endif // VXSORT_PACKER_H
diff --git a/src/coreclr/src/gc/vxsort/smallsort/avx2_load_mask_tables.cpp b/src/coreclr/src/gc/vxsort/smallsort/avx2_load_mask_tables.cpp
new file mode 100644
index 000000000000..8f2f24f7d5c1
--- /dev/null
+++ b/src/coreclr/src/gc/vxsort/smallsort/avx2_load_mask_tables.cpp
@@ -0,0 +1,41 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "common.h"
+
+#include "bitonic_sort.h"
+
+namespace vxsort {
+namespace smallsort {
+
+extern "C" alignas(16) const uint8_t mask_table_4[M4_SIZE] = {
+    0xFF, 0xFF, 0xFF, 0xFF, // 0b0000 (0)
+    0xFF, 0x00, 0x00, 0x00, // 0b0001 (1)
+    0xFF, 0xFF, 0x00, 0x00, // 0b0011 (3)
+    0xFF, 0xFF, 0xFF, 0x00, // 0b0111 (7)
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+    0xCC, 0xCC, 0xCC, 0xCC, // Garbage to make ASAN happy
+    0xCC, 0xCC, 0xCC, 0xCC, // Garbage to make ASAN happy
+    0xCC, 0xCC, 0xCC, 0xCC, // Garbage to make ASAN happy
+#endif
+#endif
+};
+
+extern "C" alignas(128) const uint8_t mask_table_8[M8_SIZE] = {
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0b00000000 (  0)
+    0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0b00000001 (  1)
+    0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0b00000011 (  3)
+    0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, // 0b00000111 (  7)
+    0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, // 0b00001111 ( 15)
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, // 0b00011111 ( 31)
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, // 0b00111111 ( 63)
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, // 0b01111111 (127)
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+    0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, // Garbage to make ASAN happy
+#endif
+#endif
+};
+}
+}
diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
index 17ddcd815057..b72cecf4536c 100644
--- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
+++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
@@ -7,24 +7,25 @@ using namespace vxsort;
 
 void vxsort::smallsort::bitonic<int32_t, AVX2>::sort(int32_t *ptr, size_t length) {
-    const int N = 8;
-
-    switch(length / N) {
-        case 1: sort_01v(ptr); break;
-        case 2: sort_02v(ptr); break;
-        case 3: sort_03v(ptr); break;
-        case 4: sort_04v(ptr); break;
-        case 5: sort_05v(ptr); break;
-        case 6: sort_06v(ptr); break;
-        case 7: sort_07v(ptr); break;
-        case 8: sort_08v(ptr); break;
-        case 9: sort_09v(ptr); break;
-        case 10: sort_10v(ptr); break;
-        case 11: sort_11v(ptr); break;
-        case 12: sort_12v(ptr); break;
-        case 13: sort_13v(ptr); break;
-        case 14: sort_14v(ptr); break;
-        case 15: sort_15v(ptr); break;
-        case 16: sort_16v(ptr); break;
+    const auto fullvlength = length / N;
+    const int remainder = (int) (length - fullvlength * N);
+    const auto v = fullvlength + ((remainder > 0) ? 1 : 0);
+    switch(v) {
+        case 1: sort_01v_alt(ptr, remainder); break;
+        case 2: sort_02v_alt(ptr, remainder); break;
+        case 3: sort_03v_alt(ptr, remainder); break;
+        case 4: sort_04v_alt(ptr, remainder); break;
+        case 5: sort_05v_alt(ptr, remainder); break;
+        case 6: sort_06v_alt(ptr, remainder); break;
+        case 7: sort_07v_alt(ptr, remainder); break;
+        case 8: sort_08v_alt(ptr, remainder); break;
+        case 9: sort_09v_alt(ptr, remainder); break;
+        case 10: sort_10v_alt(ptr, remainder); break;
+        case 11: sort_11v_alt(ptr, remainder); break;
+        case 12: sort_12v_alt(ptr, remainder); break;
+        case 13: sort_13v_alt(ptr, remainder); break;
+        case 14: sort_14v_alt(ptr, remainder); break;
+        case 15: sort_15v_alt(ptr, remainder); break;
+        case 16: sort_16v_alt(ptr, remainder); break;
     }
 }
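// A standalone sketch (illustration only; the array contents and the inline
// mask row are made up, mirroring the mask_table_8 row for remainder == 3) of
// the remainder-handling technique the new sort() dispatch above enables: the
// final partial vector is read with a masked load, inactive lanes are padded
// with MAX so they sink to the tail of an ascending sort, and the same mask
// keeps the store from touching memory past the end of the buffer.
#include <cstdint>
#include <cstdio>
#include <immintrin.h>
#include <limits>

int main() {
    alignas(32) int32_t data[8] = {7, 3, 5, 0, 0, 0, 0, 0}; // only 3 valid elements
    const int remainder = 3;
    (void) remainder;

    // Lane mask with the top bit set in the first `remainder` lanes, as the
    // mask_table_8 + remainder * N lookup produces for 0b00000111.
    alignas(16) const uint8_t row[8] = {0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00};
    const __m256i mask = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) row));

    // Masked load; OR in MAX on the inactive lanes so they compare as largest.
    const __m256i max = _mm256_set1_epi32(std::numeric_limits<int32_t>::max());
    __m256i d = _mm256_or_si256(_mm256_maskload_epi32(data, mask),
                                _mm256_andnot_si256(mask, max));

    // ... a bitonic sorting network such as sort_01v_ascending would run on d here ...

    // Masked store: only the first `remainder` lanes are written back.
    _mm256_maskstore_epi32(data, mask, d);

    printf("%d %d %d\n", data[0], data[1], data[2]);
    return 0;
}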
diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h
index 79bdbcc870d4..8557cf462033 100644
--- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h
+++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.h
@@ -3,7 +3,7 @@
 /////////////////////////////////////////////////////////////////////////////
 ////
-// This file was auto-generated by a tool at 2020-06-22 05:27:48
+// This file was auto-generated by a tool at 2020-07-21 14:05:39
 //
 // It is recommended you DO NOT directly edit this file but instead edit
 // the code-generator that generated this source file instead.
@@ -33,7 +33,13 @@ namespace vxsort {
 namespace smallsort {
+
+extern "C" const uint8_t mask_table_4[16];
+extern "C" const uint8_t mask_table_8[64];
+
 template<> struct bitonic<int32_t, AVX2> {
+    static const int N = 8;
+    static constexpr int32_t MAX = std::numeric_limits<int32_t>::Max();
 public:
 
     static INLINE void sort_01v_ascending(__m256i& d01) {
@@ -252,7 +258,7 @@ template<> struct bitonic<int32_t, AVX2> {
         sort_02v_merge_descending(d01, d02);
         sort_01v_merge_descending(d03);
     }
-    static INLINE void sort_04v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
+    static NOINLINE void sort_04v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
         __m256i tmp;
 
         sort_02v_ascending(d01, d02);
@@ -271,7 +277,7 @@ template<> struct bitonic<int32_t, AVX2> {
         sort_02v_merge_ascending(d01, d02);
         sort_02v_merge_ascending(d03, d04);
     }
-    static INLINE void sort_04v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
+    static NOINLINE void sort_04v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
         __m256i tmp;
 
         sort_02v_descending(d01, d02);
@@ -290,7 +296,7 @@ template<> struct bitonic<int32_t, AVX2> {
         sort_02v_merge_descending(d01, d02);
         sort_02v_merge_descending(d03, d04);
     }
-    static INLINE void sort_04v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
+    static NOINLINE void sort_04v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
         __m256i tmp;
 
         tmp = d01;
@@ -308,7 +314,7 @@ template<> struct bitonic<int32_t, AVX2> {
         sort_02v_merge_ascending(d01, d02);
         sort_02v_merge_ascending(d03, d04);
     }
-    static INLINE void sort_04v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
+    static NOINLINE void sort_04v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) {
         __m256i tmp;
 
         tmp = d01;
@@ -548,7 +554,7 @@ template<> struct bitonic<int32_t, AVX2> {
         sort_04v_merge_descending(d01, d02, d03, d04);
         sort_03v_merge_descending(d05, d06, d07);
     }
-    static INLINE void sort_08v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06,
__m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp; sort_04v_ascending(d01, d02, d03, d04); @@ -577,7 +583,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp; sort_04v_descending(d01, d02, d03, d04); @@ -606,7 +612,7 @@ template<> struct bitonic { sort_04v_merge_descending(d01, d02, d03, d04); sort_04v_merge_descending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp; tmp = d01; @@ -636,7 +642,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp; tmp = d01; @@ -780,7 +786,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_03v_merge_descending(d09, d10, d11); } - static INLINE void sort_12v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { + static NOINLINE void sort_12v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { __m256i tmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -809,7 +815,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_04v_merge_ascending(d09, d10, d11, d12); } - static INLINE void sort_12v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { + static NOINLINE void sort_12v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { __m256i tmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -1072,7 +1078,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_07v_merge_descending(d09, d10, d11, d12, d13, d14, d15); } - static INLINE void sort_16v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { + static NOINLINE void sort_16v_ascending(__m256i& d01, __m256i& 
d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { __m256i tmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -1121,7 +1127,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_08v_merge_ascending(d09, d10, d11, d12, d13, d14, d15, d16); } - static INLINE void sort_16v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { + static NOINLINE void sort_16v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { __m256i tmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -1171,80 +1177,94 @@ template<> struct bitonic { sort_08v_merge_descending(d09, d10, d11, d12, d13, d14, d15, d16); } - static NOINLINE void sort_01v(int32_t *ptr) { - __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; + static NOINLINE void sort_01v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); + + __m256i d01 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 0), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_01v_ascending(d01); - _mm256_storeu_si256((__m256i *) ptr + 0, d01); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 0), mask, d01); + } + + static NOINLINE void sort_02v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_02v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; - __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; + __m256i d02 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 1), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_02v_ascending(d01, d02); _mm256_storeu_si256((__m256i *) ptr + 0, d01); - _mm256_storeu_si256((__m256i *) ptr + 1, d02); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 1), mask, d02); + } + + static NOINLINE void sort_03v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_03v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; - __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; + __m256i d03 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 2), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_03v_ascending(d01, d02, d03); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); - _mm256_storeu_si256((__m256i *) ptr + 2, d03); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 2), mask, d03); + } + + static NOINLINE void sort_04v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_04v(int32_t *ptr) { __m256i d01 = 
_mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; - __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; + __m256i d04 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 3), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_04v_ascending(d01, d02, d03, d04); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); _mm256_storeu_si256((__m256i *) ptr + 2, d03); - _mm256_storeu_si256((__m256i *) ptr + 3, d04); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 3), mask, d04); + } + + static NOINLINE void sort_05v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_05v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; - __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; + __m256i d05 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 4), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_05v_ascending(d01, d02, d03, d04, d05); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); _mm256_storeu_si256((__m256i *) ptr + 2, d03); _mm256_storeu_si256((__m256i *) ptr + 3, d04); - _mm256_storeu_si256((__m256i *) ptr + 4, d05); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 4), mask, d05); + } + + static NOINLINE void sort_06v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_06v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; - __m256i d06 = _mm256_lddqu_si256((__m256i const *) ptr + 5);; + __m256i d06 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 5), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_06v_ascending(d01, d02, d03, d04, d05, d06); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); _mm256_storeu_si256((__m256i *) ptr + 2, d03); _mm256_storeu_si256((__m256i *) ptr + 3, d04); _mm256_storeu_si256((__m256i *) ptr + 4, d05); - _mm256_storeu_si256((__m256i *) ptr + 5, d06); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 5), mask, d06); + } + + static NOINLINE void sort_07v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_07v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; __m256i d06 = _mm256_lddqu_si256((__m256i const *) ptr + 5);; - __m256i d07 = _mm256_lddqu_si256((__m256i 
const *) ptr + 6);; + __m256i d07 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 6), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_07v_ascending(d01, d02, d03, d04, d05, d06, d07); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1252,10 +1272,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 3, d04); _mm256_storeu_si256((__m256i *) ptr + 4, d05); _mm256_storeu_si256((__m256i *) ptr + 5, d06); - _mm256_storeu_si256((__m256i *) ptr + 6, d07); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 6), mask, d07); + } + + static NOINLINE void sort_08v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_08v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1263,7 +1285,7 @@ template<> struct bitonic { __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; __m256i d06 = _mm256_lddqu_si256((__m256i const *) ptr + 5);; __m256i d07 = _mm256_lddqu_si256((__m256i const *) ptr + 6);; - __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; + __m256i d08 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 7), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1272,10 +1294,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 4, d05); _mm256_storeu_si256((__m256i *) ptr + 5, d06); _mm256_storeu_si256((__m256i *) ptr + 6, d07); - _mm256_storeu_si256((__m256i *) ptr + 7, d08); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 7), mask, d08); + } + + static NOINLINE void sort_09v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_09v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1284,7 +1308,7 @@ template<> struct bitonic { __m256i d06 = _mm256_lddqu_si256((__m256i const *) ptr + 5);; __m256i d07 = _mm256_lddqu_si256((__m256i const *) ptr + 6);; __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; - __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; + __m256i d09 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 8), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_09v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1294,10 +1318,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 5, d06); _mm256_storeu_si256((__m256i *) ptr + 6, d07); _mm256_storeu_si256((__m256i *) ptr + 7, d08); - _mm256_storeu_si256((__m256i *) ptr + 8, d09); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 8), mask, d09); + } + + static NOINLINE void sort_10v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_10v(int32_t *ptr) { 
__m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1307,7 +1333,7 @@ template<> struct bitonic { __m256i d07 = _mm256_lddqu_si256((__m256i const *) ptr + 6);; __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; - __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; + __m256i d10 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 9), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_10v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1318,10 +1344,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 6, d07); _mm256_storeu_si256((__m256i *) ptr + 7, d08); _mm256_storeu_si256((__m256i *) ptr + 8, d09); - _mm256_storeu_si256((__m256i *) ptr + 9, d10); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 9), mask, d10); + } + + static NOINLINE void sort_11v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_11v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1332,7 +1360,7 @@ template<> struct bitonic { __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; - __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; + __m256i d11 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 10), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_11v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1344,10 +1372,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 7, d08); _mm256_storeu_si256((__m256i *) ptr + 8, d09); _mm256_storeu_si256((__m256i *) ptr + 9, d10); - _mm256_storeu_si256((__m256i *) ptr + 10, d11); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 10), mask, d11); + } + + static NOINLINE void sort_12v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_12v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1359,7 +1389,7 @@ template<> struct bitonic { __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; - __m256i d12 = _mm256_lddqu_si256((__m256i const *) ptr + 11);; + __m256i d12 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 11), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_12v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1372,10 +1402,12 @@ 
template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 8, d09); _mm256_storeu_si256((__m256i *) ptr + 9, d10); _mm256_storeu_si256((__m256i *) ptr + 10, d11); - _mm256_storeu_si256((__m256i *) ptr + 11, d12); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 11), mask, d12); + } + + static NOINLINE void sort_13v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_13v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1388,7 +1420,7 @@ template<> struct bitonic { __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; __m256i d12 = _mm256_lddqu_si256((__m256i const *) ptr + 11);; - __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; + __m256i d13 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 12), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_13v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1402,10 +1434,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 9, d10); _mm256_storeu_si256((__m256i *) ptr + 10, d11); _mm256_storeu_si256((__m256i *) ptr + 11, d12); - _mm256_storeu_si256((__m256i *) ptr + 12, d13); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 12), mask, d13); + } + + static NOINLINE void sort_14v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_14v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1419,7 +1453,7 @@ template<> struct bitonic { __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; __m256i d12 = _mm256_lddqu_si256((__m256i const *) ptr + 11);; __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; - __m256i d14 = _mm256_lddqu_si256((__m256i const *) ptr + 13);; + __m256i d14 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 13), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_14v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1434,10 +1468,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 10, d11); _mm256_storeu_si256((__m256i *) ptr + 11, d12); _mm256_storeu_si256((__m256i *) ptr + 12, d13); - _mm256_storeu_si256((__m256i *) ptr + 13, d14); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 13), mask, d14); + } + + static NOINLINE void sort_15v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_15v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1452,7 +1488,7 @@ template<> struct bitonic { __m256i d12 = _mm256_lddqu_si256((__m256i const 
*) ptr + 11);; __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; __m256i d14 = _mm256_lddqu_si256((__m256i const *) ptr + 13);; - __m256i d15 = _mm256_lddqu_si256((__m256i const *) ptr + 14);; + __m256i d15 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 14), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_15v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1468,10 +1504,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 11, d12); _mm256_storeu_si256((__m256i *) ptr + 12, d13); _mm256_storeu_si256((__m256i *) ptr + 13, d14); - _mm256_storeu_si256((__m256i *) ptr + 14, d15); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 14), mask, d15); + } + + static NOINLINE void sort_16v_alt(int32_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi32(_mm_loadu_si128((__m128i*)(mask_table_8 + remainder * N))); - static NOINLINE void sort_16v(int32_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1487,7 +1525,7 @@ template<> struct bitonic { __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; __m256i d14 = _mm256_lddqu_si256((__m256i const *) ptr + 13);; __m256i d15 = _mm256_lddqu_si256((__m256i const *) ptr + 14);; - __m256i d16 = _mm256_lddqu_si256((__m256i const *) ptr + 15);; + __m256i d16 = _mm256_or_si256(_mm256_maskload_epi32((int32_t const *) ((__m256i const *) ptr + 15), mask), _mm256_andnot_si256(mask, _mm256_set1_epi32(MAX))); sort_16v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15, d16); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1504,8 +1542,8 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 12, d13); _mm256_storeu_si256((__m256i *) ptr + 13, d14); _mm256_storeu_si256((__m256i *) ptr + 14, d15); - _mm256_storeu_si256((__m256i *) ptr + 15, d16); -} + _mm256_maskstore_epi32((int32_t *) ((__m256i *) ptr + 15), mask, d16); + } static void sort(int32_t *ptr, size_t length); }; diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp index 00360ae70f0c..b74e0d5c2671 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp +++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp @@ -7,24 +7,25 @@ using namespace vxsort; void vxsort::smallsort::bitonic::sort(int64_t *ptr, size_t length) { - const int N = 4; - - switch(length / N) { - case 1: sort_01v(ptr); break; - case 2: sort_02v(ptr); break; - case 3: sort_03v(ptr); break; - case 4: sort_04v(ptr); break; - case 5: sort_05v(ptr); break; - case 6: sort_06v(ptr); break; - case 7: sort_07v(ptr); break; - case 8: sort_08v(ptr); break; - case 9: sort_09v(ptr); break; - case 10: sort_10v(ptr); break; - case 11: sort_11v(ptr); break; - case 12: sort_12v(ptr); break; - case 13: sort_13v(ptr); break; - case 14: sort_14v(ptr); break; - case 15: sort_15v(ptr); break; - case 16: sort_16v(ptr); break; + const auto fullvlength = length / N; + const int remainder = (int) (length - fullvlength * N); + const auto v = fullvlength + ((remainder > 0) ? 
1 : 0); + switch(v) { + case 1: sort_01v_alt(ptr, remainder); break; + case 2: sort_02v_alt(ptr, remainder); break; + case 3: sort_03v_alt(ptr, remainder); break; + case 4: sort_04v_alt(ptr, remainder); break; + case 5: sort_05v_alt(ptr, remainder); break; + case 6: sort_06v_alt(ptr, remainder); break; + case 7: sort_07v_alt(ptr, remainder); break; + case 8: sort_08v_alt(ptr, remainder); break; + case 9: sort_09v_alt(ptr, remainder); break; + case 10: sort_10v_alt(ptr, remainder); break; + case 11: sort_11v_alt(ptr, remainder); break; + case 12: sort_12v_alt(ptr, remainder); break; + case 13: sort_13v_alt(ptr, remainder); break; + case 14: sort_14v_alt(ptr, remainder); break; + case 15: sort_15v_alt(ptr, remainder); break; + case 16: sort_16v_alt(ptr, remainder); break; } } diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h index 5e9d2fea0dcf..475fac681b1b 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h +++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.h @@ -3,7 +3,7 @@ ///////////////////////////////////////////////////////////////////////////// //// -// This file was auto-generated by a tool at 2020-06-22 05:27:48 +// This file was auto-generated by a tool at 2020-07-21 14:05:39 // // It is recommended you DO NOT directly edit this file but instead edit // the code-generator that generated this source file instead. @@ -33,7 +33,13 @@ namespace vxsort { namespace smallsort { + +extern "C" const uint8_t mask_table_4[16]; +extern "C" const uint8_t mask_table_8[64]; + template<> struct bitonic { + static const int N = 4; + static constexpr int64_t MAX = std::numeric_limits::Max(); public: static INLINE void sort_01v_ascending(__m256i& d01) { @@ -212,7 +218,7 @@ template<> struct bitonic { sort_02v_merge_descending(d01, d02); sort_01v_merge_descending(d03); } - static INLINE void sort_04v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { + static NOINLINE void sort_04v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { __m256i tmp, cmp; sort_02v_ascending(d01, d02); @@ -231,7 +237,7 @@ template<> struct bitonic { sort_02v_merge_ascending(d01, d02); sort_02v_merge_ascending(d03, d04); } - static INLINE void sort_04v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { + static NOINLINE void sort_04v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { __m256i tmp, cmp; sort_02v_descending(d01, d02); @@ -250,7 +256,7 @@ template<> struct bitonic { sort_02v_merge_descending(d01, d02); sort_02v_merge_descending(d03, d04); } - static INLINE void sort_04v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { + static NOINLINE void sort_04v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { __m256i tmp, cmp; tmp = d01; @@ -268,7 +274,7 @@ template<> struct bitonic { sort_02v_merge_ascending(d01, d02); sort_02v_merge_ascending(d03, d04); } - static INLINE void sort_04v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { + static NOINLINE void sort_04v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04) { __m256i tmp, cmp; tmp = d01; @@ -508,7 +514,7 @@ template<> struct bitonic { sort_04v_merge_descending(d01, d02, d03, d04); sort_03v_merge_descending(d05, d06, d07); } - static INLINE void sort_08v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& 
d05, __m256i& d06, __m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp, cmp; sort_04v_ascending(d01, d02, d03, d04); @@ -537,7 +543,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp, cmp; sort_04v_descending(d01, d02, d03, d04); @@ -566,7 +572,7 @@ template<> struct bitonic { sort_04v_merge_descending(d01, d02, d03, d04); sort_04v_merge_descending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_merge_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp, cmp; tmp = d01; @@ -596,7 +602,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { + static NOINLINE void sort_08v_merge_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08) { __m256i tmp, cmp; tmp = d01; @@ -740,7 +746,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_03v_merge_descending(d09, d10, d11); } - static INLINE void sort_12v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { + static NOINLINE void sort_12v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { __m256i tmp, cmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -769,7 +775,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_04v_merge_ascending(d09, d10, d11, d12); } - static INLINE void sort_12v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { + static NOINLINE void sort_12v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12) { __m256i tmp, cmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -1032,7 +1038,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_07v_merge_descending(d09, d10, d11, d12, d13, d14, d15); } - static INLINE void sort_16v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { + static NOINLINE 
void sort_16v_ascending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { __m256i tmp, cmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -1081,7 +1087,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_08v_merge_ascending(d09, d10, d11, d12, d13, d14, d15, d16); } - static INLINE void sort_16v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { + static NOINLINE void sort_16v_descending(__m256i& d01, __m256i& d02, __m256i& d03, __m256i& d04, __m256i& d05, __m256i& d06, __m256i& d07, __m256i& d08, __m256i& d09, __m256i& d10, __m256i& d11, __m256i& d12, __m256i& d13, __m256i& d14, __m256i& d15, __m256i& d16) { __m256i tmp, cmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -1131,80 +1137,94 @@ template<> struct bitonic { sort_08v_merge_descending(d09, d10, d11, d12, d13, d14, d15, d16); } - static NOINLINE void sort_01v(int64_t *ptr) { - __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; + static NOINLINE void sort_01v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); + + __m256i d01 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 0), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_01v_ascending(d01); - _mm256_storeu_si256((__m256i *) ptr + 0, d01); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 0), mask, d01); + } + + static NOINLINE void sort_02v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_02v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; - __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; + __m256i d02 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 1), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_02v_ascending(d01, d02); _mm256_storeu_si256((__m256i *) ptr + 0, d01); - _mm256_storeu_si256((__m256i *) ptr + 1, d02); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 1), mask, d02); + } + + static NOINLINE void sort_03v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_03v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; - __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; + __m256i d03 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 2), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_03v_ascending(d01, d02, d03); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); - _mm256_storeu_si256((__m256i *) ptr + 2, d03); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 2), mask, d03); + } + + static NOINLINE void sort_04v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * 
N))); - static NOINLINE void sort_04v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; - __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; + __m256i d04 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 3), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_04v_ascending(d01, d02, d03, d04); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); _mm256_storeu_si256((__m256i *) ptr + 2, d03); - _mm256_storeu_si256((__m256i *) ptr + 3, d04); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 3), mask, d04); + } + + static NOINLINE void sort_05v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_05v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; - __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; + __m256i d05 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 4), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_05v_ascending(d01, d02, d03, d04, d05); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); _mm256_storeu_si256((__m256i *) ptr + 2, d03); _mm256_storeu_si256((__m256i *) ptr + 3, d04); - _mm256_storeu_si256((__m256i *) ptr + 4, d05); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 4), mask, d05); + } + + static NOINLINE void sort_06v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_06v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; - __m256i d06 = _mm256_lddqu_si256((__m256i const *) ptr + 5);; + __m256i d06 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 5), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_06v_ascending(d01, d02, d03, d04, d05, d06); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); _mm256_storeu_si256((__m256i *) ptr + 2, d03); _mm256_storeu_si256((__m256i *) ptr + 3, d04); _mm256_storeu_si256((__m256i *) ptr + 4, d05); - _mm256_storeu_si256((__m256i *) ptr + 5, d06); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 5), mask, d06); + } + + static NOINLINE void sort_07v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_07v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; __m256i d04 = _mm256_lddqu_si256((__m256i const *) ptr + 3);; __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; __m256i d06 = 
_mm256_lddqu_si256((__m256i const *) ptr + 5);; - __m256i d07 = _mm256_lddqu_si256((__m256i const *) ptr + 6);; + __m256i d07 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 6), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_07v_ascending(d01, d02, d03, d04, d05, d06, d07); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1212,10 +1232,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 3, d04); _mm256_storeu_si256((__m256i *) ptr + 4, d05); _mm256_storeu_si256((__m256i *) ptr + 5, d06); - _mm256_storeu_si256((__m256i *) ptr + 6, d07); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 6), mask, d07); + } + + static NOINLINE void sort_08v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_08v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1223,7 +1245,7 @@ template<> struct bitonic { __m256i d05 = _mm256_lddqu_si256((__m256i const *) ptr + 4);; __m256i d06 = _mm256_lddqu_si256((__m256i const *) ptr + 5);; __m256i d07 = _mm256_lddqu_si256((__m256i const *) ptr + 6);; - __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; + __m256i d08 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 7), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1232,10 +1254,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 4, d05); _mm256_storeu_si256((__m256i *) ptr + 5, d06); _mm256_storeu_si256((__m256i *) ptr + 6, d07); - _mm256_storeu_si256((__m256i *) ptr + 7, d08); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 7), mask, d08); + } + + static NOINLINE void sort_09v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_09v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1244,7 +1268,7 @@ template<> struct bitonic { __m256i d06 = _mm256_lddqu_si256((__m256i const *) ptr + 5);; __m256i d07 = _mm256_lddqu_si256((__m256i const *) ptr + 6);; __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; - __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; + __m256i d09 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 8), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_09v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1254,10 +1278,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 5, d06); _mm256_storeu_si256((__m256i *) ptr + 6, d07); _mm256_storeu_si256((__m256i *) ptr + 7, d08); - _mm256_storeu_si256((__m256i *) ptr + 8, d09); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 8), mask, d09); + } + + static NOINLINE void sort_10v_alt(int64_t *ptr, int remainder) { + const auto mask = 
_mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_10v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1267,7 +1293,7 @@ template<> struct bitonic { __m256i d07 = _mm256_lddqu_si256((__m256i const *) ptr + 6);; __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; - __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; + __m256i d10 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 9), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_10v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1278,10 +1304,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 6, d07); _mm256_storeu_si256((__m256i *) ptr + 7, d08); _mm256_storeu_si256((__m256i *) ptr + 8, d09); - _mm256_storeu_si256((__m256i *) ptr + 9, d10); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 9), mask, d10); + } + + static NOINLINE void sort_11v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_11v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1292,7 +1320,7 @@ template<> struct bitonic { __m256i d08 = _mm256_lddqu_si256((__m256i const *) ptr + 7);; __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; - __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; + __m256i d11 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 10), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_11v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1304,10 +1332,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 7, d08); _mm256_storeu_si256((__m256i *) ptr + 8, d09); _mm256_storeu_si256((__m256i *) ptr + 9, d10); - _mm256_storeu_si256((__m256i *) ptr + 10, d11); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 10), mask, d11); + } + + static NOINLINE void sort_12v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_12v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1319,7 +1349,7 @@ template<> struct bitonic { __m256i d09 = _mm256_lddqu_si256((__m256i const *) ptr + 8);; __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; - __m256i d12 = _mm256_lddqu_si256((__m256i const *) ptr + 11);; + __m256i d12 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 11), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_12v_ascending(d01, d02, d03, d04, d05, d06, d07, 
d08, d09, d10, d11, d12); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1332,10 +1362,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 8, d09); _mm256_storeu_si256((__m256i *) ptr + 9, d10); _mm256_storeu_si256((__m256i *) ptr + 10, d11); - _mm256_storeu_si256((__m256i *) ptr + 11, d12); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 11), mask, d12); + } + + static NOINLINE void sort_13v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_13v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1348,7 +1380,7 @@ template<> struct bitonic { __m256i d10 = _mm256_lddqu_si256((__m256i const *) ptr + 9);; __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; __m256i d12 = _mm256_lddqu_si256((__m256i const *) ptr + 11);; - __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; + __m256i d13 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 12), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_13v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1362,10 +1394,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 9, d10); _mm256_storeu_si256((__m256i *) ptr + 10, d11); _mm256_storeu_si256((__m256i *) ptr + 11, d12); - _mm256_storeu_si256((__m256i *) ptr + 12, d13); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 12), mask, d13); + } + + static NOINLINE void sort_14v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_14v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1379,7 +1413,7 @@ template<> struct bitonic { __m256i d11 = _mm256_lddqu_si256((__m256i const *) ptr + 10);; __m256i d12 = _mm256_lddqu_si256((__m256i const *) ptr + 11);; __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; - __m256i d14 = _mm256_lddqu_si256((__m256i const *) ptr + 13);; + __m256i d14 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 13), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_14v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1394,10 +1428,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 10, d11); _mm256_storeu_si256((__m256i *) ptr + 11, d12); _mm256_storeu_si256((__m256i *) ptr + 12, d13); - _mm256_storeu_si256((__m256i *) ptr + 13, d14); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 13), mask, d14); + } + + static NOINLINE void sort_15v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_15v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; 
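
The *_alt bodies above all follow one pattern: build a per-lane mask from the remainder, load the final (possibly partial) vector through that mask while padding the dead lanes with MAX, run the unchanged bitonic network, then store the final vector back through the same mask. Below is a minimal standalone sketch of that pattern for the AVX2/int64_t case (N = 4 lanes per __m256i); the mask_table_4 layout shown is an assumption inferred from how the diff indexes it, not the repository's actual table, and the function name is illustrative only.

#include <immintrin.h>
#include <cstdint>
#include <limits>

// Assumed layout: row r of mask_table_4 holds 0xFF for each valid lane and
// 0x00 for each tail lane; row 0 is all-valid, for exact-multiple lengths.
// The 12 trailing zero bytes only keep the 16-byte load below in bounds.
static const uint8_t mask_table_4[4 * 4 + 12] = {
    0xFF, 0xFF, 0xFF, 0xFF,   // remainder == 0: last vector is full
    0xFF, 0x00, 0x00, 0x00,   // remainder == 1
    0xFF, 0xFF, 0x00, 0x00,   // remainder == 2
    0xFF, 0xFF, 0xFF, 0x00,   // remainder == 3
};

static void sort_tail_vector_sketch(int64_t *ptr, int remainder) {
    // Sign-extend four mask bytes into four all-ones / all-zeroes i64 lanes.
    const __m256i mask = _mm256_cvtepi8_epi64(
        _mm_loadu_si128((__m128i const *)(mask_table_4 + remainder * 4)));
    // Load the valid lanes, then OR MAX into the masked-off tail lanes so
    // they sink to the end of the ascending sort.
    __m256i d = _mm256_or_si256(
        _mm256_maskload_epi64((long long const *)ptr, mask),
        _mm256_andnot_si256(mask, _mm256_set1_epi64x(
            std::numeric_limits<int64_t>::max())));
    // ... the bitonic network sorts d (and its sibling vectors) here ...
    // Store only the valid lanes; memory past the array is never written.
    _mm256_maskstore_epi64((long long *)ptr, mask, d);
}

Worked through for N = 4 and length = 7 (remainder = 3): input [9 1 8 3 | 5 2 7 ?] loads as [9 1 8 3 | 5 2 7 MAX], the network sorts it to [1 2 3 5 | 7 8 9 MAX], and the masked store writes back only [7 8 9], leaving the lane past the array untouched.
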
__m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1412,7 +1448,7 @@ template<> struct bitonic { __m256i d12 = _mm256_lddqu_si256((__m256i const *) ptr + 11);; __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; __m256i d14 = _mm256_lddqu_si256((__m256i const *) ptr + 13);; - __m256i d15 = _mm256_lddqu_si256((__m256i const *) ptr + 14);; + __m256i d15 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 14), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_15v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1428,10 +1464,12 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 11, d12); _mm256_storeu_si256((__m256i *) ptr + 12, d13); _mm256_storeu_si256((__m256i *) ptr + 13, d14); - _mm256_storeu_si256((__m256i *) ptr + 14, d15); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 14), mask, d15); + } + + static NOINLINE void sort_16v_alt(int64_t *ptr, int remainder) { + const auto mask = _mm256_cvtepi8_epi64(_mm_loadu_si128((__m128i*)(mask_table_4 + remainder * N))); - static NOINLINE void sort_16v(int64_t *ptr) { __m256i d01 = _mm256_lddqu_si256((__m256i const *) ptr + 0);; __m256i d02 = _mm256_lddqu_si256((__m256i const *) ptr + 1);; __m256i d03 = _mm256_lddqu_si256((__m256i const *) ptr + 2);; @@ -1447,7 +1485,7 @@ template<> struct bitonic { __m256i d13 = _mm256_lddqu_si256((__m256i const *) ptr + 12);; __m256i d14 = _mm256_lddqu_si256((__m256i const *) ptr + 13);; __m256i d15 = _mm256_lddqu_si256((__m256i const *) ptr + 14);; - __m256i d16 = _mm256_lddqu_si256((__m256i const *) ptr + 15);; + __m256i d16 = _mm256_or_si256(_mm256_maskload_epi64((long long const *) ((__m256i const *) ptr + 15), mask), _mm256_andnot_si256(mask, _mm256_set1_epi64x(MAX))); sort_16v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15, d16); _mm256_storeu_si256((__m256i *) ptr + 0, d01); _mm256_storeu_si256((__m256i *) ptr + 1, d02); @@ -1464,8 +1502,8 @@ template<> struct bitonic { _mm256_storeu_si256((__m256i *) ptr + 12, d13); _mm256_storeu_si256((__m256i *) ptr + 13, d14); _mm256_storeu_si256((__m256i *) ptr + 14, d15); - _mm256_storeu_si256((__m256i *) ptr + 15, d16); -} + _mm256_maskstore_epi64((long long *) ((__m256i *) ptr + 15), mask, d16); + } static void sort(int64_t *ptr, size_t length); }; diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp index 9efdf598ea49..28c2ee4b807c 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp +++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp @@ -7,24 +7,25 @@ using namespace vxsort; void vxsort::smallsort::bitonic::sort(int32_t *ptr, size_t length) { - const int N = 16; - - switch(length / N) { - case 1: sort_01v(ptr); break; - case 2: sort_02v(ptr); break; - case 3: sort_03v(ptr); break; - case 4: sort_04v(ptr); break; - case 5: sort_05v(ptr); break; - case 6: sort_06v(ptr); break; - case 7: sort_07v(ptr); break; - case 8: sort_08v(ptr); break; - case 9: sort_09v(ptr); break; - case 10: sort_10v(ptr); break; - case 11: sort_11v(ptr); break; - case 12: sort_12v(ptr); break; - case 13: sort_13v(ptr); break; - case 14: sort_14v(ptr); break; - case 15: sort_15v(ptr); break; - case 16: sort_16v(ptr); break; + const 
auto fullvlength = length / N; + const int remainder = (int) (length - fullvlength * N); + const auto v = fullvlength + ((remainder > 0) ? 1 : 0); + switch(v) { + case 1: sort_01v_alt(ptr, remainder); break; + case 2: sort_02v_alt(ptr, remainder); break; + case 3: sort_03v_alt(ptr, remainder); break; + case 4: sort_04v_alt(ptr, remainder); break; + case 5: sort_05v_alt(ptr, remainder); break; + case 6: sort_06v_alt(ptr, remainder); break; + case 7: sort_07v_alt(ptr, remainder); break; + case 8: sort_08v_alt(ptr, remainder); break; + case 9: sort_09v_alt(ptr, remainder); break; + case 10: sort_10v_alt(ptr, remainder); break; + case 11: sort_11v_alt(ptr, remainder); break; + case 12: sort_12v_alt(ptr, remainder); break; + case 13: sort_13v_alt(ptr, remainder); break; + case 14: sort_14v_alt(ptr, remainder); break; + case 15: sort_15v_alt(ptr, remainder); break; + case 16: sort_16v_alt(ptr, remainder); break; } } diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h index 21c992c3e0df..1b1843ec093c 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h +++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.h @@ -3,7 +3,7 @@ ///////////////////////////////////////////////////////////////////////////// //// -// This file was auto-generated by a tool at 2020-06-22 05:27:48 +// This file was auto-generated by a tool at 2020-07-21 14:05:39 // // It is recommended you DO NOT directly edit this file but instead edit // the code-generator that generated this source file instead. @@ -35,6 +35,8 @@ namespace vxsort { namespace smallsort { template<> struct bitonic { + static const int N = 16; + static constexpr int32_t MAX = std::numeric_limits<int32_t>::max(); public: static INLINE void sort_01v_ascending(__m512i& d01) { @@ -253,7 +255,7 @@ template<> struct bitonic { sort_02v_merge_descending(d01, d02); sort_01v_merge_descending(d03); } - static INLINE void sort_04v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; sort_02v_ascending(d01, d02); @@ -270,7 +272,7 @@ template<> struct bitonic { sort_02v_merge_ascending(d01, d02); sort_02v_merge_ascending(d03, d04); } - static INLINE void sort_04v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; sort_02v_descending(d01, d02); @@ -287,7 +289,7 @@ template<> struct bitonic { sort_02v_merge_descending(d01, d02); sort_02v_merge_descending(d03, d04); } - static INLINE void sort_04v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; tmp = d01; @@ -301,7 +303,7 @@ template<> struct bitonic { sort_02v_merge_ascending(d01, d02); sort_02v_merge_ascending(d03, d04); } - static INLINE void sort_04v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; tmp = d01; @@ -501,7 +503,7 @@ template<> struct bitonic { sort_04v_merge_descending(d01, d02, d03, d04); sort_03v_merge_descending(d05, d06, d07); } - static INLINE void sort_08v_ascending(__m512i& d01, __m512i& d02, 
__m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; sort_04v_ascending(d01, d02, d03, d04); @@ -526,7 +528,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; sort_04v_descending(d01, d02, d03, d04); @@ -551,7 +553,7 @@ template<> struct bitonic { sort_04v_merge_descending(d01, d02, d03, d04); sort_04v_merge_descending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; tmp = d01; @@ -573,7 +575,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; tmp = d01; @@ -697,7 +699,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_03v_merge_descending(d09, d10, d11); } - static INLINE void sort_12v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { + static NOINLINE void sort_12v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { __m512i tmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -722,7 +724,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_04v_merge_ascending(d09, d10, d11, d12); } - static INLINE void sort_12v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { + static NOINLINE void sort_12v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { __m512i tmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -945,7 +947,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_07v_merge_descending(d09, d10, d11, d12, d13, d14, d15); } - static INLINE void sort_16v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { + static 
NOINLINE void sort_16v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { __m512i tmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -986,7 +988,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_08v_merge_ascending(d09, d10, d11, d12, d13, d14, d15, d16); } - static INLINE void sort_16v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { + static NOINLINE void sort_16v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { __m512i tmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -1028,80 +1030,108 @@ template<> struct bitonic { sort_08v_merge_descending(d09, d10, d11, d12, d13, d14, d15, d16); } - static NOINLINE void sort_01v(int32_t *ptr) { - __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; + static NOINLINE void sort_01v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); + + __m512i d01 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 0)); sort_01v_ascending(d01); - _mm512_storeu_si512((__m512i *) ptr + 0, d01); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 0, mask, d01); + } + + static NOINLINE void sort_02v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_02v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; - __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; + __m512i d02 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 1)); sort_02v_ascending(d01, d02); _mm512_storeu_si512((__m512i *) ptr + 0, d01); - _mm512_storeu_si512((__m512i *) ptr + 1, d02); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 1, mask, d02); + } + + static NOINLINE void sort_03v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_03v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; - __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; + __m512i d03 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 2)); sort_03v_ascending(d01, d02, d03); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); - _mm512_storeu_si512((__m512i *) ptr + 2, d03); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 2, mask, d03); + } + + static NOINLINE void sort_04v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_04v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; - __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; + __m512i d04 = 
_mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 3)); sort_04v_ascending(d01, d02, d03, d04); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); _mm512_storeu_si512((__m512i *) ptr + 2, d03); - _mm512_storeu_si512((__m512i *) ptr + 3, d04); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 3, mask, d04); + } + + static NOINLINE void sort_05v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_05v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; - __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; + __m512i d05 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 4)); sort_05v_ascending(d01, d02, d03, d04, d05); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); _mm512_storeu_si512((__m512i *) ptr + 2, d03); _mm512_storeu_si512((__m512i *) ptr + 3, d04); - _mm512_storeu_si512((__m512i *) ptr + 4, d05); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 4, mask, d05); + } + + static NOINLINE void sort_06v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_06v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; - __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; + __m512i d06 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 5)); sort_06v_ascending(d01, d02, d03, d04, d05, d06); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); _mm512_storeu_si512((__m512i *) ptr + 2, d03); _mm512_storeu_si512((__m512i *) ptr + 3, d04); _mm512_storeu_si512((__m512i *) ptr + 4, d05); - _mm512_storeu_si512((__m512i *) ptr + 5, d06); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 5, mask, d06); + } + + static NOINLINE void sort_07v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_07v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; - __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; + __m512i d07 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 6)); sort_07v_ascending(d01, d02, d03, d04, d05, d06, d07); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1109,10 +1139,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 3, d04); _mm512_storeu_si512((__m512i *) ptr + 4, d05); _mm512_storeu_si512((__m512i *) ptr + 5, d06); - _mm512_storeu_si512((__m512i *) ptr + 6, d07); 
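
The AVX-512 flavor of the same pattern, visible in the hunks above, needs no in-memory mask table: a __mmask16 is one bit per lane, and _mm512_mask_loadu_epi32 fills the unselected lanes from its first argument, so the MAX padding is folded into the load itself. A minimal sketch for the int32_t case (N = 16); the function name is illustrative only.

#include <immintrin.h>
#include <cstdint>
#include <limits>

static void sort_tail_vector_avx512_sketch(int32_t *ptr, int remainder) {
    // remainder == 0 means the last vector is full: (16 - 0) & 15 == 0, so
    // nothing is shifted out and the mask stays 0xFFFF. Otherwise, e.g.:
    //   remainder == 5  -> 0xFFFF >> 11 == 0x001F (low 5 lanes valid)
    //   remainder == 15 -> 0xFFFF >> 1  == 0x7FFF (low 15 lanes valid)
    const __mmask16 mask = (__mmask16)(0xFFFF >> ((16 - remainder) & 15));
    // Valid lanes come from memory, masked-off lanes from set1(MAX), so the
    // tail lanes sink to the end of the ascending sort.
    __m512i d = _mm512_mask_loadu_epi32(
        _mm512_set1_epi32(std::numeric_limits<int32_t>::max()), mask, ptr);
    // ... the bitonic network sorts d (and its sibling vectors) here ...
    _mm512_mask_storeu_epi32(ptr, mask, d);
}
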
-} + _mm512_mask_storeu_epi32((__m512i *) ptr + 6, mask, d07); + } + + static NOINLINE void sort_08v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_08v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1120,7 +1152,9 @@ template<> struct bitonic { __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; - __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; + __m512i d08 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 7)); sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1129,10 +1163,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 4, d05); _mm512_storeu_si512((__m512i *) ptr + 5, d06); _mm512_storeu_si512((__m512i *) ptr + 6, d07); - _mm512_storeu_si512((__m512i *) ptr + 7, d08); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 7, mask, d08); + } + + static NOINLINE void sort_09v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_09v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1141,7 +1177,9 @@ template<> struct bitonic { __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; - __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; + __m512i d09 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 8)); sort_09v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1151,10 +1189,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 5, d06); _mm512_storeu_si512((__m512i *) ptr + 6, d07); _mm512_storeu_si512((__m512i *) ptr + 7, d08); - _mm512_storeu_si512((__m512i *) ptr + 8, d09); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 8, mask, d09); + } + + static NOINLINE void sort_10v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_10v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1164,7 +1204,9 @@ template<> struct bitonic { __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; - __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; + __m512i d10 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 9)); sort_10v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1175,10 +1217,12 @@ template<> struct 
bitonic { _mm512_storeu_si512((__m512i *) ptr + 6, d07); _mm512_storeu_si512((__m512i *) ptr + 7, d08); _mm512_storeu_si512((__m512i *) ptr + 8, d09); - _mm512_storeu_si512((__m512i *) ptr + 9, d10); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 9, mask, d10); + } + + static NOINLINE void sort_11v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_11v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1189,7 +1233,9 @@ template<> struct bitonic { __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; - __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; + __m512i d11 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 10)); sort_11v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1201,10 +1247,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 7, d08); _mm512_storeu_si512((__m512i *) ptr + 8, d09); _mm512_storeu_si512((__m512i *) ptr + 9, d10); - _mm512_storeu_si512((__m512i *) ptr + 10, d11); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 10, mask, d11); + } + + static NOINLINE void sort_12v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_12v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1216,7 +1264,9 @@ template<> struct bitonic { __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; - __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; + __m512i d12 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 11)); sort_12v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1229,10 +1279,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 8, d09); _mm512_storeu_si512((__m512i *) ptr + 9, d10); _mm512_storeu_si512((__m512i *) ptr + 10, d11); - _mm512_storeu_si512((__m512i *) ptr + 11, d12); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 11, mask, d12); + } + + static NOINLINE void sort_13v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_13v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1245,7 +1297,9 @@ template<> struct bitonic { __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; - __m512i d13 = _mm512_loadu_si512((__m512i const *) ptr + 12);; + __m512i d13 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const 
*) ((__m512i const *) ptr + 12)); sort_13v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1259,10 +1313,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 9, d10); _mm512_storeu_si512((__m512i *) ptr + 10, d11); _mm512_storeu_si512((__m512i *) ptr + 11, d12); - _mm512_storeu_si512((__m512i *) ptr + 12, d13); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 12, mask, d13); + } + + static NOINLINE void sort_14v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_14v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1276,7 +1332,9 @@ template<> struct bitonic { __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; __m512i d13 = _mm512_loadu_si512((__m512i const *) ptr + 12);; - __m512i d14 = _mm512_loadu_si512((__m512i const *) ptr + 13);; + __m512i d14 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 13)); sort_14v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1291,10 +1349,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 10, d11); _mm512_storeu_si512((__m512i *) ptr + 11, d12); _mm512_storeu_si512((__m512i *) ptr + 12, d13); - _mm512_storeu_si512((__m512i *) ptr + 13, d14); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 13, mask, d14); + } + + static NOINLINE void sort_15v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_15v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1309,7 +1369,9 @@ template<> struct bitonic { __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; __m512i d13 = _mm512_loadu_si512((__m512i const *) ptr + 12);; __m512i d14 = _mm512_loadu_si512((__m512i const *) ptr + 13);; - __m512i d15 = _mm512_loadu_si512((__m512i const *) ptr + 14);; + __m512i d15 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 14)); sort_15v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1325,10 +1387,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 11, d12); _mm512_storeu_si512((__m512i *) ptr + 12, d13); _mm512_storeu_si512((__m512i *) ptr + 13, d14); - _mm512_storeu_si512((__m512i *) ptr + 14, d15); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 14, mask, d15); + } + + static NOINLINE void sort_16v_alt(int32_t *ptr, int remainder) { + const auto mask = 0xFFFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_16v(int32_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1344,7 +1408,9 @@ template<> struct bitonic { __m512i d13 = _mm512_loadu_si512((__m512i 
const *) ptr + 12);; __m512i d14 = _mm512_loadu_si512((__m512i const *) ptr + 13);; __m512i d15 = _mm512_loadu_si512((__m512i const *) ptr + 14);; - __m512i d16 = _mm512_loadu_si512((__m512i const *) ptr + 15);; + __m512i d16 = _mm512_mask_loadu_epi32(_mm512_set1_epi32(MAX), + mask, + (int32_t const *) ((__m512i const *) ptr + 15)); sort_16v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15, d16); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1361,8 +1427,8 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 12, d13); _mm512_storeu_si512((__m512i *) ptr + 13, d14); _mm512_storeu_si512((__m512i *) ptr + 14, d15); - _mm512_storeu_si512((__m512i *) ptr + 15, d16); -} + _mm512_mask_storeu_epi32((__m512i *) ptr + 15, mask, d16); + } static void sort(int32_t *ptr, size_t length); }; diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp index cf8b62809b36..20648e786d11 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp +++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp @@ -7,24 +7,25 @@ using namespace vxsort; void vxsort::smallsort::bitonic::sort(int64_t *ptr, size_t length) { - const int N = 8; - - switch(length / N) { - case 1: sort_01v(ptr); break; - case 2: sort_02v(ptr); break; - case 3: sort_03v(ptr); break; - case 4: sort_04v(ptr); break; - case 5: sort_05v(ptr); break; - case 6: sort_06v(ptr); break; - case 7: sort_07v(ptr); break; - case 8: sort_08v(ptr); break; - case 9: sort_09v(ptr); break; - case 10: sort_10v(ptr); break; - case 11: sort_11v(ptr); break; - case 12: sort_12v(ptr); break; - case 13: sort_13v(ptr); break; - case 14: sort_14v(ptr); break; - case 15: sort_15v(ptr); break; - case 16: sort_16v(ptr); break; + const auto fullvlength = length / N; + const int remainder = (int) (length - fullvlength * N); + const auto v = fullvlength + ((remainder > 0) ? 
1 : 0); + switch(v) { + case 1: sort_01v_alt(ptr, remainder); break; + case 2: sort_02v_alt(ptr, remainder); break; + case 3: sort_03v_alt(ptr, remainder); break; + case 4: sort_04v_alt(ptr, remainder); break; + case 5: sort_05v_alt(ptr, remainder); break; + case 6: sort_06v_alt(ptr, remainder); break; + case 7: sort_07v_alt(ptr, remainder); break; + case 8: sort_08v_alt(ptr, remainder); break; + case 9: sort_09v_alt(ptr, remainder); break; + case 10: sort_10v_alt(ptr, remainder); break; + case 11: sort_11v_alt(ptr, remainder); break; + case 12: sort_12v_alt(ptr, remainder); break; + case 13: sort_13v_alt(ptr, remainder); break; + case 14: sort_14v_alt(ptr, remainder); break; + case 15: sort_15v_alt(ptr, remainder); break; + case 16: sort_16v_alt(ptr, remainder); break; } } diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h index 483cf5a1e158..b7f16d607373 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h +++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.h @@ -3,7 +3,7 @@ ///////////////////////////////////////////////////////////////////////////// //// -// This file was auto-generated by a tool at 2020-06-22 05:27:48 +// This file was auto-generated by a tool at 2020-07-21 14:05:39 // // It is recommended you DO NOT directly edit this file but instead edit // the code-generator that generated this source file instead. @@ -35,6 +35,8 @@ namespace vxsort { namespace smallsort { template<> struct bitonic { + static const int N = 8; + static constexpr int64_t MAX = std::numeric_limits<int64_t>::max(); public: static INLINE void sort_01v_ascending(__m512i& d01) { @@ -213,7 +215,7 @@ template<> struct bitonic { sort_02v_merge_descending(d01, d02); sort_01v_merge_descending(d03); } - static INLINE void sort_04v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; sort_02v_ascending(d01, d02); @@ -230,7 +232,7 @@ template<> struct bitonic { sort_02v_merge_ascending(d01, d02); sort_02v_merge_ascending(d03, d04); } - static INLINE void sort_04v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; sort_02v_descending(d01, d02); @@ -247,7 +249,7 @@ template<> struct bitonic { sort_02v_merge_descending(d01, d02); sort_02v_merge_descending(d03, d04); } - static INLINE void sort_04v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; tmp = d01; @@ -261,7 +263,7 @@ template<> struct bitonic { sort_02v_merge_ascending(d01, d02); sort_02v_merge_ascending(d03, d04); } - static INLINE void sort_04v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { + static NOINLINE void sort_04v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04) { __m512i tmp; tmp = d01; @@ -461,7 +463,7 @@ template<> struct bitonic { sort_04v_merge_descending(d01, d02, d03, d04); sort_03v_merge_descending(d05, d06, d07); } - static INLINE void sort_08v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_ascending(__m512i& d01, 
__m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; sort_04v_ascending(d01, d02, d03, d04); @@ -486,7 +488,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; sort_04v_descending(d01, d02, d03, d04); @@ -511,7 +513,7 @@ template<> struct bitonic { sort_04v_merge_descending(d01, d02, d03, d04); sort_04v_merge_descending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_merge_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; tmp = d01; @@ -533,7 +535,7 @@ template<> struct bitonic { sort_04v_merge_ascending(d01, d02, d03, d04); sort_04v_merge_ascending(d05, d06, d07, d08); } - static INLINE void sort_08v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { + static NOINLINE void sort_08v_merge_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08) { __m512i tmp; tmp = d01; @@ -657,7 +659,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_03v_merge_descending(d09, d10, d11); } - static INLINE void sort_12v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { + static NOINLINE void sort_12v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { __m512i tmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -682,7 +684,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_04v_merge_ascending(d09, d10, d11, d12); } - static INLINE void sort_12v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { + static NOINLINE void sort_12v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12) { __m512i tmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -905,7 +907,7 @@ template<> struct bitonic { sort_08v_merge_descending(d01, d02, d03, d04, d05, d06, d07, d08); sort_07v_merge_descending(d09, d10, d11, d12, d13, d14, d15); } - static INLINE void sort_16v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { + static NOINLINE void sort_16v_ascending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, 
__m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { __m512i tmp; sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -946,7 +948,7 @@ template<> struct bitonic { sort_08v_merge_ascending(d01, d02, d03, d04, d05, d06, d07, d08); sort_08v_merge_ascending(d09, d10, d11, d12, d13, d14, d15, d16); } - static INLINE void sort_16v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { + static NOINLINE void sort_16v_descending(__m512i& d01, __m512i& d02, __m512i& d03, __m512i& d04, __m512i& d05, __m512i& d06, __m512i& d07, __m512i& d08, __m512i& d09, __m512i& d10, __m512i& d11, __m512i& d12, __m512i& d13, __m512i& d14, __m512i& d15, __m512i& d16) { __m512i tmp; sort_08v_descending(d01, d02, d03, d04, d05, d06, d07, d08); @@ -988,80 +990,108 @@ template<> struct bitonic { sort_08v_merge_descending(d09, d10, d11, d12, d13, d14, d15, d16); } - static NOINLINE void sort_01v(int64_t *ptr) { - __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; + static NOINLINE void sort_01v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); + + __m512i d01 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 0)); sort_01v_ascending(d01); - _mm512_storeu_si512((__m512i *) ptr + 0, d01); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 0, mask, d01); + } + + static NOINLINE void sort_02v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_02v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; - __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; + __m512i d02 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 1)); sort_02v_ascending(d01, d02); _mm512_storeu_si512((__m512i *) ptr + 0, d01); - _mm512_storeu_si512((__m512i *) ptr + 1, d02); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 1, mask, d02); + } + + static NOINLINE void sort_03v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_03v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; - __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; + __m512i d03 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 2)); sort_03v_ascending(d01, d02, d03); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); - _mm512_storeu_si512((__m512i *) ptr + 2, d03); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 2, mask, d03); + } + + static NOINLINE void sort_04v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_04v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; - __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; + __m512i d04 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 3)); sort_04v_ascending(d01, d02, d03, d04); _mm512_storeu_si512((__m512i *) 
ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); _mm512_storeu_si512((__m512i *) ptr + 2, d03); - _mm512_storeu_si512((__m512i *) ptr + 3, d04); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 3, mask, d04); + } + + static NOINLINE void sort_05v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_05v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; - __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; + __m512i d05 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 4)); sort_05v_ascending(d01, d02, d03, d04, d05); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); _mm512_storeu_si512((__m512i *) ptr + 2, d03); _mm512_storeu_si512((__m512i *) ptr + 3, d04); - _mm512_storeu_si512((__m512i *) ptr + 4, d05); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 4, mask, d05); + } + + static NOINLINE void sort_06v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_06v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; - __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; + __m512i d06 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 5)); sort_06v_ascending(d01, d02, d03, d04, d05, d06); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); _mm512_storeu_si512((__m512i *) ptr + 2, d03); _mm512_storeu_si512((__m512i *) ptr + 3, d04); _mm512_storeu_si512((__m512i *) ptr + 4, d05); - _mm512_storeu_si512((__m512i *) ptr + 5, d06); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 5, mask, d06); + } + + static NOINLINE void sort_07v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_07v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; __m512i d04 = _mm512_loadu_si512((__m512i const *) ptr + 3);; __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; - __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; + __m512i d07 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 6)); sort_07v_ascending(d01, d02, d03, d04, d05, d06, d07); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1069,10 +1099,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 3, d04); _mm512_storeu_si512((__m512i *) ptr + 4, d05); _mm512_storeu_si512((__m512i *) ptr + 5, d06); - _mm512_storeu_si512((__m512i *) ptr + 6, d07); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 6, mask, d07); + } + + static NOINLINE void sort_08v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & 
(N-1)); - static NOINLINE void sort_08v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1080,7 +1112,9 @@ template<> struct bitonic { __m512i d05 = _mm512_loadu_si512((__m512i const *) ptr + 4);; __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; - __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; + __m512i d08 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 7)); sort_08v_ascending(d01, d02, d03, d04, d05, d06, d07, d08); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1089,10 +1123,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 4, d05); _mm512_storeu_si512((__m512i *) ptr + 5, d06); _mm512_storeu_si512((__m512i *) ptr + 6, d07); - _mm512_storeu_si512((__m512i *) ptr + 7, d08); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 7, mask, d08); + } + + static NOINLINE void sort_09v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_09v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1101,7 +1137,9 @@ template<> struct bitonic { __m512i d06 = _mm512_loadu_si512((__m512i const *) ptr + 5);; __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; - __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; + __m512i d09 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 8)); sort_09v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1111,10 +1149,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 5, d06); _mm512_storeu_si512((__m512i *) ptr + 6, d07); _mm512_storeu_si512((__m512i *) ptr + 7, d08); - _mm512_storeu_si512((__m512i *) ptr + 8, d09); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 8, mask, d09); + } + + static NOINLINE void sort_10v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_10v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1124,7 +1164,9 @@ template<> struct bitonic { __m512i d07 = _mm512_loadu_si512((__m512i const *) ptr + 6);; __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; - __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; + __m512i d10 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 9)); sort_10v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1135,10 +1177,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 6, d07); _mm512_storeu_si512((__m512i *) ptr + 7, d08); _mm512_storeu_si512((__m512i *) ptr + 8, d09); - _mm512_storeu_si512((__m512i *) ptr + 9, 
d10); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 9, mask, d10); + } + + static NOINLINE void sort_11v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_11v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1149,7 +1193,9 @@ template<> struct bitonic { __m512i d08 = _mm512_loadu_si512((__m512i const *) ptr + 7);; __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; - __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; + __m512i d11 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 10)); sort_11v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1161,10 +1207,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 7, d08); _mm512_storeu_si512((__m512i *) ptr + 8, d09); _mm512_storeu_si512((__m512i *) ptr + 9, d10); - _mm512_storeu_si512((__m512i *) ptr + 10, d11); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 10, mask, d11); + } + + static NOINLINE void sort_12v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_12v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1176,7 +1224,9 @@ template<> struct bitonic { __m512i d09 = _mm512_loadu_si512((__m512i const *) ptr + 8);; __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; - __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; + __m512i d12 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 11)); sort_12v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1189,10 +1239,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 8, d09); _mm512_storeu_si512((__m512i *) ptr + 9, d10); _mm512_storeu_si512((__m512i *) ptr + 10, d11); - _mm512_storeu_si512((__m512i *) ptr + 11, d12); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 11, mask, d12); + } + + static NOINLINE void sort_13v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_13v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1205,7 +1257,9 @@ template<> struct bitonic { __m512i d10 = _mm512_loadu_si512((__m512i const *) ptr + 9);; __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; - __m512i d13 = _mm512_loadu_si512((__m512i const *) ptr + 12);; + __m512i d13 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 12)); sort_13v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) 
ptr + 1, d02); @@ -1219,10 +1273,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 9, d10); _mm512_storeu_si512((__m512i *) ptr + 10, d11); _mm512_storeu_si512((__m512i *) ptr + 11, d12); - _mm512_storeu_si512((__m512i *) ptr + 12, d13); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 12, mask, d13); + } + + static NOINLINE void sort_14v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_14v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1236,7 +1292,9 @@ template<> struct bitonic { __m512i d11 = _mm512_loadu_si512((__m512i const *) ptr + 10);; __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; __m512i d13 = _mm512_loadu_si512((__m512i const *) ptr + 12);; - __m512i d14 = _mm512_loadu_si512((__m512i const *) ptr + 13);; + __m512i d14 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 13)); sort_14v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1251,10 +1309,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 10, d11); _mm512_storeu_si512((__m512i *) ptr + 11, d12); _mm512_storeu_si512((__m512i *) ptr + 12, d13); - _mm512_storeu_si512((__m512i *) ptr + 13, d14); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 13, mask, d14); + } + + static NOINLINE void sort_15v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_15v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1269,7 +1329,9 @@ template<> struct bitonic { __m512i d12 = _mm512_loadu_si512((__m512i const *) ptr + 11);; __m512i d13 = _mm512_loadu_si512((__m512i const *) ptr + 12);; __m512i d14 = _mm512_loadu_si512((__m512i const *) ptr + 13);; - __m512i d15 = _mm512_loadu_si512((__m512i const *) ptr + 14);; + __m512i d15 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 14)); sort_15v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1285,10 +1347,12 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 11, d12); _mm512_storeu_si512((__m512i *) ptr + 12, d13); _mm512_storeu_si512((__m512i *) ptr + 13, d14); - _mm512_storeu_si512((__m512i *) ptr + 14, d15); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 14, mask, d15); + } + + static NOINLINE void sort_16v_alt(int64_t *ptr, int remainder) { + const auto mask = 0xFF >> ((N - remainder) & (N-1)); - static NOINLINE void sort_16v(int64_t *ptr) { __m512i d01 = _mm512_loadu_si512((__m512i const *) ptr + 0);; __m512i d02 = _mm512_loadu_si512((__m512i const *) ptr + 1);; __m512i d03 = _mm512_loadu_si512((__m512i const *) ptr + 2);; @@ -1304,7 +1368,9 @@ template<> struct bitonic { __m512i d13 = _mm512_loadu_si512((__m512i const *) ptr + 12);; __m512i d14 = _mm512_loadu_si512((__m512i const *) ptr + 13);; __m512i d15 = _mm512_loadu_si512((__m512i const *) ptr + 14);; - __m512i d16 = _mm512_loadu_si512((__m512i const *) ptr + 
15);; + __m512i d16 = _mm512_mask_loadu_epi64(_mm512_set1_epi64(MAX), + mask, + (int64_t const *) ((__m512i const *) ptr + 15)); sort_16v_ascending(d01, d02, d03, d04, d05, d06, d07, d08, d09, d10, d11, d12, d13, d14, d15, d16); _mm512_storeu_si512((__m512i *) ptr + 0, d01); _mm512_storeu_si512((__m512i *) ptr + 1, d02); @@ -1321,8 +1387,8 @@ template<> struct bitonic { _mm512_storeu_si512((__m512i *) ptr + 12, d13); _mm512_storeu_si512((__m512i *) ptr + 13, d14); _mm512_storeu_si512((__m512i *) ptr + 14, d15); - _mm512_storeu_si512((__m512i *) ptr + 15, d16); -} + _mm512_mask_storeu_epi64((__m512i *) ptr + 15, mask, d16); + } static void sort(int64_t *ptr, size_t length); }; diff --git a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.h b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.h index 0e87b3742266..ba5635d4d945 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.h +++ b/src/coreclr/src/gc/vxsort/smallsort/bitonic_sort.h @@ -4,16 +4,39 @@ #ifndef BITONIC_SORT_H #define BITONIC_SORT_H -#include #include "../defs.h" #include "../machine_traits.h" namespace vxsort { namespace smallsort { +using namespace std; + +// * We might read the last 4 bytes into a 128-bit vector for 64-bit element masking +// * We might read the last 8 bytes into a 128-bit vector for 32-bit element masking +// This mostly applies to debug mode, since without optimizations, most compilers +// actually execute the instruction stream _mm256_cvtepi8_epiNN + _mm_loadu_si128 as they are given. +// In contrast, release/optimizing compilers turn that very specific instruction pair into +// a more reasonable form: vpmovsxbq ymm0, dword [rax*4 + mask_table_4], eliminating the 128-bit +// load completely and effectively reading 4/8 bytes (depending on whether the instruction is vpmovsxb[q,d]) +#if !defined(__has_feature) +#define __has_feature(a) (0) +#endif +#if !__has_feature(address_sanitizer) +const int M4_SIZE = 16; +const int M8_SIZE = 64; +#else +const int M4_SIZE = 16 + 12; +const int M8_SIZE = 64 + 8; +#endif + +extern "C" const uint8_t mask_table_4[M4_SIZE]; +extern "C" const uint8_t mask_table_8[M8_SIZE]; + template struct bitonic { public: static void sort(T* ptr, size_t length); + static void sort_alt(T* ptr, size_t length); }; } // namespace smallsort } // namespace gcsort diff --git a/src/coreclr/src/gc/vxsort/smallsort/codegen/avx2.py b/src/coreclr/src/gc/vxsort/smallsort/codegen/avx2.py index 5f941d37ff1e..7bf5b86f0413 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/codegen/avx2.py +++ b/src/coreclr/src/gc/vxsort/smallsort/codegen/avx2.py @@ -201,6 +201,33 @@ def get_load_intrinsic(self, v, offset): return f"_mm256_loadu_ps(({t} const *) ((__m256 const *) {v} + {offset}))" return f"_mm256_lddqu_si256((__m256i const *) {v} + {offset});" + def get_mask_load_intrinsic(self, v, offset, mask): + t = self.type + + if self.vector_size() == 4: + int_suffix = "epi64" + max_value = f"_mm256_andnot_si256({mask}, _mm256_set1_epi64x(MAX))" + elif self.vector_size() == 8: + int_suffix = "epi32" + max_value = f"_mm256_andnot_si256({mask}, _mm256_set1_epi32(MAX))" + + if t == "double": + max_value = f"_mm256_andnot_pd(i2d({mask}), _mm256_set1_pd(MAX))" + load = f"_mm256_maskload_pd(({t} const *) ((__m256d const *) {v} + {offset}), {mask})" + return f"_mm256_or_pd({load}, {max_value})" + if t == "float": + max_value = f"_mm256_andnot_ps(i2s({mask}), _mm256_set1_ps(MAX))" + load = f"_mm256_maskload_ps(({t} const *) ((__m256 const *) {v} + {offset}), {mask})" + return f"_mm256_or_ps({load}, {max_value})" + + + if t == "int64_t" or t
== "uint64_t": + it = "long long" + else: + it = t[1:] if t[0] == 'u' else t + + load = f"_mm256_maskload_{int_suffix}(({it} const *) ((__m256i const *) {v} + {offset}), {mask})" + return f"_mm256_or_si256({load}, {max_value})" def get_store_intrinsic(self, ptr, offset, value): t = self.type @@ -210,6 +237,26 @@ def get_store_intrinsic(self, ptr, offset, value): return f"_mm256_storeu_ps(({t} *) ((__m256 *) {ptr} + {offset}), {value})" return f"_mm256_storeu_si256((__m256i *) {ptr} + {offset}, {value})" + def get_mask_store_intrinsic(self, ptr, offset, value, mask): + t = self.type + + if self.vector_size() == 4: + int_suffix = "epi64" + elif self.vector_size() == 8: + int_suffix = "epi32" + + if t == "double": + return f"_mm256_maskstore_pd(({t} *) ((__m256d *) {ptr} + {offset}), {mask}, {value})" + if t == "float": + return f"_mm256_maskstore_ps(({t} *) ((__m256 *) {ptr} + {offset}), {mask}, {value})" + + if t == "int64_t" or t == "uint64_t": + it = "long long" + else: + it = t[1:] if t[0] == 'u' else t; + return f"_mm256_maskstore_{int_suffix}(({it} *) ((__m256i *) {ptr} + {offset}), {mask}, {value})" + + def autogenerated_blabber(self): return f"""///////////////////////////////////////////////////////////////////////////// //// @@ -235,6 +282,7 @@ def generate_prologue(self, f): #endif #endif +#include #include #include "bitonic_sort.h" @@ -247,7 +295,13 @@ def generate_prologue(self, f): namespace vxsort {{ namespace smallsort {{ + +extern "C" const uint8_t mask_table_4[16]; +extern "C" const uint8_t mask_table_8[64]; + template<> struct bitonic<{t}, AVX2> {{ + static const int N = {self.vector_size()}; + static constexpr {t} MAX = std::numeric_limits<{t}>::max(); public: """ print(s, file=f) @@ -416,7 +470,7 @@ def generate_compounded_merger(self, f, width, ascending, inline): suffix = "ascending" if ascending else "descending" rev_suffix = "descending" if ascending else "ascending" - + inl = "INLINE" if inline else "NOINLINE" s = f""" static {inl} void sort_{width:02d}v_merge_{suffix}({g.generate_param_def_list(width)}) {{ @@ -440,12 +494,12 @@ def generate_compounded_merger(self, f, width, ascending, inline): print(" }", file=f) - def generate_entry_points(self, f): + def generate_entry_points_old(self, f): type = self.type g = self for m in range(1, g.max_bitonic_sort_vectors() + 1): s = f""" - static NOINLINE void sort_{m:02d}v({type} *ptr) {{""" + static NOINLINE void sort_{m:02d}v_old({type} *ptr) {{""" print(s, file=f) for l in range(0, m): @@ -459,7 +513,34 @@ def generate_entry_points(self, f): s = f" {g.get_store_intrinsic('ptr', l, f'd{l + 1:02d}')};" print(s, file=f) - print("}", file=f) + print(" }", file=f) + + def generate_entry_points(self, f): + type = self.type + g = self + for m in range(1, g.max_bitonic_sort_vectors() + 1): + s = f""" + static NOINLINE void sort_{m:02d}v_alt({type} *ptr, int remainder) {{ + const auto mask = _mm256_cvtepi8_epi{int(256 / self.vector_size())}(_mm_loadu_si128((__m128i*)(mask_table_{self.vector_size()} + remainder * N))); +""" + print(s, file=f) + + for l in range(0, m-1): + s = f" {g.vector_type()} d{l + 1:02d} = {g.get_load_intrinsic('ptr', l)};" + print(s, file=f) + s = f" {g.vector_type()} d{m:02d} = {g.get_mask_load_intrinsic('ptr', m - 1, 'mask')};" + print(s, file=f) + + s = f" sort_{m:02d}v_ascending({g.generate_param_list(1, m)});" + print(s, file=f) + + for l in range(0, m-1): + s = f" {g.get_store_intrinsic('ptr', l, f'd{l + 1:02d}')};" + print(s, file=f) + s = f" {g.get_mask_store_intrinsic('ptr', m - 1, f'd{m:02d}', 
'mask')};" + print(s, file=f) + + print(" }", file=f) def generate_master_entry_point(self, f_header, f_src): @@ -473,18 +554,34 @@ def generate_master_entry_point(self, f_header, f_src): t = self.type g = self + # s = f""" static void sort_old({t} *ptr, size_t length);""" + # print(s, file=f_header) + s = f""" static void sort({t} *ptr, size_t length);""" print(s, file=f_header) - s = f"""void vxsort::smallsort::bitonic<{t}, vector_machine::AVX2 >::sort({t} *ptr, size_t length) {{ - const int N = {g.vector_size()}; - switch(length / N) {{""" + # s = f"""void vxsort::smallsort::bitonic<{t}, vector_machine::AVX2 >::sort({t} *ptr, size_t length) {{ + # switch(length / N) {{""" + # print(s, file=f_src) + # + # for m in range(1, self.max_bitonic_sort_vectors() + 1): + # s = f" case {m}: sort_{m:02d}v(ptr); break;" + # print(s, file=f_src) + # print(" }", file=f_src) + # print("}", file=f_src) + + s = f"""void vxsort::smallsort::bitonic<{t}, vector_machine::AVX2 >::sort({t} *ptr, size_t length) {{ + const auto fullvlength = length / N; + const int remainder = (int) (length - fullvlength * N); + const auto v = fullvlength + ((remainder > 0) ? 1 : 0); + switch(v) {{""" print(s, file=f_src) for m in range(1, self.max_bitonic_sort_vectors() + 1): - s = f" case {m}: sort_{m:02d}v(ptr); break;" + s = f" case {m}: sort_{m:02d}v_alt(ptr, remainder); break;" print(s, file=f_src) print(" }", file=f_src) print("}", file=f_src) + pass diff --git a/src/coreclr/src/gc/vxsort/smallsort/codegen/avx512.py b/src/coreclr/src/gc/vxsort/smallsort/codegen/avx512.py index f08fda8a4445..6cb6e9048356 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/codegen/avx512.py +++ b/src/coreclr/src/gc/vxsort/smallsort/codegen/avx512.py @@ -210,6 +210,29 @@ def get_load_intrinsic(self, v, offset): return f"_mm512_loadu_ps(({t} const *) ((__m512 const *) {v} + {offset}))" return f"_mm512_loadu_si512((__m512i const *) {v} + {offset});" + def get_mask_load_intrinsic(self, v, offset, mask): + t = self.type + + if self.vector_size() == 8: + int_suffix = "epi64" + max_value = f"_mm512_set1_epi64(MAX)" + elif self.vector_size() == 16: + int_suffix = "epi32" + max_value = f"_mm512_set1_epi32(MAX)" + + if t == "double": + return f"""_mm512_mask_loadu_pd(_mm512_set1_pd(MAX), + {mask}, + ({t} const *) ((__m512d const *) {v} + {offset}))""" + elif t == "float": + return f"""_mm512_mask_loadu_ps(_mm512_set1_ps(MAX), + {mask}, + ({t} const *) ((__m512 const *) {v} + {offset}))""" + + return f"""_mm512_mask_loadu_{int_suffix}({max_value}, + {mask}, + ({t} const *) ((__m512i const *) {v} + {offset}))""" + def get_store_intrinsic(self, ptr, offset, value): t = self.type @@ -219,6 +242,20 @@ def get_store_intrinsic(self, ptr, offset, value): return f"_mm512_storeu_ps(({t} *) ((__m512 *) {ptr} + {offset}), {value})" return f"_mm512_storeu_si512((__m512i *) {ptr} + {offset}, {value})" + def get_mask_store_intrinsic(self, ptr, offset, value, mask): + t = self.type + + if self.vector_size() == 8: + int_suffix = "epi64" + elif self.vector_size() == 16: + int_suffix = "epi32" + + if t == "double": + return f"_mm512_mask_storeu_pd(({t} *) ((__m512d *) {ptr} + {offset}), {mask}, {value})" + if t == "float": + return f"_mm512_mask_storeu_ps(({t} *) ((__m512 *) {ptr} + {offset}), {mask}, {value})" + return f"_mm512_mask_storeu_{int_suffix}((__m512i *) {ptr} + {offset}, {mask}, {value})" + def autogenerated_blabber(self): return f"""///////////////////////////////////////////////////////////////////////////// //// @@ -245,6 +282,7 @@ def 
generate_prologue(self, f): #endif #endif +#include #include #include "bitonic_sort.h" @@ -258,6 +296,8 @@ def generate_prologue(self, f): namespace vxsort {{ namespace smallsort {{ template<> struct bitonic<{t}, AVX512> {{ + static const int N = {self.vector_size()}; + static constexpr {t} MAX = std::numeric_limits<{t}>::max(); public: """ print(s, file=f) @@ -440,12 +480,12 @@ def generate_compounded_merger(self, f, width, ascending, inline): print(" }", file=f) - def generate_entry_points(self, f): + def generate_entry_points_old(self, f): type = self.type g = self for m in range(1, g.max_bitonic_sort_vectors() + 1): s = f""" - static NOINLINE void sort_{m:02d}v({type} *ptr) {{""" + static NOINLINE void sort_{m:02d}v_old({type} *ptr) {{""" print(s, file=f) for l in range(0, m): @@ -459,7 +499,34 @@ def generate_entry_points(self, f): s = f" {g.get_store_intrinsic('ptr', l, f'd{l + 1:02d}')};" print(s, file=f) - print("}", file=f) + print(" }", file=f) + + def generate_entry_points(self, f): + type = self.type + g = self + for m in range(1, g.max_bitonic_sort_vectors() + 1): + s = f""" + static NOINLINE void sort_{m:02d}v_alt({type} *ptr, int remainder) {{ + const auto mask = 0x{((1 << self.vector_size()) - 1):X} >> ((N - remainder) & (N-1)); +""" + print(s, file=f) + + for l in range(0, m-1): + s = f" {g.vector_type()} d{l + 1:02d} = {g.get_load_intrinsic('ptr', l)};" + print(s, file=f) + s = f" {g.vector_type()} d{m:02d} = {g.get_mask_load_intrinsic('ptr', m - 1, 'mask')};" + print(s, file=f) + + s = f" sort_{m:02d}v_ascending({g.generate_param_list(1, m)});" + print(s, file=f) + + for l in range(0, m-1): + s = f" {g.get_store_intrinsic('ptr', l, f'd{l + 1:02d}')};" + print(s, file=f) + s = f" {g.get_mask_store_intrinsic('ptr', m - 1, f'd{m:02d}', 'mask')};" + print(s, file=f) + + print(" }", file=f) def generate_master_entry_point(self, f_header, f_src): @@ -473,18 +540,35 @@ def generate_master_entry_point(self, f_header, f_src): t = self.type g = self + # s = f""" static void sort_old({t} *ptr, size_t length);""" + # print(s, file=f_header) + s = f""" static void sort({t} *ptr, size_t length);""" print(s, file=f_header) - s = f"""void vxsort::smallsort::bitonic<{t}, vector_machine::AVX512 >::sort({t} *ptr, size_t length) {{ - const int N = {g.vector_size()}; - switch(length / N) {{""" + # s = f"""void vxsort::smallsort::bitonic<{t}, vector_machine::AVX512 >::sort_old({t} *ptr, size_t length) {{ + # switch(length / N) {{""" + # print(s, file=f_src) + # + # for m in range(1, self.max_bitonic_sort_vectors() + 1): + # s = f" case {m}: sort_{m:02d}v(ptr); break;" + # print(s, file=f_src) + # print(" }", file=f_src) + # print("}", file=f_src) + + + s = f"""void vxsort::smallsort::bitonic<{t}, vector_machine::AVX512 >::sort({t} *ptr, size_t length) {{ + const auto fullvlength = length / N; + const int remainder = (int) (length - fullvlength * N); + const auto v = fullvlength + ((remainder > 0) ? 
1 : 0); + switch(v) {{""" print(s, file=f_src) for m in range(1, self.max_bitonic_sort_vectors() + 1): - s = f" case {m}: sort_{m:02d}v(ptr); break;" + s = f" case {m}: sort_{m:02d}v_alt(ptr, remainder); break;" print(s, file=f_src) print(" }", file=f_src) + print("}", file=f_src) pass diff --git a/src/coreclr/src/gc/vxsort/smallsort/codegen/bitonic_gen.py b/src/coreclr/src/gc/vxsort/smallsort/codegen/bitonic_gen.py index 4681e4986c3f..55ef7bbc0f4c 100644 --- a/src/coreclr/src/gc/vxsort/smallsort/codegen/bitonic_gen.py +++ b/src/coreclr/src/gc/vxsort/smallsort/codegen/bitonic_gen.py @@ -10,8 +10,8 @@ # usage: bitonic_gen.py [-h] [--vector-isa VECTOR_ISA [VECTOR_ISA ...]] # [--break-inline BREAK_INLINE] [--output-dir OUTPUT_DIR] # -# the files in src/coreclr/src/gc/vxsort/smallsort checked in can be generated with: -# python bitonic_gen.py --output-dir c:\temp --vector-isa AVX2 AVX512 +# the files in src/coreclr/src/gc/vxsort/smallsort that are currently checked in can be generated with: +# python bitonic_gen.py --output-dir c:\temp --vector-isa AVX2 AVX512 --break-inline 4 # import argparse import os @@ -55,7 +55,7 @@ def generate_per_type(f_header, f_src, type, vector_isa, break_inline): for width in range(2, g.max_bitonic_sort_vectors() + 1): # Allow breaking the inline chain once in a while (configurable) - if break_inline == 0 or width & break_inline != 0: + if break_inline == 0 or width % break_inline != 0: inline = True else: inline = False @@ -65,6 +65,7 @@ def generate_per_type(f_header, f_src, type, vector_isa, break_inline): g.generate_compounded_merger(f_header, width, ascending=True, inline=inline) g.generate_compounded_merger(f_header, width, ascending=False, inline=inline) + #g.generate_entry_points_old(f_header) g.generate_entry_points(f_header) g.generate_master_entry_point(f_header, f_src) g.generate_epilogue(f_header) diff --git a/src/coreclr/src/gc/vxsort/vxsort.h b/src/coreclr/src/gc/vxsort/vxsort.h index 35812d9356f3..b8eaac51f421 100644 --- a/src/coreclr/src/gc/vxsort/vxsort.h +++ b/src/coreclr/src/gc/vxsort/vxsort.h @@ -17,22 +17,28 @@ #include #include - #include "defs.h" -//#include "isa_detection.h" #include "alignment.h" #include "machine_traits.h" +#ifdef VXSORT_STATS +#include "vxsort_stats.h" +#endif //VXSORT_STATS +#include "packer.h" #include "smallsort/bitonic_sort.h" -//#include -//#include -//#include - namespace vxsort { using vxsort::smallsort::bitonic; - -template +/** + * sort primitives, quickly + * @tparam T The primitive type being sorted + * @tparam M The vector machine model/ISA (e.g. AVX2, AVX512 etc.) + * @tparam Unroll The unroll factor, controls, to some extent, the code-bloat/speedup ratio at the call site + * Defaults to 1 + * @tparam Shift Optional; specify how many LSB bits are known to be zero in the original input. Can be used + * to further speed up sorting.
+ */ +template class vxsort { static_assert(Unroll >= 1, "Unroll can be in the range 1..12"); static_assert(Unroll <= 12, "Unroll can be in the range 1..12"); @@ -40,6 +46,7 @@ class vxsort { private: using MT = vxsort_machine_traits; typedef typename MT::TV TV; + typedef typename MT::TPACK TPACK; typedef alignment_hint AH; static const int ELEMENT_ALIGN = sizeof(T) - 1; @@ -64,6 +71,18 @@ class vxsort { static const int PARTITION_TMP_SIZE_IN_ELEMENTS = (2 * SLACK_PER_SIDE_IN_ELEMENTS + N + 4*N); + void reset(T* start, T* end) { + _depth = 0; + _startPtr = start; + _endPtr = end; + } + + T* _startPtr = nullptr; + T* _endPtr = nullptr; + + T _temp[PARTITION_TMP_SIZE_IN_ELEMENTS]; + int _depth = 0; + static int floor_log2_plus_one(size_t n) { auto result = 0; while (n >= 1) { @@ -83,18 +102,6 @@ class vxsort { swap(left, right); } - static void insertion_sort(T* lo, T* hi) { - for (auto i = lo + 1; i <= hi; i++) { - auto j = i; - auto t = *i; - while ((j > lo) && (t < *(j - 1))) { - *j = *(j - 1); - j--; - } - *j = t; - } - } - static void heap_sort(T* lo, T* hi) { size_t n = hi - lo + 1; for (size_t i = n / 2; i >= 1; i--) { @@ -122,18 +129,6 @@ class vxsort { *(lo + i - 1) = d; } - void reset(T* start, T* end) { - _depth = 0; - _startPtr = start; - _endPtr = end; - } - - T* _startPtr = nullptr; - T* _endPtr = nullptr; - - T _temp[PARTITION_TMP_SIZE_IN_ELEMENTS]; - int _depth = 0; - NOINLINE T* align_left_scalar_uncommon(T* read_left, T pivot, T*& tmp_left, T*& tmp_right) { @@ -172,8 +167,8 @@ class vxsort { return readRight; } - void sort(T* left, T* right, AH realignHint, - int depthLimit) { + void sort(T* left, T* right, T left_hint, T right_hint, AH realignHint, + int depth_limit) { auto length = (size_t)(right - left + 1); T* mid; @@ -194,16 +189,11 @@ class vxsort { // Go to insertion sort below this threshold if (length <= SMALL_SORT_THRESHOLD_ELEMENTS) { - - auto nextLength = (length & (N-1)) > 0 ? (length + N) & ~(N-1) : length; - - auto extraSpaceNeeded = nextLength - length; - auto fakeLeft = left - extraSpaceNeeded; - if (fakeLeft >= _startPtr) { - bitonic::sort(fakeLeft, nextLength); - } else { - insertion_sort(left, right); - } +#ifdef VXSORT_STATS + vxsort_stats::bump_small_sorts(); + vxsort_stats::record_small_sort_size(length); +#endif + bitonic::sort(left, length); return; } @@ -211,12 +201,24 @@ class vxsort { // will not do well: // 1. Reverse sorted array // 2. 
High degree of repeated values (dutch flag problem, one value) - if (depthLimit == 0) { + if (depth_limit == 0) { heap_sort(left, right); _depth--; return; } - depthLimit--; + + depth_limit--; + + + if (MT::supports_packing()) { + if (MT::template can_pack(right_hint - left_hint)) { + packer::pack(left, length, left_hint); + auto packed_sorter = vxsort(); + packed_sorter.sort((TPACK *) left, ((TPACK *) left) + length - 1); + packer::unpack((TPACK *) left, length, left_hint); + return; + } + } // This is going to be a bit weird: // Pre/Post alignment calculations happen here: we prepare hints to the @@ -274,11 +276,9 @@ class vxsort { vectorized_partition(left, right, realignHint) : vectorized_partition(left, right, realignHint); - - _depth++; - sort(left, sep - 2, realignHint.realign_right(), depthLimit); - sort(sep, right, realignHint.realign_left(), depthLimit); + sort(left, sep - 2, left_hint, *sep, realignHint.realign_right(), depth_limit); + sort(sep, right, *(sep - 2), right_hint, realignHint.realign_left(), depth_limit); _depth--; } @@ -287,6 +287,10 @@ class vxsort { const TV& P, T*& left, T*& right) { +#ifdef VXSORT_STATS + vxsort_stats::bump_vec_loads(); + vxsort_stats::bump_vec_stores(2); +#endif if (MT::supports_compress_writes()) { partition_block_with_compress(dataVec, P, left, right); } else { @@ -298,6 +302,9 @@ class vxsort { const TV& P, T*& left, T*& right) { +#ifdef VXSORT_STATS + vxsort_stats::bump_perms(); +#endif auto mask = MT::get_cmpgt_mask(dataVec, P); dataVec = MT::partition_vector(dataVec, mask); MT::store_vec(reinterpret_cast(left), dataVec); @@ -325,6 +332,10 @@ class vxsort { assert((reinterpret_cast(left) & ELEMENT_ALIGN) == 0); assert((reinterpret_cast(right) & ELEMENT_ALIGN) == 0); +#ifdef VXSORT_STATS + vxsort_stats::bump_partitions((right - left) + 1); +#endif + // Vectorized double-pumped (dual-sided) partitioning: // We start with picking a pivot using the media-of-3 "method" // Once we have sensible pivot stored as the last element of the array @@ -505,7 +516,7 @@ class vxsort { *writeLeft++ = pivot; assert(writeLeft > left); - assert(writeLeft <= right); + assert(writeLeft <= right+1); return writeLeft; } @@ -526,6 +537,11 @@ class vxsort { const auto preAlignedLeft = (TV*) (left + leftAlign); const auto preAlignedRight = (TV*) (right + rightAlign - N); +#ifdef VXSORT_STATS + vxsort_stats::bump_vec_loads(2); + vxsort_stats::bump_vec_stores(4); +#endif + // Alignment with vectorization is tricky, so read carefully before changing code: // 1. We load data, which we might need to align, if the alignment hints // mean pre-alignment (or overlapping alignment) @@ -565,6 +581,9 @@ class vxsort { tmpStartRight -= rightAlign & rai; } else { +#ifdef VXSORT_STATS + vxsort_stats::bump_perms(2); +#endif RT0 = MT::partition_vector(RT0, rtMask); LT0 = MT::partition_vector(LT0, ltMask); MT::store_vec((TV*) tmpRight, RT0); @@ -588,10 +607,27 @@ class vxsort { } public: - NOINLINE void sort(T* left, T* right) { + /** + * Sort a given range + * @param left The left edge of the range, inclusive + * @param right The right edge of the range, inclusive + * @param left_hint Optional; A hint, used to speed up the sorting operation, describing a single value that is known to be + * smaller than, or equal to, all values contained within the provided array. + * @param right_hint Optional; A hint, used to speed up the sorting operation, describing a single value that is known to be + * larger than all values contained within the provided array.
+ */ + NOINLINE void sort(T* left, T* right, + T left_hint = std::numeric_limits<T>::min(), + T right_hint = std::numeric_limits<T>::max()) + { +// init_isa_detection(); + +#ifdef VXSORT_STATS + vxsort_stats::bump_sorts((right - left) + 1); +#endif reset(left, right); auto depthLimit = 2 * floor_log2_plus_one(right + 1 - left); - sort(left, right, AH(), depthLimit); + sort(left, right, left_hint, right_hint, AH(), depthLimit); } }; diff --git a/src/coreclr/src/gc/vxsort/vxsort_targets_enable_avx512.h b/src/coreclr/src/gc/vxsort/vxsort_targets_enable_avx512.h index c5bfe4998a8f..38b0728d4217 100644 --- a/src/coreclr/src/gc/vxsort/vxsort_targets_enable_avx512.h +++ b/src/coreclr/src/gc/vxsort/vxsort_targets_enable_avx512.h @@ -3,9 +3,9 @@ #ifdef __GNUC__ #ifdef __clang__ -#pragma clang attribute push (__attribute__((target("avx512f"))), apply_to = any(function)) +#pragma clang attribute push (__attribute__((target("avx512f,avx512dq"))), apply_to = any(function)) #else #pragma GCC push_options -#pragma GCC target("avx512f") +#pragma GCC target("avx512f,avx512dq") #endif #endif diff --git a/src/coreclr/src/gc/windows/gcenv.windows.cpp b/src/coreclr/src/gc/windows/gcenv.windows.cpp index 4b44ca9e8d5b..53b868df6820 100644 --- a/src/coreclr/src/gc/windows/gcenv.windows.cpp +++ b/src/coreclr/src/gc/windows/gcenv.windows.cpp @@ -692,14 +692,14 @@ void* GCToOSInterface::VirtualReserve(size_t size, size_t alignment, uint32_t fl assert((alignment & (alignment - 1)) == 0); assert(alignment <= 0x10000); + DWORD memFlags = (flags & VirtualReserveFlags::WriteWatch) ? (MEM_RESERVE | MEM_WRITE_WATCH) : MEM_RESERVE; if (node == NUMA_NODE_UNDEFINED) { - DWORD memFlags = (flags & VirtualReserveFlags::WriteWatch) ? (MEM_RESERVE | MEM_WRITE_WATCH) : MEM_RESERVE; return ::VirtualAlloc (nullptr, size, memFlags, PAGE_READWRITE); } else { - return ::VirtualAllocExNuma (::GetCurrentProcess (), NULL, size, MEM_RESERVE, PAGE_READWRITE, node); + return ::VirtualAllocExNuma (::GetCurrentProcess (), NULL, size, memFlags, PAGE_READWRITE, node); } } @@ -719,7 +719,7 @@ bool GCToOSInterface::VirtualRelease(void* address, size_t size) // size - size of the virtual memory range // Return: // Starting virtual address of the committed range -void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size) +void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size, uint16_t node) { void* pRetVal = nullptr; @@ -736,7 +736,14 @@ void* GCToOSInterface::VirtualReserveAndCommitLargePages(size_t size) SIZE_T largePageMinimum = GetLargePageMinimum(); size = (size + (largePageMinimum - 1)) & ~(largePageMinimum - 1); - return ::VirtualAlloc(nullptr, size, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + if (node == NUMA_NODE_UNDEFINED) + { + return ::VirtualAlloc(nullptr, size, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + } + else + { + return ::VirtualAllocExNuma(::GetCurrentProcess(), NULL, size, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE, node); + } } // Commit virtual memory range. It must be part of a range reserved using VirtualReserve.
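A note on the masked tail handling that the vxsort changes above introduce: the generated sort_NNv_alt entry points load the final, possibly partial, vector under a lane mask, fill the disabled lanes with MAX so the bitonic network still operates on a full vector, and write back through the same mask so nothing beyond the caller's buffer is stored. Below is a minimal, self-contained sketch of the AVX-512 mask arithmetic, assuming N = 8 (64-bit lanes in a 512-bit vector); it is illustrative, not the generated code.

    #include <cstdio>

    int main() {
        const int N = 8; // 64-bit lanes per 512-bit vector
        for (int remainder = 0; remainder < N; remainder++) {
            // remainder == 0 means the final vector is exactly full:
            // (N - 0) & (N - 1) == 0, so no bits are shifted out and all lanes stay on.
            // For 0 < remainder < N, only the low 'remainder' lanes survive.
            unsigned mask = 0xFFu >> ((N - remainder) & (N - 1));
            printf("remainder=%d -> mask=0x%02X\n", remainder, mask);
        }
        // Disabled lanes are loaded as std::numeric_limits<T>::max(), so an ascending
        // sort sinks them past the live data, and the masked store never writes them;
        // AVX-512 masked loads also leave memory in disabled lanes untouched.
        return 0;
    }

AVX2 has no mask registers for this, which is why the avx2.py generator instead indexes the mask_table_4/mask_table_8 byte tables and widens a row with _mm256_cvtepi8_epi64/_mm256_cvtepi8_epi32; the ASAN-dependent M4_SIZE/M8_SIZE padding declared in bitonic_sort.h exists because that 128-bit table load deliberately over-reads in unoptimized builds.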
diff --git a/src/coreclr/src/gcinfo/CMakeLists.txt b/src/coreclr/src/gcinfo/CMakeLists.txt index 3862de8633d0..f40ff3deb83a 100644 --- a/src/coreclr/src/gcinfo/CMakeLists.txt +++ b/src/coreclr/src/gcinfo/CMakeLists.txt @@ -3,6 +3,7 @@ set(CMAKE_INCLUDE_CURRENT_DIR ON) set( GCINFO_SOURCES arraylist.cpp gcinfoencoder.cpp + simplerhash.cpp ) @@ -16,11 +17,14 @@ endif(CLR_CMAKE_TARGET_ARCH_I386) convert_to_absolute_path(GCINFO_SOURCES ${GCINFO_SOURCES}) -add_library_clr(gcinfo +add_library_clr(gcinfo_obj OBJECT ${GCINFO_SOURCES} ) +add_library(gcinfo INTERFACE) +target_sources(gcinfo INTERFACE $) + add_library_clr(gcinfo_crossgen STATIC ${GCINFO_SOURCES} diff --git a/src/coreclr/src/gcinfo/simplerhash.cpp b/src/coreclr/src/gcinfo/simplerhash.cpp new file mode 100644 index 000000000000..1233b91538c2 --- /dev/null +++ b/src/coreclr/src/gcinfo/simplerhash.cpp @@ -0,0 +1,41 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "simplerhash.h" + +// Table of primes and their magic-number-divide constant. +// For more info see the book "Hacker's Delight" chapter 10.9 "Unsigned Division by Divisors >= 1" +// These were selected by looking for primes, each roughly twice as big as the previous, having +// 32-bit magic numbers (because the algorithm for using 33-bit magic numbers is slightly slower). +// + +const PrimeInfo primeInfo[] = +{ + PrimeInfo(9, 0x38e38e39, 1), + PrimeInfo(23, 0xb21642c9, 4), + PrimeInfo(59, 0x22b63cbf, 3), + PrimeInfo(131, 0xfa232cf3, 7), + PrimeInfo(239, 0x891ac73b, 7), + PrimeInfo(433, 0x975a751, 4), + PrimeInfo(761, 0x561e46a5, 8), + PrimeInfo(1399, 0xbb612aa3, 10), + PrimeInfo(2473, 0x6a009f01, 10), + PrimeInfo(4327, 0xf2555049, 12), + PrimeInfo(7499, 0x45ea155f, 11), + PrimeInfo(12973, 0x1434f6d3, 10), + PrimeInfo(22433, 0x2ebe18db, 12), + PrimeInfo(46559, 0xb42bebd5, 15), + PrimeInfo(96581, 0xadb61b1b, 16), + PrimeInfo(200341, 0x29df2461, 15), + PrimeInfo(415517, 0xa181c46d, 18), + PrimeInfo(861719, 0x4de0bde5, 18), + PrimeInfo(1787021, 0x9636c46f, 20), + PrimeInfo(3705617, 0x4870adc1, 20), + PrimeInfo(7684087, 0x8bbc5b83, 22), + PrimeInfo(15933877, 0x86c65361, 23), + PrimeInfo(33040633, 0x40fec79b, 23), + PrimeInfo(68513161, 0x7d605cd1, 25), + PrimeInfo(142069021, 0xf1da390b, 27), + PrimeInfo(294594427, 0x74a2507d, 27), + PrimeInfo(733045421, 0x5dbec447, 28), +}; diff --git a/src/coreclr/src/hosts/unixcoreruncommon/coreruncommon.cpp b/src/coreclr/src/hosts/unixcoreruncommon/coreruncommon.cpp index b95903062f37..5971175804d9 100644 --- a/src/coreclr/src/hosts/unixcoreruncommon/coreruncommon.cpp +++ b/src/coreclr/src/hosts/unixcoreruncommon/coreruncommon.cpp @@ -365,7 +365,7 @@ int ExecuteManagedAssembly( // libunwind library is used to unwind stack frame, but libunwind for ARM // does not support ARM vfpv3/NEON registers in DWARF format correctly. // Therefore let's disable stack unwinding using DWARF information - // See https://github.com/dotnet/coreclr/issues/6698 + // See https://github.com/dotnet/runtime/issues/6479 // // libunwind use following methods to unwind stack frame.
// UNW_ARM_METHOD_ALL 0xFF diff --git a/src/coreclr/src/ilasm/CMakeLists.txt b/src/coreclr/src/ilasm/CMakeLists.txt index 13e7467e0b1e..3e6c15b657ab 100644 --- a/src/coreclr/src/ilasm/CMakeLists.txt +++ b/src/coreclr/src/ilasm/CMakeLists.txt @@ -48,7 +48,7 @@ endif(CLR_CMAKE_TARGET_WIN32) if(CLR_CMAKE_HOST_UNIX) # Need generate a right form of asmparse.cpp to avoid the following options. # Clang also produces a bad-codegen on this prebuilt file with optimization. - # https://github.com/dotnet/coreclr/issues/2305 + # https://github.com/dotnet/runtime/issues/4776 add_compile_options("$<$:-Wno-register>") add_compile_options(-Wno-array-bounds) add_compile_options(-Wno-unused-label) diff --git a/src/coreclr/src/ilasm/main.cpp b/src/coreclr/src/ilasm/main.cpp index 9eec9a853277..342720ff8534 100644 --- a/src/coreclr/src/ilasm/main.cpp +++ b/src/coreclr/src/ilasm/main.cpp @@ -639,7 +639,7 @@ extern "C" int _cdecl wmain(int argc, __in WCHAR **argv) if (bGeneratePdb && CLASSIC == pdbFormat) { // Classic PDB format is not supported on CoreCLR - // https://github.com/dotnet/coreclr/issues/2982 + // https://github.com/dotnet/runtime/issues/5051 printf("WARNING: Classic PDB format is not supported on CoreCLR.\n"); printf("Use '/PDBFMT=PORTABLE' option in order to generate portable PDB format. \n"); diff --git a/src/coreclr/src/ildasm/dasm.cpp b/src/coreclr/src/ildasm/dasm.cpp index da3d12f0f336..eecd1edbf263 100644 --- a/src/coreclr/src/ildasm/dasm.cpp +++ b/src/coreclr/src/ildasm/dasm.cpp @@ -3300,7 +3300,9 @@ void DumpGenericParsCA(mdToken tok, void* GUICookie/*=NULL*/) } //end if(g_fShowCA) } -// Sets *pbOverridingTypeSpec to TRUE if we are overriding a method declared by a type spec. +// Sets *pbOverridingTypeSpec to TRUE if we are overriding a method declared by a type spec or +// if the method has a signature which does not exactly match between the overrider and overridee. +// That case is commonly caused by covariant overrides. // In that case the syntax is slightly different (there are additional 'method' keywords). // Refer to Expert .NET 2.0 IL Assembler page 242. void PrettyPrintOverrideDecl(ULONG i, __inout __nullterminated char* szString, void* GUICookie, mdToken tkOverrider, @@ -3320,6 +3322,11 @@ void PrettyPrintOverrideDecl(ULONG i, __inout __nullterminated char* szString, v if(g_pImport->IsValidToken(tkDecl)) { + bool needsFullTokenPrint = false; + bool hasTkDeclParent = false; + + // Determine if the decl is a typespec method, in which case the "method" syntax + full token print + // must be used to generate the disassembly. 
if(SUCCEEDED(g_pImport->GetParentToken(tkDecl,&tkDeclParent))) { if(g_pImport->IsValidToken(tkDeclParent)) @@ -3334,20 +3341,99 @@ void PrettyPrintOverrideDecl(ULONG i, __inout __nullterminated char* szString, v { if(TypeFromToken(tkDeclParent)==mdtTypeSpec) { - szptr += sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr), " %s ",KEYWORD("method")); - PrettyPrintToken(szString,tkDecl,g_pImport,GUICookie,tkOverrider); - - *pbOverridingTypeSpec = TRUE; - return; + needsFullTokenPrint = true; } - PrettyPrintToken(szString, tkDeclParent, g_pImport,GUICookie,tkOverrider); - strcat_s(szString, SZSTRING_SIZE,"::"); - szptr = &szString[strlen(szString)]; + hasTkDeclParent = true; } } else szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr),"%s",ERRORMSG("INVALID OVERRIDDEN METHOD'S PARENT TOKEN")); } + + // Determine if the sig of the decl does not match the sig of the body + // In that case the full "method" syntax must be used + if ((TypeFromToken(tkOverrider) == mdtMethodDef) && !needsFullTokenPrint) + { + PCCOR_SIGNATURE pComSigDecl; + ULONG cComSigDecl; + mdToken tkDeclSigTok = tkDecl; + bool successfullyGotDeclSig = false; + bool successfullyGotBodySig = false; + + if (TypeFromToken(tkDeclSigTok) == mdtMethodSpec) + { + mdToken meth=0; + if (SUCCEEDED(g_pImport->GetMethodSpecProps(tkDeclSigTok, &meth, NULL, NULL))) + { + tkDeclSigTok = meth; + } + } + + if (TypeFromToken(tkDeclSigTok) == mdtMethodDef) + { + if (SUCCEEDED(g_pImport->GetSigOfMethodDef(tkDeclSigTok, &cComSigDecl, &pComSigDecl))) + { + successfullyGotDeclSig = true; + } + } + else if (TypeFromToken(tkDeclSigTok) == mdtMemberRef) + { + const char *pszMemberNameUnused; + if (SUCCEEDED(g_pImport->GetNameAndSigOfMemberRef( + tkDeclSigTok, + &pComSigDecl, + &cComSigDecl, + &pszMemberNameUnused))) + { + successfullyGotDeclSig = true; + } + } + + PCCOR_SIGNATURE pComSigBody; + ULONG cComSigBody; + if (SUCCEEDED(g_pImport->GetSigOfMethodDef(tkOverrider, &cComSigBody, &pComSigBody))) + { + successfullyGotBodySig = true; + } + + if (successfullyGotDeclSig && successfullyGotBodySig) + { + if (cComSigBody != cComSigDecl) + { + needsFullTokenPrint = true; + } + else if (memcmp(pComSigBody, pComSigDecl, cComSigBody) != 0) + { + needsFullTokenPrint = true; + } + + // Signatures are binary identical, full sig printing not needed + } + else + { + szptr+=sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr),"%s",ERRORMSG("INVALID BODY OR DECL SIG")); + } + } + + if (needsFullTokenPrint) + { + // In this case, the shortcut syntax cannot be used, and a full token must be printed. + // Print the full token and return. + szptr += sprintf_s(szptr,SZSTRING_REMAINING_SIZE(szptr), " %s ",KEYWORD("method")); + PrettyPrintToken(szString,tkDecl,g_pImport,GUICookie,tkOverrider); + + *pbOverridingTypeSpec = TRUE; + return; + } + + if (hasTkDeclParent) + { + // If the tkDeclParent was successfully retrieved during parent discovery, print it here.
+ PrettyPrintToken(szString, tkDeclParent, g_pImport,GUICookie,tkOverrider); + strcat_s(szString, SZSTRING_SIZE,"::"); + szptr = &szString[strlen(szString)]; + } + if(TypeFromToken(tkDecl) == mdtMethodSpec) { mdToken meth=0; diff --git a/src/coreclr/src/inc/CMakeLists.txt b/src/coreclr/src/inc/CMakeLists.txt index 817622012516..4f75d3a882d4 100644 --- a/src/coreclr/src/inc/CMakeLists.txt +++ b/src/coreclr/src/inc/CMakeLists.txt @@ -58,7 +58,9 @@ if(FEATURE_JIT_PITCHING) endif(FEATURE_JIT_PITCHING) # Compile *_i.cpp to lib -_add_library(corguids OBJECT ${CORGUIDS_SOURCES}) +_add_library(corguids_obj OBJECT ${CORGUIDS_SOURCES}) +add_library(corguids INTERFACE) +target_sources(corguids INTERFACE $) # Binplace the inc files for packaging later. @@ -75,4 +77,3 @@ _install (FILES cfi.h gcinfoencoder.h gcinfotypes.h DESTINATION inc) -_install (TARGETS corguids DESTINATION lib) diff --git a/src/coreclr/src/inc/clrconfigvalues.h b/src/coreclr/src/inc/clrconfigvalues.h index ddb1db3ce647..cad4f8bae96d 100644 --- a/src/coreclr/src/inc/clrconfigvalues.h +++ b/src/coreclr/src/inc/clrconfigvalues.h @@ -553,6 +553,8 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadSuspendInjection, W("INTERNAL_ThreadSusp RETAIL_CONFIG_DWORD_INFO(INTERNAL_DefaultStackSize, W("DefaultStackSize"), 0, "Stack size to use for new VM threads when thread is created with default stack size (dwStackSize == 0).") RETAIL_CONFIG_DWORD_INFO(INTERNAL_Thread_DeadThreadCountThresholdForGCTrigger, W("Thread_DeadThreadCountThresholdForGCTrigger"), 75, "In the heuristics to clean up dead threads, this threshold must be reached before triggering a GC will be considered. Set to 0 to disable triggering a GC based on dead threads.") RETAIL_CONFIG_DWORD_INFO(INTERNAL_Thread_DeadThreadGCTriggerPeriodMilliseconds, W("Thread_DeadThreadGCTriggerPeriodMilliseconds"), 1000 * 60 * 30, "In the heuristics to clean up dead threads, this much time must have elapsed since the previous max-generation GC before triggering another GC will be considered") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Thread_UseAllCpuGroups, W("Thread_UseAllCpuGroups"), 0, "Specifies whether to query and use CPU group information for determining the processor count.") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Thread_AssignCpuGroups, W("Thread_AssignCpuGroups"), 1, "Specifies whether to automatically distribute threads created by the CLR across CPU Groups. 
Effective only when Thread_UseAllCpuGroups and GCCpuGroup are enabled.") /// /// Threadpool @@ -568,7 +570,6 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_UnfairSemaphoreSpinLimit, W("Thread #else // !TARGET_ARM64 RETAIL_CONFIG_DWORD_INFO(INTERNAL_ThreadPool_UnfairSemaphoreSpinLimit, W("ThreadPool_UnfairSemaphoreSpinLimit"), 0x46, "Maximum number of spins a thread pool worker thread performs before waiting for work") #endif // TARGET_ARM64 -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Thread_UseAllCpuGroups, W("Thread_UseAllCpuGroups"), 0, "Specifies if to automatically distribute thread across CPU Groups") CONFIG_DWORD_INFO(INTERNAL_ThreadpoolTickCountAdjustment, W("ThreadpoolTickCountAdjustment"), 0, "") @@ -691,7 +692,6 @@ RETAIL_CONFIG_STRING_INFO(INTERNAL_EventNameFilter, W("EventNameFilter"), "") /// Interop /// CONFIG_DWORD_INFO_DIRECT_ACCESS(INTERNAL_ExposeExceptionsInCOM, W("ExposeExceptionsInCOM"), "") -RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_PInvokeInline, W("PInvokeInline"), "", CLRConfig::EEConfig_default) RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_InteropValidatePinnedObjects, W("InteropValidatePinnedObjects"), 0, "After returning from a managed-to-unmanaged interop call, validate GC heap around objects pinned by IL stubs.") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_InteropLogArguments, W("InteropLogArguments"), 0, "Log all pinned arguments passed to an interop call") RETAIL_CONFIG_STRING_INFO(UNSUPPORTED_LogCCWRefCountChange, W("LogCCWRefCountChange"), "Outputs debug information and calls LogCCWRefCountChange_BREAKPOINT when AddRef or Release is called on a CCW.") @@ -709,11 +709,19 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_EventPipeRundown, W("EventPipeRundown"), 1, "E RETAIL_CONFIG_DWORD_INFO(INTERNAL_EventPipeCircularMB, W("EventPipeCircularMB"), 1024, "The EventPipe circular buffer size in megabytes.") RETAIL_CONFIG_DWORD_INFO(INTERNAL_EventPipeProcNumbers, W("EventPipeProcNumbers"), 0, "Enable/disable capturing processor numbers in EventPipe event headers") +// +// Generational Aware Analysis +// +RETAIL_CONFIG_DWORD_INFO(INTERNAL_GCGenAnalysisGen, W("GCGenAnalysisGen"), 0, "The generation to trigger generational aware analysis") +RETAIL_CONFIG_DWORD_INFO(INTERNAL_GCGenAnalysisBytes, W("GCGenAnalysisBytes"), 0, "The number of bytes to trigger generational aware analysis") +RETAIL_CONFIG_DWORD_INFO(INTERNAL_GCGenAnalysisIndex, W("GCGenAnalysisIndex"), 0, "The gc index to trigger generational aware analysis") +RETAIL_CONFIG_STRING_INFO(INTERNAL_GCGenAnalysisCmd, W("GCGenAnalysisCmd"), "An optional filter to match with the command line used to spawn the process") + // -// Diagnostics Server +// Diagnostics Ports // -RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_DOTNET_DiagnosticsMonitorAddress, W("DOTNET_DiagnosticsMonitorAddress"), "NamedPipe path without '\\\\.\\pipe\\' on Windows; Full path of Unix Domain Socket on Linux/Unix. Used for Diagnostics Monitoring Agents.", CLRConfig::DontPrependCOMPlus_); -RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_DOTNET_DiagnosticsMonitorPauseOnStart, W("DOTNET_DiagnosticsMonitorPauseOnStart"), 1, "If DOTNET_DiagnosticsMonitorAddress is set, this will cause the runtime to pause during startup. Resume using the Diagnostics IPC ResumeStartup command.", CLRConfig::DontPrependCOMPlus_); +RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_DOTNET_DefaultDiagnosticPortSuspend, W("DOTNET_DefaultDiagnosticPortSuspend"), 0, "This sets the default diagnostic port to suspend, causing the runtime to pause during startup before major subsystems are started.
Resume using the Diagnostics IPC ResumeStartup command on the default diagnostic port.", CLRConfig::DontPrependCOMPlus_); +RETAIL_CONFIG_STRING_INFO_EX(EXTERNAL_DOTNET_DiagnosticPorts, W("DOTNET_DiagnosticPorts"), "A semicolon delimited list of additional Diagnostic Ports, where a Diagnostic Port is a NamedPipe path without '\\\\.\\pipe\\' on Windows or the full path of Unix Domain Socket on Linux/Unix followed by optional tags, e.g., ',connect,nosuspend;'", CLRConfig::DontPrependCOMPlus_); // // LTTng diff --git a/src/coreclr/src/inc/cor.h b/src/coreclr/src/inc/cor.h index 8d2b63294602..035ba44e7f87 100644 --- a/src/coreclr/src/inc/cor.h +++ b/src/coreclr/src/inc/cor.h @@ -2093,7 +2093,7 @@ inline ULONG CorSigUncompressData( // return number of bytes of that compre } -constexpr mdToken g_tkCorEncodeToken[4] ={mdtTypeDef, mdtTypeRef, mdtTypeSpec, mdtBaseType}; +extern const mdToken g_tkCorEncodeToken[]; // uncompress a token inline mdToken CorSigUncompressToken( // return the token. diff --git a/src/coreclr/src/inc/corcompile.h b/src/coreclr/src/inc/corcompile.h index f02a7a1475d0..82308d7b899a 100644 --- a/src/coreclr/src/inc/corcompile.h +++ b/src/coreclr/src/inc/corcompile.h @@ -1412,7 +1412,7 @@ class ICorCompileInfo // So, the host must call StartupAsCompilationProcess before compiling // any code, and Shutdown after finishing. // - // The arguments control which native image of mscorlib to use. + // The arguments control which native image of CoreLib to use. // This matters for hardbinding. // @@ -1548,8 +1548,8 @@ class ICorCompileInfo mdFieldDef *token ) = 0; - // Get the loader module for mscorlib - virtual CORINFO_MODULE_HANDLE GetLoaderModuleForMscorlib() = 0; + // Get the loader module for CoreLib + virtual CORINFO_MODULE_HANDLE GetLoaderModuleForCoreLib() = 0; // Get the loader module for a type (where the type is regarded as // living for the purposes of loading, unloading, and ngen). diff --git a/src/coreclr/src/inc/cordebug.idl b/src/coreclr/src/inc/cordebug.idl index 7ab2867d9f28..efecf3b55063 100644 --- a/src/coreclr/src/inc/cordebug.idl +++ b/src/coreclr/src/inc/cordebug.idl @@ -2361,7 +2361,7 @@ interface ICorDebugAppDomain4 : IUnknown * Assembly interface * An ICorDebugAssembly instance corresponds to a a managed assembly loaded * into a specific AppDomain in the CLR. For assemblies shared between multiple - * AppDomains (eg. mscorlib), there will be a separate ICorDebugAssembly instance + * AppDomains (eg. CoreLib), there will be a separate ICorDebugAssembly instance * per AppDomain in which it is used. * ------------------------------------------------------------------------- */ [ @@ -5173,6 +5173,30 @@ interface ICorDebugModule3 : IUnknown [out][iid_is(riid)] void **ppObj); } +/* + * ICorDebugModule4 is a logical extension to ICorDebugModule. + */ +[ + object, + local, + uuid(FF8B8EAF-25CD-4316-8859-84416DE4402E), + pointer_default(unique) +] +interface ICorDebugModule4 : IUnknown +{ + /* + * Query to see if the module is loaded into memory in mapped/hydrated format + * + * Arguments: + * pIsMapped - BOOL to store mapping information. TRUE will represent mapped + format while FALSE represents flat format. + * Return Value: + * S_OK in successful case. + * Notes: + */ + HRESULT IsMappedLayout([out] BOOL *pIsMapped); +} + /* * ICorDebugRuntimeUnwindableFrame is a specialized interface of ICorDebugFrame for unmanaged methods * which requires special knowledge to unwind. They are not jitted code. 
When the debugger sees this type @@ -5196,7 +5220,7 @@ interface ICorDebugRuntimeUnwindableFrame : ICorDebugFrame * specific AppDomain. Normally this is an executable or a DLL, but it may also be * some other file of a multi-module assembly. There is an ICorDebugModule instance * for each AppDomain a module is loaded into, even in the case of shared modules like - * mscorlib. + * CoreLib. */ [ diff --git a/src/coreclr/src/inc/corerror.xml b/src/coreclr/src/inc/corerror.xml index 94fe6a1ddcea..b1a0b5b53ca0 100644 --- a/src/coreclr/src/inc/corerror.xml +++ b/src/coreclr/src/inc/corerror.xml @@ -2201,12 +2201,6 @@ File is PE32 - - NGEN_E_SYS_ASM_NI_MISSING - "NGen cannot proceed because Mscorlib.dll does not have a native image" - Compiling any assembly other than mscorlib in the absence of mscorlib.ni.dll is not allowed. - - CLDB_E_INTERNALERROR @@ -2249,8 +2243,8 @@ CLR_E_BIND_SYS_ASM_NI_MISSING - "Could not use native image because Mscorlib.dll is missing a native image" - Returned when loading an assembly that only has a native image and no IL and cannot hardbind to mscorlib.ni.dll. + "Could not use native image because System.Private.CoreLib.dll is missing a native image" + Returned when loading an assembly that only has a native image and no IL and cannot hardbind to System.Private.CoreLib.ni.dll. diff --git a/src/coreclr/src/inc/corexcep.h b/src/coreclr/src/inc/corexcep.h index faca6b49c593..e1c00b50b321 100644 --- a/src/coreclr/src/inc/corexcep.h +++ b/src/coreclr/src/inc/corexcep.h @@ -13,7 +13,7 @@ // All COM+ exceptions are expressed as a RaiseException with this exception // code. If you change this value, you must also change -// mscorlib\src\system\Exception.cs's _COMPlusExceptionCode value. +// Exception.cs's _COMPlusExceptionCode value. 
#define EXCEPTION_MSVC 0xe06d7363 // 0xe0000000 | 'msc' diff --git a/src/coreclr/src/inc/corinfo.h b/src/coreclr/src/inc/corinfo.h index 7341e43358bb..7870683ecf43 100644 --- a/src/coreclr/src/inc/corinfo.h +++ b/src/coreclr/src/inc/corinfo.h @@ -208,11 +208,11 @@ TODO: Talk about initializing structures before use // ////////////////////////////////////////////////////////////////////////////////////////////////////////// -constexpr GUID JITEEVersionIdentifier = { /* 164b4e4f-21f6-4d05-b560-3728395404f2 */ - 0x164b4e4f, - 0x21f6, - 0x4d05, - { 0xb5, 0x60, 0x37, 0x28, 0x39, 0x54, 0x04, 0xf2 } +constexpr GUID JITEEVersionIdentifier = { /* a5eec3a4-4176-43a7-8c2b-a05b551d4f49 */ + 0xa5eec3a4, + 0x4176, + 0x43a7, + {0x8c, 0x2b, 0xa0, 0x5b, 0x55, 0x1d, 0x4f, 0x49} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -891,28 +891,6 @@ enum CorInfoException enum CorInfoIntrinsics { - CORINFO_INTRINSIC_Sin, - CORINFO_INTRINSIC_Cos, - CORINFO_INTRINSIC_Cbrt, - CORINFO_INTRINSIC_Sqrt, - CORINFO_INTRINSIC_Abs, - CORINFO_INTRINSIC_Round, - CORINFO_INTRINSIC_Cosh, - CORINFO_INTRINSIC_Sinh, - CORINFO_INTRINSIC_Tan, - CORINFO_INTRINSIC_Tanh, - CORINFO_INTRINSIC_Asin, - CORINFO_INTRINSIC_Asinh, - CORINFO_INTRINSIC_Acos, - CORINFO_INTRINSIC_Acosh, - CORINFO_INTRINSIC_Atan, - CORINFO_INTRINSIC_Atan2, - CORINFO_INTRINSIC_Atanh, - CORINFO_INTRINSIC_Log10, - CORINFO_INTRINSIC_Pow, - CORINFO_INTRINSIC_Exp, - CORINFO_INTRINSIC_Ceiling, - CORINFO_INTRINSIC_Floor, CORINFO_INTRINSIC_GetChar, // fetch character out of string CORINFO_INTRINSIC_Array_GetDimLength, // Get number of elements in a given dimension of an array CORINFO_INTRINSIC_Array_Get, // Get the value of an element in an array diff --git a/src/coreclr/src/inc/corpriv.h b/src/coreclr/src/inc/corpriv.h index 52cf63a0ee07..fb0b39924cc8 100644 --- a/src/coreclr/src/inc/corpriv.h +++ b/src/coreclr/src/inc/corpriv.h @@ -235,7 +235,7 @@ typedef enum CorElementTypeZapSig // where the encoding/decoding takes place. ELEMENT_TYPE_NATIVE_VALUETYPE_ZAPSIG = 0x3d, - ELEMENT_TYPE_CANON_ZAPSIG = 0x3e, // zapsig encoding for [mscorlib]System.__Canon + ELEMENT_TYPE_CANON_ZAPSIG = 0x3e, // zapsig encoding for System.__Canon ELEMENT_TYPE_MODULE_ZAPSIG = 0x3f, // zapsig encoding for external module id# } CorElementTypeZapSig; diff --git a/src/coreclr/src/inc/dacvars.h b/src/coreclr/src/inc/dacvars.h index f480851e1b27..92f3edf77855 100644 --- a/src/coreclr/src/inc/dacvars.h +++ b/src/coreclr/src/inc/dacvars.h @@ -144,7 +144,7 @@ DEFINE_DACVAR(ULONG, PTR_GcNotification, dac__g_pGcNotificationTable, ::g_pGcNot DEFINE_DACVAR(ULONG, PTR_EEConfig, dac__g_pConfig, ::g_pConfig) -DEFINE_DACVAR(ULONG, MscorlibBinder, dac__g_Mscorlib, ::g_Mscorlib) +DEFINE_DACVAR(ULONG, CoreLibBinder, dac__g_CoreLib, ::g_CoreLib) #if defined(PROFILING_SUPPORTED) || defined(PROFILING_SUPPORTED_DATA) DEFINE_DACVAR(ULONG, ProfControlBlock, dac__g_profControlBlock, ::g_profControlBlock) diff --git a/src/coreclr/src/inc/delayloadhelpers.h b/src/coreclr/src/inc/delayloadhelpers.h deleted file mode 100644 index 160a9da30680..000000000000 --- a/src/coreclr/src/inc/delayloadhelpers.h +++ /dev/null @@ -1,112 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// - -// -// Contains convenience functionality for lazily loading modules -// and getting entrypoints within them.
-// - - #ifndef DelayLoadHelpers_h - #define DelayLoadHelpers_h - - #include "volatile.h" - - namespace DelayLoad - { - //================================================================================================================= - // Contains information needed to load and cache a module. Use through - // the DELAY_LOADED_MODULE macro defined below. - struct Module - { - LPCWSTR const m_wzDllName; - HMODULE m_hMod; - HRESULT m_hr; - Volatile<bool> m_fInitialized; - - // Returns a non-ref-counted HMODULE; will load the module if necessary. - // Do not FreeLibrary the returned value. - HRESULT GetValue(HMODULE *pHMODULE); - }; - } - - //===================================================================================================================== - // Use at global scope to declare a delay loaded module represented as a - // DelayLoad::Module instance. The module may then be accessed as - // 'DelayLoad::Modules::DLL_NAME'. - // - // Parameters: - // DLL_NAME - the simple name (without extension) of the DLL. - // - // Example: - // DELAY_LOADED_MODULE(Kernel32); - // void Foo() { - // HMODULE hModKernel32 = nullptr; - // IfFailThrow(DelayLoad::Modules::Kernel32.GetValue(&hModKernel32)); - // // Use hModKernel32 as needed. Do not FreeLibrary the value! - // } - - #define DELAY_LOADED_MODULE(DLL_NAME) \ - namespace DelayLoad { \ - namespace Modules { \ - constexpr Module DLL_NAME = { L#DLL_NAME W(".dll"), nullptr, S_OK, false }; \ - } \ - } - - namespace DelayLoad - { - //================================================================================================================= - // Contains information needed to load a function pointer from a DLL. Builds - // on the DelayLoad::Module functionality, and should be used through - // the DELAY_LOADED_FUNCTION macro defined below. - struct Function - { - Module * const m_pModule; - LPCSTR const m_szFunctionName; - PVOID m_pvFunction; - HRESULT m_hr; - Volatile<bool> m_fInitialized; - - // On success, ppvFunc is set to point to the entrypoint corresponding to - // m_szFunctionName as exported from m_pModule. - HRESULT GetValue(LPVOID * ppvFunc); - - // Convenience function that does the necessary casting for you. - template <typename FnT> inline - HRESULT GetValue(FnT ** ppFunc) - { - return GetValue(reinterpret_cast<LPVOID*>(ppFunc)); - } - }; - } - - //===================================================================================================================== - // Use at global scope to declare a delay loaded function and its associated module, - // represented as DelayLoad::Function and DelayLoad::Module instances, respectively. - // The function may then be accessed as 'DelayLoad::DLL_NAME::FUNC_NAME', and the - // module may be access as described in DELAY_LOADED_MODULE's comment. - // - // Parameters: - // DLL_NAME - unquoted simple name (without extension) of the DLL containing - // the function. - // FUNC_NAME - unquoted entrypoint name exported from the DLL. - // - // Example: - // DELAY_LOADED_FUNCTION(MyDll, MyFunction); - // HRESULT Foo(...)
{ -// typedef HRESULT MyFunction_t(); -// MyFunction_t * pFunc = nullptr; -// IfFailRet(DelayLoad::WinTypes::RoResolveNamespace.GetValue(&pFunc)); -// return (*pFunc)(...); -// } - -#define DELAY_LOADED_FUNCTION(DLL_NAME, FUNC_NAME) \ - DELAY_LOADED_MODULE(DLL_NAME) \ - namespace DelayLoad { \ - namespace DLL_NAME { \ - constexpr Function FUNC_NAME = { &Modules::##DLL_NAME, #FUNC_NAME, nullptr, S_OK, false }; \ - } \ - } - -#endif // DelayLoadHelpers_h - diff --git a/src/coreclr/src/inc/random.h b/src/coreclr/src/inc/random.h index 53cc17d64908..f2501c356832 100644 --- a/src/coreclr/src/inc/random.h +++ b/src/coreclr/src/inc/random.h @@ -5,8 +5,7 @@ // // -// Defines a random number generator, initially from the System.Random code in the BCL. If you notice any problems, -// please compare to the implementation in src\mscorlib\src\system\random.cs. +// Defines a random number generator, initially from the System.Random code in the BCL. // // Main advantages over rand() are: // diff --git a/src/coreclr/src/inc/readme.md b/src/coreclr/src/inc/readme.md index 1e7754ea5e07..e8f3405859e0 100644 --- a/src/coreclr/src/inc/readme.md +++ b/src/coreclr/src/inc/readme.md @@ -9,4 +9,4 @@ for midl.exe which did that conversion so we work around the issue by doing: - If needed, adjust any of the .cpp files in src\pal\prebuilt\idl\ by hand, using the corresponding artifacts\obj\Windows_NT.x64.Debug\src\inc\idls_out\*_i.c as a guide. Typically this is just adding MIDL_DEFINE_GUID(...) for any new classes/interfaces that have been added to the idl file. -Include these src changes with the remainder of your work when you submit a PR. \ No newline at end of file +Include these src changes with the remainder of your work when you submit a PR. diff --git a/src/coreclr/src/inc/readytorun.h b/src/coreclr/src/inc/readytorun.h index b883f4b558e9..e1719a2843eb 100644 --- a/src/coreclr/src/inc/readytorun.h +++ b/src/coreclr/src/inc/readytorun.h @@ -395,4 +395,13 @@ enum ReadyToRunRuntimeConstants : DWORD READYTORUN_ReversePInvokeTransitionFrameSizeInPointerUnits = 2 }; +enum ReadyToRunHFAElemType : DWORD +{ + READYTORUN_HFA_ELEMTYPE_None = 0, + READYTORUN_HFA_ELEMTYPE_Float32 = 1, + READYTORUN_HFA_ELEMTYPE_Float64 = 2, + READYTORUN_HFA_ELEMTYPE_Vector64 = 3, + READYTORUN_HFA_ELEMTYPE_Vector128 = 4, +}; + #endif // __READYTORUN_H__ diff --git a/src/coreclr/src/inc/safemath.h b/src/coreclr/src/inc/safemath.h index 84ea377c54b8..d93e4a57011e 100644 --- a/src/coreclr/src/inc/safemath.h +++ b/src/coreclr/src/inc/safemath.h @@ -154,8 +154,7 @@ inline bool DoubleFitsInIntType(double val) //----------------------------------------------------------------------------- // -// Liberally lifted from the Office example on MSDN and modified. -// http://msdn.microsoft.com/library/en-us/dncode/html/secure01142004.asp +// Liberally lifted from https://github.com/dcleblanc/SafeInt and modified. // // Modified to track an overflow bit instead of throwing exceptions. In most // cases the Visual C++ optimizer (Whidbey beta1 - v14.00.40607) is able to diff --git a/src/coreclr/src/inc/simplerhash.inl b/src/coreclr/src/inc/simplerhash.inl index 309778e91791..6694ab61212b 100644 --- a/src/coreclr/src/inc/simplerhash.inl +++ b/src/coreclr/src/inc/simplerhash.inl @@ -303,36 +303,7 @@ void SimplerHashTable::Reallocate(unsigned newTable // 32-bit magic numbers, (because the algorithm for using 33-bit magic numbers is slightly slower). 
// -constexpr PrimeInfo primeInfo[] = -{ - PrimeInfo(9, 0x38e38e39, 1), - PrimeInfo(23, 0xb21642c9, 4), - PrimeInfo(59, 0x22b63cbf, 3), - PrimeInfo(131, 0xfa232cf3, 7), - PrimeInfo(239, 0x891ac73b, 7), - PrimeInfo(433, 0x975a751, 4), - PrimeInfo(761, 0x561e46a5, 8), - PrimeInfo(1399, 0xbb612aa3, 10), - PrimeInfo(2473, 0x6a009f01, 10), - PrimeInfo(4327, 0xf2555049, 12), - PrimeInfo(7499, 0x45ea155f, 11), - PrimeInfo(12973, 0x1434f6d3, 10), - PrimeInfo(22433, 0x2ebe18db, 12), - PrimeInfo(46559, 0xb42bebd5, 15), - PrimeInfo(96581, 0xadb61b1b, 16), - PrimeInfo(200341, 0x29df2461, 15), - PrimeInfo(415517, 0xa181c46d, 18), - PrimeInfo(861719, 0x4de0bde5, 18), - PrimeInfo(1787021, 0x9636c46f, 20), - PrimeInfo(3705617, 0x4870adc1, 20), - PrimeInfo(7684087, 0x8bbc5b83, 22), - PrimeInfo(15933877, 0x86c65361, 23), - PrimeInfo(33040633, 0x40fec79b, 23), - PrimeInfo(68513161, 0x7d605cd1, 25), - PrimeInfo(142069021, 0xf1da390b, 27), - PrimeInfo(294594427, 0x74a2507d, 27), - PrimeInfo(733045421, 0x5dbec447, 28), -}; +extern const PrimeInfo primeInfo[27]; template <typename Key, typename KeyFuncs, typename Value, typename Behavior> PrimeInfo SimplerHashTable<Key, KeyFuncs, Value, Behavior>::NextPrime(unsigned number) diff --git a/src/coreclr/src/inc/utilcode.h b/src/coreclr/src/inc/utilcode.h index f411acb7e66a..4ed456df94c7 100644 --- a/src/coreclr/src/inc/utilcode.h +++ b/src/coreclr/src/inc/utilcode.h @@ -1272,6 +1272,7 @@ class CPUGroupInfo static WORD m_nProcessors; static BOOL m_enableGCCPUGroups; static BOOL m_threadUseAllCpuGroups; + static BOOL m_threadAssignCpuGroups; static WORD m_initialGroup; static CPU_Group_Info *m_CPUGroupInfoArray; static bool s_hadSingleProcessorAtStartup; @@ -1285,6 +1286,7 @@ class CPUGroupInfo static void EnsureInitialized(); static BOOL CanEnableGCCPUGroups(); static BOOL CanEnableThreadUseAllCpuGroups(); + static BOOL CanAssignCpuGroupsToThreads(); static WORD GetNumActiveProcessors(); static void GetGroupForProcessor(WORD processor_number, WORD *group_number, WORD *group_processor_number); @@ -4071,13 +4073,14 @@ HRESULT GetImageRuntimeVersionString(PVOID pMetaData, LPCSTR* pString); // The registry keys and values that contain the information regarding // the default registered unmanaged debugger. //***************************************************************************** -constexpr WCHAR kDebugApplicationsPoliciesKey[] = W("SOFTWARE\\Policies\\Microsoft\\Windows\\Windows Error Reporting\\DebugApplications"); -constexpr WCHAR kDebugApplicationsKey[] = W("SOFTWARE\\Microsoft\\Windows\\Windows Error Reporting\\DebugApplications"); -constexpr WCHAR kUnmanagedDebuggerKey[] = W("SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\AeDebug"); -constexpr WCHAR kUnmanagedDebuggerValue[] = W("Debugger"); -constexpr WCHAR kUnmanagedDebuggerAutoValue[] = W("Auto"); -constexpr WCHAR kUnmanagedDebuggerAutoExclusionListKey[] = W("SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\AeDebug\\AutoExclusionList"); +#define kDebugApplicationsPoliciesKey W("SOFTWARE\\Policies\\Microsoft\\Windows\\Windows Error Reporting\\DebugApplications") +#define kDebugApplicationsKey W("SOFTWARE\\Microsoft\\Windows\\Windows Error Reporting\\DebugApplications") + +#define kUnmanagedDebuggerKey W("SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\AeDebug") +#define kUnmanagedDebuggerValue W("Debugger") +#define kUnmanagedDebuggerAutoValue W("Auto") +#define kUnmanagedDebuggerAutoExclusionListKey W("SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\AeDebug\\AutoExclusionList") BOOL GetRegistryLongValue(HKEY hKeyParent, // Parent key. LPCWSTR szKey, // Key name to look at.
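The `g_tkCorEncodeToken` change in cor.h earlier in this diff and the `primeInfo` change here apply the same fix: a `constexpr` array defined in a header has internal linkage in C++, so every translation unit that includes the header carries its own copy of the data. Declaring the array `extern const` in the header and defining it once in a source file leaves a single shared definition. A minimal sketch of the pattern, with illustrative field names (the PR's real definitions live in source files not shown in this diff):

```cpp
// Header portion: declaration only. Every includer now refers to one
// shared array instead of instantiating a private copy per translation unit.
struct PrimeInfo
{
    unsigned prime;   // field names here are guesses for illustration
    unsigned magic;
    unsigned shift;
};
extern const PrimeInfo primeInfo[27];

// Source-file portion: the single out-of-line definition.
const PrimeInfo primeInfo[27] = {
    {9, 0x38e38e39, 1},
    {23, 0xb21642c9, 4},
    // ... remaining entries exactly as in the deleted initializer above ...
};
```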
diff --git a/src/coreclr/src/inc/zapper.h b/src/coreclr/src/inc/zapper.h index ba4f65b15dd9..b88d4d51b676 100644 --- a/src/coreclr/src/inc/zapper.h +++ b/src/coreclr/src/inc/zapper.h @@ -285,7 +285,7 @@ class Zapper ~Zapper(); - // The arguments control which native image of mscorlib to use. + // The arguments control which native image of CoreLib to use. // This matters for hardbinding. void InitEE(BOOL fForceDebug, BOOL fForceProfile, BOOL fForceInstrument); void LoadAndInitializeJITForNgen(LPCWSTR pwzJitName, OUT HINSTANCE* phJit, OUT ICorJitCompiler** ppICorJitCompiler); diff --git a/src/coreclr/src/interop/CMakeLists.txt b/src/coreclr/src/interop/CMakeLists.txt index b8a0e769318d..1642f55a04da 100644 --- a/src/coreclr/src/interop/CMakeLists.txt +++ b/src/coreclr/src/interop/CMakeLists.txt @@ -30,7 +30,10 @@ endif(WIN32) convert_to_absolute_path(INTEROP_SOURCES ${INTEROP_SOURCES}) -add_library_clr(interop +add_library_clr(interop_obj OBJECT ${INTEROP_SOURCES} ) + +add_library(interop INTERFACE) +target_sources(interop INTERFACE $<TARGET_OBJECTS:interop_obj>) \ No newline at end of file diff --git a/src/coreclr/src/jit/CMakeLists.txt b/src/coreclr/src/jit/CMakeLists.txt index 15aa4d59b63c..3f695823be4f 100644 --- a/src/coreclr/src/jit/CMakeLists.txt +++ b/src/coreclr/src/jit/CMakeLists.txt @@ -54,6 +54,7 @@ set( JIT_SOURCES instr.cpp jitconfig.cpp jiteh.cpp + jithashtable.cpp jittelemetry.cpp lclmorph.cpp lclvars.cpp diff --git a/src/coreclr/src/jit/_typeinfo.h b/src/coreclr/src/jit/_typeinfo.h index 4bc90e50dbe6..26173db3fae4 100644 --- a/src/coreclr/src/jit/_typeinfo.h +++ b/src/coreclr/src/jit/_typeinfo.h @@ -42,7 +42,7 @@ enum ti_types namespace { #endif // _MSC_VER -constexpr char* g_ti_type_names_map[] = { +const char* g_ti_type_names_map[] = { #define DEF_TI(ti, nm) nm, #include "titypes.h" #undef DEF_TI @@ -57,7 +57,7 @@ constexpr char* g_ti_type_names_map[] = { namespace { #endif // _MSC_VER -constexpr ti_types g_jit_types_map[] = { +const ti_types g_jit_types_map[] = { #define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) verType, #include "typelist.h" #undef DEF_TP @@ -92,7 +92,7 @@ inline ti_types varType2tiType(var_types type) namespace { #endif // _MSC_VER -constexpr ti_types g_ti_types_map[CORINFO_TYPE_COUNT] = { +const ti_types g_ti_types_map[CORINFO_TYPE_COUNT] = { // see the definition of enum CorInfoType in file inc/corinfo.h TI_ERROR, // CORINFO_TYPE_UNDEF = 0x0, TI_ERROR, // CORINFO_TYPE_VOID = 0x1, diff --git a/src/coreclr/src/jit/codegen.h b/src/coreclr/src/jit/codegen.h index d6e14d7a308c..e876b155ce35 100644 --- a/src/coreclr/src/jit/codegen.h +++ b/src/coreclr/src/jit/codegen.h @@ -341,12 +341,12 @@ class CodeGen final : public CodeGenInterface void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta); void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta); - void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegModified); + void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed); #else void genPushCalleeSavedRegisters(); #endif - void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegModified, regMaskTP maskArgRegsLiveIn); + void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); #if defined(TARGET_ARM) @@ -434,18 +434,18 @@ class CodeGen final : public CodeGenInterface void genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const
regNumber& initReg); - regNumber genGetZeroReg(regNumber initReg, bool* pInitRegModified); + regNumber genGetZeroReg(regNumber initReg, bool* pInitRegZeroed); - void genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegModified); + void genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed); - void genReportGenericContextArg(regNumber initReg, bool* pInitRegModified); + void genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed); - void genSetGSSecurityCookie(regNumber initReg, bool* pInitRegModified); + void genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed); void genFinalizeFrame(); #ifdef PROFILING_SUPPORTED - void genProfilingEnterCallback(regNumber initReg, bool* pInitRegModified); + void genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed); void genProfilingLeaveCallback(unsigned helper); #endif // PROFILING_SUPPORTED @@ -511,7 +511,7 @@ class CodeGen final : public CodeGenInterface void genFuncletEpilog(); void genCaptureFuncletPrologEpilogInfo(); - void genSetPSPSym(regNumber initReg, bool* pInitRegModified); + void genSetPSPSym(regNumber initReg, bool* pInitRegZeroed); void genUpdateCurrentFunclet(BasicBlock* block); #if defined(TARGET_ARM) diff --git a/src/coreclr/src/jit/codegenarm.cpp b/src/coreclr/src/jit/codegenarm.cpp index 0407c710e404..2eaa80862396 100644 --- a/src/coreclr/src/jit/codegenarm.cpp +++ b/src/coreclr/src/jit/codegenarm.cpp @@ -45,7 +45,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // if caller knows for certain the constant will fit. // // Return Value: -// returns true if the immediate was too large and tmpReg was used and modified. +// returns true if the immediate was small enough to be encoded inside the instruction. If not, +// returns false, meaning the immediate was too large and tmpReg was used and modified. // bool CodeGen::genInstrWithConstant( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insFlags flags, regNumber tmpReg) @@ -100,7 +101,8 @@ bool CodeGen::genInstrWithConstant( // tmpReg - an available temporary register // // Return Value: -// true if `tmpReg` was used. +// returns true if the immediate was small enough to be encoded inside the instruction. If not, +// returns false, meaning the immediate was too large and tmpReg was used and modified. // bool CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg) { @@ -1650,14 +1652,14 @@ void CodeGen::genCodeForMulLong(GenTreeMultiRegOp* node) // genProfilingEnterCallback: Generate the profiling function enter callback. // // Arguments: -// initReg - register to use as scratch register -// pInitRegModified - OUT parameter. *pInitRegModified set to 'true' if and only if -// this call sets 'initReg' to a non-zero value. +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if 'initReg' is +// not zero after this call.
// // Return Value: // None // -void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModified) +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -1690,7 +1692,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModifie if (initReg == argReg) { - *pInitRegModified = true; + *pInitRegZeroed = false; } } @@ -1820,17 +1822,14 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // Arguments: // frameSize - the size of the stack frame being allocated. // initReg - register to use as a scratch register. -// pInitRegModified - OUT parameter. *pInitRegModified is set to 'true' if and only if +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if // this call sets 'initReg' to a non-zero value. // maskArgRegsLiveIn - incoming argument registers that are currently live. // // Return value: // None // -void CodeGen::genAllocLclFrame(unsigned frameSize, - regNumber initReg, - bool* pInitRegModified, - regMaskTP maskArgRegsLiveIn) +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) { assert(compiler->compGeneratingProlog); @@ -1860,7 +1859,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, } regSet.verifyRegUsed(initReg); - *pInitRegModified = true; + *pInitRegZeroed = false; // The initReg does not contain zero instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize); compiler->unwindPadding(); @@ -1880,7 +1879,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, if ((genRegMask(initReg) & (RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET | RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE) { - *pInitRegModified = true; + *pInitRegZeroed = false; } } diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp index 2edf42d8580e..453c4c58b287 100644 --- a/src/coreclr/src/jit/codegenarm64.cpp +++ b/src/coreclr/src/jit/codegenarm64.cpp @@ -156,7 +156,9 @@ void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* // Even though INS_add is specified here, the encoder will choose either // an INS_add or an INS_sub and encode the immediate as a positive value // - if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true)) + bool wasTempRegisterUsedForImm = + !genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true); + if (wasTempRegisterUsedForImm) { if (pTmpRegIsZero != nullptr) { @@ -242,6 +244,13 @@ void CodeGen::genPrologSaveRegPair(regNumber reg1, assert(spOffset <= 504); GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); +#if defined(TARGET_UNIX) + if (compiler->generateCFIUnwindCodes()) + { + useSaveNextPair = false; + } +#endif // TARGET_UNIX + if (useSaveNextPair) { // This works as long as we've only been saving pairs, in order, and we've saved the previous one just @@ -370,6 +379,13 @@ void CodeGen::genEpilogRestoreRegPair(regNumber reg1, { GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset); +#if defined(TARGET_UNIX) + if (compiler->generateCFIUnwindCodes()) + { + useSaveNextPair = false; + } +#endif // TARGET_UNIX + if (useSaveNextPair) { compiler->unwindSaveNext(); @@ -1443,7 +1459,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; 
genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; - genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES; + genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + PSPSize; genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; #ifdef DEBUG @@ -2974,7 +2990,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); // TODO-ARM64-CQ Use ARMv8.1 atomics if available - // https://github.com/dotnet/coreclr/issues/11881 + // https://github.com/dotnet/runtime/issues/8225 // Emit code like this: // retry: @@ -4810,14 +4826,14 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode) // genProfilingEnterCallback: Generate the profiling function enter callback. // // Arguments: -// initReg - register to use as scratch register -// pInitRegModified - OUT parameter. *pInitRegModified set to 'true' if 'initReg' is -// not zero after this call. +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if 'initReg' is +// set to a non-zero value by this call. // // Return Value: // None // -void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModified) +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -4845,7 +4861,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModifie if ((genRegMask(initReg) & RBM_PROFILER_ENTER_TRASH) != RBM_NONE) { - *pInitRegModified = true; + *pInitRegZeroed = false; } } @@ -9592,19 +9608,16 @@ void CodeGen::genArm64EmitterUnitTests() // on Windows as well just to be consistent, even though it should not be necessary. // // Arguments: -// frameSize - the size of the stack frame being allocated. -// initReg - register to use as a scratch register. -// pInitRegModified - OUT parameter. *pInitRegModified is set to 'true' if and only if -// this call sets 'initReg' to a non-zero value. -// maskArgRegsLiveIn - incoming argument registers that are currently live. +// frameSize - the size of the stack frame being allocated. +// initReg - register to use as a scratch register. +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged. +// maskArgRegsLiveIn - incoming argument registers that are currently live.
// // Return value: // None // -void CodeGen::genAllocLclFrame(unsigned frameSize, - regNumber initReg, - bool* pInitRegModified, - regMaskTP maskArgRegsLiveIn) +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) { assert(compiler->compGeneratingProlog); @@ -9641,7 +9654,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset); GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg); regSet.verifyRegUsed(initReg); - *pInitRegModified = true; + *pInitRegZeroed = false; // The initReg does not contain zero lastTouchDelta -= pageSize; } @@ -9701,7 +9714,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rLimit, rOffset); // If equal, we need to probe again GetEmitter()->emitIns_J(INS_bls, NULL, -4); - *pInitRegModified = true; + *pInitRegZeroed = false; // The initReg does not contain zero compiler->unwindPadding(); @@ -9716,7 +9729,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, compiler->unwindPadding(); regSet.verifyRegUsed(initReg); - *pInitRegModified = true; + *pInitRegZeroed = false; // The initReg does not contain zero } } diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp index a2d881588ab5..c1dcc7319afc 100644 --- a/src/coreclr/src/jit/codegenarmarch.cpp +++ b/src/coreclr/src/jit/codegenarmarch.cpp @@ -561,14 +561,14 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla // genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. // // Arguments: -// initReg - register to use as a scratch register -// pInitRegModified - OUT parameter. *pInitRegModified is set to 'true' if and only if -// this call sets 'initReg' to a non-zero value. +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. // // Return Value: // None // -void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegModified) +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -593,7 +593,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegModified) GetEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); } - *pInitRegModified = true; + *pInitRegZeroed = false; } //--------------------------------------------------------------------- @@ -616,31 +616,31 @@ void CodeGen::genIntrinsic(GenTree* treeNode) // Right now only Abs/Ceiling/Floor/Round/Sqrt are treated as math intrinsics. 
// - switch (treeNode->AsIntrinsic()->gtIntrinsicId) + switch (treeNode->AsIntrinsic()->gtIntrinsicName) { - case CORINFO_INTRINSIC_Abs: + case NI_System_Math_Abs: genConsumeOperands(treeNode->AsOp()); GetEmitter()->emitInsBinary(INS_ABS, emitActualTypeSize(treeNode), treeNode, srcNode); break; #ifdef TARGET_ARM64 - case CORINFO_INTRINSIC_Ceiling: + case NI_System_Math_Ceiling: genConsumeOperands(treeNode->AsOp()); GetEmitter()->emitInsBinary(INS_frintp, emitActualTypeSize(treeNode), treeNode, srcNode); break; - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Floor: genConsumeOperands(treeNode->AsOp()); GetEmitter()->emitInsBinary(INS_frintm, emitActualTypeSize(treeNode), treeNode, srcNode); break; - case CORINFO_INTRINSIC_Round: + case NI_System_Math_Round: genConsumeOperands(treeNode->AsOp()); GetEmitter()->emitInsBinary(INS_frintn, emitActualTypeSize(treeNode), treeNode, srcNode); break; #endif // TARGET_ARM64 - case CORINFO_INTRINSIC_Sqrt: + case NI_System_Math_Sqrt: genConsumeOperands(treeNode->AsOp()); GetEmitter()->emitInsBinary(INS_SQRT, emitActualTypeSize(treeNode), treeNode, srcNode); break; @@ -815,11 +815,15 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode) // so update 'source' to point to this GT_LCL_VAR_ADDR node // and continue to the codegen for the LCL_VAR node below // + assert(addrNode->isContained()); varNode = addrNode->AsLclVarCommon(); addrNode = nullptr; } else // addrNode is used { + // TODO-Cleanup: `Lowering::NewPutArg` marks only `LCL_VAR_ADDR` as contained nowadays, + // but we use `genConsumeAddress` as a precaution; use `genConsumeReg()` instead. + assert(!addrNode->isContained()); // Generate code to load the address that we need into a register genConsumeAddress(addrNode); addrReg = addrNode->GetRegNum(); @@ -1253,6 +1257,7 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) if (varNode != nullptr) { + assert(varNode->isContained()); srcVarNum = varNode->GetLclNum(); assert(srcVarNum < compiler->lvaCount); @@ -1270,6 +1275,9 @@ void CodeGen::genPutArgSplit(GenTreePutArgSplit* treeNode) else // addrNode is used { assert(addrNode != nullptr); + // TODO-Cleanup: `Lowering::NewPutArg` marks only `LCL_VAR_ADDR` as contained nowadays, + // but we use `genConsumeAddress` as a precaution; use `genConsumeReg()` instead. + assert(!addrNode->isContained()); // Generate code to load the address that we need into a register genConsumeAddress(addrNode); diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index ccfd7567f46a..5923e6512f40 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -4821,7 +4821,7 @@ void CodeGen::genCheckUseBlockInit() */ #if defined(TARGET_ARM64) -void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegModified) +void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed) #else void CodeGen::genPushCalleeSavedRegisters() #endif @@ -4873,7 +4873,7 @@ void CodeGen::genPushCalleeSavedRegisters() // - Generate fully interruptible code for loops that contain calls // - Generate fully interruptible code for leaf methods // - // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity + // Given the limited benefit from this optimization (<10k for CoreLib NGen image), the extra complexity // is not worth it.
// rsPushRegs |= RBM_LR; // We must save the return address (in the LR register) @@ -5310,8 +5310,7 @@ void CodeGen::genPushCalleeSavedRegisters() JITDUMP(" spAdjustment2=%d\n", spAdjustment2); - genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, - pInitRegModified); + genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed); offset += spAdjustment2; // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" @@ -5331,13 +5330,13 @@ void CodeGen::genPushCalleeSavedRegisters() // We've already established the frame pointer, so no need to report the stack pointer change to unwind // info. - genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegModified, /* reportUnwindData */ false); + genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed, /* reportUnwindData */ false); offset += spAdjustment3; } else { genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg, - pInitRegModified); + pInitRegZeroed); offset += remainingFrameSz; offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize; @@ -5369,7 +5368,7 @@ void CodeGen::genPushCalleeSavedRegisters() JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz); // We've already established the frame pointer, so no need to report the stack pointer change to unwind info. - genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegModified, /* reportUnwindData */ false); + genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ false); offset += remainingFrameSz; } else @@ -6098,17 +6097,17 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) #endif // TARGET* -// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegModified if so. +// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so. // Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR. -regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegModified) +regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed) { #ifdef TARGET_ARM64 return REG_ZR; #else // !TARGET_ARM64 - if (*pInitRegModified) + if (*pInitRegZeroed == false) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); - *pInitRegModified = false; + *pInitRegZeroed = true; } return initReg; #endif // !TARGET_ARM64 @@ -6118,14 +6117,14 @@ regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegModified) // genZeroInitFrame: Zero any untracked pointer locals and/or initialize memory for locspace // // Arguments: -// untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init -// code will end initializing memory (not inclusive). -// untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will -// start zero initializing memory. -// initReg - A scratch register (that gets set to zero on some platforms). -// pInitRegModified - Sets a flag that tells the callee whether or not the initReg register got zeroed. -// -void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegModified) +// untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init +// code will end initializing memory (not inclusive). +// untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will +// start zero initializing memory. 
+// initReg - A scratch register (that gets set to zero on some platforms). +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero, +// 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched. +void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -6200,8 +6199,8 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, #else // !define(TARGET_ARM) - rAddr = initReg; - *pInitRegModified = true; + rAddr = initReg; + *pInitRegZeroed = false; #endif // !defined(TARGET_ARM) @@ -6242,7 +6241,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // Load immediate into the InitReg register instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo); GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg); - *pInitRegModified = true; + *pInitRegZeroed = false; } if (useLoop) @@ -6254,7 +6253,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, } #if defined(TARGET_ARM) - rZero1 = genGetZeroReg(initReg, pInitRegModified); + rZero1 = genGetZeroReg(initReg, pInitRegZeroed); instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2); target_ssize_t stmImm = (target_ssize_t)(genRegMask(rZero1) | genRegMask(rZero2)); #endif // TARGET_ARM @@ -6343,7 +6342,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, #endif if (blkSize < minSimdSize) { - zeroReg = genGetZeroReg(initReg, pInitRegModified); + zeroReg = genGetZeroReg(initReg, pInitRegZeroed); int i = 0; for (; i + REGSIZE_BYTES <= blkSize; i += REGSIZE_BYTES) @@ -6405,7 +6404,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, assert(alignmentLoBlkSize < XMM_REGSIZE_BYTES); assert((alignedLclLo - alignmentLoBlkSize) == untrLclLo); - zeroReg = genGetZeroReg(initReg, pInitRegModified); + zeroReg = genGetZeroReg(initReg, pInitRegZeroed); int i = 0; for (; i + REGSIZE_BYTES <= alignmentLoBlkSize; i += REGSIZE_BYTES) @@ -6513,7 +6512,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, emit->emitIns_J(INS_jne, nullptr, -5); // initReg will be zero at end of the loop - *pInitRegModified = false; + *pInitRegZeroed = true; } if (untrLclHi != alignedLclHi) @@ -6522,7 +6521,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, assert(alignmentHiBlkSize < XMM_REGSIZE_BYTES); assert((alignedLclHi + alignmentHiBlkSize) == untrLclHi); - zeroReg = genGetZeroReg(initReg, pInitRegModified); + zeroReg = genGetZeroReg(initReg, pInitRegZeroed); int i = 0; for (; i + REGSIZE_BYTES <= alignmentHiBlkSize; i += REGSIZE_BYTES) @@ -6589,13 +6588,13 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, if (layout->IsGCPtr(i)) { GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, - genGetZeroReg(initReg, pInitRegModified), varNum, i * REGSIZE_BYTES); + genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES); } } } else { - regNumber zeroReg = genGetZeroReg(initReg, pInitRegModified); + regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed); // zero out the whole thing rounded up to a single stack slot size unsigned lclSize = roundUp(compiler->lvaLclSize(varNum), (unsigned)sizeof(int)); @@ -6627,7 +6626,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, // printf("initialize 
untracked spillTmp [EBP-%04X]\n", stkOffs); - inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegModified), TYP_I_IMPL); + inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL); } } @@ -6762,7 +6761,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, * ICodeManager::GetParamTypeArg(). */ -void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegModified) +void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed) { // For OSR the original method has set this up for us. if (compiler->opts.IsOSR()) @@ -6827,8 +6826,8 @@ void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegModifi // We will just use the initReg since it is an available register // and we are probably done using it anyway... - reg = initReg; - *pInitRegModified = true; + reg = initReg; + *pInitRegZeroed = false; // mov reg, [compiler->info.compTypeCtxtArg] GetEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs); @@ -7672,9 +7671,13 @@ void CodeGen::genFnProlog() /* Choose the register to use for zero initialization */ - regNumber initReg = REG_SCRATCH; // Unless we find a better register below - bool initRegModified = true; - regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn; + regNumber initReg = REG_SCRATCH; // Unless we find a better register below + + // Track whether initReg holds zero. Start conservatively and assume it holds a non-zero value. + // If initReg is ever set to zero, this variable is set to true and zero-initializing initReg again + // will be skipped. + bool initRegZeroed = false; + regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn; regMaskTP tempMask; // We should not use the special PINVOKE registers as the initReg @@ -7807,11 +7810,11 @@ void CodeGen::genFnProlog() // been calculated to be one of the callee-saved registers (say, if all the integer argument registers are // in use, and perhaps with other conditions being satisfied). This is ok in other cases, after the callee-saved // registers have been saved. So instead of letting genAllocLclFrame use initReg as a temporary register, - // always use REG_SCRATCH. We don't care if it trashes it, so ignore the initRegModified output argument. - bool ignoreInitRegModified = true; - genAllocLclFrame(compiler->compLclFrameSize, REG_SCRATCH, &ignoreInitRegModified, + // always use REG_SCRATCH. We don't care if it trashes it, so ignore the initRegZeroed output argument.
+ bool ignoreInitRegZeroed = false; + genAllocLclFrame(compiler->compLclFrameSize, REG_SCRATCH, &ignoreInitRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); - genPushCalleeSavedRegisters(initReg, &initRegModified); + genPushCalleeSavedRegisters(initReg, &initRegZeroed); #else // !TARGET_ARM64 genPushCalleeSavedRegisters(); #endif // !TARGET_ARM64 @@ -7855,7 +7858,7 @@ void CodeGen::genFnProlog() if (maskStackAlloc == RBM_NONE) { - genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegModified, intRegState.rsCalleeRegArgMaskLiveIn); + genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn); } #endif // !TARGET_ARM64 @@ -7918,11 +7921,11 @@ void CodeGen::genFnProlog() // Zero out the frame as needed // - genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegModified); + genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed); #if defined(FEATURE_EH_FUNCLETS) - genSetPSPSym(initReg, &initRegModified); + genSetPSPSym(initReg, &initRegZeroed); #else // !FEATURE_EH_FUNCLETS @@ -7935,10 +7938,10 @@ void CodeGen::genFnProlog() // Zero out the slot for nesting level 0 unsigned firstSlotOffs = filterEndOffsetSlotOffs - TARGET_POINTER_SIZE; - if (initRegModified) + if (!initRegZeroed) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); - initRegModified = false; + initRegZeroed = true; } GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar, @@ -7947,7 +7950,7 @@ void CodeGen::genFnProlog() #endif // !FEATURE_EH_FUNCLETS - genReportGenericContextArg(initReg, &initRegModified); + genReportGenericContextArg(initReg, &initRegZeroed); #ifdef JIT32_GCENCODER // Initialize the LocalAllocSP slot if there is localloc in the function. @@ -7959,7 +7962,7 @@ void CodeGen::genFnProlog() // Set up the GS security cookie - genSetGSSecurityCookie(initReg, &initRegModified); + genSetGSSecurityCookie(initReg, &initRegZeroed); #ifdef PROFILING_SUPPORTED @@ -7967,7 +7970,7 @@ void CodeGen::genFnProlog() // OSR methods aren't called, so don't have enter hooks. if (!compiler->opts.IsOSR()) { - genProfilingEnterCallback(initReg, &initRegModified); + genProfilingEnterCallback(initReg, &initRegZeroed); } #endif // PROFILING_SUPPORTED @@ -8029,15 +8032,15 @@ void CodeGen::genFnProlog() } else { - xtraReg = REG_SCRATCH; - initRegModified = true; + xtraReg = REG_SCRATCH; + initRegZeroed = false; } genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState); if (xtraRegClobbered) { - initRegModified = true; + initRegZeroed = false; } } } @@ -8057,7 +8060,7 @@ void CodeGen::genFnProlog() if (regMask & initRegs) { // Check if we have already zeroed this register - if ((reg == initReg) && !initRegModified) + if ((reg == initReg) && initRegZeroed) { continue; } @@ -8066,7 +8069,7 @@ void CodeGen::genFnProlog() instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg); if (reg == initReg) { - initRegModified = false; + initRegZeroed = true; } } } @@ -8078,17 +8081,17 @@ void CodeGen::genFnProlog() // If initReg is not in initRegs then we will use REG_SCRATCH if ((genRegMask(initReg) & initRegs) == 0) { - initReg = REG_SCRATCH; - initRegModified = true; + initReg = REG_SCRATCH; + initRegZeroed = false; } #ifdef TARGET_ARM // This is needed only for Arm since it can use a zero initialized int register // to initialize vfp registers. 
- if (initRegModified) + if (!initRegZeroed) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg); - initRegModified = false; + initRegZeroed = true; } #endif // TARGET_ARM @@ -9095,12 +9098,12 @@ void CodeGen::genFuncletProlog(BasicBlock* block) maskArgRegsLiveIn = RBM_R0; } - regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed - bool initRegModified = true; + regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed + bool initRegZeroed = false; if (maskStackAlloc == RBM_NONE) { - genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegModified, maskArgRegsLiveIn); + genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn); } // This is the end of the OS-reported prolog for purposes of unwinding @@ -9397,10 +9400,10 @@ void CodeGen::genFuncletProlog(BasicBlock* block) maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2; } - regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed - bool initRegModified = true; + regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed + bool initRegZeroed = false; - genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegModified, maskArgRegsLiveIn); + genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn); // Callee saved float registers are copied to stack in their assigned stack slots // after allocating space for them as part of funclet frame. @@ -9739,7 +9742,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() * correctly reported, the PSPSym could be omitted in some cases.) *********************************** */ -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegModified) +void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -9785,8 +9788,8 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegModified) // We will just use the initReg since it is an available register // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegModified = true; + regNumber regTmp = initReg; + *pInitRegZeroed = false; GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs); GetEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); @@ -9797,8 +9800,8 @@ void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegModified) // We will just use the initReg since it is an available register // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegModified = true; + regNumber regTmp = initReg; + *pInitRegZeroed = false; GetEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta); GetEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); diff --git a/src/coreclr/src/jit/codegenlinear.cpp b/src/coreclr/src/jit/codegenlinear.cpp index 5ce369b52424..cf0d1cd149a0 100644 --- a/src/coreclr/src/jit/codegenlinear.cpp +++ b/src/coreclr/src/jit/codegenlinear.cpp @@ -344,7 +344,6 @@ void CodeGen::genCodeForBBlist() needLabel = true; } -#if defined(DEBUG) || defined(LATE_DISASM) // We also want to start a new Instruction group by calling emitAddLabel below, // when we need accurate bbWeights for this block in the emitter. We force this // whenever our previous block was a BBJ_COND and it has a different weight than us. 
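The pervasive `pInitRegModified` to `pInitRegZeroed` rename in the codegen hunks above is more than cosmetic: the flag now states the invariant callers actually care about ("initReg is known to hold zero") rather than an event ("initReg was modified"), which removes the double negatives at use sites like `genGetZeroReg`. A self-contained toy model of the contract, using hypothetical names (this is not JIT code):

```cpp
#include <cstdio>

static int g_scratchReg = -1; // stands in for the hardware scratch register

// A prolog helper that clobbers the scratch register must report that the
// "holds zero" guarantee no longer applies.
void helperThatClobbers(bool* pInitRegZeroed)
{
    g_scratchReg    = 42;
    *pInitRegZeroed = false;
}

// Analogous to genGetZeroReg: zero the register lazily, and at most once.
int getZeroReg(bool* pInitRegZeroed)
{
    if (!*pInitRegZeroed)
    {
        g_scratchReg    = 0;
        *pInitRegZeroed = true;
    }
    return g_scratchReg;
}

int main()
{
    bool initRegZeroed = false;                 // conservative assumption on entry
    helperThatClobbers(&initRegZeroed);
    printf("%d\n", getZeroReg(&initRegZeroed)); // 0: the zeroing happens here
    printf("%d\n", getZeroReg(&initRegZeroed)); // 0: no second zeroing is emitted
    return 0;
}
```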
@@ -356,7 +355,6 @@ void CodeGen::genCodeForBBlist() { needLabel = true; } -#endif // DEBUG || LATE_DISASM if (needLabel) { diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index e65dda6b46c7..88c021ac73d0 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -53,14 +53,14 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla // genSetGSSecurityCookie: Set the "GS" security cookie in the prolog. // // Arguments: -// initReg - register to use as a scratch register -// pInitRegModified - OUT parameter. *pInitRegModified is set to 'true' if and only if -// this call sets 'initReg' to a non-zero value. +// initReg - register to use as a scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. // // Return Value: // None // -void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegModified) +void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -84,7 +84,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegModified) // initReg = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = initReg genSetRegToIcon(initReg, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL); GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, initReg, compiler->lvaGSSecurityCookie, 0); - *pInitRegModified = true; + *pInitRegZeroed = false; } else #endif @@ -105,7 +105,7 @@ void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegModified) GetEmitter()->emitIns_S_R(INS_mov, EA_PTRSIZE, REG_EAX, compiler->lvaGSSecurityCookie, 0); if (initReg == REG_EAX) { - *pInitRegModified = true; + *pInitRegZeroed = false; } } } @@ -1950,19 +1950,16 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) // genAllocLclFrame: Probe the stack and allocate the local stack frame - subtract from SP. // // Arguments: -// frameSize - the size of the stack frame being allocated. -// initReg - register to use as a scratch register. -// pInitRegModified - OUT parameter. *pInitRegModified is set to 'true' if and only if -// this call sets 'initReg' to a non-zero value. -// maskArgRegsLiveIn - incoming argument registers that are currently live. +// frameSize - the size of the stack frame being allocated. +// initReg - register to use as a scratch register. +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. +// maskArgRegsLiveIn - incoming argument registers that are currently live. 
// // Return value: // None // -void CodeGen::genAllocLclFrame(unsigned frameSize, - regNumber initReg, - bool* pInitRegModified, - regMaskTP maskArgRegsLiveIn) +void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) { assert(compiler->compGeneratingProlog); @@ -2046,7 +2043,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, if (initReg == REG_DEFAULT_HELPER_CALL_TARGET) { - *pInitRegModified = true; + *pInitRegZeroed = false; } static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RBM_NONE); @@ -2056,7 +2053,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, if (initReg == REG_STACK_PROBE_HELPER_ARG) { - *pInitRegModified = true; + *pInitRegZeroed = false; } } @@ -6781,7 +6778,7 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) break; case GT_INTRINSIC: - assert(treeNode->AsIntrinsic()->gtIntrinsicId == CORINFO_INTRINSIC_Abs); + assert(treeNode->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs); // Abs(x) = set sign-bit to zero // Abs(f) = f & 0x7fffffff @@ -6859,7 +6856,7 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) // ii) treeNode oper is a GT_INTRINSIC // iii) treeNode type is a floating point type // iv) treeNode is not used from memory -// v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor +// v) tree oper is NI_System_Math{F}_Round, _Ceiling, or _Floor // vi) caller of this routine needs to call genProduceReg() void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) { @@ -6887,18 +6884,18 @@ void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) unsigned ival = 0; - // v) tree oper is CORINFO_INTRINSIC_Round, _Ceiling, or _Floor - switch (treeNode->AsIntrinsic()->gtIntrinsicId) + // v) tree oper is NI_System_Math{F}_Round, _Ceiling, or _Floor + switch (treeNode->AsIntrinsic()->gtIntrinsicName) { - case CORINFO_INTRINSIC_Round: + case NI_System_Math_Round: ival = 4; break; - case CORINFO_INTRINSIC_Ceiling: + case NI_System_Math_Ceiling: ival = 10; break; - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Floor: ival = 9; break; @@ -6934,9 +6931,11 @@ void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) switch (memBase->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(memBase->isContained()); varNum = memBase->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = memBase->AsLclVarCommon()->GetLclOffs(); // Ensure that all the GenTreeIndir values are set to their defaults. assert(memBase->GetRegNum() == REG_NA); @@ -7020,9 +7019,9 @@ void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) void CodeGen::genIntrinsic(GenTree* treeNode) { // Right now only Sqrt/Abs are treated as math intrinsics. - switch (treeNode->AsIntrinsic()->gtIntrinsicId) + switch (treeNode->AsIntrinsic()->gtIntrinsicName) { - case CORINFO_INTRINSIC_Sqrt: + case NI_System_Math_Sqrt: { // Both operand and its result must be of the same floating point type. GenTree* srcNode = treeNode->AsOp()->gtOp1; @@ -7034,13 +7033,13 @@ void CodeGen::genIntrinsic(GenTree* treeNode) break; } - case CORINFO_INTRINSIC_Abs: + case NI_System_Math_Abs: genSSE2BitwiseOp(treeNode); break; - case CORINFO_INTRINSIC_Round: - case CORINFO_INTRINSIC_Ceiling: - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Round: + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: genSSE41RoundOp(treeNode->AsOp()); break; @@ -8450,9 +8449,8 @@ void CodeGen::genAmd64EmitterUnitTests() // genProfilingEnterCallback: Generate the profiling function enter callback. 
// // Arguments: -// initReg - register to use as scratch register -// pInitRegModified - OUT parameter. *pInitRegModified set to 'true' if 'initReg' is -// not zero after this call. +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. This variable remains unchanged. // // Return Value: // None @@ -8471,7 +8469,7 @@ void CodeGen::genAmd64EmitterUnitTests() // 4. All registers are preserved. // 5. The helper pops the FunctionIDOrClientID argument from the stack. // -void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModified) +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -8616,14 +8614,14 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper) // genProfilingEnterCallback: Generate the profiling function enter callback. // // Arguments: -// initReg - register to use as scratch register -// pInitRegModified - OUT parameter. *pInitRegModified set to 'true' if 'initReg' is -// not zero after this call. +// initReg - register to use as scratch register +// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if +// this call sets 'initReg' to a non-zero value. // // Return Value: // None // -void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModified) +void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed) { assert(compiler->compGeneratingProlog); @@ -8760,7 +8758,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModifie // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) { - *pInitRegModified = true; + *pInitRegZeroed = false; } #else // !defined(UNIX_AMD64_ABI) @@ -8809,7 +8807,7 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegModifie // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using. if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0) { - *pInitRegModified = true; + *pInitRegZeroed = false; } #endif // !defined(UNIX_AMD64_ABI) diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index c78ee4477e0d..214fc3a9d5f2 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -1764,6 +1764,10 @@ void Compiler::compInit(ArenaAllocator* pAlloc, info.compPerfScore = 0.0; #endif // defined(DEBUG) || defined(LATE_DISASM) +#if defined(DEBUG) || defined(INLINE_DATA) + info.compMethodHashPrivate = 0; +#endif // defined(DEBUG) || defined(INLINE_DATA) + #ifdef DEBUG // Opt-in to jit stress based on method hash ranges. // @@ -4220,7 +4224,7 @@ void Compiler::EndPhase(Phases phase) // code:CILJit::compileMethod function. // // For an overview of the structure of the JIT, see: -// https://github.com/dotnet/runtime/blob/master/docs/design/coreclr/botr/ryujit-overview.md +// https://github.com/dotnet/runtime/blob/master/docs/design/coreclr/jit/ryujit-overview.md // // Also called for inlinees, though they will only be run through the first few phases. // @@ -5287,10 +5291,6 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, verbose = compIsForInlining() ? 
impInlineInfo->InlinerCompiler->verbose : false; #endif -#if defined(DEBUG) || defined(INLINE_DATA) - info.compMethodHashPrivate = 0; -#endif // defined(DEBUG) || defined(INLINE_DATA) - #if FUNC_INFO_LOGGING LPCWSTR tmpJitFuncInfoFilename = JitConfig.JitFuncInfoFile(); @@ -9285,3 +9285,24 @@ bool Compiler::lvaIsOSRLocal(unsigned varNum) return false; } + +//------------------------------------------------------------------------------ +// gtChangeOperToNullCheck: helper to change tree oper to a NULLCHECK. +// +// Arguments: +// tree - the node to change; +// basicBlock - basic block of the node. +// +// Notes: +// the function should not be called after lowering for platforms that do not support +// emitting NULLCHECK nodes, like arm32. Use `Lowering::TransformUnusedIndirection` +// that handles it and calls this function when appropriate. +// +void Compiler::gtChangeOperToNullCheck(GenTree* tree, BasicBlock* block) +{ + assert(tree->OperIs(GT_FIELD, GT_IND, GT_OBJ, GT_BLK, GT_DYN_BLK)); + tree->ChangeOper(GT_NULLCHECK); + tree->ChangeType(TYP_INT); + block->bbFlags |= BBF_HAS_NULLCHECK; + optMethodFlags |= OMF_HAS_NULLCHECK; +} diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index c5f571838d4b..e94dc8ad4c9a 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -476,6 +476,8 @@ class LclVarDsc unsigned char lvContainsHoles : 1; // True when we have a promoted struct that contains holes unsigned char lvCustomLayout : 1; // True when this struct has "CustomLayout" + unsigned char lvForceLoadNormalize : 1; // True when this local had a cast on the LHS of an assignment + unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call @@ -884,14 +886,14 @@ class LclVarDsc { return varTypeIsSmall(TypeGet()) && // lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore. - (lvIsParam || lvAddrExposed || lvIsStructField); + (lvIsParam || lvAddrExposed || lvIsStructField || lvForceLoadNormalize); } bool lvNormalizeOnStore() const { return varTypeIsSmall(TypeGet()) && // lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore. 
- !(lvIsParam || lvAddrExposed || lvIsStructField); + !(lvIsParam || lvAddrExposed || lvIsStructField || lvForceLoadNormalize); } void incRefCnts(BasicBlock::weight_t weight, @@ -2701,6 +2703,8 @@ class Compiler GenTree* gtNewNullCheck(GenTree* addr, BasicBlock* basicBlock); + void gtChangeOperToNullCheck(GenTree* tree, BasicBlock* block); + GenTreeArgList* gtNewArgList(GenTree* op); GenTreeArgList* gtNewArgList(GenTree* op1, GenTree* op2); GenTreeArgList* gtNewArgList(GenTree* op1, GenTree* op2, GenTree* op3); @@ -3726,6 +3730,7 @@ class Compiler void impImportLeave(BasicBlock* block); void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr); + GenTree* impTypeIsAssignable(GenTree* typeTo, GenTree* typeFrom); GenTree* impIntrinsic(GenTree* newobjThis, CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, @@ -3741,7 +3746,7 @@ class Compiler GenTree* impMathIntrinsic(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, var_types callType, - CorInfoIntrinsics intrinsicID, + NamedIntrinsic intrinsicName, bool tailCall); NamedIntrinsic lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method); GenTree* impUnsupportedNamedIntrinsic(unsigned helper, @@ -3937,9 +3942,9 @@ class Compiler bool VarTypeIsMultiByteAndCanEnreg( var_types type, CORINFO_CLASS_HANDLE typeClass, unsigned* typeSize, bool forReturn, bool isVarArg); - bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId); - bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId); - bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId); + bool IsIntrinsicImplementedByUserCall(NamedIntrinsic intrinsicName); + bool IsTargetIntrinsic(NamedIntrinsic intrinsicName); + bool IsMathIntrinsic(NamedIntrinsic intrinsicName); bool IsMathIntrinsic(GenTree* tree); private: @@ -4633,6 +4638,9 @@ class Compiler void fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALARG_TP volatileVars); + bool fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange); + + void fgRemoveDeadStoreLIR(GenTree* store, BasicBlock* block); bool fgRemoveDeadStore(GenTree** pTree, LclVarDsc* varDsc, VARSET_VALARG_TP life, @@ -4964,7 +4972,7 @@ class Compiler // When the flow graph changes, we need to update the block numbers, predecessor lists, reachability sets, and // dominators. - void fgUpdateChangedFlowGraph(); + void fgUpdateChangedFlowGraph(bool computeDoms = true); public: // Compute the predecessors of the blocks in the control flow graph. 
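(The lvForceLoadNormalize bit introduced above flips a small-typed local from the store-normalized set into the load-normalized set. A standalone sketch of what "normalizing" means here, assuming the usual sense of discarding stale upper register bits; this is an illustration, not code from the patch.)

#include <cstdint>
// For a TYP_SHORT local the 32-bit register may carry stale upper bits;
// normalizing sign-extends the value back to its declared small type.
int32_t normalizeShort(int32_t raw)
{
    return static_cast<int16_t>(raw); // sign-extend from bit 15
}
// Normalize-on-store runs this once per assignment; normalize-on-load runs it
// at every read, which is the conservative choice taken once a cast has
// appeared on the LHS of an assignment to the local.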
@@ -5571,7 +5579,6 @@ class Compiler GenTree* fgCreateCallDispatcherAndGetResult(GenTreeCall* origCall, CORINFO_METHOD_HANDLE callTargetStubHnd, CORINFO_METHOD_HANDLE dispatcherHnd); - GenTree* getMethodPointerTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo); GenTree* getLookupTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_LOOKUP* pLookup, unsigned handleFlags, @@ -6326,11 +6333,12 @@ class Compiler struct CSEdsc { - CSEdsc* csdNextInBucket; // used by the hash table - - unsigned csdHashKey; // the orginal hashkey - - unsigned csdIndex; // 1..optCSECandidateCount + CSEdsc* csdNextInBucket; // used by the hash table + size_t csdHashKey; // the original hashkey + ssize_t csdConstDefValue; // When we CSE similar constants, this is the value that we use as the def + ValueNum csdConstDefVN; // When we CSE similar constants, this is the ValueNumber that we use for the LclVar + // assignment + unsigned csdIndex; // 1..optCSECandidateCount bool csdLiveAcrossCall; unsigned short csdDefCount; // definition count @@ -6359,9 +6367,15 @@ class Compiler ValueNum defConservNormVN; // if all def occurrences share the same conservative normal value // number, this will reflect it; otherwise, NoVN. + // not used for shared const CSE's }; - static const size_t s_optCSEhashSize; + static const size_t s_optCSEhashSizeInitial; + static const size_t s_optCSEhashGrowthFactor; + static const size_t s_optCSEhashBucketSize; + size_t optCSEhashSize; // The current size of hashtable + size_t optCSEhashCount; // Number of entries in hashtable + size_t optCSEhashMaxCountBeforeResize; // Number of entries before resize CSEdsc** optCSEhash; CSEdsc** optCSEtab; @@ -6406,6 +6420,16 @@ class Compiler void optEnsureClearCSEInfo(); #endif // DEBUG + static bool Is_Shared_Const_CSE(size_t key) + { + return ((key & TARGET_SIGN_BIT) != 0); + } + + static size_t Decode_Shared_Const_CSE_Value(size_t key) + { + return (key & ~TARGET_SIGN_BIT) << CSE_CONST_SHARED_LOW_BITS; + } + #endif // FEATURE_ANYCSE #if FEATURE_VALNUM_CSE @@ -6487,7 +6511,7 @@ class Compiler void optCopyProp(BasicBlock* block, Statement* stmt, GenTree* tree, LclNumToGenTreePtrStack* curSsaName); void optBlockCopyPropPopStacks(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName); void optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName); - bool optIsSsaLocal(GenTree* tree); + unsigned optIsSsaLocal(GenTree* tree); int optCopyProp_LclVarScore(LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc, bool preferOp2); void optVnCopyProp(); INDEBUG(void optDumpCopyPropStack(LclNumToGenTreePtrStack* curSsaName)); @@ -8190,7 +8214,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #if defined(TARGET_XARCH) if (getSIMDSupportLevel() == SIMD_AVX2_Supported) { - return TYP_SIMD32; + return JitConfig.EnableHWIntrinsic() ? TYP_SIMD32 : TYP_SIMD16; } else { @@ -8231,7 +8255,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #if defined(TARGET_XARCH) if (getSIMDSupportLevel() == SIMD_AVX2_Supported) { - return YMM_REGSIZE_BYTES; + return JitConfig.EnableHWIntrinsic() ? YMM_REGSIZE_BYTES : XMM_REGSIZE_BYTES; } else { @@ -8261,7 +8285,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { - return YMM_REGSIZE_BYTES; + return JitConfig.EnableHWIntrinsic() ?
YMM_REGSIZE_BYTES : XMM_REGSIZE_BYTES; } else { @@ -8802,8 +8826,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool doLateDisasm; // Run the late disassembler #endif // LATE_DISASM -#if DUMP_GC_TABLES && !defined(DEBUG) && defined(JIT32_GCENCODER) -// Only the JIT32_GCENCODER implements GC dumping in non-DEBUG code. +#if DUMP_GC_TABLES && !defined(DEBUG) #pragma message("NOTE: this non-debug build has GC ptr table dumping always enabled!") static const bool dspGCtbls = true; #endif @@ -9001,7 +9024,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX { #if 0 // Switching between size & speed has measurable throughput impact - // (3.5% on NGen mscorlib when measured). It used to be enabled for + // (3.5% on NGen CoreLib when measured). It used to be enabled for // DEBUG, but should generate identical code between CHK & RET builds, // so that's not acceptable. // TODO-Throughput: Figure out what to do about size vs. speed & throughput. @@ -9067,7 +9090,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX bool compIsVarArgs : 1; // Does the method have varargs parameters? bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options? bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback - bool compPublishStubParam : 1; // EAX captured in prolog will be available through an instrinsic + bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic bool compRetBuffDefStack : 1; // The ret buff argument definitely points into the stack. bool compHasNextCallRetAddr : 1; // The NextCallReturnAddress intrinsic is used. diff --git a/src/coreclr/src/jit/compiler.hpp b/src/coreclr/src/jit/compiler.hpp index c12bacd94d7a..ca3ed3c6fc66 100644 --- a/src/coreclr/src/jit/compiler.hpp +++ b/src/coreclr/src/jit/compiler.hpp @@ -3169,6 +3169,7 @@ inline regMaskTP genIntAllRegArgMask(unsigned numRegs) inline regMaskTP genFltAllRegArgMask(unsigned numRegs) { +#ifndef TARGET_X86 assert(numRegs <= MAX_FLOAT_REG_ARG); regMaskTP result = RBM_NONE; @@ -3177,6 +3178,10 @@ inline regMaskTP genFltAllRegArgMask(unsigned numRegs) result |= fltArgMasks[i]; } return result; +#else + assert(!"no x86 float arg regs\n"); + return RBM_NONE; +#endif } /* diff --git a/src/coreclr/src/jit/copyprop.cpp b/src/coreclr/src/jit/copyprop.cpp index 295fcb8cb8a7..ca01e90250f6 100644 --- a/src/coreclr/src/jit/copyprop.cpp +++ b/src/coreclr/src/jit/copyprop.cpp @@ -36,8 +36,8 @@ void Compiler::optBlockCopyPropPopStacks(BasicBlock* block, LclNumToGenTreePtrSt { continue; } - unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); - if (!lvaInSsa(lclNum)) + const unsigned lclNum = optIsSsaLocal(tree); + if (lclNum == BAD_VAR_NUM) { continue; } @@ -61,8 +61,19 @@ void Compiler::optDumpCopyPropStack(LclNumToGenTreePtrStack* curSsaName) JITDUMP("{ "); for (LclNumToGenTreePtrStack::KeyIterator iter = curSsaName->Begin(); !iter.Equal(curSsaName->End()); ++iter) { - GenTree* node = iter.GetValue()->Top(); - JITDUMP("%d-[%06d]:V%02u ", iter.Get(), dspTreeID(node), node->AsLclVarCommon()->GetLclNum()); + GenTreeLclVarCommon* lclVar = iter.GetValue()->Top()->AsLclVarCommon(); + unsigned ssaLclNum = optIsSsaLocal(lclVar); + assert(ssaLclNum != BAD_VAR_NUM); + + if (ssaLclNum == lclVar->GetLclNum()) + { + JITDUMP("%d-[%06d]:V%02u ", iter.Get(), dspTreeID(lclVar), ssaLclNum); + } + else + { + // A promoted field was assigned using the parent struct, print `ssa field
lclNum(parent lclNum)`. + JITDUMP("%d-[%06d]:V%02u(V%02u) ", iter.Get(), dspTreeID(lclVar), ssaLclNum, lclVar->GetLclNum()); + } } JITDUMP("}\n\n"); } @@ -150,10 +161,10 @@ void Compiler::optCopyProp(BasicBlock* block, Statement* stmt, GenTree* tree, Lc { return; } - unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); + const unsigned lclNum = optIsSsaLocal(tree); // Skip non-SSA variables. - if (!lvaInSsa(lclNum)) + if (lclNum == BAD_VAR_NUM) { return; } @@ -291,13 +302,39 @@ void Compiler::optCopyProp(BasicBlock* block, Statement* stmt, GenTree* tree, Lc return; } -/************************************************************************************** - * - * Helper to check if tree is a local that participates in SSA numbering. - */ -bool Compiler::optIsSsaLocal(GenTree* tree) +//------------------------------------------------------------------------------ +// optIsSsaLocal : helper to check if the tree is a local that participates in SSA numbering. +// +// Arguments: +// tree - The tree to perform the check on; +// +// Returns: +// - lclNum if the local is participating in SSA; +// - fieldLclNum if the parent local can be replaced by its only field; +// - BAD_VAR_NUM otherwise. +// +unsigned Compiler::optIsSsaLocal(GenTree* tree) { - return tree->IsLocal() && lvaInSsa(tree->AsLclVarCommon()->GetLclNum()); + if (!tree->IsLocal()) + { + return BAD_VAR_NUM; + } + + GenTreeLclVarCommon* lclNode = tree->AsLclVarCommon(); + unsigned lclNum = lclNode->GetLclNum(); + LclVarDsc* varDsc = lvaGetDesc(lclNum); + + if (!lvaInSsa(lclNum) && varDsc->CanBeReplacedWithItsField(this)) + { + lclNum = varDsc->lvFieldLclStart; + } + + if (!lvaInSsa(lclNum)) + { + return BAD_VAR_NUM; + } + + return lclNum; } //------------------------------------------------------------------------------ @@ -351,22 +388,22 @@ void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curS // embedded update. Killing the variable is a simplification to produce 0 ASM diffs // for an update release. // - if (optIsSsaLocal(tree) && (tree->gtFlags & GTF_VAR_DEF)) + const unsigned lclNum = optIsSsaLocal(tree); + if ((lclNum != BAD_VAR_NUM) && (tree->gtFlags & GTF_VAR_DEF)) { - VarSetOps::AddElemD(this, optCopyPropKillSet, lvaTable[tree->AsLclVarCommon()->GetLclNum()].lvVarIndex); + VarSetOps::AddElemD(this, optCopyPropKillSet, lvaTable[lclNum].lvVarIndex); } } // This logic must be in sync with SSA renaming process. for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext) { - if (!optIsSsaLocal(tree)) + const unsigned lclNum = optIsSsaLocal(tree); + if (lclNum == BAD_VAR_NUM) { continue; } - unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); - // As we encounter a definition add it to the stack as a live definition. if (tree->gtFlags & GTF_VAR_DEF) { diff --git a/src/coreclr/src/jit/ee_il_dll.cpp b/src/coreclr/src/jit/ee_il_dll.cpp index 150efed411f6..bfd80ffb1200 100644 --- a/src/coreclr/src/jit/ee_il_dll.cpp +++ b/src/coreclr/src/jit/ee_il_dll.cpp @@ -316,7 +316,8 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(CORJIT_FLAGS cpuCompileFlags) // ensure that AVX2 is actually supported. Otherwise, we will end up getting asserts downstream. 
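// Assuming JitConfig.EnableHWIntrinsic corresponds to the usual COMPlus_EnableHWIntrinsic
// configuration knob, the extra term added to the gate below means that turning hardware
// intrinsics off as a whole now also caps the reported SIMD vector length, matching the
// TYP_SIMD32 -> TYP_SIMD16 and YMM -> XMM fallbacks added in compiler.h above.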
if ((JitConfig.EnableAVX2() != 0) && (JitConfig.EnableAVX() != 0) && (JitConfig.EnableSSE42() != 0) && (JitConfig.EnableSSE41() != 0) && (JitConfig.EnableSSSE3() != 0) && (JitConfig.EnableSSE3_4() != 0) && - (JitConfig.EnableSSE3() != 0) && (JitConfig.EnableSSE2() != 0) && (JitConfig.EnableSSE() != 0)) + (JitConfig.EnableSSE3() != 0) && (JitConfig.EnableSSE2() != 0) && (JitConfig.EnableSSE() != 0) && + (JitConfig.EnableHWIntrinsic() != 0)) { if (GetJitTls() != nullptr && JitTls::GetCompiler() != nullptr) { diff --git a/src/coreclr/src/jit/emit.cpp b/src/coreclr/src/jit/emit.cpp index cc12dabc5492..7a957b17f91e 100644 --- a/src/coreclr/src/jit/emit.cpp +++ b/src/coreclr/src/jit/emit.cpp @@ -3867,7 +3867,7 @@ void emitter::emitJumpDistBind() { if (tgtIG) { - printf("to G_M%03u_IG%02u\n", emitComp->compMethodID, tgtIG->igNum); + printf(" to G_M%03u_IG%02u\n", emitComp->compMethodID, tgtIG->igNum); } else { @@ -7692,7 +7692,12 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) break; case CORINFO_HELP_PROF_FCN_LEAVE: +#if defined(TARGET_ARM) + // profiler scratch remains gc live + result = RBM_PROFILER_LEAVE_TRASH & ~RBM_PROFILER_RET_SCRATCH; +#else result = RBM_PROFILER_LEAVE_TRASH; +#endif break; case CORINFO_HELP_PROF_FCN_TAILCALL: diff --git a/src/coreclr/src/jit/emit.h b/src/coreclr/src/jit/emit.h index 2c7f0b073b5d..e9b428928299 100644 --- a/src/coreclr/src/jit/emit.h +++ b/src/coreclr/src/jit/emit.h @@ -870,7 +870,7 @@ class emitter // return value more than 15 that doesn't fit in 4 bits _idCodeSize. // If somehow we generate instruction that needs more than 15 bytes we // will fail on another assert in emit.cpp: noway_assert(id->idCodeSize() >= csz). - // Issue https://github.com/dotnet/coreclr/issues/25050. + // Issue https://github.com/dotnet/runtime/issues/12840. sz = 15; } assert(sz <= 15); // Intel decoder limit. diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index 1938efb9ed44..f21b1170eb83 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -163,8 +163,10 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) bool emitter::AreUpper32BitsZero(regNumber reg) { - // Don't look back across IG boundaries (possible control flow) - if (emitCurIGinsCnt == 0) + // If there are no instructions in this IG, we can look back at + // the previous IG's instructions if this IG is an extension. + // + if ((emitCurIGinsCnt == 0) && ((emitCurIG->igFlags & IGF_EXTEND) == 0)) { return false; } @@ -2956,24 +2958,27 @@ void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, } else { + regNumber amBaseReg = REG_NA; if (memBase != nullptr) { - id->idAddr()->iiaAddrMode.amBaseReg = memBase->GetRegNum(); - } - else - { - id->idAddr()->iiaAddrMode.amBaseReg = REG_NA; + assert(!memBase->isContained()); + amBaseReg = memBase->GetRegNum(); + assert(amBaseReg != REG_NA); } + regNumber amIndxReg = REG_NA; if (indir->HasIndex()) { - id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->GetRegNum(); + GenTree* index = indir->Index(); + assert(!index->isContained()); + amIndxReg = index->GetRegNum(); + assert(amIndxReg != REG_NA); } - else - { - id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; - } - id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale()); + + assert((amBaseReg != REG_NA) || (amIndxReg != REG_NA) || (indir->Offset() != 0)); // At least one should be set. 
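(The AreUpper32BitsZero relaxation above can be restated as a small predicate. This is an illustrative sketch only: the IGF_EXTEND bit value and the parameter shapes are assumed, the real definitions live in the emitter.)

#include <cstdint>
// An "extension" instruction group has no label or control-flow entry of its
// own, so the previous group's last instruction is still the one that just
// executed and the peephole may safely inspect it.
bool canPeepholeLookBack(unsigned insCountInCurIG, uint32_t curIGFlags)
{
    const uint32_t IGF_EXTEND = 0x1; // placeholder bit value for illustration
    if (insCountInCurIG > 0)
    {
        return true; // inspect the current group's last instruction
    }
    return (curIGFlags & IGF_EXTEND) != 0; // empty group: only if it extends the previous one
}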
+ id->idAddr()->iiaAddrMode.amBaseReg = amBaseReg; + id->idAddr()->iiaAddrMode.amIndxReg = amIndxReg; + id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale()); id->idInsFmt(emitMapFmtForIns(fmt, ins)); @@ -3043,11 +3048,7 @@ void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, G if (addr->OperIs(GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)) { GenTreeLclVarCommon* varNode = addr->AsLclVarCommon(); - unsigned offset = 0; - if (addr->OperIs(GT_LCL_FLD_ADDR)) - { - offset = varNode->AsLclFld()->GetLclOffs(); - } + unsigned offset = varNode->GetLclOffs(); emitIns_R_S(ins, attr, dstReg, varNode->GetLclNum(), offset); // Updating variable liveness after instruction was emitted @@ -3288,9 +3289,11 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G switch (memBase->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(memBase->isContained()); varNum = memBase->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = memBase->AsLclVarCommon()->GetLclOffs(); // Ensure that all the GenTreeIndir values are set to their defaults. assert(!memIndir->HasIndex()); @@ -3601,8 +3604,7 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeI { GenTree* addr = storeInd->Addr(); addr = addr->gtSkipReloadOrCopy(); - assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA || - addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT); + assert(addr->OperIs(GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_LEA, GT_CLS_VAR_ADDR, GT_CNS_INT)); instrDesc* id = nullptr; UNATIVE_OFFSET sz; @@ -3681,8 +3683,7 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeI { GenTree* addr = storeInd->Addr(); addr = addr->gtSkipReloadOrCopy(); - assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR || - addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT); + assert(addr->OperIs(GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_CLS_VAR_ADDR, GT_LEA, GT_CNS_INT)); ssize_t offset = 0; if (addr->OperGet() != GT_CLS_VAR_ADDR) diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 65c9af9e8f2c..48f3273e38ea 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -175,6 +175,8 @@ void Compiler::fgInit() #ifdef FEATURE_SIMD fgPreviousCandidateSIMDFieldAsgStmt = nullptr; #endif + + fgHasSwitch = false; } bool Compiler::fgHaveProfileData() @@ -476,7 +478,8 @@ void Compiler::fgEnsureFirstBBisScratch() noway_assert(fgLastBB != nullptr); - block->bbFlags |= (BBF_INTERNAL | BBF_IMPORTED); + // Set the expected flags + block->bbFlags |= (BBF_INTERNAL | BBF_IMPORTED | BBF_JMP_TARGET | BBF_HAS_LABEL); // This new first BB has an implicit ref, and no others. block->bbRefs = 1; @@ -1865,7 +1868,7 @@ bool Compiler::fgReachable(BasicBlock* b1, BasicBlock* b2) * it again. */ -void Compiler::fgUpdateChangedFlowGraph() +void Compiler::fgUpdateChangedFlowGraph(bool computeDoms) { // We need to clear this so we don't hit an assert calling fgRenumberBlocks(). 
fgDomsComputed = false; @@ -1876,7 +1879,10 @@ void Compiler::fgUpdateChangedFlowGraph() fgComputePreds(); fgComputeEnterBlocksSet(); fgComputeReachabilitySets(); - fgComputeDoms(); + if (computeDoms) + { + fgComputeDoms(); + } } /***************************************************************************** @@ -3699,6 +3705,18 @@ PhaseStatus Compiler::fgInsertGCPolls() // We don't want to deal with all the outgoing edges of a switch block. pollType = GCPOLL_CALL; } + else if ((block->bbFlags & BBF_COLD) != 0) + { +#ifdef DEBUG + if (verbose) + { + printf("Selecting CALL poll in block " FMT_BB " because it is a cold block\n", block->bbNum); + } +#endif // DEBUG + + // We don't want to split a cold block. + pollType = GCPOLL_CALL; + } BasicBlock* curBasicBlock = fgCreateGCPoll(pollType, block); createdPollBlocks |= (block != curBasicBlock); @@ -3711,7 +3729,8 @@ PhaseStatus Compiler::fgInsertGCPolls() { noway_assert(opts.OptimizationEnabled()); fgReorderBlocks(); - fgUpdateChangedFlowGraph(); + constexpr bool computeDoms = false; + fgUpdateChangedFlowGraph(computeDoms); } #ifdef DEBUG if (verbose) @@ -4141,9 +4160,12 @@ BasicBlock* Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block) // We are allowed to split loops and we need to keep a few other flags... // - noway_assert((originalFlags & (BBF_SPLIT_NONEXIST & ~(BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1))) == 0); - top->bbFlags = originalFlags & (~BBF_SPLIT_LOST | BBF_GC_SAFE_POINT); - bottom->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT); + noway_assert((originalFlags & (BBF_SPLIT_NONEXIST & + ~(BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_LOOP_PREHEADER | + BBF_RETLESS_CALL))) == 0); + top->bbFlags = originalFlags & (~(BBF_SPLIT_LOST | BBF_LOOP_PREHEADER | BBF_RETLESS_CALL) | BBF_GC_SAFE_POINT); + bottom->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT | BBF_LOOP_PREHEADER | + BBF_RETLESS_CALL); bottom->inheritWeight(top); poll->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT); @@ -7236,7 +7258,7 @@ bool Compiler::fgIsThrow(GenTree* tree) return true; } - // TODO-CQ: there are a bunch of managed methods in [mscorlib]System.ThrowHelper + // TODO-CQ: there are a bunch of managed methods in System.ThrowHelper // that would be nice to recognize. return false; @@ -7379,6 +7401,7 @@ GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfo case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS: case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE: case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS: + case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS: // type = TYP_BYREF; break; @@ -7392,7 +7415,6 @@ GenTreeCall* Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfo case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE: case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE: - case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS: case CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS: type = TYP_I_IMPL; break; @@ -14682,9 +14704,9 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) LIR::ReadOnlyRange range(zeroConstNode, switchTree); m_pLowering->LowerRange(block, range); } - else + else if (fgStmtListThreaded) { - // Re-link the nodes for this statement. 
+ gtSetStmtInfo(switchStmt); fgSetStmtSeq(switchStmt); } diff --git a/src/coreclr/src/jit/gcencode.cpp b/src/coreclr/src/jit/gcencode.cpp index 3c45137b0593..81edb0b0d29e 100644 --- a/src/coreclr/src/jit/gcencode.cpp +++ b/src/coreclr/src/jit/gcencode.cpp @@ -3633,7 +3633,10 @@ void GCInfo::gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, uns template class JitHashTable; template class JitHashTable; -#ifdef DEBUG +#if defined(DEBUG) || DUMP_GC_TABLES + +// This is a copy of GcStackSlotBaseNames from gcinfotypes.h so we can compile it into non-DEBUG builds. +const char* const JitGcStackSlotBaseNames[] = {"caller.sp", "sp", "frame"}; static const char* const GcSlotFlagsNames[] = {"", "(byref) ", @@ -3652,7 +3655,7 @@ class GcInfoEncoderWithLogging public: GcInfoEncoderWithLogging(GcInfoEncoder* gcInfoEncoder, bool verbose) - : m_gcInfoEncoder(gcInfoEncoder), m_doLogging(verbose || JitConfig.JitGCInfoLogging() != 0) + : m_gcInfoEncoder(gcInfoEncoder), m_doLogging(verbose INDEBUG(|| JitConfig.JitGCInfoLogging() != 0)) { } @@ -3662,7 +3665,7 @@ class GcInfoEncoderWithLogging if (m_doLogging) { printf("Stack slot id for offset %d (0x%x) (%s) %s= %d.\n", spOffset, spOffset, - GcStackSlotBaseNames[spBase], GcSlotFlagsNames[flags & 7], newSlotId); + JitGcStackSlotBaseNames[spBase], GcSlotFlagsNames[flags & 7], newSlotId); } return newSlotId; } @@ -3827,14 +3830,14 @@ class GcInfoEncoderWithLogging }; #define GCENCODER_WITH_LOGGING(withLog, realEncoder) \ - GcInfoEncoderWithLogging withLog##Var(realEncoder, compiler->verbose || compiler->opts.dspGCtbls); \ + GcInfoEncoderWithLogging withLog##Var(realEncoder, INDEBUG(compiler->verbose ||) compiler->opts.dspGCtbls); \ GcInfoEncoderWithLogging* withLog = &withLog##Var; -#else // DEBUG +#else // !(defined(DEBUG) || DUMP_GC_TABLES) #define GCENCODER_WITH_LOGGING(withLog, realEncoder) GcInfoEncoder* withLog = realEncoder; -#endif // DEBUG +#endif // !(defined(DEBUG) || DUMP_GC_TABLES) void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSize, unsigned prologSize) { @@ -4006,7 +4009,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz #endif // DISPLAY_SIZES } -#ifdef DEBUG +#if defined(DEBUG) || DUMP_GC_TABLES #define Encoder GcInfoEncoderWithLogging #else #define Encoder GcInfoEncoder diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 03b1d7ef87f1..3c1c1446e928 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -2482,7 +2482,7 @@ unsigned Compiler::gtSetListOrder(GenTree* list, bool isListCallArgs, bool callA // TODO: Do we have to compute costs differently for argument lists and // all other lists? - // https://github.com/dotnet/coreclr/issues/7095 + // https://github.com/dotnet/runtime/issues/6622 unsigned costSz = (isListCallArgs || (next == nullptr)) ? 0 : 1; unsigned costEx = (isListCallArgs || (next == nullptr)) ? 0 : 1; @@ -2514,7 +2514,7 @@ unsigned Compiler::gtSetListOrder(GenTree* list, bool isListCallArgs, bool callA // TODO: Do we have to compute levels differently for argument lists and // all other lists?
- // https://github.com/dotnet/coreclr/issues/7095 + // https://github.com/dotnet/runtime/issues/6622 if (isListCallArgs) { if (level < lvl) @@ -3250,16 +3250,50 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) switch (oper) { #ifdef TARGET_ARM - case GT_CNS_LNG: - costSz = 9; - costEx = 4; - goto COMMON_CNS; - case GT_CNS_STR: // Uses movw/movt - costSz = 7; - costEx = 3; + costSz = 8; + costEx = 2; + goto COMMON_CNS; + + case GT_CNS_LNG: + { + GenTreeIntConCommon* con = tree->AsIntConCommon(); + + INT64 lngVal = con->LngValue(); + INT32 loVal = (INT32)(lngVal & 0xffffffff); + INT32 hiVal = (INT32)(lngVal >> 32); + + if (lngVal == 0) + { + costSz = 1; + costEx = 1; + } + else + { + // Minimum of one instruction to setup hiVal, + // and one instruction to setup loVal + costSz = 4 + 4; + costEx = 1 + 1; + + if (!codeGen->validImmForInstr(INS_mov, (target_ssize_t)hiVal) && + !codeGen->validImmForInstr(INS_mvn, (target_ssize_t)hiVal)) + { + // Needs extra instruction: movw/movt + costSz += 4; + costEx += 1; + } + + if (!codeGen->validImmForInstr(INS_mov, (target_ssize_t)loVal) && + !codeGen->validImmForInstr(INS_mvn, (target_ssize_t)loVal)) + { + // Needs extra instruction: movw/movt + costSz += 4; + costEx += 1; + } + } goto COMMON_CNS; + } case GT_CNS_INT: { @@ -3267,61 +3301,87 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // applied to it. // Any constant that requires a reloc must use the movw/movt sequence // - GenTreeIntConCommon* con = tree->AsIntConCommon(); + GenTreeIntConCommon* con = tree->AsIntConCommon(); + INT32 conVal = con->IconValue(); - if (con->ImmedValNeedsReloc(this) || - !codeGen->validImmForInstr(INS_mov, (target_ssize_t)tree->AsIntCon()->gtIconVal)) + if (con->ImmedValNeedsReloc(this)) { - // Uses movw/movt - costSz = 7; - costEx = 3; + // Requires movw/movt + costSz = 8; + costEx = 2; } - else if (((unsigned)tree->AsIntCon()->gtIconVal) <= 0x00ff) + else if (codeGen->validImmForInstr(INS_add, (target_ssize_t)conVal)) { - // mov Rd, - costSz = 1; + // Typically included with parent oper + costSz = 2; costEx = 1; } - else + else if (codeGen->validImmForInstr(INS_mov, (target_ssize_t)conVal) && + codeGen->validImmForInstr(INS_mvn, (target_ssize_t)conVal)) { - // Uses movw/mvn - costSz = 3; + // Uses mov or mvn + costSz = 4; costEx = 1; } + else + { + // Needs movw/movt + costSz = 8; + costEx = 2; + } goto COMMON_CNS; } #elif defined TARGET_XARCH - case GT_CNS_LNG: - costSz = 10; - costEx = 3; - goto COMMON_CNS; - case GT_CNS_STR: +#ifdef TARGET_AMD64 + costSz = 10; + costEx = 2; +#else // TARGET_X86 costSz = 4; costEx = 1; +#endif goto COMMON_CNS; + case GT_CNS_LNG: case GT_CNS_INT: { + GenTreeIntConCommon* con = tree->AsIntConCommon(); + ssize_t conVal = (oper == GT_CNS_LNG) ? (ssize_t)con->LngValue() : con->IconValue(); + bool fitsInVal = true; + +#ifdef TARGET_X86 + if (oper == GT_CNS_LNG) + { + INT64 lngVal = con->LngValue(); + + conVal = (ssize_t)lngVal; // truncate to 32-bits + + fitsInVal = ((INT64)conVal == lngVal); + } +#endif // TARGET_X86 + // If the constant is a handle then it will need to have a relocation // applied to it. 
// - GenTreeIntConCommon* con = tree->AsIntConCommon(); - bool iconNeedsReloc = con->ImmedValNeedsReloc(this); - if (!iconNeedsReloc && con->FitsInI8()) + if (iconNeedsReloc) + { + costSz = 4; + costEx = 1; + } + else if (fitsInVal && GenTreeIntConCommon::FitsInI8(conVal)) { costSz = 1; costEx = 1; } -#if defined(TARGET_AMD64) - else if (iconNeedsReloc || !con->FitsInI32()) +#ifdef TARGET_AMD64 + else if (!GenTreeIntConCommon::FitsInI32(conVal)) { costSz = 10; - costEx = 3; + costEx = 2; } #endif // TARGET_AMD64 else @@ -3329,21 +3389,83 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 4; costEx = 1; } +#ifdef TARGET_X86 + if (oper == GT_CNS_LNG) + { + costSz += fitsInVal ? 1 : 4; + costEx += 1; + } +#endif // TARGET_X86 + goto COMMON_CNS; } #elif defined(TARGET_ARM64) - case GT_CNS_LNG: + case GT_CNS_STR: + case GT_CNS_LNG: case GT_CNS_INT: - // TODO-ARM64-NYI: Need cost estimates. - costSz = 1; - costEx = 1; + { + GenTreeIntConCommon* con = tree->AsIntConCommon(); + bool iconNeedsReloc = con->ImmedValNeedsReloc(this); + INT64 imm = con->LngValue(); + emitAttr size = EA_SIZE(emitActualTypeSize(tree)); + + if (iconNeedsReloc) + { + costSz = 8; + costEx = 2; + } + else if (emitter::emitIns_valid_imm_for_add(imm, size)) + { + costSz = 2; + costEx = 1; + } + else if (emitter::emitIns_valid_imm_for_mov(imm, size)) + { + costSz = 4; + costEx = 1; + } + else + { + // Arm64 allows any arbitrary 16-bit constant to be loaded into a register halfword + // There are three forms + // movk which loads into any halfword preserving the remaining halfwords + // movz which loads into any halfword zeroing the remaining halfwords + // movn which loads into any halfword zeroing the remaining halfwords then bitwise inverting + // the register + // In some cases it is preferable to use movn, because it has the side effect of filling the + // other halfwords + // with ones + + // Determine whether movn or movz will require the fewest instructions to populate the immediate + bool preferMovz = false; + bool preferMovn = false; + int instructionCount = 4; + + for (int i = (size == EA_8BYTE) ? 48 : 16; i >= 0; i -= 16) + { + if (!preferMovn && (uint16_t(imm >> i) == 0x0000)) + { + preferMovz = true; // by using a movk to start we can save one instruction + instructionCount--; + } + else if (!preferMovz && (uint16_t(imm >> i) == 0xffff)) + { + preferMovn = true; // by using a movn to start we can save one instruction + instructionCount--; + } + } + + costEx = instructionCount; + costSz = 4 * instructionCount; + } + } goto COMMON_CNS; #else - case GT_CNS_LNG: case GT_CNS_STR: + case GT_CNS_LNG: case GT_CNS_INT: #error "Unknown TARGET" #endif @@ -3502,6 +3624,8 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) level = gtSetEvalOrder(op1); + GenTreeIntrinsic* intrinsic; + /* Special handling for some operators */ switch (oper) @@ -3563,54 +3687,82 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) break; case GT_INTRINSIC: - // GT_INTRINSIC intrinsics Sin, Cos, Sqrt, Abs ... have higher costs. - // TODO: tune these costs target specific as some of these are - // target intrinsics and would cost less to generate code. 
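(Restating the movz/movn halfword scan above as a standalone function for the EA_8BYTE case; a sketch that mirrors the heuristic, not the emitter itself.)

#include <cstdint>
// Instructions needed to materialize a 64-bit immediate: one movz (or movn)
// plus one movk per halfword that the leading instruction does not cover.
int arm64ImmInstructionCount(uint64_t imm)
{
    bool preferMovz = false;
    bool preferMovn = false;
    int count = 4; // worst case: one instruction per 16-bit halfword
    for (int shift = 48; shift >= 0; shift -= 16)
    {
        uint16_t half = static_cast<uint16_t>(imm >> shift);
        if (!preferMovn && (half == 0x0000))
        {
            preferMovz = true; // zero halfwords come for free under movz
            count--;
        }
        else if (!preferMovz && (half == 0xffff))
        {
            preferMovn = true; // all-ones halfwords come for free under movn
            count--;
        }
    }
    return count; // e.g. 0x0000000000010000 needs just one movz
}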
- switch (tree->AsIntrinsic()->gtIntrinsicId) + intrinsic = tree->AsIntrinsic(); + if (intrinsic->gtIntrinsicId == CORINFO_INTRINSIC_Illegal) { - default: - assert(!"missing case for gtIntrinsicId"); - costEx = 12; - costSz = 12; - break; + // named intrinsic + assert(intrinsic->gtIntrinsicName != NI_Illegal); - case CORINFO_INTRINSIC_Sin: - case CORINFO_INTRINSIC_Cos: - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Cbrt: - case CORINFO_INTRINSIC_Cosh: - case CORINFO_INTRINSIC_Sinh: - case CORINFO_INTRINSIC_Tan: - case CORINFO_INTRINSIC_Tanh: - case CORINFO_INTRINSIC_Asin: - case CORINFO_INTRINSIC_Asinh: - case CORINFO_INTRINSIC_Acos: - case CORINFO_INTRINSIC_Acosh: - case CORINFO_INTRINSIC_Atan: - case CORINFO_INTRINSIC_Atanh: - case CORINFO_INTRINSIC_Atan2: - case CORINFO_INTRINSIC_Log10: - case CORINFO_INTRINSIC_Pow: - case CORINFO_INTRINSIC_Exp: - case CORINFO_INTRINSIC_Ceiling: - case CORINFO_INTRINSIC_Floor: - case CORINFO_INTRINSIC_Object_GetType: - // Giving intrinsics a large fixed execution cost is because we'd like to CSE - // them, even if they are implemented by calls. This is different from modeling - // user calls since we never CSE user calls. - costEx = 36; - costSz = 4; - break; + // GT_INTRINSIC intrinsics Sin, Cos, Sqrt, Abs ... have higher costs. + // TODO: tune these costs target specific as some of these are + // target intrinsics and would cost less to generate code. + switch (intrinsic->gtIntrinsicName) + { + default: + assert(!"missing case for gtIntrinsicName"); + costEx = 12; + costSz = 12; + break; - case CORINFO_INTRINSIC_Abs: - costEx = 5; - costSz = 15; - break; + case NI_System_Math_Sin: + case NI_System_Math_Cos: + case NI_System_Math_Sqrt: + case NI_System_Math_Cbrt: + case NI_System_Math_Cosh: + case NI_System_Math_Sinh: + case NI_System_Math_Tan: + case NI_System_Math_Tanh: + case NI_System_Math_Asin: + case NI_System_Math_Asinh: + case NI_System_Math_Acos: + case NI_System_Math_Acosh: + case NI_System_Math_Atan: + case NI_System_Math_Atanh: + case NI_System_Math_Atan2: + case NI_System_Math_Log10: + case NI_System_Math_Pow: + case NI_System_Math_Exp: + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: + // Giving intrinsics a large fixed execution cost is because we'd like to CSE + // them, even if they are implemented by calls. This is different from modeling + // user calls since we never CSE user calls. + costEx = 36; + costSz = 4; + break; - case CORINFO_INTRINSIC_Round: - costEx = 3; - costSz = 4; - break; + case NI_System_Math_Abs: + costEx = 5; + costSz = 15; + break; + + case NI_System_Math_Round: + costEx = 3; + costSz = 4; + break; + } + } + else + { + // old style intrinsic + assert(intrinsic->gtIntrinsicName == NI_Illegal); + + switch (intrinsic->gtIntrinsicId) + { + default: + assert(!"missing case for gtIntrinsicId"); + costEx = 12; + costSz = 12; + break; + + case CORINFO_INTRINSIC_Object_GetType: + // Giving intrinsics a large fixed execution cost is because we'd like to CSE + // them, even if they are implemented by calls. This is different from modeling + // user calls since we never CSE user calls. 
+ costEx = 36; + costSz = 4; + break; + } } level++; break; @@ -4093,10 +4245,10 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case GT_INTRINSIC: - switch (tree->AsIntrinsic()->gtIntrinsicId) + switch (tree->AsIntrinsic()->gtIntrinsicName) { - case CORINFO_INTRINSIC_Atan2: - case CORINFO_INTRINSIC_Pow: + case NI_System_Math_Atan2: + case NI_System_Math_Pow: // These math intrinsics are actually implemented by user calls. // Increase the Sethi 'complexity' by two to reflect the argument // register requirement. @@ -4153,7 +4305,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) // so if possible it was set above. tryToSwap = false; } - else if ((oper == GT_INTRINSIC) && IsIntrinsicImplementedByUserCall(tree->AsIntrinsic()->gtIntrinsicId)) + else if ((oper == GT_INTRINSIC) && IsIntrinsicImplementedByUserCall(tree->AsIntrinsic()->gtIntrinsicName)) { // We do not swap operand execution order for intrinsics that are implemented by user calls // because of trickiness around ensuring the execution order does not change during rationalization. @@ -5355,7 +5507,7 @@ bool GenTree::OperRequiresCallFlag(Compiler* comp) return true; case GT_INTRINSIC: - return comp->IsIntrinsicImplementedByUserCall(this->AsIntrinsic()->gtIntrinsicId); + return comp->IsIntrinsicImplementedByUserCall(this->AsIntrinsic()->gtIntrinsicName); #if FEATURE_FIXED_OUT_ARGS && !defined(TARGET_64BIT) case GT_LSH: @@ -7577,7 +7729,8 @@ GenTree* Compiler::gtCloneExpr( case GT_INTRINSIC: copy = new (this, GT_INTRINSIC) GenTreeIntrinsic(tree->TypeGet(), tree->AsOp()->gtOp1, tree->AsOp()->gtOp2, - tree->AsIntrinsic()->gtIntrinsicId, tree->AsIntrinsic()->gtMethodHandle); + tree->AsIntrinsic()->gtIntrinsicId, tree->AsIntrinsic()->gtIntrinsicName, + tree->AsIntrinsic()->gtMethodHandle); #ifdef FEATURE_READYTORUN_COMPILER copy->AsIntrinsic()->gtEntryPoint = tree->AsIntrinsic()->gtEntryPoint; #endif @@ -11338,80 +11491,98 @@ void Compiler::gtDispTree(GenTree* tree, if (tree->gtOper == GT_INTRINSIC) { - switch (tree->AsIntrinsic()->gtIntrinsicId) + GenTreeIntrinsic* intrinsic = tree->AsIntrinsic(); + + if (intrinsic->gtIntrinsicId == CORINFO_INTRINSIC_Illegal) { - case CORINFO_INTRINSIC_Sin: - printf(" sin"); - break; - case CORINFO_INTRINSIC_Cos: - printf(" cos"); - break; - case CORINFO_INTRINSIC_Cbrt: - printf(" cbrt"); - break; - case CORINFO_INTRINSIC_Sqrt: - printf(" sqrt"); - break; - case CORINFO_INTRINSIC_Abs: - printf(" abs"); - break; - case CORINFO_INTRINSIC_Round: - printf(" round"); - break; - case CORINFO_INTRINSIC_Cosh: - printf(" cosh"); - break; - case CORINFO_INTRINSIC_Sinh: - printf(" sinh"); - break; - case CORINFO_INTRINSIC_Tan: - printf(" tan"); - break; - case CORINFO_INTRINSIC_Tanh: - printf(" tanh"); - break; - case CORINFO_INTRINSIC_Asin: - printf(" asin"); - break; - case CORINFO_INTRINSIC_Asinh: - printf(" asinh"); - break; - case CORINFO_INTRINSIC_Acos: - printf(" acos"); - break; - case CORINFO_INTRINSIC_Acosh: - printf(" acosh"); - break; - case CORINFO_INTRINSIC_Atan: - printf(" atan"); - break; - case CORINFO_INTRINSIC_Atan2: - printf(" atan2"); - break; - case CORINFO_INTRINSIC_Atanh: - printf(" atanh"); - break; - case CORINFO_INTRINSIC_Log10: - printf(" log10"); - break; - case CORINFO_INTRINSIC_Pow: - printf(" pow"); - break; - case CORINFO_INTRINSIC_Exp: - printf(" exp"); - break; - case CORINFO_INTRINSIC_Ceiling: - printf(" ceiling"); - break; - case CORINFO_INTRINSIC_Floor: - printf(" floor"); - break; - case CORINFO_INTRINSIC_Object_GetType: - printf(" objGetType"); - break; + // named 
intrinsic + assert(intrinsic->gtIntrinsicName != NI_Illegal); + switch (intrinsic->gtIntrinsicName) + { + case NI_System_Math_Sin: + printf(" sin"); + break; + case NI_System_Math_Cos: + printf(" cos"); + break; + case NI_System_Math_Cbrt: + printf(" cbrt"); + break; + case NI_System_Math_Sqrt: + printf(" sqrt"); + break; + case NI_System_Math_Abs: + printf(" abs"); + break; + case NI_System_Math_Round: + printf(" round"); + break; + case NI_System_Math_Cosh: + printf(" cosh"); + break; + case NI_System_Math_Sinh: + printf(" sinh"); + break; + case NI_System_Math_Tan: + printf(" tan"); + break; + case NI_System_Math_Tanh: + printf(" tanh"); + break; + case NI_System_Math_Asin: + printf(" asin"); + break; + case NI_System_Math_Asinh: + printf(" asinh"); + break; + case NI_System_Math_Acos: + printf(" acos"); + break; + case NI_System_Math_Acosh: + printf(" acosh"); + break; + case NI_System_Math_Atan: + printf(" atan"); + break; + case NI_System_Math_Atan2: + printf(" atan2"); + break; + case NI_System_Math_Atanh: + printf(" atanh"); + break; + case NI_System_Math_Log10: + printf(" log10"); + break; + case NI_System_Math_Pow: + printf(" pow"); + break; + case NI_System_Math_Exp: + printf(" exp"); + break; + case NI_System_Math_Ceiling: + printf(" ceiling"); + break; + case NI_System_Math_Floor: + printf(" floor"); + break; - default: - unreached(); + default: + unreached(); + } + } + else + { + // old style intrinsic + assert(intrinsic->gtIntrinsicName == NI_Illegal); + switch (intrinsic->gtIntrinsicId) + { + case CORINFO_INTRINSIC_Object_GetType: + printf(" objGetType"); + break; + + default: + unreached(); + } } } @@ -13611,16 +13782,13 @@ GenTree* Compiler::gtTryRemoveBoxUpstreamEffects(GenTree* op, BoxRemovalOptions // For struct types read the first byte of the // source struct; there's no need to read the // entire thing, and no place to put it. 
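// The narrowing below now goes through the gtChangeOperToNullCheck helper added
// in compiler.cpp earlier in this diff, so the BBF_HAS_NULLCHECK and
// OMF_HAS_NULLCHECK bookkeeping that used to be open-coded here can no longer
// be forgotten by a caller.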
- assert(copySrc->gtOper == GT_OBJ || copySrc->gtOper == GT_IND || copySrc->gtOper == GT_FIELD); + assert(copySrc->OperIs(GT_OBJ, GT_IND, GT_FIELD)); copyStmt->SetRootNode(copySrc); if (options == BR_REMOVE_AND_NARROW || options == BR_REMOVE_AND_NARROW_WANT_TYPE_HANDLE) { JITDUMP(" to read first byte of struct via modified [%06u]\n", dspTreeID(copySrc)); - copySrc->ChangeOper(GT_NULLCHECK); - copySrc->gtType = TYP_BYTE; - compCurBB->bbFlags |= BBF_HAS_NULLCHECK; - optMethodFlags |= OMF_HAS_NULLCHECK; + gtChangeOperToNullCheck(copySrc, compCurBB); } else { @@ -15799,6 +15967,11 @@ void Compiler::gtExtractSideEffList(GenTree* expr, if (m_compiler->gtNodeHasSideEffects(node, m_flags)) { m_sideEffects.Push(node); + if (node->OperIsBlk() && !node->OperIsStoreBlk()) + { + JITDUMP("Replace an unused OBJ/BLK node [%06d] with a NULLCHECK\n", dspTreeID(node)); + m_compiler->gtChangeOperToNullCheck(node, m_compiler->compCurBB); + } return Compiler::WALK_SKIP_SUBTREES; } @@ -17471,7 +17644,9 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) { CORINFO_FIELD_HANDLE fieldHnd = fieldSeq->m_fieldHnd; CorInfoType fieldCorType = info.compCompHnd->getFieldType(fieldHnd, &structHnd); - assert(fieldCorType == CORINFO_TYPE_VALUECLASS); + // With unsafe code and type casts + // this can return a primitive type and have nullptr for structHnd + // see runtime/issues/38541 } } } @@ -18634,7 +18809,7 @@ bool GenTree::isCommutativeSIMDIntrinsic() } } -// Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, false otherwise +// Returns true for the SIMD Intrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeSIMD::OperIsMemoryLoad() const { if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) @@ -18856,7 +19031,7 @@ GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode( GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, TYP_UNKNOWN, 0); } -// Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise +// Returns true for the HW Intrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeHWIntrinsic::OperIsMemoryLoad() const { #if defined(TARGET_XARCH) || defined(TARGET_ARM64) @@ -18899,7 +19074,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad() const return false; } -// Returns true for the HW Instrinsic instructions that have MemoryStore semantics, false otherwise +// Returns true for the HW Intrinsic instructions that have MemoryStore semantics, false otherwise bool GenTreeHWIntrinsic::OperIsMemoryStore() const { #if defined(TARGET_XARCH) || defined(TARGET_ARM64) @@ -18935,7 +19110,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore() const return false; } -// Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, false otherwise +// Returns true for the HW Intrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() const { #if defined(TARGET_XARCH) || defined(TARGET_ARM64) diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index b6a95a1d3c7a..5c3a94395db6 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -4201,6 +4201,7 @@ struct GenTreeCall final : public GenTree #define GTF_CALL_M_ALLOC_SIDE_EFFECTS 0x00400000 // GT_CALL -- this is a call to an allocator with side effects #define GTF_CALL_M_SUPPRESS_GC_TRANSITION 0x00800000 // GT_CALL -- suppress the GC transition (i.e. 
during a pinvoke) but a separate GC safe point is required. #define GTF_CALL_M_EXP_RUNTIME_LOOKUP 0x01000000 // GT_CALL -- this call needs to be tranformed into CFG for the dynamic dictionary expansion feature. +#define GTF_CALL_M_STRESS_TAILCALL 0x02000000 // GT_CALL -- the call is NOT "tail" prefixed but GTF_CALL_M_EXPLICIT_TAILCALL was added because of tail call stress mode // clang-format on @@ -4315,6 +4316,13 @@ struct GenTreeCall final : public GenTree return (gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) != 0; } + // Returns true if this call didn't have an explicit tail. prefix in the IL + // but was marked as an explicit tail call because of tail call stress mode. + bool IsStressTailCall() const + { + return (gtCallMoreFlags & GTF_CALL_M_STRESS_TAILCALL) != 0; + } + // This method returning "true" implies that tail call flowgraph morhphing has // performed final checks and committed to making a tail call. bool IsTailCall() const @@ -4729,6 +4737,7 @@ struct GenTreeQmark : public GenTreeOp struct GenTreeIntrinsic : public GenTreeOp { CorInfoIntrinsics gtIntrinsicId; + NamedIntrinsic gtIntrinsicName; CORINFO_METHOD_HANDLE gtMethodHandle; // Method handle of the method which is treated as an intrinsic. #ifdef FEATURE_READYTORUN_COMPILER @@ -4736,15 +4745,31 @@ struct GenTreeIntrinsic : public GenTreeOp CORINFO_CONST_LOOKUP gtEntryPoint; #endif - GenTreeIntrinsic(var_types type, GenTree* op1, CorInfoIntrinsics intrinsicId, CORINFO_METHOD_HANDLE methodHandle) - : GenTreeOp(GT_INTRINSIC, type, op1, nullptr), gtIntrinsicId(intrinsicId), gtMethodHandle(methodHandle) + GenTreeIntrinsic(var_types type, + GenTree* op1, + CorInfoIntrinsics intrinsicId, + NamedIntrinsic intrinsicName, + CORINFO_METHOD_HANDLE methodHandle) + : GenTreeOp(GT_INTRINSIC, type, op1, nullptr) + , gtIntrinsicId(intrinsicId) + , gtIntrinsicName(intrinsicName) + , gtMethodHandle(methodHandle) { + assert(intrinsicId != CORINFO_INTRINSIC_Illegal || intrinsicName != NI_Illegal); } - GenTreeIntrinsic( - var_types type, GenTree* op1, GenTree* op2, CorInfoIntrinsics intrinsicId, CORINFO_METHOD_HANDLE methodHandle) - : GenTreeOp(GT_INTRINSIC, type, op1, op2), gtIntrinsicId(intrinsicId), gtMethodHandle(methodHandle) + GenTreeIntrinsic(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoIntrinsics intrinsicId, + NamedIntrinsic intrinsicName, + CORINFO_METHOD_HANDLE methodHandle) + : GenTreeOp(GT_INTRINSIC, type, op1, op2) + , gtIntrinsicId(intrinsicId) + , gtIntrinsicName(intrinsicName) + , gtMethodHandle(methodHandle) { + assert(intrinsicId != CORINFO_INTRINSIC_Illegal || intrinsicName != NI_Illegal); } #if DEBUGGABLE_GENTREE @@ -4848,7 +4873,7 @@ struct GenTreeSIMD : public GenTreeJitIntrinsic gtSIMDIntrinsicID = simdIntrinsicID; } - bool OperIsMemoryLoad() const; // Returns true for the SIMD Instrinsic instructions that have MemoryLoad semantics, + bool OperIsMemoryLoad() const; // Returns true for the SIMD Intrinsic instructions that have MemoryLoad semantics, // false otherwise #if DEBUGGABLE_GENTREE @@ -4889,15 +4914,15 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic } } - // Note that HW Instrinsic instructions are a sub class of GenTreeOp which only supports two operands - // However there are HW Instrinsic instructions that have 3 or even 4 operands and this is + // Note that HW Intrinsic instructions are a sub class of GenTreeOp which only supports two operands + // However there are HW Intrinsic instructions that have 3 or even 4 operands and this is // supported using a single op1 and using an 
ArgList for it: gtNewArgList(op1, op2, op3) - bool OperIsMemoryLoad() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad semantics, + bool OperIsMemoryLoad() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad semantics, // false otherwise - bool OperIsMemoryStore() const; // Returns true for the HW Instrinsic instructions that have MemoryStore semantics, + bool OperIsMemoryStore() const; // Returns true for the HW Intrinsic instructions that have MemoryStore semantics, // false otherwise - bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Instrinsic instructions that have MemoryLoad or + bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad or // MemoryStore semantics, false otherwise #if DEBUGGABLE_GENTREE diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index 5e7eda61c30d..5723ac8f322b 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -202,7 +202,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va { int numArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicID); - // HW Instrinsic's with -1 for numArgs have a varying number of args, so we currently + // HW Intrinsic's with -1 for numArgs have a varying number of args, so we currently // give themm a unique value number them, and don't add an extra argument. // if (numArgs == -1) @@ -210,7 +210,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleForHWSIMD(var_types simdType, va return false; } - // We iterate over all of the different baseType's for this instrinsic in the HWIntrinsicInfo table + // We iterate over all of the different baseType's for this intrinsic in the HWIntrinsicInfo table // We set diffInsCount to the number of instructions that can execute differently. 
// unsigned diffInsCount = 0; diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 574e8c48d15f..ddca579f34ab 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -469,9 +469,11 @@ void CodeGen::genHWIntrinsic_R_RM( switch (addr->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(addr->isContained()); varNum = addr->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = addr->AsLclVarCommon()->GetLclOffs(); break; } @@ -698,9 +700,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, switch (addr->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(addr->isContained()); varNum = addr->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = addr->AsLclVarCommon()->GetLclOffs(); break; } @@ -861,9 +865,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins) switch (addr->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(addr->isContained()); varNum = addr->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = addr->AsLclVarCommon()->GetLclOffs(); break; } @@ -986,9 +992,11 @@ void CodeGen::genHWIntrinsic_R_R_R_RM( switch (addr->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(addr->isContained()); varNum = addr->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = addr->AsLclVarCommon()->GetLclOffs(); break; } diff --git a/src/coreclr/src/jit/hwintrinsicxarch.cpp b/src/coreclr/src/jit/hwintrinsicxarch.cpp index 8d7824ea189b..e624f18e713f 100644 --- a/src/coreclr/src/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsicxarch.cpp @@ -405,11 +405,12 @@ bool HWIntrinsicInfo::isScalarIsa(CORINFO_InstructionSet isa) case InstructionSet_BMI2_X64: case InstructionSet_LZCNT: case InstructionSet_LZCNT_X64: - case InstructionSet_POPCNT: - case InstructionSet_POPCNT_X64: case InstructionSet_X86Base: case InstructionSet_X86Base_X64: { + // InstructionSet_POPCNT and InstructionSet_POPCNT_X64 are excluded + // even though they are "scalar" ISA because they depend on SSE4.2 + // and Popcnt.IsSupported implies Sse42.IsSupported return true; } diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index e1443b849ad3..f03f63943120 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -1392,8 +1392,8 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, } else if (asgType == TYP_STRUCT) { - asgType = impNormStructType(structHnd); - src->gtType = asgType; + // It should already have the appropriate type. 
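// (If my reading is right, impNormStructType returns the JIT's normalized
// var_types for a struct handle, e.g. TYP_SIMD16 for a 16-byte SIMD struct,
// so the old re-assignment of src->gtType was redundant; the assert below
// checks the invariant instead of re-establishing it.)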
+ assert(asgType == impNormStructType(structHnd)); } if ((dest == nullptr) && (destAddr->OperGet() == GT_ADDR)) { @@ -2117,11 +2117,18 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken nullptr DEBUGARG("impRuntimeLookup indirectOffset")); } + // The last indirection could be subject to a size check (dynamic dictionary expansion) + bool isLastIndirectionWithSizeCheck = + ((i == pRuntimeLookup->indirections - 1) && (pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK)); + if (i != 0) { slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree); slotPtrTree->gtFlags |= GTF_IND_NONFAULTING; - slotPtrTree->gtFlags |= GTF_IND_INVARIANT; + if (!isLastIndirectionWithSizeCheck) + { + slotPtrTree->gtFlags |= GTF_IND_INVARIANT; + } } if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset)) @@ -2131,8 +2138,7 @@ GenTree* Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken if (pRuntimeLookup->offsets[i] != 0) { - // The last indirection could be subject to a size check (dynamic dictionary expansion) - if (i == pRuntimeLookup->indirections - 1 && pRuntimeLookup->sizeOffset != CORINFO_NO_SIZE_CHECK) + if (isLastIndirectionWithSizeCheck) { lastIndOfTree = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("impRuntimeLookup indirectOffset")); @@ -3623,31 +3629,6 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, GenTree* op1; GenTree* op2; - case CORINFO_INTRINSIC_Sin: - case CORINFO_INTRINSIC_Cbrt: - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Cos: - case CORINFO_INTRINSIC_Round: - case CORINFO_INTRINSIC_Cosh: - case CORINFO_INTRINSIC_Sinh: - case CORINFO_INTRINSIC_Tan: - case CORINFO_INTRINSIC_Tanh: - case CORINFO_INTRINSIC_Asin: - case CORINFO_INTRINSIC_Asinh: - case CORINFO_INTRINSIC_Acos: - case CORINFO_INTRINSIC_Acosh: - case CORINFO_INTRINSIC_Atan: - case CORINFO_INTRINSIC_Atan2: - case CORINFO_INTRINSIC_Atanh: - case CORINFO_INTRINSIC_Log10: - case CORINFO_INTRINSIC_Pow: - case CORINFO_INTRINSIC_Exp: - case CORINFO_INTRINSIC_Ceiling: - case CORINFO_INTRINSIC_Floor: - retNode = impMathIntrinsic(method, sig, callType, intrinsicID, tailCall); - break; - #if defined(TARGET_XARCH) || defined(TARGET_ARM64) // TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic @@ -3921,7 +3902,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { JITDUMP("Expanding as special intrinsic\n"); impPopStack(); - op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method); + op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, ni, method); // Set the CALL flag to indicate that the operator is implemented by a call. 
// Set also the EXCEPTION flag because the native implementation of @@ -4145,44 +4126,19 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_Type_IsAssignableFrom: { - // Optimize patterns like: - // - // typeof(TTo).IsAssignableFrom(typeof(TTFrom)) - // valueTypeVar.GetType().IsAssignableFrom(typeof(TTFrom)) - // - // to true/false GenTree* typeTo = impStackTop(1).val; GenTree* typeFrom = impStackTop(0).val; - if (typeTo->IsCall() && typeFrom->IsCall()) - { - // make sure both arguments are `typeof()` - CORINFO_METHOD_HANDLE hTypeof = eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE); - if ((typeTo->AsCall()->gtCallMethHnd == hTypeof) && (typeFrom->AsCall()->gtCallMethHnd == hTypeof)) - { - CORINFO_CLASS_HANDLE hClassTo = - gtGetHelperArgClassHandle(typeTo->AsCall()->gtCallArgs->GetNode()); - CORINFO_CLASS_HANDLE hClassFrom = - gtGetHelperArgClassHandle(typeFrom->AsCall()->gtCallArgs->GetNode()); - - if (hClassTo == NO_CLASS_HANDLE || hClassFrom == NO_CLASS_HANDLE) - { - break; - } + retNode = impTypeIsAssignable(typeTo, typeFrom); + break; + } - TypeCompareState castResult = info.compCompHnd->compareTypesForCast(hClassFrom, hClassTo); - if (castResult == TypeCompareState::May) - { - // requires runtime check - // e.g. __Canon, COMObjects, Nullable - break; - } + case NI_System_Type_IsAssignableTo: + { + GenTree* typeTo = impStackTop(0).val; + GenTree* typeFrom = impStackTop(1).val; - retNode = gtNewIconNode((castResult == TypeCompareState::Must) ? 1 : 0); - impPopStack(); // drop both CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE calls - impPopStack(); - } - } + retNode = impTypeIsAssignable(typeTo, typeFrom); break; } @@ -4218,7 +4174,6 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, #ifdef FEATURE_HW_INTRINSICS case NI_System_Math_FusedMultiplyAdd: - case NI_System_MathF_FusedMultiplyAdd: { #ifdef TARGET_XARCH if (compExactlyDependsOn(InstructionSet_FMA) && supportSIMDTypes()) @@ -4227,9 +4182,9 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, // We are constructing a chain of intrinsics similar to: // return FMA.MultiplyAddScalar( - // Vector128.CreateScalar(x), - // Vector128.CreateScalar(y), - // Vector128.CreateScalar(z) + // Vector128.CreateScalarUnsafe(x), + // Vector128.CreateScalarUnsafe(y), + // Vector128.CreateScalarUnsafe(z) // ).ToScalar(); GenTree* op3 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, impPopStack().val, @@ -4243,20 +4198,66 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, retNode = gtNewSimdHWIntrinsicNode(callType, res, NI_Vector128_ToScalar, callType, 16); } -#endif // TARGET_XARCH +#elif defined(TARGET_ARM64) + if (compExactlyDependsOn(InstructionSet_AdvSimd)) + { + assert(varTypeIsFloating(callType)); + + // We are constructing a chain of intrinsics similar to: + // return AdvSimd.FusedMultiplyAddScalar( + // Vector64.Create{ScalarUnsafe}(z), + // Vector64.Create{ScalarUnsafe}(y), + // Vector64.Create{ScalarUnsafe}(x) + // ).ToScalar(); + + NamedIntrinsic createVector64 = + (callType == TYP_DOUBLE) ? 
NI_Vector64_Create : NI_Vector64_CreateScalarUnsafe; + + constexpr unsigned int simdSize = 8; + + GenTree* op3 = + gtNewSimdHWIntrinsicNode(TYP_SIMD8, impPopStack().val, createVector64, callType, simdSize); + GenTree* op2 = + gtNewSimdHWIntrinsicNode(TYP_SIMD8, impPopStack().val, createVector64, callType, simdSize); + GenTree* op1 = + gtNewSimdHWIntrinsicNode(TYP_SIMD8, impPopStack().val, createVector64, callType, simdSize); + + // Note that AdvSimd.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 + op2 * op3 + // while Math{F}.FusedMultiplyAddScalar(op1,op2,op3) corresponds to op1 * op2 + op3 + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar, + callType, simdSize); + + retNode = gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector64_ToScalar, callType, simdSize); + } +#endif break; } #endif // FEATURE_HW_INTRINSICS + case NI_System_Math_Sin: + case NI_System_Math_Cbrt: + case NI_System_Math_Sqrt: + case NI_System_Math_Abs: + case NI_System_Math_Cos: case NI_System_Math_Round: - case NI_System_MathF_Round: - { - // Math.Round and MathF.Round used to be a traditional JIT intrinsic. In order - // to simplify the transition, we will just treat it as if it was still the - // old intrinsic, CORINFO_INTRINSIC_Round. This should end up flowing properly - // everywhere else. - - retNode = impMathIntrinsic(method, sig, callType, CORINFO_INTRINSIC_Round, tailCall); + case NI_System_Math_Cosh: + case NI_System_Math_Sinh: + case NI_System_Math_Tan: + case NI_System_Math_Tanh: + case NI_System_Math_Asin: + case NI_System_Math_Asinh: + case NI_System_Math_Acos: + case NI_System_Math_Acosh: + case NI_System_Math_Atan: + case NI_System_Math_Atan2: + case NI_System_Math_Atanh: + case NI_System_Math_Log10: + case NI_System_Math_Pow: + case NI_System_Math_Exp: + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: + { + retNode = impMathIntrinsic(method, sig, callType, ni, tailCall); break; } @@ -4342,17 +4343,61 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, return retNode; } +GenTree* Compiler::impTypeIsAssignable(GenTree* typeTo, GenTree* typeFrom) +{ + // Optimize patterns like: + // + // typeof(TTo).IsAssignableFrom(typeof(TTFrom)) + // valueTypeVar.GetType().IsAssignableFrom(typeof(TTFrom)) + // typeof(TTFrom).IsAssignableTo(typeof(TTo)) + // typeof(TTFrom).IsAssignableTo(valueTypeVar.GetType()) + // + // to true/false + + if (typeTo->IsCall() && typeFrom->IsCall()) + { + // make sure both arguments are `typeof()` + CORINFO_METHOD_HANDLE hTypeof = eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE); + if ((typeTo->AsCall()->gtCallMethHnd == hTypeof) && (typeFrom->AsCall()->gtCallMethHnd == hTypeof)) + { + CORINFO_CLASS_HANDLE hClassTo = gtGetHelperArgClassHandle(typeTo->AsCall()->gtCallArgs->GetNode()); + CORINFO_CLASS_HANDLE hClassFrom = gtGetHelperArgClassHandle(typeFrom->AsCall()->gtCallArgs->GetNode()); + + if (hClassTo == NO_CLASS_HANDLE || hClassFrom == NO_CLASS_HANDLE) + { + return nullptr; + } + + TypeCompareState castResult = info.compCompHnd->compareTypesForCast(hClassFrom, hClassTo); + if (castResult == TypeCompareState::May) + { + // requires runtime check + // e.g. __Canon, COMObjects, Nullable + return nullptr; + } + + GenTreeIntCon* retNode = gtNewIconNode((castResult == TypeCompareState::Must) ? 
1 : 0); + impPopStack(); // drop both CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE calls + impPopStack(); + + return retNode; + } + } + + return nullptr; +} + GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, var_types callType, - CorInfoIntrinsics intrinsicID, + NamedIntrinsic intrinsicName, bool tailCall) { GenTree* op1; GenTree* op2; assert(callType != TYP_STRUCT); - assert(IsMathIntrinsic(intrinsicID)); + assert(IsMathIntrinsic(intrinsicName)); op1 = nullptr; @@ -4363,12 +4408,12 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method, // a) For back compatibility reasons on desktop .NET Framework 4.6 / 4.6.1 // b) It will be non-trivial task or too late to re-materialize a surviving // tail prefixed GT_INTRINSIC as tail call in rationalizer. - if (!IsIntrinsicImplementedByUserCall(intrinsicID) || !tailCall) + if (!IsIntrinsicImplementedByUserCall(intrinsicName) || !tailCall) #else // On x86 RyuJIT, importing intrinsics that are implemented as user calls can cause incorrect calculation // of the depth of the stack if these intrinsics are used as arguments to another call. This causes bad // code generation for certain EH constructs. - if (!IsIntrinsicImplementedByUserCall(intrinsicID)) + if (!IsIntrinsicImplementedByUserCall(intrinsicName)) #endif { switch (sig->numArgs) @@ -4383,7 +4428,8 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method, op1 = gtNewCastNode(callType, op1, false, callType); } - op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method); + op1 = new (this, GT_INTRINSIC) + GenTreeIntrinsic(genActualType(callType), op1, CORINFO_INTRINSIC_Illegal, intrinsicName, method); break; case 2: @@ -4402,14 +4448,15 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method, op1 = gtNewCastNode(callType, op1, false, callType); } - op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, op2, intrinsicID, method); + op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, op2, + CORINFO_INTRINSIC_Illegal, intrinsicName, method); break; default: - NO_WAY("Unsupported number of args for Math Instrinsic"); + NO_WAY("Unsupported number of args for Math Intrinsic"); } - if (IsIntrinsicImplementedByUserCall(intrinsicID)) + if (IsIntrinsicImplementedByUserCall(intrinsicName)) { op1->gtFlags |= GTF_CALL; } @@ -4474,31 +4521,99 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_Enum_HasFlag; } - else if (strncmp(className, "Math", 4) == 0) + else if (strcmp(className, "Math") == 0 || strcmp(className, "MathF") == 0) { - className += 4; - - if (className[0] == '\0') + if (strcmp(methodName, "FusedMultiplyAdd") == 0) { - if (strcmp(methodName, "FusedMultiplyAdd") == 0) - { - result = NI_System_Math_FusedMultiplyAdd; - } - else if (strcmp(methodName, "Round") == 0) - { - result = NI_System_Math_Round; - } + result = NI_System_Math_FusedMultiplyAdd; } - else if (strcmp(className, "F") == 0) + else if (strcmp(methodName, "Round") == 0) { - if (strcmp(methodName, "FusedMultiplyAdd") == 0) - { - result = NI_System_MathF_FusedMultiplyAdd; - } - else if (strcmp(methodName, "Round") == 0) - { - result = NI_System_MathF_Round; - } + result = NI_System_Math_Round; + } + else if (strcmp(methodName, "Sin") == 0) + { + result = NI_System_Math_Sin; + } + else if (strcmp(methodName, "Cos") == 0) + { + result = NI_System_Math_Cos; + } + else if (strcmp(methodName, "Cbrt") == 0) + { + result = 
NI_System_Math_Cbrt; + } + else if (strcmp(methodName, "Sqrt") == 0) + { + result = NI_System_Math_Sqrt; + } + else if (strcmp(methodName, "Abs") == 0) + { + result = NI_System_Math_Abs; + } + else if (strcmp(methodName, "Cosh") == 0) + { + result = NI_System_Math_Cosh; + } + else if (strcmp(methodName, "Sinh") == 0) + { + result = NI_System_Math_Sinh; + } + else if (strcmp(methodName, "Tan") == 0) + { + result = NI_System_Math_Tan; + } + else if (strcmp(methodName, "Tanh") == 0) + { + result = NI_System_Math_Tanh; + } + else if (strcmp(methodName, "Asin") == 0) + { + result = NI_System_Math_Asin; + } + else if (strcmp(methodName, "Asinh") == 0) + { + result = NI_System_Math_Asinh; + } + else if (strcmp(methodName, "Acos") == 0) + { + result = NI_System_Math_Acos; + } + else if (strcmp(methodName, "Acosh") == 0) + { + result = NI_System_Math_Acosh; + } + else if (strcmp(methodName, "Atan") == 0) + { + result = NI_System_Math_Atan; + } + else if (strcmp(methodName, "Atan2") == 0) + { + result = NI_System_Math_Atan2; + } + else if (strcmp(methodName, "Atanh") == 0) + { + result = NI_System_Math_Atanh; + } + else if (strcmp(methodName, "Log10") == 0) + { + result = NI_System_Math_Log10; + } + else if (strcmp(methodName, "Pow") == 0) + { + result = NI_System_Math_Pow; + } + else if (strcmp(methodName, "Exp") == 0) + { + result = NI_System_Math_Exp; + } + else if (strcmp(methodName, "Ceiling") == 0) + { + result = NI_System_Math_Ceiling; + } + else if (strcmp(methodName, "Floor") == 0) + { + result = NI_System_Math_Floor; } } else if (strcmp(className, "GC") == 0) @@ -4518,6 +4633,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_Type_IsAssignableFrom; } + else if (strcmp(methodName, "IsAssignableTo") == 0) + { + result = NI_System_Type_IsAssignableTo; + } } } #if defined(TARGET_XARCH) || defined(TARGET_ARM64) @@ -4796,7 +4915,14 @@ GenTree* Compiler::impArrayAccessIntrinsic( if (intrinsicID != CORINFO_INTRINSIC_Array_Address) { - arrElem = gtNewOperNode(GT_IND, elemType, arrElem); + if (varTypeIsStruct(elemType)) + { + arrElem = gtNewObjNode(sig->retTypeClass, arrElem); + } + else + { + arrElem = gtNewOperNode(GT_IND, elemType, arrElem); + } } if (intrinsicID == CORINFO_INTRINSIC_Array_Set) @@ -7400,11 +7526,13 @@ enum PREFIX_TAILCALL_EXPLICIT = 0x00000001, // call has "tail" IL prefix PREFIX_TAILCALL_IMPLICIT = 0x00000010, // call is treated as having "tail" prefix even though there is no "tail" IL prefix - PREFIX_TAILCALL = (PREFIX_TAILCALL_EXPLICIT | PREFIX_TAILCALL_IMPLICIT), - PREFIX_VOLATILE = 0x00000100, - PREFIX_UNALIGNED = 0x00001000, - PREFIX_CONSTRAINED = 0x00010000, - PREFIX_READONLY = 0x00100000 + PREFIX_TAILCALL_STRESS = + 0x00000100, // call doesn't have the "tail" IL prefix but is treated as explicit because of tail call stress + PREFIX_TAILCALL = (PREFIX_TAILCALL_EXPLICIT | PREFIX_TAILCALL_IMPLICIT | PREFIX_TAILCALL_STRESS), + PREFIX_VOLATILE = 0x00001000, + PREFIX_UNALIGNED = 0x00010000, + PREFIX_CONSTRAINED = 0x00100000, + PREFIX_READONLY = 0x01000000 }; /******************************************************************************** @@ -7709,7 +7837,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // If this is a call to JitTestLabel.Mark, do "early inlining", and record the test attribute. // This recognition should really be done by knowing the methHnd of the relevant Mark method(s). - // These should be in mscorlib.h, and available through a JIT/EE interface call.
+ // These should be in corelib.h, and available through a JIT/EE interface call. const char* modName; const char* className; const char* methodName; @@ -7944,9 +8072,9 @@ var_types Compiler::impImportCall(OPCODE opcode, // This is for a non-virtual, non-interface etc. call call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset); - // We remove the nullcheck for the GetType call instrinsic. + // We remove the nullcheck for the GetType call intrinsic. // TODO-CQ: JIT64 does not introduce the null check for many more helper calls - // and instrinsics. + // and intrinsics. if (callInfo->nullInstanceCheck && !((mflags & CORINFO_FLG_INTRINSIC) != 0 && (intrinsicID == CORINFO_INTRINSIC_Object_GetType))) { @@ -8548,6 +8676,7 @@ var_types Compiler::impImportCall(OPCODE opcode, { const bool isExplicitTailCall = (tailCallFlags & PREFIX_TAILCALL_EXPLICIT) != 0; const bool isImplicitTailCall = (tailCallFlags & PREFIX_TAILCALL_IMPLICIT) != 0; + const bool isStressTailCall = (tailCallFlags & PREFIX_TAILCALL_STRESS) != 0; // Exactly one of these should be true. assert(isExplicitTailCall != isImplicitTailCall); @@ -8614,6 +8743,12 @@ var_types Compiler::impImportCall(OPCODE opcode, // for in-lining. call->AsCall()->gtCallMoreFlags |= GTF_CALL_M_EXPLICIT_TAILCALL; JITDUMP("\nGTF_CALL_M_EXPLICIT_TAILCALL set for call [%06u]\n", dspTreeID(call)); + + if (isStressTailCall) + { + call->AsCall()->gtCallMoreFlags |= GTF_CALL_M_STRESS_TAILCALL; + JITDUMP("\nGTF_CALL_M_STRESS_TAILCALL set for call [%06u]\n", dspTreeID(call)); + } } else { @@ -13188,10 +13323,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) // via an underlying address, just null check the address. if (op1->OperIs(GT_FIELD, GT_IND, GT_OBJ)) { - op1->ChangeOper(GT_NULLCHECK); - block->bbFlags |= BBF_HAS_NULLCHECK; - optMethodFlags |= OMF_HAS_NULLCHECK; - op1->gtType = TYP_BYTE; + gtChangeOperToNullCheck(op1, block); } else { @@ -14086,6 +14218,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) // Stress the tailcall. JITDUMP(" (Tailcall stress: prefixFlags |= PREFIX_TAILCALL_EXPLICIT)"); prefixFlags |= PREFIX_TAILCALL_EXPLICIT; + prefixFlags |= PREFIX_TAILCALL_STRESS; } else { @@ -20072,10 +20205,10 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, // Returns true if the given intrinsic will be implemented by target-specific // instructions -bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId) +bool Compiler::IsTargetIntrinsic(NamedIntrinsic intrinsicName) { #if defined(TARGET_XARCH) - switch (intrinsicId) + switch (intrinsicName) { // AMD64/x86 has SSE2 instructions to directly compute sqrt/abs and SSE4.1 // instructions to directly compute round/ceiling/floor. @@ -20086,37 +20219,37 @@ bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId) // a CQ problem, it may be necessary to change the implementation of // the helper calls to decrease call overhead or switch back to the // x87 instructions. This is tracked by #7097. 
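Editor's note: the reworked prefix enum above widens the PREFIX_TAILCALL mask to include the new stress bit, so existing "is this a tail call" tests keep working while stress-induced tail calls stay distinguishable. A compilable restatement of just that bit layout, with the values taken from the hunk:

    #include <cstdio>

    enum PrefixFlags : unsigned
    {
        PREFIX_TAILCALL_EXPLICIT = 0x00000001, // "tail." IL prefix is present
        PREFIX_TAILCALL_IMPLICIT = 0x00000010, // treated as a tail call without the prefix
        PREFIX_TAILCALL_STRESS   = 0x00000100, // forced by tail call stress mode
        PREFIX_TAILCALL = PREFIX_TAILCALL_EXPLICIT | PREFIX_TAILCALL_IMPLICIT | PREFIX_TAILCALL_STRESS,
    };

    int main()
    {
        unsigned prefixFlags = PREFIX_TAILCALL_EXPLICIT | PREFIX_TAILCALL_STRESS;
        std::printf("tail=%d stress=%d\n", (prefixFlags & PREFIX_TAILCALL) != 0,
                    (prefixFlags & PREFIX_TAILCALL_STRESS) != 0);
        return 0;
    }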
- case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: + case NI_System_Math_Sqrt: + case NI_System_Math_Abs: return true; - case CORINFO_INTRINSIC_Round: - case CORINFO_INTRINSIC_Ceiling: - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Round: + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: return compOpportunisticallyDependsOn(InstructionSet_SSE41); default: return false; } #elif defined(TARGET_ARM64) - switch (intrinsicId) + switch (intrinsicName) { - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Round: - case CORINFO_INTRINSIC_Floor: - case CORINFO_INTRINSIC_Ceiling: + case NI_System_Math_Sqrt: + case NI_System_Math_Abs: + case NI_System_Math_Round: + case NI_System_Math_Floor: + case NI_System_Math_Ceiling: return true; default: return false; } #elif defined(TARGET_ARM) - switch (intrinsicId) + switch (intrinsicName) { - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Round: + case NI_System_Math_Sqrt: + case NI_System_Math_Abs: + case NI_System_Math_Round: return true; default: @@ -20134,41 +20267,41 @@ bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId) // Returns true if the given intrinsic will be implemented by calling System.Math // methods. -bool Compiler::IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId) +bool Compiler::IsIntrinsicImplementedByUserCall(NamedIntrinsic intrinsicName) { // Currently, if a math intrinsic is not implemented by target-specific // instructions, it will be implemented by a System.Math call. In the // future, if we turn to implementing some of them with helper calls, // this predicate needs to be revisited. - return !IsTargetIntrinsic(intrinsicId); + return !IsTargetIntrinsic(intrinsicName); } -bool Compiler::IsMathIntrinsic(CorInfoIntrinsics intrinsicId) +bool Compiler::IsMathIntrinsic(NamedIntrinsic intrinsicName) { - switch (intrinsicId) - { - case CORINFO_INTRINSIC_Sin: - case CORINFO_INTRINSIC_Cbrt: - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Cos: - case CORINFO_INTRINSIC_Round: - case CORINFO_INTRINSIC_Cosh: - case CORINFO_INTRINSIC_Sinh: - case CORINFO_INTRINSIC_Tan: - case CORINFO_INTRINSIC_Tanh: - case CORINFO_INTRINSIC_Asin: - case CORINFO_INTRINSIC_Asinh: - case CORINFO_INTRINSIC_Acos: - case CORINFO_INTRINSIC_Acosh: - case CORINFO_INTRINSIC_Atan: - case CORINFO_INTRINSIC_Atan2: - case CORINFO_INTRINSIC_Atanh: - case CORINFO_INTRINSIC_Log10: - case CORINFO_INTRINSIC_Pow: - case CORINFO_INTRINSIC_Exp: - case CORINFO_INTRINSIC_Ceiling: - case CORINFO_INTRINSIC_Floor: + switch (intrinsicName) + { + case NI_System_Math_Sin: + case NI_System_Math_Cbrt: + case NI_System_Math_Sqrt: + case NI_System_Math_Abs: + case NI_System_Math_Cos: + case NI_System_Math_Round: + case NI_System_Math_Cosh: + case NI_System_Math_Sinh: + case NI_System_Math_Tan: + case NI_System_Math_Tanh: + case NI_System_Math_Asin: + case NI_System_Math_Asinh: + case NI_System_Math_Acos: + case NI_System_Math_Acosh: + case NI_System_Math_Atan: + case NI_System_Math_Atan2: + case NI_System_Math_Atanh: + case NI_System_Math_Log10: + case NI_System_Math_Pow: + case NI_System_Math_Exp: + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: return true; default: return false; @@ -20177,7 +20310,7 @@ bool Compiler::IsMathIntrinsic(CorInfoIntrinsics intrinsicId) bool Compiler::IsMathIntrinsic(GenTree* tree) { - return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->AsIntrinsic()->gtIntrinsicId); + return 
(tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->AsIntrinsic()->gtIntrinsicName); } //------------------------------------------------------------------------ diff --git a/src/coreclr/src/jit/inline.cpp b/src/coreclr/src/jit/inline.cpp index 9b4d6f168614..759a6e366181 100644 --- a/src/coreclr/src/jit/inline.cpp +++ b/src/coreclr/src/jit/inline.cpp @@ -1624,7 +1624,7 @@ void InlineStrategy::DumpXml(FILE* file, unsigned indent) // Root context will be null if we're not optimizing the method. // - // Note there are cases of this in mscorlib even in release builds, + // Note there are cases of this in System.Private.CoreLib even in release builds, // eg Task.NotifyDebuggerOfWaitCompletion. // // For such methods there aren't any inlines. diff --git a/src/coreclr/src/jit/inline.h b/src/coreclr/src/jit/inline.h index 1019341de999..c8b58802d0ee 100644 --- a/src/coreclr/src/jit/inline.h +++ b/src/coreclr/src/jit/inline.h @@ -27,7 +27,7 @@ // // Enums are used throughout to provide various descriptions. // -// There are 4 sitations where inline candidacy is evaluated. In each +// There are 4 situations where inline candidacy is evaluated. In each // case an InlineResult is allocated on the stack to collect // information about the inline candidate. Each InlineResult refers // to an InlinePolicy. diff --git a/src/coreclr/src/jit/inlinepolicy.cpp b/src/coreclr/src/jit/inlinepolicy.cpp index 8852991cf200..09a029f99663 100644 --- a/src/coreclr/src/jit/inlinepolicy.cpp +++ b/src/coreclr/src/jit/inlinepolicy.cpp @@ -1700,7 +1700,7 @@ void DiscretionaryPolicy::MethodInfoObservations(CORINFO_METHOD_INFO* methodInfo // 0.100 * m_CalleeNativeSizeEstimate + // -0.100 * m_CallsiteNativeSizeEstimate // -// On the inlines in CoreCLR's mscorlib, release windows x64, this +// On the inlines in CoreCLR's CoreLib, release windows x64, this // yields scores of R=0.42, MSE=228, and MAE=7.25. 
// // This estimate can be improved slighly by refitting, resulting in diff --git a/src/coreclr/src/jit/instr.cpp b/src/coreclr/src/jit/instr.cpp index a13887baa0f6..d6f4c98f6b94 100644 --- a/src/coreclr/src/jit/instr.cpp +++ b/src/coreclr/src/jit/instr.cpp @@ -478,7 +478,7 @@ void CodeGen::inst_IV_handle(instruction ins, int val) void CodeGen::inst_set_SV_var(GenTree* tree) { #ifdef DEBUG - assert(tree && (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_STORE_LCL_VAR)); + assert((tree != nullptr) && tree->OperIs(GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_STORE_LCL_VAR)); assert(tree->AsLclVarCommon()->GetLclNum() < compiler->lvaCount); GetEmitter()->emitVarRefOffs = tree->AsLclVar()->gtLclILoffs; @@ -1019,9 +1019,11 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT switch (addr->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(addr->isContained()); varNum = addr->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = addr->AsLclVarCommon()->GetLclOffs(); break; } @@ -1146,9 +1148,11 @@ void CodeGen::inst_RV_RV_TT( switch (addr->OperGet()) { case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: { + assert(addr->isContained()); varNum = addr->AsLclVarCommon()->GetLclNum(); - offset = 0; + offset = addr->AsLclVarCommon()->GetLclOffs(); break; } diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index e6c1ab307e46..329f526e9004 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -285,6 +285,20 @@ CONFIG_INTEGER(JitDisableSimdVN, W("JitDisableSimdVN"), 0) // Default 0, ValueNu // If 3, disable both SIMD and HW Intrinsic nodes #endif // FEATURE_SIMD +// Default 0, enable the CSE of Constants, including nearby offsets. (only for ARM64) +// If 1, disable all the CSE of Constants +// If 2, enable the CSE of Constants but don't combine with nearby offsets. (only for ARM64) +// If 3, enable the CSE of Constants including nearby offsets. (all platforms) +// If 4, enable the CSE of Constants but don't combine with nearby offsets. (all platforms) +// +CONFIG_INTEGER(JitConstCSE, W("JitConstCSE"), 0) + +#define CONST_CSE_ENABLE_ARM64 0 +#define CONST_CSE_DISABLE_ALL 1 +#define CONST_CSE_ENABLE_ARM64_NO_SHARING 2 +#define CONST_CSE_ENABLE_ALL 3 +#define CONST_CSE_ENABLE_ALL_NO_SHARING 4 + /// /// JIT /// diff --git a/src/coreclr/src/jit/jithashtable.cpp b/src/coreclr/src/jit/jithashtable.cpp new file mode 100644 index 000000000000..cf9f22639efe --- /dev/null +++ b/src/coreclr/src/jit/jithashtable.cpp @@ -0,0 +1,53 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" + +#if defined(_MSC_VER) +#pragma hdrstop +#endif // defined(_MSC_VER) + +// Table of primes and their magic-number-divide constant. +// For more info see the book "Hacker's Delight" chapter 10.9 "Unsigned Division by Divisors >= 1" +// These were selected by looking for primes, each roughly twice as big as the next, having +// 32-bit magic numbers, (because the algorithm for using 33-bit magic numbers is slightly slower). + +#include "jithashtable.h" + +// Table of primes and their magic-number-divide constant. 
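Editor's note: the new jithashtable.cpp pairs each table size (a prime) with a precomputed "magic number" so that dividing by the prime becomes a widening multiply and two shifts, per Hacker's Delight 10.9. A compilable sketch using the first entry of the table listed just below; the helper name is invented:

    #include <cassert>
    #include <cstdint>

    // n / d computed as ((n * magic) >> 32) >> shift: one 64-bit multiply and
    // two shifts instead of a hardware divide.
    uint32_t MagicDivide(uint32_t n, uint32_t magic, int shift)
    {
        return (uint32_t)(((uint64_t)n * magic) >> 32) >> shift;
    }

    int main()
    {
        // JitPrimeInfo(9, 0x38e38e39, 1): magic = ceil(2^33 / 9), shift = 1.
        for (uint32_t n = 0; n < 1000000; n++)
        {
            assert(MagicDivide(n, 0x38e38e39, 1) == n / 9);
        }
        return 0;
    }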
+// For more info see the book "Hacker's Delight" chapter 10.9 "Unsigned Division by Divisors >= 1" +// These were selected by looking for primes, each roughly twice as big as the next, having +// 32-bit magic numbers, (because the algorithm for using 33-bit magic numbers is slightly slower). + +// clang-format off +const JitPrimeInfo jitPrimeInfo[] +{ + JitPrimeInfo(9, 0x38e38e39, 1), + JitPrimeInfo(23, 0xb21642c9, 4), + JitPrimeInfo(59, 0x22b63cbf, 3), + JitPrimeInfo(131, 0xfa232cf3, 7), + JitPrimeInfo(239, 0x891ac73b, 7), + JitPrimeInfo(433, 0x975a751, 4), + JitPrimeInfo(761, 0x561e46a5, 8), + JitPrimeInfo(1399, 0xbb612aa3, 10), + JitPrimeInfo(2473, 0x6a009f01, 10), + JitPrimeInfo(4327, 0xf2555049, 12), + JitPrimeInfo(7499, 0x45ea155f, 11), + JitPrimeInfo(12973, 0x1434f6d3, 10), + JitPrimeInfo(22433, 0x2ebe18db, 12), + JitPrimeInfo(46559, 0xb42bebd5, 15), + JitPrimeInfo(96581, 0xadb61b1b, 16), + JitPrimeInfo(200341, 0x29df2461, 15), + JitPrimeInfo(415517, 0xa181c46d, 18), + JitPrimeInfo(861719, 0x4de0bde5, 18), + JitPrimeInfo(1787021, 0x9636c46f, 20), + JitPrimeInfo(3705617, 0x4870adc1, 20), + JitPrimeInfo(7684087, 0x8bbc5b83, 22), + JitPrimeInfo(15933877, 0x86c65361, 23), + JitPrimeInfo(33040633, 0x40fec79b, 23), + JitPrimeInfo(68513161, 0x7d605cd1, 25), + JitPrimeInfo(142069021, 0xf1da390b, 27), + JitPrimeInfo(294594427, 0x74a2507d, 27), + JitPrimeInfo(733045421, 0x5dbec447, 28), +}; +// clang-format on diff --git a/src/coreclr/src/jit/jithashtable.h b/src/coreclr/src/jit/jithashtable.h index cb2cc1e60d20..131f804a1127 100644 --- a/src/coreclr/src/jit/jithashtable.h +++ b/src/coreclr/src/jit/jithashtable.h @@ -91,38 +91,7 @@ class JitPrimeInfo // These were selected by looking for primes, each roughly twice as big as the next, having // 32-bit magic numbers, (because the algorithm for using 33-bit magic numbers is slightly slower). -// clang-format off -constexpr JitPrimeInfo jitPrimeInfo[] -{ - JitPrimeInfo(9, 0x38e38e39, 1), - JitPrimeInfo(23, 0xb21642c9, 4), - JitPrimeInfo(59, 0x22b63cbf, 3), - JitPrimeInfo(131, 0xfa232cf3, 7), - JitPrimeInfo(239, 0x891ac73b, 7), - JitPrimeInfo(433, 0x975a751, 4), - JitPrimeInfo(761, 0x561e46a5, 8), - JitPrimeInfo(1399, 0xbb612aa3, 10), - JitPrimeInfo(2473, 0x6a009f01, 10), - JitPrimeInfo(4327, 0xf2555049, 12), - JitPrimeInfo(7499, 0x45ea155f, 11), - JitPrimeInfo(12973, 0x1434f6d3, 10), - JitPrimeInfo(22433, 0x2ebe18db, 12), - JitPrimeInfo(46559, 0xb42bebd5, 15), - JitPrimeInfo(96581, 0xadb61b1b, 16), - JitPrimeInfo(200341, 0x29df2461, 15), - JitPrimeInfo(415517, 0xa181c46d, 18), - JitPrimeInfo(861719, 0x4de0bde5, 18), - JitPrimeInfo(1787021, 0x9636c46f, 20), - JitPrimeInfo(3705617, 0x4870adc1, 20), - JitPrimeInfo(7684087, 0x8bbc5b83, 22), - JitPrimeInfo(15933877, 0x86c65361, 23), - JitPrimeInfo(33040633, 0x40fec79b, 23), - JitPrimeInfo(68513161, 0x7d605cd1, 25), - JitPrimeInfo(142069021, 0xf1da390b, 27), - JitPrimeInfo(294594427, 0x74a2507d, 27), - JitPrimeInfo(733045421, 0x5dbec447, 28), -}; -// clang-format on +extern const JitPrimeInfo jitPrimeInfo[27]; // Hash table class definition diff --git a/src/coreclr/src/jit/liveness.cpp b/src/coreclr/src/jit/liveness.cpp index feb8f23a222e..cbc4ebbd05af 100644 --- a/src/coreclr/src/jit/liveness.cpp +++ b/src/coreclr/src/jit/liveness.cpp @@ -1980,17 +1980,13 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR GenTree* data = store->OperIs(GT_STOREIND) ? 
store->AsStoreInd()->Data() : store->AsBlk()->Data(); data->SetUnusedValue(); + if (data->isIndir()) { - // This is a block assignment. An indirection of the rhs is not considered - // to happen until the assignment so mark it as non-faulting. - data->gtFlags |= GTF_IND_NONFAULTING; + Lowering::TransformUnusedIndirection(data->AsIndir(), this, block); } - blockRange.Remove(store); - - assert(!opts.MinOpts()); - fgStmtRemoved = true; + fgRemoveDeadStoreLIR(store, block); } } } @@ -2019,25 +2015,7 @@ // Remove the store. DCE will iteratively clean up any ununsed operands. lclVarNode->gtOp1->SetUnusedValue(); - // If the store is marked as a late argument, it is referenced by a call. Instead of removing - // it, bash it to a NOP. - if ((node->gtFlags & GTF_LATE_ARG) != 0) - { - JITDUMP("node is a late arg; replacing with NOP\n"); - node->gtBashToNOP(); - - // NOTE: this is a bit of a hack. We need to keep these nodes around as they are - // referenced by the call, but they're considered side-effect-free non-value-producing - // nodes, so they will be removed if we don't do this. - node->gtFlags |= GTF_ORDER_SIDEEFF; - } - else - { - blockRange.Remove(node); - } - - assert(!opts.MinOpts()); - fgStmtRemoved = true; + fgRemoveDeadStoreLIR(node, block); } break; @@ -2109,40 +2087,112 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR break; case GT_NOP: + { // NOTE: we need to keep some NOPs around because they are referenced by calls. See the dead store // removal code above (case GT_STORE_LCL_VAR) for more explanation. if ((node->gtFlags & GTF_ORDER_SIDEEFF) != 0) { break; } - __fallthrough; + fgTryRemoveNonLocal(node, &blockRange); + } + break; - default: - assert(!node->OperIsLocal()); - if (!node->IsValue() || node->IsUnusedValue()) + case GT_BLK: + case GT_OBJ: + case GT_DYN_BLK: + { + bool removed = fgTryRemoveNonLocal(node, &blockRange); + if (!removed && node->IsUnusedValue()) { - // We are only interested in avoiding the removal of nodes with direct side-effects - // (as opposed to side effects of their children). - // This default case should never include calls or assignments. - assert(!node->OperRequiresAsgFlag() && !node->OperIs(GT_CALL)); - if (!node->gtSetFlags() && !node->OperMayThrow(this)) - { - JITDUMP("Removing dead node:\n"); - DISPNODE(node); - - node->VisitOperands([](GenTree* operand) -> GenTree::VisitResult { - operand->SetUnusedValue(); - return GenTree::VisitResult::Continue; - }); - - blockRange.Remove(node); - } + // IR doesn't expect dummy uses of `GT_OBJ/BLK/DYN_BLK`. + JITDUMP("Transform an unused OBJ/BLK node [%06d]\n", dspTreeID(node)); + Lowering::TransformUnusedIndirection(node->AsIndir(), this, block); } + } + break; + + default: + fgTryRemoveNonLocal(node, &blockRange); break; } } } +//--------------------------------------------------------------------- +// fgTryRemoveNonLocal - try to remove a node if it is unused and has no direct +// side effects. +// +// Arguments +// node - the non-local node to try; +// blockRange - the block range that contains the node. +// +// Return value: +// true if the node was removed, otherwise false. +// +// Notes: local nodes are processed independently and are not expected in this function.
+ +bool Compiler::fgTryRemoveNonLocal(GenTree* node, LIR::Range* blockRange) +{ + assert(!node->OperIsLocal()); + if (!node->IsValue() || node->IsUnusedValue()) + { + // We are only interested in avoiding the removal of nodes with direct side effects + // (as opposed to side effects of their children). + // This default case should never include calls or assignments. + assert(!node->OperRequiresAsgFlag() && !node->OperIs(GT_CALL)); + if (!node->gtSetFlags() && !node->OperMayThrow(this)) + { + JITDUMP("Removing dead node:\n"); + DISPNODE(node); + + node->VisitOperands([](GenTree* operand) -> GenTree::VisitResult { + operand->SetUnusedValue(); + return GenTree::VisitResult::Continue; + }); + + blockRange->Remove(node); + return true; + } + } + return false; +} + +//--------------------------------------------------------------------- +// fgRemoveDeadStoreLIR - remove a dead store from LIR, bashing it to a NOP +// instead when the store is referenced as a late argument of a call +// +// Arguments +// store - the dead store node to remove; +// block - the block that contains the store. +// +void Compiler::fgRemoveDeadStoreLIR(GenTree* store, BasicBlock* block) +{ + LIR::Range& blockRange = LIR::AsRange(block); + + // If the store is marked as a late argument, it is referenced by a call. + // Instead of removing it, bash it to a NOP. + if ((store->gtFlags & GTF_LATE_ARG) != 0) + { + JITDUMP("node is a late arg; replacing with NOP\n"); + store->gtBashToNOP(); + + // NOTE: this is a bit of a hack. We need to keep these nodes around as they are + // referenced by the call, but they're considered side-effect-free non-value-producing + // nodes, so they will be removed if we don't do this. + store->gtFlags |= GTF_ORDER_SIDEEFF; + } + else + { + blockRange.Remove(store); + } + + assert(!opts.MinOpts()); + fgStmtRemoved = true; +} + +//--------------------------------------------------------------------- // fgRemoveDeadStore - remove a store to a local which has no exposed uses. // // pTree - GenTree** to local, including store-form local or local addr (post-rationalize) @@ -2285,17 +2335,6 @@ bool Compiler::fgRemoveDeadStore(GenTree** pTree, } #endif // DEBUG // Extract the side effects - if (rhsNode->TypeGet() == TYP_STRUCT) - { - // This is a block assignment. An indirection of the rhs is not considered to - // happen until the assignment, so we will extract the side effects from only - // the address. - if (rhsNode->OperIsIndir()) - { - assert(rhsNode->OperGet() != GT_NULLCHECK); - rhsNode = rhsNode->AsIndir()->Addr(); - } - } gtExtractSideEffList(rhsNode, &sideEffList); } diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index cc9e2891e9a4..2867a7bacbb4 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -585,7 +585,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node) // I think this is due to the fact that we use absolute addressing // instead of relative. But in CoreRT is used as a rule relative // addressing when we generate an executable.
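Editor's note on the liveness refactor above: all non-local dead-node removal is now funneled through fgTryRemoveNonLocal, which deletes a node only when its value (if any) is unused and it has no direct side effects, then marks its operands unused so the backwards walk can retry them. A self-contained sketch of that pattern, with illustrative types rather than the JIT's:

    #include <algorithm>
    #include <vector>

    struct Node
    {
        bool producesValue;
        bool hasUse;        // cleared via SetUnusedValue() in the real code
        bool hasSideEffect; // calls, assignments, flag setting, may-throw, ...
        std::vector<Node*> operands;
    };

    // Returns true if the node was removed from the linear range.
    bool TryRemoveDeadNode(Node* node, std::vector<Node*>& range)
    {
        if ((node->producesValue && node->hasUse) || node->hasSideEffect)
        {
            return false;
        }
        for (Node* op : node->operands)
        {
            op->hasUse = false; // the operand becomes an unused value
        }
        range.erase(std::remove(range.begin(), range.end(), node), range.end());
        return true;
    }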
- // See also https://github.com/dotnet/coreclr/issues/13194 + // See also https://github.com/dotnet/runtime/issues/8683 // Also https://github.com/dotnet/coreclr/pull/13197 useJumpSequence = useJumpSequence || comp->IsTargetAbi(CORINFO_CORERT_ABI); #endif // defined(TARGET_UNIX) && defined(TARGET_ARM) @@ -3181,7 +3181,9 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore) // Create the assignment node. lclStore->ChangeOper(GT_STORE_OBJ); GenTreeBlk* objStore = lclStore->AsObj(); - objStore->gtFlags = GTF_ASG | GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP; + // Only the GTF_LATE_ARG flag (if present) is preserved. + objStore->gtFlags &= GTF_LATE_ARG; + objStore->gtFlags |= GTF_ASG | GTF_IND_NONFAULTING | GTF_IND_TGT_NOT_HEAP; #ifndef JIT32_GCENCODER objStore->gtBlkOpGcUnsafe = false; #endif @@ -6450,41 +6452,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind) if (ind->OperIs(GT_NULLCHECK) || ind->IsUnusedValue()) { - // A nullcheck is essentially the same as an indirection with no use. - // The difference lies in whether a target register must be allocated. - // On XARCH we can generate a compare with no target register as long as the addresss - // is not contained. - // On ARM64 we can generate a load to REG_ZR in all cases. - // However, on ARM we must always generate a load to a register. - // In the case where we require a target register, it is better to use GT_IND, since - // GT_NULLCHECK is a non-value node and would therefore require an internal register - // to use as the target. That is non-optimal because it will be modeled as conflicting - // with the source register(s). - // So, to summarize: - // - On ARM64, always use GT_NULLCHECK for a dead indirection. - // - On ARM, always use GT_IND. - // - On XARCH, use GT_IND if we have a contained address, and GT_NULLCHECK otherwise. - // In all cases, change the type to TYP_INT. - // - ind->gtType = TYP_INT; -#ifdef TARGET_ARM64 - bool useNullCheck = true; -#elif TARGET_ARM - bool useNullCheck = false; -#else // TARGET_XARCH - bool useNullCheck = !ind->Addr()->isContained(); -#endif // !TARGET_XARCH - - if (useNullCheck && ind->OperIs(GT_IND)) - { - ind->ChangeOper(GT_NULLCHECK); - ind->ClearUnusedValue(); - } - else if (!useNullCheck && ind->OperIs(GT_NULLCHECK)) - { - ind->ChangeOper(GT_IND); - ind->SetUnusedValue(); - } + TransformUnusedIndirection(ind, comp, m_block); } } else @@ -6496,6 +6464,55 @@ void Lowering::LowerIndir(GenTreeIndir* ind) } } +//------------------------------------------------------------------------ +// TransformUnusedIndirection: change the opcode and the type of the unused indirection. +// +// Arguments: +// ind - Indirection to transform. +// comp - Compiler instance. +// block - Basic block of the indirection. +// +void Lowering::TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, BasicBlock* block) +{ + // A nullcheck is essentially the same as an indirection with no use. + // The difference lies in whether a target register must be allocated. + // On XARCH we can generate a compare with no target register as long as the address + // is not contained. + // On ARM64 we can generate a load to REG_ZR in all cases. + // However, on ARM we must always generate a load to a register. + // In the case where we require a target register, it is better to use GT_IND, since + // GT_NULLCHECK is a non-value node and would therefore require an internal register + // to use as the target.
That is non-optimal because it will be modeled as conflicting + // with the source register(s). + // So, to summarize: + // - On ARM64, always use GT_NULLCHECK for a dead indirection. + // - On ARM, always use GT_IND. + // - On XARCH, use GT_IND if we have a contained address, and GT_NULLCHECK otherwise. + // In all cases, change the type to TYP_INT. + // + assert(ind->OperIs(GT_NULLCHECK, GT_IND, GT_BLK, GT_OBJ)); + + ind->gtType = TYP_INT; +#ifdef TARGET_ARM64 + bool useNullCheck = true; +#elif TARGET_ARM + bool useNullCheck = false; +#else // TARGET_XARCH + bool useNullCheck = !ind->Addr()->isContained(); +#endif // !TARGET_XARCH + + if (useNullCheck && !ind->OperIs(GT_NULLCHECK)) + { + comp->gtChangeOperToNullCheck(ind, block); + ind->ClearUnusedValue(); + } + else if (!useNullCheck && !ind->OperIs(GT_IND)) + { + ind->ChangeOper(GT_IND); + ind->SetUnusedValue(); + } +} + //------------------------------------------------------------------------ // LowerBlockStoreCommon: a common logic to lower STORE_OBJ/BLK/DYN_BLK. // diff --git a/src/coreclr/src/jit/lower.h b/src/coreclr/src/jit/lower.h index 0c620aebeb0f..ff13302c1ba9 100644 --- a/src/coreclr/src/jit/lower.h +++ b/src/coreclr/src/jit/lower.h @@ -8,6 +8,7 @@ XX Lower XX XX XX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ #ifndef _LOWER_H_ @@ -326,12 +327,12 @@ class Lowering final : public Phase void LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp); void LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node); void LowerHWIntrinsicDot(GenTreeHWIntrinsic* node); - void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node); - #if defined(TARGET_XARCH) + void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node); void LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node); #elif defined(TARGET_ARM64) bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); + void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); #endif // !TARGET_XARCH && !TARGET_ARM64 union VectorConstant { @@ -532,6 +533,8 @@ class Lowering final : public Phase bool IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node, bool* supportsRegOptional); #endif // FEATURE_HW_INTRINSICS + static void TransformUnusedIndirection(GenTreeIndir* ind, Compiler* comp, BasicBlock* block); + private: static bool NodesAreEquivalentLeaves(GenTree* candidate, GenTree* storeInd); diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 2ab6fa947822..6d46545f718d 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -517,6 +517,76 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS + +//---------------------------------------------------------------------------------------------- +// LowerHWIntrinsicFusedMultiplyAddScalar: Lowers AdvSimd_FusedMultiplyAddScalar intrinsics +// when some of the operands are negated by "containing" such negation. 
+// +// Arguments: +// node - The original hardware intrinsic node +// +// | op1 | op2 | op3 | +// | + | + | + | AdvSimd_FusedMultiplyAddScalar +// | + | + | - | AdvSimd_FusedMultiplySubtractScalar +// | + | - | + | AdvSimd_FusedMultiplySubtractScalar +// | + | - | - | AdvSimd_FusedMultiplyAddScalar +// | - | + | + | AdvSimd_FusedMultiplySubtractNegatedScalar +// | - | + | - | AdvSimd_FusedMultiplyAddNegatedScalar +// | - | - | + | AdvSimd_FusedMultiplyAddNegatedScalar +// | - | - | - | AdvSimd_FusedMultiplySubtractNegatedScalar +// +void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node) +{ + assert(node->gtHWIntrinsicId == NI_AdvSimd_FusedMultiplyAddScalar); + + const HWIntrinsic intrin(node); + + GenTree* op1 = intrin.op1; + GenTree* op2 = intrin.op2; + GenTree* op3 = intrin.op3; + + auto lowerOperand = [this](GenTree* op) { + bool wasNegated = false; + + if (op->OperIsHWIntrinsic() && + ((op->AsHWIntrinsic()->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || + (op->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe))) + { + GenTreeHWIntrinsic* createVector64 = op->AsHWIntrinsic(); + GenTree* valueOp = createVector64->gtGetOp1(); + + if (valueOp->OperIs(GT_NEG)) + { + createVector64->gtOp1 = valueOp->gtGetOp1(); + BlockRange().Remove(valueOp); + wasNegated = true; + } + } + + return wasNegated; + }; + + const bool op1WasNegated = lowerOperand(op1); + const bool op2WasNegated = lowerOperand(op2); + const bool op3WasNegated = lowerOperand(op3); + + if (op1WasNegated) + { + if (op2WasNegated != op3WasNegated) + { + node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplyAddNegatedScalar; + } + else + { + node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplySubtractNegatedScalar; + } + } + else if (op2WasNegated != op3WasNegated) + { + node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplySubtractScalar; + } +} + //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsic: Perform containment analysis for a hardware intrinsic node. // @@ -573,6 +643,10 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return; } + case NI_AdvSimd_FusedMultiplyAddScalar: + LowerHWIntrinsicFusedMultiplyAddScalar(node); + break; + default: break; } diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 331c674eab9e..89ae55b003e8 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -592,7 +592,7 @@ void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk) * * TODO-XArch-CQ: (Low-pri): Jit64 generates in-line code of 8 instructions for (i) above. * There are hardly any occurrences of this conversion operation in platform - * assemblies or in CQ perf benchmarks (1 occurrence in mscorlib, microsoft.jscript, + * assemblies or in CQ perf benchmarks (1 occurrence in corelib, microsoft.jscript, * 1 occurence in Roslyn and no occurrences in system, system.core, system.numerics * system.windows.forms, scimark, fractals, bio mums). If we ever find evidence that * doing this optimization is a win, should consider generating in-lined code. @@ -3439,8 +3439,7 @@ bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTree* tree, GenTree** outIndirCandi assert(storeInd->IsRMWStatusUnknown()); // Early out if indirDst is not one of the supported memory operands. 
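Editor's note: a quick compilable check of the sign algebra behind the lowering table above, taking FusedMultiplyAddScalar(op1, op2, op3) = op1 + op2 * op3 as stated in the importer hunk; the semantics of the Subtract/Negated variants below are derived from the table itself:

    #include <cassert>

    double FusedMultiplyAddScalar            (double a, double n, double m) { return  a + n * m; }
    double FusedMultiplySubtractScalar       (double a, double n, double m) { return  a - n * m; }
    double FusedMultiplyAddNegatedScalar     (double a, double n, double m) { return -a - n * m; }
    double FusedMultiplySubtractNegatedScalar(double a, double n, double m) { return -a + n * m; }

    int main()
    {
        const double a = 2.0, n = 3.0, m = 5.0; // exact in double, so == is safe
        // One assert per table row: negations on (op1, op2, op3) -> chosen variant.
        assert(FusedMultiplyAddScalar            (a, n, m) ==  a +  n *  m); // + + +
        assert(FusedMultiplySubtractScalar       (a, n, m) ==  a +  n * -m); // + + -
        assert(FusedMultiplySubtractScalar       (a, n, m) ==  a + -n *  m); // + - +
        assert(FusedMultiplyAddScalar            (a, n, m) ==  a + -n * -m); // + - -
        assert(FusedMultiplySubtractNegatedScalar(a, n, m) == -a +  n *  m); // - + +
        assert(FusedMultiplyAddNegatedScalar     (a, n, m) == -a +  n * -m); // - + -
        assert(FusedMultiplyAddNegatedScalar     (a, n, m) == -a + -n *  m); // - - +
        assert(FusedMultiplySubtractNegatedScalar(a, n, m) == -a + -n * -m); // - - -
        return 0;
    }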
- if (indirDst->OperGet() != GT_LEA && indirDst->OperGet() != GT_LCL_VAR && indirDst->OperGet() != GT_LCL_VAR_ADDR && - indirDst->OperGet() != GT_CLS_VAR_ADDR && indirDst->OperGet() != GT_CNS_INT) + if (!indirDst->OperIs(GT_LEA, GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_CLS_VAR_ADDR, GT_CNS_INT)) { storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR); return false; @@ -4452,14 +4451,13 @@ bool Lowering::LowerRMWMemOp(GenTreeIndir* storeInd) } else { - assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || - indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT); + assert(indirCandidateChild->OperIs(GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_CLS_VAR_ADDR, GT_CNS_INT)); // If it is a GT_LCL_VAR, it still needs the reg to hold the address. // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base. // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit // time. Also, we don't need a reg for GT_CLS_VAR_ADDR. - if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR) + if (indirCandidateChild->OperIs(GT_LCL_VAR_ADDR, GT_CLS_VAR_ADDR)) { indirDst->SetContained(); } @@ -4616,10 +4614,10 @@ void Lowering::ContainCheckIntrinsic(GenTreeOp* node) { assert(node->OperIs(GT_INTRINSIC)); - CorInfoIntrinsics intrinsicId = node->AsIntrinsic()->gtIntrinsicId; + NamedIntrinsic intrinsicName = node->AsIntrinsic()->gtIntrinsicName; - if (intrinsicId == CORINFO_INTRINSIC_Sqrt || intrinsicId == CORINFO_INTRINSIC_Round || - intrinsicId == CORINFO_INTRINSIC_Ceiling || intrinsicId == CORINFO_INTRINSIC_Floor) + if (intrinsicName == NI_System_Math_Sqrt || intrinsicName == NI_System_Math_Round || + intrinsicName == NI_System_Math_Ceiling || intrinsicName == NI_System_Math_Floor) { GenTree* op1 = node->gtGetOp1(); if (IsContainableMemoryOp(op1) || op1->IsCnsNonZeroFltOrDbl()) @@ -5138,7 +5136,7 @@ void Lowering::ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* ad { assert((addr->TypeGet() == TYP_I_IMPL) || (addr->TypeGet() == TYP_BYREF)); TryCreateAddrMode(addr, true); - if ((addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LEA) || + if ((addr->OperIs(GT_CLS_VAR_ADDR, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR, GT_LEA) || (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp))) && IsSafeToContainMem(node, addr)) { @@ -5354,7 +5352,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) op2->SetRegOptional(); // TODO-XArch-CQ: For commutative nodes, either operand can be reg-optional. - // https://github.com/dotnet/coreclr/issues/6361 + // https://github.com/dotnet/runtime/issues/6358 } break; } @@ -5540,7 +5538,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // TODO-XArch-CQ: Technically any one of the three operands can // be reg-optional. With a limitation on op1 where // it can only be so if CopyUpperBits is off. 
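Editor's note: several hunks above (IsRMWMemOpRootedAtStoreInd, LowerRMWMemOp) replace chains of OperGet() comparisons with the variadic OperIs(...). The idiom, sketched with a C++17 fold expression; this is an illustration, not the JIT's exact implementation:

    enum genTreeOps { GT_LEA, GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_CLS_VAR_ADDR, GT_CNS_INT };

    // True if oper matches any of the listed candidates; replaces
    // "oper == GT_LEA || oper == GT_LCL_VAR || ..." chains.
    template <typename... T>
    bool OperIs(genTreeOps oper, T... candidates)
    {
        return ((oper == candidates) || ...);
    }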
- // https://github.com/dotnet/coreclr/issues/6361 + // https://github.com/dotnet/runtime/issues/6358 // 213 form: op1 = (op2 * op1) + op3 op3->SetRegOptional(); diff --git a/src/coreclr/src/jit/lsraarm.cpp b/src/coreclr/src/jit/lsraarm.cpp index f3b480fddd6d..8402bcdefc0f 100644 --- a/src/coreclr/src/jit/lsraarm.cpp +++ b/src/coreclr/src/jit/lsraarm.cpp @@ -290,10 +290,10 @@ int LinearScan::BuildNode(GenTree* tree) BuildUse(op1); srcCount = 1; - switch (tree->AsIntrinsic()->gtIntrinsicId) + switch (tree->AsIntrinsic()->gtIntrinsicName) { - case CORINFO_INTRINSIC_Abs: - case CORINFO_INTRINSIC_Sqrt: + case NI_System_Math_Abs: + case NI_System_Math_Sqrt: assert(dstCount == 1); BuildDef(tree); break; @@ -491,6 +491,8 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETURN: srcCount = BuildReturn(tree); + killMask = getKillSetForReturn(); + BuildDefsWithKills(tree, 0, RBM_NONE, killMask); break; case GT_RETFILT: diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 514ed90feef4..2138b782c53e 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -319,11 +319,11 @@ int LinearScan::BuildNode(GenTree* tree) case GT_INTRINSIC: { - noway_assert((tree->AsIntrinsic()->gtIntrinsicId == CORINFO_INTRINSIC_Abs) || - (tree->AsIntrinsic()->gtIntrinsicId == CORINFO_INTRINSIC_Ceiling) || - (tree->AsIntrinsic()->gtIntrinsicId == CORINFO_INTRINSIC_Floor) || - (tree->AsIntrinsic()->gtIntrinsicId == CORINFO_INTRINSIC_Round) || - (tree->AsIntrinsic()->gtIntrinsicId == CORINFO_INTRINSIC_Sqrt)); + noway_assert((tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Abs) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Ceiling) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Floor) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) || + (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Sqrt)); // Both operand and its result must be of the same floating point type. GenTree* op1 = tree->gtGetOp1(); @@ -1050,9 +1050,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (intrin.op1 != nullptr) { - // If we have an RMW intrinsic, we want to preference op1Reg to the target if - // op1 is not contained. - if (isRMW) + bool simdRegToSimdRegMove = false; + + if ((intrin.id == NI_Vector64_CreateScalarUnsafe) || (intrin.id == NI_Vector128_CreateScalarUnsafe)) + { + simdRegToSimdRegMove = varTypeIsFloating(intrin.op1); + } + else if (intrin.id == NI_AdvSimd_Arm64_DuplicateToVector64) + { + simdRegToSimdRegMove = (intrin.op1->TypeGet() == TYP_DOUBLE); + } + else if ((intrin.id == NI_Vector64_ToScalar) || (intrin.id == NI_Vector128_ToScalar)) + { + simdRegToSimdRegMove = varTypeIsFloating(intrinsicTree); + } + + // If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers, + // we want to preference op1Reg to the target if op1 is not contained. 
+ if (isRMW || simdRegToSimdRegMove) { tgtPrefOp1 = !intrin.op1->isContained(); } diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index d902b3a1ab4e..e2f8775f2bae 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -3443,10 +3443,15 @@ int LinearScan::BuildReturn(GenTree* tree) if (varTypeIsSIMD(tree) && !op1->IsMultiRegLclVar()) { useCandidates = allSIMDRegs(); + if (op1->OperGet() == GT_LCL_VAR) + { + assert(op1->TypeGet() != TYP_SIMD32); + useCandidates = RBM_DOUBLERET; + } BuildUse(op1, useCandidates); return 1; } -#endif // !TARGET_ARM64 +#endif // TARGET_ARM64 if (varTypeIsStruct(tree)) { diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 8895dc95ecec..63f184a572c0 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -1280,6 +1280,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) regMaskTP srcRegMask = RBM_NONE; regMaskTP sizeRegMask = RBM_NONE; + RefPosition* internalIntDef = nullptr; +#ifdef TARGET_X86 + bool internalIsByte = false; +#endif + if (blkNode->OperIsInitBlkOp()) { if (src->OperIs(GT_INIT_VAL)) @@ -1359,10 +1364,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if ((size & 1) != 0) { // We'll need to store a byte so a byte register is needed on x86. - regMask = allByteRegs(); + regMask = allByteRegs(); + internalIsByte = true; } #endif - buildInternalIntRegisterDefForNode(blkNode, regMask); + internalIntDef = buildInternalIntRegisterDefForNode(blkNode, regMask); } if (size >= XMM_REGSIZE_BYTES) @@ -1436,9 +1442,30 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) BuildUse(blkNode->AsDynBlk()->gtDynamicSize, sizeRegMask); } +#ifdef TARGET_X86 + // If we require a byte register on x86, we may run into an over-constrained situation + // if we have BYTE_REG_COUNT or more uses (currently, it can be at most 4, if both the + // source and destination have base+index addressing). + // This is because the byteable register requirement doesn't "reserve" a specific register, + // and it would be possible for the incoming sources to all be occupying the byteable + // registers, leaving none free for the internal register. + // In this scenario, we will require rax to ensure that it is reserved and available. + // We need to make that modification prior to building the uses for the internal register, + // so that when we create the use we will also create the RefTypeFixedRef on the RegRecord. + // We don't expect a useCount of more than 3 for the initBlk case, so we haven't set + // internalIsByte in that case above. + assert((useCount < BYTE_REG_COUNT) || !blkNode->OperIsInitBlkOp()); + if (internalIsByte && (useCount >= BYTE_REG_COUNT)) + { + noway_assert(internalIntDef != nullptr); + internalIntDef->registerAssignment = RBM_RAX; + } +#endif + buildInternalRegisterUses(); regMaskTP killMask = getKillSetForBlockStore(blkNode); BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + return useCount; } @@ -1594,6 +1621,15 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) srcCount = BuildOperandUses(src); buildInternalRegisterUses(); + +#ifdef TARGET_X86 + // There are only 4 (BYTE_REG_COUNT) byteable registers on x86. If we require a byteable internal register, + // we must have less than BYTE_REG_COUNT sources. + // If we have BYTE_REG_COUNT or more sources, and require a byteable internal register, we need to reserve + // one explicitly (see BuildBlockStore()). 
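Editor's note: the x86-specific change above guards against an over-constrained allocation: only BYTE_REG_COUNT registers can address their low byte, so when the uses could occupy all of them, the internal byte register is pinned to rax up front. A small restatement of the decision; the helper name is invented, the rule is from the hunk:

    constexpr unsigned BYTE_REG_COUNT = 4; // eax, ebx, ecx, edx can address their low byte

    // If every byteable register might already be taken by the incoming sources,
    // reserve a specific one (rax) for the internal register via a fixed ref.
    bool MustPinInternalByteRegToRax(bool internalIsByte, unsigned useCount)
    {
        return internalIsByte && (useCount >= BYTE_REG_COUNT);
    }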
+ assert(srcCount < BYTE_REG_COUNT); +#endif + return srcCount; } #endif // FEATURE_PUT_STRUCT_ARG_STK @@ -1779,9 +1815,9 @@ int LinearScan::BuildIntrinsic(GenTree* tree) assert(op1->TypeGet() == tree->TypeGet()); RefPosition* internalFloatDef = nullptr; - switch (tree->AsIntrinsic()->gtIntrinsicId) + switch (tree->AsIntrinsic()->gtIntrinsicName) { - case CORINFO_INTRINSIC_Abs: + case NI_System_Math_Abs: // Abs(float x) = x & 0x7fffffff // Abs(double x) = x & 0x7ffffff ffffffff @@ -1798,16 +1834,16 @@ int LinearScan::BuildIntrinsic(GenTree* tree) break; #ifdef TARGET_X86 - case CORINFO_INTRINSIC_Cos: - case CORINFO_INTRINSIC_Sin: + case NI_System_Math_Cos: + case NI_System_Math_Sin: NYI_X86("Math intrinsics Cos and Sin"); break; #endif // TARGET_X86 - case CORINFO_INTRINSIC_Sqrt: - case CORINFO_INTRINSIC_Round: - case CORINFO_INTRINSIC_Ceiling: - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Sqrt: + case NI_System_Math_Round: + case NI_System_Math_Ceiling: + case NI_System_Math_Floor: break; default: @@ -2728,6 +2764,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } } } + #ifdef FEATURE_SIMD if (varTypeIsSIMD(indirTree)) { @@ -2736,6 +2773,16 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) buildInternalRegisterUses(); #endif // FEATURE_SIMD +#ifdef TARGET_X86 + // There are only BYTE_REG_COUNT byteable registers on x86. If we have a source that requires + // such a register, we must have no more than BYTE_REG_COUNT sources. + // If we have more than BYTE_REG_COUNT sources, and require a byteable register, we need to reserve + // one explicitly (see BuildBlockStore()). + // (Note that the assert below doesn't count internal registers because we only have + // floating point internal registers, if any). + assert(srcCount <= BYTE_REG_COUNT); +#endif + if (indirTree->gtOper != GT_STOREIND) { BuildDef(indirTree); diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index c333a2799fe4..5430a2b02a96 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -2707,6 +2707,13 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd; #endif indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM); +#ifdef TARGET_ARM + // Issue #xxxx : Don't attempt to CSE this constant on ARM32 + // + // This constant has specific register requirements, and LSRA doesn't currently correctly + // handle them when the value is in a CSE'd local. + indirectCellAddress->SetDoNotCSE(); +#endif // TARGET_ARM // Push the stub address onto the list of arguments. call->gtCallArgs = gtPrependNewCallArg(indirectCellAddress, call->gtCallArgs); @@ -5735,7 +5742,9 @@ GenTree* Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, GenTree* tree; if (varTypeIsStruct(varType)) { - tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, ptrArg, typGetBlkLayout(varDsc->lvExactSize)); + CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); + assert(typeHnd != nullptr); + tree = gtNewObjNode(typeHnd, ptrArg); } else { @@ -5883,7 +5892,7 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) } #ifdef FEATURE_SIMD - // if this field belongs to simd struct, translate it to simd instrinsic. + // if this field belongs to simd struct, translate it to simd intrinsic. 
if (mac == nullptr) { GenTree* newTree = fgMorphFieldToSIMDIntrinsicGet(tree); @@ -6620,13 +6629,13 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // 1) If the callee has structs which cannot be enregistered it will be // reported as cannot fast tail call. This is an implementation limitation // where the callee only is checked for non enregisterable structs. This is -// tracked with https://github.com/dotnet/coreclr/issues/12644. +// tracked with https://github.com/dotnet/runtime/issues/8492. // // 2) If the caller or callee has stack arguments and the callee has more // arguments then the caller it will be reported as cannot fast tail call. // This is due to a bug in LowerFastTailCall which assumes that // nCalleeArgs <= nCallerArgs, which is always true on Windows Amd64. This -// is tracked with https://github.com/dotnet/coreclr/issues/12468. +// is tracked with https://github.com/dotnet/runtime/issues/8413. // // 3) If the callee has a 9 to 16 byte struct argument and the callee has // stack arguments, the decision will be to not fast tail call. This is @@ -6765,6 +6774,12 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) return false; } + if (callee->IsStressTailCall()) + { + reportFastTailCallDecision("Fast tail calls are not performed under tail call stress"); + return false; + } + // Note on vararg methods: // If the caller is vararg method, we don't know the number of arguments passed by caller's caller. // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its @@ -7217,6 +7232,14 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // is set. This avoids the need for iterating through all lcl vars of the current // method. Right now throughout the code base we are not consistently using 'set' // method to set lvHasLdAddrOp and lvAddrExposed flags. + + bool isImplicitOrStressTailCall = call->IsImplicitTailCall() || call->IsStressTailCall(); + if (isImplicitOrStressTailCall && compLocallocUsed) + { + failTailCall("Localloc used"); + return nullptr; + } + bool hasStructParam = false; for (unsigned varNum = 0; varNum < lvaCount; varNum++) { @@ -7226,7 +7249,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // We still must check for any struct parameters and set 'hasStructParam' // so that we won't transform the recursive tail call into a loop. // - if (call->IsImplicitTailCall()) + if (isImplicitOrStressTailCall) { if (varDsc->lvHasLdAddrOp && !lvaIsImplicitByRefLocal(varNum)) { @@ -7677,6 +7700,8 @@ GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL assert(!call->IsImplicitTailCall()); assert(!fgCanFastTailCall(call, nullptr)); + bool virtualCall = call->IsVirtual(); + // If VSD then get rid of arg to VSD since we turn this into a direct call. // The extra arg will be the first arg so this needs to be done before we // handle the retbuf below. @@ -7711,41 +7736,39 @@ GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL // where we pass instantiating stub. if ((help.flags & CORINFO_TAILCALL_STORE_TARGET) != 0) { - // If asked to store target and we have a type arg we will store - // instantiating stub, so in that case we should not pass the type arg. 
- if (call->tailCallInfo->GetSig()->hasTypeArg()) + JITDUMP("Adding target since VM requested it\n"); + GenTree* target; + if (!virtualCall) { - JITDUMP("Removing type arg"); - - assert(call->gtCallArgs != nullptr); - if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L) + if (call->gtCallType == CT_INDIRECT) { - // Generic context is first arg - call->gtCallArgs = call->gtCallArgs->GetNext(); + noway_assert(call->gtCallAddr != nullptr); + target = call->gtCallAddr; } else { - // Generic context is last arg - GenTreeCall::Use** lastArgSlot = &call->gtCallArgs; - while ((*lastArgSlot)->GetNext() != nullptr) + CORINFO_CONST_LOOKUP addrInfo; + info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo); + + CORINFO_GENERIC_HANDLE handle = nullptr; + void* pIndirection = nullptr; + assert(addrInfo.accessType != IAT_PPVALUE && addrInfo.accessType != IAT_RELPVALUE); + + if (addrInfo.accessType == IAT_VALUE) { - lastArgSlot = &(*lastArgSlot)->NextRef(); + handle = addrInfo.handle; } - - *lastArgSlot = nullptr; + else if (addrInfo.accessType == IAT_PVALUE) + { + pIndirection = addrInfo.addr; + } + target = gtNewIconEmbHndNode(handle, pIndirection, GTF_ICON_FTN_ADDR, call->gtCallMethHnd); } - call->fgArgInfo = nullptr; - } - - JITDUMP("Adding target since VM requested it\n"); - GenTree* target; - if (call->tailCallInfo->IsCalli()) - { - noway_assert(call->gtCallType == CT_INDIRECT && call->gtCallAddr != nullptr); - target = call->gtCallAddr; } else { + assert(!call->tailCallInfo->GetSig()->hasTypeArg()); + CORINFO_CALL_INFO callInfo; unsigned flags = CORINFO_CALLINFO_LDFTN; if (call->tailCallInfo->IsCallvirt()) @@ -7755,19 +7778,10 @@ GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL eeGetCallInfo(call->tailCallInfo->GetToken(), nullptr, (CORINFO_CALLINFO_FLAGS)flags, &callInfo); - if (!call->tailCallInfo->IsCallvirt() || - ((callInfo.methodFlags & (CORINFO_FLG_FINAL | CORINFO_FLG_STATIC)) != 0) || - ((callInfo.methodFlags & CORINFO_FLG_VIRTUAL) == 0)) - { - target = getMethodPointerTree(call->tailCallInfo->GetToken(), &callInfo); - } - else - { - assert(call->gtCallThisArg != nullptr); - // TODO: Proper cloning of the this pointer. - target = getVirtMethodPointerTree(gtCloneExpr(call->gtCallThisArg->GetNode()), - call->tailCallInfo->GetToken(), &callInfo); - } + assert(call->gtCallThisArg != nullptr); + // TODO: Proper cloning of the this pointer. + target = getVirtMethodPointerTree(gtCloneExpr(call->gtCallThisArg->GetNode()), + call->tailCallInfo->GetToken(), &callInfo); } // Insert target as last arg @@ -7894,21 +7908,54 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig // Add return value arg. GenTree* retValArg; - GenTree* retVal = nullptr; - unsigned int newRetLcl = BAD_VAR_NUM; + GenTree* retVal = nullptr; + unsigned int newRetLcl = BAD_VAR_NUM; + GenTree* copyToRetBufNode = nullptr; - // Use existing retbuf if there is one. if (origCall->HasRetBufArg()) { JITDUMP("Transferring retbuf\n"); GenTree* retBufArg = origCall->gtCallArgs->GetNode(); - assert((info.compRetBuffArg != BAD_VAR_NUM) && retBufArg->OperIsLocal() && - (retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg)); - retValArg = retBufArg; + assert(info.compRetBuffArg != BAD_VAR_NUM); + assert(retBufArg->OperIsLocal()); + assert(retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg); + + if (info.compRetBuffDefStack) + { + // Use existing retbuf. 
+ retValArg = retBufArg; + } + else + { + // Caller return buffer argument retBufArg can point to GC heap while the dispatcher expects + // the return value argument retValArg to point to the stack. + // We use a temporary stack allocated return buffer to hold the value during the dispatcher call + // and copy the value back to the caller return buffer after that. + unsigned int tmpRetBufNum = lvaGrabTemp(true DEBUGARG("substitute local for return buffer")); + + constexpr bool unsafeValueClsCheck = false; + lvaSetStruct(tmpRetBufNum, origCall->gtRetClsHnd, unsafeValueClsCheck); + lvaSetVarAddrExposed(tmpRetBufNum); + + var_types tmpRetBufType = lvaGetDesc(tmpRetBufNum)->TypeGet(); + + retValArg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(tmpRetBufNum, tmpRetBufType)); + + var_types callerRetBufType = lvaGetDesc(info.compRetBuffArg)->TypeGet(); + + GenTree* dstAddr = gtNewLclvNode(info.compRetBuffArg, callerRetBufType); + GenTree* dst = gtNewObjNode(info.compMethodInfo->args.retTypeClass, dstAddr); + GenTree* src = gtNewLclvNode(tmpRetBufNum, tmpRetBufType); + + constexpr bool isVolatile = false; + constexpr bool isCopyBlock = true; + copyToRetBufNode = gtNewBlkOpNode(dst, src, isVolatile, isCopyBlock); + } + if (origCall->gtType != TYP_VOID) { - retVal = gtClone(retValArg); + retVal = gtClone(retBufArg); } } else if (origCall->gtType != TYP_VOID) @@ -7962,46 +8009,30 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig GenTree* retAddrSlot = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaRetAddrVar, TYP_I_IMPL)); callDispatcherNode->gtCallArgs = gtPrependNewCallArg(retAddrSlot, callDispatcherNode->gtCallArgs); + GenTree* finalTree = callDispatcherNode; + + if (copyToRetBufNode != nullptr) + { + finalTree = gtNewOperNode(GT_COMMA, TYP_VOID, callDispatcherNode, copyToRetBufNode); + } + if (origCall->gtType == TYP_VOID) { - return callDispatcherNode; + return finalTree; } assert(retVal != nullptr); - GenTree* comma = gtNewOperNode(GT_COMMA, origCall->TypeGet(), callDispatcherNode, retVal); + finalTree = gtNewOperNode(GT_COMMA, origCall->TypeGet(), finalTree, retVal); + // The JIT seems to want to CSE this comma and messes up multi-reg ret // values in the process. Just avoid CSE'ing this tree entirely in that // case. 
if (origCall->HasMultiRegRetVal()) { - comma->gtFlags |= GTF_DONT_CSE; + finalTree->gtFlags |= GTF_DONT_CSE; } - return comma; -} - -//------------------------------------------------------------------------ -// getMethodPointerTree: get a method pointer tree -// -// Arguments: -// pResolvedToken - resolved token of the call -// pCallInfo - the call info of the call -// -// Return Value: -// A node representing the method pointer -// -GenTree* Compiler::getMethodPointerTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo) -{ - switch (pCallInfo->kind) - { - case CORINFO_CALL: - return new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, pCallInfo->hMethod); - case CORINFO_CALL_CODE_POINTER: - return getLookupTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_FTN_ADDR, pCallInfo->hMethod); - default: - noway_assert(!"unknown call kind"); - return nullptr; - } + return finalTree; } //------------------------------------------------------------------------ @@ -8743,7 +8774,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) assert(!call->CanTailCall()); #if FEATURE_MULTIREG_RET - if (fgGlobalMorph && call->HasMultiRegRetVal()) + if (fgGlobalMorph && call->HasMultiRegRetVal() && varTypeIsStruct(call->TypeGet())) { // The tail call has been rejected so we must finish the work deferred // by impFixupCallStructReturn for multi-reg-returning calls and transform @@ -8760,23 +8791,14 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) lvaGrabTemp(false DEBUGARG("Return value temp for multi-reg return (rejected tail call).")); lvaTable[tmpNum].lvIsMultiRegRet = true; - GenTree* assg = nullptr; - if (varTypeIsStruct(call->TypeGet())) - { - CORINFO_CLASS_HANDLE structHandle = call->gtRetClsHnd; - assert(structHandle != NO_CLASS_HANDLE); - const bool unsafeValueClsCheck = false; - lvaSetStruct(tmpNum, structHandle, unsafeValueClsCheck); - var_types structType = lvaTable[tmpNum].lvType; - GenTree* dst = gtNewLclvNode(tmpNum, structType); - assg = gtNewAssignNode(dst, call); - } - else - { - assg = gtNewTempAssign(tmpNum, call); - } - - assg = fgMorphTree(assg); + CORINFO_CLASS_HANDLE structHandle = call->gtRetClsHnd; + assert(structHandle != NO_CLASS_HANDLE); + const bool unsafeValueClsCheck = false; + lvaSetStruct(tmpNum, structHandle, unsafeValueClsCheck); + var_types structType = lvaTable[tmpNum].lvType; + GenTree* dst = gtNewLclvNode(tmpNum, structType); + GenTree* assg = gtNewAssignNode(dst, call); + assg = fgMorphTree(assg); // Create the assignment statement and insert it before the current statement. Statement* assgStmt = gtNewStmt(assg, compCurStmt->GetILOffsetX()); @@ -8898,7 +8920,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) GenTree* dest = call->gtCallArgs->GetNode(); assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above. - if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->AsOp()->gtOp1->OperGet() == GT_LCL_VAR)) + if (dest->TypeIs(TYP_BYREF) && !dest->IsLocalAddrExpr()) { // We'll exempt helper calls from this, assuming that the helper implementation // follows the old convention, and does whatever barrier is required. 
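Stepping back to the fgCreateCallDispatcherAndGetResult changes above: the transform chains three trees with GT_COMMA, the dispatcher call first, then the copy from the stack temp back to the caller's return buffer, then the trailing return value. A rough C++ analogue of that evaluation order, with hypothetical names standing in for the JIT trees (this is a sketch, not JIT API):

```cpp
#include <cstdio>

// Hypothetical stand-in for the caller's return buffer.
struct RetBuf
{
    int value;
};

// Stand-in for the dispatcher, which expects a pointer to stack memory.
static void callDispatcher(RetBuf* stackTmp)
{
    stackTmp->value = 42;
}

static int dispatchAndCopy(RetBuf* callerBuf)
{
    RetBuf tmp{};             // stack-allocated substitute return buffer
    callDispatcher(&tmp);     // comma op1: the dispatcher call
    *callerBuf = tmp;         // comma op2: copy back to the (possibly GC-heap) buffer
    return callerBuf->value;  // trailing retVal use
}

int main()
{
    RetBuf buf{};
    printf("%d\n", dispatchAndCopy(&buf)); // prints 42
    return 0;
}
```

Per the diff, the copy-back step only exists when info.compRetBuffDefStack is false, i.e. when the caller's buffer may point into the GC heap; otherwise the original buffer is passed straight through.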
@@ -12159,7 +12181,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) #ifdef TARGET_ARM case GT_INTRINSIC: - if (tree->AsIntrinsic()->gtIntrinsicId == CORINFO_INTRINSIC_Round) + if (tree->AsIntrinsic()->gtIntrinsicName == NI_System_Math_Round) { switch (tree->TypeGet()) { @@ -13633,6 +13655,11 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) unsigned lclNum = temp->AsLclVarCommon()->GetLclNum(); LclVarDsc* varDsc = &lvaTable[lclNum]; + // Note that fgMorph uses GTF_DONT_CSE to mark the left side of an assignment. + // Thus stores have this flag and loads do not have this flag. + // + bool isLoad = (tree->gtFlags & GTF_DONT_CSE) == 0; + // We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0)) { @@ -13659,15 +13686,31 @@ } // If the type of the IND (typ) is a "small int", and the type of the local has the // same width, then we can reduce to just the local variable -- it will be - correctly normalized, and signed/unsigned differences won't matter. + correctly normalized. // // The below transformation cannot be applied if the local var needs to be normalized on load. else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) && !lvaTable[lclNum].lvNormalizeOnLoad()) { - tree->gtType = typ = temp->TypeGet(); - foldAndReturnTemp = true; + // For any store of a small type, we will force loads to be normalized; + // this is necessary because we need to zero/sign extend any load + // after this kind of store. + // + if (!isLoad) + { + varDsc->lvForceLoadNormalize = true; + } + // otherwise we have a load operation + // + // And for loads, signed/unsigned differences do matter.
+ // + else if (varTypeIsUnsigned(lvaTable[lclNum].lvType) == varTypeIsUnsigned(typ)) + { + tree->gtType = typ = temp->TypeGet(); + foldAndReturnTemp = true; + } } + // For matching types we can fold else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) && !lvaTable[lclNum].lvNormalizeOnLoad()) { diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index 63d79fbeff19..8ab264e36061 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -12,14 +12,34 @@ enum NamedIntrinsic : unsigned short NI_System_Enum_HasFlag, NI_System_Math_FusedMultiplyAdd, + NI_System_Math_Sin, + NI_System_Math_Cos, + NI_System_Math_Cbrt, + NI_System_Math_Sqrt, + NI_System_Math_Abs, NI_System_Math_Round, - NI_System_MathF_FusedMultiplyAdd, - NI_System_MathF_Round, + NI_System_Math_Cosh, + NI_System_Math_Sinh, + NI_System_Math_Tan, + NI_System_Math_Tanh, + NI_System_Math_Asin, + NI_System_Math_Asinh, + NI_System_Math_Acos, + NI_System_Math_Acosh, + NI_System_Math_Atan, + NI_System_Math_Atan2, + NI_System_Math_Atanh, + NI_System_Math_Log10, + NI_System_Math_Pow, + NI_System_Math_Exp, + NI_System_Math_Ceiling, + NI_System_Math_Floor, NI_System_Collections_Generic_EqualityComparer_get_Default, NI_System_Buffers_Binary_BinaryPrimitives_ReverseEndianness, NI_System_GC_KeepAlive, NI_System_Type_get_IsValueType, NI_System_Type_IsAssignableFrom, + NI_System_Type_IsAssignableTo, // These are used by HWIntrinsics but are defined more generally // to allow dead code optimization and handle the recursion case diff --git a/src/coreclr/src/jit/optcse.cpp b/src/coreclr/src/jit/optcse.cpp index 46e834df0dc4..06339353826b 100644 --- a/src/coreclr/src/jit/optcse.cpp +++ b/src/coreclr/src/jit/optcse.cpp @@ -20,7 +20,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX /*****************************************************************************/ /* static */ -const size_t Compiler::s_optCSEhashSize = EXPSET_SZ * 2; +const size_t Compiler::s_optCSEhashSizeInitial = EXPSET_SZ * 2; +const size_t Compiler::s_optCSEhashGrowthFactor = 2; +const size_t Compiler::s_optCSEhashBucketSize = 4; /***************************************************************************** * @@ -36,11 +38,11 @@ void Compiler::optCSEstop() CSEdsc* dsc; CSEdsc** ptr; - unsigned cnt; + size_t cnt; optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount](); - for (cnt = s_optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++) + for (cnt = optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++) { for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket) { @@ -373,7 +375,11 @@ void Compiler::optValnumCSE_Init() cseMaskTraits = nullptr; // Allocate and clear the hash bucket table - optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize](); + optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSizeInitial](); + + optCSEhashSize = s_optCSEhashSizeInitial; + optCSEhashMaxCountBeforeResize = optCSEhashSize * s_optCSEhashBucketSize; + optCSEhashCount = 0; optCSECandidateCount = 0; optDoCSE = false; // Stays false until we find duplicate CSE tree @@ -382,6 +388,20 @@ void Compiler::optValnumCSE_Init() optCseCheckedBoundMap = nullptr; } +unsigned optCSEKeyToHashIndex(size_t key, size_t optCSEhashSize) +{ + unsigned hash; + + hash = (unsigned)key; +#ifdef TARGET_64BIT + hash ^= (unsigned)(key >> 32); +#endif + hash *= (unsigned)(optCSEhashSize + 1); + hash >>= 7; + + return hash % optCSEhashSize; +} + 
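For reference, the new bucket-index computation above can be read as a standalone sketch, assuming a 64-bit target so the TARGET_64BIT fold of the upper key bits is active; the names and the sample table size here are illustrative only:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of optCSEKeyToHashIndex: fold the upper 32 bits of the key into the
// lower 32, scramble with (tableSize + 1), drop low-order bits, and reduce
// modulo the table size.
static unsigned keyToHashIndex(uint64_t key, size_t tableSize)
{
    unsigned hash = (unsigned)key;
    hash ^= (unsigned)(key >> 32);      // TARGET_64BIT: mix in the upper half
    hash *= (unsigned)(tableSize + 1);  // scramble relative to the table size
    hash >>= 7;
    return hash % (unsigned)tableSize;
}

int main()
{
    size_t tableSize = 64; // illustrative initial size, not EXPSET_SZ * 2
    printf("bucket = %u\n", keyToHashIndex(0x123456789abcdef0ULL, tableSize));
    // After a resize the same key generally lands in a different bucket,
    // which is why the rehash loop below recomputes every index.
    printf("doubled = %u\n", keyToHashIndex(0x123456789abcdef0ULL, tableSize * 2));
    return 0;
}
```

Combined with the resize policy introduced in this change (double the table, per s_optCSEhashGrowthFactor, once optCSEhashCount reaches optCSEhashSize * s_optCSEhashBucketSize entries), this keeps the average bucket depth bounded at four.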
//--------------------------------------------------------------------------- // optValnumCSE_Index: // - Returns the CSE index to use for this tree, @@ -401,10 +421,26 @@ void Compiler::optValnumCSE_Init() // unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) { - unsigned key; - unsigned hash; + size_t key; unsigned hval; CSEdsc* hashDsc; + bool isIntConstHash = false; + bool enableSharedConstCSE = false; + int configValue = JitConfig.JitConstCSE(); + +#if defined(TARGET_ARM64) + // ARM64 - allow combining with nearby offsets when config is not 2 or 4 + if ((configValue != CONST_CSE_ENABLE_ARM64_NO_SHARING) && (configValue != CONST_CSE_ENABLE_ALL_NO_SHARING)) + { + enableSharedConstCSE = true; + } +#endif // TARGET_ARM64 + + // All platforms - also allow combining with nearby offsets when config is 3 + if (configValue == CONST_CSE_ENABLE_ALL) + { + enableSharedConstCSE = true; + } // We use the liberal Value numbers when building the set of CSE ValueNum vnLib = tree->GetVN(VNK_Liberal); @@ -446,11 +482,11 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) // if (vnOp2Lib != vnLib) { - key = (unsigned)vnLib; // include the exc set in the hash key + key = vnLib; // include the exc set in the hash key } else { - key = (unsigned)vnLibNorm; + key = vnLibNorm; } // If we didn't do the above we would have op1 as the CSE def // assert(vnLibNorm == vnStore->VNNormalValue(vnOp2Lib)); } - else // Not a GT_COMMA + else if (enableSharedConstCSE && tree->IsIntegralConst()) + { + assert(vnStore->IsVNConstant(vnLibNorm)); + key = vnStore->CoercedConstantValue(vnLibNorm); + + // We don't share small offset constants when we require a reloc + if (!tree->AsIntConCommon()->ImmedValNeedsReloc(this)) + { + // Make constants that have the same upper bits use the same key + + // Shift the key right by CSE_CONST_SHARED_LOW_BITS bits; this sets the upper bits to zero + key >>= CSE_CONST_SHARED_LOW_BITS; + } + assert((key & TARGET_SIGN_BIT) == 0); + + // We use the sign bit of 'key' as the flag + // that we are hashing constants (with a shared offset) + key |= TARGET_SIGN_BIT; + } + else // Not a GT_COMMA or a GT_CNS_INT { - key = (unsigned)vnLibNorm; + key = vnLibNorm; } // Compute the hash value for the expression - hash = key; - hash *= (unsigned)(s_optCSEhashSize + 1); - hash >>= 7; - - hval = hash % s_optCSEhashSize; + hval = optCSEKeyToHashIndex(key, optCSEhashSize); /* Look for a matching index in the hash table */ @@ -480,6 +531,12 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) { if (hashDsc->csdHashKey == key) { + // Check for mismatched types on GT_CNS_INT nodes + if ((tree->OperGet() == GT_CNS_INT) && (tree->TypeGet() != hashDsc->csdTree->TypeGet())) + { + continue; + } + treeStmtLst* newElem; /* Have we started the list of matching nodes?
*/ @@ -582,9 +639,42 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) if (optCSECandidateCount < MAX_CSE_CNT) { + if (optCSEhashCount == optCSEhashMaxCountBeforeResize) + { + size_t newOptCSEhashSize = optCSEhashSize * s_optCSEhashGrowthFactor; + CSEdsc** newOptCSEhash = new (this, CMK_CSE) CSEdsc*[newOptCSEhashSize](); + + // Iterate through each existing entry, moving to the new table + CSEdsc** ptr; + CSEdsc* dsc; + size_t cnt; + for (cnt = optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++) + { + for (dsc = *ptr; dsc;) + { + CSEdsc* nextDsc = dsc->csdNextInBucket; + + size_t newHval = optCSEKeyToHashIndex(dsc->csdHashKey, newOptCSEhashSize); + + // Move CSEdsc to bucket in enlarged table + dsc->csdNextInBucket = newOptCSEhash[newHval]; + newOptCSEhash[newHval] = dsc; + + dsc = nextDsc; + } + } + + optCSEhash = newOptCSEhash; + optCSEhashSize = newOptCSEhashSize; + optCSEhashMaxCountBeforeResize = optCSEhashMaxCountBeforeResize * s_optCSEhashGrowthFactor; + } + + ++optCSEhashCount; hashDsc = new (this, CMK_CSE) CSEdsc; hashDsc->csdHashKey = key; + hashDsc->csdConstDefValue = 0; + hashDsc->csdConstDefVN = vnStore->VNForNull(); // uninit value hashDsc->csdIndex = 0; hashDsc->csdLiveAcrossCall = false; hashDsc->csdDefCount = 0; @@ -645,8 +735,17 @@ unsigned Compiler::optValnumCSE_Index(GenTree* tree, Statement* stmt) #ifdef DEBUG if (verbose) { - printf("\nCSE candidate #%02u, vn=", CSEindex); - vnPrint(key, 0); + printf("\nCSE candidate #%02u, key=", CSEindex); + if (!Compiler::Is_Shared_Const_CSE(key)) + { + vnPrint((unsigned)key, 0); + } + else + { + size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(key); + printf("K_%p", dspPtr(kVal)); + } + printf(" in " FMT_BB ", [cost=%2u, size=%2u]: \n", compCurBB->bbNum, tree->GetCostEx(), tree->GetCostSz()); gtDispTree(tree); } @@ -666,6 +765,29 @@ unsigned Compiler::optValnumCSE_Locate() { // Locate CSE candidates and assign them indices + bool enableConstCSE = true; + + int configValue = JitConfig.JitConstCSE(); + + // all platforms - disable CSE of constant values when config is 1 + if (configValue == CONST_CSE_DISABLE_ALL) + { + enableConstCSE = false; + } + +#if !defined(TARGET_ARM64) + // non-ARM64 platforms - disable by default + // + enableConstCSE = false; + + // Check for the two enable cases for all platforms + // + if ((configValue == CONST_CSE_ENABLE_ALL) || (configValue == CONST_CSE_ENABLE_ALL_NO_SHARING)) + { + enableConstCSE = true; + } +#endif + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) { /* Make the block publicly available */ @@ -691,6 +813,16 @@ unsigned Compiler::optValnumCSE_Locate() optCseUpdateCheckedBoundMap(tree); } + // Don't allow CSE of constants if it is disabled + // + if (tree->IsIntegralConst()) + { + if (!enableConstCSE) + { + continue; + } + } + if (!optIsCSEcandidate(tree)) { continue; @@ -701,15 +833,17 @@ unsigned Compiler::optValnumCSE_Locate() continue; } - // Don't CSE constant values, instead let the Value Number - // based Assertion Prop phase handle them. Here, unlike - // the rest of optCSE, we use the conservative value number + // We want to CSE simple constant leaf nodes, but we don't want to + // CSE non-leaf trees that compute CSE constant values. + // Instead we let the Value Number based Assertion Prop phase handle them. 
+ // + // Here, unlike the rest of optCSE, we use the conservative value number // rather than the liberal one, since the conservative one // is what the Value Number based Assertion Prop will use // and the point is to avoid optimizing cases that it will // handle. // - if (vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair))) + if (!tree->OperIsLeaf() && vnStore->IsVNConstant(vnStore->VNConservativeNormalValue(tree->gtVNPair))) { continue; } @@ -1428,23 +1562,28 @@ void Compiler::optValnumCSE_Availablity() } } - // Record or update the value of desc->defConservNormVN + // For shared const CSE we don't set/use the defConservNormVN // - ValueNum theConservNormVN = vnStore->VNConservativeNormalValue(tree->gtVNPair); - - // Is defConservNormVN still set to the uninit marker value of VNForNull() ? - if (desc->defConservNormVN == vnStore->VNForNull()) - { - // This is the first def that we have visited, set defConservNormVN - desc->defConservNormVN = theConservNormVN; - } - else + if (!Is_Shared_Const_CSE(desc->csdHashKey)) { - // Check to see if all defs have the same conservative normal VN - if (theConservNormVN != desc->defConservNormVN) + // Record or update the value of desc->defConservNormVN + // + ValueNum theConservNormVN = vnStore->VNConservativeNormalValue(tree->gtVNPair); + + // Is defConservNormVN still set to the uninit marker value of VNForNull() ? + if (desc->defConservNormVN == vnStore->VNForNull()) + { + // This is the first def that we have visited, set defConservNormVN + desc->defConservNormVN = theConservNormVN; + } + else { - // This candidate has defs with differing conservative normal VNs, mark it with NoVN - desc->defConservNormVN = ValueNumStore::NoVN; // record the marker for differing VNs + // Check to see if all defs have the same conservative normal VN + if (theConservNormVN != desc->defConservNormVN) + { + // This candidate has defs with differing conservative normal VNs, mark it with NoVN + desc->defConservNormVN = ValueNumStore::NoVN; // record the marker for differing VNs + } } } @@ -1894,9 +2033,19 @@ class CSE_Heuristic cost = dsc->csdTree->GetCostEx(); } - printf("CSE #%02u, {$%-3x, $%-3x} useCnt=%d: [def=%3u, use=%3u, cost=%3u%s]\n :: ", - dsc->csdIndex, dsc->csdHashKey, dsc->defExcSetPromise, dsc->csdUseCount, def, use, cost, - dsc->csdLiveAcrossCall ? ", call" : " "); + if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey)) + { + printf("CSE #%02u, {$%-3x, $%-3x} useCnt=%d: [def=%3u, use=%3u, cost=%3u%s]\n :: ", + dsc->csdIndex, dsc->csdHashKey, dsc->defExcSetPromise, dsc->csdUseCount, def, use, cost, + dsc->csdLiveAcrossCall ? ", call" : " "); + } + else + { + size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey); + printf("CSE #%02u, {K_%p} useCnt=%d: [def=%3u, use=%3u, cost=%3u%s]\n :: ", dsc->csdIndex, + dspPtr(kVal), dsc->csdUseCount, def, use, cost, + dsc->csdLiveAcrossCall ? ", call" : " "); + } m_pCompiler->gtDispTree(expr, nullptr, nullptr, true); } @@ -2377,8 +2526,9 @@ class CSE_Heuristic #ifdef DEBUG if (m_pCompiler->verbose) { - printf("Moderate CSE Promotion (CSE is live across a call) (%u >= %u)\n", cseRefCnt, - moderateRefCnt); + printf("Moderate CSE Promotion (%s) (%u >= %u)\n", + candidate->LiveAcrossCall() ? 
"CSE is live across a call" : "not enregisterable", + cseRefCnt, moderateRefCnt); } #endif cse_def_cost = 2; @@ -2409,8 +2559,9 @@ class CSE_Heuristic #ifdef DEBUG if (m_pCompiler->verbose) { - printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n", cseRefCnt, - moderateRefCnt); + printf("Conservative CSE Promotion (%s) (%u < %u)\n", + candidate->LiveAcrossCall() ? "CSE is live across a call" : "not enregisterable", + cseRefCnt, moderateRefCnt); } #endif cse_def_cost = 2; @@ -2660,8 +2811,7 @@ class CSE_Heuristic // // Later we will unmark any nested CSE's for the CSE uses. // - Compiler::CSEdsc* dsc = successfulCandidate->CseDsc(); - Compiler::treeStmtLst* lst; + Compiler::CSEdsc* dsc = successfulCandidate->CseDsc(); // If there's just a single def for the CSE, we'll put this // CSE into SSA form on the fly. We won't need any PHIs. @@ -2678,53 +2828,122 @@ class CSE_Heuristic cseSsaNum = m_pCompiler->lvaTable[cseLclVarNum].lvPerSsaData.AllocSsaNum(allocator); } -#ifdef DEBUG // Verify that all of the ValueNumbers in this list are correct as // Morph will change them when it performs a mutating operation. // - ValueNum firstVN = ValueNumStore::NoVN; - ValueNum currVN; - bool allSame = true; + bool setRefCnt = true; + bool allSame = true; + bool isSharedConst = Compiler::Is_Shared_Const_CSE(dsc->csdHashKey); + ValueNum bestVN = ValueNumStore::NoVN; + bool bestIsDef = false; + ssize_t bestConstValue = 0; + Compiler::treeStmtLst* lst = dsc->csdTreeList; - lst = dsc->csdTreeList; while (lst != nullptr) { // Ignore this node if the gtCSEnum value has been cleared if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) { // We used the liberal Value numbers when building the set of CSE - currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); assert(currVN != ValueNumStore::NoVN); + ssize_t curConstValue = isSharedConst ? m_pCompiler->vnStore->CoercedConstantValue(currVN) : 0; + + GenTree* exp = lst->tslTree; + bool isDef = IS_CSE_DEF(exp->gtCSEnum); - if (firstVN == ValueNumStore::NoVN) + if (bestVN == ValueNumStore::NoVN) { - firstVN = currVN; + // first entry + // set bestVN + bestVN = currVN; + + if (isSharedConst) + { + // set bestConstValue and bestIsDef + bestConstValue = curConstValue; + bestIsDef = isDef; + } } - else if (currVN != firstVN) + else if (currVN != bestVN) { + assert(isSharedConst); // Must be true when we have differing VNs + + // subsequent entry + // clear allSame and check for a lower constant allSame = false; - break; + + ssize_t diff = curConstValue - bestConstValue; + + // The ARM64 ldr addressing modes allow for a subtraction of up to 255 + // so we will allow the diff to be up to -255 before replacing a CSE def + // This will minimize the number of extra subtract instructions. 
+ // + if ((bestIsDef && (diff < -255)) || (!bestIsDef && (diff < 0))) + { + // set new bestVN, bestConstValue and bestIsDef + bestVN = currVN; + bestConstValue = curConstValue; + bestIsDef = isDef; + } + } + + BasicBlock* blk = lst->tslBlock; + BasicBlock::weight_t curWeight = blk->getBBWeight(m_pCompiler); + + if (setRefCnt) + { + m_pCompiler->lvaTable[cseLclVarNum].setLvRefCnt(1); + m_pCompiler->lvaTable[cseLclVarNum].setLvRefCntWtd(curWeight); + setRefCnt = false; + } + else + { + m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(curWeight, m_pCompiler); + } + + // A CSE Def references the LclVar twice + // + if (isDef) + { + m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(curWeight, m_pCompiler); } } lst = lst->tslNext; } - if (!allSame) + + dsc->csdConstDefValue = bestConstValue; + dsc->csdConstDefVN = bestVN; + +#ifdef DEBUG + if (m_pCompiler->verbose) { - lst = dsc->csdTreeList; - GenTree* firstTree = lst->tslTree; - printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", m_pCompiler->info.compFullName, - GenTree::OpName(firstTree->OperGet()), varTypeName(firstTree->TypeGet())); - while (lst != nullptr) + if (!allSame) { - if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) + if (isSharedConst) { - currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); - printf("0x%x(%s " FMT_VN ") ", lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "use" : "def", - currVN); + printf("\nWe have shared Const CSE's and selected " FMT_VN " with a value of 0x%p as the base.\n", + dsc->csdConstDefVN, dspPtr(dsc->csdConstDefValue)); + } + else // !isSharedConst + { + lst = dsc->csdTreeList; + GenTree* firstTree = lst->tslTree; + printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", m_pCompiler->info.compFullName, + GenTree::OpName(firstTree->OperGet()), varTypeName(firstTree->TypeGet())); + while (lst != nullptr) + { + if (IS_CSE_INDEX(lst->tslTree->gtCSEnum)) + { + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(lst->tslTree->gtVNPair); + printf("0x%x(%s " FMT_VN ") ", lst->tslTree, + IS_CSE_USE(lst->tslTree->gtCSEnum) ? "use" : "def", currVN); + } + lst = lst->tslNext; + } + printf("\n"); } - lst = lst->tslNext; } - printf("\n"); } #endif // DEBUG @@ -2762,7 +2981,8 @@ class CSE_Heuristic // The cseLclVarType must be a compatible with expTyp // - noway_assert(IsCompatibleType(cseLclVarTyp, expTyp)); + ValueNumStore* vnStore = m_pCompiler->vnStore; + noway_assert(IsCompatibleType(cseLclVarTyp, expTyp) || (dsc->csdConstDefVN != vnStore->VNForNull())); // This will contain the replacement tree for exp // It will either be the CSE def or CSE ref @@ -2790,63 +3010,86 @@ class CSE_Heuristic // We will replace the CSE ref with a new tree // this is typically just a simple use of the new CSE LclVar // - ValueNumStore* vnStore = m_pCompiler->vnStore; - cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); - // Assign the ssa num for the use. Note it may be the reserved num. - cse->AsLclVarCommon()->SetSsaNum(cseSsaNum); + // Create a reference to the CSE temp + GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); + cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); - // assign the proper ValueNumber, A CSE use discards any exceptions - cse->gtVNPair = vnStore->VNPNormalPair(exp->gtVNPair); - - ValueNum theConservativeVN = successfulCandidate->CseDsc()->defConservNormVN; + // Assign the ssa num for the lclvar use. Note it may be the reserved num. 
+ cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - if (theConservativeVN != ValueNumStore::NoVN) + cse = cseLclVar; + if (isSharedConst) { - // All defs of this CSE share the same normal conservative VN, and we are rewriting this - // use to fetch the same value with no reload, so we can safely propagate that - // conservative VN to this use. This can help range check elimination later on. - cse->gtVNPair.SetConservative(theConservativeVN); - - // If the old VN was flagged as a checked bound, propagate that to the new VN - // to make sure assertion prop will pay attention to this VN. - ValueNum oldVN = exp->gtVNPair.GetConservative(); - if (!vnStore->IsVNConstant(theConservativeVN) && vnStore->IsVNCheckedBound(oldVN)) + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); + ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); + ssize_t delta = curValue - dsc->csdConstDefValue; + if (delta != 0) { - vnStore->SetVNIsCheckedBound(theConservativeVN); + GenTree* deltaNode = m_pCompiler->gtNewIconNode(delta, cseLclVarTyp); + cse = m_pCompiler->gtNewOperNode(GT_ADD, cseLclVarTyp, cseLclVar, deltaNode); + cse->SetDoNotCSE(); } + } - GenTree* cmp; - if ((m_pCompiler->optCseCheckedBoundMap != nullptr) && - (m_pCompiler->optCseCheckedBoundMap->Lookup(exp, &cmp))) - { - // Propagate the new value number to this compare node as well, since - // subsequent range check elimination will try to correlate it with - // the other appearances that are getting CSEd. + // assign the proper ValueNumber, A CSE use discards any exceptions + cse->gtVNPair = vnStore->VNPNormalPair(exp->gtVNPair); - ValueNum oldCmpVN = cmp->gtVNPair.GetConservative(); - ValueNum newCmpArgVN; + // shared const CSE has the correct value number assigned + // and both liberal and conservative are identical + // and they do not use theConservativeVN + // + if (!isSharedConst) + { + ValueNum theConservativeVN = successfulCandidate->CseDsc()->defConservNormVN; - ValueNumStore::CompareCheckedBoundArithInfo info; - if (vnStore->IsVNCompareCheckedBound(oldCmpVN)) + if (theConservativeVN != ValueNumStore::NoVN) + { + // All defs of this CSE share the same normal conservative VN, and we are rewriting this + // use to fetch the same value with no reload, so we can safely propagate that + // conservative VN to this use. This can help range check elimination later on. + cse->gtVNPair.SetConservative(theConservativeVN); + + // If the old VN was flagged as a checked bound, propagate that to the new VN + // to make sure assertion prop will pay attention to this VN. + ValueNum oldVN = exp->gtVNPair.GetConservative(); + if (!vnStore->IsVNConstant(theConservativeVN) && vnStore->IsVNCheckedBound(oldVN)) { - // Comparison is against the bound directly. - - newCmpArgVN = theConservativeVN; - vnStore->GetCompareCheckedBound(oldCmpVN, &info); + vnStore->SetVNIsCheckedBound(theConservativeVN); } - else + + GenTree* cmp; + if ((m_pCompiler->optCseCheckedBoundMap != nullptr) && + (m_pCompiler->optCseCheckedBoundMap->Lookup(exp, &cmp))) { - // Comparison is against the bound +/- some offset. + // Propagate the new value number to this compare node as well, since + // subsequent range check elimination will try to correlate it with + // the other appearances that are getting CSEd. + + ValueNum oldCmpVN = cmp->gtVNPair.GetConservative(); + ValueNum newCmpArgVN; + + ValueNumStore::CompareCheckedBoundArithInfo info; + if (vnStore->IsVNCompareCheckedBound(oldCmpVN)) + { + // Comparison is against the bound directly. 
- assert(vnStore->IsVNCompareCheckedBoundArith(oldCmpVN)); - vnStore->GetCompareCheckedBoundArithInfo(oldCmpVN, &info); - newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper, - info.arrOp, theConservativeVN); + newCmpArgVN = theConservativeVN; + vnStore->GetCompareCheckedBound(oldCmpVN, &info); + } + else + { + // Comparison is against the bound +/- some offset. + + assert(vnStore->IsVNCompareCheckedBoundArith(oldCmpVN)); + vnStore->GetCompareCheckedBoundArithInfo(oldCmpVN, &info); + newCmpArgVN = vnStore->VNForFunc(vnStore->TypeOfVN(info.arrOp), (VNFunc)info.arrOper, + info.arrOp, theConservativeVN); + } + ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper, + info.cmpOp, newCmpArgVN); + cmp->gtVNPair.SetConservative(newCmpVN); } - ValueNum newCmpVN = vnStore->VNForFunc(vnStore->TypeOfVN(oldCmpVN), (VNFunc)info.cmpOper, - info.cmpOp, newCmpArgVN); - cmp->gtVNPair.SetConservative(newCmpVN); } } #ifdef DEBUG @@ -2878,10 +3121,9 @@ class CSE_Heuristic } #endif - GenTree* cseVal = cse; - GenTree* curSideEff = sideEffList; - ValueNumStore* vnStore = m_pCompiler->vnStore; - ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet(); + GenTree* cseVal = cse; + GenTree* curSideEff = sideEffList; + ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet(); while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG)) { @@ -2936,6 +3178,17 @@ class CSE_Heuristic exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field GenTree* val = exp; + if (isSharedConst) + { + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); + ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); + ssize_t delta = curValue - dsc->csdConstDefValue; + if (delta != 0) + { + val = m_pCompiler->gtNewIconNode(dsc->csdConstDefValue, cseLclVarTyp); + val->gtVNPair.SetBoth(dsc->csdConstDefVN); + } + } /* Create an assignment of the value to the temp */ GenTree* asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val); @@ -2977,19 +3230,37 @@ class CSE_Heuristic } /* Create a reference to the CSE temp */ - GenTree* ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); - ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val' + GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); + cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); + + // Assign the ssa num for the lclvar use. Note it may be the reserved num. + cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - // Assign the ssa num for the ref use. Note it may be the reserved num. 
- ref->AsLclVarCommon()->SetSsaNum(cseSsaNum); + GenTree* cseUse = cseLclVar; + if (isSharedConst) + { + ValueNum currVN = m_pCompiler->vnStore->VNLiberalNormalValue(exp->gtVNPair); + ssize_t curValue = m_pCompiler->vnStore->CoercedConstantValue(currVN); + ssize_t delta = curValue - dsc->csdConstDefValue; + if (delta != 0) + { + GenTree* deltaNode = m_pCompiler->gtNewIconNode(delta, cseLclVarTyp); + cseUse = m_pCompiler->gtNewOperNode(GT_ADD, cseLclVarTyp, cseLclVar, deltaNode); + cseUse->SetDoNotCSE(); + } + } + cseUse->gtVNPair = val->gtVNPair; // The 'cseUse' is equal to 'val' /* Create a comma node for the CSE assignment */ - cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origAsg, ref); - cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val' - // as the assignment to the CSE LclVar - // cannot add any new exceptions + cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, origAsg, cseUse); + cse->gtVNPair = cseUse->gtVNPair; // The comma's value is the same as 'val' + // as the assignment to the CSE LclVar + // cannot add any new exceptions } + cse->CopyReg(exp); // The cse inherits any reg num property from the original exp node + exp->ClearRegNum(); // The exp node (for a CSE def) no longer has a register requirement + // Walk the statement 'stmt' and find the pointer // in the tree is pointing to 'exp' // @@ -3069,9 +3340,19 @@ class CSE_Heuristic #ifdef DEBUG if (m_pCompiler->verbose) { - printf("\nConsidering CSE #%02u {$%-3x, $%-3x} [def=%3u, use=%3u, cost=%3u%s]\n", candidate.CseIndex(), - dsc->csdHashKey, dsc->defExcSetPromise, candidate.DefCount(), candidate.UseCount(), - candidate.Cost(), dsc->csdLiveAcrossCall ? ", call" : " "); + if (!Compiler::Is_Shared_Const_CSE(dsc->csdHashKey)) + { + printf("\nConsidering CSE #%02u {$%-3x, $%-3x} [def=%3u, use=%3u, cost=%3u%s]\n", + candidate.CseIndex(), dsc->csdHashKey, dsc->defExcSetPromise, candidate.DefCount(), + candidate.UseCount(), candidate.Cost(), dsc->csdLiveAcrossCall ? ", call" : " "); + } + else + { + size_t kVal = Compiler::Decode_Shared_Const_CSE_Value(dsc->csdHashKey); + printf("\nConsidering CSE #%02u {K_%p} [def=%3u, use=%3u, cost=%3u%s]\n", candidate.CseIndex(), + dspPtr(kVal), candidate.DefCount(), candidate.UseCount(), candidate.Cost(), + dsc->csdLiveAcrossCall ?
", call" : " "); + } printf("CSE Expression : \n"); m_pCompiler->gtDispTree(candidate.Expr()); printf("\n"); @@ -3306,8 +3587,11 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) return (tree->AsOp()->gtOp1->gtOper != GT_ARR_ELEM); - case GT_CNS_INT: case GT_CNS_LNG: +#ifndef TARGET_64BIT + return false; // Don't CSE 64-bit constants on 32-bit platforms +#endif + case GT_CNS_INT: case GT_CNS_DBL: case GT_CNS_STR: return true; // We reach here only when CSE_CONSTS is enabled diff --git a/src/coreclr/src/jit/optimizer.cpp b/src/coreclr/src/jit/optimizer.cpp index d953ecd92aed..ed5bd116b380 100644 --- a/src/coreclr/src/jit/optimizer.cpp +++ b/src/coreclr/src/jit/optimizer.cpp @@ -7920,10 +7920,11 @@ bool Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk) } break; - case GT_LOCKADD: // Binop - case GT_XADD: // Binop - case GT_XCHG: // Binop - case GT_CMPXCHG: // Specialop + case GT_LOCKADD: + case GT_XADD: + case GT_XCHG: + case GT_CMPXCHG: + case GT_MEMORYBARRIER: { assert(!tree->OperIs(GT_LOCKADD) && "LOCKADD should not appear before lowering"); memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed); diff --git a/src/coreclr/src/jit/rationalize.cpp b/src/coreclr/src/jit/rationalize.cpp index b878a30beeae..8054fda6e8ef 100644 --- a/src/coreclr/src/jit/rationalize.cpp +++ b/src/coreclr/src/jit/rationalize.cpp @@ -726,7 +726,7 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge case GT_INTRINSIC: // Non-target intrinsics should have already been rewritten back into user calls. - assert(comp->IsTargetIntrinsic(node->AsIntrinsic()->gtIntrinsicId)); + assert(comp->IsTargetIntrinsic(node->AsIntrinsic()->gtIntrinsicName)); break; #ifdef FEATURE_SIMD @@ -903,7 +903,7 @@ PhaseStatus Rationalizer::DoPhase() { GenTree* const node = *use; if (node->OperGet() == GT_INTRINSIC && - m_rationalizer.comp->IsIntrinsicImplementedByUserCall(node->AsIntrinsic()->gtIntrinsicId)) + m_rationalizer.comp->IsIntrinsicImplementedByUserCall(node->AsIntrinsic()->gtIntrinsicName)) { m_rationalizer.RewriteIntrinsicAsUserCall(use, this->m_ancestors); } diff --git a/src/coreclr/src/jit/scopeinfo.cpp b/src/coreclr/src/jit/scopeinfo.cpp index 5fc7bca119f4..77d8233cac63 100644 --- a/src/coreclr/src/jit/scopeinfo.cpp +++ b/src/coreclr/src/jit/scopeinfo.cpp @@ -1673,7 +1673,7 @@ void CodeGen::psiEndProlog() We still report all the arguments at the very start of the method so that the user can see the arguments at the very start of the method (offset=0). - Disabling this decreased the debug maps in mscorlib by 10% (01/2003) + Disabling this decreased the debug maps in CoreLib by 10% (01/2003) */ #if 0 diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 6e48db4d8212..dec5706181a4 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -1069,26 +1069,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, assert(op3 != nullptr); #if defined(TARGET_XARCH) - bool isVectorT256 = (simdSize == 32); - // Vector for the rel-ops covered here requires at least SSE2 assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); // Vector, when 32-bytes, requires at least AVX2 - assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); - - if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - NamedIntrinsic hwIntrinsic = NI_SSE41_BlendVariable; - - if (isVectorT256) - { - hwIntrinsic = varTypeIsIntegral(baseType) ? 
NI_AVX2_BlendVariable : NI_AVX_BlendVariable; - } - - return gtNewSimdAsHWIntrinsicNode(retType, op3, op2, op1, hwIntrinsic, baseType, simdSize); - } -#endif // TARGET_XARCH + assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); NamedIntrinsic hwIntrinsic; @@ -1113,6 +1098,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, // result = op2 | op3 hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType); return gtNewSimdAsHWIntrinsicNode(retType, op2, op3, hwIntrinsic, baseType, simdSize); +#elif defined(TARGET_ARM64) + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, op3, NI_AdvSimd_BitwiseSelect, baseType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 } #if defined(TARGET_XARCH) diff --git a/src/coreclr/src/jit/ssabuilder.cpp b/src/coreclr/src/jit/ssabuilder.cpp index d562065bf216..62efd8696dac 100644 --- a/src/coreclr/src/jit/ssabuilder.cpp +++ b/src/coreclr/src/jit/ssabuilder.cpp @@ -1656,8 +1656,6 @@ bool SsaBuilder::IncludeInSsa(unsigned lclNum) // - SSA doesn't allow a single node to contain multiple SSA definitions. // - and PROMOTION_TYPE_DEPENDEDNT fields are never candidates for a register. // - // Example mscorlib method: CompatibilitySwitches:IsCompatibilitySwitchSet - // return false; } else if (varDsc->lvIsStructField && m_pCompiler->lvaGetDesc(varDsc->lvParentLcl)->lvIsMultiRegRet) diff --git a/src/coreclr/src/jit/static/CMakeLists.txt b/src/coreclr/src/jit/static/CMakeLists.txt index b4e62c041cd4..01bdbf5a731f 100644 --- a/src/coreclr/src/jit/static/CMakeLists.txt +++ b/src/coreclr/src/jit/static/CMakeLists.txt @@ -2,14 +2,17 @@ project(ClrJit) set_source_files_properties(${JIT_EXPORTS_FILE} PROPERTIES GENERATED TRUE) -add_library_clr(clrjit_static +add_library_clr(clrjit_obj OBJECT ${JIT_CORE_SOURCES} ${JIT_ARCH_SOURCES} ) if(CLR_CMAKE_HOST_UNIX) - add_dependencies(clrjit_static coreclrpal gcinfo) + add_dependencies(clrjit_obj coreclrpal gcinfo) endif(CLR_CMAKE_HOST_UNIX) -target_precompile_header(TARGET clrjit_static HEADER jitpch.h ADDITIONAL_INCLUDE_DIRECTORIES ${JIT_SOURCE_DIR}) +target_precompile_header(TARGET clrjit_obj HEADER jitpch.h ADDITIONAL_INCLUDE_DIRECTORIES ${JIT_SOURCE_DIR}) + +add_library(clrjit_static INTERFACE) +target_sources(clrjit_static INTERFACE $) diff --git a/src/coreclr/src/jit/target.h b/src/coreclr/src/jit/target.h index 4e75434cd242..7fcaa87d2d22 100644 --- a/src/coreclr/src/jit/target.h +++ b/src/coreclr/src/jit/target.h @@ -31,12 +31,15 @@ // with static const members of Target #if defined(TARGET_XARCH) #define REGMASK_BITS 32 +#define CSE_CONST_SHARED_LOW_BITS 16 #elif defined(TARGET_ARM) #define REGMASK_BITS 64 +#define CSE_CONST_SHARED_LOW_BITS 12 #elif defined(TARGET_ARM64) #define REGMASK_BITS 64 +#define CSE_CONST_SHARED_LOW_BITS 12 #else #error Unsupported or unset target architecture @@ -433,6 +436,7 @@ typedef unsigned char regNumberSmall; #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved EBP and return address #define MAX_REG_ARG 2 + #define MAX_FLOAT_REG_ARG 0 #define REG_ARG_FIRST REG_ECX #define REG_ARG_LAST REG_EDX @@ -441,10 +445,8 @@ typedef unsigned char regNumberSmall; #define REG_ARG_0 REG_ECX #define REG_ARG_1 REG_EDX - constexpr regNumber intArgRegs [] = {REG_ECX, REG_EDX}; - constexpr regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX}; - constexpr regNumber fltArgRegs [] = {REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3}; - constexpr regMaskTP fltArgMasks[] = {RBM_XMM0, RBM_XMM1, 
RBM_XMM2, RBM_XMM3}; + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; #define RBM_ARG_0 RBM_ECX #define RBM_ARG_1 RBM_EDX @@ -779,10 +781,10 @@ typedef unsigned char regNumberSmall; #define REG_ARG_4 REG_R8 #define REG_ARG_5 REG_R9 - constexpr regNumber intArgRegs [] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 }; - constexpr regMaskTP intArgMasks[] = { RBM_EDI, RBM_ESI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 }; - constexpr regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 }; - constexpr regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 }; + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; #define RBM_ARG_0 RBM_RDI #define RBM_ARG_1 RBM_RSI @@ -802,10 +804,10 @@ typedef unsigned char regNumberSmall; #define REG_ARG_2 REG_R8 #define REG_ARG_3 REG_R9 - constexpr regNumber intArgRegs [] = { REG_ECX, REG_EDX, REG_R8, REG_R9 }; - constexpr regMaskTP intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 }; - constexpr regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 }; - constexpr regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 }; + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; #define RBM_ARG_0 RBM_ECX #define RBM_ARG_1 RBM_EDX @@ -1103,7 +1105,9 @@ typedef unsigned char regNumberSmall; // The registers trashed by profiler enter/leave/tailcall hook // See vm\arm\asmhelpers.asm for more details. #define RBM_PROFILER_ENTER_TRASH RBM_NONE - #define RBM_PROFILER_LEAVE_TRASH RBM_NONE + // While REG_PROFILER_RET_SCRATCH is not trashed by the method, the register allocator must + // consider it killed by the return. + #define RBM_PROFILER_LEAVE_TRASH RBM_PROFILER_RET_SCRATCH #define RBM_PROFILER_TAILCALL_TRASH RBM_NONE // Which register are int and long values returned in ? 
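The target.h hunks above replace the constexpr argument-register tables defined in the header with extern declarations whose single definitions move into the per-target .cpp files (targetamd64.cpp and friends, shown further below). A minimal sketch of that declaration/definition split, with hypothetical names and register values standing in for the JIT's:

```cpp
// Header side: declaration only, sized against the arch's MAX_REG_ARG.
constexpr int kMaxRegArg = 4;
extern const int kIntArgRegs[kMaxRegArg];

// Source side (exactly one .cpp file): the single shared definition.
const int kIntArgRegs[kMaxRegArg] = {1, 2, 8, 9};

// Any translation unit can then use the table without owning a copy of it.
int firstIntArgReg()
{
    return kIntArgRegs[0];
}
```

A plain constexpr (hence const) array at namespace scope in a header has internal linkage, so every translation unit that includes the header carries its own copy; the extern-const form keeps one definition for the whole binary while the declared bound is still checked against the per-target register count.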
@@ -1150,8 +1154,8 @@ typedef unsigned char regNumberSmall; #define REG_ARG_2 REG_R2 #define REG_ARG_3 REG_R3 - constexpr regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3}; - constexpr regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3}; + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; #define RBM_ARG_0 RBM_R0 #define RBM_ARG_1 RBM_R1 @@ -1162,8 +1166,8 @@ typedef unsigned char regNumberSmall; #define RBM_FLTARG_REGS (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15) #define RBM_DBL_REGS RBM_ALLDOUBLE - constexpr regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7, REG_F8, REG_F9, REG_F10, REG_F11, REG_F12, REG_F13, REG_F14, REG_F15 }; - constexpr regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 }; + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; #define LBL_DIST_SMALL_MAX_NEG (0) #define LBL_DIST_SMALL_MAX_POS (+1020) @@ -1484,8 +1488,8 @@ typedef unsigned char regNumberSmall; #define REG_ARG_6 REG_R6 #define REG_ARG_7 REG_R7 - constexpr regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7}; - constexpr regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7}; + extern const regNumber intArgRegs [MAX_REG_ARG]; + extern const regMaskTP intArgMasks[MAX_REG_ARG]; #define RBM_ARG_0 RBM_R0 #define RBM_ARG_1 RBM_R1 @@ -1517,8 +1521,8 @@ typedef unsigned char regNumberSmall; #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7) #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7) - constexpr regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 }; - constexpr regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 }; + extern const regNumber fltArgRegs [MAX_FLOAT_REG_ARG]; + extern const regMaskTP fltArgMasks[MAX_FLOAT_REG_ARG]; #define LBL_DIST_SMALL_MAX_NEG (-1048576) #define LBL_DIST_SMALL_MAX_POS (+1048575) @@ -1594,6 +1598,7 @@ C_ASSERT((FEATURE_TAILCALL_OPT == 0) || (FEATURE_FASTTAILCALL == 1)); #if CPU_HAS_BYTE_REGS #define RBM_BYTE_REGS (RBM_EAX|RBM_ECX|RBM_EDX|RBM_EBX) + #define BYTE_REG_COUNT 4 #define RBM_NON_BYTE_REGS (RBM_ESI|RBM_EDI) #else #define RBM_BYTE_REGS RBM_ALLINT @@ -1616,11 +1621,11 @@ class Target static const enum ArgOrder g_tgtArgOrder; }; -#if defined(DEBUG) || defined(LATE_DISASM) +#if defined(DEBUG) || defined(LATE_DISASM) || DUMP_GC_TABLES const char* getRegName(unsigned reg, bool isFloat = false); // this is for gcencode.cpp and disasm.cpp that don't use // the regNumber type const char* getRegName(regNumber reg, bool isFloat = false); -#endif // defined(DEBUG) || defined(LATE_DISASM) +#endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_GC_TABLES #ifdef DEBUG const char* getRegNameFloat(regNumber reg, var_types type); @@ -1997,9 +2002,13 @@ C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); #ifdef TARGET_64BIT typedef unsigned __int64 target_size_t; typedef __int64 target_ssize_t; -#else // !TARGET_64BIT +#define TARGET_SIGN_BIT (1ULL << 63) + +#else // !TARGET_64BIT typedef unsigned int target_size_t; typedef int target_ssize_t; +#define 
TARGET_SIGN_BIT (1ULL << 31) + #endif // !TARGET_64BIT C_ASSERT(sizeof(target_size_t) == TARGET_POINTER_SIZE); diff --git a/src/coreclr/src/jit/targetamd64.cpp b/src/coreclr/src/jit/targetamd64.cpp index 143e6e464180..372c4dffc27b 100644 --- a/src/coreclr/src/jit/targetamd64.cpp +++ b/src/coreclr/src/jit/targetamd64.cpp @@ -15,4 +15,18 @@ const char* Target::g_tgtCPUName = "x64"; const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +// clang-format off +#ifdef UNIX_AMD64_ABI +const regNumber intArgRegs [] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 }; +const regMaskTP intArgMasks[] = { RBM_EDI, RBM_ESI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 }; +const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 }; +const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 }; +#else // !UNIX_AMD64_ABI +const regNumber intArgRegs [] = { REG_ECX, REG_EDX, REG_R8, REG_R9 }; +const regMaskTP intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 }; +const regNumber fltArgRegs [] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 }; +const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 }; +#endif // !UNIX_AMD64_ABI +// clang-format on + #endif // TARGET_AMD64 diff --git a/src/coreclr/src/jit/targetarm.cpp b/src/coreclr/src/jit/targetarm.cpp index ca974a76af39..da125cbb436a 100644 --- a/src/coreclr/src/jit/targetarm.cpp +++ b/src/coreclr/src/jit/targetarm.cpp @@ -15,4 +15,12 @@ const char* Target::g_tgtCPUName = "arm"; const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +// clang-format off +const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3}; +const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3}; + +const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7, REG_F8, REG_F9, REG_F10, REG_F11, REG_F12, REG_F13, REG_F14, REG_F15 }; +const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 }; +// clang-format on + #endif // TARGET_ARM diff --git a/src/coreclr/src/jit/targetarm64.cpp b/src/coreclr/src/jit/targetarm64.cpp index 7b035f145b01..8f5481a83e02 100644 --- a/src/coreclr/src/jit/targetarm64.cpp +++ b/src/coreclr/src/jit/targetarm64.cpp @@ -15,4 +15,12 @@ const char* Target::g_tgtCPUName = "arm64"; const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L; +// clang-format off +const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7}; +const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7}; + +const regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 }; +const regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 }; +// clang-format on + #endif // TARGET_ARM64 diff --git a/src/coreclr/src/jit/targetx86.cpp b/src/coreclr/src/jit/targetx86.cpp index 391a934e5b9e..fab7286782a2 100644 --- a/src/coreclr/src/jit/targetx86.cpp +++ b/src/coreclr/src/jit/targetx86.cpp @@ -15,4 +15,9 @@ const char* Target::g_tgtCPUName = "x86"; const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_L2R; +// clang-format off +const regNumber intArgRegs [] = {REG_ECX, REG_EDX}; +const regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX}; +// clang-format on + #endif // TARGET_X86 diff --git a/src/coreclr/src/jit/unwind.h b/src/coreclr/src/jit/unwind.h index 
5541450e0ef7..f510eb6f1d82 100644 --- a/src/coreclr/src/jit/unwind.h +++ b/src/coreclr/src/jit/unwind.h @@ -150,7 +150,7 @@ class UnwindCodesBase class UnwindPrologCodes : public UnwindBase, public UnwindCodesBase { - // UPC_LOCAL_COUNT is the amount of memory local to this class. For ARM mscorlib.dll, the maximum size is 34. + // UPC_LOCAL_COUNT is the amount of memory local to this class. For ARM CoreLib, the maximum size is 34. // Here is a histogram of other interesting sizes: // <=16 79% // <=24 96% @@ -314,7 +314,7 @@ class UnwindPrologCodes : public UnwindBase, public UnwindCodesBase class UnwindEpilogCodes : public UnwindBase, public UnwindCodesBase { - // UEC_LOCAL_COUNT is the amount of memory local to this class. For ARM mscorlib.dll, the maximum size is 6, + // UEC_LOCAL_COUNT is the amount of memory local to this class. For ARM CoreLib, the maximum size is 6, // while 89% of epilogs fit in 4. So, set it to 4 to maintain array alignment and hit most cases. static const int UEC_LOCAL_COUNT = 4; diff --git a/src/coreclr/src/jit/unwindarm64.cpp b/src/coreclr/src/jit/unwindarm64.cpp index d7356f4a1fd8..665e0ce9f525 100644 --- a/src/coreclr/src/jit/unwindarm64.cpp +++ b/src/coreclr/src/jit/unwindarm64.cpp @@ -22,11 +22,208 @@ short Compiler::mapRegNumToDwarfReg(regNumber reg) { short dwarfReg = DWARF_REG_ILLEGAL; - NYI("CFI codes"); + switch (reg) + { + case REG_R0: + dwarfReg = 0; + break; + case REG_R1: + dwarfReg = 1; + break; + case REG_R2: + dwarfReg = 2; + break; + case REG_R3: + dwarfReg = 3; + break; + case REG_R4: + dwarfReg = 4; + break; + case REG_R5: + dwarfReg = 5; + break; + case REG_R6: + dwarfReg = 6; + break; + case REG_R7: + dwarfReg = 7; + break; + case REG_R8: + dwarfReg = 8; + break; + case REG_R9: + dwarfReg = 9; + break; + case REG_R10: + dwarfReg = 10; + break; + case REG_R11: + dwarfReg = 11; + break; + case REG_R12: + dwarfReg = 12; + break; + case REG_R13: + dwarfReg = 13; + break; + case REG_R14: + dwarfReg = 14; + break; + case REG_R15: + dwarfReg = 15; + break; + case REG_R16: + dwarfReg = 16; + break; + case REG_R17: + dwarfReg = 17; + break; + case REG_R18: + dwarfReg = 18; + break; + case REG_R19: + dwarfReg = 19; + break; + case REG_R20: + dwarfReg = 20; + break; + case REG_R21: + dwarfReg = 21; + break; + case REG_R22: + dwarfReg = 22; + break; + case REG_R23: + dwarfReg = 23; + break; + case REG_R24: + dwarfReg = 24; + break; + case REG_R25: + dwarfReg = 25; + break; + case REG_R26: + dwarfReg = 26; + break; + case REG_R27: + dwarfReg = 27; + break; + case REG_R28: + dwarfReg = 28; + break; + case REG_R29: + dwarfReg = 29; + break; + case REG_R30: + dwarfReg = 30; + break; + case REG_SP: + dwarfReg = 31; + break; + case REG_V0: + dwarfReg = 64; + break; + case REG_V1: + dwarfReg = 65; + break; + case REG_V2: + dwarfReg = 66; + break; + case REG_V3: + dwarfReg = 67; + break; + case REG_V4: + dwarfReg = 68; + break; + case REG_V5: + dwarfReg = 69; + break; + case REG_V6: + dwarfReg = 70; + break; + case REG_V7: + dwarfReg = 71; + break; + case REG_V8: + dwarfReg = 72; + break; + case REG_V9: + dwarfReg = 73; + break; + case REG_V10: + dwarfReg = 74; + break; + case REG_V11: + dwarfReg = 75; + break; + case REG_V12: + dwarfReg = 76; + break; + case REG_V13: + dwarfReg = 77; + break; + case REG_V14: + dwarfReg = 78; + break; + case REG_V15: + dwarfReg = 79; + break; + case REG_V16: + dwarfReg = 80; + break; + case REG_V17: + dwarfReg = 81; + break; + case REG_V18: + dwarfReg = 82; + break; + case REG_V19: + dwarfReg = 83; + break; + case REG_V20: + 
dwarfReg = 84; + break; + case REG_V21: + dwarfReg = 85; + break; + case REG_V22: + dwarfReg = 86; + break; + case REG_V23: + dwarfReg = 87; + break; + case REG_V24: + dwarfReg = 88; + break; + case REG_V25: + dwarfReg = 89; + break; + case REG_V26: + dwarfReg = 90; + break; + case REG_V27: + dwarfReg = 91; + break; + case REG_V28: + dwarfReg = 92; + break; + case REG_V29: + dwarfReg = 93; + break; + case REG_V30: + dwarfReg = 94; + break; + case REG_V31: + dwarfReg = 95; + break; + + default: + NYI("CFI codes"); + } return dwarfReg; } -#endif // TARGET_ARM +#endif // TARGET_UNIX void Compiler::unwindPush(regNumber reg) { @@ -35,6 +232,18 @@ void Compiler::unwindPush(regNumber reg) void Compiler::unwindAllocStack(unsigned size) { +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindAllocStackCFI(size); + } + + return; + } +#endif // TARGET_UNIX + UnwindInfo* pu = &funCurrentFunc()->uwi; assert(size % 16 == 0); @@ -67,6 +276,18 @@ void Compiler::unwindAllocStack(unsigned size) void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset) { +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + unwindSetFrameRegCFI(reg, offset); + } + + return; + } +#endif // TARGET_UNIX + UnwindInfo* pu = &funCurrentFunc()->uwi; if (offset == 0) @@ -121,14 +342,30 @@ void Compiler::unwindNop() // which we should do instead). void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) { - UnwindInfo* pu = &funCurrentFunc()->uwi; - // stp reg1, reg2, [sp, #offset] // offset for store pair in prolog must be positive and a multiple of 8. assert(0 <= offset && offset <= 504); assert((offset % 8) == 0); +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg1), offset); + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg2), offset + 8); + } + + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + int z = offset / 8; assert(0 <= z && z <= 0x3F); @@ -187,14 +424,31 @@ void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset) // reg1. void Compiler::unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int offset) { - UnwindInfo* pu = &funCurrentFunc()->uwi; - // stp reg1, reg2, [sp, #offset]! // pre-indexed offset in prolog must be negative and a multiple of 8. assert(offset < 0); assert((offset % 8) == 0); +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, -offset); + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg1), 0); + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg2), 8); + } + + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + if (reg1 == REG_FP) { // save_fplr_x: 10zzzzzz: save pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -512 @@ -259,17 +513,32 @@ void Compiler::unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int o void Compiler::unwindSaveReg(regNumber reg, int offset) { - UnwindInfo* pu = &funCurrentFunc()->uwi; - // str reg, [sp, #offset] // offset for store in prolog must be positive and a multiple of 8. 
assert(0 <= offset && offset <= 504); assert((offset % 8) == 0); +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset); + } + + return; + } +#endif // TARGET_UNIX + int z = offset / 8; assert(0 <= z && z <= 0x3F); + UnwindInfo* pu = &funCurrentFunc()->uwi; + if (emitter::isGeneralRegister(reg)) { // save_reg: 110100xx | xxzzzzzz: save reg r(19 + #X) at [sp + #Z * 8], offset <= 504 @@ -298,14 +567,30 @@ void Compiler::unwindSaveReg(regNumber reg, int offset) void Compiler::unwindSaveRegPreindexed(regNumber reg, int offset) { - UnwindInfo* pu = &funCurrentFunc()->uwi; - // str reg, [sp, #offset]! // pre-indexed offset in prolog must be negative and a multiple of 8. assert(-256 <= offset && offset < 0); assert((offset % 8) == 0); +#if defined(TARGET_UNIX) + if (generateCFIUnwindCodes()) + { + if (compGeneratingProlog) + { + FuncInfoDsc* func = funCurrentFunc(); + UNATIVE_OFFSET cbProlog = unwindGetCurrentOffset(func); + + createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, -offset); + createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), 0); + } + + return; + } +#endif // TARGET_UNIX + + UnwindInfo* pu = &funCurrentFunc()->uwi; + int z = (-offset) / 8 - 1; assert(0 <= z && z <= 0x1F); @@ -337,6 +622,11 @@ void Compiler::unwindSaveRegPreindexed(regNumber reg, int offset) void Compiler::unwindSaveNext() { +#if defined(TARGET_UNIX) + // do not use unwindSaveNext when generating CFI codes as there is no code for this + assert(!generateCFIUnwindCodes()); +#endif // TARGET_UNIX + UnwindInfo* pu = &funCurrentFunc()->uwi; // We're saving the next register pair. The caller is responsible for ensuring this is correct! diff --git a/src/coreclr/src/jit/utils.cpp b/src/coreclr/src/jit/utils.cpp index 4477ceb16295..de1fd4a880b2 100644 --- a/src/coreclr/src/jit/utils.cpp +++ b/src/coreclr/src/jit/utils.cpp @@ -128,7 +128,7 @@ const char* varTypeName(var_types vt) return varTypeNames[vt]; } -#if defined(DEBUG) || defined(LATE_DISASM) +#if defined(DEBUG) || defined(LATE_DISASM) || DUMP_GC_TABLES /***************************************************************************** * * Return the name of the given register. @@ -164,7 +164,7 @@ const char* getRegName(unsigned reg, { return getRegName((regNumber)reg, isFloat); } -#endif // defined(DEBUG) || defined(LATE_DISASM) +#endif // defined(DEBUG) || defined(LATE_DISASM) || DUMP_GC_TABLES #if defined(DEBUG) @@ -767,11 +767,11 @@ void ConfigMethodRange::InitRanges(const WCHAR* rangeStr, unsigned capacity) } else if ((L'A' <= *p) && (*p <= L'F')) { - n = (*p++) - L'A'; + n = (*p++) - L'A' + 10; } else if ((L'a' <= *p) && (*p <= L'f')) { - n = (*p++) - L'a'; + n = (*p++) - L'a' + 10; } int j = 16 * i + n;
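The two "+ 10" corrections above fix a real decoding bug: the hex digits A-F and a-f previously mapped to 0-5 instead of 10-15, so any method range written with those digits parsed to the wrong value. A standalone sketch of the corrected digit mapping (illustrative only, not code from this patch):

#include <cassert>

static int HexDigitValue(wchar_t c)
{
    if ((L'0' <= c) && (c <= L'9'))
        return c - L'0';
    if ((L'A' <= c) && (c <= L'F'))
        return c - L'A' + 10; // without the "+ 10", L'F' decoded to 5 instead of 15
    if ((L'a' <= c) && (c <= L'f'))
        return c - L'a' + 10;
    return -1; // not a hex digit
}

int main()
{
    assert(HexDigitValue(L'F') == 15);
    assert(HexDigitValue(L'a') == 10);
    assert(HexDigitValue(L'7') == 7);
    return 0;
}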
diff --git a/src/coreclr/src/jit/valuenum.cpp b/src/coreclr/src/jit/valuenum.cpp index 1cc0cab621ca..fc93bebe885f 100644 --- a/src/coreclr/src/jit/valuenum.cpp +++ b/src/coreclr/src/jit/valuenum.cpp @@ -4137,14 +4137,23 @@ ValueNum Compiler::fgValueNumberArrIndexVal(GenTree* tree, ValueNum Compiler::fgValueNumberByrefExposedLoad(var_types type, ValueNum pointerVN) { - ValueNum memoryVN = fgCurMemoryVN[ByrefExposed]; - // The memoization for VNFunc applications does not factor in the result type, so - // VNF_ByrefExposedLoad takes the loaded type as an explicit parameter. - ValueNum typeVN = vnStore->VNForIntCon(type); - ValueNum loadVN = - vnStore->VNForFunc(type, VNF_ByrefExposedLoad, typeVN, vnStore->VNNormalValue(pointerVN), memoryVN); - - return loadVN; + if (type == TYP_STRUCT) + { + // We can't assign a value number for a read of a struct as we can't determine + // how many bytes will be read by this load, so return a new unique value number + // + return vnStore->VNForExpr(compCurBB, TYP_STRUCT); + } + else + { + ValueNum memoryVN = fgCurMemoryVN[ByrefExposed]; + // The memoization for VNFunc applications does not factor in the result type, so + // VNF_ByrefExposedLoad takes the loaded type as an explicit parameter. + ValueNum typeVN = vnStore->VNForIntCon(type); + ValueNum loadVN = + vnStore->VNForFunc(type, VNF_ByrefExposedLoad, typeVN, vnStore->VNNormalValue(pointerVN), memoryVN); + return loadVN; + } } var_types ValueNumStore::TypeOfVN(ValueNum vn) @@ -4548,7 +4557,7 @@ void ValueNumStore::SetVNIsCheckedBound(ValueNum vn) m_checkedBoundVNs.AddOrUpdate(vn, true); } -ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN) +ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, NamedIntrinsic gtMathFN, ValueNum arg0VN) { assert(arg0VN == VNNormalValue(arg0VN)); @@ -4568,25 +4577,25 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMat double res = 0.0; switch (gtMathFN) { - case CORINFO_INTRINSIC_Sin: + case NI_System_Math_Sin: res = sin(arg0Val); break; - case CORINFO_INTRINSIC_Cos: + case NI_System_Math_Cos: res = cos(arg0Val); break; - case CORINFO_INTRINSIC_Sqrt: + case NI_System_Math_Sqrt: res = sqrt(arg0Val); break; - case CORINFO_INTRINSIC_Abs: + case NI_System_Math_Abs: res = fabs(arg0Val); break; - case CORINFO_INTRINSIC_Ceiling: + case NI_System_Math_Ceiling: res = ceil(arg0Val); break; - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Floor: res = floor(arg0Val); break; - case CORINFO_INTRINSIC_Round: + case NI_System_Math_Round: res = FloatingPointUtils::round(arg0Val); break; default: @@ -4604,25 +4613,25 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMat float res = 0.0f; switch (gtMathFN) { - case CORINFO_INTRINSIC_Sin: + case NI_System_Math_Sin: res = sinf(arg0Val); break; - case CORINFO_INTRINSIC_Cos: + case NI_System_Math_Cos: res = cosf(arg0Val); break; - case CORINFO_INTRINSIC_Sqrt: + case NI_System_Math_Sqrt: res = sqrtf(arg0Val); break; - case CORINFO_INTRINSIC_Abs: + case NI_System_Math_Abs: res = fabsf(arg0Val); break; - case CORINFO_INTRINSIC_Ceiling: + case NI_System_Math_Ceiling: res = ceilf(arg0Val); break; - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Floor: res = floorf(arg0Val); break; - case CORINFO_INTRINSIC_Round: + case NI_System_Math_Round: res = FloatingPointUtils::round(arg0Val); break; default: @@ -4633,11 +4642,11 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMat } else { - // CORINFO_INTRINSIC_Round is currently the only intrinsic that takes floating-point arguments - // and that returns a non floating-point result. + // NI_System_Math{F}_Round are currently the only intrinsics that take floating-point arguments + // and return a non floating-point result.
assert(typ == TYP_INT); - assert(gtMathFN == CORINFO_INTRINSIC_Round); + assert(gtMathFN == NI_System_Math_Round); int res = 0; @@ -4664,27 +4673,27 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMat } else { - assert(typ == TYP_DOUBLE || typ == TYP_FLOAT || (typ == TYP_INT && gtMathFN == CORINFO_INTRINSIC_Round)); + assert(typ == TYP_DOUBLE || typ == TYP_FLOAT || typ == TYP_INT && gtMathFN == NI_System_Math_Round); VNFunc vnf = VNF_Boundary; switch (gtMathFN) { - case CORINFO_INTRINSIC_Sin: + case NI_System_Math_Sin: vnf = VNF_Sin; break; - case CORINFO_INTRINSIC_Cos: + case NI_System_Math_Cos: vnf = VNF_Cos; break; - case CORINFO_INTRINSIC_Cbrt: + case NI_System_Math_Cbrt: vnf = VNF_Cbrt; break; - case CORINFO_INTRINSIC_Sqrt: + case NI_System_Math_Sqrt: vnf = VNF_Sqrt; break; - case CORINFO_INTRINSIC_Abs: + case NI_System_Math_Abs: vnf = VNF_Abs; break; - case CORINFO_INTRINSIC_Round: + case NI_System_Math_Round: if (typ == TYP_DOUBLE) { vnf = VNF_RoundDouble; @@ -4702,46 +4711,46 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMat noway_assert(!"Invalid INTRINSIC_Round"); } break; - case CORINFO_INTRINSIC_Cosh: + case NI_System_Math_Cosh: vnf = VNF_Cosh; break; - case CORINFO_INTRINSIC_Sinh: + case NI_System_Math_Sinh: vnf = VNF_Sinh; break; - case CORINFO_INTRINSIC_Tan: + case NI_System_Math_Tan: vnf = VNF_Tan; break; - case CORINFO_INTRINSIC_Tanh: + case NI_System_Math_Tanh: vnf = VNF_Tanh; break; - case CORINFO_INTRINSIC_Asin: + case NI_System_Math_Asin: vnf = VNF_Asin; break; - case CORINFO_INTRINSIC_Asinh: + case NI_System_Math_Asinh: vnf = VNF_Asinh; break; - case CORINFO_INTRINSIC_Acos: + case NI_System_Math_Acos: vnf = VNF_Acos; break; - case CORINFO_INTRINSIC_Acosh: + case NI_System_Math_Acosh: vnf = VNF_Acosh; break; - case CORINFO_INTRINSIC_Atan: + case NI_System_Math_Atan: vnf = VNF_Atan; break; - case CORINFO_INTRINSIC_Atanh: + case NI_System_Math_Atanh: vnf = VNF_Atanh; break; - case CORINFO_INTRINSIC_Log10: + case NI_System_Math_Log10: vnf = VNF_Log10; break; - case CORINFO_INTRINSIC_Exp: + case NI_System_Math_Exp: vnf = VNF_Exp; break; - case CORINFO_INTRINSIC_Ceiling: + case NI_System_Math_Ceiling: vnf = VNF_Ceiling; break; - case CORINFO_INTRINSIC_Floor: + case NI_System_Math_Floor: vnf = VNF_Floor; break; default: @@ -4752,7 +4761,7 @@ ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMat } } -ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN, ValueNum arg1VN) +ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, NamedIntrinsic gtMathFN, ValueNum arg0VN, ValueNum arg1VN) { assert(varTypeIsFloating(typ)); assert(arg0VN == VNNormalValue(arg0VN)); @@ -4765,11 +4774,11 @@ ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, CorInfoIntrinsics gtMa switch (gtMathFN) { - case CORINFO_INTRINSIC_Atan2: + case NI_System_Math_Atan2: vnf = VNF_Atan2; break; - case CORINFO_INTRINSIC_Pow: + case NI_System_Math_Pow: vnf = VNF_Pow; break; @@ -6780,6 +6789,10 @@ void Compiler::fgValueNumberBlockAssignment(GenTree* tree) { fgMutateAddressExposedLocal(tree DEBUGARG("COPYBLK - address-exposed local")); } + else + { + JITDUMP("LHS V%02u not in ssa at [%06u], so no VN assigned\n", lhsLclNum, dspTreeID(lclVarTree)); + } } else { @@ -8389,7 +8402,7 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) vnStore->VNPUnpackExc(intrinsic->AsOp()->gtOp2->gtVNPair, &arg1VNP, &arg1VNPx); } - if (IsMathIntrinsic(intrinsic->gtIntrinsicId)) + if 
(IsMathIntrinsic(intrinsic->gtIntrinsicName)) { // GT_INTRINSIC is currently a subtype of binary operators. But most of // the math intrinsics are actually unary operations. @@ -8397,13 +8410,13 @@ if (intrinsic->AsOp()->gtOp2 == nullptr) { intrinsic->gtVNPair = - vnStore->VNPWithExc(vnStore->EvalMathFuncUnary(tree->TypeGet(), intrinsic->gtIntrinsicId, arg0VNP), + vnStore->VNPWithExc(vnStore->EvalMathFuncUnary(tree->TypeGet(), intrinsic->gtIntrinsicName, arg0VNP), arg0VNPx); } else { ValueNumPair newVNP = - vnStore->EvalMathFuncBinary(tree->TypeGet(), intrinsic->gtIntrinsicId, arg0VNP, arg1VNP); + vnStore->EvalMathFuncBinary(tree->TypeGet(), intrinsic->gtIntrinsicName, arg0VNP, arg1VNP); ValueNumPair excSet = vnStore->VNPExcSetUnion(arg0VNPx, arg1VNPx); intrinsic->gtVNPair = vnStore->VNPWithExc(newVNP, excSet); } @@ -8615,7 +8628,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) else if (tree->AsOp()->gtOp1->OperIs(GT_LIST) || (lookupNumArgs == -1)) { // We have a HWINTRINSIC node in the GT_LIST form with 3 or more args - // Or the numArgs was specified as -1 in the numArgs column in "hwinstrinsiclistxarch.h" + // Or the numArgs was specified as -1 in the numArgs column in "hwintrinsiclistxarch.h" // For now we will generate a unique value number for this case. // Generate unique VN diff --git a/src/coreclr/src/jit/valuenum.h b/src/coreclr/src/jit/valuenum.h index 9f3cc2025be2..2f57b6323e67 100644 --- a/src/coreclr/src/jit/valuenum.h +++ b/src/coreclr/src/jit/valuenum.h @@ -818,20 +818,17 @@ class ValueNumStore // "arg0VN". For binary ops, return the value number for the application of this function to "arg0VN" and // "arg1VN". - ValueNum EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN); + ValueNum EvalMathFuncUnary(var_types typ, NamedIntrinsic mthFunc, ValueNum arg0VN); - ValueNum EvalMathFuncBinary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN, ValueNum arg1VN); + ValueNum EvalMathFuncBinary(var_types typ, NamedIntrinsic mthFunc, ValueNum arg0VN, ValueNum arg1VN); - ValueNumPair EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNumPair arg0VNP) + ValueNumPair EvalMathFuncUnary(var_types typ, NamedIntrinsic mthFunc, ValueNumPair arg0VNP) { return ValueNumPair(EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetLiberal()), EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetConservative())); } - ValueNumPair EvalMathFuncBinary(var_types typ, - CorInfoIntrinsics mthFunc, - ValueNumPair arg0VNP, - ValueNumPair arg1VNP) + ValueNumPair EvalMathFuncBinary(var_types typ, NamedIntrinsic mthFunc, ValueNumPair arg0VNP, ValueNumPair arg1VNP) { return ValueNumPair(EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetLiberal(), arg1VNP.GetLiberal()), EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetConservative(), arg1VNP.GetConservative()));
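The ValueNumPair overloads above exist only to lift the scalar EvalMathFuncUnary/EvalMathFuncBinary helpers over both value-number interpretations. A minimal sketch of that lifting shape, with invented stand-in types (the real ValueNum and ValueNumPair live in valuenum.h):

typedef unsigned ValueNum; // stand-in for illustration only

struct Pair
{
    ValueNum liberal;
    ValueNum conservative;
};

template <typename ScalarFn>
Pair LiftUnary(ScalarFn fn, Pair arg)
{
    // Each interpretation is folded independently; liberal and conservative
    // value numbers are never mixed within a single result.
    return { fn(arg.liberal), fn(arg.conservative) };
}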
diff --git a/src/coreclr/src/md/ceefilegen/CMakeLists.txt b/src/coreclr/src/md/ceefilegen/CMakeLists.txt index 39864c71817f..fd0f8424d97f 100644 --- a/src/coreclr/src/md/ceefilegen/CMakeLists.txt +++ b/src/coreclr/src/md/ceefilegen/CMakeLists.txt @@ -25,8 +25,11 @@ if (CLR_CMAKE_TARGET_WIN32) list(APPEND CEEFILEGEN_SOURCES ${CEEFILEGEN_HEADERS}) endif (CLR_CMAKE_TARGET_WIN32) -add_library_clr(ceefgen +add_library_clr(ceefgen_obj OBJECT ${CEEFILEGEN_SOURCES} ) -target_precompile_header(TARGET ceefgen HEADER stdafx.h) +target_precompile_header(TARGET ceefgen_obj HEADER stdafx.h) + +add_library(ceefgen INTERFACE) +target_sources(ceefgen INTERFACE $<TARGET_OBJECTS:ceefgen_obj>) diff --git a/src/coreclr/src/md/compiler/CMakeLists.txt b/src/coreclr/src/md/compiler/CMakeLists.txt index 495fa4d70ca2..f32c80407c6a 100644 --- a/src/coreclr/src/md/compiler/CMakeLists.txt +++ b/src/coreclr/src/md/compiler/CMakeLists.txt @@ -68,9 +68,11 @@ add_library_clr(mdcompiler_dac ${MDCOMPILER_SOURCES}) set_target_properties(mdcompiler_dac PROPERTIES DAC_COMPONENT TRUE) target_precompile_header(TARGET mdcompiler_dac HEADER stdafx.h) -add_library_clr(mdcompiler_wks OBJECT ${MDCOMPILER_WKS_SOURCES}) -target_compile_definitions(mdcompiler_wks PRIVATE FEATURE_METADATA_EMIT_ALL) -target_precompile_header(TARGET mdcompiler_wks HEADER stdafx.h) +add_library_clr(mdcompiler_wks_obj OBJECT ${MDCOMPILER_WKS_SOURCES}) +target_compile_definitions(mdcompiler_wks_obj PRIVATE FEATURE_METADATA_EMIT_ALL) +target_precompile_header(TARGET mdcompiler_wks_obj HEADER stdafx.h) +add_library(mdcompiler_wks INTERFACE) +target_sources(mdcompiler_wks INTERFACE $<TARGET_OBJECTS:mdcompiler_wks_obj>) add_library_clr(mdcompiler-dbi ${MDCOMPILER_SOURCES}) set_target_properties(mdcompiler-dbi PROPERTIES DBI_COMPONENT TRUE) diff --git a/src/coreclr/src/md/enc/CMakeLists.txt b/src/coreclr/src/md/enc/CMakeLists.txt index 14ed0a267fae..e30c51b1e37c 100644 --- a/src/coreclr/src/md/enc/CMakeLists.txt +++ b/src/coreclr/src/md/enc/CMakeLists.txt @@ -52,9 +52,11 @@ add_library_clr(mdruntimerw_dac ${MDRUNTIMERW_SOURCES}) set_target_properties(mdruntimerw_dac PROPERTIES DAC_COMPONENT TRUE) target_precompile_header(TARGET mdruntimerw_dac HEADER stdafx.h) -add_library_clr(mdruntimerw_wks OBJECT ${MDRUNTIMERW_SOURCES}) -target_compile_definitions(mdruntimerw_wks PRIVATE FEATURE_METADATA_EMIT_ALL) -target_precompile_header(TARGET mdruntimerw_wks HEADER stdafx.h) +add_library_clr(mdruntimerw_wks_obj OBJECT ${MDRUNTIMERW_SOURCES}) +target_compile_definitions(mdruntimerw_wks_obj PRIVATE FEATURE_METADATA_EMIT_ALL) +target_precompile_header(TARGET mdruntimerw_wks_obj HEADER stdafx.h) +add_library(mdruntimerw_wks INTERFACE) +target_sources(mdruntimerw_wks INTERFACE $<TARGET_OBJECTS:mdruntimerw_wks_obj>) add_library_clr(mdruntimerw-dbi ${MDRUNTIMERW_SOURCES}) set_target_properties(mdruntimerw-dbi PROPERTIES DBI_COMPONENT TRUE) diff --git a/src/coreclr/src/md/hotdata/CMakeLists.txt b/src/coreclr/src/md/hotdata/CMakeLists.txt index 03430e292c75..46381cf7dddd 100644 --- a/src/coreclr/src/md/hotdata/CMakeLists.txt +++ b/src/coreclr/src/md/hotdata/CMakeLists.txt @@ -33,8 +33,10 @@ add_library_clr(mdhotdata_dac ${MDHOTDATA_SOURCES}) set_target_properties(mdhotdata_dac PROPERTIES DAC_COMPONENT TRUE) target_precompile_header(TARGET mdhotdata_dac HEADER external.h) -add_library_clr(mdhotdata_full OBJECT ${MDHOTDATA_SOURCES}) -target_precompile_header(TARGET mdhotdata_full HEADER external.h) +add_library_clr(mdhotdata_full_obj OBJECT ${MDHOTDATA_SOURCES}) +target_precompile_header(TARGET mdhotdata_full_obj HEADER external.h) +add_library(mdhotdata_full INTERFACE) +target_sources(mdhotdata_full INTERFACE $<TARGET_OBJECTS:mdhotdata_full_obj>) add_library_clr(mdhotdata_crossgen ${MDHOTDATA_SOURCES}) set_target_properties(mdhotdata_crossgen PROPERTIES CROSSGEN_COMPONENT TRUE) diff --git a/src/coreclr/src/md/runtime/CMakeLists.txt b/src/coreclr/src/md/runtime/CMakeLists.txt index 5753e655abf9..06e9e8300019 100644 --- a/src/coreclr/src/md/runtime/CMakeLists.txt +++ b/src/coreclr/src/md/runtime/CMakeLists.txt @@ -49,9 +49,11 @@ add_library_clr(mdruntime_dac ${MDRUNTIME_SOURCES}) set_target_properties(mdruntime_dac PROPERTIES DAC_COMPONENT TRUE) target_precompile_header(TARGET mdruntime_dac HEADER stdafx.h) -add_library_clr(mdruntime_wks OBJECT ${MDRUNTIME_SOURCES}) -target_compile_definitions(mdruntime_wks PRIVATE FEATURE_METADATA_EMIT_ALL) -target_precompile_header(TARGET mdruntime_wks HEADER stdafx.h) +add_library_clr(mdruntime_wks_obj OBJECT ${MDRUNTIME_SOURCES}) +target_compile_definitions(mdruntime_wks_obj PRIVATE FEATURE_METADATA_EMIT_ALL) +target_precompile_header(TARGET mdruntime_wks_obj HEADER stdafx.h) +add_library(mdruntime_wks INTERFACE) +target_sources(mdruntime_wks INTERFACE $<TARGET_OBJECTS:mdruntime_wks_obj>) add_library_clr(mdruntime-dbi ${MDRUNTIME_SOURCES}) set_target_properties(mdruntime-dbi PROPERTIES DBI_COMPONENT TRUE) diff --git a/src/coreclr/src/pal/inc/pal.h b/src/coreclr/src/pal/inc/pal.h index df658c546905..fd56fc90c353 100644 --- a/src/coreclr/src/pal/inc/pal.h +++ b/src/coreclr/src/pal/inc/pal.h @@ -511,6 +511,32 @@ PAL_Random( IN OUT LPVOID lpBuffer, IN DWORD dwLength); +PALIMPORT +BOOL +PALAPI +PAL_OpenProcessMemory( + IN DWORD processId, + OUT DWORD* pHandle +); + +PALIMPORT +VOID +PALAPI +PAL_CloseProcessMemory( + IN DWORD handle +); + +PALIMPORT +BOOL +PALAPI +PAL_ReadProcessMemory( + IN DWORD handle, + IN ULONG64 address, + IN LPVOID buffer, + IN SIZE_T size, + OUT SIZE_T* numberOfBytesRead +); + PALIMPORT BOOL PALAPI
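Together, the three declarations added to pal.h above form a small open/read/close API for cross-process memory reads. A hedged usage sketch follows; DumpRemoteBytes, targetPid, and remoteAddress are invented placeholders, error logging is omitted, and the PAL headers are assumed to be on the include path:

#include "pal.h"

// Reads up to 256 bytes from another process; a sketch, not code from the patch.
bool DumpRemoteBytes(DWORD targetPid, ULONG64 remoteAddress)
{
    DWORD handle;
    if (!PAL_OpenProcessMemory(targetPid, &handle))
    {
        return false;
    }

    BYTE buffer[256];
    SIZE_T bytesRead = 0; // the implementation asserts this out-param is non-null
    BOOL ok = PAL_ReadProcessMemory(handle, remoteAddress, buffer, sizeof(buffer), &bytesRead);

    PAL_CloseProcessMemory(handle);
    return ok != FALSE;
}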
diff --git a/src/coreclr/src/pal/prebuilt/corerror/mscorurt.rc b/src/coreclr/src/pal/prebuilt/corerror/mscorurt.rc index f51e77ae1245..783d1adafb10 100644 --- a/src/coreclr/src/pal/prebuilt/corerror/mscorurt.rc +++ b/src/coreclr/src/pal/prebuilt/corerror/mscorurt.rc @@ -314,14 +314,13 @@ BEGIN MSG_FOR_URT_HR(CORDBG_E_UNSUPPORTED_DELEGATE) "The delegate contains a delegate currently not supported by the API." MSG_FOR_URT_HR(PEFMT_E_64BIT) "File is PE32+." MSG_FOR_URT_HR(PEFMT_E_32BIT) "File is PE32" - MSG_FOR_URT_HR(NGEN_E_SYS_ASM_NI_MISSING) "NGen cannot proceed because Mscorlib.dll does not have a native image" MSG_FOR_URT_HR(CLR_E_BIND_ASSEMBLY_VERSION_TOO_LOW) "The bound assembly has a version that is lower than that of the request." MSG_FOR_URT_HR(CLR_E_BIND_ASSEMBLY_PUBLIC_KEY_MISMATCH) "The assembly version has a public key token that does not match that of the request." MSG_FOR_URT_HR(CLR_E_BIND_IMAGE_UNAVAILABLE) "The requested image was not found or is unavailable." MSG_FOR_URT_HR(CLR_E_BIND_UNRECOGNIZED_IDENTITY_FORMAT) "The provided identity format is not recognized." MSG_FOR_URT_HR(CLR_E_BIND_ASSEMBLY_NOT_FOUND) "A binding for the specified assembly name was not found." MSG_FOR_URT_HR(CLR_E_BIND_TYPE_NOT_FOUND) "A binding for the specified type name was not found."
- MSG_FOR_URT_HR(CLR_E_BIND_SYS_ASM_NI_MISSING) "Could not use native image because Mscorlib.dll is missing a native image" + MSG_FOR_URT_HR(CLR_E_BIND_SYS_ASM_NI_MISSING) "Could not use native image because System.Private.CoreLib.dll is missing a native image" MSG_FOR_URT_HR(CLR_E_BIND_NI_SECURITY_FAILURE) "Native image was generated in a different trust level than present at runtime" MSG_FOR_URT_HR(CLR_E_BIND_NI_DEP_IDENTITY_MISMATCH) "Native image identity mismatch with respect to its dependencies" MSG_FOR_URT_HR(CLR_E_GC_OOM) "Failfast due to an OOM during a GC" diff --git a/src/coreclr/src/pal/prebuilt/idl/cordebug_i.cpp b/src/coreclr/src/pal/prebuilt/idl/cordebug_i.cpp index 85d7ab575a51..5208843c10eb 100644 --- a/src/coreclr/src/pal/prebuilt/idl/cordebug_i.cpp +++ b/src/coreclr/src/pal/prebuilt/idl/cordebug_i.cpp @@ -8,7 +8,7 @@ /* File created by MIDL compiler version 8.01.0622 */ /* at Mon Jan 18 19:14:07 2038 */ -/* Compiler settings for E:/repos/runtime2/src/coreclr/src/inc/cordebug.idl: +/* Compiler settings for runtime/src/coreclr/src/inc/cordebug.idl: Oicf, W1, Zp8, env=Win32 (32b run), target_arch=X86 8.01.0622 protocol : dce , ms_ext, c_ext, robust error checks: allocation ref bounds_check enum stub_data @@ -289,6 +289,9 @@ MIDL_DEFINE_GUID(IID, IID_ICorDebugNativeFrame2,0x35389FF1,0x3684,0x4c55,0xA2,0x MIDL_DEFINE_GUID(IID, IID_ICorDebugModule3,0x86F012BF,0xFF15,0x4372,0xBD,0x30,0xB6,0xF1,0x1C,0xAA,0xE1,0xDD); +MIDL_DEFINE_GUID(IID, IID_ICorDebugModule4,0xFF8B8EAF,0x25CD,0x4316,0x88,0x59,0x84,0x41,0x6D,0xE4,0x40,0x2E); + + MIDL_DEFINE_GUID(IID, IID_ICorDebugRuntimeUnwindableFrame,0x879CAC0A,0x4A53,0x4668,0xB8,0xE3,0xCB,0x84,0x73,0xCB,0x18,0x7F); diff --git a/src/coreclr/src/pal/prebuilt/inc/cordebug.h b/src/coreclr/src/pal/prebuilt/inc/cordebug.h index 3418b8cf9b30..b497d141c480 100644 --- a/src/coreclr/src/pal/prebuilt/inc/cordebug.h +++ b/src/coreclr/src/pal/prebuilt/inc/cordebug.h @@ -6,7 +6,7 @@ /* File created by MIDL compiler version 8.01.0622 */ /* at Mon Jan 18 19:14:07 2038 */ -/* Compiler settings for E:/repos/runtime2/src/coreclr/src/inc/cordebug.idl: +/* Compiler settings for runtime/src/coreclr/src/inc/cordebug.idl: Oicf, W1, Zp8, env=Win32 (32b run), target_arch=X86 8.01.0622 protocol : dce , ms_ext, c_ext, robust error checks: allocation ref bounds_check enum stub_data @@ -563,6 +563,13 @@ typedef interface ICorDebugModule3 ICorDebugModule3; #endif /* __ICorDebugModule3_FWD_DEFINED__ */ +#ifndef __ICorDebugModule4_FWD_DEFINED__ +#define __ICorDebugModule4_FWD_DEFINED__ +typedef interface ICorDebugModule4 ICorDebugModule4; + +#endif /* __ICorDebugModule4_FWD_DEFINED__ */ + + #ifndef __ICorDebugRuntimeUnwindableFrame_FWD_DEFINED__ #define __ICorDebugRuntimeUnwindableFrame_FWD_DEFINED__ typedef interface ICorDebugRuntimeUnwindableFrame ICorDebugRuntimeUnwindableFrame; @@ -11691,6 +11698,86 @@ EXTERN_C const IID IID_ICorDebugModule3; #endif /* __ICorDebugModule3_INTERFACE_DEFINED__ */ +#ifndef __ICorDebugModule4_INTERFACE_DEFINED__ +#define __ICorDebugModule4_INTERFACE_DEFINED__ + +/* interface ICorDebugModule4 */ +/* [unique][uuid][local][object] */ + + +EXTERN_C const IID IID_ICorDebugModule4; + +#if defined(__cplusplus) && !defined(CINTERFACE) + + MIDL_INTERFACE("FF8B8EAF-25CD-4316-8859-84416DE4402E") + ICorDebugModule4 : public IUnknown + { + public: + virtual HRESULT STDMETHODCALLTYPE IsMappedLayout( + /* [out] */ BOOL *pIsMapped) = 0; + + }; + + +#else /* C style interface */ + + typedef struct ICorDebugModule4Vtbl + { + BEGIN_INTERFACE + + HRESULT ( 
STDMETHODCALLTYPE *QueryInterface )( + ICorDebugModule4 * This, + /* [in] */ REFIID riid, + /* [annotation][iid_is][out] */ + _COM_Outptr_ void **ppvObject); + + ULONG ( STDMETHODCALLTYPE *AddRef )( + ICorDebugModule4 * This); + + ULONG ( STDMETHODCALLTYPE *Release )( + ICorDebugModule4 * This); + + HRESULT ( STDMETHODCALLTYPE *IsMappedLayout )( + ICorDebugModule4 * This, + /* [out] */ BOOL *pIsMapped); + + END_INTERFACE + } ICorDebugModule4Vtbl; + + interface ICorDebugModule4 + { + CONST_VTBL struct ICorDebugModule4Vtbl *lpVtbl; + }; + + + +#ifdef COBJMACROS + + +#define ICorDebugModule4_QueryInterface(This,riid,ppvObject) \ + ( (This)->lpVtbl -> QueryInterface(This,riid,ppvObject) ) + +#define ICorDebugModule4_AddRef(This) \ + ( (This)->lpVtbl -> AddRef(This) ) + +#define ICorDebugModule4_Release(This) \ + ( (This)->lpVtbl -> Release(This) ) + + +#define ICorDebugModule4_IsMappedLayout(This,pIsMapped) \ + ( (This)->lpVtbl -> IsMappedLayout(This,pIsMapped) ) + +#endif /* COBJMACROS */ + + +#endif /* C style interface */ + + + + +#endif /* __ICorDebugModule4_INTERFACE_DEFINED__ */ + + #ifndef __ICorDebugRuntimeUnwindableFrame_INTERFACE_DEFINED__ #define __ICorDebugRuntimeUnwindableFrame_INTERFACE_DEFINED__ @@ -12075,14 +12162,14 @@ EXTERN_C const IID IID_ICorDebugModule; #endif /* __ICorDebugModule_INTERFACE_DEFINED__ */ -/* interface __MIDL_itf_cordebug_0000_0075 */ +/* interface __MIDL_itf_cordebug_0000_0076 */ /* [local] */ #pragma warning(pop) -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0075_v0_0_c_ifspec; -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0075_v0_0_s_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0076_v0_0_c_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0076_v0_0_s_ifspec; #ifndef __ICorDebugModule2_INTERFACE_DEFINED__ #define __ICorDebugModule2_INTERFACE_DEFINED__ @@ -15129,15 +15216,15 @@ EXTERN_C const IID IID_ICorDebugBoxValue; #endif /* __ICorDebugBoxValue_INTERFACE_DEFINED__ */ -/* interface __MIDL_itf_cordebug_0000_0102 */ +/* interface __MIDL_itf_cordebug_0000_0103 */ /* [local] */ #pragma warning(push) #pragma warning(disable:28718) -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0102_v0_0_c_ifspec; -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0102_v0_0_s_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0103_v0_0_c_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0103_v0_0_s_ifspec; #ifndef __ICorDebugStringValue_INTERFACE_DEFINED__ #define __ICorDebugStringValue_INTERFACE_DEFINED__ @@ -15277,14 +15364,14 @@ EXTERN_C const IID IID_ICorDebugStringValue; #endif /* __ICorDebugStringValue_INTERFACE_DEFINED__ */ -/* interface __MIDL_itf_cordebug_0000_0103 */ +/* interface __MIDL_itf_cordebug_0000_0104 */ /* [local] */ #pragma warning(pop) -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0103_v0_0_c_ifspec; -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0103_v0_0_s_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0104_v0_0_c_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0104_v0_0_s_ifspec; #ifndef __ICorDebugArrayValue_INTERFACE_DEFINED__ #define __ICorDebugArrayValue_INTERFACE_DEFINED__ @@ -18059,15 +18146,15 @@ EXTERN_C const IID IID_ICorDebugBlockingObjectEnum; #endif /* __ICorDebugBlockingObjectEnum_INTERFACE_DEFINED__ */ -/* interface __MIDL_itf_cordebug_0000_0127 */ +/* interface __MIDL_itf_cordebug_0000_0128 */ /* [local] */ #pragma warning(push) #pragma warning(disable:28718) -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0127_v0_0_c_ifspec; -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0127_v0_0_s_ifspec; +extern 
RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0128_v0_0_c_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0128_v0_0_s_ifspec; #ifndef __ICorDebugMDA_INTERFACE_DEFINED__ #define __ICorDebugMDA_INTERFACE_DEFINED__ @@ -18207,7 +18294,7 @@ EXTERN_C const IID IID_ICorDebugMDA; #endif /* __ICorDebugMDA_INTERFACE_DEFINED__ */ -/* interface __MIDL_itf_cordebug_0000_0128 */ +/* interface __MIDL_itf_cordebug_0000_0129 */ /* [local] */ #pragma warning(pop) @@ -18215,8 +18302,8 @@ EXTERN_C const IID IID_ICorDebugMDA; #pragma warning(disable:28718) -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0128_v0_0_c_ifspec; -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0128_v0_0_s_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0129_v0_0_c_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0129_v0_0_s_ifspec; #ifndef __ICorDebugEditAndContinueErrorInfo_INTERFACE_DEFINED__ #define __ICorDebugEditAndContinueErrorInfo_INTERFACE_DEFINED__ @@ -18332,14 +18419,14 @@ EXTERN_C const IID IID_ICorDebugEditAndContinueErrorInfo; #endif /* __ICorDebugEditAndContinueErrorInfo_INTERFACE_DEFINED__ */ -/* interface __MIDL_itf_cordebug_0000_0129 */ +/* interface __MIDL_itf_cordebug_0000_0130 */ /* [local] */ #pragma warning(pop) -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0129_v0_0_c_ifspec; -extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0129_v0_0_s_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0130_v0_0_c_ifspec; +extern RPC_IF_HANDLE __MIDL_itf_cordebug_0000_0130_v0_0_s_ifspec; #ifndef __ICorDebugEditAndContinueSnapshot_INTERFACE_DEFINED__ #define __ICorDebugEditAndContinueSnapshot_INTERFACE_DEFINED__ diff --git a/src/coreclr/src/pal/prebuilt/inc/corerror.h b/src/coreclr/src/pal/prebuilt/inc/corerror.h index 31ba2d9c4554..0fe7c75d2d24 100644 --- a/src/coreclr/src/pal/prebuilt/inc/corerror.h +++ b/src/coreclr/src/pal/prebuilt/inc/corerror.h @@ -385,7 +385,6 @@ #define CORDBG_E_UNSUPPORTED_DELEGATE EMAKEHR(0x1c68) #define PEFMT_E_64BIT EMAKEHR(0x1d02) #define PEFMT_E_32BIT EMAKEHR(0x1d0b) -#define NGEN_E_SYS_ASM_NI_MISSING EMAKEHR(0x1f06) #define CLDB_E_INTERNALERROR EMAKEHR(0x1fff) #define CLR_E_BIND_ASSEMBLY_VERSION_TOO_LOW EMAKEHR(0x2000) #define CLR_E_BIND_ASSEMBLY_PUBLIC_KEY_MISMATCH EMAKEHR(0x2001) diff --git a/src/coreclr/src/pal/src/CMakeLists.txt b/src/coreclr/src/pal/src/CMakeLists.txt index 094f14cabf82..712dabf46a3e 100644 --- a/src/coreclr/src/pal/src/CMakeLists.txt +++ b/src/coreclr/src/pal/src/CMakeLists.txt @@ -267,10 +267,12 @@ endif(CLR_CMAKE_TARGET_OSX) # > warning for library: libtracepointprovider.a the table of contents is empty (no object file members in the library define global symbols) # if(CLR_CMAKE_TARGET_LINUX) - add_library(tracepointprovider + add_library(tracepointprovider_obj OBJECT misc/tracepointprovider.cpp ) + add_library(tracepointprovider INTERFACE) + target_sources(tracepointprovider INTERFACE $<TARGET_OBJECTS:tracepointprovider_obj>) endif(CLR_CMAKE_TARGET_LINUX) if(CLR_CMAKE_TARGET_OSX) diff --git a/src/coreclr/src/pal/src/config.h.in b/src/coreclr/src/pal/src/config.h.in index 8e7e69288bc9..0319c6da7798 100644 --- a/src/coreclr/src/pal/src/config.h.in +++ b/src/coreclr/src/pal/src/config.h.in @@ -24,6 +24,7 @@ #cmakedefine01 HAVE_PTHREAD_NP_H #cmakedefine01 HAVE_AUXV_HWCAP_H #cmakedefine01 HAVE_SYS_PTRACE_H +#cmakedefine01 HAVE_GETAUXVAL #cmakedefine01 HAVE_KQUEUE #cmakedefine01 HAVE_PTHREAD_SUSPEND @@ -36,7 +37,6 @@ #cmakedefine01 HAVE_PTHREAD_GETCPUCLOCKID #cmakedefine01 HAVE_PTHREAD_SIGQUEUE #cmakedefine01 HAVE_PTHREAD_GETAFFINITY_NP -#cmakedefine01 HAVE_PTHREAD_ATTR_SETAFFINITY_NP #cmakedefine01 HAVE_CPUSET_T #cmakedefine01 HAVE_SIGRETURN #cmakedefine01 HAVE__THREAD_SYS_SIGRETURN @@ -66,6 +66,7 @@ #cmakedefine01 HAVE_TTRACE #cmakedefine01 HAVE_PIPE2 #cmakedefine01 HAVE_SCHED_GETAFFINITY +#cmakedefine01 HAVE_SCHED_SETAFFINITY #cmakedefine HAVE_UNW_GET_SAVE_LOC #cmakedefine HAVE_UNW_GET_ACCESSORS #cmakedefine01 HAVE_XSWDEV diff --git a/src/coreclr/src/pal/src/configure.cmake b/src/coreclr/src/pal/src/configure.cmake index b67637b584ba..893f88c44f3d 100644 --- a/src/coreclr/src/pal/src/configure.cmake +++ b/src/coreclr/src/pal/src/configure.cmake @@ -47,6 +47,7 @@ check_include_files(numa.h HAVE_NUMA_H) check_include_files(pthread_np.h HAVE_PTHREAD_NP_H) check_include_files("sys/auxv.h;asm/hwcap.h" HAVE_AUXV_HWCAP_H) check_include_files("sys/ptrace.h" HAVE_SYS_PTRACE_H) +check_symbol_exists(getauxval sys/auxv.h HAVE_GETAUXVAL) set(CMAKE_REQUIRED_LIBRARIES ${CMAKE_DL_LIBS}) @@ -81,6 +82,7 @@ check_include_files(gnu/lib-names.h HAVE_GNU_LIBNAMES_H) check_function_exists(kqueue HAVE_KQUEUE) check_library_exists(c sched_getaffinity "" HAVE_SCHED_GETAFFINITY) +check_library_exists(c sched_setaffinity "" HAVE_SCHED_SETAFFINITY) check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) check_library_exists(c pthread_create "" HAVE_PTHREAD_IN_LIBC) @@ -100,7 +102,6 @@ check_library_exists(${PTHREAD_LIBRARY} pthread_getattr_np "" HAVE_PTHREAD_GETAT check_library_exists(${PTHREAD_LIBRARY} pthread_getcpuclockid "" HAVE_PTHREAD_GETCPUCLOCKID) check_library_exists(${PTHREAD_LIBRARY} pthread_sigqueue "" HAVE_PTHREAD_SIGQUEUE) check_library_exists(${PTHREAD_LIBRARY} pthread_getaffinity_np "" HAVE_PTHREAD_GETAFFINITY_NP) -check_library_exists(${PTHREAD_LIBRARY} pthread_attr_setaffinity_np "" HAVE_PTHREAD_ATTR_SETAFFINITY_NP) check_function_exists(sigreturn HAVE_SIGRETURN) check_function_exists(_thread_sys_sigreturn HAVE__THREAD_SYS_SIGRETURN)
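The new HAVE_GETAUXVAL probe above feeds a getauxval() code path. A small sketch of how such a guarded call typically looks, assuming a config.h that defines HAVE_GETAUXVAL as generated above; QueryHardwareCaps is an invented name:

#if HAVE_GETAUXVAL
#include <sys/auxv.h>
#endif

// Invented helper: returns the AT_HWCAP bits, or 0 when unavailable.
unsigned long QueryHardwareCaps()
{
#if HAVE_GETAUXVAL && defined(AT_HWCAP)
    // getauxval() itself returns 0 when the kernel does not supply a value
    // (for example on WSL1), which is why the jitsupport.cpp change below
    // adds a /proc/cpuinfo fallback for a zero result.
    return getauxval(AT_HWCAP);
#else
    return 0;
#endif
}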
diff --git a/src/coreclr/src/pal/src/debug/debug.cpp b/src/coreclr/src/pal/src/debug/debug.cpp index b58b2fe587e1..7e7e36820058 100644 --- a/src/coreclr/src/pal/src/debug/debug.cpp +++ b/src/coreclr/src/pal/src/debug/debug.cpp @@ -62,6 +62,11 @@ SET_DEFAULT_DEBUG_CHANNEL(DEBUG); // some headers have code with asserts, so do #include <procfs.h> #endif // HAVE_PROCFS_H +#ifdef __APPLE__ +#include <mach/mach.h> +#include <mach/mach_error.h> +#endif // __APPLE__ + #if HAVE_MACH_EXCEPTIONS #include "../exception/machexception.h" #endif // HAVE_MACH_EXCEPTIONS @@ -69,6 +74,7 @@ SET_DEFAULT_DEBUG_CHANNEL(DEBUG); // some headers have code with asserts, so do using namespace CorUnix; extern "C" void DBG_DebugBreak_End(); +extern size_t OffsetWithinPage(off_t addr); #if HAVE_PROCFS_CTL #define CTL_ATTACH "attach" @@ -541,6 +547,184 @@ SetThreadContext( return ret; } +/*++ +Function: + PAL_OpenProcessMemory + +Abstract + Creates the handle for PAL_ReadProcessMemory. + +Parameter + processId : process id to read memory + pHandle : returns a platform specific handle or UINT32_MAX if failed + +Return + true if successful, false if the process id is invalid or not supported.
+--*/ +BOOL +PALAPI +PAL_OpenProcessMemory( + IN DWORD processId, + OUT DWORD* pHandle +) +{ + ENTRY("PAL_OpenProcessMemory(pid=%d)\n", processId); + _ASSERTE(pHandle != nullptr); + *pHandle = UINT32_MAX; +#ifdef __APPLE__ + mach_port_name_t port; + kern_return_t result = ::task_for_pid(mach_task_self(), (int)processId, &port); + if (result != KERN_SUCCESS) + { + ERROR("task_for_pid(%d) FAILED %x %s\n", processId, result, mach_error_string(result)); + LOGEXIT("PAL_OpenProcessMemory FALSE\n"); + return FALSE; + } + *pHandle = port; +#else + char memPath[128]; + _snprintf_s(memPath, sizeof(memPath), sizeof(memPath), "/proc/%lu/mem", processId); + + int fd = open(memPath, O_RDONLY); + if (fd == -1) + { + ERROR("open(%s) FAILED %d (%s)\n", memPath, errno, strerror(errno)); + LOGEXIT("PAL_OpenProcessMemory FALSE\n"); + return FALSE; + } + *pHandle = fd; +#endif + LOGEXIT("PAL_OpenProcessMemory TRUE\n"); + return TRUE; +} + +/*++ +Function: + PAL_CloseProcessMemory + +Abstract + Closes the PAL_OpenProcessMemory handle. + +Parameter + handle : from PAL_OpenProcessMemory + +Return + none +--*/ +VOID +PALAPI +PAL_CloseProcessMemory( + IN DWORD handle +) +{ + ENTRY("PAL_CloseProcessMemory(handle=%x)\n", handle); + if (handle != UINT32_MAX) + { +#ifdef __APPLE__ + kern_return_t result = ::mach_port_deallocate(mach_task_self(), (mach_port_name_t)handle); + if (result != KERN_SUCCESS) + { + ERROR("mach_port_deallocate FAILED %x %s\n", result, mach_error_string(result)); + } +#else + close(handle); +#endif + } + LOGEXIT("PAL_CloseProcessMemory\n"); +} + +/*++ +Function: + PAL_ReadProcessMemory + +Abstract + Reads process memory. + +Parameter + handle : from PAL_OpenProcessMemory + address : address of memory to read + buffer : buffer to read memory to + size : number of bytes to read + numberOfBytesRead: number of bytes read (optional) + +Return + true if reading memory was successful, false if not. +--*/ +BOOL +PALAPI +PAL_ReadProcessMemory( + IN DWORD handle, + IN ULONG64 address, + IN LPVOID buffer, + IN SIZE_T size, + OUT SIZE_T* numberOfBytesRead) +{ + ENTRY("PAL_ReadProcessMemory(handle=%x, address=%p buffer=%p size=%d)\n", handle, (void*)address, buffer, size); + _ASSERTE(handle != 0); + _ASSERTE(numberOfBytesRead != nullptr); + BOOL result = TRUE; + size_t read = 0; +#ifdef __APPLE__ + vm_map_t task = (vm_map_t)handle; + + // vm_read_overwrite usually requires that the address be page-aligned + // and the size be a multiple of the page size. We can't differentiate + // between the cases in which that's required and those in which it + // isn't, so we do it all the time.
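+ // Example: with a 4 KiB page, address 0x12345 yields addressAligned = 0x12000 and offset = 0x345; the first iteration below copies bytesToRead = min(pageSize - offset, size) bytes, and later iterations continue from offset 0 of each following page.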
+ const size_t pageSize = GetVirtualPageSize(); + vm_address_t addressAligned = ALIGN_DOWN(address, pageSize); + size_t offset = OffsetWithinPage(address); + size_t bytesToRead; + + char *data = (char*)malloc(pageSize); + if (data == nullptr) + { + ERROR("malloc(%d) FAILED\n", pageSize); + result = FALSE; + goto exit; + } + + while (size > 0) + { + vm_size_t bytesRead; + + bytesToRead = pageSize - offset; + if (bytesToRead > size) + { + bytesToRead = size; + } + bytesRead = pageSize; + kern_return_t machret = ::vm_read_overwrite(task, addressAligned, pageSize, (vm_address_t)data, &bytesRead); + if (machret != KERN_SUCCESS || bytesRead != pageSize) + { + ERROR("vm_read_overwrite failed for %d bytes from %p: %x %s\n", pageSize, (void*)addressAligned, machret, mach_error_string(machret)); + result = FALSE; + goto exit; + } + memcpy((LPSTR)buffer + read , data + offset, bytesToRead); + addressAligned = addressAligned + pageSize; + read += bytesToRead; + size -= bytesToRead; + offset = 0; + } + +exit: + if (data != nullptr) + { + free(data); + } +#else + read = pread(handle, buffer, size, address); + if (read == (size_t)-1) + { + result = FALSE; + } +#endif + *numberOfBytesRead = read; + LOGEXIT("PAL_ReadProcessMemory result=%d bytes read=%d\n", result, read); + return result; +} + /*++ Function: PAL_ProbeMemory diff --git a/src/coreclr/src/pal/src/eventprovider/dummyprovider/CMakeLists.txt b/src/coreclr/src/pal/src/eventprovider/dummyprovider/CMakeLists.txt index 39b9826d1ab5..8e6968cf783d 100644 --- a/src/coreclr/src/pal/src/eventprovider/dummyprovider/CMakeLists.txt +++ b/src/coreclr/src/pal/src/eventprovider/dummyprovider/CMakeLists.txt @@ -1,8 +1,8 @@ -include(FindPython) +include(FindPythonInterp) set (GENERATE_SCRIPT ${CLR_DIR}/src/scripts/genDummyProvider.py) -set(GENERATE_COMMAND ${Python_EXECUTABLE} ${GENERATE_SCRIPT} --man ${EVENT_MANIFEST} --intermediate ${CMAKE_CURRENT_BINARY_DIR}) +set(GENERATE_COMMAND ${PYTHON_EXECUTABLE} ${GENERATE_SCRIPT} --man ${EVENT_MANIFEST} --intermediate ${CMAKE_CURRENT_BINARY_DIR}) execute_process( COMMAND ${GENERATE_COMMAND} --dry-run diff --git a/src/coreclr/src/pal/src/eventprovider/lttngprovider/CMakeLists.txt b/src/coreclr/src/pal/src/eventprovider/lttngprovider/CMakeLists.txt index 234dea19b753..d55dab3557f3 100644 --- a/src/coreclr/src/pal/src/eventprovider/lttngprovider/CMakeLists.txt +++ b/src/coreclr/src/pal/src/eventprovider/lttngprovider/CMakeLists.txt @@ -1,7 +1,7 @@ -include(FindPython) +include(FindPythonInterp) set (GENERATE_SCRIPT ${CLR_DIR}/src/scripts/genLttngProvider.py) -set(GENERATE_COMMAND ${Python_EXECUTABLE} ${GENERATE_SCRIPT} --man ${EVENT_MANIFEST} --intermediate ${CMAKE_CURRENT_BINARY_DIR}) +set(GENERATE_COMMAND ${PYTHON_EXECUTABLE} ${GENERATE_SCRIPT} --man ${EVENT_MANIFEST} --intermediate ${CMAKE_CURRENT_BINARY_DIR}) execute_process( COMMAND ${GENERATE_COMMAND} --dry-run diff --git a/src/coreclr/src/pal/src/include/pal/mutex.hpp b/src/coreclr/src/pal/src/include/pal/mutex.hpp index c0227ff88d06..cdf308c4dc53 100644 --- a/src/coreclr/src/pal/src/include/pal/mutex.hpp +++ b/src/coreclr/src/pal/src/include/pal/mutex.hpp @@ -67,7 +67,7 @@ DWORD SPINLOCKTryAcquire (LONG * lock); // Named mutex // Temporarily disabling usage of pthread process-shared mutexes on ARM/ARM64 due to functional issues that cannot easily be -// detected with code due to hangs. See https://github.com/dotnet/coreclr/issues/5456. +// detected with code due to hangs. See https://github.com/dotnet/runtime/issues/6014.
#if HAVE_FULLY_FEATURED_PTHREAD_MUTEXES && HAVE_FUNCTIONAL_PTHREAD_ROBUST_MUTEXES && !(defined(HOST_ARM) || defined(HOST_ARM64) || defined(__FreeBSD__)) #define NAMED_MUTEX_USE_PTHREAD_MUTEX 1 #else diff --git a/src/coreclr/src/pal/src/init/pal.cpp b/src/coreclr/src/pal/src/init/pal.cpp index e040f2fd10e6..b71c537a3234 100644 --- a/src/coreclr/src/pal/src/init/pal.cpp +++ b/src/coreclr/src/pal/src/init/pal.cpp @@ -91,6 +91,15 @@ int CacheLineSize; #endif #endif +#if defined(__FreeBSD__) +#include <sys/types.h> +#include <sys/param.h> +#include <sys/sysctl.h> +#endif +#if HAVE_GETAUXVAL +#include <sys/auxv.h> +#endif + #include using namespace CorUnix; @@ -125,7 +134,7 @@ static DWORD g_initializeDLLFlags = PAL_INITIALIZE_DLL; static int Initialize(int argc, const char *const argv[], DWORD flags); static BOOL INIT_IncreaseDescriptorLimit(void); static LPWSTR INIT_FormatCommandLine (int argc, const char * const *argv); -static LPWSTR INIT_ConvertEXEPath(LPCSTR exe_name); +static LPWSTR INIT_GetCurrentEXEPath(); static BOOL INIT_SharedFilesPath(void); #ifdef _DEBUG @@ -560,7 +569,7 @@ Initialize( } /* find out the application's full path */ - exe_path = INIT_ConvertEXEPath(argv[0]); + exe_path = INIT_GetCurrentEXEPath(); if (NULL == exe_path) { ERROR("Unable to find exe path\n"); @@ -1265,45 +1274,160 @@ static LPWSTR INIT_FormatCommandLine (int argc, const char * const *argv) return retval; } +#if defined(__linux__) +#define symlinkEntrypointExecutable "/proc/self/exe" +#elif !defined(__APPLE__) +#define symlinkEntrypointExecutable "/proc/curproc/exe" +#endif + +bool GetAbsolutePath(const char* path, PathCharString& absolutePath) +{ + bool result = false; + + char realPath[PATH_MAX]; + if (realpath(path, realPath) != nullptr && realPath[0] != '\0') + { + absolutePath.Set(realPath, strlen(realPath)); + // realpath should return canonicalized path without the trailing slash + _ASSERTE(absolutePath[absolutePath.GetCount() - 1] != '/'); + + result = true; + } + + return result; +} + +bool GetEntrypointExecutableAbsolutePath(PathCharString& entrypointExecutable) +{ + bool result = false; + + entrypointExecutable.Clear(); + + // Get path to the executable for the current process using + // platform specific means.
+#if defined(__APPLE__) + + // On Mac, we ask the OS for the absolute path to the entrypoint executable + uint32_t lenActualPath = 0; + if (_NSGetExecutablePath(nullptr, &lenActualPath) == -1) + { + // OSX has placed the actual path length in lenActualPath, + // so re-attempt the operation + PathCharString resizedPath; + char *pResizedPath = resizedPath.OpenStringBuffer(lenActualPath); + if (_NSGetExecutablePath(pResizedPath, &lenActualPath) == 0) + { + resizedPath.CloseBuffer(lenActualPath - 1); + entrypointExecutable.Set(resizedPath); + result = true; + } + } +#elif defined (__FreeBSD__) + static const int name[] = + { + CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 + }; + char path[PATH_MAX]; + size_t len; + + len = sizeof(path); + if (sysctl(name, 4, path, &len, nullptr, 0) == 0) + { + entrypointExecutable.Set(path, len); + result = true; + } + else + { + // ENOMEM + result = false; + } +#elif defined(__NetBSD__) && defined(KERN_PROC_PATHNAME) + static const int name[] = + { + CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME, + }; + char path[MAXPATHLEN]; + size_t len; + + len = sizeof(path); + if (sysctl(name, __arraycount(name), path, &len, NULL, 0) != -1) + { + entrypointExecutable.Set(path, len); + result = true; + } + else + { + result = false; + } +#elif defined(__sun) + const char *path; + if ((path = getexecname()) == NULL) + { + result = false; + } + else if (*path != '/') + { + char *cwd; + if ((cwd = getcwd(NULL, PATH_MAX)) == NULL) + { + result = false; + } + else + { + entrypointExecutable.Set(cwd, strlen(cwd)); + entrypointExecutable.Append('/'); + entrypointExecutable.Append(path, strlen(path)); + + result = true; + free(cwd); + } + } + else + { + entrypointExecutable.Set(path, strlen(path)); + result = true; + } +#else + +#if HAVE_GETAUXVAL && defined(AT_EXECFN) + const char *execfn = (const char *)getauxval(AT_EXECFN); + + if (execfn) + { + entrypointExecutable.Set(execfn, strlen(execfn)); + result = true; + } + else +#endif + // On other OSs, return the symlink that will be resolved by GetAbsolutePath + // to fetch the entrypoint EXE absolute path, inclusive of filename. + result = GetAbsolutePath(symlinkEntrypointExecutable, entrypointExecutable); +#endif + + return result; +} + /*++ Function: - INIT_ConvertEXEPath + INIT_GetCurrentEXEPath Abstract: - Check whether the executable path is valid, and convert its type (LPCSTR -> LPWSTR) - -Parameters: - LPCSTR exe_name : full path of the current executable + Get the current exe path Return: pointer to buffer containing the full path. 
This buffer must be released by the caller using free() -Notes : - this function assumes that "exe_name" is in Unix style (no \) --*/ -static LPWSTR INIT_ConvertEXEPath(LPCSTR exe_path) +static LPWSTR INIT_GetCurrentEXEPath() { PathCharString real_path; LPWSTR return_value; INT return_size; - struct stat theStats; - - if (!strchr(exe_path, '/')) - { - ERROR( "The exe path is not fully specified\n" ); - return NULL; - } - - if (-1 == stat(exe_path, &theStats)) - { - ERROR( "The file does not exist\n" ); - return NULL; - } - if (!CorUnix::RealPathHelper(exe_path, real_path)) + if (!GetEntrypointExecutableAbsolutePath(real_path)) { - ERROR("realpath() failed!\n"); + ERROR( "Cannot get current exe path\n" ); return NULL; } diff --git a/src/coreclr/src/pal/src/libunwind/README.md b/src/coreclr/src/pal/src/libunwind/README.md index 100b93820ade..e845566c06f9 100644 --- a/src/coreclr/src/pal/src/libunwind/README.md +++ b/src/coreclr/src/pal/src/libunwind/README.md @@ -1 +1 @@ -README \ No newline at end of file +README diff --git a/src/coreclr/src/pal/src/locale/utf8.cpp b/src/coreclr/src/pal/src/locale/utf8.cpp index b8a6f7ad5d6f..63bfc8661715 100644 --- a/src/coreclr/src/pal/src/locale/utf8.cpp +++ b/src/coreclr/src/pal/src/locale/utf8.cpp @@ -10,7 +10,7 @@ Module Name: unicode/utf8.c Abstract: - Functions to encode and decode UTF-8 strings. This is a port of the C# version from mscorlib. + Functions to encode and decode UTF-8 strings. This is a port of the C# version from Utf8Encoding.cs. Revision History: diff --git a/src/coreclr/src/pal/src/map/map.cpp b/src/coreclr/src/pal/src/map/map.cpp index 75405c7407c8..0cbaef5521d0 100644 --- a/src/coreclr/src/pal/src/map/map.cpp +++ b/src/coreclr/src/pal/src/map/map.cpp @@ -2137,7 +2137,7 @@ MAPRecordMapping( return palError; } -static size_t OffsetWithinPage(off_t addr) +size_t OffsetWithinPage(off_t addr) { return addr & (GetVirtualPageSize() - 1); } diff --git a/src/coreclr/src/pal/src/misc/jitsupport.cpp b/src/coreclr/src/pal/src/misc/jitsupport.cpp index 68af6aa1bd6d..89e90c23812a 100644 --- a/src/coreclr/src/pal/src/misc/jitsupport.cpp +++ b/src/coreclr/src/pal/src/misc/jitsupport.cpp @@ -13,6 +13,135 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC); #include <sys/auxv.h> #endif +#if defined(HOST_ARM64) && defined(__linux__) +struct CpuCapability +{ + const char* name; + unsigned long hwCapFlag; +}; + +static const CpuCapability CpuCapabilities[] = { + //{ "fp", HWCAP_FP }, +#ifdef HWCAP_ASIMD + { "asimd", HWCAP_ASIMD }, +#endif + //{ "evtstrm", HWCAP_EVTSTRM }, +#ifdef HWCAP_AES + { "aes", HWCAP_AES }, +#endif + //{ "pmull", HWCAP_PMULL }, +#ifdef HWCAP_SHA1 + { "sha1", HWCAP_SHA1 }, +#endif +#ifdef HWCAP_SHA2 + { "sha2", HWCAP_SHA2 }, +#endif +#ifdef HWCAP_CRC32 + { "crc32", HWCAP_CRC32 }, +#endif +#ifdef HWCAP_ATOMICS + { "atomics", HWCAP_ATOMICS }, +#endif + //{ "fphp", HWCAP_FPHP }, + //{ "asimdhp", HWCAP_ASIMDHP }, + //{ "cpuid", HWCAP_CPUID }, +#ifdef HWCAP_ASIMDRDM + { "asimdrdm", HWCAP_ASIMDRDM }, +#endif + //{ "jscvt", HWCAP_JSCVT }, + //{ "fcma", HWCAP_FCMA }, + //{ "lrcpc", HWCAP_LRCPC }, + //{ "dcpop", HWCAP_DCPOP }, + //{ "sha3", HWCAP_SHA3 }, + //{ "sm3", HWCAP_SM3 }, + //{ "sm4", HWCAP_SM4 }, +#ifdef HWCAP_ASIMDDP + { "asimddp", HWCAP_ASIMDDP }, +#endif + //{ "sha512", HWCAP_SHA512 }, + //{ "sve", HWCAP_SVE }, + //{ "asimdfhm", HWCAP_ASIMDFHM }, + //{ "dit", HWCAP_DIT }, + //{ "uscat", HWCAP_USCAT }, + //{ "ilrcpc", HWCAP_ILRCPC }, + //{ "flagm", HWCAP_FLAGM }, + //{ "ssbs", HWCAP_SSBS }, + //{ "sb", HWCAP_SB }, + //{ "paca", HWCAP_PACA }, + //{ "pacg",
HWCAP_PACG }, + + // Ensure the array is never empty + { "", 0 } +}; + +// Returns the HWCAP_* flag corresponding to the given capability name. +// If the capability name is not recognized or unused at present, zero is returned. +static unsigned long LookupCpuCapabilityFlag(const char* start, size_t length) +{ + for (int i = 0; i < _countof(CpuCapabilities); i++) + { + const char* capabilityName = CpuCapabilities[i].name; + if ((length == strlen(capabilityName)) && (memcmp(start, capabilityName, length) == 0)) + { + return CpuCapabilities[i].hwCapFlag; + } + } + return 0; +} + +// Reads the first Features entry from /proc/cpuinfo (assuming other entries are essentially +// identical) and translates it into a set of HWCAP_* flags. +static unsigned long GetCpuCapabilityFlagsFromCpuInfo() +{ + unsigned long capabilityFlags = 0; + FILE* cpuInfoFile = fopen("/proc/cpuinfo", "r"); + + if (cpuInfoFile != NULL) + { + char* line = nullptr; + size_t lineLen = 0; + + while (getline(&line, &lineLen, cpuInfoFile) != -1) + { + char* p = line; + while (isspace(*p)) p++; + + if (memcmp(p, "Features", 8) != 0) + continue; + + // Skip "Features" and look for ':' + p += 8; + + while (isspace(*p)) p++; + if (*p != ':') + continue; + + // Skip ':' and parse the list + p++; + + while (true) + { + while (isspace(*p)) p++; + if (*p == 0) + break; + + char* start = p++; + while ((*p != 0) && !isspace(*p)) p++; + + capabilityFlags |= LookupCpuCapabilityFlag(start, p - start); + } + + break; + } + + free(line); + fclose(cpuInfoFile); + } + + return capabilityFlags; +} +#endif // defined(HOST_ARM64) && defined(__linux__) + PALIMPORT VOID PALAPI @@ -26,6 +155,13 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags) #if HAVE_AUXV_HWCAP_H unsigned long hwCap = getauxval(AT_HWCAP); +#if defined(__linux__) + // getauxval(AT_HWCAP) returns zero on WSL1 (https://github.com/microsoft/WSL/issues/3682), + // fall back to reading capabilities from /proc/cpuinfo. + if (hwCap == 0) + hwCap = GetCpuCapabilityFlagsFromCpuInfo(); +#endif + // HWCAP_* flags are introduced by ARM into the Linux kernel as new extensions are published. // For a given kernel, some of these flags may not be present yet. // Use ifdef for each to allow for compilation with any vintage kernel. diff --git a/src/coreclr/src/pal/src/thread/thread.cpp b/src/coreclr/src/pal/src/thread/thread.cpp index 6efe93492bc3..89d805c35404 100644 --- a/src/coreclr/src/pal/src/thread/thread.cpp +++ b/src/coreclr/src/pal/src/thread/thread.cpp @@ -740,41 +740,6 @@ CorUnix::InternalCreateThread( storedErrno = errno; #endif // PTHREAD_CREATE_MODIFIES_ERRNO -#if HAVE_PTHREAD_ATTR_SETAFFINITY_NP && HAVE_SCHED_GETAFFINITY - { - // Threads inherit their parent's affinity mask on Linux. This is not desired, so we reset - // the current thread's affinity mask to the mask of the current process. - cpu_set_t cpuSet; - CPU_ZERO(&cpuSet); - - int st = sched_getaffinity(gPID, sizeof(cpu_set_t), &cpuSet); - if (st != 0) - { - ASSERT("sched_getaffinity failed!\n"); - // the sched_getaffinity should never fail for getting affinity of the current process - palError = ERROR_INTERNAL_ERROR; - goto EXIT; - } - - st = pthread_attr_setaffinity_np(&pthreadAttr, sizeof(cpu_set_t), &cpuSet); - if (st != 0) - { - if (st == ENOMEM) - { - palError = ERROR_NOT_ENOUGH_MEMORY; - } - else - { - ASSERT("pthread_attr_setaffinity_np failed!\n"); - // The pthread_attr_setaffinity_np should never fail except of OOM when - // passed the mask extracted using sched_getaffinity. 
- palError = ERROR_INTERNAL_ERROR; - } - goto EXIT; - } - } -#endif // HAVE_PTHREAD_GETAFFINITY_NP && HAVE_SCHED_GETAFFINITY - iError = pthread_create(&pthread, &pthreadAttr, CPalThread::ThreadEntry, pNewThread); #if PTHREAD_CREATE_MODIFIES_ERRNO @@ -1754,6 +1719,10 @@ CPalThread::ThreadEntry( PTHREAD_START_ROUTINE pfnStartRoutine; LPVOID pvPar; DWORD retValue; +#if HAVE_SCHED_GETAFFINITY && HAVE_SCHED_SETAFFINITY + cpu_set_t cpuSet; + int st; +#endif pThread = reinterpret_cast<CPalThread*>(pvParam); @@ -1763,6 +1732,42 @@ goto fail; } +#if HAVE_SCHED_GETAFFINITY && HAVE_SCHED_SETAFFINITY + // Threads inherit their parent's affinity mask on Linux. This is not desired, so we reset + // the current thread's affinity mask to the mask of the current process. + // + // Typically, we would use pthread_attr_setaffinity_np() and have pthread_create() create the thread with the specified + // affinity. At least one implementation of pthread_create() following a pthread_attr_setaffinity_np() calls + // sched_setaffinity(<thread id>, ...), which is not allowed under Snap's default strict confinement without manually + // connecting the process-control plug. To work around that, have the thread set the affinity after it starts. + // sched_setaffinity(<pid>, ...) is also currently not allowed, only sched_setaffinity(0, ...). + // pthread_setaffinity_np(pthread_self(), ...) seems to call sched_setaffinity(<thread id>, ...) in at least one + // implementation, and does not work. Use sched_setaffinity(0, ...) instead. See the following for more information: + // - https://github.com/dotnet/runtime/pull/38795 + // - https://github.com/dotnet/runtime/issues/1634 + // - https://forum.snapcraft.io/t/requesting-autoconnect-for-interfaces-in-pigmeat-process-control-home/17987/13 + + CPU_ZERO(&cpuSet); + + st = sched_getaffinity(gPID, sizeof(cpu_set_t), &cpuSet); + if (st != 0) + { + ASSERT("sched_getaffinity failed!\n"); + // The sched_getaffinity should never fail for getting affinity of the current process + palError = ERROR_INTERNAL_ERROR; + goto fail; + } + + st = sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet); + if (st != 0) + { + ASSERT("sched_setaffinity failed!\n"); + // The sched_setaffinity should never fail when passed the mask extracted using sched_getaffinity + palError = ERROR_INTERNAL_ERROR; + goto fail; + } +#endif // HAVE_SCHED_GETAFFINITY && HAVE_SCHED_SETAFFINITY + #if !HAVE_MACH_EXCEPTIONS if (!pThread->EnsureSignalAlternateStack()) { @@ -2946,18 +2951,31 @@ BOOL PALAPI PAL_SetCurrentThreadAffinity(WORD procNo) { -#if HAVE_PTHREAD_GETAFFINITY_NP +#if HAVE_SCHED_SETAFFINITY || HAVE_PTHREAD_SETAFFINITY_NP cpu_set_t cpuSet; CPU_ZERO(&cpuSet); - CPU_SET(procNo, &cpuSet); + + // Snap's default strict confinement does not allow sched_setaffinity(<pid>, ...) without manually connecting the + // process-control plug. sched_setaffinity(<pid>, ...) is also currently not allowed, only + // sched_setaffinity(0, ...). pthread_setaffinity_np(pthread_self(), ...) seems to call + // sched_setaffinity(<thread id>, ...) in at least one implementation, and does not work. To work around those + // issues, use sched_setaffinity(0, ...) if available and only otherwise fall back to pthread_setaffinity_np().
See the + // following for more information: + // - https://github.com/dotnet/runtime/pull/38795 + // - https://github.com/dotnet/runtime/issues/1634 + // - https://forum.snapcraft.io/t/requesting-autoconnect-for-interfaces-in-pigmeat-process-control-home/17987/13 +#if HAVE_SCHED_SETAFFINITY + int st = sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet); +#else int st = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuSet); +#endif return st == 0; -#else // HAVE_PTHREAD_GETAFFINITY_NP +#else // !(HAVE_SCHED_SETAFFINITY || HAVE_PTHREAD_SETAFFINITY_NP) // There is no API to manage thread affinity, so let's ignore the request return FALSE; -#endif // HAVE_PTHREAD_GETAFFINITY_NP +#endif // HAVE_SCHED_SETAFFINITY || HAVE_PTHREAD_SETAFFINITY_NP } /*++ diff --git a/src/coreclr/src/pal/tests/palsuite/DisabledTests.txt b/src/coreclr/src/pal/tests/palsuite/DisabledTests.txt index babd443380fe..0f7094c4a8e8 100644 --- a/src/coreclr/src/pal/tests/palsuite/DisabledTests.txt +++ b/src/coreclr/src/pal/tests/palsuite/DisabledTests.txt @@ -48,4 +48,4 @@ This test case should be run manually. Requires user input. filemapping_memmgt\MapViewOfFile\test1 ======================================= -Refer this github issue https://github.com/dotnet/coreclr/issues/5176 +Refer this github issue https://github.com/dotnet/runtime/issues/5924 diff --git a/src/coreclr/src/pal/tests/palsuite/common/ResultBuffer.cpp b/src/coreclr/src/pal/tests/palsuite/common/ResultBuffer.cpp index 4119399b3618..36924c3c30a2 100644 --- a/src/coreclr/src/pal/tests/palsuite/common/ResultBuffer.cpp +++ b/src/coreclr/src/pal/tests/palsuite/common/ResultBuffer.cpp @@ -3,11 +3,6 @@ //#include "stdafx.h" #include "resultbuffer.h" -// -//#using -// -//using namespace System; - ResultBuffer:: ResultBuffer(int ThreadCount, int ThreadLogSize) { diff --git a/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.cpp b/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.cpp index e99e31ff31d3..32e8f92b3e08 100644 --- a/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.cpp +++ b/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecriticalsection/resultbuffer.cpp @@ -3,11 +3,6 @@ //#include "stdafx.h" #include "resultbuffer.h" -// -//#using -// -//using namespace System; - ResultBuffer:: ResultBuffer(int ThreadCount, int ThreadLogSize) { diff --git a/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.cpp b/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.cpp index e99e31ff31d3..32e8f92b3e08 100644 --- a/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.cpp +++ b/src/coreclr/src/pal/tests/palsuite/composite/synchronization/nativecs_interlocked/resultbuffer.cpp @@ -3,11 +3,6 @@ //#include "stdafx.h" #include "resultbuffer.h" -// -//#using -// -//using namespace System; - ResultBuffer:: ResultBuffer(int ThreadCount, int ThreadLogSize) { diff --git a/src/coreclr/src/pal/tests/palsuite/eventprovider/CMakeLists.txt b/src/coreclr/src/pal/tests/palsuite/eventprovider/CMakeLists.txt index 000ee2d2fb0d..845fae656be4 100644 --- a/src/coreclr/src/pal/tests/palsuite/eventprovider/CMakeLists.txt +++ b/src/coreclr/src/pal/tests/palsuite/eventprovider/CMakeLists.txt @@ -5,10 +5,10 @@ set(SOURCES set(EVENT_MANIFEST 
${VM_DIR}/ClrEtwAll.man) set(TEST_GENERATOR ${CLR_DIR}/src/scripts/genEventingTests.py) -include(FindPython) +include(FindPythonInterp) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/clralltestevents.cpp - COMMAND ${Python_EXECUTABLE} ${TEST_GENERATOR} --testdir "${CMAKE_CURRENT_BINARY_DIR}" --man "${EVENT_MANIFEST}" + COMMAND ${PYTHON_EXECUTABLE} ${TEST_GENERATOR} --testdir "${CMAKE_CURRENT_BINARY_DIR}" --man "${EVENT_MANIFEST}" DEPENDS ${EVENT_MANIFEST} ${TEST_GENERATOR} COMMENT "Updating clralltestevents.cpp" ) diff --git a/src/coreclr/src/pal/tests/palsuite/threading/NamedMutex/test1/namedmutex.cpp b/src/coreclr/src/pal/tests/palsuite/threading/NamedMutex/test1/namedmutex.cpp index 89e9e85a86c4..c2793f732ef5 100644 --- a/src/coreclr/src/pal/tests/palsuite/threading/NamedMutex/test1/namedmutex.cpp +++ b/src/coreclr/src/pal/tests/palsuite/threading/NamedMutex/test1/namedmutex.cpp @@ -841,7 +841,7 @@ DWORD AbandonTests_Child_AbruptExit(void *arg = nullptr) // This child process acquires the mutex lock, creates another child process (to ensure that file locks are not inherited), and // abandons the mutex abruptly. The second child process detects the abandonment and abandons the mutex again for the parent to -// detect. Issue: https://github.com/dotnet/coreclr/issues/21455 +// detect. Issue: https://github.com/dotnet/runtime/issues/11636 DWORD AbandonTests_Child_FileLocksNotInherited_Parent_AbruptExit(void *arg = nullptr) { const char *testName = "AbandonTests"; diff --git a/src/coreclr/src/palrt/unicode.cpp b/src/coreclr/src/palrt/unicode.cpp index 1d8372642ec0..9def01dc63de 100644 --- a/src/coreclr/src/palrt/unicode.cpp +++ b/src/coreclr/src/palrt/unicode.cpp @@ -5,7 +5,7 @@ #include "common.h" // This is a simplified implementation of IsTextUnicode. -// https://github.com/dotnet/coreclr/issues/2307 +// https://github.com/dotnet/runtime/issues/4778 BOOL IsTextUnicode(CONST VOID* lpv, int iSize, LPINT lpiResult) { *lpiResult = 0; diff --git a/src/coreclr/src/tools/Common/Compiler/TypeExtensions.cs b/src/coreclr/src/tools/Common/Compiler/TypeExtensions.cs index 89fd08f3dbab..dd54a1387bdf 100644 --- a/src/coreclr/src/tools/Common/Compiler/TypeExtensions.cs +++ b/src/coreclr/src/tools/Common/Compiler/TypeExtensions.cs @@ -97,6 +97,19 @@ public static bool IsArrayAddressMethod(this MethodDesc method) return arrayMethod != null && arrayMethod.Kind == ArrayMethodKind.Address; } + + /// + /// Returns true if 'method' is one of the special methods on multidimensional array types (set, get, address, ctor). + /// + public static bool IsArrayMethod(this MethodDesc method) + { + var arrayMethod = method as ArrayMethod; + return arrayMethod != null && (arrayMethod.Kind == ArrayMethodKind.Address || + arrayMethod.Kind == ArrayMethodKind.Get || + arrayMethod.Kind == ArrayMethodKind.Set || + arrayMethod.Kind == ArrayMethodKind.Ctor); + } + /// /// Gets a value indicating whether this type has any generic virtual methods.
/// diff --git a/src/coreclr/src/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs b/src/coreclr/src/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs index dd71f2772966..e9cb2ab70827 100644 --- a/src/coreclr/src/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs +++ b/src/coreclr/src/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs @@ -13,9 +13,11 @@ namespace ILCompiler public class VectorFieldLayoutAlgorithm : FieldLayoutAlgorithm { private readonly FieldLayoutAlgorithm _fallbackAlgorithm; + private readonly bool _vectorAbiIsStable; - public VectorFieldLayoutAlgorithm(FieldLayoutAlgorithm fallbackAlgorithm) + public VectorFieldLayoutAlgorithm(FieldLayoutAlgorithm fallbackAlgorithm, bool vectorAbiIsStable) { + _vectorAbiIsStable = vectorAbiIsStable; _fallbackAlgorithm = fallbackAlgorithm; } @@ -73,6 +75,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp FieldAlignment = alignment, FieldSize = layoutFromMetadata.FieldSize, Offsets = layoutFromMetadata.Offsets, + LayoutAbiStable = _vectorAbiIsStable }; } diff --git a/src/coreclr/src/tools/Common/Internal/NativeFormat/NativeFormat.cs b/src/coreclr/src/tools/Common/Internal/NativeFormat/NativeFormat.cs index e92c689d658b..1ebbdac34811 100644 --- a/src/coreclr/src/tools/Common/Internal/NativeFormat/NativeFormat.cs +++ b/src/coreclr/src/tools/Common/Internal/NativeFormat/NativeFormat.cs @@ -84,8 +84,8 @@ enum FixupSignatureKind : uint MethodLdToken = 0x08, AllocateObject = 0x09, DefaultConstructor = 0x0a, - TlsIndex = 0x0b, - TlsOffset = 0x0c, + ThreadStaticIndex = 0x0b, + // unused = 0x0c, Method = 0x0d, IsInst = 0x0e, CastClass = 0x0f, diff --git a/src/coreclr/src/tools/Common/Internal/Runtime/CorConstants.cs b/src/coreclr/src/tools/Common/Internal/Runtime/CorConstants.cs index 0b9f35726a13..57a3353a805d 100644 --- a/src/coreclr/src/tools/Common/Internal/Runtime/CorConstants.cs +++ b/src/coreclr/src/tools/Common/Internal/Runtime/CorConstants.cs @@ -81,7 +81,7 @@ public enum CorElementType : byte // where the encoding/decoding takes place. ELEMENT_TYPE_NATIVE_VALUETYPE_ZAPSIG = 0x3d, - ELEMENT_TYPE_CANON_ZAPSIG = 0x3e, // zapsig encoding for [mscorlib]System.__Canon + ELEMENT_TYPE_CANON_ZAPSIG = 0x3e, // zapsig encoding for System.__Canon ELEMENT_TYPE_MODULE_ZAPSIG = 0x3f, // zapsig encoding for external module id# ELEMENT_TYPE_HANDLE = 64, diff --git a/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index ce6de5de37ca..6d92e7b5d122 100644 --- a/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/src/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -314,6 +314,17 @@ public enum ReadyToRunHelper TypeHandleToRuntimeTypeHandle, } + // Enum used for HFA type recognition. + // Supported across architectures, so that it can be used in altjits and cross-compilation. 
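To make the encoding concrete before the enum definition that follows: a ReadyToRun writer classifies a homogeneous floating-point/vector aggregate once and then emits exactly one of these values. Below is a minimal sketch of that mapping, using simplified stand-in types rather than the compiler's real ValueTypeShapeCharacteristics; the switch mirrors the TypeFixupSignature change later in this diff.

```csharp
using System;

// Stand-ins for illustration only; the real types live in Internal.TypeSystem.
enum AggregateShape { Float32Aggregate, Float64Aggregate, Vector64Aggregate, Vector128Aggregate }
enum ReadyToRunHFAElemType { None = 0, Float32 = 1, Float64 = 2, Vector64 = 3, Vector128 = 4 }

static class HfaEncodingSketch
{
    // Each HFA shape maps to exactly one wire value; an unrecognized shape is an error,
    // not a silent fallback, so altjits and cross-compilation see a stable encoding.
    public static ReadyToRunHFAElemType Classify(AggregateShape shape) => shape switch
    {
        AggregateShape.Float32Aggregate   => ReadyToRunHFAElemType.Float32,
        AggregateShape.Float64Aggregate   => ReadyToRunHFAElemType.Float64,
        AggregateShape.Vector64Aggregate  => ReadyToRunHFAElemType.Vector64,
        AggregateShape.Vector128Aggregate => ReadyToRunHFAElemType.Vector128,
        _ => throw new NotSupportedException()
    };
}
```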
+ public enum ReadyToRunHFAElemType + { + None = 0, + Float32 = 1, + Float64 = 2, + Vector64 = 3, + Vector128 = 4, + } + public static class ReadyToRunRuntimeConstants { public const int READYTORUN_PInvokeTransitionFrameSizeInPointerUnits = 11; diff --git a/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.Intrinsics.cs b/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.Intrinsics.cs index 3f6c089744b2..46cac9f2e8d4 100644 --- a/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.Intrinsics.cs +++ b/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.Intrinsics.cs @@ -76,55 +76,6 @@ static IntrinsicHashtable InitializeIntrinsicHashtable() { IntrinsicHashtable table = new IntrinsicHashtable(); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Sin, "Sin", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Sin, "Sin", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Cos, "Cos", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Cos, "Cos", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Cbrt, "Cbrt", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Cbrt, "Cbrt", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Sqrt, "Sqrt", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Sqrt, "Sqrt", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Abs, "Abs", "System", "Math"); - // No System.MathF entry for CORINFO_INTRTINSIC_Abs as System.Math exposes and handles both float and double - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Round, "Round", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Round, "Round", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Cosh, "Cosh", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Cosh, "Cosh", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Sinh, "Sinh", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Sinh, "Sinh", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Tan, "Tan", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Tan, "Tan", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Tanh, "Tanh", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Tanh, "Tanh", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Asin, "Asin", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Asin, "Asin", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Asinh, "Asinh", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Asinh, "Asinh", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Acos, "Acos", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Acos, "Acos", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Acosh, "Acosh", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Acosh, "Acosh", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Atan, "Atan", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Atan, "Atan", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Atan2, "Atan2", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Atan2, "Atan2", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Atanh, "Atanh", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Atanh, "Atanh", "System", "MathF"); - 
table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Log10, "Log10", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Log10, "Log10", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Pow, "Pow", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Pow, "Pow", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Exp, "Exp", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Exp, "Exp", "System", "MathF"); -#if !READYTORUN - // These are normally handled via the SSE4.1 instructions ROUNDSS/ROUNDSD. - // However, we don't know the ISAs the target machine supports so we should - // fallback to the method call implementation instead. - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Ceiling, "Ceiling", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Ceiling, "Ceiling", "System", "MathF"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Floor, "Floor", "System", "Math"); - table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Floor, "Floor", "System", "MathF"); -#endif // table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_GetChar, null, null, null); // unused // table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Array_GetDimLength, "GetLength", "System", "Array"); // not handled table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_Array_Get, "Get", null, null); @@ -163,7 +114,7 @@ static IntrinsicHashtable InitializeIntrinsicHashtable() table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_GetRawHandle, "AllocatorOf", "System", "Activator"); // If this assert fails, make sure to add the new intrinsics to the table above and update the expected count below. - Debug.Assert((int)CorInfoIntrinsics.CORINFO_INTRINSIC_Count == 56, "Please update intrinsic hash table"); + Debug.Assert((int)CorInfoIntrinsics.CORINFO_INTRINSIC_Count == 34, "Please update intrinsic hash table"); return table; } @@ -201,14 +152,6 @@ private CorInfoIntrinsics getIntrinsicID(MethodDesc method, byte* pMustExpand) CorInfoIntrinsics id = entry.Id; switch (id) { - case CorInfoIntrinsics.CORINFO_INTRINSIC_Abs: - { - // RyuJIT handles floating point overloads only - var returnTypeCategory = method.Signature.ReturnType.Category; - if (returnTypeCategory != TypeFlags.Double && returnTypeCategory != TypeFlags.Single) - return CorInfoIntrinsics.CORINFO_INTRINSIC_Illegal; - } - break; case CorInfoIntrinsics.CORINFO_INTRINSIC_Array_Get: case CorInfoIntrinsics.CORINFO_INTRINSIC_Array_Address: case CorInfoIntrinsics.CORINFO_INTRINSIC_Array_Set: diff --git a/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs index da5d691a39cf..366ed36751bf 100644 --- a/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/src/tools/Common/JitInterface/CorInfoImpl.cs @@ -508,6 +508,7 @@ private bool TryGetUnmanagedCallingConventionFromModOpt(MethodSignature signatur if (!signature.HasEmbeddedSignatureData || signature.GetEmbeddedSignatureData() == null) return false; + bool found = false; foreach (EmbeddedSignatureData data in signature.GetEmbeddedSignatureData()) { if (data.kind != EmbeddedSignatureDataKind.OptionalCustomModifier) @@ -524,25 +525,28 @@ private bool TryGetUnmanagedCallingConventionFromModOpt(MethodSignature signatur if (defType.Namespace != "System.Runtime.CompilerServices") continue; - // Take the first recognized calling convention in metadata. - switch (defType.Name) + // Look for a recognized calling convention in metadata. + CorInfoCallConv? 
callConvLocal = defType.Name switch { - case "CallConvCdecl": - callConv = CorInfoCallConv.CORINFO_CALLCONV_C; - return true; - case "CallConvStdcall": - callConv = CorInfoCallConv.CORINFO_CALLCONV_STDCALL; - return true; - case "CallConvFastcall": - callConv = CorInfoCallConv.CORINFO_CALLCONV_FASTCALL; - return true; - case "CallConvThiscall": - callConv = CorInfoCallConv.CORINFO_CALLCONV_THISCALL; - return true; + "CallConvCdecl" => CorInfoCallConv.CORINFO_CALLCONV_C, + "CallConvStdcall" => CorInfoCallConv.CORINFO_CALLCONV_STDCALL, + "CallConvFastcall" => CorInfoCallConv.CORINFO_CALLCONV_FASTCALL, + "CallConvThiscall" => CorInfoCallConv.CORINFO_CALLCONV_THISCALL, + _ => null + }; + + if (callConvLocal.HasValue) + { + // Error if there are multiple recognized calling conventions + if (found) + ThrowHelper.ThrowInvalidProgramException(ExceptionStringID.InvalidProgramMultipleCallConv, MethodBeingCompiled); + + callConv = callConvLocal.Value; + found = true; } } - return false; + return found; } private void Get_CORINFO_SIG_INFO(MethodSignature signature, CORINFO_SIG_INFO* sig) @@ -811,7 +815,7 @@ private uint getMethodAttribsInternal(MethodDesc method) // do a dynamic check instead. if ( !HardwareIntrinsicHelpers.IsIsSupportedMethod(method) - || !_compilation.IsHardwareInstrinsicWithRuntimeDeterminedSupport(method)) + || !_compilation.IsHardwareIntrinsicWithRuntimeDeterminedSupport(method)) #endif { result |= CorInfoFlag.CORINFO_FLG_JIT_INTRINSIC; @@ -1243,6 +1247,7 @@ private void findSig(CORINFO_MODULE_STRUCT_* module, uint sigTOK, CORINFO_CONTEX { var methodIL = (MethodIL)HandleToObject((IntPtr)module); var methodSig = (MethodSignature)methodIL.GetObject((int)sigTOK); + Get_CORINFO_SIG_INFO(methodSig, sig); if (sig->callConv == CorInfoCallConv.CORINFO_CALLCONV_UNMANAGED) @@ -1256,6 +1261,8 @@ private void findSig(CORINFO_MODULE_STRUCT_* module, uint sigTOK, CORINFO_CONTEX { sig->flags |= CorInfoSigInfoFlags.CORINFO_SIGFLAG_FAT_CALL; } +#else + VerifyMethodSignatureIsStable(methodSig); #endif } @@ -2912,6 +2919,14 @@ private void recordRelocation(void* location, void* target, ushort fRelocType, u default: // Reloc points to something outside of the generated blocks var targetObject = HandleToObject((IntPtr)target); + +#if READYTORUN + if (targetObject is RequiresRuntimeJitIfUsedSymbol requiresRuntimeSymbol) + { + throw new RequiresRuntimeJitException(requiresRuntimeSymbol.Message); + } +#endif + relocTarget = (ISymbolNode)targetObject; break; } diff --git a/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs index c1a9f87533ad..8f4b1f60b92a 100644 --- a/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs @@ -416,28 +416,6 @@ public enum CorInfoOptions public enum CorInfoIntrinsics { - CORINFO_INTRINSIC_Sin, - CORINFO_INTRINSIC_Cos, - CORINFO_INTRINSIC_Cbrt, - CORINFO_INTRINSIC_Sqrt, - CORINFO_INTRINSIC_Abs, - CORINFO_INTRINSIC_Round, - CORINFO_INTRINSIC_Cosh, - CORINFO_INTRINSIC_Sinh, - CORINFO_INTRINSIC_Tan, - CORINFO_INTRINSIC_Tanh, - CORINFO_INTRINSIC_Asin, - CORINFO_INTRINSIC_Asinh, - CORINFO_INTRINSIC_Acos, - CORINFO_INTRINSIC_Acosh, - CORINFO_INTRINSIC_Atan, - CORINFO_INTRINSIC_Atan2, - CORINFO_INTRINSIC_Atanh, - CORINFO_INTRINSIC_Log10, - CORINFO_INTRINSIC_Pow, - CORINFO_INTRINSIC_Exp, - CORINFO_INTRINSIC_Ceiling, - CORINFO_INTRINSIC_Floor, CORINFO_INTRINSIC_GetChar, // fetch character out of string CORINFO_INTRINSIC_Array_GetDimLength, // Get 
number of elements in a given dimension of an array CORINFO_INTRINSIC_Array_Get, // Get the value of an element in an array diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/CastingHelper.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/CastingHelper.cs index d10a2dc0fec2..e10782ef0ad9 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/CastingHelper.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/CastingHelper.cs @@ -166,6 +166,17 @@ private static bool CanCastToInternal(this TypeDesc thisType, TypeDesc otherType case TypeFlags.SzArray: return ((ArrayType)thisType).CanCastArrayTo(otherType, protect); + case TypeFlags.ByRef: + case TypeFlags.Pointer: + if (otherType.Category == thisType.Category) + { + return ((ParameterizedType)thisType).CanCastParamTo(((ParameterizedType)otherType).ParameterType, protect); + } + return false; + + case TypeFlags.FunctionPointer: + return false; + default: Debug.Assert(thisType.IsDefType); return thisType.CanCastToClassOrInterface(otherType, protect); diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs index 6c9c2b22e5cf..66b5b1a45290 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs @@ -48,6 +48,11 @@ private class FieldLayoutFlags /// True if information about the shape of value type has been computed. /// public const int ComputedValueTypeShapeCharacteristics = 0x40; + + /// + /// True if the layout of the type is not stable for use in the ABI + /// + public const int ComputedInstanceLayoutAbiUnstable = 0x80; } private class StaticBlockInfo @@ -156,6 +161,21 @@ public LayoutInt InstanceByteAlignment } } + /// + /// The type has stable Abi layout + /// + public bool LayoutAbiStable + { + get + { + if (!_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedInstanceTypeLayout)) + { + ComputeInstanceLayout(InstanceLayoutKind.TypeOnly); + } + return !_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedInstanceLayoutAbiUnstable); + } + } + /// /// How many bytes must be allocated to represent the non GC visible static fields of this type. 
/// @@ -335,6 +355,10 @@ public void ComputeInstanceLayout(InstanceLayoutKind layoutKind) _instanceFieldAlignment = computedLayout.FieldAlignment; _instanceByteCountUnaligned = computedLayout.ByteCountUnaligned; _instanceByteAlignment = computedLayout.ByteCountAlignment; + if (!computedLayout.LayoutAbiStable) + { + _fieldLayoutFlags.AddFlags(FieldLayoutFlags.ComputedInstanceLayoutAbiUnstable); + } if (computedLayout.Offsets != null) { diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/ExceptionStringID.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/ExceptionStringID.cs index 00286eabe3a9..d9d3de41634d 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/ExceptionStringID.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/ExceptionStringID.cs @@ -36,6 +36,7 @@ public enum ExceptionStringID InvalidProgramNonStaticMethod, InvalidProgramGenericMethod, InvalidProgramNonBlittableTypes, + InvalidProgramMultipleCallConv, // BadImageFormatException BadImageFormatGeneric, diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs index cff8364566b2..47c86385c15b 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs @@ -76,6 +76,7 @@ public struct ComputedInstanceFieldLayout public LayoutInt FieldAlignment; public LayoutInt ByteCountUnaligned; public LayoutInt ByteCountAlignment; + public bool LayoutAbiStable; // Is the layout stable such that it can safely be used in function calling conventions /// /// If Offsets is non-null, then all field based layout is complete. diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs index c0b317e201ee..fb142c4a3e10 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs @@ -112,6 +112,7 @@ out instanceByteSizeAndAlignment ByteCountAlignment = instanceByteSizeAndAlignment.Alignment, FieldAlignment = sizeAndAlignment.Alignment, FieldSize = sizeAndAlignment.Size, + LayoutAbiStable = true }; if (numInstanceFields > 0) @@ -227,7 +228,7 @@ public override ComputedStaticFieldLayout ComputeStaticFieldLayout(DefType defTy } ref StaticsBlock block = ref GetStaticsBlockForField(ref result, field); - SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, context.Target.DefaultPackingSize); + SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, context.Target.DefaultPackingSize, out bool _); block.Size = LayoutInt.AlignUp(block.Size, sizeAndAlignment.Alignment, context.Target); result.Offsets[index] = new FieldAndOffset(field, block.Size); @@ -318,11 +319,14 @@ protected static ComputedInstanceFieldLayout ComputeExplicitFieldLayout(Metadata var offsets = new FieldAndOffset[numInstanceFields]; int fieldOrdinal = 0; + bool layoutAbiStable = true; foreach (var fieldAndOffset in layoutMetadata.Offsets) { TypeDesc fieldType = fieldAndOffset.Field.FieldType; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, packingSize); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, packingSize, out bool fieldLayoutAbiStable); + if (!fieldLayoutAbiStable) + layoutAbiStable = false; largestAlignmentRequired = 
LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequired); @@ -367,6 +371,8 @@ protected static ComputedInstanceFieldLayout ComputeExplicitFieldLayout(Metadata computedLayout.ByteCountUnaligned = instanceByteSizeAndAlignment.Size; computedLayout.ByteCountAlignment = instanceByteSizeAndAlignment.Alignment; computedLayout.Offsets = offsets; + computedLayout.LayoutAbiStable = layoutAbiStable; + ExplicitLayoutValidator.Validate(type, computedLayout); @@ -385,13 +391,16 @@ protected static ComputedInstanceFieldLayout ComputeSequentialFieldLayout(Metada LayoutInt largestAlignmentRequirement = LayoutInt.One; int fieldOrdinal = 0; int packingSize = ComputePackingSize(type, layoutMetadata); + bool layoutAbiStable = true; foreach (var field in type.GetFields()) { if (field.IsStatic) continue; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, packingSize); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, packingSize, out bool fieldLayoutAbiStable); + if (!fieldLayoutAbiStable) + layoutAbiStable = false; largestAlignmentRequirement = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequirement); @@ -411,6 +420,7 @@ protected static ComputedInstanceFieldLayout ComputeSequentialFieldLayout(Metada computedLayout.ByteCountUnaligned = instanceByteSizeAndAlignment.Size; computedLayout.ByteCountAlignment = instanceByteSizeAndAlignment.Alignment; computedLayout.Offsets = offsets; + computedLayout.LayoutAbiStable = layoutAbiStable; return computedLayout; } @@ -460,7 +470,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, { Debug.Assert(fieldType.IsPrimitive || fieldType.IsPointer || fieldType.IsFunctionPointer || fieldType.IsEnum); - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, packingSize); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, packingSize, out bool _); instanceNonGCPointerFieldsCount[CalculateLog2(fieldSizeAndAlignment.Size.AsInt)]++; } } @@ -485,6 +495,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, instanceGCPointerFieldsCount = 0; instanceValueClassFieldCount = 0; LayoutInt largestAlignmentRequired = LayoutInt.One; + bool layoutAbiStable = true; // Iterate over all fields and do the following // - Add instance fields to the appropriate array (while maintaining the enumerated order) @@ -496,7 +507,10 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, TypeDesc fieldType = field.FieldType; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, packingSize); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, packingSize, out bool fieldLayoutAbiStable); + if (!fieldLayoutAbiStable) + layoutAbiStable = false; + largestAlignmentRequired = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequired); if (IsByValueClass(fieldType)) @@ -624,7 +638,9 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, // If the field has an indeterminate alignment, align the cumulative field offset to the indeterminate value // Otherwise, align the cumulative field offset to the PointerSize // This avoids issues with Universal Generic Field layouts whose fields may have Indeterminate sizes or alignments - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, packingSize); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, 
packingSize, out bool fieldLayoutAbiStable); + if (!fieldLayoutAbiStable) + layoutAbiStable = false; if (fieldSizeAndAlignment.Alignment.IsIndeterminate) { @@ -674,13 +690,14 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, computedLayout.ByteCountUnaligned = instanceByteSizeAndAlignment.Size; computedLayout.ByteCountAlignment = instanceByteSizeAndAlignment.Alignment; computedLayout.Offsets = offsets; + computedLayout.LayoutAbiStable = layoutAbiStable; return computedLayout; } private static void PlaceInstanceField(FieldDesc field, int packingSize, FieldAndOffset[] offsets, ref LayoutInt instanceFieldPos, ref int fieldOrdinal) { - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, packingSize); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, packingSize, out bool _); instanceFieldPos = LayoutInt.AlignUp(instanceFieldPos, fieldSizeAndAlignment.Alignment, field.Context.Target); offsets[fieldOrdinal] = new FieldAndOffset(field, instanceFieldPos); @@ -732,9 +749,10 @@ private static LayoutInt ComputeBytesUsedInParentType(DefType type) return cumulativeInstanceFieldPos; } - private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, int packingSize) + private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, int packingSize, out bool layoutAbiStable) { SizeAndAlignment result; + layoutAbiStable = true; if (fieldType.IsDefType) { @@ -743,6 +761,7 @@ private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, DefType metadataType = (DefType)fieldType; result.Size = metadataType.InstanceFieldSize; result.Alignment = metadataType.InstanceFieldAlignment; + layoutAbiStable = metadataType.LayoutAbiStable; } else { diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/MethodDesc.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/MethodDesc.cs index 0545857397ba..1da52dccd3bd 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/MethodDesc.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/MethodDesc.cs @@ -51,6 +51,14 @@ public sealed partial class MethodSignature : TypeSystemEntity // Value of the custom modifier index for any custom modifiers on the return type public const string IndexOfCustomModifiersOnReturnType = "0.1.1.1"; + // Value of the custom modifier index for any custom modifiers on + // SomeStruct when SomeStruct *, or SomeStruct & is the type of a parameter or return type + // Parameter index 0 represents the return type, and indices 1-n represent the parameters to the signature + public static string GetIndexOfCustomModifierOnPointedAtTypeByParameterIndex(int parameterIndex) + { + return $"0.1.1.2.{(parameterIndex + 1).ToStringInvariant()}.1"; + } + public MethodSignature(MethodSignatureFlags flags, int genericParameterCount, TypeDesc returnType, TypeDesc[] parameters, EmbeddedSignatureData[] embeddedSignatureData = null) { _flags = flags; diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/Properties/Resources.resx b/src/coreclr/src/tools/Common/TypeSystem/Common/Properties/Resources.resx index 368145929e05..b0efd67b4adc 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/Properties/Resources.resx +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/Properties/Resources.resx @@ -174,7 +174,10 @@ UnmanagedCallersOnly attribute specified on method with non-blittable parameters '{0}' + + Multiple unmanaged calling conventions are specified. Only a single calling convention is supported.
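The resx entry above pairs with the TryGetUnmanagedCallingConventionFromModOpt rewrite earlier in this diff: instead of returning on the first recognized CallConv* modifier, the scan now continues and raises this error when a second one matches. A condensed sketch of that pattern, with plain strings standing in for the metadata modifier types:

```csharp
using System;

static class CallConvScanSketch
{
    enum CallConv { C, Stdcall, Fastcall, Thiscall }

    // modOptTypeNames stands in for the System.Runtime.CompilerServices modopt type
    // names found on the signature; the real code walks EmbeddedSignatureData instead.
    static CallConv? GetUnmanagedCallConv(string[] modOptTypeNames)
    {
        CallConv? found = null;
        foreach (string name in modOptTypeNames)
        {
            CallConv? local = name switch
            {
                "CallConvCdecl"    => CallConv.C,
                "CallConvStdcall"  => CallConv.Stdcall,
                "CallConvFastcall" => CallConv.Fastcall,
                "CallConvThiscall" => CallConv.Thiscall,
                _ => null
            };

            if (local.HasValue)
            {
                // A second match corresponds to InvalidProgramMultipleCallConv in the real code.
                if (found.HasValue)
                    throw new InvalidProgramException("Multiple unmanaged calling conventions are specified.");
                found = local;
            }
        }
        return found;
    }
}
```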
+ The format of a DLL or executable being loaded is invalid - \ No newline at end of file + diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/ThrowHelper.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/ThrowHelper.cs index 9ab806e77ffe..094ade10faca 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/ThrowHelper.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/ThrowHelper.cs @@ -41,6 +41,12 @@ public static void ThrowInvalidProgramException() throw new TypeSystemException.InvalidProgramException(); } + [System.Diagnostics.DebuggerHidden] + public static void ThrowInvalidProgramException(ExceptionStringID id) + { + throw new TypeSystemException.InvalidProgramException(id); + } + [System.Diagnostics.DebuggerHidden] public static void ThrowInvalidProgramException(ExceptionStringID id, MethodDesc method) { diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/TypeSystemException.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/TypeSystemException.cs index 931136950f1a..17836ca309bd 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/TypeSystemException.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/TypeSystemException.cs @@ -138,6 +138,11 @@ internal InvalidProgramException(ExceptionStringID id, string method) { } + internal InvalidProgramException(ExceptionStringID id) + : base(id) + { + } + internal InvalidProgramException() : base(ExceptionStringID.InvalidProgramDefault) { diff --git a/src/coreclr/src/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs b/src/coreclr/src/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs index f32daba077b2..9b58069b5758 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Common/UniversalCanonLayoutAlgorithm.cs @@ -29,7 +29,8 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, FieldAlignment = LayoutInt.Indeterminate, ByteCountUnaligned = LayoutInt.Indeterminate, ByteCountAlignment = LayoutInt.Indeterminate, - Offsets = Array.Empty() + Offsets = Array.Empty(), + LayoutAbiStable = true }; } diff --git a/src/coreclr/src/tools/Common/TypeSystem/IL/Stubs/VolatileIntrinsics.cs b/src/coreclr/src/tools/Common/TypeSystem/IL/Stubs/VolatileIntrinsics.cs index f9ab4c106055..35dca8644ac9 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/IL/Stubs/VolatileIntrinsics.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/IL/Stubs/VolatileIntrinsics.cs @@ -63,7 +63,7 @@ public static MethodIL EmitIL(MethodDesc method) // // Ordinary volatile loads and stores only guarantee atomicity for pointer-sized (or smaller) data. // So, on 32-bit platforms we must use Interlocked operations instead for the 64-bit types. - // The implementation in mscorlib already does this, so we will only substitute a new + // The implementation in CoreLib already does this, so we will only substitute a new // IL body if we're running on a 64-bit platform. 
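For context on the comment above, the Interlocked-based shape that CoreLib's managed implementation falls back to for 64-bit values on 32-bit platforms looks roughly like this (a sketch of the general pattern, not the actual CoreLib source):

```csharp
using System.Threading;

static class VolatileInt64Sketch
{
    // A plain volatile load/store of a 64-bit value is not atomic on a 32-bit target,
    // so both accessors go through Interlocked, which is atomic for long everywhere.
    public static long Read(ref long location) =>
        Interlocked.CompareExchange(ref location, 0, 0); // only ever replaces 0 with 0; returns the current value atomically

    public static void Write(ref long location, long value) =>
        Interlocked.Exchange(ref location, value);
}
```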
// case TypeFlags.Int64 when method.Context.Target.PointerSize == 8: diff --git a/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs b/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs index 956e4fc8e274..7b05a9b4ed90 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs @@ -166,13 +166,44 @@ internal static TypeDesc GetNativeTypeFromMarshallerKind(TypeDesc type, } } + private static bool HasCopyConstructorCustomModifier(int? parameterIndex, + EmbeddedSignatureData[] customModifierData) + { + if (!parameterIndex.HasValue || customModifierData == null) + return false; + + string customModifierIndex = MethodSignature.GetIndexOfCustomModifierOnPointedAtTypeByParameterIndex(parameterIndex.Value); + foreach (var customModifier in customModifierData) + { + if (customModifier.kind != EmbeddedSignatureDataKind.RequiredCustomModifier) + continue; + + if (customModifier.index != customModifierIndex) + continue; + + var customModifierType = customModifier.type as DefType; + if (customModifierType == null) + continue; + + if ((customModifierType.Namespace == "System.Runtime.CompilerServices" && customModifierType.Name == "IsCopyConstructed") || + (customModifierType.Namespace == "Microsoft.VisualC" && customModifierType.Name == "NeedsCopyConstructorModifier")) + { + return true; + } + } + + return false; + } + internal static MarshallerKind GetMarshallerKind( - TypeDesc type, - MarshalAsDescriptor marshalAs, - bool isReturn, - bool isAnsi, - MarshallerType marshallerType, - out MarshallerKind elementMarshallerKind) + TypeDesc type, + int? parameterIndex, + EmbeddedSignatureData[] customModifierData, + MarshalAsDescriptor marshalAs, + bool isReturn, + bool isAnsi, + MarshallerType marshallerType, + out MarshallerKind elementMarshallerKind) { elementMarshallerKind = MarshallerKind.Invalid; @@ -183,6 +214,12 @@ internal static MarshallerKind GetMarshallerKind( type = type.GetParameterType(); + if (!type.IsPrimitive && type.IsValueType && marshallerType != MarshallerType.Field + && HasCopyConstructorCustomModifier(parameterIndex, customModifierData)) + { + return MarshallerKind.BlittableValueClassWithCopyCtor; + } + // Compat note: CLR allows ref returning blittable structs for IJW if (isReturn) return MarshallerKind.Invalid; @@ -444,7 +481,15 @@ internal static MarshallerKind GetMarshallerKind( else if (type.IsPointer) { if (nativeType == NativeTypeKind.Default) + { + var pointedAtType = type.GetParameterType(); + if (!pointedAtType.IsPrimitive && !type.IsEnum && marshallerType != MarshallerType.Field + && HasCopyConstructorCustomModifier(parameterIndex, customModifierData)) + { + return MarshallerKind.BlittableValueClassWithCopyCtor; + } return MarshallerKind.BlittableValue; + } else return MarshallerKind.Invalid; } diff --git a/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalUtils.cs b/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalUtils.cs index 65f0956943fb..f713f9cb0c29 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalUtils.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/MarshalUtils.cs @@ -44,6 +44,8 @@ public static bool IsBlittableType(TypeDesc type) MarshallerKind marshallerKind = MarshalHelpers.GetMarshallerKind( field.FieldType, + parameterIndex : null, + customModifierData: null, field.GetMarshalAsDescriptor(), isReturn: false, isAnsi: mdType.PInvokeStringFormat == PInvokeStringFormat.AnsiClass, diff 
--git a/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/Marshaller.cs b/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/Marshaller.cs index 31c869463025..ecd7b0df4680 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/Marshaller.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/Interop/IL/Marshaller.cs @@ -50,6 +50,7 @@ enum MarshallerKind AsAnyA, AsAnyW, ComInterface, + BlittableValueClassWithCopyCtor, Invalid } public enum MarshalDirection @@ -271,6 +272,8 @@ protected Marshaller() /// type of the parameter to marshal /// The created Marshaller public static Marshaller CreateMarshaller(TypeDesc parameterType, + int? parameterIndex, + EmbeddedSignatureData[] customModifierData, MarshallerType marshallerType, MarshalAsDescriptor marshalAs, MarshalDirection direction, @@ -286,6 +289,8 @@ public static Marshaller CreateMarshaller(TypeDesc parameterType, { MarshallerKind elementMarshallerKind; MarshallerKind marshallerKind = MarshalHelpers.GetMarshallerKind(parameterType, + parameterIndex, + customModifierData, marshalAs, isReturn, flags.CharSet == CharSet.Ansi, diff --git a/src/coreclr/src/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs b/src/coreclr/src/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs index 65ee522f5273..59857683b10c 100644 --- a/src/coreclr/src/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs +++ b/src/coreclr/src/tools/Common/TypeSystem/RuntimeDetermined/RuntimeDeterminedFieldLayoutAlgorithm.cs @@ -30,7 +30,8 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp ByteCountAlignment = canonicalType.InstanceByteAlignment, FieldAlignment = canonicalType.InstanceFieldAlignment, FieldSize = canonicalType.InstanceFieldSize, - Offsets = Array.Empty() + Offsets = Array.Empty(), + LayoutAbiStable = canonicalType.LayoutAbiStable }; return result; diff --git a/src/coreclr/src/tools/GCLogParser/parse-hb-log.csproj b/src/coreclr/src/tools/GCLogParser/parse-hb-log.csproj index 73ea1b718287..5bff6087f510 100644 --- a/src/coreclr/src/tools/GCLogParser/parse-hb-log.csproj +++ b/src/coreclr/src/tools/GCLogParser/parse-hb-log.csproj @@ -9,7 +9,6 @@ parse_hb_log parse-hb-log v4.7.2 - 512 true true @@ -20,8 +19,6 @@ false artifacts\Debug\ DEBUG;TRACE - prompt - 4 AnyCPU @@ -29,8 +26,6 @@ true artifacts\Release\ TRACE - prompt - 4 true @@ -38,7 +33,6 @@ DEBUG;TRACE full x64 - prompt MinimumRecommendedRules.ruleset true @@ -48,7 +42,6 @@ true pdbonly x64 - prompt MinimumRecommendedRules.ruleset true diff --git a/src/coreclr/src/tools/ILVerification/README.md b/src/coreclr/src/tools/ILVerification/README.md index 2c3277a23a10..48ed8724f664 100644 --- a/src/coreclr/src/tools/ILVerification/README.md +++ b/src/coreclr/src/tools/ILVerification/README.md @@ -1,3 +1,3 @@ # ILVerification -The ILVerification library is part of the ILVerify project. See details under [src/coreclr/src/tools/ILVerify](../ILVerify). \ No newline at end of file +The ILVerification library is part of the ILVerify project. See details under [src/coreclr/src/tools/ILVerify](../ILVerify). 
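Pulling the marshalling pieces above together: HasCopyConstructorCustomModifier accepts a parameter only when a required modifier sits at the "pointed-at type" index position defined in MethodDesc.cs, and only two modifier types qualify. A condensed sketch with simplified stand-ins for the type-system entities:

```csharp
static class CopyCtorModifierSketch
{
    // Simplified stand-in for EmbeddedSignatureData.
    sealed record Modifier(bool IsRequired, string Index, string Namespace, string Name);

    static bool HasCopyConstructorModifier(int parameterIndex, Modifier[] modifiers)
    {
        if (modifiers == null)
            return false;

        // Same index scheme as GetIndexOfCustomModifierOnPointedAtTypeByParameterIndex:
        // 0 is the return type, 1..n are the parameters, and the trailing ".1" selects
        // the pointed-at type of the pointer/byref. E.g. parameterIndex 2 -> "0.1.1.2.3.1".
        string expectedIndex = $"0.1.1.2.{parameterIndex + 1}.1";

        foreach (Modifier m in modifiers)
        {
            if (!m.IsRequired || m.Index != expectedIndex)
                continue;

            if ((m.Namespace == "System.Runtime.CompilerServices" && m.Name == "IsCopyConstructed") ||
                (m.Namespace == "Microsoft.VisualC" && m.Name == "NeedsCopyConstructorModifier"))
                return true;
        }

        return false;
    }
}
```

A match routes the parameter to the new BlittableValueClassWithCopyCtor marshaller kind instead of plain BlittableValue.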
diff --git a/src/coreclr/src/tools/ILVerify/README.md b/src/coreclr/src/tools/ILVerify/README.md index a3c0001a6456..7f61e432569b 100644 --- a/src/coreclr/src/tools/ILVerify/README.md +++ b/src/coreclr/src/tools/ILVerify/README.md @@ -92,17 +92,7 @@ The methods are automatically fed into appropriate XUnit theories based on the n You can run the tests either in Visual Studio (in Test Explorer) or with the ```dotnet test ``` command from the command line. ## How to contribute -All ILVerify issues are labeled with [area-ILVerification](https://github.com/search?utf8=%E2%9C%93&q=label%3Aarea-ILVerification&type=). - -ILVerify basically runs through the IL commands in an assembly and does all the verification steps that are specified in ECMA-335. - -Currently every IL command falls into one of these categories: - - - Not implemented: the implementation is completely missing. The easiest way is to pick one of them (look for NotImplementedException in the code) and implement it. First you should 100% understand the spec. (see [ECMA-335](https://www.ecma-international.org/publications/standards/Ecma-335.htm)), then try to port an existing implementation (sources below). - - Partially implemented: These are typically methods with TODOs in them. As the first phase we want to make sure that for every command the stack is correctly maintained, therefore for some commands we either have no verification or we have only incomplete verification. You can also pick one of these and finish it. - - Implemented: find and fix bugs ;) . - -Another option to contribute is to write tests (see Tests section). +All ILVerify issues are labeled with [area-ILVerification](https://github.com/search?utf8=%E2%9C%93&q=label%3Aarea-ILVerification&type=). You can also look and fix TODOs in the source code. Useful sources: - [PEVerify source code](https://github.com/lewischeng-ms/sscli/blob/master/clr/src/jit64/newverify.cpp) diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs index 20d6dac16b8a..68239af41359 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/CodeGen/ReadyToRunObjectWriter.cs @@ -6,6 +6,7 @@ using System.Diagnostics; using System.IO; using System.Linq; +using System.Reflection.Metadata; using System.Reflection.PortableExecutable; using ILCompiler.DependencyAnalysis.ReadyToRun; @@ -102,8 +103,9 @@ public void EmitPortableExecutable() stopwatch.Start(); PEHeaderBuilder headerBuilder; - int timeDateStamp; + int?
timeDateStamp; ISymbolNode r2rHeaderExportSymbol; + Func<IEnumerable<Blob>, BlobContentId> peIdProvider = null; if (_nodeFactory.CompilationModuleGroup.IsCompositeBuildMode && _componentModule == null) { @@ -112,8 +114,8 @@ dllCharacteristics: default(DllCharacteristics), Subsystem.Unknown, _nodeFactory.Target); - // TODO: generate a non-zero timestamp: https://github.com/dotnet/runtime/issues/32507 - timeDateStamp = 0; + peIdProvider = new Func<IEnumerable<Blob>, BlobContentId>(content => BlobContentId.FromHash(CryptographicHashProvider.ComputeSourceHash(content))); + timeDateStamp = null; r2rHeaderExportSymbol = _nodeFactory.Header; } else @@ -135,7 +137,8 @@ r2rHeaderExportSymbol, Path.GetFileName(_objectFilePath), getRuntimeFunctionsTable, - _customPESectionAlignment); + _customPESectionAlignment, + peIdProvider); NativeDebugDirectoryEntryNode nativeDebugDirectoryEntryNode = null; ISymbolDefinitionNode firstImportThunk = null; diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/CryptographicHashProvider.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/CryptographicHashProvider.cs new file mode 100644 index 000000000000..dafd0c1ae7bb --- /dev/null +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/CryptographicHashProvider.cs @@ -0,0 +1,252 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#nullable enable + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.IO; +using System.Linq; +using System.Reflection; +using System.Reflection.Metadata; +using System.Security.Cryptography; + +namespace ILCompiler +{ + /// + /// Specifies the hash algorithm used for hashing source files. + /// + public enum SourceHashAlgorithm + { + /// + /// No algorithm specified. + /// + None = 0, + + /// + /// Secure Hash Algorithm 1. + /// + Sha1 = 1, + + /// + /// Secure Hash Algorithm 2 with a hash size of 256 bits. + /// + Sha256 = 2, + } + + internal static class SourceHashAlgorithmUtils + { + public const SourceHashAlgorithm DefaultContentHashAlgorithm = SourceHashAlgorithm.Sha256; + } + + internal abstract class CryptographicHashProvider + { + private ImmutableArray<byte> _lazySHA1Hash; + private ImmutableArray<byte> _lazySHA256Hash; + private ImmutableArray<byte> _lazySHA384Hash; + private ImmutableArray<byte> _lazySHA512Hash; + private ImmutableArray<byte> _lazyMD5Hash; + + internal abstract ImmutableArray<byte> ComputeHash(HashAlgorithm algorithm); + + internal ImmutableArray<byte> GetHash(AssemblyHashAlgorithm algorithmId) + { + using (HashAlgorithm? algorithm = TryGetAlgorithm(algorithmId)) + { + // ERR_CryptoHashFailed has already been reported: + if (algorithm == null) + { + return ImmutableArray.Create<byte>(); + } + + switch (algorithmId) + { + case AssemblyHashAlgorithm.None: + case AssemblyHashAlgorithm.Sha1: + return GetHash(ref _lazySHA1Hash, algorithm); + + case AssemblyHashAlgorithm.Sha256: + return GetHash(ref _lazySHA256Hash, algorithm); + + case AssemblyHashAlgorithm.Sha384: + return GetHash(ref _lazySHA384Hash, algorithm); + + case AssemblyHashAlgorithm.Sha512: + return GetHash(ref _lazySHA512Hash, algorithm); + + case AssemblyHashAlgorithm.MD5: + return GetHash(ref _lazyMD5Hash, algorithm); + + default: + throw new ArgumentException("algorithmId"); + } + } + } + + internal static int GetHashSize(SourceHashAlgorithm algorithmId) + { + switch (algorithmId) + { + case SourceHashAlgorithm.Sha1: + return 160 / 8; + + case SourceHashAlgorithm.Sha256: + return 256 / 8; + + default: + throw new ArgumentException("algorithmId"); + } + } + + internal static HashAlgorithm? TryGetAlgorithm(SourceHashAlgorithm algorithmId) + { + switch (algorithmId) + { + case SourceHashAlgorithm.Sha1: + return SHA1.Create(); + + case SourceHashAlgorithm.Sha256: + return SHA256.Create(); + + default: + return null; + } + } + + internal static HashAlgorithmName GetAlgorithmName(SourceHashAlgorithm algorithmId) + { + switch (algorithmId) + { + case SourceHashAlgorithm.Sha1: + return HashAlgorithmName.SHA1; + + case SourceHashAlgorithm.Sha256: + return HashAlgorithmName.SHA256; + + default: + throw new ArgumentException("algorithmId"); + } + } + + internal static HashAlgorithm? TryGetAlgorithm(AssemblyHashAlgorithm algorithmId) + { + switch (algorithmId) + { + case AssemblyHashAlgorithm.None: + case AssemblyHashAlgorithm.Sha1: + return SHA1.Create(); + + case AssemblyHashAlgorithm.Sha256: + return SHA256.Create(); + + case AssemblyHashAlgorithm.Sha384: + return SHA384.Create(); + + case AssemblyHashAlgorithm.Sha512: + return SHA512.Create(); + + case AssemblyHashAlgorithm.MD5: + return MD5.Create(); + + default: + return null; + } + } + + internal static bool IsSupportedAlgorithm(AssemblyHashAlgorithm algorithmId) + { + switch (algorithmId) + { + case AssemblyHashAlgorithm.None: + case AssemblyHashAlgorithm.Sha1: + case AssemblyHashAlgorithm.Sha256: + case AssemblyHashAlgorithm.Sha384: + case AssemblyHashAlgorithm.Sha512: + case AssemblyHashAlgorithm.MD5: + return true; + + default: + return false; + } + } + + private ImmutableArray<byte> GetHash(ref ImmutableArray<byte> lazyHash, HashAlgorithm algorithm) + { + if (lazyHash.IsDefault) + { + ImmutableInterlocked.InterlockedCompareExchange(ref lazyHash, ComputeHash(algorithm), default(ImmutableArray<byte>)); + } + + return lazyHash; + } + + internal const int Sha1HashSize = 20; + + internal static ImmutableArray<byte> ComputeSha1(Stream stream) + { + if (stream != null) + { + stream.Seek(0, SeekOrigin.Begin); + using (var hashProvider = SHA1.Create()) + { + return ImmutableArray.Create(hashProvider.ComputeHash(stream)); + } + } + + return ImmutableArray<byte>.Empty; + } + + internal static ImmutableArray<byte> ComputeSha1(ImmutableArray<byte> bytes) + { + return ComputeSha1(bytes.ToArray()); + } + + internal static ImmutableArray<byte> ComputeSha1(byte[] bytes) + { + using (var hashProvider = SHA1.Create()) + { + return ImmutableArray.Create(hashProvider.ComputeHash(bytes)); + } + } + + internal static ImmutableArray<byte> ComputeHash(HashAlgorithmName algorithmName, IEnumerable<Blob> bytes) + { + using (var incrementalHash = IncrementalHash.CreateHash(algorithmName)) + { + foreach (var blob in bytes) + { + incrementalHash.AppendData(blob.GetBytes()); + } + return ImmutableArray.Create(incrementalHash.GetHashAndReset()); + } + } + + internal static ImmutableArray<byte> ComputeHash(HashAlgorithmName algorithmName, IEnumerable<ArraySegment<byte>> bytes) + { + using (var incrementalHash = IncrementalHash.CreateHash(algorithmName)) + { + foreach (var segment in bytes) + { + incrementalHash.AppendData(segment); + } + return ImmutableArray.Create(incrementalHash.GetHashAndReset()); + } + } + + internal static ImmutableArray<byte> ComputeSourceHash(ImmutableArray<byte> bytes, SourceHashAlgorithm hashAlgorithm = SourceHashAlgorithmUtils.DefaultContentHashAlgorithm) + { + var algorithmName = GetAlgorithmName(hashAlgorithm); + using (var incrementalHash = IncrementalHash.CreateHash(algorithmName)) + { + incrementalHash.AppendData(bytes.ToArray()); + return ImmutableArray.Create(incrementalHash.GetHashAndReset()); + } + } + + internal static ImmutableArray<byte> ComputeSourceHash(IEnumerable<Blob> bytes, SourceHashAlgorithm hashAlgorithm = SourceHashAlgorithmUtils.DefaultContentHashAlgorithm) + { + return ComputeHash(GetAlgorithmName(hashAlgorithm), bytes); + } + } +} diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ArgIterator.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ArgIterator.cs index 9c56ff338e8a..6fcf05aaa184 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ArgIterator.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/ArgIterator.cs @@ -308,7 +308,7 @@ private int GetStructGenRegDestinationAddress() // fieldBytes - size of the structure internal void ReportPointersFromStructInRegisters(TypeDesc type, int delta, CORCOMPILE_GCREFMAP_TOKENS[] frame) { - // SPAN-TODO: GC reporting - https://github.com/dotnet/coreclr/issues/8517 + // SPAN-TODO: GC reporting - https://github.com/dotnet/runtime/issues/7103 Debug.Assert(IsStructPassedInRegs()); diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DebugDirectoryNode.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DebugDirectoryNode.cs index 08093ecd3671..175025449039 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DebugDirectoryNode.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/DebugDirectoryNode.cs @@ -26,10 +26,12 @@ public class DebugDirectoryNode : ObjectNode, ISymbolDefinitionNode private EcmaModule _module; private NativeDebugDirectoryEntryNode _nativeEntry; + private bool _insertDeterministicEntry; public DebugDirectoryNode(EcmaModule sourceModule, string outputFileName) { _module = sourceModule; + _insertDeterministicEntry = sourceModule == null; // Mark module as deterministic if generating composite image string pdbNameRoot = Path.GetFileNameWithoutExtension(outputFileName); if (sourceModule != null) { @@ -50,7 +52,7 @@ public DebugDirectoryNode(EcmaModule sourceModule, string outputFileName) public int Offset => 0; - public int Size => (GetNumDebugDirectoryEntriesInModule() + 1) * ImageDebugDirectorySize; + public int Size => (GetNumDebugDirectoryEntriesInModule() + 1 + (_insertDeterministicEntry ?
1 : 0)) * ImageDebugDirectorySize; public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) { @@ -112,8 +114,21 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) builder.EmitReloc(entry, RelocType.IMAGE_REL_FILE_ABSOLUTE); } + // If generating a composite image, emit the deterministic marker + if (_insertDeterministicEntry) + { + builder.EmitUInt(0 /* Characteristics */); + builder.EmitUInt(0); + builder.EmitUShort(0); + builder.EmitUShort(0); + builder.EmitInt((int)DebugDirectoryEntryType.Reproducible); + builder.EmitInt(0); + builder.EmitUInt(0); + builder.EmitUInt(0); + } + // Second, copy existing entries from input module - for(int i = 0; i < numEntries; i++) + for (int i = 0; i < numEntries; i++) { builder.EmitUInt(0 /* Characteristics */); builder.EmitUInt(entries[i].Stamp); diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/FieldFixupSignature.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/FieldFixupSignature.cs index ebeb537f5f15..eb5431f29e52 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/FieldFixupSignature.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/FieldFixupSignature.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Diagnostics; using Internal.JitInterface; using Internal.Text; @@ -13,17 +14,19 @@ namespace ILCompiler.DependencyAnalysis.ReadyToRun { public class FieldFixupSignature : Signature { + public const int MaxCheckableOffset = 0x1FFFFFFF; private readonly ReadyToRunFixupKind _fixupKind; private readonly FieldDesc _fieldDesc; - public FieldFixupSignature(ReadyToRunFixupKind fixupKind, FieldDesc fieldDesc) + public FieldFixupSignature(ReadyToRunFixupKind fixupKind, FieldDesc fieldDesc, NodeFactory factory) { _fixupKind = fixupKind; _fieldDesc = fieldDesc; // Ensure types in signature are loadable and resolvable, otherwise we'll fail later while emitting the signature ((CompilerTypeSystemContext)fieldDesc.Context).EnsureLoadableType(fieldDesc.OwningType); + Debug.Assert(factory.SignatureContext.GetTargetModule(_fieldDesc) != null); } public override int ClassCode => 271828182; diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs index 54b9849aed8c..fd80e7e74a58 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/SignatureBuilder.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; using System.Reflection.Metadata; +using System.Reflection.Metadata.Ecma335; using Internal.TypeSystem; using Internal.TypeSystem.Ecma; @@ -441,11 +442,18 @@ public void EmitMethodSignature( // Owner type is needed for type specs to instantiating stubs or generics with signature variables still present if (!method.Method.OwningType.IsDefType && - ((flags & (uint)ReadyToRunMethodSigFlags.READYTORUN_METHOD_SIG_InstantiatingStub) != 0 || method.Method.OwningType.ContainsSignatureVariables()) - || method.Method.IsArrayAddressMethod()) + ((flags & (uint)ReadyToRunMethodSigFlags.READYTORUN_METHOD_SIG_InstantiatingStub) != 0 || 
method.Method.OwningType.ContainsSignatureVariables())) { flags |= (uint)ReadyToRunMethodSigFlags.READYTORUN_METHOD_SIG_OwnerType; } + else if (method.Method.IsArrayMethod()) + { + var memberRefMethod = method.Token.Module.GetMethod(MetadataTokens.EntityHandle((int)method.Token.Token)); + if (memberRefMethod.OwningType != method.Method.OwningType) + { + flags |= (uint)ReadyToRunMethodSigFlags.READYTORUN_METHOD_SIG_OwnerType; + } + } EmitUInt(flags); if ((flags & (uint)ReadyToRunMethodSigFlags.READYTORUN_METHOD_SIG_OwnerType) != 0) diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs index 0bc86c7794cb..ea21b730330b 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs @@ -81,16 +81,16 @@ private static void EncodeTypeLayout(ObjectDataSignatureBuilder dataBuilder, Typ if (defType.IsHomogeneousAggregate) { - CorElementType elementType = (defType.ValueTypeShapeCharacteristics & ValueTypeShapeCharacteristics.AggregateMask) switch + ReadyToRunHFAElemType hfaElementType = (defType.ValueTypeShapeCharacteristics & ValueTypeShapeCharacteristics.AggregateMask) switch { - ValueTypeShapeCharacteristics.Float32Aggregate => CorElementType.ELEMENT_TYPE_R4, - ValueTypeShapeCharacteristics.Float64Aggregate => CorElementType.ELEMENT_TYPE_R8, - ValueTypeShapeCharacteristics.Vector64Aggregate => CorElementType.ELEMENT_TYPE_R8, + ValueTypeShapeCharacteristics.Float32Aggregate => ReadyToRunHFAElemType.Float32, + ValueTypeShapeCharacteristics.Float64Aggregate => ReadyToRunHFAElemType.Float64, + ValueTypeShapeCharacteristics.Vector64Aggregate => ReadyToRunHFAElemType.Vector64, // See MethodTable::GetHFAType - ValueTypeShapeCharacteristics.Vector128Aggregate => CorElementType.ELEMENT_TYPE_VALUETYPE, - _ => CorElementType.Invalid + ValueTypeShapeCharacteristics.Vector128Aggregate => ReadyToRunHFAElemType.Vector128, + _ => throw new NotSupportedException() }; - dataBuilder.EmitUInt((uint)elementType); + dataBuilder.EmitUInt((uint)hfaElementType); } if (alignment != pointerSize) diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs index 7ae68230d8a6..1178d4372c33 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRunSymbolNodeFactory.cs @@ -70,7 +70,7 @@ private void CreateNodeCaches() _codegenNodeFactory, _codegenNodeFactory.HelperImports, ReadyToRunHelper.DelayLoad_Helper, - new FieldFixupSignature(ReadyToRunFixupKind.FieldAddress, key) + new FieldFixupSignature(ReadyToRunFixupKind.FieldAddress, key, _codegenNodeFactory) ); }); @@ -78,7 +78,7 @@ private void CreateNodeCaches() { return new PrecodeHelperImport( _codegenNodeFactory, - new FieldFixupSignature(ReadyToRunFixupKind.FieldOffset, key) + new FieldFixupSignature(ReadyToRunFixupKind.FieldOffset, key, _codegenNodeFactory) ); }); @@ -94,7 +94,7 @@ private void CreateNodeCaches() { return new PrecodeHelperImport( _codegenNodeFactory, - new 
FieldFixupSignature(_verifyTypeAndFieldLayout ? ReadyToRunFixupKind.Verify_FieldOffset : ReadyToRunFixupKind.Check_FieldOffset, key) + new FieldFixupSignature(_verifyTypeAndFieldLayout ? ReadyToRunFixupKind.Verify_FieldOffset : ReadyToRunFixupKind.Check_FieldOffset, key, _codegenNodeFactory) ); }); @@ -356,7 +356,7 @@ private ISymbolNode CreateFieldHandleHelper(FieldDesc field) { return new PrecodeHelperImport( _codegenNodeFactory, - new FieldFixupSignature(ReadyToRunFixupKind.FieldHandle, field)); + new FieldFixupSignature(ReadyToRunFixupKind.FieldHandle, field, _codegenNodeFactory)); } private ISymbolNode CreateCctorTrigger(TypeDesc type) diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs index 0f7fc68f0794..bf914ead379c 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs @@ -26,12 +26,14 @@ public partial class ReadyToRunCompilerContext : CompilerTypeSystemContext private VectorOfTFieldLayoutAlgorithm _vectorOfTFieldLayoutAlgorithm; private VectorFieldLayoutAlgorithm _vectorFieldLayoutAlgorithm; - public ReadyToRunCompilerContext(TargetDetails details, SharedGenericsMode genericsMode) + public ReadyToRunCompilerContext(TargetDetails details, SharedGenericsMode genericsMode, bool bubbleIncludesCorelib) : base(details, genericsMode) { _r2rFieldLayoutAlgorithm = new ReadyToRunMetadataFieldLayoutAlgorithm(); _systemObjectFieldLayoutAlgorithm = new SystemObjectFieldLayoutAlgorithm(_r2rFieldLayoutAlgorithm); - _vectorFieldLayoutAlgorithm = new VectorFieldLayoutAlgorithm(_r2rFieldLayoutAlgorithm); + + // Only the Arm64 JIT respects the OS rules for vector type abi currently + _vectorFieldLayoutAlgorithm = new VectorFieldLayoutAlgorithm(_r2rFieldLayoutAlgorithm, (details.Architecture == TargetArchitecture.ARM64) ? 
true : bubbleIncludesCorelib); string matchingVectorType = "Unknown"; if (details.MaximumSimdVectorLength == SimdVectorLength.Vector128Bit) @@ -39,8 +41,8 @@ public ReadyToRunCompilerContext(TargetDetails details, SharedGenericsMode gener else if (details.MaximumSimdVectorLength == SimdVectorLength.Vector256Bit) matchingVectorType = "Vector256`1"; - _vectorOfTFieldLayoutAlgorithm = new VectorOfTFieldLayoutAlgorithm(_r2rFieldLayoutAlgorithm, _vectorFieldLayoutAlgorithm, matchingVectorType); - + // No architecture has completely stable handling of Vector in the abi (Arm64 may change to SVE) + _vectorOfTFieldLayoutAlgorithm = new VectorOfTFieldLayoutAlgorithm(_r2rFieldLayoutAlgorithm, _vectorFieldLayoutAlgorithm, matchingVectorType, bubbleIncludesCorelib); } public override FieldLayoutAlgorithm GetLayoutAlgorithmForType(DefType type) @@ -112,12 +114,14 @@ internal class VectorOfTFieldLayoutAlgorithm : FieldLayoutAlgorithm private FieldLayoutAlgorithm _vectorFallbackAlgorithm; private string _similarVectorName; private DefType _similarVectorOpenType; + private bool _vectorAbiIsStable; - public VectorOfTFieldLayoutAlgorithm(FieldLayoutAlgorithm fallbackAlgorithm, FieldLayoutAlgorithm vectorFallbackAlgorithm, string similarVector) + public VectorOfTFieldLayoutAlgorithm(FieldLayoutAlgorithm fallbackAlgorithm, FieldLayoutAlgorithm vectorFallbackAlgorithm, string similarVector, bool vectorAbiIsStable) { _fallbackAlgorithm = fallbackAlgorithm; _vectorFallbackAlgorithm = vectorFallbackAlgorithm; _similarVectorName = similarVector; + _vectorAbiIsStable = vectorAbiIsStable; } private DefType GetSimilarVector(DefType vectorOfTType) @@ -158,6 +162,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, ByteCountUnaligned = LayoutInt.Indeterminate, ByteCountAlignment = LayoutInt.Indeterminate, Offsets = fieldsAndOffsets.ToArray(), + LayoutAbiStable = false, }; return instanceLayout; } @@ -175,6 +180,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, FieldAlignment = layoutFromSimilarIntrinsicVector.FieldAlignment, FieldSize = layoutFromSimilarIntrinsicVector.FieldSize, Offsets = layoutFromMetadata.Offsets, + LayoutAbiStable = _vectorAbiIsStable, }; #else return new ComputedInstanceFieldLayout @@ -184,6 +190,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, FieldAlignment = layoutFromMetadata.FieldAlignment, FieldSize = layoutFromSimilarIntrinsicVector.FieldSize, Offsets = layoutFromMetadata.Offsets, + LayoutAbiStable = _vectorAbiIsStable, }; #endif } diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs index a88cdb011353..2d04a02448a7 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Compiler/SystemObjectFieldLayoutAlgorithm.cs @@ -36,6 +36,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp FieldAlignment = layoutFromMetadata.FieldAlignment, FieldSize = layoutFromMetadata.FieldSize, Offsets = layoutFromMetadata.Offsets, + LayoutAbiStable = true, }; } diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/IBC/IBCProfileParser.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/IBC/IBCProfileParser.cs index c42ecaa13c1c..718d4e650fa6 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/IBC/IBCProfileParser.cs 
+++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/IBC/IBCProfileParser.cs @@ -113,7 +113,8 @@ public ProfileData ParseIBCDataFromModule(EcmaModule ecmaModule) } else { - _logger.Writer.WriteLine($"Token {0:x} does not refer to a method"); + if (_logger.IsVerbose) + _logger.Writer.WriteLine($"Token {(int)entry.Token:x} does not refer to a method"); } break; diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj index 483973871a90..b8d01189009d 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ILCompiler.ReadyToRun.csproj @@ -98,6 +98,7 @@ + diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Interop/IL/Marshaller.ReadyToRun.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Interop/IL/Marshaller.ReadyToRun.cs index 4ca7a6f95965..04b939334a21 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Interop/IL/Marshaller.ReadyToRun.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/Interop/IL/Marshaller.ReadyToRun.cs @@ -70,6 +70,8 @@ public static Marshaller[] GetMarshallersForMethod(MethodDesc targetMethod) TypeDesc parameterType = (i == 0) ? methodSig.ReturnType : methodSig[i - 1]; //first item is the return type marshallers[i] = CreateMarshaller(parameterType, + parameterIndex, + methodSig.GetEmbeddedSignatureData(), MarshallerType.Argument, parameterMetadata.MarshalAsDescriptor, direction, @@ -121,6 +123,8 @@ public static bool IsMarshallingRequired(MethodSignature methodSig, ParameterMet MarshallerKind marshallerKind = MarshalHelpers.GetMarshallerKind( parameterType, + parameterIndex: i, + customModifierData: methodSig.GetEmbeddedSignatureData(), parameterMetadata.MarshalAsDescriptor, parameterMetadata.Return, isAnsi: true, diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 2c2085635f34..b59a605ab35a 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -24,6 +24,16 @@ namespace Internal.JitInterface { + internal class RequiresRuntimeJitIfUsedSymbol + { + public RequiresRuntimeJitIfUsedSymbol(string message) + { + Message = message; + } + + public string Message { get; } + } + public class MethodWithToken { public readonly MethodDesc Method; @@ -224,7 +234,7 @@ public void CompileMethod(MethodWithGCInfo methodCodeNodeNeedingCode) try { - if (!ShouldSkipCompilation(MethodBeingCompiled)) + if (!ShouldSkipCompilation(MethodBeingCompiled) && !MethodSignatureIsUnstable(MethodBeingCompiled.Signature, out var _)) { MethodIL methodIL = _compilation.GetMethodIL(MethodBeingCompiled); if (methodIL != null) @@ -1012,7 +1022,7 @@ private void getFieldInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_MET CorInfoHelpFunc.CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE); } - if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout) + if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout && (fieldOffset <= FieldFixupSignature.MaxCheckableOffset)) { // ENCODE_CHECK_FIELD_OFFSET _methodCodeNode.Fixups.Add(_compilation.SymbolNodeFactory.CheckFieldOffset(field)); @@ -1066,7 +1076,7 @@ private void getFieldInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_MET else if (helperId != 
ReadyToRunHelperId.Invalid) { - if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout) + if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout && (fieldOffset <= FieldFixupSignature.MaxCheckableOffset)) { // ENCODE_CHECK_FIELD_OFFSET _methodCodeNode.Fixups.Add(_compilation.SymbolNodeFactory.CheckFieldOffset(field)); @@ -1509,6 +1519,42 @@ private void classMustBeLoadedBeforeCodeIsRun(TypeDesc type) _methodCodeNode.Fixups.Add(node); } + private static bool MethodSignatureIsUnstable(MethodSignature methodSig, out string unstableMessage) + { + foreach (TypeDesc t in methodSig) + { + DefType defType = t as DefType; + + if (defType != null) + { + if (!defType.LayoutAbiStable) + { + unstableMessage = $"Abi unstable type {defType}"; + return true; + } + } + } + unstableMessage = null; + return false; + } + + private void UpdateConstLookupWithRequiresRuntimeJitSymbolIfNeeded(ref CORINFO_CONST_LOOKUP constLookup, MethodDesc method) + { + if (MethodSignatureIsUnstable(method.Signature, out string unstableMessage)) + { + constLookup.addr = (void*)ObjectToHandle(new RequiresRuntimeJitIfUsedSymbol(unstableMessage + " calling " + method)); + constLookup.accessType = InfoAccessType.IAT_PVALUE; + } + } + + private void VerifyMethodSignatureIsStable(MethodSignature methodSig) + { + if (MethodSignatureIsUnstable(methodSig, out var unstableMessage)) + { + throw new RequiresRuntimeJitException(unstableMessage); + } + } + private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, CORINFO_METHOD_STRUCT_* callerHandle, CORINFO_CALLINFO_FLAGS flags, CORINFO_CALL_INFO* pResult) { MethodDesc methodToCall; @@ -1569,12 +1615,17 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO _compilation.SymbolNodeFactory.InterfaceDispatchCell( new MethodWithToken(targetMethod, HandleToModuleToken(ref pResolvedToken, targetMethod), constrainedType: null, unboxing: false), MethodBeingCompiled)); + + // If the abi of the method isn't stable, this will cause a usage of the RequiresRuntimeJitSymbol, which will trigger a RequiresRuntimeJitException + UpdateConstLookupWithRequiresRuntimeJitSymbolIfNeeded(ref pResult->codePointerOrStubLookup.constLookup, targetMethod); } break; case CORINFO_CALL_KIND.CORINFO_CALL_CODE_POINTER: Debug.Assert(pResult->codePointerOrStubLookup.lookupKind.needsRuntimeLookup); + // Eagerly check abi stability here as no symbol usage can be used to delay the check + VerifyMethodSignatureIsStable(targetMethod.Signature); // There is no easy way to detect method referenced via generic lookups in generated code. // Report this method reference unconditionally. 
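// The additions above enforce ABI stability along two paths: direct and interface calls defer the
// failure by planting a RequiresRuntimeJitIfUsedSymbol in the const lookup, so compilation only
// aborts when the unstable call is actually emitted, whereas code-pointer and vtable calls have no
// import symbol to hang the check on and are verified eagerly. A minimal, self-contained sketch of
// the underlying signature walk (ToyType is an illustrative stand-in for DefType, and the throw
// stands in for RequiresRuntimeJitException, which punts the method to the runtime JIT):

using System;
using System.Collections.Generic;

sealed class ToyType
{
    public string Name;
    public bool LayoutAbiStable;   // mirrors the flag this change threads through field layouts
}

static class AbiStabilityGate
{
    // Analogous to MethodSignatureIsUnstable above: any parameter or return type whose
    // layout is not ABI-stable poisons the whole signature.
    public static bool SignatureIsUnstable(IEnumerable<ToyType> signatureTypes, out string message)
    {
        foreach (ToyType type in signatureTypes)
        {
            if (!type.LayoutAbiStable)
            {
                message = $"Abi unstable type {type.Name}";
                return true;
            }
        }
        message = null;
        return false;
    }

    // Analogous to VerifyMethodSignatureIsStable: the eager variant for call kinds
    // where no symbol usage can delay the check.
    public static void VerifyStable(IEnumerable<ToyType> signatureTypes)
    {
        if (SignatureIsUnstable(signatureTypes, out string message))
            throw new InvalidOperationException(message);
    }
}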
@@ -1604,12 +1655,18 @@ private void getCallInfo(ref CORINFO_RESOLVED_TOKEN pResolvedToken, CORINFO_RESO new MethodWithToken(nonUnboxingMethod, HandleToModuleToken(ref pResolvedToken, nonUnboxingMethod), constrainedType, unboxing: isUnboxingStub), isInstantiatingStub: useInstantiatingStub, isPrecodeImportRequired: (flags & CORINFO_CALLINFO_FLAGS.CORINFO_CALLINFO_LDFTN) != 0)); + + // If the abi of the method isn't stable, this will cause a usage of the RequiresRuntimeJitSymbol, which will trigger a RequiresRuntimeJitException + UpdateConstLookupWithRequiresRuntimeJitSymbolIfNeeded(ref pResult->codePointerOrStubLookup.constLookup, targetMethod); } break; case CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_VTABLE: // Only calls within the CoreLib version bubble support fragile NI codegen with vtable based calls, for better performance (because // CoreLib and the runtime will always be updated together anyways - this is a special case) + + // Eagerly check abi stability here as no symbol usage can be used to delay the check + VerifyMethodSignatureIsStable(targetMethod.Signature); break; case CORINFO_CALL_KIND.CORINFO_VIRTUALCALL_LDVIRTFTN: @@ -1922,7 +1979,7 @@ private bool NeedsTypeLayoutCheck(TypeDesc type) if (!type.IsValueType) return false; - return !_compilation.IsLayoutFixedInCurrentVersionBubble(type) || _compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout; + return !_compilation.IsLayoutFixedInCurrentVersionBubble(type) || (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout && !((MetadataType)type).IsNonVersionable()); } private bool HasLayoutMetadata(TypeDesc type) @@ -1963,6 +2020,9 @@ private void EncodeFieldBaseOffset(FieldDesc field, CORINFO_FIELD_INFO* pResult, if (pMT.IsValueType) { // ENCODE_CHECK_FIELD_OFFSET + if (pResult->offset > FieldFixupSignature.MaxCheckableOffset) + throw new RequiresRuntimeJitException(callerMethod.ToString() + " -> " + field.ToString()); + _methodCodeNode.Fixups.Add(_compilation.SymbolNodeFactory.CheckFieldOffset(field)); // No-op other than generating the check field offset fixup } @@ -1978,7 +2038,7 @@ private void EncodeFieldBaseOffset(FieldDesc field, CORINFO_FIELD_INFO* pResult, } else if (pMT.IsValueType) { - if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout) + if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout && !callerMethod.IsNonVersionable() && (pResult->offset <= FieldFixupSignature.MaxCheckableOffset)) { // ENCODE_CHECK_FIELD_OFFSET _methodCodeNode.Fixups.Add(_compilation.SymbolNodeFactory.CheckFieldOffset(field)); @@ -1987,7 +2047,7 @@ private void EncodeFieldBaseOffset(FieldDesc field, CORINFO_FIELD_INFO* pResult, } else if (_compilation.IsInheritanceChainLayoutFixedInCurrentVersionBubble(pMT.BaseType)) { - if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout) + if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout && !callerMethod.IsNonVersionable() && (pResult->offset <= FieldFixupSignature.MaxCheckableOffset)) { // ENCODE_CHECK_FIELD_OFFSET _methodCodeNode.Fixups.Add(_compilation.SymbolNodeFactory.CheckFieldOffset(field)); @@ -2009,7 +2069,7 @@ private void EncodeFieldBaseOffset(FieldDesc field, CORINFO_FIELD_INFO* pResult, { PreventRecursiveFieldInlinesOutsideVersionBubble(field, callerMethod); - if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout) + if (_compilation.SymbolNodeFactory.VerifyTypeAndFieldLayout && !callerMethod.IsNonVersionable() && (pResult->offset <= FieldFixupSignature.MaxCheckableOffset)) { // ENCODE_CHECK_FIELD_OFFSET 
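// Field offsets are encoded in the fixup signature as ECMA-335 compressed unsigned integers,
// whose maximum value is 0x1FFFFFFF; MaxCheckableOffset presumably mirrors that ceiling, which
// is why larger offsets either skip the check (getFieldInfo) or bail out to the runtime JIT
// via RequiresRuntimeJitException (EncodeFieldBaseOffset) in the hunks above.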
_methodCodeNode.Fixups.Add(_compilation.SymbolNodeFactory.CheckFieldOffset(field)); diff --git a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs index 839b1f21d110..e1731e4c95f8 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/ObjectWriter/R2RPEBuilder.cs @@ -173,8 +173,9 @@ public R2RPEBuilder( ISymbolNode r2rHeaderExportSymbol, string outputFileSimpleName, Func getRuntimeFunctionsTable, - int? customPESectionAlignment) - : base(peHeaderBuilder, deterministicIdProvider: null) + int? customPESectionAlignment, + Func, BlobContentId> deterministicIdProvider) + : base(peHeaderBuilder, deterministicIdProvider: deterministicIdProvider) { _target = target; _getRuntimeFunctionsTable = getRuntimeFunctionsTable; @@ -288,7 +289,7 @@ public int GetSymbolFilePosition(ISymbolNode symbol) /// /// Output stream for the final R2R PE file /// Timestamp to set in the PE header of the output R2R executable - public void Write(Stream outputStream, int timeDateStamp) + public void Write(Stream outputStream, int? timeDateStamp) { BlobBuilder outputPeFile = new BlobBuilder(); Serialize(outputPeFile); @@ -302,7 +303,8 @@ public void Write(Stream outputStream, int timeDateStamp) ApplyMachineOSOverride(outputStream); - SetPEHeaderTimeStamp(outputStream, timeDateStamp); + if (timeDateStamp.HasValue) + SetPEHeaderTimeStamp(outputStream, timeDateStamp.Value); _written = true; } diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/UnwindInfo.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/UnwindInfo.cs index 1c5150bcedaf..5d8cd2b0c591 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/UnwindInfo.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/Amd64/UnwindInfo.cs @@ -42,8 +42,6 @@ public enum UnwindFlags /// public class UnwindCode { - public int Index { get; set; } - public byte CodeOffset { get; set; } public UnwindOpCodes UnwindOp { get; set; } //4 bits @@ -53,30 +51,101 @@ public class UnwindCode public byte OffsetLow { get; set; } public byte OffsetHigh { get; set; } //4 bits - public uint FrameOffset { get; set; } + public int FrameOffset { get; set; } public int NextFrameOffset { get; set; } public bool IsOpInfo { get; set; } public UnwindCode() { } - public UnwindCode(byte[] image, int index, ref int offset) + /// + /// Unwind code parsing is based on src\jit\unwindamd64.cpp DumpUnwindInfo + /// + public UnwindCode(byte[] image, ref int frameOffset, ref int offset) { - Index = index; - - int off = offset; - CodeOffset = NativeReader.ReadByte(image, ref off); - byte op = NativeReader.ReadByte(image, ref off); + CodeOffset = NativeReader.ReadByte(image, ref offset); + byte op = NativeReader.ReadByte(image, ref offset); UnwindOp = (UnwindOpCodes)(op & 15); OpInfo = (byte)(op >> 4); OffsetLow = CodeOffset; OffsetHigh = OpInfo; - FrameOffset = NativeReader.ReadUInt16(image, ref offset); - NextFrameOffset = -1; + FrameOffset = frameOffset; - IsOpInfo = false; + switch (UnwindOp) + { + case UnwindOpCodes.UWOP_PUSH_NONVOL: + OpInfoStr = $"{(Registers)OpInfo}({OpInfo})"; + break; + case UnwindOpCodes.UWOP_ALLOC_LARGE: + OpInfoStr = $"{OpInfo} - "; + if (OpInfo == 0) + { + OpInfoStr += "Scaled small"; + NextFrameOffset = 8 * NativeReader.ReadUInt16(image, ref offset); + } + else if (OpInfo == 1) + { + OpInfoStr += "Unscaled
large"; + uint nextOffset = NativeReader.ReadUInt16(image, ref offset); + NextFrameOffset = (int)((uint)(NativeReader.ReadUInt16(image, ref offset) << 16) | nextOffset); + } + else + { + OpInfoStr += "Unknown"; + } + break; + case UnwindOpCodes.UWOP_ALLOC_SMALL: + int opInfo = OpInfo * 8 + 8; + OpInfoStr = $"{opInfo}"; + break; + case UnwindOpCodes.UWOP_SET_FPREG: + OpInfoStr = $"Unused({OpInfo})"; + break; + case UnwindOpCodes.UWOP_SET_FPREG_LARGE: + { + OpInfoStr = $"Unused({OpInfo})"; + uint nextOffset = NativeReader.ReadUInt16(image, ref offset); + nextOffset = ((uint)(NativeReader.ReadUInt16(image, ref offset) << 16) | nextOffset); + NextFrameOffset = (int)nextOffset * 16; + if ((NextFrameOffset & 0xF0000000) != 0) + { + throw new BadImageFormatException("Warning: Illegal unwindInfo unscaled offset: too large"); + } + } + break; + case UnwindOpCodes.UWOP_SAVE_NONVOL: + { + OpInfoStr = $"{(Registers)OpInfo}({OpInfo})"; + NextFrameOffset = NativeReader.ReadUInt16(image, ref offset) * 8; + } + break; + case UnwindOpCodes.UWOP_SAVE_NONVOL_FAR: + { + OpInfoStr = $"{(Registers)OpInfo}({OpInfo})"; + uint nextOffset = NativeReader.ReadUInt16(image, ref offset); + NextFrameOffset = (int)((uint)(NativeReader.ReadUInt16(image, ref offset) << 16) | nextOffset); + } + break; + case UnwindOpCodes.UWOP_SAVE_XMM128: + { + OpInfoStr = $"XMM{OpInfo}({OpInfo})"; + NextFrameOffset = (int)NativeReader.ReadUInt16(image, ref offset) * 16; + } + break; + case UnwindOpCodes.UWOP_SAVE_XMM128_FAR: + { + OpInfoStr = $"XMM{OpInfo}({OpInfo})"; + uint nextOffset = NativeReader.ReadUInt16(image, ref offset); + NextFrameOffset = (int)((uint)(NativeReader.ReadUInt16(image, ref offset) << 16) | nextOffset); + } + break; + default: + throw new NotImplementedException(UnwindOp.ToString()); + } + + NextFrameOffset = frameOffset; } } @@ -94,8 +163,8 @@ public class UnwindInfo : BaseUnwindInfo public byte CountOfUnwindCodes { get; set; } public Registers FrameRegister { get; set; } //4 bits public byte FrameOffset { get; set; } //4 bits - public UnwindCode[] UnwindCodeArray { get; set; } - public Dictionary> UnwindCodes { get; set; } + public Dictionary CodeOffsetToUnwindCodeIndex { get; set; } + public List UnwindCodes { get; set; } public uint PersonalityRoutineRVA { get; set; } public UnwindInfo() { } @@ -114,23 +183,19 @@ public UnwindInfo(byte[] image, int offset) FrameRegister = (Registers)(frameRegisterAndOffset & 15); FrameOffset = (byte)(frameRegisterAndOffset >> 4); - UnwindCodeArray = new UnwindCode[CountOfUnwindCodes]; - UnwindCodes = new Dictionary>(); - for (int i = 0; i < CountOfUnwindCodes; i++) + UnwindCodes = new List(CountOfUnwindCodes); + CodeOffsetToUnwindCodeIndex = new Dictionary(); + int frameOffset = FrameOffset; + int sizeOfUnwindCodes = CountOfUnwindCodes * _sizeofUnwindCode; + int endOffset = offset + sizeOfUnwindCodes; + while (offset < endOffset) { - UnwindCodeArray[i] = new UnwindCode(image, i, ref offset); - } - for (int i = 0; i < CountOfUnwindCodes; i++) - { - ParseUnwindCode(ref i); - if (!UnwindCodes.ContainsKey(UnwindCodeArray[i].CodeOffset)) - { - UnwindCodes[UnwindCodeArray[i].CodeOffset] = new List(); - } - UnwindCodes[UnwindCodeArray[i].CodeOffset].Add(UnwindCodeArray[i]); + UnwindCode unwindCode = new UnwindCode(image, ref frameOffset, ref offset); + CodeOffsetToUnwindCodeIndex.Add(unwindCode.CodeOffset, UnwindCodes.Count); + UnwindCodes.Add(unwindCode); } - Size = _offsetofUnwindCode + CountOfUnwindCodes * _sizeofUnwindCode; + Size = _offsetofUnwindCode + sizeOfUnwindCodes; int 
alignmentPad = -Size & 3; Size += alignmentPad + sizeof(uint); @@ -166,16 +231,14 @@ public override string ToString() sb.AppendLine($" FrameOffset: {FrameOffset}"); sb.AppendLine($" Unwind Codes:"); sb.AppendLine($" ------------------"); - for (int i = 0; i < CountOfUnwindCodes; i++) + foreach (UnwindCode unwindCode in UnwindCodes) { - if (!UnwindCodeArray[i].IsOpInfo) - continue; - sb.AppendLine($" CodeOffset: 0x{UnwindCodeArray[i].CodeOffset:X2}"); - sb.AppendLine($" UnwindOp: {UnwindCodeArray[i].UnwindOp}({(byte)UnwindCodeArray[i].UnwindOp})"); - sb.AppendLine($" OpInfo: {UnwindCodeArray[i].OpInfoStr}"); - if (UnwindCodeArray[i].NextFrameOffset != -1) + sb.AppendLine($" CodeOffset: 0x{unwindCode.CodeOffset:X2}"); + sb.AppendLine($" UnwindOp: {unwindCode.UnwindOp}({(byte)unwindCode.UnwindOp})"); + sb.AppendLine($" OpInfo: {unwindCode.OpInfoStr}"); + if (unwindCode.NextFrameOffset != -1) { - sb.AppendLine($" FrameOffset: {UnwindCodeArray[i].NextFrameOffset}"); + sb.AppendLine($" FrameOffset: {unwindCode.NextFrameOffset}"); } sb.AppendLine($" ------------------"); } @@ -185,98 +248,5 @@ public override string ToString() return sb.ToString(); } - /// - /// based on src\jit\unwindamd64.cpp DumpUnwindInfo - /// - private void ParseUnwindCode(ref int i) - { - UnwindCode code = UnwindCodeArray[i]; - code.IsOpInfo = true; - switch (code.UnwindOp) - { - case UnwindOpCodes.UWOP_PUSH_NONVOL: - code.OpInfoStr = $"{(Registers)code.OpInfo}({code.OpInfo})"; - break; - case UnwindOpCodes.UWOP_ALLOC_LARGE: - code.OpInfoStr = $"{code.OpInfo} - "; - if (code.OpInfo == 0) - { - i++; - UnwindCodeArray[i].OpInfoStr += "Scaled small"; - code.NextFrameOffset = (int)UnwindCodeArray[i].FrameOffset * 8; - } - else if (code.OpInfo == 1) - { - i++; - UnwindCodeArray[i].OpInfoStr += "Unscaled large"; - uint offset = UnwindCodeArray[i].FrameOffset; - i++; - offset = ((UnwindCodeArray[i].FrameOffset << 16) | offset); - code.NextFrameOffset = (int)offset; - } - else - { - code.OpInfoStr += "Unknown"; - } - break; - case UnwindOpCodes.UWOP_ALLOC_SMALL: - int opInfo = code.OpInfo * 8 + 8; - code.OpInfoStr = $"{opInfo}"; - break; - case UnwindOpCodes.UWOP_SET_FPREG: - code.OpInfoStr = $"Unused({code.OpInfo})"; - break; - case UnwindOpCodes.UWOP_SET_FPREG_LARGE: - { - code.OpInfoStr = $"Unused({code.OpInfo})"; - i++; - uint offset = UnwindCodeArray[i].FrameOffset; - i++; - offset = ((UnwindCodeArray[i].FrameOffset << 16) | offset); - code.NextFrameOffset = (int)offset * 16; - if ((UnwindCodeArray[i].FrameOffset & 0xF0000000) != 0) - { - throw new BadImageFormatException("Warning: Illegal unwindInfo unscaled offset: too large"); - } - } - break; - case UnwindOpCodes.UWOP_SAVE_NONVOL: - { - code.OpInfoStr = $"{(Registers)code.OpInfo}({code.OpInfo})"; - i++; - uint offset = UnwindCodeArray[i].FrameOffset * 8; - code.NextFrameOffset = (int)offset; - } - break; - case UnwindOpCodes.UWOP_SAVE_NONVOL_FAR: - { - code.OpInfoStr = $"{(Registers)code.OpInfo}({code.OpInfo})"; - i++; - uint offset = UnwindCodeArray[i].FrameOffset; - i++; - offset = ((UnwindCodeArray[i].FrameOffset << 16) | offset); - code.NextFrameOffset = (int)offset; - } - break; - case UnwindOpCodes.UWOP_SAVE_XMM128: - { - code.OpInfoStr = $"XMM{code.OpInfo}({code.OpInfo})"; - i++; - uint offset = UnwindCodeArray[i].FrameOffset * 16; - code.NextFrameOffset = (int)offset; - } - break; - case UnwindOpCodes.UWOP_SAVE_XMM128_FAR: - { - code.OpInfoStr = $"XMM{code.OpInfo}({code.OpInfo})"; - i++; - uint offset = UnwindCodeArray[i].FrameOffset; - i++; - offset = 
((UnwindCodeArray[i].FrameOffset << 16) | offset); - code.NextFrameOffset = (int)offset; - } - break; - } - } } } diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs index 706e98780acb..8a6766d42945 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfo.cs @@ -17,15 +17,15 @@ namespace ILCompiler.Reflection.ReadyToRun /// public class DebugInfo { - private readonly ReadyToRunReader _readyToRunReader; + private readonly RuntimeFunction _runtimeFunction; private readonly int _offset; private List _boundsList; private List _variablesList; private Machine _machine; - public DebugInfo(ReadyToRunReader readyToRunReader, int offset) + public DebugInfo(RuntimeFunction runtimeFunction, int offset) { - this._readyToRunReader = readyToRunReader; + this._runtimeFunction = runtimeFunction; this._offset = offset; } @@ -83,6 +83,7 @@ private void EnsureInitialized() { return; } + ReadyToRunReader _readyToRunReader = _runtimeFunction.ReadyToRunReader; int offset = _offset; _boundsList = new List(); _variablesList = new List(); @@ -156,6 +157,19 @@ private void ParseNativeVarInfo(byte[] image, int offset) entry.StartOffset = reader.ReadUInt(); entry.EndOffset = entry.StartOffset + reader.ReadUInt(); entry.VariableNumber = (uint)(reader.ReadUInt() + (int)ImplicitILArguments.Max); + entry.Variable = new Variable(); + // TODO: This is probably incomplete + // This does not handle any implicit arguments or var args + if (entry.VariableNumber < this._runtimeFunction.Method.Signature.ParameterTypes.Length) + { + entry.Variable.Type = VariableType.Parameter; + entry.Variable.Index = (int)entry.VariableNumber; + } + else + { + entry.Variable.Type = VariableType.Local; + entry.Variable.Index = (int)entry.VariableNumber - this._runtimeFunction.Method.Signature.ParameterTypes.Length; + } var varLoc = new VarLoc(); varLoc.VarLocType = (VarLocType)reader.ReadUInt(); diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfoTypes.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfoTypes.cs index 73782f2344c3..eef0ab4673cb 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfoTypes.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/DebugInfoTypes.cs @@ -17,10 +17,25 @@ public struct NativeVarInfo { public uint StartOffset; public uint EndOffset; + // TODO: Eliminate this public uint VariableNumber; + public Variable Variable { get; internal set; } public VarLoc VariableLocation; } + public enum VariableType + { + Parameter, + Local, + // TODO: Special + } + + public class Variable + { + public VariableType Type { get; internal set; } + public int Index { get; internal set; } + } + [Flags] public enum SourceTypes { diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/EHInfo.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/EHInfo.cs index 32e4eab6d609..a6627f874e95 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/EHInfo.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/EHInfo.cs @@ -101,7 +101,7 @@ public EHClause(ReadyToRunReader reader, int offset) } else { - ClassName = MetadataNameFormatter.FormatHandle(reader.GetGlobalMetadataReader(), MetadataTokens.Handle((int)ClassTokenOrFilterOffset)); + ClassName = 
MetadataNameFormatter.FormatHandle(reader.GetGlobalMetadata()?.MetadataReader, MetadataTokens.Handle((int)ClassTokenOrFilterOffset)); } } } diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/IAssemblyMetadata.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/IAssemblyMetadata.cs new file mode 100644 index 000000000000..5a49d9a06b82 --- /dev/null +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/IAssemblyMetadata.cs @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Reflection.Metadata; +using System.Reflection.PortableExecutable; + +namespace ILCompiler.Reflection.ReadyToRun +{ + /// + /// This interface represents MSIL information for a single component assembly. + /// + public interface IAssemblyMetadata + { + PEReader ImageReader { get; } + + MetadataReader MetadataReader { get; } + } +} diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/IAssemblyResolver.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/IAssemblyResolver.cs index 411aabe6e50a..677eadd92107 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/IAssemblyResolver.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/IAssemblyResolver.cs @@ -7,8 +7,8 @@ namespace ILCompiler.Reflection.ReadyToRun { public interface IAssemblyResolver { - MetadataReader FindAssembly(MetadataReader metadataReader, AssemblyReferenceHandle assemblyReferenceHandle, string parentFile); - MetadataReader FindAssembly(string simpleName, string parentFile); + IAssemblyMetadata FindAssembly(MetadataReader metadataReader, AssemblyReferenceHandle assemblyReferenceHandle, string parentFile); + IAssemblyMetadata FindAssembly(string simpleName, string parentFile); // TODO (refactoring) - signature formatting options should be independent of assembly resolver bool Naked { get; } bool SignatureBinary { get; } diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs index a00419f212c4..4910c1eda0c7 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunMethod.cs @@ -144,12 +144,24 @@ public DebugInfo DebugInfo { if (_debugInfo == null) { - _readyToRunReader.RuntimeFunctionToDebugInfo.TryGetValue(Id, out _debugInfo); + int offset; + if (_readyToRunReader.RuntimeFunctionToDebugInfo.TryGetValue(Id, out offset)) + { + this._debugInfo = new DebugInfo(this, offset); + } } return _debugInfo; } } + internal ReadyToRunReader ReadyToRunReader + { + get + { + return _readyToRunReader; + } + } + public RuntimeFunction( ReadyToRunReader readyToRunReader, int id, @@ -202,9 +214,9 @@ public class ReadyToRunMethod private const int _mdtMethodDef = 0x06000000; /// - /// MetadataReader representing the method module. + /// MSIL module containing the method. 
/// - public MetadataReader MetadataReader { get; private set; } + public IAssemblyMetadata ComponentReader { get; private set; } /// /// The name of the method @@ -218,6 +230,8 @@ public class ReadyToRunMethod public MethodSignature Signature { get; } + public ImmutableArray LocalSignature { get; } + /// /// The type that the method belongs to /// @@ -282,7 +296,7 @@ public IReadOnlyList Fixups /// public ReadyToRunMethod( ReadyToRunReader readyToRunReader, - MetadataReader metadataReader, + IAssemblyMetadata componentReader, EntityHandle methodHandle, int entryPointId, string owningType, @@ -295,7 +309,7 @@ public ReadyToRunMethod( MethodHandle = methodHandle; EntryPointRuntimeFunctionId = entryPointId; - MetadataReader = metadataReader; + ComponentReader = componentReader; EntityHandle owningTypeHandle; GenericParameterHandleCollection genericParams = default(GenericParameterHandleCollection); @@ -308,8 +322,17 @@ public ReadyToRunMethod( { case HandleKind.MethodDefinition: { - MethodDefinition methodDef = MetadataReader.GetMethodDefinition((MethodDefinitionHandle)MethodHandle); - Name = MetadataReader.GetString(methodDef.Name); + MethodDefinition methodDef = ComponentReader.MetadataReader.GetMethodDefinition((MethodDefinitionHandle)MethodHandle); + if (methodDef.RelativeVirtualAddress != 0) + { + MethodBodyBlock mbb = ComponentReader.ImageReader.GetMethodBody(methodDef.RelativeVirtualAddress); + if (!mbb.LocalSignature.IsNil) + { + StandaloneSignature ss = ComponentReader.MetadataReader.GetStandaloneSignature(mbb.LocalSignature); + LocalSignature = ss.DecodeLocalSignature(typeProvider, genericContext); + } + } + Name = ComponentReader.MetadataReader.GetString(methodDef.Name); Signature = methodDef.DecodeSignature(typeProvider, genericContext); owningTypeHandle = methodDef.GetDeclaringType(); genericParams = methodDef.GetGenericParameters(); @@ -318,8 +341,8 @@ public ReadyToRunMethod( case HandleKind.MemberReference: { - MemberReference memberRef = MetadataReader.GetMemberReference((MemberReferenceHandle)MethodHandle); - Name = MetadataReader.GetString(memberRef.Name); + MemberReference memberRef = ComponentReader.MetadataReader.GetMemberReference((MemberReferenceHandle)MethodHandle); + Name = ComponentReader.MetadataReader.GetString(memberRef.Name); Signature = memberRef.DecodeMethodSignature(typeProvider, genericContext); owningTypeHandle = memberRef.Parent; } @@ -335,7 +358,7 @@ public ReadyToRunMethod( } else { - DeclaringType = MetadataNameFormatter.FormatHandle(MetadataReader, owningTypeHandle); + DeclaringType = MetadataNameFormatter.FormatHandle(ComponentReader.MetadataReader, owningTypeHandle); } StringBuilder sb = new StringBuilder(); @@ -435,7 +458,7 @@ private void ParseRuntimeFunctions() { int runtimeFunctionId = EntryPointRuntimeFunctionId; int runtimeFunctionSize = _readyToRunReader.CalculateRuntimeFunctionSize(); - int runtimeFunctionOffset = _readyToRunReader.PEReader.GetOffset(_readyToRunReader.ReadyToRunHeader.Sections[ReadyToRunSectionType.RuntimeFunctions].RelativeVirtualAddress); + int runtimeFunctionOffset = _readyToRunReader.CompositeReader.GetOffset(_readyToRunReader.ReadyToRunHeader.Sections[ReadyToRunSectionType.RuntimeFunctions].RelativeVirtualAddress); int curOffset = runtimeFunctionOffset + runtimeFunctionId * runtimeFunctionSize; BaseGcInfo gcInfo = null; int codeOffset = 0; @@ -448,7 +471,7 @@ private void ParseRuntimeFunctions() endRva = NativeReader.ReadInt32(_readyToRunReader.Image, ref curOffset); } int unwindRva = 
NativeReader.ReadInt32(_readyToRunReader.Image, ref curOffset); - int unwindOffset = _readyToRunReader.PEReader.GetOffset(unwindRva); + int unwindOffset = _readyToRunReader.CompositeReader.GetOffset(unwindRva); BaseUnwindInfo unwindInfo = null; if (_readyToRunReader.Machine == Machine.Amd64) diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs index a829b58599e5..3e31631b6fb9 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunReader.cs @@ -54,19 +54,14 @@ public sealed class ReadyToRunReader /// /// MetadataReader for the system module (normally System.Private.CoreLib) /// - private MetadataReader _systemModuleReader; + private IAssemblyMetadata _systemModuleReader; private readonly IAssemblyResolver _assemblyResolver; /// /// Reference assembly cache indexed by module indices as used in signatures /// - private List _assemblyCache; - - /// - /// Assembly headers for composite R2R images - /// - private List _assemblyHeaders; + private List _assemblyCache; // Header private OperatingSystem _operatingSystem; @@ -80,7 +75,7 @@ public sealed class ReadyToRunReader private List _readyToRunAssemblyHeaders; // DebugInfo - private Dictionary _runtimeFunctionToDebugInfo; + private Dictionary _runtimeFunctionIdToDebugOffset; // ManifestReferences private MetadataReader _manifestReader; @@ -109,7 +104,7 @@ public sealed class ReadyToRunReader /// Underlying PE image reader is used to access raw PE structures like header /// or section list. /// - public PEReader PEReader { get; private set; } + public PEReader CompositeReader { get; private set; } /// /// Byte array containing the ReadyToRun image @@ -322,12 +317,12 @@ public IReadOnlyDictionary ImportSignatures } - internal Dictionary RuntimeFunctionToDebugInfo + internal Dictionary RuntimeFunctionToDebugInfo { get { EnsureDebugInfo(); - return _runtimeFunctionToDebugInfo; + return _runtimeFunctionIdToDebugOffset; } } @@ -363,10 +358,10 @@ internal MetadataReader ManifestReader /// /// PE image /// The Cor header flag must be ILLibrary - public ReadyToRunReader(IAssemblyResolver assemblyResolver, MetadataReader metadata, PEReader peReader, string filename) + public ReadyToRunReader(IAssemblyResolver assemblyResolver, IAssemblyMetadata metadata, PEReader peReader, string filename) { _assemblyResolver = assemblyResolver; - PEReader = peReader; + CompositeReader = peReader; Filename = filename; Initialize(metadata); } @@ -383,32 +378,31 @@ public unsafe ReadyToRunReader(IAssemblyResolver assemblyResolver, string filena Initialize(metadata: null); } - private unsafe void Initialize(MetadataReader metadata) + private unsafe void Initialize(IAssemblyMetadata metadata) { - _assemblyCache = new List(); - _assemblyHeaders = new List(); + _assemblyCache = new List(); - if (PEReader == null) + if (CompositeReader == null) { byte[] image = File.ReadAllBytes(Filename); Image = image; - PEReader = new PEReader(Unsafe.As>(ref image)); + CompositeReader = new PEReader(Unsafe.As>(ref image)); } else { - ImmutableArray content = PEReader.GetEntireImage().GetContent(); + ImmutableArray content = CompositeReader.GetEntireImage().GetContent(); Image = Unsafe.As, byte[]>(ref content); } - if (metadata == null && PEReader.HasMetadata) + if (metadata == null && CompositeReader.HasMetadata) { - metadata = PEReader.GetMetadataReader(); + 
metadata = new StandaloneAssemblyMetadata(CompositeReader); } if (metadata != null) { - if ((PEReader.PEHeaders.CorHeader.Flags & CorFlags.ILLibrary) == 0) + if ((CompositeReader.PEHeaders.CorHeader.Flags & CorFlags.ILLibrary) == 0) { if (!TryLocateNativeReadyToRunHeader()) throw new BadImageFormatException("The file is not a ReadyToRun image"); @@ -419,7 +413,7 @@ private unsafe void Initialize(MetadataReader metadata) { _assemblyCache.Add(metadata); - DirectoryEntry r2rHeaderDirectory = PEReader.PEHeaders.CorHeader.ManagedNativeHeaderDirectory; + DirectoryEntry r2rHeaderDirectory = CompositeReader.PEHeaders.CorHeader.ManagedNativeHeaderDirectory; _readyToRunHeaderRVA = r2rHeaderDirectory.RelativeVirtualAddress; Debug.Assert(!Composite); } @@ -490,7 +484,7 @@ public IReadOnlyDictionary GetCustomMethodToRuntimeFu private bool TryLocateNativeReadyToRunHeader() { - PEExportTable exportTable = PEReader.GetExportTable(); + PEExportTable exportTable = CompositeReader.GetExportTable(); if (exportTable.TryGetValue("RTR_HEADER", out _readyToRunHeaderRVA)) { _composite = true; @@ -499,7 +493,7 @@ private bool TryLocateNativeReadyToRunHeader() return false; } - private MetadataReader GetSystemModuleMetadataReader() + private IAssemblyMetadata GetSystemModuleMetadataReader() { if (_systemModuleReader == null) { @@ -511,7 +505,7 @@ private MetadataReader GetSystemModuleMetadataReader() return _systemModuleReader; } - public MetadataReader GetGlobalMetadataReader() + public IAssemblyMetadata GetGlobalMetadata() { EnsureHeader(); return (_composite ? null : _assemblyCache[0]); @@ -523,7 +517,7 @@ private unsafe void EnsureHeader() { return; } - uint machine = (uint)PEReader.PEHeaders.CoffHeader.Machine; + uint machine = (uint)CompositeReader.PEHeaders.CoffHeader.Machine; _operatingSystem = OperatingSystem.Unknown; foreach (OperatingSystem os in Enum.GetValues(typeof(OperatingSystem))) { @@ -568,7 +562,7 @@ private unsafe void EnsureHeader() } - _imageBase = PEReader.PEHeaders.PEHeader.ImageBase; + _imageBase = CompositeReader.PEHeaders.PEHeader.ImageBase; // Initialize R2RHeader Debug.Assert(_readyToRunHeaderRVA != 0); @@ -583,11 +577,11 @@ private unsafe void EnsureHeader() private void EnsureDebugInfo() { - if (_runtimeFunctionToDebugInfo != null) + if (_runtimeFunctionIdToDebugOffset != null) { return; } - _runtimeFunctionToDebugInfo = new Dictionary(); + _runtimeFunctionIdToDebugOffset = new Dictionary(); if (!ReadyToRunHeader.Sections.TryGetValue(ReadyToRunSectionType.DebugInfo, out ReadyToRunSection debugInfoSection)) { return; @@ -604,8 +598,7 @@ private void EnsureDebugInfo() continue; } - var debugInfo = new DebugInfo(this, offset); - _runtimeFunctionToDebugInfo.Add((int)i, debugInfo); + _runtimeFunctionIdToDebugOffset.Add((int)i, offset); } } @@ -673,12 +666,12 @@ private unsafe void EnsureExceptionInfo() public bool InputArchitectureSupported() { - return Machine != Machine.ArmThumb2; // CoreDisTools often fails to decode when disassembling ARM images (see https://github.com/dotnet/coreclr/issues/19637) + return Machine != Machine.ArmThumb2; // CoreDisTools often fails to decode when disassembling ARM images (see https://github.com/dotnet/runtime/issues/10959) } // TODO: Fix R2RDump issue where an R2R image cannot be dissassembled with the x86 CoreDisTools // For the short term, we want to error out with a decent message explaining the unexpected error - // Issue https://github.com/dotnet/coreclr/issues/19564 + // Issue 
https://github.com/dotnet/runtime/issues/10928 public bool DisassemblerArchitectureSupported() { System.Runtime.InteropServices.Architecture val = System.Runtime.InteropServices.RuntimeInformation.ProcessArchitecture; @@ -700,12 +693,12 @@ internal int CalculateRuntimeFunctionSize() /// /// Initialize non-generic R2RMethods with method signatures from MethodDefHandle, and runtime function indices from MethodDefEntryPoints /// - private void ParseMethodDefEntrypoints(Action methodDefSectionReader) + private void ParseMethodDefEntrypoints(Action methodDefSectionReader) { ReadyToRunSection methodEntryPointSection; if (ReadyToRunHeader.Sections.TryGetValue(ReadyToRunSectionType.MethodDefEntryPoints, out methodEntryPointSection)) { - methodDefSectionReader(methodEntryPointSection, GetGlobalMetadataReader()); + methodDefSectionReader(methodEntryPointSection, GetGlobalMetadata()); } else if (ReadyToRunAssemblyHeaders != null) { @@ -724,9 +717,9 @@ private void ParseMethodDefEntrypoints(Action /// are method entrypoints are stored separately for each component assembly of the composite R2R executable. /// /// Method entrypoint section to parse - /// ECMA metadata reader representing this method entrypoint section + /// Assembly metadata reader representing this method entrypoint section /// Set to true for each runtime function index representing a method entrypoint - private void ParseMethodDefEntrypointsSection(ReadyToRunSection section, MetadataReader metadataReader, bool[] isEntryPoint) + private void ParseMethodDefEntrypointsSection(ReadyToRunSection section, IAssemblyMetadata componentReader, bool[] isEntryPoint) { int methodDefEntryPointsOffset = GetOffset(section.RelativeVirtualAddress); NativeArray methodEntryPoints = new NativeArray(Image, (uint)methodDefEntryPointsOffset); @@ -741,7 +734,7 @@ private void ParseMethodDefEntrypointsSection(ReadyToRunSection section, Metadat int runtimeFunctionId; int? 
fixupOffset; GetRuntimeFunctionIndexFromOffset(offset, out runtimeFunctionId, out fixupOffset); - ReadyToRunMethod method = new ReadyToRunMethod(this, metadataReader, methodHandle, runtimeFunctionId, owningType: null, constrainedType: null, instanceArgs: null, fixupOffset: fixupOffset); + ReadyToRunMethod method = new ReadyToRunMethod(this, componentReader, methodHandle, runtimeFunctionId, owningType: null, constrainedType: null, instanceArgs: null, fixupOffset: fixupOffset); if (method.EntryPointRuntimeFunctionId < 0 || method.EntryPointRuntimeFunctionId >= isEntryPoint.Length) { @@ -765,7 +758,7 @@ private void ParseMethodDefEntrypointsSection(ReadyToRunSection section, Metadat /// Method entrypoint section to parse /// ECMA metadata reader representing this method entrypoint section /// Set to true for each runtime function index representing a method entrypoint - private void ParseMethodDefEntrypointsSectionCustom(IR2RSignatureTypeProvider provider, Dictionary foundMethods, ReadyToRunSection section, MetadataReader metadataReader) + private void ParseMethodDefEntrypointsSectionCustom(IR2RSignatureTypeProvider provider, Dictionary foundMethods, ReadyToRunSection section, IAssemblyMetadata metadataReader) { int methodDefEntryPointsOffset = GetOffset(section.RelativeVirtualAddress); NativeArray methodEntryPoints = new NativeArray(Image, (uint)methodDefEntryPointsOffset); @@ -781,7 +774,7 @@ private void ParseMethodDefEntrypointsSectionCustom(provider, default(TGenericContext), mdReader, this, (int)curParser.Offset); + IAssemblyMetadata mdReader = GetGlobalMetadata(); + var decoder = new R2RSignatureDecoder(provider, default(TGenericContext), mdReader.MetadataReader, this, (int)curParser.Offset); TMethod customMethod = decoder.ParseMethod(); @@ -837,8 +830,8 @@ private void ParseInstanceMethodEntrypoints(bool[] isEntryPoint) NativeParser curParser = allEntriesEnum.GetNext(); while (!curParser.IsNull()) { - MetadataReader mdReader = _composite ? null : _assemblyCache[0]; - SignatureDecoder decoder = new SignatureDecoder(_assemblyResolver, mdReader, this, (int)curParser.Offset); + IAssemblyMetadata mdReader = GetGlobalMetadata(); + SignatureDecoder decoder = new SignatureDecoder(_assemblyResolver, mdReader?.MetadataReader, this, (int)curParser.Offset); string owningType = null; @@ -943,7 +936,7 @@ private void EnsureAvailableTypes() ReadyToRunSection availableTypesSection; if (ReadyToRunHeader.Sections.TryGetValue(ReadyToRunSectionType.AvailableTypes, out availableTypesSection)) { - ParseAvailableTypesSection(availableTypesSection, GetGlobalMetadataReader()); + ParseAvailableTypesSection(availableTypesSection, GetGlobalMetadata()); } else if (_readyToRunAssemblyHeaders != null) { @@ -963,7 +956,7 @@ private void EnsureAvailableTypes() /// as available types are stored separately for each component assembly of the composite R2R executable. 
/// /// - private void ParseAvailableTypesSection(ReadyToRunSection availableTypesSection, MetadataReader metadataReader) + private void ParseAvailableTypesSection(ReadyToRunSection availableTypesSection, IAssemblyMetadata metadataReader) { int availableTypesOffset = GetOffset(availableTypesSection.RelativeVirtualAddress); NativeParser parser = new NativeParser(Image, (uint)availableTypesOffset); @@ -980,7 +973,7 @@ private void ParseAvailableTypesSection(ReadyToRunSection availableTypesSection, if (isExportedType) { ExportedTypeHandle exportedTypeHandle = MetadataTokens.ExportedTypeHandle((int)rid); - string exportedTypeName = GetExportedTypeFullName(metadataReader, exportedTypeHandle); + string exportedTypeName = GetExportedTypeFullName(metadataReader.MetadataReader, exportedTypeHandle); if (!AvailableTypes.TryGetValue(availableTypesSection, out List sectionTypes)) { sectionTypes = new List(); @@ -991,7 +984,7 @@ private void ParseAvailableTypesSection(ReadyToRunSection availableTypesSection, else { TypeDefinitionHandle typeDefHandle = MetadataTokens.TypeDefinitionHandle((int)rid); - string typeDefName = MetadataNameFormatter.FormatHandle(metadataReader, typeDefHandle); + string typeDefName = MetadataNameFormatter.FormatHandle(metadataReader.MetadataReader, typeDefHandle); if (!AvailableTypes.TryGetValue(availableTypesSection, out List sectionTypes)) { sectionTypes = new List(); @@ -1137,7 +1130,7 @@ private void EnsureImportSections() /// The relative virtual address public int GetOffset(int rva) { - return PEReader.GetOffset(rva); + return CompositeReader.GetOffset(rva); } /// @@ -1196,11 +1189,11 @@ private AssemblyReferenceHandle GetAssemblyAtIndex(int refAsmIndex, out Metadata { Debug.Assert(refAsmIndex != 0); - int assemblyRefCount = (_composite ? 0 : _assemblyCache[0].GetTableRowCount(TableIndex.AssemblyRef) + 1); + int assemblyRefCount = (_composite ? 0 : _assemblyCache[0].MetadataReader.GetTableRowCount(TableIndex.AssemblyRef) + 1); AssemblyReferenceHandle assemblyReferenceHandle; if (refAsmIndex < assemblyRefCount) { - metadataReader = _assemblyCache[0]; + metadataReader = _assemblyCache[0].MetadataReader; assemblyReferenceHandle = MetadataTokens.AssemblyReferenceHandle(refAsmIndex); } else @@ -1223,9 +1216,9 @@ internal string GetReferenceAssemblyName(int refAsmIndex) /// /// Reference assembly index /// MetadataReader instance representing the reference assembly - internal MetadataReader OpenReferenceAssembly(int refAsmIndex) + internal IAssemblyMetadata OpenReferenceAssembly(int refAsmIndex) { - MetadataReader result = (refAsmIndex < _assemblyCache.Count ? _assemblyCache[refAsmIndex] : null); + IAssemblyMetadata result = (refAsmIndex < _assemblyCache.Count ? 
/// @@ -1196,11 +1189,11 @@ private AssemblyReferenceHandle GetAssemblyAtIndex(int refAsmIndex, out Metadata { Debug.Assert(refAsmIndex != 0); - int assemblyRefCount = (_composite ? 0 : _assemblyCache[0].GetTableRowCount(TableIndex.AssemblyRef) + 1); + int assemblyRefCount = (_composite ? 0 : _assemblyCache[0].MetadataReader.GetTableRowCount(TableIndex.AssemblyRef) + 1); AssemblyReferenceHandle assemblyReferenceHandle; if (refAsmIndex < assemblyRefCount) { - metadataReader = _assemblyCache[0]; + metadataReader = _assemblyCache[0].MetadataReader; assemblyReferenceHandle = MetadataTokens.AssemblyReferenceHandle(refAsmIndex); } else @@ -1223,9 +1216,9 @@ internal string GetReferenceAssemblyName(int refAsmIndex) /// </summary> /// <param name="refAsmIndex">Reference assembly index</param> /// <returns>MetadataReader instance representing the reference assembly</returns> - internal MetadataReader OpenReferenceAssembly(int refAsmIndex) + internal IAssemblyMetadata OpenReferenceAssembly(int refAsmIndex) { - MetadataReader result = (refAsmIndex < _assemblyCache.Count ? _assemblyCache[refAsmIndex] : null); + IAssemblyMetadata result = (refAsmIndex < _assemblyCache.Count ? _assemblyCache[refAsmIndex] : null); if (result == null) { AssemblyReferenceHandle assemblyReferenceHandle = GetAssemblyAtIndex(refAsmIndex, out MetadataReader metadataReader); diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index c8fca51fa4be..9832745925c9 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -66,7 +66,7 @@ public static string FormatHandle(MetadataReader metadataReader, Handle handle, public static string FormatSignature(IAssemblyResolver assemblyResolver, ReadyToRunReader r2rReader, int imageOffset, out ReadyToRunSignature result) { - SignatureDecoder decoder = new SignatureDecoder(assemblyResolver, r2rReader.GetGlobalMetadataReader(), r2rReader, imageOffset); + SignatureDecoder decoder = new SignatureDecoder(assemblyResolver, r2rReader.GetGlobalMetadata()?.MetadataReader, r2rReader, imageOffset); string answer = decoder.ReadR2RSignature(out result); return answer; } @@ -697,8 +697,8 @@ public TType ParseType() case CorElementType.ELEMENT_TYPE_MODULE_ZAPSIG: { int moduleIndex = (int)ReadUInt(); - MetadataReader refAsmReader = _contextReader.OpenReferenceAssembly(moduleIndex); - var refAsmDecoder = new R2RSignatureDecoder<TType, TMethod, TGenericContext>(_provider, Context, refAsmReader, _image, _offset, _outerReader, _contextReader); + IAssemblyMetadata refAsmReader = _contextReader.OpenReferenceAssembly(moduleIndex); + var refAsmDecoder = new R2RSignatureDecoder<TType, TMethod, TGenericContext>(_provider, Context, refAsmReader.MetadataReader, _image, _offset, _outerReader, _contextReader); var result = refAsmDecoder.ParseType(); _offset = refAsmDecoder.Offset; return result; @@ -1065,8 +1065,8 @@ private ReadyToRunSignature ParseSignature(StringBuilder builder) { fixupType &= ~(uint)ReadyToRunFixupKind.ModuleOverride; int moduleIndex = (int)ReadUIntAndEmitInlineSignatureBinary(builder); - MetadataReader refAsmEcmaReader = _contextReader.OpenReferenceAssembly(moduleIndex); - moduleDecoder = new SignatureDecoder(Context.Options, refAsmEcmaReader, _image, Offset, refAsmEcmaReader, _contextReader); + IAssemblyMetadata refAsmEcmaReader = _contextReader.OpenReferenceAssembly(moduleIndex); + moduleDecoder = new SignatureDecoder(Context.Options, refAsmEcmaReader.MetadataReader, _image, Offset, refAsmEcmaReader.MetadataReader, _contextReader); } ReadyToRunSignature result = moduleDecoder.ParseSignature((ReadyToRunFixupKind)fixupType, builder); @@ -1356,7 +1356,7 @@ private void ParseType(StringBuilder builder) builder.Append(base.ParseType()); } - public MetadataReader GetMetadataReaderFromModuleOverride() + public IAssemblyMetadata GetMetadataReaderFromModuleOverride() { if (PeekElementType() == CorElementType.ELEMENT_TYPE_MODULE_ZAPSIG) { @@ -1364,7 +1364,7 @@ public MetadataReader GetMetadataReaderFromModuleOverride() ReadElementType(); int moduleIndex = (int)ReadUInt(); - MetadataReader refAsmReader = _contextReader.OpenReferenceAssembly(moduleIndex); + IAssemblyMetadata refAsmReader = _contextReader.OpenReferenceAssembly(moduleIndex); UpdateOffset(currentOffset);
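The hunks above and the new file below migrate the reader and the signature decoders from raw MetadataReader handles to the IAssemblyMetadata abstraction. The interface definition itself is not part of this excerpt; judging from the members consumed here (ImageReader and MetadataReader), its shape is presumably close to the following sketch (inferred, not copied from the PR):

```csharp
using System.Reflection.Metadata;
using System.Reflection.PortableExecutable;

namespace ILCompiler.Reflection.ReadyToRun
{
    // Inferred shape of the abstraction implemented by StandaloneAssemblyMetadata
    // below; the actual interface in the PR may declare additional members.
    public interface IAssemblyMetadata
    {
        PEReader ImageReader { get; }

        MetadataReader MetadataReader { get; }
    }
}
```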
diff --git a/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/StandaloneAssemblyMetadata.cs b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/StandaloneAssemblyMetadata.cs new file mode 100644 index 000000000000..a5ccea15f3d8 --- /dev/null +++ b/src/coreclr/src/tools/aot/ILCompiler.Reflection.ReadyToRun/StandaloneAssemblyMetadata.cs @@ -0,0 +1,35 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Reflection.Metadata; +using System.Reflection.PortableExecutable; + +namespace ILCompiler.Reflection.ReadyToRun { + /// <summary> + /// Metadata access interface for standalone assemblies represented by MSIL PE files. + /// </summary> + public class StandaloneAssemblyMetadata : IAssemblyMetadata + { + /// <summary> + /// Reader representing the MSIL assembly file. + /// </summary> + private readonly PEReader _peReader; + + /// <summary> + /// Metadata reader for the MSIL assembly. We create one upfront to avoid going + /// through the GetMetadataReader() helper and constructing a new instance every time. + /// </summary> + private readonly MetadataReader _metadataReader; + + public StandaloneAssemblyMetadata(PEReader peReader) + { + _peReader = peReader; + _metadataReader = _peReader.GetMetadataReader(); + } + + public PEReader ImageReader => _peReader; + + public MetadataReader MetadataReader => _metadataReader; + } +} diff --git a/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/ILTestAssembly/Signature.il b/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/ILTestAssembly/Signature.il index b895be9b24f4..02f33b81f047 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/ILTestAssembly/Signature.il +++ b/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/ILTestAssembly/Signature.il @@ -24,6 +24,11 @@ { ret } + + .method public hidebysig instance int32 modopt([CoreTestAssembly]System.Void) & Method3(int32 modopt(FooModifier)*, int32 modopt(FooModifier)*) cil managed + { + ret + } } .class private auto ansi beforefieldinit Atom diff --git a/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/SignatureTests.cs b/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/SignatureTests.cs index e6a21525ee52..60d717291196 100644 --- a/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/SignatureTests.cs +++ b/src/coreclr/src/tools/aot/ILCompiler.TypeSystem.ReadyToRun.Tests/SignatureTests.cs @@ -69,6 +69,16 @@ public void TestSignatureMatchesModOptAtStartOfSigAndAfterByRef() Assert.Equal("OptionalCustomModifier0.1.1.1CharOptionalCustomModifier0.1.1.2.1.1VoidOptionalCustomModifier0.1.2.1FooModifier", GetModOptMethodSignatureInfo(methodWithModOptAtStartOfSigAndAfterByRef)); } + [Fact] + public void TestSignatureMatchesModoptOnPointerOrRefModifiedType() + { + MetadataType modOptTester = _testModule.GetType("", "ModOptTester"); + MethodSignature methodWithModOpt = modOptTester.GetMethods().Single(m => string.Equals(m.Name, "Method3")).Signature; + Assert.Equal(MethodSignature.GetIndexOfCustomModifierOnPointedAtTypeByParameterIndex(0), methodWithModOpt.GetEmbeddedSignatureData()[0].index); + Assert.Equal(MethodSignature.GetIndexOfCustomModifierOnPointedAtTypeByParameterIndex(1), methodWithModOpt.GetEmbeddedSignatureData()[1].index); + Assert.Equal(MethodSignature.GetIndexOfCustomModifierOnPointedAtTypeByParameterIndex(2), methodWithModOpt.GetEmbeddedSignatureData()[2].index); + } + [Fact] public void TestSignatureMatches() { diff --git a/src/coreclr/src/tools/aot/crossgen2/Program.cs b/src/coreclr/src/tools/aot/crossgen2/Program.cs index be483cbda481..a4b8897b84a0 100644 --- a/src/coreclr/src/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/src/tools/aot/crossgen2/Program.cs @@ -291,7 +291,36 @@ private
int Run() SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes; var targetDetails = new TargetDetails(_targetArchitecture, _targetOS, TargetAbi.CoreRT, instructionSetSupport.GetVectorTSimdVector()); - _typeSystemContext = new ReadyToRunCompilerContext(targetDetails, genericsMode); + + bool versionBubbleIncludesCoreLib = false; + if (_commandLineOptions.InputBubble) + { + versionBubbleIncludesCoreLib = true; + } + else + { + foreach (var inputFile in _inputFilePaths) + { + if (String.Compare(inputFile.Key, "System.Private.CoreLib", StringComparison.OrdinalIgnoreCase) == 0) + { + versionBubbleIncludesCoreLib = true; + break; + } + } + if (!versionBubbleIncludesCoreLib) + { + foreach (var inputFile in _unrootedInputFilePaths) + { + if (String.Compare(inputFile.Key, "System.Private.CoreLib", StringComparison.OrdinalIgnoreCase) == 0) + { + versionBubbleIncludesCoreLib = true; + break; + } + } + } + } + + _typeSystemContext = new ReadyToRunCompilerContext(targetDetails, genericsMode, versionBubbleIncludesCoreLib); string compositeRootPath = _commandLineOptions.CompositeRootPath?.FullName; diff --git a/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp b/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp index c9530e40a056..1a1ef6ff17bb 100644 --- a/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp +++ b/src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp @@ -26,11 +26,11 @@ class CORJIT_FLAGS uint64_t corJitFlags; }; -static const GUID JITEEVersionIdentifier = { /* 164b4e4f-21f6-4d05-b560-3728395404f2 */ - 0x164b4e4f, - 0x21f6, - 0x4d05, - { 0xb5, 0x60, 0x37, 0x28, 0x39, 0x54, 0x04, 0xf2 } +static const GUID JITEEVersionIdentifier = { /* a5eec3a4-4176-43a7-8c2b-a05b551d4f49 */ + 0xa5eec3a4, + 0x4176, + 0x43a7, + {0x8c, 0x2b, 0xa0, 0x5b, 0x55, 0x1d, 0x4f, 0x49} }; class Jit diff --git a/src/coreclr/src/tools/crossgen/CMakeLists.txt b/src/coreclr/src/tools/crossgen/CMakeLists.txt index bf37a9cc75e5..8d861b68df52 100644 --- a/src/coreclr/src/tools/crossgen/CMakeLists.txt +++ b/src/coreclr/src/tools/crossgen/CMakeLists.txt @@ -50,7 +50,7 @@ target_link_libraries(crossgen ${CLRJIT_CROSSGEN} gcinfo_crossgen corzap_crossgen - mscorlib_crossgen + corelib_crossgen utilcode_crossgen ) diff --git a/src/coreclr/src/tools/crossgen/crossgen.cpp b/src/coreclr/src/tools/crossgen/crossgen.cpp index 41da650119d8..803ea5aff6e7 100644 --- a/src/coreclr/src/tools/crossgen/crossgen.cpp +++ b/src/coreclr/src/tools/crossgen/crossgen.cpp @@ -35,7 +35,7 @@ enum ReturnValues STDAPI CreatePDBWorker(LPCWSTR pwzAssemblyPath, LPCWSTR pwzPlatformAssembliesPaths, LPCWSTR pwzTrustedPlatformAssemblies, LPCWSTR pwzPlatformResourceRoots, LPCWSTR pwzAppPaths, LPCWSTR pwzAppNiPaths, LPCWSTR pwzPdbPath, BOOL fGeneratePDBLinesInfo, LPCWSTR pwzManagedPdbSearchPath, LPCWSTR pwzDiasymreaderPath); STDAPI NGenWorker(LPCWSTR pwzFilename, DWORD dwFlags, LPCWSTR pwzPlatformAssembliesPaths, LPCWSTR pwzTrustedPlatformAssemblies, LPCWSTR pwzPlatformResourceRoots, LPCWSTR pwzAppPaths, LPCWSTR pwzOutputFilename=NULL, SIZE_T customBaseAddress=0, ICorSvcLogger *pLogger = NULL, LPCWSTR pwszCLRJITPath = nullptr); void SetSvcLogger(ICorSvcLogger *pCorSvcLogger); -void SetMscorlibPath(LPCWSTR wzSystemDirectory); +void SetCoreLibPath(LPCWSTR wzSystemDirectory); /* --------------------------------------------------------------------------- * * Console stuff @@ -257,7 +257,7 @@ bool StringEndsWith(LPCWSTR pwzString, LPCWSTR pwzCandidate) // When using the Phone binding model (TrustedPlatformAssemblies), automatically // 
detect which path CoreLib.[ni.]dll lies in. // -bool ComputeMscorlibPathFromTrustedPlatformAssemblies(SString& pwzMscorlibPath, LPCWSTR pwzTrustedPlatformAssemblies) +bool ComputeCoreLibPathFromTrustedPlatformAssemblies(SString& pwzCoreLibPath, LPCWSTR pwzTrustedPlatformAssemblies) { LPWSTR wszTrustedPathCopy = new WCHAR[wcslen(pwzTrustedPlatformAssemblies) + 1]; wcscpy_s(wszTrustedPathCopy, wcslen(pwzTrustedPlatformAssemblies) + 1, pwzTrustedPlatformAssemblies); @@ -277,11 +277,11 @@ bool ComputeMscorlibPathFromTrustedPlatformAssemblies(SString& pwzMscorlibPath, if (StringEndsWith(wszSingleTrustedPath, DIRECTORY_SEPARATOR_STR_W CoreLibName_IL_W) || StringEndsWith(wszSingleTrustedPath, DIRECTORY_SEPARATOR_STR_W CoreLibName_NI_W)) { - pwzMscorlibPath.Set(wszSingleTrustedPath); - SString::Iterator pwzSeparator = pwzMscorlibPath.End(); + pwzCoreLibPath.Set(wszSingleTrustedPath); + SString::Iterator pwzSeparator = pwzCoreLibPath.End(); bool retval = true; - if (!SUCCEEDED(CopySystemDirectory(pwzMscorlibPath, pwzMscorlibPath))) + if (!SUCCEEDED(CopySystemDirectory(pwzCoreLibPath, pwzCoreLibPath))) { retval = false; } @@ -318,7 +318,7 @@ void PopulateTPAList(SString path, LPCWSTR pwszMask, SString &refTPAList, bool f // No NIs are supported when creating NI images (other than NI of System.Private.CoreLib.dll). if (!fCreatePDB) { - // Only CoreLib's ni.dll should be in the TPAList for the compilation of non-mscorlib assemblies. + // Only CoreLib's ni.dll should be in the TPAList for the compilation of non-CoreLib assemblies. if (StringEndsWith((LPWSTR)pwszFilename, W(".ni.dll"))) { fAddFileToTPAList = false; @@ -837,10 +837,10 @@ int _cdecl wmain(int argc, __in_ecount(argc) WCHAR **argv) if (pwzTrustedPlatformAssemblies != nullptr) { - if (ComputeMscorlibPathFromTrustedPlatformAssemblies(wzTrustedPathRoot, pwzTrustedPlatformAssemblies)) + if (ComputeCoreLibPathFromTrustedPlatformAssemblies(wzTrustedPathRoot, pwzTrustedPlatformAssemblies)) { pwzPlatformAssembliesPaths = wzTrustedPathRoot.GetUnicode(); - SetMscorlibPath(pwzPlatformAssembliesPaths); + SetCoreLibPath(pwzPlatformAssembliesPaths); } } diff --git a/src/coreclr/src/tools/dotnet-pgo/README.md b/src/coreclr/src/tools/dotnet-pgo/README.md index 60b7101ad0dd..a377bf1e4e23 100644 --- a/src/coreclr/src/tools/dotnet-pgo/README.md +++ b/src/coreclr/src/tools/dotnet-pgo/README.md @@ -32,7 +32,7 @@ Note, this tool requires MethodDetails events which are produced by the .NET 5.0 ``` "dotnet trace collect -p 73060 --providers Microsoft-Windows-DotNETRuntime:0x6000080018:5" ``` - + - Capture events from process 73060 where we capture only JIT events using EventPipe tracing ``` "dotnet trace collect -p 73060 --providers Microsoft-Windows-DotNETRuntime:0x4000080018:5" diff --git a/src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs b/src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs index 7371f92600c8..84927fed5857 100644 --- a/src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs +++ b/src/coreclr/src/tools/dotnet-pgo/TraceTypeSystemContext.cs @@ -360,14 +360,16 @@ public MetadataStringDecoder GetMetadataStringDecoder() return _metadataStringDecoder; } - MetadataReader IAssemblyResolver.FindAssembly(MetadataReader metadataReader, AssemblyReferenceHandle assemblyReferenceHandle, string parentFile) + IAssemblyMetadata IAssemblyResolver.FindAssembly(MetadataReader metadataReader, AssemblyReferenceHandle assemblyReferenceHandle, string parentFile) { - return 
((EcmaAssembly)this.GetModuleForSimpleName(metadataReader.GetString(metadataReader.GetAssemblyReference(assemblyReferenceHandle).Name), false)).MetadataReader; + EcmaAssembly ecmaAssembly = (EcmaAssembly)this.GetModuleForSimpleName(metadataReader.GetString(metadataReader.GetAssemblyReference(assemblyReferenceHandle).Name), false); + return new StandaloneAssemblyMetadata(ecmaAssembly.PEReader); } - MetadataReader IAssemblyResolver.FindAssembly(string simpleName, string parentFile) + IAssemblyMetadata IAssemblyResolver.FindAssembly(string simpleName, string parentFile) { - return ((EcmaAssembly)this.GetModuleForSimpleName(simpleName, false)).MetadataReader; + EcmaAssembly ecmaAssembly = (EcmaAssembly)this.GetModuleForSimpleName(simpleName, false); + return new StandaloneAssemblyMetadata(ecmaAssembly.PEReader); } bool IAssemblyResolver.Naked => false; diff --git a/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md b/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md index c47d9d227b40..14f9c63bbe1c 100644 --- a/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md +++ b/src/coreclr/src/tools/dotnet-pgo/dotnet-pgo-experiment.md @@ -1,7 +1,7 @@ # Experiments towards a Profile Data pipeline for .NET ----- -The .NET Runtime has a long history of providing instrumentation based profile guided optimization -for use internally at Microsoft, and for scenarios involving extremely high value customers. To +The .NET Runtime has a long history of providing instrumentation based profile guided optimization +for use internally at Microsoft, and for scenarios involving extremely high value customers. To this end the team built the IBC (instrumented block count) infrastructure into the runtime/ngen, and IBCMerge as a tool for manipulating .ibc files. Over the last few years, the structure of these technologies and tools has shown that they are not ideal for customer use or even internal use, and @@ -33,19 +33,19 @@ Profile guided optimization in .NET is used to provide benefits for 3 major conc Startup time for an application is primarily improved by avoiding the use of the JIT by ahead of time compiling methods in the application. In addition a profile can allow determination of which methods are hot vs cold, and group methods commonly used together with others. This has been the primary use -of pgo in .NET historically. +of pgo in .NET historically. Pgo is used to address size on disk concerns of R2R binaries where the default R2R strategy is too aggressive and produces binaries that are excessively large. The idea in that case is to only generate the functions specifically referenced in some profile instead of every method the heuristic indicates may be interesting. -Application throughput performance has historically been the primary use of pgo data for C++ compilers. +Application throughput performance has historically been the primary use of pgo data for C++ compilers. .NET has history with the use of instrumented per block counts, but this data is not generally processed in an effective manner by the JIT. This proposal aims to revitalize efforts to make good use of profile guided data to improve code quality. Over time, it is expected that not only will profile data be used at build time, but that it will also be used to do runtime profile instrumentation. - + # Proposal Contents Profile guided optimization is a combination of effort across a swath of components. @@ -59,7 +59,7 @@ And there are a series of components that need to be modified 2. Instrumenting jit (clrjit) 3. 
Trace processing tool (dotnet-pgo) 4. AOT compilation tool (crossgen2) -5. Consuming runtime (coreclr) +5. Consuming runtime (coreclr) 6. Diagnostic tools (r2rdump, dotnet-pgo) ## Conceptual model of `InstrumentationData` @@ -68,7 +68,7 @@ statically, and instead is determined through instrumentation of the code. The f is expected to be defined by the JIT team, and be specific to the probes inserted, and may very well change over time. It is composed of two sections -1. The descriptor used to describe the probes, this is fixed at JIT time, and describes the meaning of the data. +1. The descriptor used to describe the probes, this is fixed at JIT time, and describes the meaning of the data. 2. The data gathered as counts, and values that will be used to perform further optimization. Both of these data blocks are able to contain type and method data, where the concept is that it is @@ -78,7 +78,7 @@ but there are also plausible cases for gathering each kind of data in both secti be made general to support both. Instrumentation Data shall have a version number independent of the general R2R versioning scheme. The intention is for this form of `InstrumentationData` to become useable for both out of line instrumentation as described in this document, as well as only tiered -compilation rejit scenarios with in process profiling. +compilation rejit scenarios with in process profiling. ## Trace data format Runtime instrumentation will be accomplished through 4 events, 2 of which are already existing @@ -149,7 +149,7 @@ Profile data shall be encoded into the R2R FileFormat in a new section named `RE This section shall hold a version number, and a single `NativeHashtable` that contains a mapping from type/method to the pair of Desc and Data. TODO define how Desc and Data are encoded. The intention is to store exactly the same data as is stored in the PGO data file, except that the instrumentation data version must be the same for -all data chunks. +all data chunks. ## Instrumenting Runtime The runtime shall be responsible for choosing when to execute instrumentation, allocating the tracing buffers @@ -197,7 +197,7 @@ data that may be embedded into the R2R file format for possible consumption by t ## Trace processing tool The trace processing tool is responsible for reading the trace files as produced by perfview/dotnet trace, and producing .MIBC files. The process should be a straightforward format translation for instrumentation data. The -`FunctionTouchOrder` and existence of the method shall be based on the `JitStarted` and `R2EEntryPoint` events. +`FunctionTouchOrder` and existence of the method shall be based on the `JitStarted` and `R2EEntryPoint` events. ## AOT Compilation tool AOT compilation shall use the profile guided data in several ways. @@ -210,7 +210,7 @@ data for the method being compiled, and for both the uninstantiated method and i as are present. The jit is responsible for merging these multiple data sources. In addition the JIT may optionally choose to generate a profile guided data block for association with the precompiled -code for use in re-jit scenarios, and information about related method code layout for the code, and optionally a +code for use in re-jit scenarios, and information about related method code layout for the code, and optionally a portion of the function body which is to be placed into a cold code section. The intention here is to allow some algorithm such as Pettis-Hansen or a more modern variant (eg https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf) to be used to optimize code layout.
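To make the layout idea concrete: a Pettis-Hansen style pass greedily merges the function chains joined by the hottest call edges, so frequent caller/callee pairs end up adjacent in the image. A toy sketch of that clustering step follows; the edge representation and names are invented for illustration, and this is not the algorithm crossgen2 ships:

```csharp
using System.Collections.Generic;
using System.Linq;

static class CodeLayoutSketch
{
    // Greedy Pettis-Hansen style clustering: start with one chain per method,
    // then walk call edges from hottest to coldest, appending the callee's
    // chain to the caller's whenever they are still in different chains.
    public static List<string> Order(IEnumerable<(string Caller, string Callee, long CallCount)> edges)
    {
        var edgeList = edges.ToList();
        var chainOf = new Dictionary<string, List<string>>();
        foreach (string method in edgeList.SelectMany(e => new[] { e.Caller, e.Callee }).Distinct())
            chainOf[method] = new List<string> { method };

        foreach (var edge in edgeList.OrderByDescending(e => e.CallCount))
        {
            List<string> callerChain = chainOf[edge.Caller];
            List<string> calleeChain = chainOf[edge.Callee];
            if (ReferenceEquals(callerChain, calleeChain))
                continue; // already laid out together

            callerChain.AddRange(calleeChain);
            foreach (string method in calleeChain)
                chainOf[method] = callerChain;
        }

        // Emit each surviving chain once; within a chain, methods appear in merged order.
        return chainOf.Values.Distinct().SelectMany(chain => chain).ToList();
    }
}
```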
@@ -219,7 +219,7 @@ to be used to optimize code layout. If present in an R2R file, when a method is rejitted, the runtime shall provide a means for the jit to see instrumentation data from either previous compiles in process, and/or from the R2R file. This shall provide a means for the JIT to choose whether or not the method should be recompiled, or possibly to inform it about optimization opportunities that are -too expensive to compute at jit time, but could be computed by the AOT compiler, or other such ideas. +too expensive to compute at jit time, but could be computed by the AOT compiler, or other such ideas. As a means of doing this, options such as the following will be given to the jit to provide custom behavior. 1. Ignore the profile data and rejit. @@ -235,4 +235,4 @@ would be to use this as a means for adaptive or speculative optimization. The tools r2rdump and dotnet-pgo shall provide a means for dumping their inputs. For most forms of data this is fairly straightforward, but for `InstrumentationData`, there shall be a common dump tool written in managed code that can provide a human readable dump of the data. r2rdump, dotnet-pgo, and possibly sos will all be able to share -this codebase for examination of the data structures in r2r files, traces, and runtime environments respectively. +this codebase for examination of the data structures in r2r files, traces, and runtime environments respectively.
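Relatedly, the `FunctionTouchOrder` mentioned in the trace processing section above reduces to ranking methods by the timestamp of the first event that touched them. A minimal sketch under that reading, using a hypothetical flattened event stream rather than dotnet-pgo's actual trace types:

```csharp
using System.Collections.Generic;
using System.Linq;

static class TouchOrderSketch
{
    // Assign each method a rank based on the first JitStarted / R2R entry
    // point event observed for it; later occurrences are ignored.
    public static Dictionary<string, int> Compute(IEnumerable<(double TimestampMs, string Method)> events)
    {
        var order = new Dictionary<string, int>();
        foreach (var evt in events.OrderBy(e => e.TimestampMs))
        {
            if (!order.ContainsKey(evt.Method))
                order.Add(evt.Method, order.Count);
        }
        return order;
    }
}
```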
diff --git a/src/coreclr/src/tools/r2rdump/Extensions.cs b/src/coreclr/src/tools/r2rdump/Extensions.cs index f8ca578f3325..2c9bd7085bb5 100644 --- a/src/coreclr/src/tools/r2rdump/Extensions.cs +++ b/src/coreclr/src/tools/r2rdump/Extensions.cs @@ -8,6 +8,7 @@ using System.Reflection.Metadata.Ecma335; using ILCompiler.Reflection.ReadyToRun; +using ILCompiler.Reflection.ReadyToRun.Amd64; using Internal.Runtime; namespace R2RDump @@ -138,8 +139,8 @@ public static void WriteTo(this ReadyToRunMethod theThis, TextWriter writer, Dum { writer.WriteLine(theThis.SignatureString); - writer.WriteLine($"Handle: 0x{MetadataTokens.GetToken(theThis.MetadataReader, theThis.MethodHandle):X8}"); - writer.WriteLine($"Rid: {MetadataTokens.GetRowNumber(theThis.MetadataReader, theThis.MethodHandle)}"); + writer.WriteLine($"Handle: 0x{MetadataTokens.GetToken(theThis.ComponentReader.MetadataReader, theThis.MethodHandle):X8}"); + writer.WriteLine($"Rid: {MetadataTokens.GetRowNumber(theThis.ComponentReader.MetadataReader, theThis.MethodHandle)}"); if (!options.Naked) { writer.WriteLine($"EntryPointRuntimeFunctionId: {theThis.EntryPointRuntimeFunctionId}"); @@ -215,15 +216,15 @@ public static void WriteTo(this RuntimeFunction theThis, TextWriter writer, Dump writer.WriteLine($"PersonalityRVA: 0x{amd64UnwindInfo.PersonalityRoutineRVA:X4}"); } - for (int unwindCodeIndex = 0; unwindCodeIndex < amd64UnwindInfo.CountOfUnwindCodes; unwindCodeIndex++) + for (int uwcIndex = 0; uwcIndex < amd64UnwindInfo.UnwindCodes.Count; uwcIndex++) { - ILCompiler.Reflection.ReadyToRun.Amd64.UnwindCode unwindCode = amd64UnwindInfo.UnwindCodeArray[unwindCodeIndex]; - writer.Write($"UnwindCode[{unwindCode.Index}]: "); + UnwindCode unwindCode = amd64UnwindInfo.UnwindCodes[uwcIndex]; + writer.Write($"UnwindCode[{uwcIndex}]: "); writer.Write($"CodeOffset 0x{unwindCode.CodeOffset:X4} "); writer.Write($"FrameOffset 0x{unwindCode.FrameOffset:X4} "); writer.Write($"NextOffset 0x{unwindCode.NextFrameOffset} "); writer.Write($"Op {unwindCode.OpInfoStr}"); writer.WriteLine(); } } writer.WriteLine(); diff --git a/src/coreclr/src/tools/r2rdump/R2RDiff.cs b/src/coreclr/src/tools/r2rdump/R2RDiff.cs index 4f3fd9fe11a8..7e19653febcd 100644 --- a/src/coreclr/src/tools/r2rdump/R2RDiff.cs +++ b/src/coreclr/src/tools/r2rdump/R2RDiff.cs @@ -255,7 +255,7 @@ private Dictionary<string, int> GetPESectionMap(ReadyToRunReader reader) { Dictionary<string, int> sectionMap = new Dictionary<string, int>(); - foreach (SectionHeader sectionHeader in reader.PEReader.PEHeaders.SectionHeaders) + foreach (SectionHeader sectionHeader in reader.CompositeReader.PEHeaders.SectionHeaders) { sectionMap.Add(sectionHeader.Name, sectionHeader.SizeOfRawData); } diff --git a/src/coreclr/src/tools/r2rdump/R2RDump.cs b/src/coreclr/src/tools/r2rdump/R2RDump.cs index 62f26189d243..0cfc36d4a79a 100644 --- a/src/coreclr/src/tools/r2rdump/R2RDump.cs +++ b/src/coreclr/src/tools/r2rdump/R2RDump.cs @@ -70,7 +70,7 @@ public class DumpOptions : IAssemblyResolver /// <param name="parentFile">Name of assembly from which we're performing the lookup</param> /// <returns></returns> - public MetadataReader FindAssembly(MetadataReader metadataReader, AssemblyReferenceHandle assemblyReferenceHandle, string parentFile) + public IAssemblyMetadata FindAssembly(MetadataReader metadataReader, AssemblyReferenceHandle assemblyReferenceHandle, string parentFile) { string simpleName = metadataReader.GetString(metadataReader.GetAssemblyReference(assemblyReferenceHandle).Name); return FindAssembly(simpleName, parentFile); } @@ -83,7 +83,7 @@ public MetadataReader FindAssembly(MetadataReader metadataReader, AssemblyRefere /// <param name="simpleName">Simple name of the assembly to look up</param> /// <param name="parentFile">Name of assembly from which we're performing the lookup</param> /// <returns></returns> - public MetadataReader FindAssembly(string simpleName, string parentFile) + public IAssemblyMetadata FindAssembly(string simpleName, string parentFile) { foreach (FileInfo refAsm in Reference ??
Enumerable.Empty<FileInfo>()) { @@ -117,7 +117,7 @@ public MetadataReader FindAssembly(string simpleName, string parentFile) return null; } - private static unsafe MetadataReader Open(string filename) + private static unsafe IAssemblyMetadata Open(string filename) { byte[] image = File.ReadAllBytes(filename); @@ -128,7 +128,7 @@ private static unsafe MetadataReader Open(string filename) throw new BadImageFormatException($"ECMA metadata not found in file '{filename}'"); } - return peReader.GetMetadataReader(); + return new StandaloneAssemblyMetadata(peReader); } } @@ -395,7 +395,7 @@ public void Dump(ReadyToRunReader r2r) pdbWriter.WritePDBData(r2r.Filename, ProducePdbWriterMethods(r2r)); } - if (!_options.Header && standardDump) + if (standardDump) { _dumper.DumpAllMethods(); } @@ -411,8 +411,8 @@ IEnumerable<MethodInfo> ProducePdbWriterMethods(ReadyToRunReader r2r) MethodInfo mi = new MethodInfo(); mi.Name = method.SignatureString; mi.HotRVA = (uint)method.RuntimeFunctions[0].StartAddress; - mi.MethodToken = (uint)MetadataTokens.GetToken(method.MetadataReader, method.MethodHandle); - mi.AssemblyName = method.MetadataReader.GetString(method.MetadataReader.GetAssemblyDefinition().Name); + mi.MethodToken = (uint)MetadataTokens.GetToken(method.ComponentReader.MetadataReader, method.MethodHandle); + mi.AssemblyName = method.ComponentReader.MetadataReader.GetString(method.ComponentReader.MetadataReader.GetAssemblyDefinition().Name); mi.ColdRVA = 0; yield return mi; @@ -428,7 +428,7 @@ private bool Match(ReadyToRunMethod method, string query, bool exact) { int id; bool isNum = ArgStringToInt(query, out id); - bool idMatch = isNum && (method.Rid == id || MetadataTokens.GetRowNumber(method.MetadataReader, method.MethodHandle) == id); + bool idMatch = isNum && (method.Rid == id || MetadataTokens.GetRowNumber(method.ComponentReader.MetadataReader, method.MethodHandle) == id); bool sigMatch = false; if (exact) diff --git a/src/coreclr/src/tools/r2rdump/README.md b/src/coreclr/src/tools/r2rdump/README.md index 364cbc3ad581..c515c43ae805 100644 --- a/src/coreclr/src/tools/r2rdump/README.md +++ b/src/coreclr/src/tools/r2rdump/README.md @@ -130,12 +130,12 @@ In x64/Arm/Arm64, GcTransitions are grouped into chunks where each chunk covers ## Todo -* Support R2RDump on ARM and ARM64 (https://github.com/dotnet/coreclr/issues/19089) +* Support R2RDump on ARM and ARM64 (https://github.com/dotnet/runtime/issues/10753) -* Parse R2RSections: READYTORUN_SECTION_EXCEPTION_INFO, READYTORUN_SECTION_DEBUG_INFO, READYTORUN_SECTION_DELAYLOAD_METHODCALL_THUNKS, READYTORUN_SECTION_INLINING_INFO, READYTORUN_SECTION_PROFILEDATA_INFO (https://github.com/dotnet/coreclr/issues/19616) +* Parse R2RSections: READYTORUN_SECTION_EXCEPTION_INFO, READYTORUN_SECTION_DEBUG_INFO, READYTORUN_SECTION_DELAYLOAD_METHODCALL_THUNKS, READYTORUN_SECTION_INLINING_INFO, READYTORUN_SECTION_PROFILEDATA_INFO (https://github.com/dotnet/runtime/issues/10948) * Reenable R2RDumpTests after making it less fragile -* Fix issues with disasm on Arm (https://github.com/dotnet/coreclr/issues/19637) and disasm using x86 coredistools (https://github.com/dotnet/coreclr/issues/19564) +* Fix issues with disasm on Arm (https://github.com/dotnet/runtime/issues/10959) and disasm using x86 coredistools (https://github.com/dotnet/runtime/issues/10928) -* Test R2RDump on more test cases to make sure it runs reliably and verify that the output is accurate
(list of failing inputs: https://github.com/dotnet/coreclr/issues/19642) +* Test R2RDump on more test cases to make sure it runs reliably and verify that the output is accurate (list of failing inputs: https://github.com/dotnet/runtime/issues/10961) diff --git a/src/coreclr/src/tools/r2rdump/TextDumper.cs b/src/coreclr/src/tools/r2rdump/TextDumper.cs index b3bf91845f38..3232ab3f3442 100644 --- a/src/coreclr/src/tools/r2rdump/TextDumper.cs +++ b/src/coreclr/src/tools/r2rdump/TextDumper.cs @@ -215,18 +215,15 @@ internal override void DumpDisasm(RuntimeFunction rtf, int imageOffset) string instr; int instrSize = _disassembler.GetInstruction(rtf, imageOffset, rtfOffset, out instr); - if (_r2r.Machine == Machine.Amd64 && ((ILCompiler.Reflection.ReadyToRun.Amd64.UnwindInfo)rtf.UnwindInfo).UnwindCodes.ContainsKey(codeOffset)) + if (_r2r.Machine == Machine.Amd64 && ((ILCompiler.Reflection.ReadyToRun.Amd64.UnwindInfo)rtf.UnwindInfo).CodeOffsetToUnwindCodeIndex.TryGetValue(codeOffset, out int unwindCodeIndex)) { - List codes = ((ILCompiler.Reflection.ReadyToRun.Amd64.UnwindInfo)rtf.UnwindInfo).UnwindCodes[codeOffset]; - foreach (ILCompiler.Reflection.ReadyToRun.Amd64.UnwindCode code in codes) + ILCompiler.Reflection.ReadyToRun.Amd64.UnwindCode code = ((ILCompiler.Reflection.ReadyToRun.Amd64.UnwindInfo)rtf.UnwindInfo).UnwindCodes[unwindCodeIndex]; + _writer.Write($"{indentString}{code.UnwindOp} {code.OpInfoStr}"); + if (code.NextFrameOffset != -1) { - _writer.Write($"{indentString}{code.UnwindOp} {code.OpInfoStr}"); - if (code.NextFrameOffset != -1) - { - _writer.WriteLine($"{indentString}{code.NextFrameOffset}"); - } - _writer.WriteLine(); + _writer.WriteLine($"{indentString}{code.NextFrameOffset}"); } + _writer.WriteLine(); } if (!_options.HideTransitions && rtf.Method.GcInfo?.Transitions != null && rtf.Method.GcInfo.Transitions.TryGetValue(codeOffset, out List transitionsForOffset)) @@ -348,6 +345,8 @@ internal override void DumpSectionContents(ReadyToRunSection section) int rtfOffset = _r2r.GetOffset(section.RelativeVirtualAddress); int rtfEndOffset = rtfOffset + section.Size; int rtfIndex = 0; + _writer.WriteLine(" Index | StartRVA | EndRVA | UnwindRVA"); + _writer.WriteLine("-----------------------------------------"); while (rtfOffset < rtfEndOffset) { int startRva = NativeReader.ReadInt32(_r2r.Image, ref rtfOffset); @@ -357,11 +356,8 @@ internal override void DumpSectionContents(ReadyToRunSection section) endRva = NativeReader.ReadInt32(_r2r.Image, ref rtfOffset); } int unwindRva = NativeReader.ReadInt32(_r2r.Image, ref rtfOffset); - _writer.WriteLine($"Index: {rtfIndex}"); - _writer.WriteLine($" StartRva: 0x{startRva:X8}"); - if (endRva != -1) - _writer.WriteLine($" EndRva: 0x{endRva:X8}"); - _writer.WriteLine($" UnwindRva: 0x{unwindRva:X8}"); + string endRvaText = (endRva != -1 ? 
endRva.ToString("x8") : " "); + _writer.WriteLine($"{rtfIndex,7} | {startRva:X8} | {endRvaText} | {unwindRva:X8}"); rtfIndex++; } break; @@ -409,7 +405,7 @@ internal override void DumpSectionContents(ReadyToRunSection section) int assemblyRefCount = 0; if (!_r2r.Composite) { - MetadataReader globalReader = _r2r.GetGlobalMetadataReader(); + MetadataReader globalReader = _r2r.GetGlobalMetadata().MetadataReader; assemblyRefCount = globalReader.GetTableRowCount(TableIndex.AssemblyRef) + 1; _writer.WriteLine($"MSIL AssemblyRef's ({assemblyRefCount} entries):"); for (int assemblyRefIndex = 1; assemblyRefIndex < assemblyRefCount; assemblyRefIndex++) diff --git a/src/coreclr/src/unwinder/CMakeLists.txt b/src/coreclr/src/unwinder/CMakeLists.txt index 98cdb0c0b7a8..e9af2458140d 100644 --- a/src/coreclr/src/unwinder/CMakeLists.txt +++ b/src/coreclr/src/unwinder/CMakeLists.txt @@ -20,8 +20,10 @@ list(APPEND UNWINDER_SOURCES convert_to_absolute_path(UNWINDER_SOURCES ${UNWINDER_SOURCES}) if(CLR_CMAKE_HOST_UNIX) - add_library_clr(unwinder_wks OBJECT ${UNWINDER_SOURCES}) - add_dependencies(unwinder_wks eventing_headers) + add_library_clr(unwinder_wks_obj OBJECT ${UNWINDER_SOURCES}) + add_dependencies(unwinder_wks_obj eventing_headers) + add_library(unwinder_wks INTERFACE) + target_sources(unwinder_wks INTERFACE $<TARGET_OBJECTS:unwinder_wks_obj>) endif(CLR_CMAKE_HOST_UNIX) add_library_clr(unwinder_dac ${UNWINDER_SOURCES}) diff --git a/src/coreclr/src/utilcode/CMakeLists.txt b/src/coreclr/src/utilcode/CMakeLists.txt index 4c1ce806b651..9eeac28ae4fb 100644 --- a/src/coreclr/src/utilcode/CMakeLists.txt +++ b/src/coreclr/src/utilcode/CMakeLists.txt @@ -26,7 +26,6 @@ set(UTILCODE_COMMON_SOURCES arraylist.cpp bitvector.cpp comex.cpp - delayloadhelpers.cpp guidfromname.cpp memorypool.cpp iallocator.cpp @@ -97,7 +96,9 @@ convert_to_absolute_path(UTILCODE_CROSSGEN_SOURCES ${UTILCODE_CROSSGEN_SOURCES}) convert_to_absolute_path(UTILCODE_STATICNOHOST_SOURCES ${UTILCODE_STATICNOHOST_SOURCES}) add_library_clr(utilcode_dac STATIC ${UTILCODE_DAC_SOURCES}) -add_library_clr(utilcode OBJECT ${UTILCODE_SOURCES}) +add_library_clr(utilcode_obj OBJECT ${UTILCODE_SOURCES}) +add_library(utilcode INTERFACE) +target_sources(utilcode INTERFACE $<TARGET_OBJECTS:utilcode_obj>) add_library_clr(utilcodestaticnohost STATIC ${UTILCODE_STATICNOHOST_SOURCES}) add_library_clr(utilcode_crossgen STATIC ${UTILCODE_CROSSGEN_SOURCES}) @@ -105,9 +106,9 @@ if(CLR_CMAKE_HOST_UNIX) target_link_libraries(utilcodestaticnohost nativeresourcestring) target_link_libraries(utilcode_crossgen nativeresourcestring) target_link_libraries(utilcode_dac nativeresourcestring) - target_link_libraries(utilcode nativeresourcestring) + target_link_libraries(utilcode INTERFACE nativeresourcestring) add_dependencies(utilcode_dac coreclrpal) - add_dependencies(utilcode coreclrpal) + add_dependencies(utilcode_obj coreclrpal) endif(CLR_CMAKE_HOST_UNIX) @@ -120,10 +121,10 @@ set_target_properties(utilcode_crossgen PROPERTIES CROSSGEN_COMPONENT TRUE) target_compile_definitions(utilcode_dac PRIVATE SELF_NO_HOST) target_compile_definitions(utilcodestaticnohost PRIVATE SELF_NO_HOST) add_dependencies(utilcode_dac ${UTILCODE_DEPENDENCIES}) -add_dependencies(utilcode ${UTILCODE_DEPENDENCIES}) +add_dependencies(utilcode_obj ${UTILCODE_DEPENDENCIES}) add_dependencies(utilcode_crossgen ${UTILCODE_DEPENDENCIES}) add_dependencies(utilcodestaticnohost ${UTILCODE_DEPENDENCIES}) target_precompile_header(TARGET utilcode_dac HEADER stdafx.h) -target_precompile_header(TARGET utilcode HEADER stdafx.h) +target_precompile_header(TARGET utilcode_obj HEADER
stdafx.h) target_precompile_header(TARGET utilcode_crossgen HEADER stdafx.h) target_precompile_header(TARGET utilcodestaticnohost HEADER stdafx.h) diff --git a/src/coreclr/src/utilcode/debug.cpp b/src/coreclr/src/utilcode/debug.cpp index d6c2c4a4b1ad..b4f44dc989d6 100644 --- a/src/coreclr/src/utilcode/debug.cpp +++ b/src/coreclr/src/utilcode/debug.cpp @@ -23,7 +23,7 @@ extern "C" _CRTIMP int __cdecl _flushall(void); #ifdef HOST_WINDOWS -void CreateCrashDumpIfEnabled(); +void CreateCrashDumpIfEnabled(bool stackoverflow = false); #endif // Global state counter to implement SUPPRESS_ALLOCATION_ASSERTS_IN_THIS_SCOPE. diff --git a/src/coreclr/src/utilcode/delayloadhelpers.cpp b/src/coreclr/src/utilcode/delayloadhelpers.cpp deleted file mode 100644 index 90936ccbcfc0..000000000000 --- a/src/coreclr/src/utilcode/delayloadhelpers.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// - -// -// Contains convenience functionality for lazily loading modules -// and getting entrypoints within them. -// - -#include "stdafx.h" - -#include "crtwrap.h" -#include "winwrap.h" -#include "utilcode.h" -#include "clrhost.h" -#include "holder.h" -#include "delayloadhelpers.h" - -namespace DelayLoad -{ - //================================================================================================================= - // Used to synchronize initialization. Is not used when initialization has already taken place. - - static CRITSEC_COOKIE g_pLock = nullptr; - - //================================================================================================================= - // Creates and initializes g_pLock when first used. - - static HRESULT InitializeLock() - { - STATIC_CONTRACT_LIMITED_METHOD; - HRESULT hr = S_OK; - - CRITSEC_COOKIE pLock = ClrCreateCriticalSection(CrstLeafLock, CRST_REENTRANCY); - IfNullRet(pLock); - if (InterlockedCompareExchangeT(&g_pLock, pLock, nullptr) != nullptr) - { - ClrDeleteCriticalSection(pLock); - } - - return S_OK; - } - - //================================================================================================================= - HRESULT Module::GetValue(HMODULE *pHMODULE) - { - STATIC_CONTRACT_LIMITED_METHOD; - HRESULT hr = S_OK; - - if (pHMODULE == nullptr) - { - return E_INVALIDARG; - } - - if (!m_fInitialized) - { - IfFailRet(InitializeLock()); - - HModuleHolder hMod = ::LoadLibraryW(m_wzDllName); - hr = (hMod == nullptr) ? HRESULT_FROM_GetLastError() : S_OK; - _ASSERTE(FAILED(hr) == (hMod == nullptr)); - - { // Lock scope - CRITSEC_Holder lock(g_pLock); - if (!m_fInitialized) - { - m_hr = hr; - m_hMod = hMod.Extract(); - m_fInitialized = true; - } - } - } - - _ASSERTE(m_fInitialized); - *pHMODULE = m_hMod; - return m_hr; - } - - //================================================================================================================= - HRESULT Function::GetValue(LPVOID * ppvFunc) - { - STATIC_CONTRACT_LIMITED_METHOD; - HRESULT hr = S_OK; - - if (ppvFunc == nullptr) - { - return E_INVALIDARG; - } - - if (!m_fInitialized) - { - HMODULE hMod = nullptr; - IfFailRet(m_pModule->GetValue(&hMod)); - - LPVOID pvFunc = reinterpret_cast<LPVOID>(::GetProcAddress(hMod, m_szFunctionName)); - hr = (pvFunc == nullptr) ?
HRESULT_FROM_GetLastError() : S_OK; - - { // Lock scope - CRITSEC_Holder lock(g_pLock); - if (!m_fInitialized) - { - m_hr = hr; - m_pvFunction = pvFunc; - m_fInitialized = true; - } - } - } - - _ASSERTE(m_fInitialized); - *ppvFunc = m_pvFunction; - return m_hr; - } -} diff --git a/src/coreclr/src/utilcode/hostimpl.cpp b/src/coreclr/src/utilcode/hostimpl.cpp index 31d38df70636..c1e4b53b065a 100644 --- a/src/coreclr/src/utilcode/hostimpl.cpp +++ b/src/coreclr/src/utilcode/hostimpl.cpp @@ -76,7 +76,7 @@ void GetLastThrownObjectExceptionFromThread(Exception** ppException) } #ifdef HOST_WINDOWS -void CreateCrashDumpIfEnabled() +void CreateCrashDumpIfEnabled(bool stackoverflow) { } #endif diff --git a/src/coreclr/src/utilcode/pedecoder.cpp b/src/coreclr/src/utilcode/pedecoder.cpp index d0854bbc043f..91fde64d297c 100644 --- a/src/coreclr/src/utilcode/pedecoder.cpp +++ b/src/coreclr/src/utilcode/pedecoder.cpp @@ -1770,20 +1770,29 @@ void PEDecoder::LayoutILOnly(void *base, BOOL allowFullPE) const PAGE_READONLY, &oldProtection)) ThrowLastError(); - // Finally, apply proper protection to copied sections - section = sectionStart; - while (section < sectionEnd) + // Finally, apply proper protection to copied sections + for (section = sectionStart; section < sectionEnd; section++) { // Add appropriate page protection. - if ((section->Characteristics & VAL32(IMAGE_SCN_MEM_WRITE)) == 0) +#if defined(CROSSGEN_COMPILE) || defined(TARGET_UNIX) + if (section->Characteristics & IMAGE_SCN_MEM_WRITE) + continue; + + DWORD newProtection = PAGE_READONLY; +#else + DWORD newProtection = section->Characteristics & IMAGE_SCN_MEM_EXECUTE ? + PAGE_EXECUTE_READ : + section->Characteristics & IMAGE_SCN_MEM_WRITE ? + PAGE_READWRITE : + PAGE_READONLY; +#endif + + if (!ClrVirtualProtect((void*)((BYTE*)base + VAL32(section->VirtualAddress)), + VAL32(section->Misc.VirtualSize), + newProtection, &oldProtection)) { - if (!ClrVirtualProtect((void *) ((BYTE *)base + VAL32(section->VirtualAddress)), - VAL32(section->Misc.VirtualSize), - PAGE_READONLY, &oldProtection)) - ThrowLastError(); + ThrowLastError(); } - - section++; } RETURN; diff --git a/src/coreclr/src/utilcode/sigbuilder.cpp b/src/coreclr/src/utilcode/sigbuilder.cpp index 111164149e7e..4a8f8fe6c183 100644 --- a/src/coreclr/src/utilcode/sigbuilder.cpp +++ b/src/coreclr/src/utilcode/sigbuilder.cpp @@ -6,6 +6,8 @@ #include "sigbuilder.h" #include "ex.h" +const mdToken g_tkCorEncodeToken[4] ={mdtTypeDef, mdtTypeRef, mdtTypeSpec, mdtBaseType}; + void SigBuilder::AppendByte(BYTE b) { STANDARD_VM_CONTRACT; diff --git a/src/coreclr/src/utilcode/util.cpp b/src/coreclr/src/utilcode/util.cpp index d697946beb8f..ef35f09b1772 100644 --- a/src/coreclr/src/utilcode/util.cpp +++ b/src/coreclr/src/utilcode/util.cpp @@ -850,6 +850,7 @@ BYTE * ClrVirtualAllocWithinRange(const BYTE *pMinAddr, /*static*/ BOOL CPUGroupInfo::m_enableGCCPUGroups = FALSE; /*static*/ BOOL CPUGroupInfo::m_threadUseAllCpuGroups = FALSE; +/*static*/ BOOL CPUGroupInfo::m_threadAssignCpuGroups = FALSE; /*static*/ WORD CPUGroupInfo::m_nGroups = 0; /*static*/ WORD CPUGroupInfo::m_nProcessors = 0; /*static*/ WORD CPUGroupInfo::m_initialGroup = 0; @@ -991,6 +992,7 @@ DWORD LCM(DWORD u, DWORD v) #if !defined(FEATURE_REDHAWK) && (defined(TARGET_AMD64) || defined(TARGET_ARM64)) BOOL enableGCCPUGroups = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_GCCpuGroup) != 0; BOOL threadUseAllCpuGroups = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Thread_UseAllCpuGroups) != 0; + BOOL threadAssignCpuGroups = 
CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Thread_AssignCpuGroups) != 0; if (!enableGCCPUGroups) return; @@ -1006,10 +1008,11 @@ DWORD LCM(DWORD u, DWORD v) CPUGroupInfo::GetThreadGroupAffinity(GetCurrentThread(), &groupAffinity); m_initialGroup = groupAffinity.Group; - // only enable CPU groups if more than one group exists - BOOL hasMultipleGroups = m_nGroups > 1; - m_enableGCCPUGroups = enableGCCPUGroups && hasMultipleGroups; - m_threadUseAllCpuGroups = threadUseAllCpuGroups && hasMultipleGroups; + // only enable CPU groups if more than one group exists + BOOL hasMultipleGroups = m_nGroups > 1; + m_enableGCCPUGroups = enableGCCPUGroups && hasMultipleGroups; + m_threadUseAllCpuGroups = threadUseAllCpuGroups && hasMultipleGroups; + m_threadAssignCpuGroups = threadAssignCpuGroups && hasMultipleGroups; #endif // TARGET_AMD64 || TARGET_ARM64 // Determine if the process is affinitized to a single processor (or if the system has a single processor) @@ -1164,8 +1167,8 @@ DWORD LCM(DWORD u, DWORD v) WORD i, minGroup = 0; DWORD minWeight = 0; - // m_enableGCCPUGroups and m_threadUseAllCpuGroups must be TRUE - _ASSERTE(m_enableGCCPUGroups && m_threadUseAllCpuGroups); + // m_enableGCCPUGroups, m_threadUseAllCpuGroups, and m_threadAssignCpuGroups must be TRUE + _ASSERTE(m_enableGCCPUGroups && m_threadUseAllCpuGroups && m_threadAssignCpuGroups); for (i = 0; i < m_nGroups; i++) { @@ -1204,8 +1207,8 @@ DWORD LCM(DWORD u, DWORD v) { LIMITED_METHOD_CONTRACT; #if (defined(TARGET_AMD64) || defined(TARGET_ARM64)) - // m_enableGCCPUGroups and m_threadUseAllCpuGroups must be TRUE - _ASSERTE(m_enableGCCPUGroups && m_threadUseAllCpuGroups); + // m_enableGCCPUGroups, m_threadUseAllCpuGroups, and m_threadAssignCpuGroups must be TRUE + _ASSERTE(m_enableGCCPUGroups && m_threadUseAllCpuGroups && m_threadAssignCpuGroups); WORD group = gf->Group; m_CPUGroupInfoArray[group].activeThreadWeight -= m_CPUGroupInfoArray[group].groupWeight; @@ -1238,6 +1241,12 @@ BOOL CPUGroupInfo::GetCPUGroupRange(WORD group_number, WORD* group_begin, WORD* LIMITED_METHOD_CONTRACT; return m_threadUseAllCpuGroups; } + +/*static*/ BOOL CPUGroupInfo::CanAssignCpuGroupsToThreads() +{ + LIMITED_METHOD_CONTRACT; + return m_threadAssignCpuGroups; +} #endif // HOST_WINDOWS //****************************************************************************** diff --git a/src/coreclr/src/vm/CMakeLists.txt b/src/coreclr/src/vm/CMakeLists.txt index fb36f5ca9b6e..0cb75e5629f3 100644 --- a/src/coreclr/src/vm/CMakeLists.txt +++ b/src/coreclr/src/vm/CMakeLists.txt @@ -316,6 +316,7 @@ set(VM_SOURCES_WKS comthreadpool.cpp comutilnative.cpp comwaithandle.cpp + corelib.cpp # true customattribute.cpp custommarshalerinfo.cpp autotrace.cpp @@ -356,6 +357,7 @@ set(VM_SOURCES_WKS gcenv.ee.common.cpp gcenv.os.cpp gchelpers.cpp + genanalysis.cpp genmeth.cpp hosting.cpp ibclogger.cpp @@ -370,7 +372,6 @@ set(VM_SOURCES_WKS marshalnative.cpp methodtablebuilder.cpp mlinfo.cpp - mscorlib.cpp # true multicorejit.cpp # Condition="'$(FeatureMulticoreJIT)' == 'true' multicorejitplayer.cpp # Condition="'$(FeatureMulticoreJIT)' == 'true' nativeeventsource.cpp @@ -491,7 +492,7 @@ set(VM_HEADERS_WKS marshalnative.h methodtablebuilder.h mlinfo.h - mscorlib.h + corelib.h multicorejit.h multicorejitimpl.h nativeeventsource.h @@ -557,6 +558,7 @@ if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) ../gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp ../gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp 
../gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp + ../gc/vxsort/smallsort/avx2_load_mask_tables.cpp ) endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32) @@ -923,7 +925,7 @@ list(APPEND VM_HEADERS_DAC if (CLR_CMAKE_TARGET_WIN32) list(APPEND VM_SOURCES_WKS ${VM_HEADERS_WKS}) - list(APPEND VM_SOURCES_WKS_ARCH_ASM ${VM_HEADERS_WKS_ARCH_ASM}) + list(APPEND VM_SOURCES_WKS ${VM_HEADERS_WKS_ARCH_ASM}) list(APPEND VM_SOURCES_DAC ${VM_HEADERS_DAC}) endif(CLR_CMAKE_TARGET_WIN32) diff --git a/src/coreclr/src/vm/ClrEtwAll.man b/src/coreclr/src/vm/ClrEtwAll.man index cf2108b4e56a..f7fdca03d6b8 100644 --- a/src/coreclr/src/vm/ClrEtwAll.man +++ b/src/coreclr/src/vm/ClrEtwAll.man @@ -133,6 +133,8 @@ + + @@ -992,6 +994,17 @@ + +