Skip to content
Open
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
<Type Name="DocumentTokenChunker" FullName="Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker">
  <!--
    API reference entry for the sealed DocumentTokenChunker class.
    Layout: per-language type signatures, assembly info, base type, type-level
    attributes and docs, followed by two members (the constructor and ProcessAsync).
  -->
  <TypeSignature Language="C#" Value="public sealed class DocumentTokenChunker : Microsoft.Extensions.DataIngestion.IngestionChunker&lt;string&gt;" />
  <TypeSignature Language="ILAsm" Value=".class public auto ansi sealed beforefieldinit DocumentTokenChunker extends Microsoft.Extensions.DataIngestion.IngestionChunker`1&lt;string&gt;" />
  <TypeSignature Language="DocId" Value="T:Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker" />
  <TypeSignature Language="VB.NET" Value="Public NotInheritable Class DocumentTokenChunker&#xA;Inherits IngestionChunker(Of String)" />
  <TypeSignature Language="F#" Value="type DocumentTokenChunker = class&#xA; inherit IngestionChunker&lt;string&gt;" />
  <TypeSignature Language="C++ CLI" Value="public ref class DocumentTokenChunker sealed : Microsoft::Extensions::DataIngestion::IngestionChunker&lt;System::String ^&gt;" />
  <AssemblyInfo>
    <AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
    <AssemblyVersion>10.2.0.0</AssemblyVersion>
  </AssemblyInfo>
  <Base>
    <BaseTypeName>Microsoft.Extensions.DataIngestion.IngestionChunker&lt;System.String&gt;</BaseTypeName>
    <BaseTypeArguments>
      <BaseTypeArgument TypeParamName="!0">System.String</BaseTypeArgument>
    </BaseTypeArguments>
  </Base>
  <Interfaces />
  <Attributes>
    <Attribute FrameworkAlternate="net-10.0-pp;net-8.0-pp;net-9.0-pp">
      <AttributeName Language="C#">[System.Runtime.CompilerServices.Nullable(new System.Byte[] { 0, 1 })]</AttributeName>
      <AttributeName Language="F#">[&lt;System.Runtime.CompilerServices.Nullable(new System.Byte[] { 0, 1 })&gt;]</AttributeName>
    </Attribute>
  </Attributes>
  <Docs>
    <summary>
      Processes a document by tokenizing its content and dividing it into overlapping chunks of tokens.
    </summary>
    <remarks>
      <para>This class uses a tokenizer to convert the document's content into tokens and then splits the
      tokens into chunks of a specified size, with a configurable overlap between consecutive chunks.</para>
      <para>Note that tables may be split mid-row.</para>
    </remarks>
  </Docs>
  <Members>
    <!-- Constructor: takes the chunker options. -->
    <Member MemberName=".ctor">
      <MemberSignature Language="C#" Value="public DocumentTokenChunker (Microsoft.Extensions.DataIngestion.IngestionChunkerOptions options);" />
      <MemberSignature Language="ILAsm" Value=".method public hidebysig specialname rtspecialname instance void .ctor(class Microsoft.Extensions.DataIngestion.IngestionChunkerOptions options) cil managed" />
      <MemberSignature Language="DocId" Value="M:Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker.#ctor(Microsoft.Extensions.DataIngestion.IngestionChunkerOptions)" />
      <MemberSignature Language="VB.NET" Value="Public Sub New (options As IngestionChunkerOptions)" />
      <MemberSignature Language="F#" Value="new Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker : Microsoft.Extensions.DataIngestion.IngestionChunkerOptions -&gt; Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker" Usage="new Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker options" />
      <MemberSignature Language="C++ CLI" Value="public:&#xA; DocumentTokenChunker(Microsoft::Extensions::DataIngestion::IngestionChunkerOptions ^ options);" />
      <MemberType>Constructor</MemberType>
      <AssemblyInfo>
        <AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
        <AssemblyVersion>10.2.0.0</AssemblyVersion>
      </AssemblyInfo>
      <Parameters>
        <Parameter Name="options" Type="Microsoft.Extensions.DataIngestion.IngestionChunkerOptions" />
      </Parameters>
      <Docs>
        <param name="options">The options used to configure the chunker, including tokenizer and chunk sizes.</param>
        <summary>
          Initializes a new instance of the <see cref="T:Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker" /> class with the specified options.
        </summary>
        <remarks>To be added.</remarks>
      </Docs>
    </Member>
    <!-- ProcessAsync: override whose docs carry placeholders alongside an inheritdoc marker. -->
    <Member MemberName="ProcessAsync">
      <MemberSignature Language="C#" Value="public override System.Collections.Generic.IAsyncEnumerable&lt;Microsoft.Extensions.DataIngestion.IngestionChunk&lt;string&gt;&gt; ProcessAsync (Microsoft.Extensions.DataIngestion.IngestionDocument document, System.Threading.CancellationToken cancellationToken = default);" />
      <MemberSignature Language="ILAsm" Value=".method public hidebysig virtual instance class System.Collections.Generic.IAsyncEnumerable`1&lt;class Microsoft.Extensions.DataIngestion.IngestionChunk`1&lt;string&gt;&gt; ProcessAsync(class Microsoft.Extensions.DataIngestion.IngestionDocument document, valuetype System.Threading.CancellationToken cancellationToken) cil managed" />
      <MemberSignature Language="DocId" Value="M:Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker.ProcessAsync(Microsoft.Extensions.DataIngestion.IngestionDocument,System.Threading.CancellationToken)" />
      <MemberSignature Language="VB.NET" Value="Public Overrides Function ProcessAsync (document As IngestionDocument, Optional cancellationToken As CancellationToken = Nothing) As IAsyncEnumerable(Of IngestionChunk(Of String))" />
      <MemberSignature Language="F#" Value="override this.ProcessAsync : Microsoft.Extensions.DataIngestion.IngestionDocument * System.Threading.CancellationToken -&gt; System.Collections.Generic.IAsyncEnumerable&lt;Microsoft.Extensions.DataIngestion.IngestionChunk&lt;string&gt;&gt;" Usage="documentTokenChunker.ProcessAsync (document, cancellationToken)" />
      <MemberType>Method</MemberType>
      <AssemblyInfo>
        <AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
        <AssemblyVersion>10.2.0.0</AssemblyVersion>
      </AssemblyInfo>
      <Attributes>
        <Attribute>
          <AttributeName Language="C#">[System.Runtime.CompilerServices.AsyncIteratorStateMachine(typeof(Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker+&lt;ProcessAsync&gt;d__4))]</AttributeName>
          <AttributeName Language="F#">[&lt;System.Runtime.CompilerServices.AsyncIteratorStateMachine(typeof(Microsoft.Extensions.DataIngestion.Chunkers.DocumentTokenChunker+&lt;ProcessAsync&gt;d__4))&gt;]</AttributeName>
        </Attribute>
      </Attributes>
      <ReturnValue>
        <ReturnType>System.Collections.Generic.IAsyncEnumerable&lt;Microsoft.Extensions.DataIngestion.IngestionChunk&lt;System.String&gt;&gt;</ReturnType>
      </ReturnValue>
      <Parameters>
        <Parameter Name="document" Type="Microsoft.Extensions.DataIngestion.IngestionDocument" />
        <Parameter Name="cancellationToken" Type="System.Threading.CancellationToken">
          <Attributes>
            <Attribute>
              <AttributeName Language="C#">[System.Runtime.CompilerServices.EnumeratorCancellation]</AttributeName>
              <AttributeName Language="F#">[&lt;System.Runtime.CompilerServices.EnumeratorCancellation&gt;]</AttributeName>
            </Attribute>
          </Attributes>
        </Parameter>
      </Parameters>
      <Docs>
        <param name="document">To be added.</param>
        <param name="cancellationToken">To be added.</param>
        <summary>To be added.</summary>
        <returns>To be added.</returns>
        <remarks>To be added.</remarks>
        <inheritdoc />
      </Docs>
    </Member>
  </Members>
</Type>
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Base>
<BaseTypeName>Microsoft.Extensions.DataIngestion.IngestionChunker&lt;System.String&gt;</BaseTypeName>
Expand Down Expand Up @@ -39,7 +40,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters>
<Parameter Name="options" Type="Microsoft.Extensions.DataIngestion.IngestionChunkerOptions" />
Expand All @@ -61,7 +62,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Attributes>
<Attribute>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.0.0.0</AssemblyVersion>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Base>
<BaseTypeName>Microsoft.Extensions.DataIngestion.IngestionChunker&lt;System.String&gt;</BaseTypeName>
Expand Down Expand Up @@ -39,7 +40,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters>
<Parameter Name="embeddingGenerator" Type="Microsoft.Extensions.AI.IEmbeddingGenerator&lt;System.String,Microsoft.Extensions.AI.Embedding&lt;System.Single&gt;&gt;" />
Expand All @@ -65,7 +66,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Attributes>
<Attribute>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.0.0.0</AssemblyVersion>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Base>
<BaseTypeName>Microsoft.Extensions.DataIngestion.IngestionChunkProcessor&lt;System.String&gt;</BaseTypeName>
Expand Down Expand Up @@ -41,7 +42,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters>
<Parameter Name="options" Type="Microsoft.Extensions.DataIngestion.EnricherOptions" />
Expand Down Expand Up @@ -82,7 +83,7 @@
<MemberType>Property</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.String</ReturnType>
Expand All @@ -104,7 +105,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Collections.Generic.IAsyncEnumerable&lt;Microsoft.Extensions.DataIngestion.IngestionChunk&lt;System.String&gt;&gt;</ReturnType>
Expand Down
11 changes: 6 additions & 5 deletions xml/Microsoft.Extensions.DataIngestion/EnricherOptions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.0.0.0</AssemblyVersion>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Base>
<BaseTypeName>System.Object</BaseTypeName>
Expand Down Expand Up @@ -37,7 +38,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters>
<Parameter Name="chatClient" Type="Microsoft.Extensions.AI.IChatClient" />
Expand All @@ -60,7 +61,7 @@
<MemberType>Property</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Int32</ReturnType>
Expand All @@ -83,7 +84,7 @@
<MemberType>Property</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Attributes>
<Attribute FrameworkAlternate="net-10.0-pp;net-8.0-pp;net-9.0-pp">
Expand Down Expand Up @@ -112,7 +113,7 @@
<MemberType>Property</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.Extensions.AI.ChatOptions</ReturnType>
Expand All @@ -135,7 +136,7 @@
<MemberType>Property</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>Microsoft.Extensions.Logging.ILoggerFactory</ReturnType>
Expand Down
5 changes: 3 additions & 2 deletions xml/Microsoft.Extensions.DataIngestion/HeaderChunker.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.0.0.0</AssemblyVersion>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Base>
<BaseTypeName>Microsoft.Extensions.DataIngestion.IngestionChunker&lt;System.String&gt;</BaseTypeName>
Expand Down Expand Up @@ -40,7 +41,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters>
<Parameter Name="options" Type="Microsoft.Extensions.DataIngestion.IngestionChunkerOptions" />
Expand All @@ -62,7 +63,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Attributes>
<Attribute>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.0.0.0</AssemblyVersion>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Base>
<BaseTypeName>Microsoft.Extensions.DataIngestion.IngestionDocumentProcessor</BaseTypeName>
Expand Down Expand Up @@ -38,7 +39,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters>
<Parameter Name="options" Type="Microsoft.Extensions.DataIngestion.EnricherOptions" />
Expand All @@ -60,7 +61,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Threading.Tasks.Task&lt;Microsoft.Extensions.DataIngestion.IngestionDocument&gt;</ReturnType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.0.0.0</AssemblyVersion>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<TypeParameters>
<TypeParameter Name="T">
Expand Down Expand Up @@ -41,7 +42,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters />
<Docs>
Expand All @@ -58,7 +59,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Collections.Generic.IAsyncEnumerable&lt;Microsoft.Extensions.DataIngestion.IngestionChunk&lt;T&gt;&gt;</ReturnType>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.0.0.0</AssemblyVersion>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<TypeParameters>
<TypeParameter Name="T">
Expand Down Expand Up @@ -45,7 +46,7 @@
<MemberType>Constructor</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<Parameters />
<Docs>
Expand All @@ -66,7 +67,7 @@
</Implements>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Void</ReturnType>
Expand All @@ -89,7 +90,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Void</ReturnType>
Expand All @@ -114,7 +115,7 @@
<MemberType>Method</MemberType>
<AssemblyInfo>
<AssemblyName>Microsoft.Extensions.DataIngestion.Abstractions</AssemblyName>
<AssemblyVersion>10.1.0.0</AssemblyVersion>
<AssemblyVersion>10.2.0.0</AssemblyVersion>
</AssemblyInfo>
<ReturnValue>
<ReturnType>System.Threading.Tasks.Task</ReturnType>
Expand Down
Loading