Skip to content

Latest commit

 

History

History
173 lines (133 loc) · 4.34 KB

File metadata and controls

173 lines (133 loc) · 4.34 KB
---
page_title: "openai_vector_store_file Resource - terraform-provider-openai"
subcategory: ""
description: |-
  Manages a file in an OpenAI Vector Store.
---

# openai_vector_store_file (Resource)

Manages a file in an OpenAI Vector Store.

## Example Usage

# Example: Adding files to vector stores for use with Assistants
# Vector store files enable semantic search over document contents

# First, create a vector store
# Vector store that will hold the uploaded documents for semantic search.
# `metadata` is free-form key/value data attached to the store.
resource "openai_vector_store" "knowledge_base" {
  name = "Company Knowledge Base"
  metadata = {
    department = "engineering"
    version    = "1.0"
  }
}

# Upload files that will be added to the vector store
# PDF uploaded with purpose = "assistants" so it can be attached
# to a vector store below.
resource "openai_file" "technical_docs" {
  file    = "technical_documentation.pdf"
  purpose = "assistants"
}

# Markdown API reference, uploaded for use with Assistants.
resource "openai_file" "api_reference" {
  file    = "api_reference.md"
  purpose = "assistants"
}

# Plain-text FAQ document, uploaded for use with Assistants.
resource "openai_file" "faq_document" {
  file    = "frequently_asked_questions.txt"
  purpose = "assistants"
}

# Add a file to the vector store
# Attach the technical documentation to the knowledge-base vector store.
resource "openai_vector_store_file" "add_tech_docs" {
  # The vector store to add the file to
  vector_store_id = openai_vector_store.knowledge_base.id

  # The file to add
  file_id = openai_file.technical_docs.id

  # Optional: static chunking splits the file into fixed-size chunks.
  # chunk_overlap_tokens defaults to 400 and may be at most half of
  # max_chunk_size_tokens; it is stated explicitly here so the pairing
  # of the two settings is visible in the example.
  chunking_strategy {
    type                  = "static"
    max_chunk_size_tokens = 800
    chunk_overlap_tokens  = 400
  }
}

# Add API reference with auto chunking
resource "openai_vector_store_file" "add_api_ref" {
  vector_store_id = openai_vector_store.knowledge_base.id
  file_id         = openai_file.api_reference.id

  # "auto" lets the API choose chunk size and overlap; this matches the
  # behavior when chunking_strategy is omitted entirely.
  chunking_strategy {
    type = "auto"
  }
}

# Add FAQ document with custom chunking
# Attach the FAQ document with small chunks suited to short Q&A entries.
resource "openai_vector_store_file" "add_faq" {
  vector_store_id = openai_vector_store.knowledge_base.id
  file_id         = openai_file.faq_document.id

  chunking_strategy {
    type                  = "static"
    max_chunk_size_tokens = 200 # Smaller chunks for Q&A format
    # Required here: the default overlap (400) would exceed the allowed
    # maximum of half of max_chunk_size_tokens (100), so it must be set
    # explicitly when using small chunk sizes.
    chunk_overlap_tokens  = 100
  }
}

# Example: Multiple vector stores for different purposes
# A second, separate vector store dedicated to customer-support content.
resource "openai_vector_store" "customer_support" {
  name = "Customer Support Database"
}

# JSONL export of resolved support tickets, uploaded for use with Assistants.
resource "openai_file" "support_tickets" {
  file    = "resolved_tickets_2024.jsonl"
  purpose = "assistants"
}

resource "openai_vector_store_file" "support_knowledge" {
  vector_store_id = openai_vector_store.customer_support.id
  file_id         = openai_file.support_tickets.id

  # Larger chunks for conversation context. The default overlap (400)
  # is valid here because it does not exceed half of
  # max_chunk_size_tokens (600).
  chunking_strategy {
    type                  = "static"
    max_chunk_size_tokens = 1200
  }
}

# Example: Code repository vector store
# Vector store for indexing a code repository; metadata records the
# language and project it covers.
resource "openai_vector_store" "code_search" {
  name = "Codebase Search"
  metadata = {
    language = "python"
    project  = "backend-api"
  }
}

# NOTE(review): a .zip archive is uploaded here with purpose = "assistants" —
# confirm the API accepts archives for this purpose; individual text-based
# files are the typical input.
resource "openai_file" "source_code" {
  file    = "backend_source.zip"
  purpose = "assistants"
}

# Index the source archive with mid-sized chunks for code search.
resource "openai_vector_store_file" "code_index" {
  vector_store_id = openai_vector_store.code_search.id
  file_id         = openai_file.source_code.id

  # Specific chunking for code. chunk_overlap_tokens is required here:
  # the default (400) would exceed the allowed maximum of half of
  # max_chunk_size_tokens (250).
  chunking_strategy {
    type                  = "static"
    max_chunk_size_tokens = 500
    chunk_overlap_tokens  = 200
  }
}

# Output file status
output "tech_docs_status" {
  # Read-only processing status of the vector store file, as reported
  # by the provider (see the `status` attribute in the schema below).
  value = openai_vector_store_file.add_tech_docs.status
}

## Schema

### Required

  • file_id (String) The ID of the file to add.
  • vector_store_id (String) The ID of the vector store to add the file to.

### Optional

  • chunking_strategy (Block, Optional) The chunking strategy used to chunk the file(s). (see below for nested schema)

### Read-Only

  • created_at (Number)
  • id (String) The identifier of the vector store file.
  • last_error (Attributes) (see below for nested schema)
  • object (String)
  • status (String)
  • usage_bytes (Number)

### Nested Schema for `chunking_strategy`

Required:

  • type (String)

Optional:

  • chunk_overlap_tokens (Number) The number of tokens that overlap between chunks. The default is 400. The maximum is half of max_chunk_size_tokens.
  • max_chunk_size_tokens (Number) The maximum number of tokens in each chunk. The default is 800. The minimum is 100 and the maximum is 4096.

### Nested Schema for `last_error`

Read-Only:

  • code (String)
  • message (String)