61 changes: 61 additions & 0 deletions Sources/AnyLanguageModel/Models/LlamaLanguageModel.swift
@@ -2,6 +2,23 @@ import Foundation
#if Llama
import Llama

/// Global storage for the current log level threshold.
/// This is needed because the C callback can't capture Swift context.
/// Access is synchronized by llama.cpp's internal logging mechanism.
nonisolated(unsafe) private var currentLogLevel: LlamaLanguageModel.LogLevel = .warn

/// Custom log callback that filters messages based on the current log level.
private func llamaLogCallback(
    level: ggml_log_level,
    text: UnsafePointer<CChar>?,
    userData: UnsafeMutableRawPointer?
) {
    // `.none` suppresses all output; otherwise drop messages below the threshold.
    // (Without the explicit `.none` check, its threshold of 0 would let every
    // message through, contradicting the documented behavior.)
    guard currentLogLevel != .none,
        level.rawValue >= currentLogLevel.ggmlLevel.rawValue
    else { return }
    if let text = text {
        fputs(String(cString: text), stderr)
    }
}

/// A language model that runs llama.cpp models locally.
///
/// Use this model to generate text using GGUF models running directly with llama.cpp.
@@ -17,6 +34,35 @@ import Foundation
    /// This model is always available.
    public typealias UnavailableReason = Never

    /// The verbosity level for llama.cpp logging.
    public enum LogLevel: Int, Hashable, Comparable, Sendable, CaseIterable {
        /// No logging output.
        case none = 0
        /// Debug messages and above (most verbose).
        case debug = 1
        /// Info messages and above.
        case info = 2
        /// Warning messages and above (default).
        case warn = 3
        /// Only error messages.
        case error = 4

        /// Maps to the corresponding ggml log level.
        var ggmlLevel: ggml_log_level {
            switch self {
            case .none: return GGML_LOG_LEVEL_NONE
            case .debug: return GGML_LOG_LEVEL_DEBUG
            case .info: return GGML_LOG_LEVEL_INFO
            case .warn: return GGML_LOG_LEVEL_WARN
            case .error: return GGML_LOG_LEVEL_ERROR
            }
        }

        public static func < (lhs: LogLevel, rhs: LogLevel) -> Bool {
            lhs.rawValue < rhs.rawValue
        }
    }
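
    // A sketch of the filtering behavior, assuming the current ggml numbering
    // (NONE = 0, DEBUG = 1, INFO = 2, WARN = 3, ERROR = 4), which the raw
    // values above mirror:
    //
    //   With LlamaLanguageModel.logLevel = .warn (threshold 3):
    //     GGML_LOG_LEVEL_DEBUG (1) -> dropped
    //     GGML_LOG_LEVEL_INFO  (2) -> dropped
    //     GGML_LOG_LEVEL_WARN  (3) -> printed
    //     GGML_LOG_LEVEL_ERROR (4) -> printed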

    /// Custom generation options specific to llama.cpp.
    ///
    /// Use this type to pass llama.cpp-specific sampling parameters that are
@@ -115,6 +161,17 @@ import Foundation
    /// The number of tokens to consider for repeat penalty.
    public let repeatLastN: Int32

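    // Usage sketch (hypothetical): the full shape of this options type falls
    // outside the diff, so the type name and initializer label below are
    // assumptions for illustration only.
    //
    //     let options = LlamaLanguageModel.Options(repeatLastN: 64)
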
    /// The minimum log level for llama.cpp output.
    ///
    /// This is a global setting that affects all `LlamaLanguageModel` instances
    /// since llama.cpp uses a single global log callback.
    public nonisolated(unsafe) static var logLevel: LogLevel = .warn {
        didSet {
            currentLogLevel = logLevel
            llama_log_set(llamaLogCallback, nil)
        }
    }
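
    // Usage sketch: route only llama.cpp errors to stderr before any model work.
    //
    //     LlamaLanguageModel.logLevel = .error
    //
    // Because the didSet installs the callback via llama_log_set, which is
    // global to the process, the most recent assignment wins for every instance.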

    /// The loaded model instance.
    private var model: OpaquePointer?

@@ -302,6 +359,10 @@ import Foundation
            throw LlamaLanguageModelError.invalidModelPath
        }

        // Install the log callback at the current threshold; llama_log_set is
        // process-wide, so the most recent setting applies to every instance.
        currentLogLevel = Self.logLevel
        llama_log_set(llamaLogCallback, nil)

        // Initialize the backend lazily; this must be done before loading the model.
        llama_backend_init()
