-
Notifications
You must be signed in to change notification settings - Fork 16.2k
implement adaptive-p sampler #17927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
implement adaptive-p sampler #17927
Changes from 15 commits
774cf23
5ab4ff7
66e2d17
88fb0f3
374bfd4
ffe1639
4959878
f3457a8
9316959
b3aea57
cd7de7c
534cb4f
dcada03
2d62bbe
5c78b79
53380c1
94cb883
0a19a3f
824bb3a
1879fc6
67a7336
a96ddd7
b8a9626
965bcc9
d1e5c60
9613c48
2a3f579
ec54fe5
667b70f
36b526d
6934780
f5d0872
493bf30
6854325
b5ed673
4e28eb2
1c58e9a
4e04bd1
6e66095
9c50b57
0344068
1c2d2e9
85b6e52
fcb5129
58aa1c6
27dda80
7752998
6023572
dedbe36
f4703d4
89ebdf0
55ad4a8
6bad4ae
295d1d8
ed2890e
51070e0
90f3bfb
b95b088
f0d3f13
e7a8920
05d7dc9
2d67b1c
c6a6f63
0807499
eb854e7
55757dc
660a3b2
7173e84
c27df51
5fdc530
0400611
684c5ff
7ffd3a8
f48413c
bef75d9
8b1292a
e99a4a6
af0596c
5f04265
7f40928
3aa23f3
1eff502
d21c87e
4b92e3a
33c635e
4b06e08
42af39d
81af54c
40fd48f
b6041b1
f222e17
d7e3b86
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2313,6 +2313,144 @@ struct llama_sampler * llama_sampler_init_dry_testing(int32_t context_size, floa | |||||||||||||||
| return result; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // power-law | ||||||||||||||||
| // | ||||||||||||||||
| // this sampler is like `greedy`, `dist`, and `mirostat` in that it actually selects a token ID | ||||||||||||||||
| // rather than just transforming logits. therefore it must always be the last sampler in the | ||||||||||||||||
| // sampler chain. | ||||||||||||||||
| // | ||||||||||||||||
| // it is recommended to only perform minimal truncation before this sampler. | ||||||||||||||||
| // | ||||||||||||||||
| // ref: https://github.com/MrJackSpade/llama.cpp/tree/master (original impl, documentation) | ||||||||||||||||
| // ref: https://github.com/ggml-org/llama.cpp/pull/17927 (llama.cpp PR) | ||||||||||||||||
|
|
||||||||||||||||
| struct llama_sampler_power_law { | ||||||||||||||||
| const float target; | ||||||||||||||||
| const int32_t window_size; | ||||||||||||||||
|
|
||||||||||||||||
| const uint32_t seed; | ||||||||||||||||
| std::mt19937 rng; | ||||||||||||||||
| ring_buffer<float> window; | ||||||||||||||||
| }; | ||||||||||||||||
|
|
||||||||||||||||
| static const char * llama_sampler_power_law_name(const struct llama_sampler * /*smpl*/) { | ||||||||||||||||
| return "power-law"; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| static void llama_sampler_power_law_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) { | ||||||||||||||||
| auto * ctx = (llama_sampler_power_law *) smpl->ctx; | ||||||||||||||||
|
|
||||||||||||||||
| if (ctx->target < 0.0f) { | ||||||||||||||||
| // no-op: just sample from the distribution as-is | ||||||||||||||||
| llama_sampler_softmax_impl(cur_p, false); | ||||||||||||||||
| const int idx = llama_sample_dist(cur_p, ctx->rng); | ||||||||||||||||
| cur_p->selected = idx; | ||||||||||||||||
| return; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // fixed power law transform parameters (from original implementation) | ||||||||||||||||
| const float distribution_width = 0.2f; | ||||||||||||||||
| const float peak_logit_value = 3.0f; | ||||||||||||||||
| const float tail_heaviness = 3.0f; | ||||||||||||||||
|
|
||||||||||||||||
| // compute probabilities to get the "original" values | ||||||||||||||||
| llama_sampler_softmax_impl(cur_p, false); | ||||||||||||||||
|
|
||||||||||||||||
| // store original probabilities (used for future target adaptation) | ||||||||||||||||
| std::vector<float> original_probs; | ||||||||||||||||
| original_probs.reserve(cur_p->size); | ||||||||||||||||
| for (size_t i = 0; i < cur_p->size; ++i) { | ||||||||||||||||
| original_probs.push_back(cur_p->data[i].p); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // | ||||||||||||||||
| // calculate adaptive target | ||||||||||||||||
| // | ||||||||||||||||
|
|
||||||||||||||||
| const float min_target = 0.0f; | ||||||||||||||||
| const float max_target = 1.0f; | ||||||||||||||||
|
|
||||||||||||||||
| float computed_target = ctx->target; | ||||||||||||||||
| if (ctx->window.size() > 0) { | ||||||||||||||||
| float sum_excluding_oldest = 0.0f; | ||||||||||||||||
| size_t sz = ctx->window.size(); | ||||||||||||||||
|
|
||||||||||||||||
| // sum all except the oldest element | ||||||||||||||||
| for (size_t i = 0; i < sz - 1; ++i) { | ||||||||||||||||
| sum_excluding_oldest += ctx->window.rat(i); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| float next_value = (ctx->target * ctx->window_size) - sum_excluding_oldest; | ||||||||||||||||
| computed_target = std::max(min_target, std::min(next_value, max_target)); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // | ||||||||||||||||
| // power law transform | ||||||||||||||||
| // | ||||||||||||||||
|
|
||||||||||||||||
| for (size_t i = 0; i < cur_p->size; ++i) { | ||||||||||||||||
| float p = cur_p->data[i].p; | ||||||||||||||||
| float normalized_distance = std::abs(p - computed_target) / distribution_width; | ||||||||||||||||
| cur_p->data[i].logit = peak_logit_value / (1.0f + std::pow(normalized_distance, tail_heaviness)); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| llama_sampler_softmax_impl(cur_p, false); | ||||||||||||||||
|
|
||||||||||||||||
| // sample from the transformed distribution | ||||||||||||||||
| const int idx = llama_sample_dist(cur_p, ctx->rng); | ||||||||||||||||
| cur_p->selected = idx; | ||||||||||||||||
|
|
||||||||||||||||
| // add the ORIGINAL probability to the rolling window | ||||||||||||||||
| ctx->window.push_back(original_probs[idx]); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| static void llama_sampler_power_law_reset(struct llama_sampler * smpl) { | ||||||||||||||||
| auto * ctx = (llama_sampler_power_law *) smpl->ctx; | ||||||||||||||||
| ctx->window = ring_buffer<float>(ctx->window_size); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| static struct llama_sampler * llama_sampler_power_law_clone(const struct llama_sampler * smpl) { | ||||||||||||||||
| const auto * ctx = (const llama_sampler_power_law *) smpl->ctx; | ||||||||||||||||
| auto * result = llama_sampler_init_power_law(ctx->target, ctx->window_size, ctx->seed); | ||||||||||||||||
| auto * result_ctx = (llama_sampler_power_law *) result->ctx; | ||||||||||||||||
|
|
||||||||||||||||
|
Comment on lines
+3384
to
+3397
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should apply the same logic for the RNG seeds here as in the llama.cpp/src/llama-sampling.cpp Lines 1111 to 1117 in 516a4ca
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops. I addressed this in |
||||||||||||||||
| result_ctx->rng = ctx->rng; | ||||||||||||||||
| result_ctx->window = ctx->window; | ||||||||||||||||
|
|
||||||||||||||||
| return result; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| static void llama_sampler_power_law_free(struct llama_sampler * smpl) { | ||||||||||||||||
| delete (llama_sampler_power_law *) smpl->ctx; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| static struct llama_sampler_i llama_sampler_power_law_i = { | ||||||||||||||||
| /* .name = */ llama_sampler_power_law_name, | ||||||||||||||||
| /* .accept = */ nullptr, | ||||||||||||||||
| /* .apply = */ llama_sampler_power_law_apply, | ||||||||||||||||
| /* .reset = */ llama_sampler_power_law_reset, | ||||||||||||||||
| /* .clone = */ llama_sampler_power_law_clone, | ||||||||||||||||
| /* .free = */ llama_sampler_power_law_free, | ||||||||||||||||
| }; | ||||||||||||||||
|
|
||||||||||||||||
| struct llama_sampler * llama_sampler_init_power_law( | ||||||||||||||||
| float target, | ||||||||||||||||
| int32_t window_size, | ||||||||||||||||
| uint32_t seed | ||||||||||||||||
| ) { | ||||||||||||||||
| auto seed_cur = get_rng_seed(seed); | ||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This See how the |
||||||||||||||||
| return llama_sampler_init( | ||||||||||||||||
| /* .iface = */ &llama_sampler_power_law_i, | ||||||||||||||||
| /* .ctx = */ new llama_sampler_power_law { | ||||||||||||||||
| /* .target = */ target, | ||||||||||||||||
| /* .window_size = */ window_size, | ||||||||||||||||
| /* .seed = */ seed_cur, | ||||||||||||||||
| /* .rng = */ std::mt19937(seed_cur), | ||||||||||||||||
| /* .window = */ ring_buffer<float>(window_size), | ||||||||||||||||
| } | ||||||||||||||||
| ); | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| // logit-bias | ||||||||||||||||
|
|
||||||||||||||||
| struct llama_sampler_logit_bias { | ||||||||||||||||
|
|
||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should these parameters be configurable like in the original implementation? There is probably a tradeoff with feature creep, having too many options for users to control, but some of these seem potentially important (especially `distribution_width`). Also, I noticed `peak_logit_value` is outside the range suggested in the original implementation; is that intentional?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Myself and the original author are discussing the parameters over the next few days, I agree that the current implementation is probably not ideal, which is why I marked it back as draft.
I will post a comment in the main thread with an update once we've got it more figured out. Thank you!