
Commit e9fd8dc

llama-fit-params: keep explicit --ctx-size 0 (ggml-org#19070)
1 parent 4e5b83b commit e9fd8dc

4 files changed, 12 additions and 3 deletions

common/arg.cpp

Lines changed: 4 additions & 0 deletions
```diff
@@ -1231,6 +1231,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx),
         [](common_params & params, int value) {
             params.n_ctx = value;
+            if (value == 0) {
+                // disable context reduction in llama_params_fit if the user explicitly requests the full context size:
+                params.fit_params_min_ctx = UINT32_MAX;
+            }
         }
     ).set_env("LLAMA_ARG_CTX_SIZE"));
     add_opt(common_arg(
```
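
As a standalone illustration of the sentinel pattern in the hunk above (a minimal sketch, not llama.cpp internals; the threshold default, model context value, and the reduction check are assumptions), an explicit `--ctx-size 0` raises the minimum-context threshold to `UINT32_MAX`, so a later fitting step can never shrink the context:

```cpp
// Minimal sketch (hypothetical names and values): passing --ctx-size 0 sets the
// minimum-context threshold to UINT32_MAX, so no context size ever qualifies for
// reduction and the full model context is kept.
#include <cstdint>
#include <cstdio>

int main() {
    uint32_t fit_params_min_ctx = 4096;   // hypothetical default lower bound for context reduction
    const int user_ctx_arg      = 0;      // user explicitly passed --ctx-size 0 (full model context)

    if (user_ctx_arg == 0) {
        fit_params_min_ctx = UINT32_MAX;  // sentinel: disable context reduction entirely
    }

    const uint32_t model_ctx = 131072;    // hypothetical context size loaded from the model
    const bool may_reduce = model_ctx > fit_params_min_ctx;
    std::printf("context reduction %s\n", may_reduce ? "possible" : "disabled");
    return 0;
}
```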

include/llama.h

Lines changed: 1 addition & 0 deletions
```diff
@@ -489,6 +489,7 @@ extern "C" {
     // - returns true if the parameters could be successfully modified to fit device memory
     // - this function is NOT thread safe because it modifies the global llama logger state
     // - only parameters that have the same value as in llama_default_model_params are modified
+    //   with the exception of the context size which is modified if and only if equal to 0
     LLAMA_API enum llama_params_fit_status llama_params_fit(
             const char * path_model,
             struct llama_model_params * mparams,
```
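
The added comment line documents the contract of `llama_params_fit`. A minimal sketch of that contract follows (hypothetical struct, field names, and values; not the llama.cpp implementation and not its real signature): only fields still holding their default value are overridden, except the context size, which is overridden exactly when it is 0.

```cpp
// Hypothetical illustration of the documented contract: fit only touches fields
// left at their defaults, and treats n_ctx == 0 as "please fit this for me".
#include <cinttypes>
#include <cstdint>
#include <cstdio>

struct params {                    // stand-in for model/context parameters
    int32_t  n_gpu_layers;
    uint32_t n_ctx;
};

static params default_params() { return {999, 4096}; }   // hypothetical defaults

static void fit_params(params & p) {
    const params def = default_params();
    if (p.n_gpu_layers == def.n_gpu_layers) {
        p.n_gpu_layers = 20;       // pretend this is what fits in device memory
    }
    if (p.n_ctx == 0) {
        p.n_ctx = 8192;            // pretend this is the fitted context size
    }
}

int main() {
    params p = default_params();
    p.n_ctx = 0;                   // "load/fit the context size" request
    fit_params(p);
    std::printf("n_gpu_layers = %" PRIi32 ", n_ctx = %" PRIu32 "\n", p.n_gpu_layers, p.n_ctx);
    return 0;
}
```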

src/llama.cpp

Lines changed: 6 additions & 2 deletions
```diff
@@ -311,8 +311,12 @@ static void llama_params_fit_impl(
                     __func__, hp_nct, cparams->n_ctx, memory_reduction/MiB);
             }
         } else {
-            LLAMA_LOG_INFO("%s: default model context size is %" PRIu32 " which is <= the min. context size of %" PRIu32 " -> no change\n",
-                __func__, hp_nct, n_ctx_min);
+            if (n_ctx_min == UINT32_MAX) {
+                LLAMA_LOG_INFO("%s: user has requested full context size of %" PRIu32 " -> no change\n", __func__, hp_nct);
+            } else {
+                LLAMA_LOG_INFO("%s: default model context size is %" PRIu32 " which is <= the min. context size of %" PRIu32 " -> no change\n",
+                    __func__, hp_nct, n_ctx_min);
+            }
         }
     } else {
         LLAMA_LOG_INFO("%s: context size set by user to %" PRIu32 " -> no change\n", __func__, cparams->n_ctx);
```

tools/fit-params/fit-params.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -36,7 +36,7 @@ int main(int argc, char ** argv) {
 
     LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
     common_log_flush(common_log_main());
-    printf("-c %" PRIu32 " -ngl %" PRIu32, cparams.n_ctx, mparams.n_gpu_layers);
+    printf("-c %" PRIu32 " -ngl %" PRIi32, cparams.n_ctx, mparams.n_gpu_layers);
 
     size_t nd = llama_max_devices();
     while (nd > 1 && mparams.tensor_split[nd - 1] == 0.0f) {
```
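
For context on the one-character fix above (a minimal sketch; that `n_gpu_layers` is a signed 32-bit field is implied by the switch to `PRIi32`, and the `-1` value is only illustrative), printing a signed value with the unsigned macro silently wraps negative numbers:

```cpp
// Minimal sketch of the format-macro fix: an int32_t printed with the unsigned
// PRIu32 macro wraps negative values, while PRIi32 prints them as written.
#include <cinttypes>
#include <cstdio>

int main() {
    const uint32_t n_ctx        = 8192;  // context size, unsigned
    const int32_t  n_gpu_layers = -1;    // illustrative negative value for a signed field

    std::printf("with PRIu32: -c %" PRIu32 " -ngl %" PRIu32 "\n", n_ctx, (uint32_t) n_gpu_layers);
    std::printf("with PRIi32: -c %" PRIu32 " -ngl %" PRIi32 "\n", n_ctx, n_gpu_layers);
    return 0;   // prints "-ngl 4294967295" in the first line, "-ngl -1" in the second
}
```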
