@@ -205,8 +205,6 @@ def __init__(
205205 model_path : str ,
206206 # NOTE: These parameters are likely to change in the future.
207207 n_ctx : int = 512 ,
208- rope_freq_base : float = 10000.0 ,
209- rope_freq_scale : float = 1.0 ,
210208 n_parts : int = - 1 ,
211209 n_gpu_layers : int = 0 ,
212210 seed : int = 1337 ,
@@ -223,15 +221,15 @@ def __init__(
223221 lora_path : Optional [str ] = None ,
224222 low_vram : bool = False ,
225223 tensor_split : Optional [List [float ]] = None ,
224+ rope_freq_base : float = 10000.0 ,
225+ rope_freq_scale : float = 1.0 ,
226226 verbose : bool = True ,
227227 ):
228228 """Load a llama.cpp model from `model_path`.
229229
230230 Args:
231231 model_path: Path to the model.
232232 n_ctx: Maximum context size.
233- rope_freq_base: RoPE base frequency.
234- rope_freq_scale: RoPE frequency scale.
235233 n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
236234 seed: Random seed. -1 for random.
237235 f16_kv: Use half-precision for key/value cache.
@@ -246,6 +244,8 @@ def __init__(
246244 lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
247245 lora_path: Path to a LoRA file to apply to the model.
248246 tensor_split: List of floats to split the model across multiple GPUs. If None, the model is not split.
247+ rope_freq_base: RoPE (rotary position embedding) base frequency.
248+ rope_freq_scale: RoPE frequency scaling factor.
249249 verbose: Print verbose output to stderr.
250250
251251 Raises:
@@ -260,8 +260,6 @@ def __init__(
260260
261261 self .params = llama_cpp .llama_context_default_params ()
262262 self .params .n_ctx = n_ctx
263- self .params .rope_freq_base = rope_freq_base
264- self .params .rope_freq_scale = rope_freq_scale
265263 self .params .n_gpu_layers = n_gpu_layers
266264 self .params .seed = seed
267265 self .params .f16_kv = f16_kv
@@ -281,6 +279,9 @@ def __init__(
281279 self ._c_tensor_split = FloatArray (* tensor_split ) # keep a reference to the array so it is not gc'd
282280 self .params .tensor_split = self ._c_tensor_split
283281
282+ self .params .rope_freq_base = rope_freq_base
283+ self .params .rope_freq_scale = rope_freq_scale
284+
284285 self .last_n_tokens_size = last_n_tokens_size
285286 self .n_batch = min (n_ctx , n_batch )
286287
0 commit comments