Skip to content

Commit 04d450b

Browse files
committed
style: cleanup
Signed-off-by: YdrMaster <ydrml@hotmail.com>
1 parent: c555ba4 · commit: 04d450b

File tree

5 files changed

+10
-56
lines changed

5 files changed

+10
-56
lines changed

models/llama/common-cpu/src/lib.rs

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,10 @@ use common_cpu::{
44
tensor::{reslice, slice, udim, Tensor},
55
CpuKernels, Kernels, KernelsA, KernelsB, ThisThread,
66
};
7-
use llama::{ComputeConst, ComputeStream, Handle, LayerStorage, QueueOf, SliceOn, Storage, Weight};
7+
use llama::{
8+
ComputeConst, ComputeStream, Handle, InferenceConfig, LayerStorage, QueueOf, SliceOn, Storage,
9+
Weight,
10+
};
811
use std::{iter::repeat, ops::Deref, path::Path, slice::from_raw_parts};
912

1013
pub struct Transformer {
@@ -133,12 +136,10 @@ impl CausalLM for Transformer {
133136
}
134137
#[inline]
135138
fn duplicate_cache(&self, cache: &Tensor<Self::Storage>, pos: upos) -> Tensor<Self::Storage> {
136-
self.s
137-
.config
138-
.duplicate_cache(cache, pos, Blob::new, |dst, src| {
139-
src.map_physical(|u| &**u)
140-
.reform_to(&mut dst.map_physical(|u| &mut **u))
141-
})
139+
InferenceConfig::duplicate_cache(cache, pos, Blob::new, |dst, src| {
140+
src.map_physical(|u| &**u)
141+
.reform_to(&mut dst.map_physical(|u| &mut **u))
142+
})
142143
}
143144

144145
fn token_embed(&self, queries: impl IntoIterator<Item = utok>) -> Tensor<Self::Storage> {

models/llama/common/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,6 @@ impl InferenceConfig {
8484
}
8585

8686
pub fn duplicate_cache<S>(
87-
&self,
8887
cache: &Tensor<S>,
8988
pos: upos,
9089
malloc: impl FnOnce(usize) -> S,

models/llama/nvidia-gpu-distributed/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ impl CausalLM for Transformer {
143143

144144
fn duplicate_cache(&self, cache: &Tensor<Self::Storage>, pos: upos) -> Tensor<Self::Storage> {
145145
let contexts = Arc::new(self.comms.contexts().collect::<Vec<_>>());
146-
self.config.duplicate_cache(
146+
InferenceConfig::duplicate_cache(
147147
cache,
148148
pos,
149149
|len| Cache {

models/llama/nvidia-gpu/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ impl CausalLM for Transformer {
175175
}
176176

177177
fn duplicate_cache(&self, cache: &Tensor<Self::Storage>, pos: upos) -> Tensor<Self::Storage> {
178-
self.0.config.duplicate_cache(
178+
InferenceConfig::duplicate_cache(
179179
cache,
180180
pos,
181181
|len| self.cache(len),

tensor/src/compatibility.rs

Lines changed: 0 additions & 46 deletions
This file was deleted.

0 commit comments

Comments (0)