Skip to content

Commit 4e5b83b

Browse files
GGUF: check that tensor size is representable (ggml-org#19072)
1 parent bb02f74 commit 4e5b83b

File tree

2 files changed

+23
-4
lines changed

2 files changed

+23
-4
lines changed

ggml/src/gguf.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
585585
break;
586586
}
587587

588+
// check that the size of the tensor in bytes is representable as a size_t (i.e. does not exceed SIZE_MAX)
589+
if (ok && uint64_t(ggml_nelements(&info.t)/ggml_blck_size(info.t.type)) > SIZE_MAX/ggml_type_size(info.t.type)) {
590+
GGML_LOG_ERROR("%s: tensor '%s' with shape (%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") has a size in bytes > %zu\n",
591+
__func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], SIZE_MAX);
592+
ok = false;
593+
break;
594+
}
595+
588596
// calculate byte offsets given the tensor shape and type
589597
info.t.nb[0] = type_size;
590598
info.t.nb[1] = info.t.nb[0]*(info.t.ne[0]/blck_size);

tests/test-gguf.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#include "ggml.h"
22
#include "ggml-backend.h"
33
#include "../ggml/src/ggml-impl.h"
4+
#include "gguf.h"
45

56
#include <algorithm>
67
#include <array>
8+
#include <cmath>
79
#include <cstdint>
810
#include <cstdio>
911
#include <random>
@@ -34,6 +36,7 @@ enum handcrafted_file_type {
3436
HANDCRAFTED_TENSORS_BAD_N_DIMS = 20 + offset_has_tensors,
3537
HANDCRAFTED_TENSORS_BAD_SHAPE = 30 + offset_has_tensors,
3638
HANDCRAFTED_TENSORS_NE_TOO_BIG = 40 + offset_has_tensors,
39+
HANDCRAFTED_TENSORS_NBYTES_TOO_BIG = 45 + offset_has_tensors,
3740
HANDCRAFTED_TENSORS_BAD_TYPE = 50 + offset_has_tensors,
3841
HANDCRAFTED_TENSORS_BAD_OFFSET = 60 + offset_has_tensors,
3942
HANDCRAFTED_TENSORS_DUPLICATE_NAME = 70 + offset_has_tensors,
@@ -69,6 +72,7 @@ static std::string handcrafted_file_type_name(const enum handcrafted_file_type h
6972
case HANDCRAFTED_TENSORS_BAD_N_DIMS: return "TENSORS_BAD_N_DIMS";
7073
case HANDCRAFTED_TENSORS_BAD_SHAPE: return "TENSORS_BAD_SHAPE";
7174
case HANDCRAFTED_TENSORS_NE_TOO_BIG: return "TENSORS_NE_TOO_BIG";
75+
case HANDCRAFTED_TENSORS_NBYTES_TOO_BIG: return "TENSORS_NBYTES_TOO_BIG";
7276
case HANDCRAFTED_TENSORS_BAD_TYPE: return "TENSORS_BAD_TYPE";
7377
case HANDCRAFTED_TENSORS_BAD_OFFSET: return "TENSORS_BAD_OFFSET";
7478
case HANDCRAFTED_TENSORS_DUPLICATE_NAME: return "TENSORS_DUPLICATE_NAME";
@@ -326,7 +330,7 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
326330

327331
uint64_t offset = 0;
328332
for (int i = 0; i < int(tensor_configs.size()); ++i) {
329-
const ggml_type type = tensor_configs[i].first;
333+
const ggml_type type = hft == HANDCRAFTED_TENSORS_NBYTES_TOO_BIG ? GGML_TYPE_I64 : tensor_configs[i].first;
330334
const std::array<int64_t, GGML_MAX_DIMS> shape = tensor_configs[i].second;
331335

332336
std::string name = "my_tensor";
@@ -343,7 +347,7 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
343347
}
344348
helper_write(file, name.data(), name.length());
345349

346-
uint32_t n_dims = hft == HANDCRAFTED_TENSORS_NE_TOO_BIG ? 2 : 1;
350+
uint32_t n_dims = (hft == HANDCRAFTED_TENSORS_NE_TOO_BIG || hft == HANDCRAFTED_TENSORS_NBYTES_TOO_BIG) ? 2 : 1;
347351
for (int i = GGML_MAX_DIMS-1; i >= 1; --i) {
348352
if (shape[i] != 1) {
349353
n_dims = i + 1;
@@ -358,13 +362,19 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft
358362
}
359363

360364
if (hft == HANDCRAFTED_TENSORS_BAD_SHAPE) {
365+
const int64_t bad_dim = -1;
361366
for (uint32_t j = 0; j < n_dims; ++j) {
362-
const int64_t bad_dim = -1;
363367
helper_write(file, bad_dim);
364368
}
365369
} else if (hft == HANDCRAFTED_TENSORS_NE_TOO_BIG){
370+
const int64_t big_dim = 4*int64_t(INT32_MAX);
371+
for (uint32_t j = 0; j < n_dims; ++j) {
372+
helper_write(file, big_dim);
373+
}
374+
} else if (hft == HANDCRAFTED_TENSORS_NBYTES_TOO_BIG){
375+
const size_t big_ne = SIZE_MAX/ggml_type_size(type);
376+
const int64_t big_dim = GGML_PAD(int64_t(1.01f*std::pow(big_ne, 1.0f/n_dims)) + 1, ggml_blck_size(type));
366377
for (uint32_t j = 0; j < n_dims; ++j) {
367-
const int64_t big_dim = 4*int64_t(INT32_MAX);
368378
helper_write(file, big_dim);
369379
}
370380
} else {
@@ -682,6 +692,7 @@ static std::pair<int, int> test_handcrafted_file(const unsigned int seed) {
682692
HANDCRAFTED_TENSORS_BAD_N_DIMS,
683693
HANDCRAFTED_TENSORS_BAD_SHAPE,
684694
HANDCRAFTED_TENSORS_NE_TOO_BIG,
695+
HANDCRAFTED_TENSORS_NBYTES_TOO_BIG,
685696
HANDCRAFTED_TENSORS_BAD_TYPE,
686697
HANDCRAFTED_TENSORS_BAD_OFFSET,
687698
HANDCRAFTED_TENSORS_DUPLICATE_NAME,

0 commit comments

Comments
 (0)