From d9d980c7602417d7524bbd4e52ac2cc6f5be6f82 Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Thu, 12 Jun 2025 10:57:17 -0700
Subject: [PATCH] lazy gguf arrays

---
Review notes (not part of the commit message; ignored when the patch is applied):

- readArrayData / readArrayString: a read error inside the iter.Pull closure
  makes the closure return without yielding, so the error itself is dropped.
  values() only sees next() report !ok, logs "error reading value" and
  returns nil.
- readString, readArrayData and readArrayString convert the uint64 length read
  from the file with int(n) and use it for allocation / Discard sizes without
  a bounds check. NOTE(review): confirm whether inputs can be untrusted.
- readString no longer clears the shared f.bts scratch buffer after building
  the string (the `defer clear(bts)` line is removed).
- The lazy[T] type (fields count/next/stop, method Values) is used here but is
  not defined in any hunk of this patch, and gguf.go's import block is not
  touched although the new code uses iter. Presumably both exist elsewhere;
  verify before applying.
- Value.Any is exported without a doc comment.

 fs/gguf/gguf.go     | 73 ++++++++++++++++++++++++++++++++-------------
 fs/gguf/keyvalue.go | 26 ++++++++++++++++
 fs/gguf/reader.go   |  6 ++++
 3 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/fs/gguf/gguf.go b/fs/gguf/gguf.go
index bbb9bb410..aba316e2c 100644
--- a/fs/gguf/gguf.go
+++ b/fs/gguf/gguf.go
@@ -186,20 +186,20 @@ func read[T any](f *File) (t T, err error) {
 }
 
 func readString(f *File) (string, error) {
-	n, err := read[uint64](f)
-	if err != nil {
+	bts := f.bts[:8]
+	if _, err := io.ReadFull(f.reader, bts); err != nil {
 		return "", err
 	}
 
+	n := binary.LittleEndian.Uint64(bts)
 	if int(n) > len(f.bts) {
 		f.bts = make([]byte, n)
 	}
 
-	bts := f.bts[:n]
+	bts = f.bts[:n]
 	if _, err := io.ReadFull(f.reader, bts); err != nil {
 		return "", err
 	}
-	defer clear(bts)
 
 	return string(bts), nil
 }
@@ -245,32 +245,65 @@ func readArray(f *File) (any, error) {
 	}
 }
 
-func readArrayData[T any](f *File, n uint64) (s []T, err error) {
-	s = make([]T, n)
-	for i := range n {
-		e, err := read[T](f)
-		if err != nil {
-			return nil, err
-		}
+func readArrayData[T any](f *File, n uint64) (*lazy[T], error) {
+	offset := f.reader.offset
 
-		s[i] = e
+	var t T
+	if _, err := f.reader.Discard(int(n) * binary.Size(t)); err != nil {
+		return nil, err
 	}
 
-	return s, nil
+	sr := io.NewSectionReader(f.file, offset, int64(int(n)*binary.Size(t)))
+	next, stop := iter.Pull(func(yield func(T) bool) {
+		s := make([]T, n)
+		if err := binary.Read(sr, binary.LittleEndian, &s); err != nil {
+			return
+		}
+
+		for _, e := range s {
+			if !yield(e) {
+				return
+			}
+		}
+	})
+
+	return &lazy[T]{count: n, next: next, stop: stop}, nil
 }
 
-func readArrayString(f *File, n uint64) (s []string, err error) {
-	s = make([]string, n)
-	for i := range n {
-		e, err := readString(f)
-		if err != nil {
+func readArrayString(f *File, n uint64) (*lazy[string], error) {
+	offset := f.reader.offset
+
+	var size int64
+	for range n {
+		bts := f.bts[:8]
+		if _, err := io.ReadFull(f.reader, bts); err != nil {
 			return nil, err
 		}
 
-		s[i] = e
+		n := int(binary.LittleEndian.Uint64(bts))
+		if _, err := f.reader.Discard(n); err != nil {
+			return nil, err
+		}
+
+		size += 8 + int64(n)
 	}
 
-	return s, nil
+	sr := io.NewSectionReader(f.file, offset, size)
+	next, stop := iter.Pull(func(yield func(string) bool) {
+		f := File{reader: newBufferedReader(sr, 16<<10), bts: make([]byte, 4096)}
+		for range n {
+			s, err := readString(&f)
+			if err != nil {
+				return
+			}
+
+			if !yield(s) {
+				return
+			}
+		}
+	})
+
+	return &lazy[string]{count: n, next: next, stop: stop}, nil
 }
 
 func (f *File) Close() error {
diff --git a/fs/gguf/keyvalue.go b/fs/gguf/keyvalue.go
index 5843326c1..8cbbee133 100644
--- a/fs/gguf/keyvalue.go
+++ b/fs/gguf/keyvalue.go
@@ -1,6 +1,8 @@
 package gguf
 
 import (
+	"iter"
+	"log/slog"
 	"reflect"
 	"slices"
 )
@@ -28,6 +30,26 @@ func value[T any](v Value, kinds ...reflect.Kind) (t T) {
 
 func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
 	switch vv := reflect.ValueOf(v.value); vv.Kind() {
+	case reflect.Ptr:
+		out := vv.MethodByName("Values").Call(nil)
+		if len(out) > 0 && out[0].IsValid() {
+			next, stop := iter.Pull(out[0].Seq())
+			defer stop()
+
+			ts = make([]T, vv.Elem().FieldByName("count").Uint())
+			for i := range ts {
+				t, ok := next()
+				if !ok {
+					slog.Error("error reading value", "index", i)
+					return nil
+				}
+
+				ts[i] = t.Convert(reflect.TypeOf(ts[i])).Interface().(T)
+			}
+
+			return ts
+		}
+
 	case reflect.Slice:
 		if slices.Contains(kinds, vv.Type().Elem().Kind()) {
 			ts = make([]T, vv.Len())
@@ -39,6 +61,10 @@ func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
 	return
 }
 
+func (v Value) Any() any {
+	return v.value
+}
+
 // Int returns Value as a signed integer. If it is not a signed integer, it returns 0.
 func (v Value) Int() int64 {
 	return value[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)
diff --git a/fs/gguf/reader.go b/fs/gguf/reader.go
index 0bd761840..05d1989f6 100644
--- a/fs/gguf/reader.go
+++ b/fs/gguf/reader.go
@@ -21,3 +21,9 @@ func (rs *bufferedReader) Read(p []byte) (n int, err error) {
 	rs.offset += int64(n)
 	return n, err
 }
+
+func (rs *bufferedReader) Discard(n int) (discarded int, err error) {
+	discarded, err = rs.Reader.Discard(n)
+	rs.offset += int64(discarded)
+	return discarded, err
+}