lazy gguf arrays

This commit is contained in:
Michael Yang 2025-06-12 10:57:17 -07:00
parent 12e13573a8
commit d9d980c760
3 changed files with 85 additions and 20 deletions

View File

@ -186,20 +186,20 @@ func read[T any](f *File) (t T, err error) {
}
func readString(f *File) (string, error) {
n, err := read[uint64](f)
if err != nil {
bts := f.bts[:8]
if _, err := io.ReadFull(f.reader, bts); err != nil {
return "", err
}
n := binary.LittleEndian.Uint64(bts)
if int(n) > len(f.bts) {
f.bts = make([]byte, n)
}
bts := f.bts[:n]
bts = f.bts[:n]
if _, err := io.ReadFull(f.reader, bts); err != nil {
return "", err
}
defer clear(bts)
return string(bts), nil
}
@ -245,32 +245,65 @@ func readArray(f *File) (any, error) {
}
}
func readArrayData[T any](f *File, n uint64) (s []T, err error) {
s = make([]T, n)
for i := range n {
e, err := read[T](f)
if err != nil {
return nil, err
}
func readArrayData[T any](f *File, n uint64) (*lazy[T], error) {
offset := f.reader.offset
s[i] = e
var t T
if _, err := f.reader.Discard(int(n) * binary.Size(t)); err != nil {
return nil, err
}
return s, nil
sr := io.NewSectionReader(f.file, offset, int64(int(n)*binary.Size(t)))
next, stop := iter.Pull(func(yield func(T) bool) {
s := make([]T, n)
if err := binary.Read(sr, binary.LittleEndian, &s); err != nil {
return
}
for _, e := range s {
if !yield(e) {
return
}
}
})
return &lazy[T]{count: n, next: next, stop: stop}, nil
}
func readArrayString(f *File, n uint64) (s []string, err error) {
s = make([]string, n)
for i := range n {
e, err := readString(f)
if err != nil {
func readArrayString(f *File, n uint64) (*lazy[string], error) {
offset := f.reader.offset
var size int64
for range n {
bts := f.bts[:8]
if _, err := io.ReadFull(f.reader, bts); err != nil {
return nil, err
}
s[i] = e
n := int(binary.LittleEndian.Uint64(bts))
if _, err := f.reader.Discard(n); err != nil {
return nil, err
}
size += 8 + int64(n)
}
return s, nil
sr := io.NewSectionReader(f.file, offset, size)
next, stop := iter.Pull(func(yield func(string) bool) {
f := File{reader: newBufferedReader(sr, 16<<10), bts: make([]byte, 4096)}
for range n {
s, err := readString(&f)
if err != nil {
return
}
if !yield(s) {
return
}
}
})
return &lazy[string]{count: n, next: next, stop: stop}, nil
}
func (f *File) Close() error {

View File

@ -1,6 +1,8 @@
package gguf
import (
"iter"
"log/slog"
"reflect"
"slices"
)
@ -28,6 +30,26 @@ func value[T any](v Value, kinds ...reflect.Kind) (t T) {
func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
switch vv := reflect.ValueOf(v.value); vv.Kind() {
case reflect.Ptr:
out := vv.MethodByName("Values").Call(nil)
if len(out) > 0 && out[0].IsValid() {
next, stop := iter.Pull(out[0].Seq())
defer stop()
ts = make([]T, vv.Elem().FieldByName("count").Uint())
for i := range ts {
t, ok := next()
if !ok {
slog.Error("error reading value", "index", i)
return nil
}
ts[i] = t.Convert(reflect.TypeOf(ts[i])).Interface().(T)
}
return ts
}
case reflect.Slice:
if slices.Contains(kinds, vv.Type().Elem().Kind()) {
ts = make([]T, vv.Len())
@ -39,6 +61,10 @@ func values[T any](v Value, kinds ...reflect.Kind) (ts []T) {
return
}
func (v Value) Any() any {
return v.value
}
// Int returns Value as a signed integer. If it is not a signed integer, it returns 0.
func (v Value) Int() int64 {
return value[int64](v, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64)

View File

@ -21,3 +21,9 @@ func (rs *bufferedReader) Read(p []byte) (n int, err error) {
rs.offset += int64(n)
return n, err
}
func (rs *bufferedReader) Discard(n int) (discarded int, err error) {
discarded, err = rs.Reader.Discard(n)
rs.offset += int64(discarded)
return discarded, err
}