Compare commits

..

13 Commits

Author SHA1 Message Date
Matt Williams
9dd88dc040 Fixes for Bruces comments
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-06 14:16:24 -08:00
Matt Williams
3d8872bbbd update as per jmorganca comments
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-06 08:51:15 -08:00
Matt Williams
a1c8974975 also try other models
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-05 16:13:48 -08:00
Matt Williams
1aaaaa76a0 add examples
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-05 15:56:00 -08:00
Matt Williams
9411399cb4 Add new example for self querying retrieval
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-05 15:53:24 -08:00
Matt Williams
ad88799411 Merge pull request #949 from jmorganca/matt/fixPrivateGPT
fix: private gpt example was broken due to changes in chroma
2023-10-30 17:17:00 -07:00
Bruce MacDonald
0818b5e318 readline windows terminal support (#950)
- update the readline package to have basic support on windows, this is not full feature parity with the unix cli yet
2023-10-30 16:18:12 -04:00
Matt Williams
1df6100c77 Update examples/langchain-python-rag-privategpt/privateGPT.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-30 12:48:17 -07:00
Matt Williams
5c48fe1fb0 Update examples/langchain-python-rag-privategpt/constants.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-30 12:47:56 -07:00
Dirk Loss
874bb31986 Fix conversion command for gptneox (#948) 2023-10-30 14:34:29 -04:00
Matt Williams
f7856a57eb fix: private gpt example was broken due to changes in chroma
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-30 10:56:25 -07:00
Bruce MacDonald
f9a4281124 clean up: remove server functions from client (#937) 2023-10-30 11:10:18 -04:00
Jeffrey Morgan
8d03bd7b54 remove +build directive in term.go 2023-10-28 09:56:03 -07:00
19 changed files with 2309 additions and 2037 deletions

View File

@@ -11,6 +11,7 @@ import (
"io"
"log"
"net"
"net/http"
"os"
"os/exec"
"os/signal"
@@ -98,22 +99,21 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return err
}
models, err := client.List(context.Background())
name := args[0]
// check if the model exists on the server
_, err = client.Show(context.Background(), &api.ShowRequest{Name: name})
if err != nil {
return err
}
canonicalModelPath := server.ParseModelPath(args[0])
for _, model := range models.Models {
if model.Name == canonicalModelPath.GetShortTagname() {
return RunGenerate(cmd, args)
var statusError api.StatusError
switch {
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
if err := PullHandler(cmd, args); err != nil {
return err
}
case err != nil:
return err
}
}
if err := PullHandler(cmd, args); err != nil {
return err
}
return RunGenerate(cmd, args)
}
@@ -731,21 +731,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
origins = strings.Split(o, ",")
}
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
if err := server.PruneLayers(); err != nil {
return err
}
manifestsPath, err := server.GetManifestPath()
if err != nil {
return err
}
if err := server.PruneDirectory(manifestsPath); err != nil {
return err
}
}
return server.Serve(ln, origins)
}

View File

@@ -185,7 +185,7 @@ python convert.py <path to model directory>
python convert-falcon-hf-to-gguf.py <path to model directory>
# GPTNeoXForCausalLM
python convert-falcon-hf-to-gguf.py <path to model directory>
python convert-gptneox-hf-to-gguf.py <path to model directory>
# GPTBigCodeForCausalLM
python convert-starcoder-hf-to-gguf.py <path to model directory>

View File

@@ -6,7 +6,6 @@ PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db')
# Define the Chroma settings
CHROMA_SETTINGS = Settings(
chroma_db_impl='duckdb+parquet',
persist_directory=PERSIST_DIRECTORY,
anonymized_telemetry=False
)

View File

@@ -150,7 +150,7 @@ def main():
print("Creating new vectorstore")
texts = process_documents()
print(f"Creating embeddings. May take some minutes...")
db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
db.persist()
db = None

View File

@@ -4,6 +4,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.llms import Ollama
import chromadb
import os
import argparse
import time
@@ -22,7 +23,9 @@ def main():
# Parse the command line arguments
args = parse_arguments()
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
retriever = db.as_retriever(search_kwargs={"k": target_source_chunks})
# activate/deactivate the streaming StdOut callback for LLMs
callbacks = [] if args.mute_stream else [StreamingStdOutCallbackHandler()]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,2 @@
node_modules
artcollection

View File

@@ -0,0 +1,73 @@
import { Chroma } from "langchain/vectorstores/chroma";
import { ChromaTranslator } from "langchain/retrievers/self_query/chroma";
import { Ollama } from "langchain/llms/ollama"
import { AttributeInfo } from "langchain/schema/query_constructor";
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
import { SelfQueryRetriever } from "langchain/retrievers/self_query";
// Ollama model used to build the structured query filter.
// (Per the README, codellama has produced the most reliably usable filters.)
const modelName = "codellama";
// Define the attributes of the schema so that the model will know what to look for
const attributeInfo: AttributeInfo[] = [
{
name: "title",
type: "string",
description: "The title of the painting"
},
{
name: "date",
type: "integer",
description: "The four digit year when the painting was created"
},
{
name: "artistName",
type: "string",
description: "The first name and last name of the artist who created the painting. Always use the full name in the filter, even if it isn't included. If the query is 'van Gogh', the filter should be 'Vincent van Gogh'. Use Pierre-Auguste Renoir instead of just Renoir."
}
]
// Define the model used to generate embeddings, these capture the context of the input data
const embeddings = new HuggingFaceTransformersEmbeddings({
modelName: "Xenova/all-MiniLM-L6-v2",
});
// Run the model using Ollama
const llm = new Ollama({
model: modelName
})
// Plain-language description of what each document's pageContent holds;
// passed to the retriever so the LLM knows what the documents contain.
const documentContents = "Description of the art";
// findArt runs a self-query retrieval against the existing "artcollection"
// Chroma collection: the LLM converts the natural-language query into a
// metadata filter, which the retriever applies to the vector store.
const findArt = async () => {
// Load the saved vector store
const vectorStore = await Chroma.fromExistingCollection(embeddings, {
collectionName: "artcollection",
});
// Build the retriever from the LLM, the store, the document/attribute
// descriptions, and a translator that turns the LLM's structured query
// into a Chroma-compatible filter.
const retriever = SelfQueryRetriever.fromLLM({
llm, vectorStore, documentContents, attributeInfo, verbose: false, useOriginalQuery: true, structuredQueryTranslator: new ChromaTranslator()
});
// Get the query from the command line
const query = process.argv[2];
try {
const newquery = await retriever.getRelevantDocuments(query, [
// You can add callbacks to the retriever to get information about the process. In this case, show the output
// query from the LLM used to retrieve the documents
{
handleLLMEnd(output) {
console.log("This is the output from the LLM after it has come up with a filter")
// Take the first generation's text and normalize escaped quotes/newlines for display.
const llmEndOutput = output.generations[0][0].text.replace(/\\"/gm, "'").replace(/\n/gm, "")
console.log(`output - ${JSON.stringify(llmEndOutput, null, 2)}`)
}
},
]);
console.log(newquery);
} catch (error) {
console.log(`There was an error getting the values: ${error}`);
}
}
// Entry point: run the search with the query supplied on the command line.
findArt();

View File

@@ -0,0 +1,128 @@
import { Artwork, RawArtwork } from './types';
import { HuggingFaceTransformersEmbeddings } from 'langchain/embeddings/hf_transformers';
import { Chroma } from "langchain/vectorstores/chroma";
import { Document } from "langchain/document";
import { ChromaClient } from "chromadb";
// Maximum number of works fetched per artist from the search endpoint.
const numberOfArtworks = 10;
// list of artists we are going to pull from the API
const artists = ["van Gogh", "Renoir", "Monet", "Picasso"]
// generateSource rebuilds the "artcollection" Chroma collection from the
// artwork documents fetched for each artist above.
const generateSource = async () => {
// Delete the existing vector store so that we don't get duplicate documents
await new ChromaClient().deleteCollection({
name: "artcollection",
});
const allartworkdocs = await getArt(artists);
// Create the vector store
const vectorStore = await Chroma.fromDocuments(allartworkdocs, embedding, { collectionName: "artcollection" });
console.log(`Created vector store with ${await vectorStore.collection?.count()} documents`);
}
// getArt collects the artwork documents for every artist in the list: it
// first gathers the ids of each artist's works, then fetches the works
// themselves and converts them into Documents.
// (Removed an unused local `artworks: Artwork[]` that was never read.)
const getArt = async (artists: string[]) => {
    const artistsWorkIds: number[] = [];
    for (const artist of artists) {
        // First get the ids of the works by each artist
        const thisIds = await fetchArtistWorkIds(artist);
        console.log(`Fetching ${artist}`);
        // Pause between requests so we don't hammer the public API.
        await (new Promise(r => setTimeout(r, 1000)));
        artistsWorkIds.push(...thisIds);
    };
    // now get the actual artwork
    const artwork = await fetchArtwork(artistsWorkIds);
    return artwork
}
// fetchArtistWorkIds searches the API for an artist by name and returns the
// ids of up to numberOfArtworks of their works.
const fetchArtistWorkIds = async (artist: string): Promise<number[]> => {
    const searchURL = `https://api.artic.edu/api/v1/artworks/search?q=${artist}&limit=${numberOfArtworks}`;
    const res = await fetch(searchURL);
    const body = await res.json();
    const works: { id: number }[] = body.data;
    const ids: number[] = [];
    for (const work of works) {
        ids.push(work.id);
    }
    return ids;
}
// Embedding model used to vectorize the artwork descriptions; the same model
// name is used at query time in FindArt.ts, so keep the two in sync.
const embedding = new HuggingFaceTransformersEmbeddings({
modelName: "Xenova/all-MiniLM-L6-v2",
});
// Turns out there are some weird characters in the descriptions: sanitize
// strips anchor/paragraph/emphasis markup and transliterates typographic
// punctuation, ligatures, and common accented letters to plain ASCII.
// Returns " " (a single space) when there is no usable text; callers use a
// `!== " "` check to skip artworks without a description.
const sanitize = (badstring: string): string => {
    // Treat missing text as "no description". The original only checked for
    // null; a missing field from the API (undefined) would have crashed on
    // the first .replace() call, so cover both.
    if (badstring === null || badstring === undefined) {
        return " ";
    }
    return badstring
        .replace(/<\s*a\s+[^>]*href\s*=\s*[\"']?([^\"' >]+)[\"' >]>/gm, "")
        .replace(/<\/a>/gm, "")
        .replace(/<\/?em>/gm, "")
        .replace(/[\u2018\u2019]/gm, "")
        .replace(/[\u201C\u201D]/gm, "")
        .replace(/[\u2013\u2014]/gm, "-")
        .replace(/[\u2026]/gm, "...")
        .replace(/[\u00A0]/gm, " ")
        .replace(/[\u00AD]/gm, "-")
        .replace(/[\u00B0]/gm, " degrees ")
        .replace(/[\u00B1]/gm, " plus or minus ")
        .replace(/[\u00B2]/gm, " squared ")
        .replace(/[\u00B3]/gm, " cubed ")
        .replace(/[\u00B4]/gm, "'")
        .replace(/[\u00B5]/gm, " micro ")
        .replace(/[\u00B6]/gm, " paragraph ")
        .replace(/[\u00B7]/gm, " dot ")
        .replace(/[\u00B8]/gm, ",")
        .replace(/[\u00B9]/gm, " first ")
        .replace(/[\u00BA]/gm, " degrees ")
        .replace(/[\u00BB]/gm, ">>")
        .replace(/[\u00BC]/gm, " 1/4 ")
        .replace(/[\u00BD]/gm, " 1/2 ")
        .replace(/[\uFB01]/gm, "fi")
        .replace(/[\uFB02]/gm, "fl")
        .replace(/[\uFB03]/gm, "ffi")
        .replace(/[\uFB04]/gm, "ffl")
        .replace(/[\uFB05]/gm, "ft")
        .replace(/[\uFB06\uFB07\uFB08]/gm, "st")
        .replace(/[\u00D7]/gm, "x")
        .replace(/[\u00E8\u00E9]/gm, "e")
        .replace(/[\u00F1]/gm, "n")
        .replace(/[\u00F6]/gm, "o")
        .replace(/[\u00F8]/gm, "o")
        .replace(/[\u00FC]/gm, "u")
        .replace(/[\u00FF]/gm, "y")
        .replace(/[\u0101\u0103\u00E0]/gm, "a")
        .replace(/[\u00C9]/gm, "E")
        .replace(/<p>/gm, "")
        .replace(/<\/p>/gm, "")
        .replace(/\n/gm, "");
}
// fetchArtwork retrieves each work by id from the API and converts those
// that have a usable description into langchain Documents whose metadata
// carries the fields used for self-query filtering (title/date/artistName).
// (Removed an unused local `artworks: Artwork[]` that was never read.)
const fetchArtwork = async (workids: number[]) => {
    const docsarray = [];
    for await (const workid of workids) {
        const artworkURL = `https://api.artic.edu/api/v1/artworks/${workid}`;
        const response = await fetch(artworkURL);
        const json = await response.json();
        const artworkraw: RawArtwork = await json.data as RawArtwork;
        const description = sanitize(artworkraw.description)
        // sanitize returns " " when there is no description; skip those works.
        if (description !== " ") {
            const doc = new Document({
                pageContent: description,
                metadata: {
                    title: sanitize(artworkraw.title),
                    date: artworkraw.date_end,
                    artistName: artworkraw.artist_title,
                }
            });
            docsarray.push(doc);
            console.log("------------------")
            console.log(`${artworkraw.title} - ${artworkraw.artist_title}`);
        }
    }
    return docsarray;
}
// Kick off the build when the script is run.
generateSource();

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,20 @@
{
"name": "typescript-selfqueryingretreival",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"@xenova/transformers": "^2.7.0",
"chromadb": "^1.5.11",
"langchain": "^0.0.177",
"ollama-node": "^0.1.24",
"peggy": "^3.0.2",
"sharp": "^0.32.6"
}
}

View File

@@ -0,0 +1,111 @@
# Self Query Retrieval
Filtering your vector database results to get better answers from your LLM.
![sqr 2023-11-05 14_30_50](https://github.com/jmorganca/ollama/assets/633681/55afb7f5-ebd8-4c58-86ba-284594fd1ec8)
## TLDR
1. Install and run ChromaDB
1. Run `git clone https://github.com/chroma-core/chroma.git`
2. `cd chroma`
3. `docker-compose up -d --build`
2. Navigate to this example's directory
3. `npm install`
4. `tsx ./GenerateSource.ts`
5. `tsx ./FindArt.ts "are there any paintings from the artist Pablo Picasso"`
Other questions to try:
- Are there any paintings painted in 1881
- Are there any paintings painted by Vincent van Gogh
Note: If you haven't used `tsx`, it's a more modern alternative to `ts-node` and works especially well when you have libraries that use different module types. You can find it at [https://github.com/esbuild-kit/tsx](https://github.com/esbuild-kit/tsx).
## Introduction
Retrieval Augmented Generation (RAG) is what developers usually reach for when they want to ask questions to all of their notes. But often it doesn't give the results you need. And that's because there is still too much information. And frequently it's the wrong information. When you ask a question, RAG will retrieve a set of documents that it thinks are relevant to the question and then hand them off to the LLM. If you ask "what is a transformer", it may grab excerpts from the Transformers paper you read recently, along with sections of your Intro to Electronics book. Even if you ask a better question, such as "what is a transformer in the context of electrical engineering", it may still grab excerpts from the Transformers paper. And that's because the Transformers paper is a very good match for the question. It's just not the right match.
Ideally, the Transformers paper and the Electronics book would be added to the database with some metadata, such as the topics or keywords. But RAG typically doesn't look at those metadata fields. And that's where Self Query Retrieval comes in. It's a way to use traditional database queries to narrow down the set of documents that RAG will use and thus get better results.
## How it works
There are a few things you need to do to enable Self Query Retrieval. First, there needs to be additional metadata about your content in the database. The examples in the Langchain documentation are based on movies, and the metadata includes the year, the director's name, the genre, etc. And then you need to pass the schema to the query to help it get the right documents.
## The code
There are two main parts to the code. First there is a `GenerateSource.ts` file and then there is a `FindArt.ts` file. Let's look at GenerateSource first.
### GenerateSource
The purpose of Generate Source is to create our data source. For this example, we are using the [Art Institute of Chicago API,](https://api.artic.edu/docs/#introduction) which is incredible. This will be loaded into a vector database, which for this example is ChromaDB.
This could be any CSV file or other data source you have access to. The file would have a single descriptive column and then metadata columns. All the relevant columns from our dataset are being added to a Document object. Then that array of Documents is being loaded into ChromaDB. Finally, at the end, I verify that documents were created by outputting a count to the screen.
```typescript
await new ChromaClient().deleteCollection({
name: "artcollection",
});
const vectorStore = await Chroma.fromDocuments(allartworkdocs,
embedding, { collectionName: "artcollection" });
console.log(`Created vector store with
${await vectorStore.collection?.count()} documents`);
```
### FindArt
To actually find the art, we need to start by loading the database:
```typescript
const vectorStore = await Chroma.fromExistingCollection(embeddings, {
collectionName: "artcollection",
});
```
Now we can create our Self Query Retriever. This needs to be created referring to the LLM, the database, the description of the document and the description of all the attributes in the metadata, and finally a structured query translator which will take the query generated by the LLM and turn it into something useable by the database.
```typescript
const llm = new Ollama({
model: modelName
})
const documentContents = "Description of the art";
const attributeInfo: AttributeInfo[] = [
{
name: "title",
type: "string",
description: "The title of the painting"
},
{
name: "date",
type: "integer",
description: "The four digit year when the painting was created"
},
{
name: "artistName",
type: "string",
description: "The first name and last name of the artist who created the painting. Always use the full name in the filter, even if it isn't included. If the query is 'van Gogh', the filter should be 'Vincent van Gogh'. Use Pierre-Auguste Renoir instead of just Renoir."
}
]
const retriever = SelfQueryRetriever.fromLLM({
llm, vectorStore, documentContents, attributeInfo, verbose: false, useOriginalQuery: true, structuredQueryTranslator: new ChromaTranslator()
});
```
Now we can ask a question and get the results:
```typescript
const newquery = await retriever.getRelevantDocuments(query)
```
## Next Steps
When you run this example, you will get a set of documents from the database that may be a bit more relevant to your question. Now you could feed those to the LLM and get the actual answer to the question based on these documents.
To take this further, you could work on getting more out of the dataset. It turns out that this works best if there is only a single possible value for any given field. Our artists are often referred to by their last name, but sometimes using their full name. It may be Vincent van Gogh, or just van Gogh. Another way to get around this is to build a better query translator that knows that the search could be for a substring of the full name. But that also requires looking into the metadata searching capabilities of the database.
Maybe it makes more sense to move the artist name and title of the work into the document itself. Then add some more metadata (there are at least 100 other attributes in the raw API that aren't used in this example.)
Also try different models. In testing so far, it seems that `codellama` produces more reliably useable filters. It's not perfect and can still create a filter that won't find anything. When a new code model comes out, you might try that to see if it performs better.

View File

@@ -0,0 +1,10 @@
{
"compilerOptions": {
"target": "es2016",
"module": "commonjs", /* Specify what module code is generated. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
"strict": true, /* Enable all strict type-checking options. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
}
}

View File

@@ -0,0 +1,26 @@
// RawArtwork models the subset of fields this example reads from the Art
// Institute of Chicago API's artwork payload. Field meanings are inferred
// from the API's naming -- see api.artic.edu/docs to confirm.
export type RawArtwork = {
id: number;
title: string;
artist_display: string;
place_of_origin: string;
date_start: number;
date_end: number;
duration: number;
dimensions: string;
medium_display: string;
credit_line: string;
artwork_type_title: string;
department_title: string;
artist_title: string;
classification_title: string;
description: string;
}
// Artwork is a simplified local projection of an artwork record
// (id/title/country/date/artist/description).
export type Artwork = {
id: number;
title: string;
country: string;
date: number;
artist: string;
description: string;
}

View File

@@ -2,6 +2,7 @@ package readline
import (
"fmt"
"os"
"github.com/emirpasic/gods/lists/arraylist"
"golang.org/x/term"
@@ -17,7 +18,8 @@ type Buffer struct {
}
func NewBuffer(prompt *Prompt) (*Buffer, error) {
width, height, err := term.GetSize(0)
fd := int(os.Stdout.Fd())
width, height, err := term.GetSize(fd)
if err != nil {
fmt.Println("Error getting size:", err)
return nil, err

View File

@@ -51,11 +51,12 @@ func (i *Instance) Readline() (string, error) {
}
fmt.Print(prompt)
termios, err := SetRawMode(syscall.Stdin)
fd := int(syscall.Stdin)
termios, err := SetRawMode(fd)
if err != nil {
return "", err
}
defer UnsetRawMode(syscall.Stdin, termios)
defer UnsetRawMode(fd, termios)
buf, _ := NewBuffer(i.Prompt)

View File

@@ -1,5 +1,4 @@
//go:build aix || darwin || dragonfly || freebsd || (linux && !appengine) || netbsd || openbsd || os400 || solaris
// +build aix darwin dragonfly freebsd linux,!appengine netbsd openbsd os400 solaris
package readline

62
readline/term_windows.go Normal file
View File

@@ -0,0 +1,62 @@
package readline
import (
"syscall"
"unsafe"
)
const (
// Console input-mode flags -- values match the Win32 SetConsoleMode
// ENABLE_* constants (see Windows console documentation).
enableLineInput = 2
enableWindowInput = 8
enableMouseInput = 16
enableInsertMode = 32
enableQuickEditMode = 64
enableExtendedFlags = 128
// Console output-mode flags.
enableProcessedOutput = 1
enableWrapAtEolOutput = 2
enableAutoPosition = 256 // Cursor position is not affected by writing data to the console.
enableEchoInput = 4 // Characters are written to the console as they're read.
enableProcessedInput = 1 // Enables input processing (like recognizing Ctrl+C).
)
// kernel32 exposes the Win32 console-mode APIs used in this file.
var kernel32 = syscall.NewLazyDLL("kernel32.dll")
var (
procGetConsoleMode = kernel32.NewProc("GetConsoleMode")
procSetConsoleMode = kernel32.NewProc("SetConsoleMode")
)
// State holds a console's original mode bits so a caller of SetRawMode can
// restore the console later via UnsetRawMode.
type State struct {
mode uint32
}
// IsTerminal reports whether the file descriptor is attached to a Windows
// console. It returns true exactly when GetConsoleMode succeeds for the handle.
func IsTerminal(fd int) bool {
	var mode uint32
	ret, _, errno := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&mode)), 0)
	if errno != 0 {
		return false
	}
	return ret != 0
}
// SetRawMode switches the console referenced by fd into raw mode (no echo,
// no line buffering, no Ctrl+C processing) and returns the console's previous
// State so the caller can restore it with UnsetRawMode.
func SetRawMode(fd int) (*State, error) {
var st uint32
// retrieve the current mode of the terminal
_, _, e := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
if e != 0 {
return nil, error(e)
}
// modify the mode to set it to raw: clear the echo, processed-input, and
// line-input bits so keystrokes are delivered unmodified
raw := st &^ (enableEchoInput | enableProcessedInput | enableLineInput | enableProcessedOutput)
// apply the new mode to the terminal
_, _, e = syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(raw), 0)
if e != 0 {
return nil, error(e)
}
// return the original state so that it can be restored later
return &State{st}, nil
}
// UnsetRawMode restores the console referenced by fd to the mode captured in
// state (as returned by SetRawMode).
func UnsetRawMode(fd int, state *State) error {
	// SyscallN reports failure via a non-zero syscall.Errno. The original
	// returned the Errno directly, but Errno(0) boxed into the error
	// interface is non-nil ("operation completed successfully"), so the
	// function reported an error even on success. Return literal nil instead.
	_, _, e := syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(state.mode), 0)
	if e != 0 {
		return error(e)
	}
	return nil
}

View File

@@ -614,6 +614,22 @@ var defaultAllowOrigins = []string{
}
func Serve(ln net.Listener, allowOrigins []string) error {
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
// clean up unused layers and manifests
if err := PruneLayers(); err != nil {
return err
}
manifestsPath, err := GetManifestPath()
if err != nil {
return err
}
if err := PruneDirectory(manifestsPath); err != nil {
return err
}
}
config := cors.DefaultConfig()
config.AllowWildcard = true