Skip to content

Commit

Permalink
knowpro: Image Collection persistence (#777)
Browse files Browse the repository at this point in the history
Image Collection Persistence:
- Moved to using 2 files; JSON for data, binary for embeddings. 
- Breaking change: old saved indexes will no longer work.
  • Loading branch information
umeshma authored Mar 4, 2025
1 parent 6bc52ea commit 487d935
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 33 deletions.
40 changes: 14 additions & 26 deletions ts/examples/chat/src/memory/knowproMemory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,7 @@ import {
parseFreeAndNamedArguments,
keyValuesFromNamedArgs,
} from "./common.js";
import {
dateTime,
ensureDir,
getFileName,
readJsonFile,
writeJsonFile,
} from "typeagent";
import { dateTime, ensureDir, getFileName } from "typeagent";
import path from "path";
import chalk from "chalk";
import { KnowProPrinter } from "./knowproPrinter.js";
Expand Down Expand Up @@ -310,9 +304,12 @@ export async function createKnowproCommands(
context.printer.writeLine("Saving index");
context.printer.writeLine(namedArgs.filePath);
if (context.images) {
const cData = await context.images.serialize();
await ensureDir(path.dirname(namedArgs.filePath));
await writeJsonFile(namedArgs.filePath, cData);
const dirName = path.dirname(namedArgs.filePath);
await ensureDir(dirName);
await context.images.writeToFile(
dirName,
getFileName(namedArgs.filePath),
);
}
}

Expand All @@ -337,24 +334,15 @@ export async function createKnowproCommands(
context.printer.writeError("No filepath or name provided");
return;
}
if (!fs.existsSync(imagesFilePath)) {
context.printer.writeError(`${imagesFilePath} not found`);
return;
}

const data = await readJsonFile<im.ImageCollectionData>(imagesFilePath);
if (!data) {
context.printer.writeError("Could not load image collection data");
context.images = await im.ImageCollection.readFromFile(
path.dirname(imagesFilePath),
getFileName(imagesFilePath),
);
if (!context.images) {
context.printer.writeLine("ImageCollection not found");
return;
}
context.images = new im.ImageCollection(
data.nameTag,
data.messages,
data.tags,
data.semanticRefs,
);
await context.images.deserialize(data);
context.conversation = context.podcast;
context.conversation = context.images;
context.printer.conversation = context.conversation;
context.printer.writeImageCollectionInfo(context.images);
}
Expand Down
73 changes: 66 additions & 7 deletions ts/packages/memory/image/src/importImages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ import {
buildSecondaryIndexes,
ConversationSecondaryIndexes,
IndexingEventHandlers,
IPersistedConversationData,
writeConversationToFile,
readConversationFromFile,
} from "knowpro";
import { conversation as kpLib, image } from "knowledge-processor";
import fs from "node:fs";
Expand Down Expand Up @@ -356,8 +359,8 @@ export class ImageCollection implements IConversation<ImageMeta> {
public semanticRefIndex: ConversationIndex;
public secondaryIndexes: ConversationSecondaryIndexes;
constructor(
public nameTag: string,
public messages: Image[],
public nameTag: string = "",
public messages: Image[] = [],
public tags: string[] = [],
public semanticRefs: SemanticRef[] = [],
) {
Expand Down Expand Up @@ -397,16 +400,18 @@ export class ImageCollection implements IConversation<ImageMeta> {
}

this.addMetadataToIndex();
await buildSecondaryIndexes(this, true);
await buildSecondaryIndexes(this, true, eventHandler);

let indexingResult: IndexingResults = {
chunksIndexedUpto: { messageIndex: this.messages.length - 1 },
};
return indexingResult;
}

public async serialize(): Promise<ImageCollectionData> {
return {
public async serialize(): Promise<
IPersistedConversationData<ImageCollectionData>
> {
const conversationData: ImageCollectionData = {
nameTag: this.nameTag,
messages: this.messages,
tags: this.tags,
Expand All @@ -415,9 +420,33 @@ export class ImageCollection implements IConversation<ImageMeta> {
relatedTermsIndexData:
this.secondaryIndexes.termToRelatedTermsIndex.serialize(),
};
let persistentData: IPersistedConversationData<ImageCollectionData> = {
conversationData,
};
const embeddingData =
conversationData.relatedTermsIndexData?.textEmbeddingData;
if (embeddingData) {
persistentData.embeddings = embeddingData.embeddings;
embeddingData.embeddings = [];
}
return persistentData;
}

public async deserialize(data: ImageCollectionData): Promise<void> {
public async deserialize(
persistentData: IPersistedConversationData<ImageCollectionData>,
): Promise<void> {
const data = persistentData.conversationData;
const embeddingData =
persistentData.conversationData.relatedTermsIndexData
?.textEmbeddingData;
if (persistentData.embeddings && embeddingData) {
embeddingData.embeddings = persistentData.embeddings;
}

this.nameTag = data.nameTag;
this.messages = data.messages;
this.semanticRefs = data.semanticRefs;
this.tags = data.tags;
if (data.semanticIndexData) {
this.semanticRefIndex = new ConversationIndex(
data.semanticIndexData,
Expand All @@ -430,6 +459,36 @@ export class ImageCollection implements IConversation<ImageMeta> {
}
await buildSecondaryIndexes(this, false);
}

/**
 * Saves this image collection to disk.
 *
 * Hands this instance, the target directory and the base file name to
 * `writeConversationToFile`, together with a callback that produces the
 * persisted form via {@link ImageCollection.serialize}.
 * NOTE(review): the exact file names/extensions written are decided by
 * `writeConversationToFile`, which is not visible here — confirm there.
 *
 * @param dirPath Directory that receives the saved files.
 * @param baseFileName File name stem used for the saved files.
 */
public async writeToFile(
    dirPath: string,
    baseFileName: string,
): Promise<void> {
    // The serializer ignores the conversation argument it is offered and
    // always serializes this instance.
    const serializeSelf = async () => this.serialize();
    await writeConversationToFile(this, dirPath, baseFileName, serializeSelf);
}

/**
 * Loads an image collection previously saved with `writeToFile`.
 *
 * A default-constructed collection is created first so that its settings
 * can supply the embedding size passed to `readConversationFromFile`; the
 * persisted data is then applied to that same instance through
 * {@link ImageCollection.deserialize}.
 *
 * @param dirPath Directory containing the saved files.
 * @param baseFileName File name stem the collection was saved under.
 * @returns The populated collection, or `undefined` when the deserializer
 * was never invoked (i.e. no persisted data was handed back to load).
 */
public static async readFromFile(
    dirPath: string,
    baseFileName: string,
): Promise<ImageCollection | undefined> {
    const imageCollection = new ImageCollection();
    // Stays undefined unless the deserializer callback actually runs.
    // Previously the empty collection was returned unconditionally, so
    // callers could never observe the "not found" (undefined) result.
    let loaded: ImageCollection | undefined;
    await readConversationFromFile<ImageCollectionData>(
        dirPath,
        baseFileName,
        imageCollection.settings.relatedTermIndexSettings
            .embeddingIndexSettings?.embeddingSize,
        async (persistentData) => {
            await imageCollection.deserialize(persistentData);
            loaded = imageCollection;
            return imageCollection;
        },
    );
    return loaded;
}
}

/**
Expand Down Expand Up @@ -540,7 +599,7 @@ async function indexImage(
console.log(`Could not find part of the file path '${fileName}'`);
return;
} else if (!isImageFileType(path.extname(fileName))) {
console.log(`Skipping '${fileName}', not a known image file.`);
//console.log(`Skipping '${fileName}', not a known image file.`);
return;
}

Expand Down

0 comments on commit 487d935

Please sign in to comment.