Skip to content

Commit

Permalink
knowpro: Simplify load/save of conversations (#780)
Browse files Browse the repository at this point in the history
Simplified interfaces
  • Loading branch information
umeshma authored Mar 4, 2025
1 parent 487d935 commit 9ccba3b
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 113 deletions.
8 changes: 8 additions & 0 deletions ts/packages/knowPro/src/secondaryIndexes.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { IConversationThreadData } from "./conversationThread.js";
import {
IConversation,
IConversationData,
IConversationSecondaryIndexes,
IndexingEventHandlers,
Term,
Expand Down Expand Up @@ -63,3 +65,9 @@ export interface ITextEmbeddingIndexData {
textItems: string[];
embeddings: Float32Array[];
}

export interface IConversationDataWithIndexes<TMessage = any>
extends IConversationData<TMessage> {
relatedTermsIndexData?: ITermsToRelatedTermsIndexData | undefined;
threadData?: IConversationThreadData;
}
61 changes: 41 additions & 20 deletions ts/packages/knowPro/src/serialization.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,15 @@

import { readFile, readJsonFile, writeFile, writeJsonFile } from "typeagent";
import { deserializeEmbeddings, serializeEmbeddings } from "./fuzzyIndex.js";
import { IConversation, IConversationData } from "./interfaces.js";
import path from "path";
import { IConversationDataWithIndexes } from "./secondaryIndexes.js";

export interface IPersistedConversationData<T extends IConversationData> {
conversationData: T;
embeddings?: Float32Array[] | undefined;
}

export async function writeConversationToFile<T extends IConversationData>(
conversation: IConversation,
export async function writeConversationDataToFile(
conversationData: IConversationDataWithIndexes,
dirPath: string,
baseFileName: string,
serializer: (
conversation: IConversation,
) => Promise<IPersistedConversationData<T>>,
): Promise<void> {
const serializationData = await serializer(conversation);
const serializationData = conversationDataToPersistent(conversationData);
if (serializationData.embeddings) {
const embeddingsBuffer = serializeEmbeddings(
serializationData.embeddings,
Expand All @@ -35,15 +27,12 @@ export async function writeConversationToFile<T extends IConversationData>(
);
}

export async function readConversationFromFile<T extends IConversationData>(
export async function readConversationDataFromFile(
dirPath: string,
baseFileName: string,
embeddingSize: number | undefined,
deserializer: (
data: IPersistedConversationData<T>,
) => Promise<IConversation>,
): Promise<IConversation | undefined> {
const conversationData = await readJsonFile<T>(
): Promise<IConversationDataWithIndexes | undefined> {
const conversationData = await readJsonFile<IConversationDataWithIndexes>(
path.join(dirPath, baseFileName + DataFileSuffix),
);
if (!conversationData) {
Expand All @@ -58,12 +47,44 @@ export async function readConversationFromFile<T extends IConversationData>(
embeddings = deserializeEmbeddings(embeddingsBuffer, embeddingSize);
}
}
let serializationData: IPersistedConversationData<T> = {
let serializationData: IPersistedConversationData = {
conversationData,
embeddings,
};
return deserializer(serializationData);
return persistentToConversationData(serializationData);
}

const DataFileSuffix = "_data.json";
const EmbeddingFileSuffix = "_embeddings.bin";

interface IPersistedConversationData {
conversationData: IConversationDataWithIndexes;
embeddings?: Float32Array[] | undefined;
}

function conversationDataToPersistent(
conversationData: IConversationDataWithIndexes,
): IPersistedConversationData {
let persistentData: IPersistedConversationData = {
conversationData,
};
const embeddingData =
conversationData.relatedTermsIndexData?.textEmbeddingData;
if (embeddingData) {
persistentData.embeddings = embeddingData.embeddings;
embeddingData.embeddings = [];
}
return persistentData;
}

function persistentToConversationData(
persistentData: IPersistedConversationData,
): IConversationDataWithIndexes {
const embeddingData =
persistentData.conversationData.relatedTermsIndexData
?.textEmbeddingData;
if (persistentData.embeddings && embeddingData) {
embeddingData.embeddings = persistentData.embeddings;
}
return persistentData.conversationData;
}
59 changes: 15 additions & 44 deletions ts/packages/memory/conversation/src/importPodcast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,9 @@ import {
IConversation,
IMessage,
SemanticRef,
IConversationData,
Term,
ConversationIndex,
IndexingResults,
ITermsToRelatedTermsIndexData,
IConversationThreadData,
ConversationSettings,
createConversationSettings,
addMetadataToIndex,
Expand All @@ -20,9 +17,9 @@ import {
ConversationThreads,
IndexingEventHandlers,
buildConversationIndex,
writeConversationToFile,
IPersistedConversationData,
readConversationFromFile,
IConversationDataWithIndexes,
writeConversationDataToFile,
readConversationDataFromFile,
} from "knowpro";
import { conversation as kpLib, split } from "knowledge-processor";
import { collections, dateTime, getFileName, readAllText } from "typeagent";
Expand Down Expand Up @@ -152,8 +149,8 @@ export class Podcast implements IConversation<PodcastMessageMeta> {
return result;
}

public async serialize(): Promise<IPersistedConversationData<PodcastData>> {
const conversationData: PodcastData = {
public async serialize(): Promise<PodcastData> {
const data: PodcastData = {
nameTag: this.nameTag,
messages: this.messages,
tags: this.tags,
Expand All @@ -163,29 +160,10 @@ export class Podcast implements IConversation<PodcastMessageMeta> {
this.secondaryIndexes.termToRelatedTermsIndex.serialize(),
threadData: this.secondaryIndexes.threads.serialize(),
};
let persistentData: IPersistedConversationData<PodcastData> = {
conversationData,
};
const embeddingData =
conversationData.relatedTermsIndexData?.textEmbeddingData;
if (embeddingData) {
persistentData.embeddings = embeddingData.embeddings;
embeddingData.embeddings = [];
}
return persistentData;
return data;
}

public async deserialize(
persistentData: IPersistedConversationData<PodcastData>,
): Promise<void> {
const podcastData = persistentData.conversationData;
const embeddingData =
persistentData.conversationData.relatedTermsIndexData
?.textEmbeddingData;
if (persistentData.embeddings && embeddingData) {
embeddingData.embeddings = persistentData.embeddings;
}

public async deserialize(podcastData: PodcastData): Promise<void> {
this.nameTag = podcastData.nameTag;
this.messages = podcastData.messages;
this.semanticRefs = podcastData.semanticRefs;
Expand Down Expand Up @@ -213,29 +191,24 @@ export class Podcast implements IConversation<PodcastMessageMeta> {
dirPath: string,
baseFileName: string,
): Promise<void> {
await writeConversationToFile(
this,
dirPath,
baseFileName,
async (conversation) => this.serialize(),
);
const data = await this.serialize();
await writeConversationDataToFile(data, dirPath, baseFileName);
}

public static async readFromFile(
dirPath: string,
baseFileName: string,
): Promise<Podcast | undefined> {
const podcast = new Podcast();
await readConversationFromFile<PodcastData>(
const data = await readConversationDataFromFile(
dirPath,
baseFileName,
podcast.settings.relatedTermIndexSettings.embeddingIndexSettings
?.embeddingSize,
async (persistentData) => {
await podcast.deserialize(persistentData);
return podcast;
},
);
if (data) {
podcast.deserialize(data);
}
return podcast;
}

Expand Down Expand Up @@ -298,10 +271,8 @@ export class PodcastSecondaryIndexes extends ConversationSecondaryIndexes {
//const DataFileSuffix = "_data.json";
//const EmbeddingFileSuffix = "_embeddings.bin";

export interface PodcastData extends IConversationData<PodcastMessage> {
relatedTermsIndexData?: ITermsToRelatedTermsIndexData | undefined;
threadData?: IConversationThreadData;
}
export interface PodcastData
extends IConversationDataWithIndexes<PodcastMessage> {}

export async function importPodcast(
transcriptFilePath: string,
Expand Down
63 changes: 14 additions & 49 deletions ts/packages/memory/image/src/importImages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,21 @@

import {
IConversation,
IConversationData,
IKnowledgeSource,
IMessage,
SemanticRef,
ConversationIndex,
IndexingResults,
createKnowledgeModel,
ITermsToRelatedTermsIndexData,
IConversationThreadData,
ConversationSettings,
createConversationSettings,
addMetadataToIndex,
buildSecondaryIndexes,
ConversationSecondaryIndexes,
IndexingEventHandlers,
IPersistedConversationData,
writeConversationToFile,
readConversationFromFile,
IConversationDataWithIndexes,
writeConversationDataToFile,
readConversationDataFromFile,
} from "knowpro";
import { conversation as kpLib, image } from "knowledge-processor";
import fs from "node:fs";
Expand All @@ -30,14 +27,8 @@ import { ChatModel } from "aiclient";
import { AddressOutput } from "@azure-rest/maps-search";
import { isDirectoryPath } from "typeagent";

export interface ImageCollectionData extends IConversationData<Image> {
relatedTermsIndexData?: ITermsToRelatedTermsIndexData | undefined;
threadData?: IConversationThreadData;
}

export interface ImageCollectionData extends IConversationData<Image> {
relatedTermsIndexData?: ITermsToRelatedTermsIndexData | undefined;
}
export interface ImageCollectionData
extends IConversationDataWithIndexes<Image> {}

export class Image implements IMessage<ImageMeta> {
public timestamp: string | undefined;
Expand Down Expand Up @@ -408,9 +399,7 @@ export class ImageCollection implements IConversation<ImageMeta> {
return indexingResult;
}

public async serialize(): Promise<
IPersistedConversationData<ImageCollectionData>
> {
public async serialize(): Promise<ImageCollectionData> {
const conversationData: ImageCollectionData = {
nameTag: this.nameTag,
messages: this.messages,
Expand All @@ -420,29 +409,10 @@ export class ImageCollection implements IConversation<ImageMeta> {
relatedTermsIndexData:
this.secondaryIndexes.termToRelatedTermsIndex.serialize(),
};
let persistentData: IPersistedConversationData<ImageCollectionData> = {
conversationData,
};
const embeddingData =
conversationData.relatedTermsIndexData?.textEmbeddingData;
if (embeddingData) {
persistentData.embeddings = embeddingData.embeddings;
embeddingData.embeddings = [];
}
return persistentData;
return conversationData;
}

public async deserialize(
persistentData: IPersistedConversationData<ImageCollectionData>,
): Promise<void> {
const data = persistentData.conversationData;
const embeddingData =
persistentData.conversationData.relatedTermsIndexData
?.textEmbeddingData;
if (persistentData.embeddings && embeddingData) {
embeddingData.embeddings = persistentData.embeddings;
}

public async deserialize(data: ImageCollectionData): Promise<void> {
this.nameTag = data.nameTag;
this.messages = data.messages;
this.semanticRefs = data.semanticRefs;
Expand All @@ -464,29 +434,24 @@ export class ImageCollection implements IConversation<ImageMeta> {
dirPath: string,
baseFileName: string,
): Promise<void> {
await writeConversationToFile(
this,
dirPath,
baseFileName,
async (conversation) => this.serialize(),
);
const data = await this.serialize();
await writeConversationDataToFile(data, dirPath, baseFileName);
}

public static async readFromFile(
dirPath: string,
baseFileName: string,
): Promise<ImageCollection | undefined> {
const imageCollection = new ImageCollection();
await readConversationFromFile<ImageCollectionData>(
const data = await readConversationDataFromFile(
dirPath,
baseFileName,
imageCollection.settings.relatedTermIndexSettings
.embeddingIndexSettings?.embeddingSize,
async (persistentData) => {
await imageCollection.deserialize(persistentData);
return imageCollection;
},
);
if (data) {
imageCollection.deserialize(data);
}
return imageCollection;
}
}
Expand Down

0 comments on commit 9ccba3b

Please sign in to comment.