Skip to content

Commit

Permalink
knowpro improvements (#739)
Browse files Browse the repository at this point in the history
Exploring:
* Conversation Threads
* Large indexes
* Bugs
  • Loading branch information
umeshma authored Feb 20, 2025
1 parent 359bc5e commit d5b4ab3
Show file tree
Hide file tree
Showing 12 changed files with 233 additions and 35 deletions.
65 changes: 51 additions & 14 deletions ts/examples/chat/src/memory/podcastMemory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -152,32 +152,67 @@ export function createPodcastCommands(
args: {
filePath: arg("Output filePath"),
},
options: {
threads: argBool("Export threads", true),
},
};
}
commands.podcastExport.metadata = podcastExportDef();
async function podcastExport(args: string[]) {
const namedArgs = parseNamedArguments(args, podcastExportDef());
const messageStore = context.podcastMemory.conversation.messages;
const threads =
await context.podcastMemory.conversation.getThreadIndex();
const knowledgeStore = context.podcastMemory.conversation.knowledge;
const kpMessages: kp.PodcastMessage[] = [];
const knowledgeResponses: conversation.KnowledgeResponse[] = [];
for await (const entry of messageStore.all()) {
const messageId = entry.name;
const message = entry.value;

const podcastMessage = podcastMessageFromEmailText(
message.value.value,
);
podcastMessage.addTimestamp(message.timestamp.toISOString());
kpMessages.push(podcastMessage);
knowledgeResponses.push(
extractedKnowledgeToResponse(
await knowledgeStore.get(messageId),
),
const podcastMessages: kp.PodcastMessage[] = [];
const podcastThreads: kp.Thread[] = [];
for await (const threadEntry of threads.entries()) {
const thread = threadEntry.value;
const range = conversation.toDateRange(thread.timeRange);
const messageIds = await messageStore.getIdsInRange(
range.startDate,
range.stopDate,
);
let threadRange: kp.TextRange = {
start: {
messageIndex: podcastMessages.length,
},
};
const messages = await messageStore.getMultiple(messageIds);
for (let i = 0; i < messageIds.length; ++i) {
const messageId = messageIds[i];
const message = messages[i]!;
const podcastMessage = podcastMessageFromEmailText(
message.value.value,
);
podcastMessage.addTimestamp(message.timestamp.toISOString());
threadRange.end = {
messageIndex: podcastMessages.length,
};
podcastMessages.push(podcastMessage);
knowledgeResponses.push(
extractedKnowledgeToResponse(
await knowledgeStore.get(messageId),
),
);
}
podcastThreads.push({
description: thread.description,
ranges: [threadRange],
});
}

const kpPodcast = new kp.Podcast("AllEpisodes", []);
kp.addToConversationIndex(kpPodcast, kpMessages, knowledgeResponses);
kp.addToConversationIndex(
kpPodcast,
podcastMessages,
knowledgeResponses,
);
kpPodcast.threads.threads.push(...podcastThreads);
await kpPodcast.threads.buildIndex();

const podcastData = kpPodcast.serialize();
await ensureDir(path.dirname(namedArgs.filePath));
await writeJsonFile(namedArgs.filePath, podcastData);
Expand Down Expand Up @@ -844,6 +879,8 @@ function podcastMessageFromEmailText(text: string) {
for (let line of lines) {
if (line.startsWith("From: ")) {
speaker = line.replace("From: ", "");
} else if (line.startsWith(`"From: `)) {
speaker = line.replace(`"From: `, "");
} else if (!line.startsWith("To: ")) {
messageText += line;
messageText += "\n";
Expand Down
6 changes: 4 additions & 2 deletions ts/packages/knowPro/src/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,11 @@ export class MessageAccumulator extends MatchAccumulator<IMessage> {}

export class TextRangeCollection {
// Maintains ranges sorted by message index
private ranges: TextRange[] = [];
private ranges: TextRange[];

constructor() {}
/**
 * Creates a collection, optionally seeded with existing ranges.
 *
 * @param ranges Initial ranges. When provided, the array is stored as-is
 * (no copy is made); when omitted, the collection starts empty.
 * NOTE(review): the given array is not sorted here — presumably callers
 * pass ranges that are already ordered by message index; confirm.
 */
constructor(ranges?: TextRange[] | undefined) {
    const initialRanges: TextRange[] = ranges ?? [];
    this.ranges = initialRanges;
}

public get size() {
return this.ranges.length;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { NormalizedEmbedding } from "typeagent";
import { TextRange } from "./dataFormat.js";
import {
TextEmbeddingIndex,
Expand All @@ -22,8 +23,25 @@ export type ScoredThreadIndex = {
score: number;
};

export interface IThreadDescriptionIndexFuzzy {
addThread(
/**
 * The threads of a conversation together with an index over their
 * descriptions, plus conversion to and from a serializable form.
 */
export interface IConversationThreads {
// All threads in the conversation
threads: Thread[];
// Index used to look threads up by their description
threadDescriptionIndex: IThreadDescriptionIndex;

// Produce a serializable snapshot of the threads
serialize(): IConversationThreadData;
// Restore state from a snapshot produced by serialize()
deserialize(data: IConversationThreadData): void;
}

/**
 * Serialized form of a conversation's threads.
 */
export interface IConversationThreadData {
// One item per thread; may be absent
threads?: IThreadDataItem[] | undefined;
}

/**
 * Serialized form of a single thread: the thread itself plus an embedding
 * stored as a plain number array.
 */
export interface IThreadDataItem {
thread: Thread;
// Embedding as plain numbers
// NOTE(review): presumably the embedding of the thread's description — confirm
embedding: number[];
}

export interface IThreadDescriptionIndex {
addDescription(
description: string,
threadIndex: ThreadIndex | ScoredThreadIndex,
): Promise<void>;
Expand All @@ -34,18 +52,16 @@ export interface IThreadDescriptionIndexFuzzy {
): Promise<ScoredThreadIndex[] | undefined>;
}

export class ThreadDescriptionEmbeddingIndex
implements IThreadDescriptionIndexFuzzy
{
private threads: ScoredThreadIndex[];
private embeddingIndex: TextEmbeddingIndex;
export class ThreadDescriptionIndex implements IThreadDescriptionIndex {
public threads: ScoredThreadIndex[];
public embeddingIndex: TextEmbeddingIndex;

/**
 * Creates an empty thread description index.
 *
 * @param settings Settings used to construct the underlying
 * TextEmbeddingIndex; also exposed as the public `settings` property.
 */
constructor(public settings: TextEmbeddingIndexSettings) {
    this.embeddingIndex = new TextEmbeddingIndex(settings);
    this.threads = [];
}

public async addThread(
public async addDescription(
description: string,
threadIndex: ThreadIndex | ScoredThreadIndex,
): Promise<void> {
Expand All @@ -59,6 +75,14 @@ export class ThreadDescriptionEmbeddingIndex
this.threads.push(threadIndex);
}

/**
 * Adds an already-computed embedding for a thread.
 *
 * The embedding is appended to the embedding index and a matching entry is
 * appended to `threads`, so the two stay aligned by position.
 *
 * @param embedding Embedding to store.
 * @param threadIndex Index of the thread the embedding belongs to.
 */
public add(embedding: NormalizedEmbedding, threadIndex: ThreadIndex): void {
    this.embeddingIndex.add(embedding);
    // Entries added this way always get a score of 1.
    const entry: ScoredThreadIndex = { threadIndex, score: 1 };
    this.threads.push(entry);
}

public async lookupThread(
text: string,
maxMatches?: number,
Expand All @@ -83,4 +107,9 @@ export class ThreadDescriptionEmbeddingIndex
this.embeddingIndex.removeAt(indexOf);
}
}

/**
 * Removes every entry: empties the embedding index and replaces the thread
 * list with a new empty array.
 */
public clear(): void {
    this.embeddingIndex.clear();
    this.threads = [];
}
}
2 changes: 1 addition & 1 deletion ts/packages/knowPro/src/dataFormat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ export interface TextLocation {
/**
 * A range of text described by a start location and an optional end location.
 */
export interface TextRange {
// the start of the range
start: TextLocation;
// the end of the range (exclusive)
end?: TextLocation | undefined;
}

Expand Down
4 changes: 4 additions & 0 deletions ts/packages/knowPro/src/fuzzyIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ export class TextEmbeddingIndex {
this.embeddings.splice(pos, 1);
}

/**
 * Discards all stored embeddings by replacing the backing array with a new
 * empty array.
 */
public clear(): void {
this.embeddings = [];
}

private indexesOfNearestText(
textEmbedding: NormalizedEmbedding,
maxMatches?: number,
Expand Down
78 changes: 76 additions & 2 deletions ts/packages/knowPro/src/import.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,12 @@ import {
TermToRelatedTermsIndex,
TermsToRelatedTermIndexSettings,
} from "./relatedTermsIndex.js";
import { createTextEmbeddingIndexSettings } from "./fuzzyIndex.js";
import {
createTextEmbeddingIndexSettings,
deserializeEmbedding,
serializeEmbedding,
TextEmbeddingIndexSettings,
} from "./fuzzyIndex.js";
import { TimestampToTextRangeIndex } from "./timestampIndex.js";
import {
ITermsToRelatedTermsIndexData,
Expand All @@ -33,6 +38,13 @@ import {
import { addPropertiesToIndex, PropertyIndex } from "./propertyIndex.js";
import { IPropertyToSemanticRefIndex } from "./secondaryIndexes.js";
import { IConversationSecondaryIndexes } from "./secondaryIndexes.js";
import {
IConversationThreadData,
IConversationThreads,
IThreadDataItem,
Thread,
ThreadDescriptionIndex,
} from "./conversationThread.js";

// metadata for podcast messages
export class PodcastMessageMeta implements IKnowledgeSource {
Expand Down Expand Up @@ -114,20 +126,25 @@ export class PodcastMessage implements IMessage<PodcastMessageMeta> {

/**
 * Settings for a Podcast conversation.
 */
export type PodcastSettings = {
// Settings for the related-terms index
relatedTermIndexSettings: TermsToRelatedTermIndexSettings;
// Text embedding settings for the podcast's thread description index
threadSettings: TextEmbeddingIndexSettings;
};

/**
 * Creates the default settings for a Podcast.
 *
 * One embedding settings object is created up front and assigned to
 * `threadSettings`; the same object is also used for the related-terms
 * index settings.
 *
 * NOTE(review): two `embeddingIndexSettings` property lines appear below
 * (one calling createTextEmbeddingIndexSettings() inline, one using the
 * shared local) — this looks like the pre-change and post-change lines of
 * the diff shown together; confirm only the shorthand form is intended.
 */
export function createPodcastSettings(): PodcastSettings {
const embeddingIndexSettings = createTextEmbeddingIndexSettings();
return {
relatedTermIndexSettings: {
embeddingIndexSettings: createTextEmbeddingIndexSettings(),
embeddingIndexSettings,
},
threadSettings: embeddingIndexSettings,
};
}

export class Podcast
implements IConversation<PodcastMessageMeta>, IConversationSecondaryIndexes
{
public settings: PodcastSettings;
public threads: PodcastThreads;

constructor(
public nameTag: string,
public messages: PodcastMessage[],
Expand All @@ -145,6 +162,7 @@ export class Podcast
| undefined = undefined,
) {
this.settings = createPodcastSettings();
this.threads = new PodcastThreads(this.settings.threadSettings);
}

public addMetadataToIndex() {
Expand Down Expand Up @@ -199,6 +217,7 @@ export class Podcast
const result = await buildConversationIndex(this, progressCallback);
this.addMetadataToIndex();
this.buildSecondaryIndexes();
await this.threads.buildIndex();
return result;
}

Expand Down Expand Up @@ -227,6 +246,7 @@ export class Podcast
semanticRefs: this.semanticRefs,
semanticIndexData: this.semanticRefIndex?.serialize(),
relatedTermsIndexData: this.termToRelatedTermsIndex?.serialize(),
threadData: this.threads.serialize(),
};
}

Expand All @@ -244,6 +264,10 @@ export class Podcast
data.relatedTermsIndexData,
);
}
if (data.threadData) {
this.threads = new PodcastThreads(this.settings.threadSettings);
this.threads.deserialize(data.threadData);
}
this.buildSecondaryIndexes();
}

Expand Down Expand Up @@ -313,6 +337,7 @@ export class Podcast

/**
 * Serialized form of a Podcast: the base conversation data plus the optional
 * related-terms index data and thread data.
 */
export interface PodcastData extends IConversationData<PodcastMessage> {
// Serialized related-terms index, if one exists
relatedTermsIndexData?: ITermsToRelatedTermsIndexData | undefined;
// Serialized conversation threads, if present
threadData?: IConversationThreadData;
}

export async function importPodcast(
Expand Down Expand Up @@ -418,3 +443,52 @@ function randomDate(startHour = 14) {
date.setDate(Math.floor(Math.random() * 28));
return date;
}

/**
 * Threads of a podcast conversation, with an embedding-based index over the
 * thread descriptions.
 *
 * Entry `i` of the description index corresponds to `threads[i]`:
 * `buildIndex()` and `deserialize()` both populate the index in thread
 * order, and `serialize()` reads it back by the same position.
 */
class PodcastThreads implements IConversationThreads {
    public threads: Thread[];
    public threadDescriptionIndex: ThreadDescriptionIndex;

    /**
     * @param settings Text embedding settings for the description index.
     */
    constructor(settings: TextEmbeddingIndexSettings) {
        this.threads = [];
        this.threadDescriptionIndex = new ThreadDescriptionIndex(settings);
    }

    /**
     * (Re)builds the description index from the current `threads`.
     *
     * The index is cleared first so the method is safe to call repeatedly
     * (for example after `deserialize()`, or once from the conversation's
     * index build and once explicitly). Without clearing, each call would
     * append a second set of entries and break the position alignment
     * between `threads` and the index.
     */
    public async buildIndex(): Promise<void> {
        this.threadDescriptionIndex.clear();
        for (let i = 0; i < this.threads.length; ++i) {
            const thread = this.threads[i];
            await this.threadDescriptionIndex.addDescription(
                thread.description,
                i,
            );
        }
    }

    /**
     * Serializes every thread together with the embedding stored at the
     * same position in the description index.
     *
     * NOTE(review): assumes the index holds an embedding for each thread,
     * i.e. `buildIndex()` or `deserialize()` ran after the last change to
     * `threads` — confirm how `embeddingIndex.get` behaves otherwise.
     */
    public serialize(): IConversationThreadData {
        const threadData: IThreadDataItem[] = [];
        const embeddingIndex = this.threadDescriptionIndex.embeddingIndex;
        for (let i = 0; i < this.threads.length; ++i) {
            const thread = this.threads[i];
            threadData.push({
                thread,
                embedding: serializeEmbedding(embeddingIndex.get(i)),
            });
        }
        return {
            threads: threadData,
        };
    }

    /**
     * Replaces the current threads and index with the contents of `data`.
     * When `data.threads` is absent, the existing state is left untouched.
     *
     * @param data Previously serialized thread data.
     */
    public deserialize(data: IConversationThreadData): void {
        if (data.threads) {
            this.threads = [];
            this.threadDescriptionIndex.clear();
            for (let i = 0; i < data.threads.length; ++i) {
                this.threads.push(data.threads[i].thread);
                // Stored embeddings are reused as-is; nothing is recomputed.
                const embedding = deserializeEmbedding(
                    data.threads[i].embedding,
                );
                this.threadDescriptionIndex.add(embedding, i);
            }
        }
    }
}
1 change: 1 addition & 0 deletions ts/packages/knowPro/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ export * from "./conversationIndex.js";
export * from "./secondaryIndexes.js";
export * from "./relatedTermsIndex.js";
export * from "./search.js";
export * from "./conversationThread.js";
10 changes: 10 additions & 0 deletions ts/packages/knowPro/src/propertyIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
ScoredSemanticRef,
SemanticRef,
SemanticRefIndex,
Tag,
} from "./dataFormat.js";
import { conversation } from "knowledge-processor";
import { IPropertyToSemanticRefIndex } from "./secondaryIndexes.js";
Expand All @@ -19,6 +20,7 @@ export enum PropertyNames {
Subject = "subject",
Object = "object",
IndirectObject = "indirectObject",
Tag = "tag",
}

function addFacet(
Expand Down Expand Up @@ -124,6 +126,14 @@ export function addPropertiesToIndex(
semanticRefIndex,
);
break;
case "tag":
const tag = semanticRef.knowledge as Tag;
propertyIndex.addProperty(
PropertyNames.Tag,
tag.text,
semanticRefIndex,
);
break;
}
}
}
Expand Down
Loading

0 comments on commit d5b4ab3

Please sign in to comment.