Skip to content

Commit

Permalink
knowpro: Message Indexing (#796)
Browse files Browse the repository at this point in the history
* Message Indexing 
* Refactored and rearranged fuzzy/embedding indexing for better robustness.
  * Return partial results to allow restarts
  * Cleaner batch behavior
* Classic RAG command
* Setting up for persisting message index serialization.
  • Loading branch information
umeshma authored Mar 7, 2025
1 parent e2470db commit 133861e
Show file tree
Hide file tree
Showing 12 changed files with 736 additions and 235 deletions.
199 changes: 120 additions & 79 deletions ts/examples/chat/src/memory/knowproMemory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export async function createKnowproCommands(
commands.kpSearchTerms = searchTerms;
commands.kpSearchV1 = searchV1;
commands.kpSearch = search;
commands.kpPodcastRag = podcastRag;
commands.kpEntities = entities;
commands.kpPodcastBuildIndex = podcastBuildIndex;
commands.kpPodcastBuildMessageIndex = podcastBuildMessageIndex;
Expand Down Expand Up @@ -454,7 +455,7 @@ export async function createKnowproCommands(
function searchDef(): CommandMetadata {
return {
description:
"Search using natural language and knowlege-processor search filters",
"Search using natural language and old knowlege-processor search filters",
args: {
query: arg("Search query"),
},
Expand Down Expand Up @@ -560,88 +561,37 @@ export async function createKnowproCommands(
}
}

function createSearchGroup(
termArgs: string[],
namedArgs: NamedArgs,
commandDef: CommandMetadata,
andTerms: boolean = false,
): kp.SearchTermGroup {
const searchTerms = parseQueryTerms(termArgs);
const propertyTerms = propertyTermsFromNamedArgs(namedArgs, commandDef);
function ragDef(): CommandMetadata {
return {
booleanOp: andTerms ? "and" : "or",
terms: [...searchTerms, ...propertyTerms],
description: "Classic rag",
args: {
query: arg("Search query"),
},
options: {
maxToDisplay: argNum("Maximum matches to display", 25),
},
};
}

function propertyTermsFromNamedArgs(
namedArgs: NamedArgs,
commandDef: CommandMetadata,
): kp.PropertySearchTerm[] {
return createPropertyTerms(namedArgs, commandDef);
}

function createPropertyTerms(
namedArgs: NamedArgs,
commandDef: CommandMetadata,
nameFilter?: (name: string) => boolean,
): kp.PropertySearchTerm[] {
const keyValues = keyValuesFromNamedArgs(namedArgs, commandDef);
const propertyNames = nameFilter
? Object.keys(keyValues).filter(nameFilter)
: Object.keys(keyValues);
const propertySearchTerms: kp.PropertySearchTerm[] = [];
for (const propertyName of propertyNames) {
const allValues = splitTermValues(keyValues[propertyName]);
for (const value of allValues) {
propertySearchTerms.push(
kp.createPropertySearchTerm(propertyName, value),
);
}
commands.kpPodcastRag.metadata = ragDef();
async function podcastRag(args: string[]): Promise<void> {
if (!ensureConversationLoaded()) {
return;
}
return propertySearchTerms;
}

function whenFilterFromNamedArgs(
namedArgs: NamedArgs,
commandDef: CommandMetadata,
): kp.WhenFilter {
let filter: kp.WhenFilter = {
knowledgeType: namedArgs.ktype,
};
const conv: kp.IConversation | undefined =
context.podcast ?? context.images;
const dateRange = kp.getTimeRangeForConversation(conv!);
if (dateRange) {
let startDate: Date | undefined;
let endDate: Date | undefined;
// Did they provide an explicit date range?
if (namedArgs.startDate || namedArgs.endDate) {
startDate = argToDate(namedArgs.startDate) ?? dateRange.start;
endDate = argToDate(namedArgs.endDate) ?? dateRange.end;
} else {
// They may have provided a relative date range
if (namedArgs.startMinute >= 0) {
startDate = dateTime.addMinutesToDate(
dateRange.start,
namedArgs.startMinute,
);
}
if (namedArgs.endMinute > 0) {
endDate = dateTime.addMinutesToDate(
dateRange.start,
namedArgs.endMinute,
);
}
}
if (startDate) {
filter.dateRange = {
start: startDate,
end: endDate,
};
}
const messageIndex =
context.conversation?.secondaryIndexes?.messageIndex;
if (!messageIndex) {
context.printer.writeError(
"No message text index. Run kpPodcastBuildMessageIndex",
);
return;
}
return filter;
const namedArgs = parseNamedArguments(args, ragDef());
const matches = await messageIndex.lookupMessages(namedArgs.query);
context.printer.writeScoredMessages(
matches,
context.conversation?.messages!,
namedArgs.maxToDisplay,
);
}

function entitiesDef(): CommandMetadata {
Expand Down Expand Up @@ -728,6 +678,9 @@ export async function createKnowproCommands(
commands.kpPodcastBuildMessageIndex.metadata =
podcastBuildMessageIndexDef();
async function podcastBuildMessageIndex(args: string[]): Promise<void> {
if (!ensureConversationLoaded()) {
return;
}
const namedArgs = parseNamedArguments(
args,
podcastBuildMessageIndexDef(),
Expand All @@ -736,7 +689,7 @@ export async function createKnowproCommands(
`Indexing ${context.conversation?.messages.length} messages`,
);
let progress = new ProgressBar(context.printer, namedArgs.maxMessages);
await context.podcast?.buildMessageIndex(
const result = await context.podcast!.buildMessageIndex(
createIndexingEventHandler(
context,
progress,
Expand All @@ -745,8 +698,12 @@ export async function createKnowproCommands(
namedArgs.batchSize,
);
progress.complete();
context.printer.writeListIndexingResult(result);
}

//-------------------------
// Index Image Building
//--------------------------
function imageCollectionBuildIndexDef(): CommandMetadata {
return {
description: "Build image collection index",
Expand Down Expand Up @@ -789,6 +746,90 @@ export async function createKnowproCommands(
End COMMANDS
------------*/

/**
 * Builds a search term group from a command's positional and named arguments.
 *
 * The group's terms are the query terms parsed from `termArgs` followed by
 * the property terms derived from `namedArgs`.
 *
 * @param termArgs Positional term arguments, parsed with `parseQueryTerms`.
 * @param namedArgs Parsed named arguments for the command.
 * @param commandDef Metadata of the command the arguments belong to.
 * @param andTerms When true the terms are combined with "and"; otherwise "or".
 * @returns The combined search term group.
 */
function createSearchGroup(
    termArgs: string[],
    namedArgs: NamedArgs,
    commandDef: CommandMetadata,
    andTerms: boolean = false,
): kp.SearchTermGroup {
    const group: kp.SearchTermGroup = {
        booleanOp: andTerms ? "and" : "or",
        terms: [
            // Plain query terms first, then property terms.
            ...parseQueryTerms(termArgs),
            ...propertyTermsFromNamedArgs(namedArgs, commandDef),
        ],
    };
    return group;
}

/**
 * Converts every named argument into property search terms.
 *
 * Delegates to `createPropertyTerms` without a name filter.
 *
 * @param namedArgs Parsed named arguments for the command.
 * @param commandDef Metadata of the command the arguments belong to.
 * @returns The property search terms produced by `createPropertyTerms`.
 */
function propertyTermsFromNamedArgs(
    namedArgs: NamedArgs,
    commandDef: CommandMetadata,
): kp.PropertySearchTerm[] {
    const unfilteredTerms = createPropertyTerms(namedArgs, commandDef);
    return unfilteredTerms;
}

/**
 * Builds property search terms from a command's named arguments.
 *
 * Key/value pairs are obtained from `keyValuesFromNamedArgs`. Each value is
 * split with `splitTermValues`, and one property search term is created per
 * resulting value.
 *
 * @param namedArgs Parsed named arguments for the command.
 * @param commandDef Metadata of the command the arguments belong to.
 * @param nameFilter Optional predicate; when given, only property names it
 * accepts are turned into terms.
 * @returns The property search terms, in key order then value order.
 */
function createPropertyTerms(
    namedArgs: NamedArgs,
    commandDef: CommandMetadata,
    nameFilter?: (name: string) => boolean,
): kp.PropertySearchTerm[] {
    const keyValues = keyValuesFromNamedArgs(namedArgs, commandDef);
    let names = Object.keys(keyValues);
    if (nameFilter) {
        names = names.filter(nameFilter);
    }
    const terms: kp.PropertySearchTerm[] = [];
    for (const name of names) {
        // A single argument may carry several values.
        for (const termValue of splitTermValues(keyValues[name])) {
            const term = kp.createPropertySearchTerm(name, termValue);
            terms.push(term);
        }
    }
    return terms;
}

/**
 * Builds a `kp.WhenFilter` from a command's named arguments.
 *
 * The filter always carries `knowledgeType` from `namedArgs.ktype`. A date
 * range is added only when a conversation (podcast or image collection) is
 * loaded, that conversation has a time range, and the arguments select one:
 *  - explicit `startDate` / `endDate`: a missing side falls back to the
 *    conversation's own start / end;
 *  - otherwise `startMinute` / `endMinute`: offsets in minutes measured from
 *    the conversation's start.
 *
 * @param namedArgs Parsed named arguments for the command.
 * @param commandDef Metadata of the command (not used by this function).
 * @returns The filter, with `dateRange` set when a range could be resolved.
 */
function whenFilterFromNamedArgs(
    namedArgs: NamedArgs,
    commandDef: CommandMetadata,
): kp.WhenFilter {
    const filter: kp.WhenFilter = {
        knowledgeType: namedArgs.ktype,
    };
    const conv: kp.IConversation | undefined =
        context.podcast ?? context.images;
    if (!conv) {
        // Nothing is loaded, so there is no time range to resolve against.
        return filter;
    }
    const dateRange = kp.getTimeRangeForConversation(conv);
    if (!dateRange) {
        return filter;
    }

    let startDate: Date | undefined;
    let endDate: Date | undefined;
    // Did they provide an explicit date range?
    if (namedArgs.startDate || namedArgs.endDate) {
        startDate = argToDate(namedArgs.startDate) ?? dateRange.start;
        endDate = argToDate(namedArgs.endDate) ?? dateRange.end;
    } else {
        // They may have provided a relative date range
        if (namedArgs.startMinute >= 0) {
            startDate = dateTime.addMinutesToDate(
                dateRange.start,
                namedArgs.startMinute,
            );
        }
        if (namedArgs.endMinute > 0) {
            endDate = dateTime.addMinutesToDate(
                dateRange.start,
                namedArgs.endMinute,
            );
            // An end bound without a start bound still needs a range start;
            // anchor it at the conversation start so the end bound is not
            // discarded. This mirrors the fallback used for explicit dates.
            startDate = startDate ?? dateRange.start;
        }
    }
    if (startDate) {
        filter.dateRange = {
            start: startDate,
            end: endDate,
        };
    }
    return filter;
}

function ensureConversationLoaded(): kp.IConversation | undefined {
if (context.conversation) {
return context.conversation;
Expand Down
7 changes: 7 additions & 0 deletions ts/examples/chat/src/memory/knowproPrinter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,13 @@ export class KnowProPrinter extends ChatPrinter {
return this;
}

/**
 * Writes a summary of a list indexing run: the number of completed items,
 * followed by the error text when the result carries one.
 */
public writeListIndexingResult(result: kp.ListIndexingResult) {
    const { numberCompleted, error } = result;
    this.writeLine(`Indexed ${numberCompleted} items`);
    if (!error) {
        return;
    }
    this.writeError(error);
}

public writeSearchFilter(
action: knowLib.conversation.GetAnswerWithTermsActionV2,
) {
Expand Down
5 changes: 4 additions & 1 deletion ts/packages/knowPro/src/conversationThread.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
TextEmbeddingIndex,
TextEmbeddingIndexSettings,
} from "./fuzzyIndex.js";
import { NormalizedEmbedding } from "typeagent";

export interface IConversationThreadData {
threads?: IThreadDataItem[] | undefined;
Expand Down Expand Up @@ -91,13 +92,15 @@ export class ConversationThreads implements IConversationThreads {
if (data.threads) {
this.threads = [];
this.embeddingIndex.clear();
const embeddings: NormalizedEmbedding[] = [];
for (let i = 0; i < data.threads.length; ++i) {
this.threads.push(data.threads[i].thread);
const embedding = deserializeEmbedding(
data.threads[i].embedding,
);
this.embeddingIndex.add(embedding);
embeddings.push(embedding);
}
this.embeddingIndex.deserialize(embeddings);
}
}
}
Loading

0 comments on commit 133861e

Please sign in to comment.