Skip to content

Commit

Permalink
knowpro: entity merging, refactoring (#760)
Browse files Browse the repository at this point in the history
* Ongoing refactor
  * Removed currently unnecessary APIs
* Entity and topic merging:
  * getDistinct* return ConcreteEntity[] and Topic[]
* Reworked merging code; removed dependency on V1 code; temporary
"compositeEntity" type
  * Future: query operator
  • Loading branch information
umeshma authored Feb 26, 2025
1 parent bab1935 commit 04ad5f9
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 136 deletions.
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { DateRange, IConversation } from "./dataFormat.js";
import * as kp from "knowpro";

export function getTimeRangeForConversation(
conversation: IConversation,
): DateRange | undefined {
conversation: kp.IConversation,
): kp.DateRange | undefined {
const messages = conversation.messages;
const start = messages[0].timestamp;
const end = messages[messages.length - 1].timestamp;
Expand Down
3 changes: 2 additions & 1 deletion ts/examples/chat/src/memory/knowproMemory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import {
import path from "path";
import chalk from "chalk";
import { KnowProPrinter } from "./knowproPrinter.js";
import { getTimeRangeForConversation } from "./knowproCommon.js";

type KnowProContext = {
knowledgeModel: ChatModel;
Expand Down Expand Up @@ -512,7 +513,7 @@ export async function createKnowproCommands(
};
const conv: kp.IConversation | undefined =
context.podcast ?? context.images;
const dateRange = kp.getTimeRangeForConversation(conv!);
const dateRange = getTimeRangeForConversation(conv!);
if (dateRange) {
let startDate: Date | undefined;
let endDate: Date | undefined;
Expand Down
5 changes: 3 additions & 2 deletions ts/examples/chat/src/memory/knowproPrinter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import * as kp from "knowpro";
import * as knowLib from "knowledge-processor";
import { ChatPrinter } from "../chatPrinter.js";
import chalk from "chalk";
import { getTimeRangeForConversation } from "./knowproCommon.js";

export class KnowProPrinter extends ChatPrinter {
public sortAsc: boolean = true;
Expand Down Expand Up @@ -269,7 +270,7 @@ export class KnowProPrinter extends ChatPrinter {
chalk.green,
`#${pos + 1} / ${distinctEntities.length}: [${entity.score}]`,
);
this.writeCompositeEntity(entity.item);
this.writeEntity(entity.item);
this.writeLine();
}
}
Expand All @@ -291,7 +292,7 @@ export class KnowProPrinter extends ChatPrinter {

public writeConversationInfo(conversation: kp.IConversation) {
this.writeTitle(conversation.nameTag);
const timeRange = kp.getTimeRangeForConversation(conversation);
const timeRange = getTimeRangeForConversation(conversation);
if (timeRange) {
this.write("Time range: ");
this.writeDateRange(timeRange);
Expand Down
6 changes: 2 additions & 4 deletions ts/packages/knowPro/src/conversationIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { conversation as kpLib } from "knowledge-processor";
import { openai } from "aiclient";
import { Result } from "typechat";
import { async } from "typeagent";
import { facetValueToString } from "./knowledge.js";

export function createKnowledgeModel() {
const chatModelSettings = openai.apiSettingsFromEnv(
Expand Down Expand Up @@ -59,10 +60,7 @@ function addFacet(
if (facet !== undefined) {
semanticRefIndex.addTerm(facet.name, refIndex);
if (facet.value !== undefined) {
semanticRefIndex.addTerm(
kpLib.knowledgeValueToString(facet.value),
refIndex,
);
semanticRefIndex.addTerm(facetValueToString(facet), refIndex);
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions ts/packages/knowPro/src/dataFormat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,11 @@ export type Term = {
weight?: number | undefined;
};

/**
 * Pairs a value with a numeric score.
 * The item type parameter defaults to `any` when it is not supplied.
 */
export interface Scored<T = any> {
// The value being scored.
item: T;
// The score associated with `item`.
score: number;
}

// Also see:
// - secondaryIndex.ts for optional secondary interfaces
// - search.ts for search interfaces.
Expand Down
1 change: 0 additions & 1 deletion ts/packages/knowPro/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
export * from "./import.js";
export * from "./importImages.js";
export * from "./dataFormat.js";
export * from "./conversation.js";
export * from "./conversationIndex.js";
export * from "./secondaryIndexes.js";
export * from "./relatedTermsIndex.js";
Expand Down
200 changes: 200 additions & 0 deletions ts/packages/knowPro/src/knowledge.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

import { conversation as kpLib } from "knowledge-processor";
import { collections, getTopK } from "typeagent";
import { unionArrays } from "./collections.js";
import { Scored, ScoredSemanticRef, SemanticRef, Topic } from "./dataFormat.js";

/**
 * Converts the value of a facet to a string.
 *
 * - Object values are rendered as "<amount> <units>".
 * - Every other value is rendered with its own `toString()`.
 * - A missing value (`undefined` or `null`) yields an empty string instead of
 *   throwing.
 *
 * @param facet The facet whose `value` is converted.
 * @returns The string form of `facet.value`, or "" when the facet has no value.
 */
export function facetValueToString(facet: kpLib.Facet): string {
    const value = facet.value;
    // Guard first: `typeof null === "object"` would otherwise send null down
    // the object branch, and `undefined.toString()` throws.
    if (value === undefined || value === null) {
        return "";
    }
    if (typeof value === "object") {
        return `${value.amount} ${value.units}`;
    }
    return value.toString();
}

/**
 * Collapses the matched topic semantic refs into distinct topics.
 *
 * Matches whose semantic ref is not of knowledge type "topic" are ignored.
 * Topics are considered the same when their `text` is identical; the first
 * topic seen for a given text is kept and its score is raised to the highest
 * score among its matches.
 *
 * @param semanticRefs All semantic refs, indexed by `semanticRefIndex`.
 * @param semanticRefMatches Scored matches pointing into `semanticRefs`.
 * @param topK When defined and greater than zero, the result is produced by
 *   `getTopK` with this limit; otherwise every distinct topic is returned.
 * @returns The distinct topics with their scores.
 */
export function mergeTopics(
    semanticRefs: SemanticRef[],
    semanticRefMatches: ScoredSemanticRef[],
    topK?: number,
): Scored<Topic>[] {
    const topicsByText = new Map<string, Scored<Topic>>();
    for (const match of semanticRefMatches) {
        const ref = semanticRefs[match.semanticRefIndex];
        if (ref.knowledgeType === "topic") {
            const topic = ref.knowledge as Topic;
            const seen = topicsByText.get(topic.text);
            if (!seen) {
                topicsByText.set(topic.text, {
                    item: topic,
                    score: match.score,
                });
            } else if (match.score > seen.score) {
                // Keep the best score observed for this topic text.
                seen.score = match.score;
            }
        }
    }
    // No usable limit: hand back every distinct topic in insertion order.
    if (topK === undefined || !(topK > 0)) {
        return [...topicsByText.values()];
    }
    return getTopK(topicsByText.values(), topK);
}

/**
 * Produces the merged entities for a set of semantic ref matches.
 *
 * The entity matches are extracted with `getScoredEntities` and then combined
 * by `mergeScoredEntities`.
 *
 * @param semanticRefs All semantic refs, indexed by `semanticRefIndex`.
 * @param semanticRefMatches Scored matches pointing into `semanticRefs`.
 * @param topK Optional limit, forwarded unchanged to `mergeScoredEntities`.
 * @returns The merged entities with their scores.
 */
export function mergedEntities(
    semanticRefs: SemanticRef[],
    semanticRefMatches: ScoredSemanticRef[],
    topK?: number,
): Scored<kpLib.ConcreteEntity>[] {
    const entityMatches = getScoredEntities(semanticRefs, semanticRefMatches);
    return mergeScoredEntities(entityMatches, topK);
}

// Working representation of an entity while entities are being merged.
// concreteToMergedEntity fills `name` with the lower-cased entity name and
// `type` with the result of collections.lowerAndSort on the entity's types.
type MergedEntity = {
name: string;
type: string[];
// Facet values keyed by facet name; undefined when the source entity had no facets.
facets?: MergedFacets | undefined;
};

// Maps a facet name to the string values collected for that name.
type MergedFacets = collections.MultiMap<string, string>;

/**
 * Merges scored entities that share the same (lower-cased) name.
 *
 * Each incoming entity is converted with `concreteToMergedEntity`. The first
 * entity seen for a name becomes the merge target; later entities with that
 * name are unioned into it and the target keeps the highest score seen.
 *
 * @param scoredEntities The scored entities to merge.
 * @param topK When defined and greater than zero, the merged entities are
 *   passed through `getTopK` with this limit before conversion.
 * @returns The merged entities converted back with `mergedToConcreteEntity`.
 */
function mergeScoredEntities(
    scoredEntities: IterableIterator<Scored<kpLib.ConcreteEntity>>,
    topK?: number,
): Scored<kpLib.ConcreteEntity>[] {
    const byName = new Map<string, Scored<MergedEntity>>();
    for (const incoming of scoredEntities) {
        const candidate = concreteToMergedEntity(incoming.item);
        const target = byName.get(candidate.name);
        if (!target) {
            byName.set(candidate.name, {
                item: candidate,
                score: incoming.score,
            });
            continue;
        }
        // Only raise the score when the union actually took place.
        if (
            unionEntities(target.item, candidate) &&
            target.score < incoming.score
        ) {
            target.score = incoming.score;
        }
    }

    const selected =
        topK !== undefined && topK > 0
            ? getTopK(byName.values(), topK)
            : byName.values();

    return Array.from(selected, (merged) => ({
        item: mergedToConcreteEntity(merged.item),
        score: merged.score,
    }));
}

/**
 * Unions `other` into `to`, modifying `to` in place.
 *
 * Nothing is changed unless both entities have the same name. On a name match
 * the types are combined with `unionArrays` and the facets with `unionFacets`.
 *
 * @param to The entity that receives the merged types and facets.
 * @param other The entity whose types and facets are merged in.
 * @returns true when the names matched and the union was applied, else false.
 */
function unionEntities(to: MergedEntity, other: MergedEntity): boolean {
    const sameName = to.name === other.name;
    if (sameName) {
        to.type = unionArrays(to.type, other.type)!;
        to.facets = unionFacets(to.facets, other.facets);
    }
    return sameName;
}

/**
 * Converts a concrete entity into the representation used while merging.
 *
 * The name is lower-cased, the types go through `collections.lowerAndSort`,
 * and the facets (when present) are converted with `facetsToMergedFacets`.
 *
 * @param entity The entity to convert.
 * @returns The merge representation; `facets` is undefined when the entity
 *   has no facets.
 */
function concreteToMergedEntity(entity: kpLib.ConcreteEntity): MergedEntity {
    const name = entity.name.toLowerCase();
    const type = collections.lowerAndSort(entity.type)!;
    let facets: MergedFacets | undefined = undefined;
    if (entity.facets) {
        facets = facetsToMergedFacets(entity.facets);
    }
    return { name, type, facets };
}

/**
 * Converts a merged entity back into a concrete entity.
 *
 * The name and type are carried over as-is. A `facets` property is only set
 * when the merged entity has facets and their `size` is greater than zero.
 *
 * @param mergedEntity The merged entity to convert.
 * @returns The equivalent concrete entity.
 */
function mergedToConcreteEntity(
    mergedEntity: MergedEntity,
): kpLib.ConcreteEntity {
    const { name, type, facets } = mergedEntity;
    if (facets && facets.size > 0) {
        return { name, type, facets: mergedFacetsToFacets(facets) };
    }
    // No facets to report: leave the property off entirely.
    return { name, type };
}

/**
 * Builds the merge representation of a list of facets: a multi-map from the
 * lower-cased facet name to the lower-cased string form of each facet value.
 * Duplicate values for a name are added only once (`addUnique`).
 *
 * Facets that carry no value (`undefined` or `null`) are skipped, so that a
 * single value-less facet cannot abort the merge with a TypeError when its
 * value is stringified.
 *
 * @param facets The facets to convert.
 * @returns The facets grouped by lower-cased name.
 */
function facetsToMergedFacets(facets: kpLib.Facet[]): MergedFacets {
    const mergedFacets: MergedFacets = new collections.MultiMap<
        string,
        string
    >();
    for (const facet of facets) {
        // A facet without a value contributes nothing to the merged values.
        if (facet.value === undefined || facet.value === null) {
            continue;
        }
        const name = facet.name.toLowerCase();
        const value = facetValueToString(facet).toLowerCase();
        mergedFacets.addUnique(name, value);
    }
    return mergedFacets;
}

/**
 * Flattens merged facets back into a facet list.
 *
 * One facet is produced per facet name that has at least one value; its value
 * is all of that name's values joined with "; ". Names without values are
 * left out.
 *
 * @param mergedFacets The merged facets to flatten.
 * @returns The resulting facets, in the key order of `mergedFacets`.
 */
function mergedFacetsToFacets(mergedFacets: MergedFacets): kpLib.Facet[] {
    const result: kpLib.Facet[] = [];
    for (const name of mergedFacets.keys()) {
        const values = mergedFacets.get(name);
        if (values && values.length > 0) {
            result.push({ name, value: values.join("; ") });
        }
    }
    return result;
}

/**
 * Unions the facets of `other` into `to`, modifying `to` in place.
 *
 * When either side is undefined the other side is returned untouched.
 * Otherwise every value of every facet name in `other` is added to `to`
 * through `addUnique`.
 *
 * @param to The facets that receive the values.
 * @param other The facets whose values are added.
 * @returns `to` after the union, or whichever argument is defined, or
 *   undefined when both are undefined.
 */
function unionFacets(
    to: MergedFacets | undefined,
    other: MergedFacets | undefined,
): MergedFacets | undefined {
    if (to === undefined || other === undefined) {
        // Nothing to combine: return whichever side exists (if any).
        return to === undefined ? other : to;
    }
    for (const name of other.keys()) {
        const values = other.get(name);
        if (!values) {
            continue;
        }
        for (const value of values) {
            to.addUnique(name, value);
        }
    }
    return to;
}

/**
 * Lazily yields the entity behind each match, paired with the match score.
 *
 * Matches are visited in order; those whose semantic ref is not of knowledge
 * type "entity" are skipped.
 *
 * @param semanticRefs All semantic refs, indexed by `semanticRefIndex`.
 * @param semanticRefMatches Scored matches pointing into `semanticRefs`.
 * @returns An iterator of scored entities.
 */
function* getScoredEntities(
    semanticRefs: SemanticRef[],
    semanticRefMatches: ScoredSemanticRef[],
): IterableIterator<Scored<kpLib.ConcreteEntity>> {
    for (const match of semanticRefMatches) {
        const ref = semanticRefs[match.semanticRefIndex];
        if (ref.knowledgeType !== "entity") {
            continue;
        }
        const entity = ref.knowledge as kpLib.ConcreteEntity;
        yield { score: match.score, item: entity };
    }
}
3 changes: 2 additions & 1 deletion ts/packages/knowPro/src/propertyIndex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
import { conversation as kpLib } from "knowledge-processor";
import { IPropertyToSemanticRefIndex } from "./secondaryIndexes.js";
import { TextRangesInScope } from "./collections.js";
import { facetValueToString } from "./knowledge.js";

export enum PropertyNames {
EntityName = "name",
Expand Down Expand Up @@ -37,7 +38,7 @@ function addFacet(
if (facet.value !== undefined) {
propertyIndex.addProperty(
PropertyNames.FacetValue,
kpLib.knowledgeValueToString(facet.value),
facetValueToString(facet),
semanticRefIndex,
);
}
Expand Down
Loading

0 comments on commit 04ad5f9

Please sign in to comment.