-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[python/knowpro] Python translation of packages/knowPro/src/interfaces…
….ts (#786) Here I am trying to provide a faithful translation of knowPro's interfaces.ts to knowpro/interfaces.py under TypeAgent/python (!). Translation details: - I also translated interfaces.ts's dependency knowledgeProcessor/src/conversation/knowledgeSchema.ts to knowpro/kplib.py. - For field and method names, camelCase is translated into snake_case. (Class names remain PascalCase.) - Interfaces are turned into `@runtime_checkable` classes deriving from `Protocol`. - Union types are translated using the `type` keyword. - Unions of several strings are translated to Literal[s1, s2, ...]. - Structural types are treated as interfaces. - Optional fields and arguments (e.g. `foo?: number`) are translated using a union with None and an initialization to None. - Readonly fields are turned into properties with a getter but no setter. (Is this right?) - I translated T[] into Sequence[T] as opposed to list[T]. This makes the sequence itself readonly. - I rearranged the order of some definitions to avoid use-before-def errors. Questions: - Should certain comments before interfaces be translated to docstrings? - Is it okay that I have rearranged the order of classes and types to avoid forward references? (Except one.)
- Loading branch information
1 parent
50db864
commit 32c3b75
Showing
4 changed files
with
442 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# know_pro | ||
|
||
**Experimental prototype**: Working toward a shared understanding of the MVP for structured RAG. | ||
|
||
**Sample code** | ||
|
||
This is an in-progress project aiming at a Pythonic translation of | ||
`TypeAgent/ts/packages/knowPro` to Python. (Pythonic because it | ||
uses Python conventions and types as appropriate.) | ||
|
||
- Python class names correspond 1:1 to TS interface or type names. | ||
- Field and method names are converted from camelCase to snake_case. | ||
- Types and interfaces become runtime-checkable Protocol classes, | ||
except union types which become type aliases. | ||
- Unions of string literals become Literal types. | ||
|
||
## Trademarks | ||
|
||
This project may contain trademarks or logos for projects, products, or services. | ||
Authorized use of Microsoft trademarks or logos is subject to and must follow | ||
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). | ||
Use of Microsoft trademarks or logos in modified versions of this project | ||
must not cause confusion or imply Microsoft sponsorship. | ||
Any use of third-party trademarks or logos are subject to those third-party's policies. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,331 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# Licensed under the MIT License. | ||
|
||
# TODO: | ||
# - See TODOs in kplib.py. | ||
# | ||
# NOTE: | ||
# - I took some liberty with index types and made them int. | ||
# - I rearranged the order in some cases to ensure def-before-ref. | ||
# - I translated readonly to @property. | ||
|
||
from typing import Any, Callable, Literal, Protocol, runtime_checkable, Sequence | ||
from datetime import datetime as Date | ||
|
||
from . import kplib | ||
|
||
|
||
@runtime_checkable
class IKnowledgeSource(Protocol):
    """An object that can provide a KnowledgeResponse structure."""

    def get_knowledge(self) -> kplib.KnowledgeResponse:
        """Return this object's knowledge as a ``kplib.KnowledgeResponse``."""
        raise NotImplementedError
|
||
|
||
@runtime_checkable | ||
class DeletionInfo(Protocol): | ||
timestamp: str | ||
reason: str | None | ||
|
||
|
||
@runtime_checkable | ||
class IMessage[TMeta: IKnowledgeSource = Any](Protocol): | ||
# The text of the message, split into chunks. | ||
text_chunks: Sequence[str] | ||
# For example, e-mail has subject, from and to fields; | ||
# a chat message has a sender and a recipient. | ||
metadata: TMeta | ||
timestamp: str | None = None | ||
tags: Sequence[str] | ||
deletion_info: DeletionInfo | None = None | ||
|
||
|
||
type SemanticRefIndex = int | ||
|
||
|
||
@runtime_checkable | ||
class ScoredSemanticRef(Protocol): | ||
semantic_ref_index: SemanticRefIndex | ||
score: float | ||
|
||
|
||
@runtime_checkable
class ITermToSemanticRefIndex(Protocol):
    """Index mapping term strings to scored semantic refs.

    NOTE(review): the method names here are camelCase while the other
    protocols in this module use snake_case. They are left unchanged
    because renaming would alter the interface implementers must satisfy.
    """

    def getTerms(self) -> Sequence[str]:
        """Return the terms as a sequence of strings."""
        raise NotImplementedError

    def addTerm(
        self, term: str, semantic_ref_index: SemanticRefIndex | ScoredSemanticRef
    ) -> None:
        """Add ``term`` for a semantic ref given as a bare index or a scored ref."""
        raise NotImplementedError

    def removeTerm(self, term: str, semantic_ref_index: SemanticRefIndex) -> None:
        """Remove ``term`` for the given semantic ref index."""
        raise NotImplementedError

    def lookupTerm(self, term: str) -> Sequence[ScoredSemanticRef] | None:
        """Return the scored semantic refs for ``term``, or None."""
        raise NotImplementedError
|
||
|
||
# Allowed values for the ``knowledge_type`` fields, as string literals.
type KnowledgeType = Literal["entity", "action", "topic", "tag"]
|
||
|
||
# Messages are addressed by a plain integer index.
type MessageIndex = int
|
||
|
||
@runtime_checkable
class Topic(Protocol):
    """Knowledge item that carries only its text.

    NOTE: any object with a ``text`` attribute satisfies this protocol at
    runtime, so ``isinstance`` cannot tell a Topic from other text-only types.
    """

    text: str
|
||
|
||
@runtime_checkable
class Tag(Protocol):
    """Knowledge item that carries only its text.

    NOTE: any object with a ``text`` attribute satisfies this protocol at
    runtime, so ``isinstance`` cannot tell a Tag from other text-only types.
    """

    text: str
|
||
|
||
# Union of the knowledge payload types: two from kplib plus Topic and Tag.
type Knowledge = kplib.ConcreteEntity | kplib.Action | Topic | Tag
|
||
|
||
@runtime_checkable
class TextLocation(Protocol):
    """A position given by message index, chunk index and character index."""

    # The index of the message.
    message_index: MessageIndex
    # The index of the chunk.
    # NOTE(review): camelCase, unlike ``message_index``; kept unchanged to
    # preserve the interface.
    chunkIndex: int | None
    # The index of the character within the chunk.
    # NOTE(review): camelCase as well; kept unchanged to preserve the interface.
    charIndex: int | None
|
||
|
||
@runtime_checkable
class TextRange(Protocol):
    """A text range within a session."""

    # The start of the range.
    start: TextLocation
    # The end of the range (exclusive); may be None.
    end: TextLocation | None
|
||
|
||
@runtime_checkable
class SemanticRef(Protocol):
    """A piece of knowledge tied to a text range and addressed by an index."""

    semantic_ref_index: SemanticRefIndex
    # NOTE: this attribute shares its name with the ``range`` builtin.
    range: TextRange
    knowledge_type: KnowledgeType
    knowledge: Knowledge
|
||
|
||
@runtime_checkable | ||
class DateRange(Protocol): | ||
start: Date | ||
# Inclusive. | ||
end: Date | None | ||
|
||
|
||
@runtime_checkable | ||
class Term(Protocol): | ||
text: str | ||
# Optional weighting for these matches. | ||
weight: float | None | ||
|
||
|
||
@runtime_checkable
class ScoredKnowledge(Protocol):
    """A knowledge item, its knowledge type, and a floating-point score."""

    knowledge_type: KnowledgeType
    knowledge: Knowledge
    score: float
|
||
|
||
@runtime_checkable
class IPropertyToSemanticRefIndex(Protocol):
    """Allows for faster retrieval of name, value properties."""

    def get_values(self) -> Sequence[str]:
        """Return the values as a sequence of strings."""
        raise NotImplementedError

    def add_property(
        self,
        property_name: str,
        value: str,
        semantic_ref_index: SemanticRefIndex | ScoredSemanticRef,
    ) -> None:
        """Add a (name, value) property for a semantic ref.

        The ref is given either as a bare index or as a scored ref.
        """
        raise NotImplementedError

    def lookup_property(
        self,
        property_name: str,
        value: str,
    ) -> Sequence[ScoredSemanticRef] | None:
        """Return the scored semantic refs for a (name, value) pair, or None."""
        raise NotImplementedError
|
||
|
||
@runtime_checkable
class TimestampedTextRange(Protocol):
    """A text range paired with a timestamp string."""

    timestamp: str
    # NOTE: this attribute shares its name with the ``range`` builtin.
    range: TextRange
|
||
|
||
@runtime_checkable
class ITimestampToTextRangeIndex(Protocol):
    """Index from message timestamps to text ranges."""

    def add_timestamp(self, message_index: MessageIndex, timestamp: str) -> None:
        """Record ``timestamp`` for the message at ``message_index``."""
        raise NotImplementedError

    # NOTE(review): ``message_imestamps`` looks like a typo for
    # ``message_timestamps``; kept unchanged to preserve the interface.
    def add_timestamps(
        self,
        message_imestamps: Sequence[tuple[MessageIndex, str]],
    ) -> bool:
        """Record several (message index, timestamp) pairs at once.

        Returns a bool; its meaning is not specified by this protocol.
        """
        raise NotImplementedError

    def lookup_range(self, date_range: DateRange) -> Sequence[TimestampedTextRange]:
        """Return text ranges in the given date range."""
        raise NotImplementedError
|
||
|
||
@runtime_checkable
class ITermToRelatedTerms(Protocol):
    """Synchronous lookup from a term's text to related terms."""

    def lookup_term(self, text: str) -> Sequence[Term] | None:
        """Return the terms related to ``text``, or None."""
        raise NotImplementedError
|
||
|
||
@runtime_checkable
class ITermToRelatedTermsFuzzy(Protocol):
    """Asynchronous related-term index with match-count and score limits."""

    async def add_terms(
        self,
        terms: Sequence[str],
        event_handler: "IndexingEventHandlers | None" = None,
    ) -> None:
        """Add ``terms``; ``event_handler`` is optional and defaults to None."""
        raise NotImplementedError

    async def lookup_term(
        self,
        text: str,
        max_matches: int | None = None,
        threshold_score: float | None = None,
    ) -> Sequence[Term]:
        """Return the terms related to ``text``.

        ``max_matches`` and ``threshold_score`` are optional limits that
        default to None.
        """
        raise NotImplementedError

    async def lookup_terms(
        self,
        text_array: Sequence[str],
        max_matches: int | None = None,
        threshold_score: float | None = None,
    ) -> Sequence[Sequence[Term]]:
        """Batch form of ``lookup_term``: return a sequence of term sequences."""
        raise NotImplementedError
|
||
|
||
@runtime_checkable
class ITermToRelatedTermsIndex(Protocol):
    """Groups the alias lookup and the fuzzy index as getter-only properties."""

    @property
    def aliases(self) -> ITermToRelatedTerms | None:
        """The alias lookup, or None."""
        raise NotImplementedError

    @property
    def fuzzy_index(self) -> ITermToRelatedTermsFuzzy | None:
        """The fuzzy related-terms index, or None."""
        raise NotImplementedError
|
||
|
||
@runtime_checkable
class Thread(Protocol):
    """A Thread is a set of text ranges in a conversation."""

    description: str
    ranges: Sequence[TextRange]
|
||
|
||
type ThreadIndex = int | ||
|
||
|
||
@runtime_checkable | ||
class ScoredThreadIndex(Protocol): | ||
thread_index: ThreadIndex | ||
score: float | ||
|
||
|
||
@runtime_checkable
class IConversationThreads(Protocol):
    """The threads of a conversation, with async add and lookup."""

    @property
    def threads(self) -> Sequence[Thread]:
        """The threads, exposed as a getter-only property."""
        raise NotImplementedError

    async def add_thread(self, thread: Thread) -> None:
        """Add ``thread``."""
        raise NotImplementedError

    async def lookup_thread(
        self,
        thread_description: str,
        max_matches: int | None = None,
        threshold_score: float | None = None,
    ) -> Sequence[ScoredThreadIndex] | None:
        """Return scored thread indexes for ``thread_description``, or None.

        ``max_matches`` and ``threshold_score`` are optional limits that
        default to None.
        """
        raise NotImplementedError
|
||
|
||
@runtime_checkable
class IConversationSecondaryIndexes(Protocol):
    """Bundle of secondary indexes; each member may be None."""

    property_to_semantic_ref_index: IPropertyToSemanticRefIndex | None
    # NOTE(review): the next two names are camelCase, unlike their siblings;
    # kept unchanged to preserve the interface.
    timestampIndex: ITimestampToTextRangeIndex | None
    termToRelatedTermsIndex: ITermToRelatedTermsIndex | None
    threads: IConversationThreads | None
|
||
|
||
@runtime_checkable | ||
class IConversation[TMeta: IKnowledgeSource = Any](Protocol): | ||
name_tag: str | ||
tags: Sequence[str] | ||
messages: Sequence[IMessage[TMeta]] | ||
semantic_refs: Sequence[SemanticRef] | None | ||
semantic_ref_index: ITermToSemanticRefIndex | None | ||
secondaryIndexes: IConversationSecondaryIndexes | None | ||
|
||
|
||
# ------------------------ | ||
# Serialization formats | ||
# ------------------------ | ||
|
||
|
||
@runtime_checkable
class ITermToSemanticRefIndexItem(Protocol):
    """One serialized index entry: a term and its scored semantic refs."""

    term: str
    semantic_ref_indices: Sequence[ScoredSemanticRef]
|
||
|
||
@runtime_checkable
class ITermToSemanticRefIndexData(Protocol):
    """Persistent form of a term index."""

    items: Sequence[ITermToSemanticRefIndexItem]
|
||
|
||
@runtime_checkable | ||
class IConversationData[TMessage](Protocol): | ||
name_tag: str | ||
messages: Sequence[TMessage] | ||
tags: Sequence[str] | ||
semantic_refs: Sequence[SemanticRef] | ||
semantic_index_data: ITermToSemanticRefIndexData | None | ||
|
||
|
||
# ------------------------ | ||
# Indexing | ||
# ------------------------ | ||
|
||
|
||
@runtime_checkable
class IndexingEventHandlers(Protocol):
    """Optional indexing callbacks; each one returns a bool and defaults to None."""

    # Takes a TextLocation and a kplib.KnowledgeResponse.
    # NOTE(review): the name looks like a typo for ``on_knowledge_extracted``;
    # kept unchanged to preserve the interface.
    on_knowledge_xtracted: (
        Callable[[TextLocation, kplib.KnowledgeResponse], bool] | None
    ) = None
    # Takes two string sequences and an int.
    on_embeddings_created: (
        Callable[[Sequence[str], Sequence[str], int], bool] | None
    ) = None
|
||
|
||
@runtime_checkable
class IndexingResults(Protocol):
    """Outcome of an indexing run; both fields default to None."""

    # NOTE(review): camelCase, unlike ``error``; kept unchanged to preserve
    # the interface.
    chunksIndexedUpto: TextLocation | None = None
    # Error text, or None.
    error: str | None = None
Oops, something went wrong.