-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path03_create-chat-completion_function.py
36 lines (29 loc) · 1.09 KB
/
03_create-chat-completion_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
"""
Explores the [create_chat_completion](https://abetlen.github.io/llama-cpp-python/#llama_cpp.llama.Llama.create_chat_completion) function of llama-cpp-python.
"""
import copy, json, logging, pprint
from llama_cpp import Llama
## configure logging -------------------------------------------------
# Root logger is set up for DEBUG and above; every record is prefixed with
# a timestamp, the level, and the module / function / line that emitted it.
logging.basicConfig(
    level=logging.DEBUG,
    format='[%(asctime)s] %(levelname)s [%(module)s-%(funcName)s()::%(lineno)d] %(message)s',
    datefmt='%d/%b/%Y %H:%M:%S' )
# Pass the module's real name (unquoted `__name__`). The quoted string
# '__name__' would create a logger literally called "__name__", which
# cannot be targeted per-module by logging configuration.
log = logging.getLogger( __name__ )
## load model -------------------------------------------------------
# Relative path to the model file handed to Llama.
model_path = '../models/ggml-vicuna-13b-4bit-rev1.bin'
log.debug( 'loading model' )
llm = Llama( model_path=model_path )
log.debug( 'model loaded' )
## run model --------------------------------------------------------
log.debug( 'running model' )
# The conversation sent to the model: a single user message.
messages = [
    {
        'role': 'user',
        'content': 'Question: Who is Ada Lovelace? Answer:',
    },
]
# Generation settings, gathered in one place and forwarded as keyword
# arguments to create_chat_completion().
generation_settings = dict(
    temperature=0.2,
    top_p=0.95,
    top_k=40,
    stream=False,
    stop=[],
    max_tokens=100,
    repeat_penalty=1.1,
)
output = llm.create_chat_completion( messages, **generation_settings )

## show output ------------------------------------------------------
# pformat() renders the returned object in a readable multi-line layout.
log.debug( f'output, ``{pprint.pformat(output)}``' )