#from inference import *
modelName = '/notebooks/github_vicuna7b/vic7b'
device = 'cpu'
outFilepath = 'out1_3.txt'
useLm = True # use LM or bypass for fast prototyping?
# supergoal as text
supergoal = 'google information about theory of parallel universes' # good results
##############
## LangChain
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel
# Define a data model
class LangChainA(BaseModel):
    nextsubgoal: str
    justification: str
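# example of a parsed instance (hypothetical field values, for illustration only):
#   LangChainA(nextsubgoal='research existing theories', justification='background knowledge is needed first')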
#############
## UTILS
# tries to parse a string which is numbered at the beginning
# /return None if parsing failed
def tryParseNumbered(s):
    import re
    # s = "1. Example text" # DBG
    match = re.match(r'^\d+\.\s(.+)', s)
    if match:
        s0 = match.group(1)
        return s0
    return None
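# usage example:
assert tryParseNumbered('2. google for sources') == 'google for sources'
assert tryParseNumbered('google for sources') is None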
def tryParseFunctioncall(s):
    import re
    # ex: 'a("blah")'
    match = re.match(r'^(.*)\(\"(.*)\"\)$', s)
    if match:
        return (match.group(1), match.group(2))
    return None
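# usage example:
assert tryParseFunctioncall('google("parallel universes")') == ('google', 'parallel universes')
assert tryParseFunctioncall('exit()') is None # parens without a quoted argument don't match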
# given a text from the LM, extract the response
# returns None if it failed
def extractLmResponse(text):
    z0 = '### Assistant: '
    idx0 = text.find(z0)
    if idx0 == -1:
        return None
    idx1 = idx0+len(z0)
    text0 = text[idx1:]
    # try to find the start of the next human turn
    z1 = '### Human: '
    idx2 = text0.find(z1)
    if idx2 == -1:
        return text0 # not found - response is still valid, just truncated, which probably isn't too bad
    text1 = text0[:idx2]
    return text1
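# usage example: extract the assistant turn from a decoded transcript
assert extractLmResponse('### Human: hi\n### Assistant: hello there\n### Human: next') == 'hello there\n'
assert extractLmResponse('text without assistant marker') is None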
# helper to classify a line of the response of subgoal2exec
def classifyLineA(line):
    if line.startswith('```'):
        return ('codeblock', line[3:])
    # try to parse the numbered command
    parseRes0 = tryParseNumbered(line)
    if parseRes0 is not None: # is it a valid enumerated command? ex: "2. BLAH"
        return ('enum', parseRes0) # return with 'payload' of numbered
    return ('default', line)
# specialized parser which classifies every line
def classifyLinesOfLmResponse(text):
    return [classifyLineA(iLine) for iLine in text.split('\n')]
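# usage example: each line gets a (type, payload) tag
assert classifyLineA('```python') == ('codeblock', 'python')
assert classifyLinesOfLmResponse('1. a\nb') == [('enum', 'a'), ('default', 'b')]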
# group classified lines into a tree of enum followed by codeblocks
def groupCodes(list0):
    isInCode = False # is the text in a codesection?
    currentCodes = []
    res = []
    for iLineType, iLineContent in list0:
        if iLineType == 'enum':
            if len(currentCodes) > 0:
                res.append(('enum', currentCodes[:]))
                # flush codes
                currentCodes = []
        elif iLineType == 'codeblock':
            isInCode = not isInCode # toggle the code-section state
        elif iLineType == 'default':
            if isInCode: # is this a codeline?
                currentCodes.append(iLineContent) # add this line as code
            else:
                # ignore
                pass
        else:
            pass # ignore
    if len(currentCodes) > 0:
        res.append(('enum', currentCodes[:]))
        # flush codes
        currentCodes = []
    return res
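# usage example: a numbered step followed by a fenced code block groups into one enum entry
_exampleLines = classifyLinesOfLmResponse('1. google it\n```\ngoogle("parallel universes")\n```')
assert groupCodes(_exampleLines) == [('enum', ['google("parallel universes")'])]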
#
#model, tokenizer = load_model(modelName, device, 0)
#
#stream0 = generate_stream(model, tokenizer, {}, device, context_len=2048, stream_interval=2)
#
#for iv in stream0:
# print(iv)
#
#
#exit(0)
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer, AutoModel
def runPrompt(prompt0, model, tokenizer):
    # see https://huggingface.co/docs/transformers/tasks/language_modeling
    print(f'<run>>{prompt0}')
    inputs0 = tokenizer(prompt0, return_tensors="pt").input_ids
    outputs = model.generate(inputs0, max_new_tokens=512, do_sample=True, top_k=20, top_p=0.95)
    #pipe = pipeline(model=modelName, device_map="cpu")
    #output = pipe("This is a cool example!", do_sample=True, top_k=50, top_p=0.95)
    print(' ... done')
    x0 = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    x1 = x0[0]
    return x1
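# usage sketch (requires the model and tokenizer which are loaded further below):
#   x = runPrompt('### Human: say hello\n', model, tokenizer)
#   # x contains the prompt followed by the sampled continuation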
# generate prompt
def genPrompt(goalToReformulate, partName):
    if partName == 'goalToExecA': # OUTDATED
        # prompt for Vicuna 7B
        prompt0 = f'Write visit(url) to visit the url - ex: visit("http://y.com/"), write google(query) to google for query, write exit() when done. Your goal is to {goalToReformulate}. Give sequence of commands as bullet points!'
    elif partName == 'goalToExecB':
        # takes supergoal into account
        # prompt for Vicuna 7B
        # older variant without shell(command):
        #prompt0 = f'Write visit(url) to visit the url - ex: visit("http://y.com/"), write google(query) to google for query, write exit() when done. Supergoal is to {supergoal}. Your current goal is to {goalToReformulate}. Give sequence of commands as bullet points!'
        prompt0 = f'Write visit(url) to visit the url - ex: visit("http://y.com/"), write google(query) to google for query, write shell(command) to execute <command>, write exit() when done. Supergoal is to {supergoal}. Your current goal is to {goalToReformulate}. Give sequence of commands as bullet points!'
    elif partName == 'goalToExecC':
        # prompt for Vicuna 7B
        prompt0 = f'You execute shell commands like "visit" to visit a website or "google" to google. You also can use any other ubuntu shell command! Your supergoal is to {supergoal}. Your sub-goal is to {goalToReformulate}!'
    elif partName == 'goalToSubgoalsD': # DEPRECATED
        # prompt for Vicuna 7B
        prompt0 = f'As a SOTA AI, your goal is to {goalToReformulate}. Write hierarchical sub-goals of your goal as bullet points!'
    elif partName == 'goalToSubgoalsE':
        # prompt for Vicuna 7B
        prompt0 = f'As a SOTA AI, your goal is to {goalToReformulate}. Write hierarchical sub-goals of your goal as bullet points (*) at the top level!'
    elif partName == 'goalToSubgoalsF':
        # outputs JSON
        # ex: Your goal is to Research about ethical theories and principles. What is the next sub-goal? What is the justification for the sub-goal? Write the answer as JSON with 'nextsubgoal' and 'justification' as keys.
        # prompt for Vicuna 7B
        prompt0 = f'ex: Your goal is to {goalToReformulate}. What is the next sub-goal? What is the justification for the sub-goal? Write the answer as JSON with \'nextsubgoal\' and \'justification\' as keys.'
    else:
        raise ValueError(f'unknown prompt partName: {partName}')
    return prompt0
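# usage example (the goal text here is made up for illustration):
assert 'find sources' in genPrompt('find sources', 'goalToSubgoalsE')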
if useLm:
    tokenizer = AutoTokenizer.from_pretrained(modelName, use_fast=False)
    model = AutoModelForCausalLM.from_pretrained(modelName, low_cpu_mem_usage=True) # **kwargs
subgoals = []
# get subgoals of goal
if True: # codeblock
    for iAttempt in range(2): # do multiple attempts
        for z in range(5):
            print('')
        print(f'convert goal to subgoals... prompt=\'{supergoal}\'')
        prompt0 = genPrompt(supergoal, 'goalToSubgoalsD') # goal to execution
        prompt0 = f'### Human: {prompt0}\n'
        if useLm:
            x1 = runPrompt(prompt0, model, tokenizer)
        else:
            x1 = prototypingResponse # HACK< inject fake response for prototyping; must be defined by hand >
        print(f'{x1}')
        x2 = extractLmResponse(x1)
        if x2 is None:
            continue # response didn't contain an assistant turn, retry
        lmOutType = 'listOfItems' #'jsonLangchain'
        # parse output
        if lmOutType == 'jsonLangchain': # try to parse answer as if it were structured JSON
            # Create a parser for the LangChainA data model
            parser = PydanticOutputParser(pydantic_object=LangChainA)
            lmResult0 = x2
            #lmResult0 = runPrompt(prompt0, model, tokenizer)
            # HACK< fake response from LM >
            #lmResult0 = 'AA {"nextsubgoal":"A B C"\n, "justification":"BA BA"} \n{"nextsubgoal":"A B C"\n, "justification":"BA KFKFKF"} kfkfkfkf'
            for iSegment0 in lmResult0.split('}'): # split sequence of JSON into parts
                iSegment0 = iSegment0+'}' # add back end
                try:
                    a = parser.parse(iSegment0)
                except Exception:
                    continue # skip fragments which aren't parseable JSON
                # add to subgoals
                subgoals.append(a.nextsubgoal)
        else: # else just handle it as if it were a list of items
            # convert text of response from LM to text of goals
            if True: # codeblock
                lines0 = x2.split('\n')
                for iLine in lines0:
                    if iLine.startswith('* '):
                        goalText0 = iLine[2:] # remove '* ' at beginning
                        # remove text after first ':' because the LM continues with an unnecessary explanation
                        idx1 = goalText0.find(':')
                        if idx1 != -1:
                            goalText0 = goalText0[:idx1]
                        # helper to clean up text of subgoal
                        def subgoalCleanup(txt):
                            res = txt
                            if res.endswith('.'):
                                # subgoal can end with '.', remove if present
                                # NOTE< does subgoal always end with '.'? >
                                res = res[:-1] # remove dot
                            return res
                        goalText1 = subgoalCleanup(goalText0)
                        subgoals.append(goalText1)
                        continue
                    if True: # codeblock
                        # sometimes the list is numbered
                        parsingResult = tryParseNumbered(iLine)
                        if parsingResult is not None:
                            subgoals.append(parsingResult)
                            continue
                        else:
                            print('warn: line is neither a bullet nor numbered, ignore!')
        print('subgoals:')
        print(f'{str(subgoals)}')
        if len(subgoals) > 0:
            break
if len(subgoals) == 0:
    print('warn: no subgoals were found! giving up!')
    exit(1)
#exit(1) # fail because we are developing
# for testing
#subgoals.append('obtain an overview of the universe, including its size, structure, and composition')
#subgoals.append('find the best car dealer')
subgoalIdx = 0
for iSubgoal in subgoals:
    for z in range(5):
        print('')
    print(f'examine subgoal={iSubgoal}')
    prompt0 = genPrompt(iSubgoal, 'goalToExecC') # goal to execution
    prompt0 = f'### Human: {prompt0}\n'
    x1 = runPrompt(prompt0, model, tokenizer)
    print(f'{x1}')
    x2 = extractLmResponse(x1)
    if x2 is None:
        continue # response didn't contain an assistant turn, skip this subgoal
    groupCodes0 = groupCodes(classifyLinesOfLmResponse(x2))
    print(groupCodes0)
    # write to json file for exec
    if True: # code-block
        # write commands to file
        print(f'write to {outFilepath}')
        with open(outFilepath, "a") as f:
            f.write('\n\n')
            for iGroupCodeType, iGroupCodePayload in groupCodes0:
                out0 = json.dumps({"type": iGroupCodeType, "payload": iGroupCodePayload}) # serialize as valid JSON
                f.write(out0+'\n')
    # commented because it is the old code to parse the result from the LM
    """
    # * parse response from LM
    cmds = []
    for iLine in x2.split('\n'):
        # try to parse the numbered command
        parseRes0 = tryParseNumbered(iLine)
        if parseRes0 is not None: # is it a valid enumerated command? ex: "2. BLAH"
            cmds.append(parseRes0)
        else:
            if len(iLine)>3 and iLine[:3] == ' * ': # ex: " * BLAH"
                cmds.append(iLine[3:])
            else:
                if len(iLine)>2 and iLine[:2] == '* ': #ex: "* BLAH"
                    cmds.append(iLine[2:])
                else:
                    # ex: "google("x fff")"
                    parseRes1 = tryParseFunctioncall(iLine)
                    if parseRes1 is not None:
                        cmds.append(iLine)
    print(cmds) # DEBUG
    if True: # code-block
        # write commands to file
        print(f'write to {outFilepath}')
        with open(outFilepath, "a") as f:
            f.write(f'{{"subgoalidx":"{subgoalIdx}","subgoal":"{iSubgoal}","cmds":"{str(cmds)}"}}\n')
    """
    subgoalIdx += 1
# DONE< implement subgoal output parser which looks at ``` when code begins
# it also has to be able to handle step by step things (???) ! >
# DONE< use it to extract actionable code! >
# TODO< add code executor >
:duck:
This is D.U.C.K. - the Digital Utility Console Kit.
It screams, it is adorable, it is stupid, it is semi-working, and it can soon be useful for doing STUFF.
(it uses the Vicuna model from https://github.com/lm-sys/FastChat for inference)
The code of duck is above.