from langchain_core.callbacks import BaseCallbackHandler
count = 0
class MyCustomHandler(BaseCallbackHandler):
def on_llm_new_token(self, token: str, **kwargs) -> None:
global count
if count < 10:
print(f"Token: {token}")
count += 1
# Verbose is required to pass to the callback manager
llm = GPT4All(model=local_path, callbacks=[MyCustomHandler()], streaming=True)
# If you want to use a custom model add the backend parameter
# Check https://docs.gpt4all.io/gpt4all_python.html for supported backends
# llm = GPT4All(model=local_path, backend="gptj", callbacks=callbacks, streaming=True)
chain = prompt | llm
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
# Streamed tokens will be logged/aggregated via the passed callback
res = chain.invoke({"question": question})