Streaming

Lamini also supports streaming inference! Here is an example implementation using our Python library, showing both an asynchronous and a synchronous variant.

import asyncio
import random

import lamini

api = lamini.StreamingCompletion()
async def async_main():
    # Randomize the prompt so repeated runs send distinct requests
    prompt = f"[INST]{random.random()} What is a pickle? [/INST]"
    result = await api.async_create(
        prompt,
        "meta-llama/Llama-3.2-1B-Instruct",
        max_new_tokens=256,
    )

    # Print each streamed chunk as it arrives
    async for r in result:
        print(r)

def main():
    prompt = "[INST]What is a pickle?[/INST]"
    result = api.create(
        prompt,
        "meta-llama/Llama-3.2-1B-Instruct",
        max_new_tokens=256,
    )

    # Print each streamed chunk as it arrives
    for r in result:
        print(r)

# Run the synchronous example, then the asynchronous one
main()
asyncio.run(async_main())
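
Each item yielded by the stream is one response chunk from the server. If you want the full completion as a single string rather than printing chunks as they arrive, you can accumulate them. The sketch below is a minimal example under an assumption: it treats each chunk as a dict whose "outputs" entry is a list of generated text fragments. That field name is an assumption, not a documented contract, so adjust it to match the payload your deployment actually returns.

def collect_stream(chunks):
    # Minimal sketch: concatenate streamed fragments into one string.
    # Assumes each chunk is a dict with an "outputs" list of text
    # fragments -- an assumption about the chunk layout, not a
    # documented Lamini contract.
    pieces = []
    for chunk in chunks:
        pieces.extend(chunk.get("outputs", []))
    return "".join(pieces)

text = collect_stream(
    api.create(
        "[INST]What is a pickle?[/INST]",
        "meta-llama/Llama-3.2-1B-Instruct",
        max_new_tokens=256,
    )
)
print(text)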