main.py 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. #!/usr/bin/env python3
  2. import io
  3. import replicate # very nice API to run AI models; see https://replicate.com/
  4. from nicegui import run, ui
  5. from nicegui.events import UploadEventArguments
  6. async def transcribe(e: UploadEventArguments):
  7. transcription.text = 'Transcribing...'
  8. model = replicate.models.get('openai/whisper')
  9. version = model.versions.get('30414ee7c4fffc37e260fcab7842b5be470b9b840f2b608f5baa9bbef9a259ed')
  10. prediction = await run.io_bound(version.predict, audio=io.BytesIO(e.content.read()))
  11. text = prediction.get('transcription', 'no transcription')
  12. transcription.set_text(f'result: "{text}"')
  13. async def generate_image():
  14. image.source = 'https://dummyimage.com/600x400/ccc/000000.png&text=building+image...'
  15. model = replicate.models.get('stability-ai/stable-diffusion')
  16. version = model.versions.get('db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf')
  17. prediction = await run.io_bound(version.predict, prompt=prompt.value)
  18. image.source = prediction[0]
  19. # User Interface
  20. with ui.row().style('gap:10em'):
  21. with ui.column():
  22. ui.label('OpenAI Whisper (voice transcription)').classes('text-2xl')
  23. ui.upload(on_upload=transcribe, auto_upload=True).style('width: 20em')
  24. transcription = ui.label().classes('text-xl')
  25. with ui.column():
  26. ui.label('Stable Diffusion (image generator)').classes('text-2xl')
  27. prompt = ui.input('prompt').style('width: 20em')
  28. ui.button('Generate', on_click=generate_image).style('width: 15em')
  29. image = ui.image().style('width: 60em')
  30. ui.run()