Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions .github/actions/pylint/action.yml

This file was deleted.

31 changes: 23 additions & 8 deletions .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
- 'datashare-python/**.py'
- 'worker-template/**.py'
- 'asr-worker/**.py'
- '.github/workflows/linting.yml'
- 'translation-worker/**.py'

# TODO: leverage some caching here
jobs:
Expand Down Expand Up @@ -37,26 +37,41 @@ jobs:
- name: Lint test
run: ruff check --config qa/ruff.toml worker-template

doc:
asr-worker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/ruff-action@v3
with:
args: "--version" # skips test by displaying the version
- name: Check formatting
run: ruff format --config qa/ruff.toml --check docs
run: ruff format --config qa/ruff.toml --check asr-worker
- name: Lint test
run: ruff check --config qa/ruff.toml docs
run: ruff check --config qa/ruff.toml asr-worker

asr-worker:
translation-worker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/pylint
name: "Lint ASR worker"
- uses: astral-sh/ruff-action@v3
with:
path: asr-worker
args: "--version" # skips test by displaying the version
- name: Check formatting
run: ruff format --config qa/ruff.toml --check translation-worker
- name: Lint test
run: ruff check --config qa/ruff.toml translation-worker

doc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/ruff-action@v3
with:
args: "--version" # skips test by displaying the version
- name: Check formatting
run: ruff format --config qa/ruff.toml --check docs
- name: Lint test
run: ruff check --config qa/ruff.toml docs

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down
58 changes: 58 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,64 @@ jobs:
uv sync --frozen --all-extras
uv run --frozen python -m pytest --timeout=180 -vvv --cache-clear --show-capture=all -r A

test-asr-worker:
  # Runs the asr-worker pytest suite after bringing up the compose services.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Setup Python project
      uses: actions/setup-python@v6
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # Cache key varies with the runner OS and the hash of docker-compose.yml.
    - name: Cache Docker images
      uses: ScribeMD/docker-cache@0.5.0
      with:
        key: docker-${{ runner.os }}-${{ hashFiles('docker-compose.yml') }}

    - name: Start test services
      run: docker compose up -d datashare temporal-post-init elasticsearch

    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        version: ${{ env.ASTRAL_VERSION }}
        python-version: ${{ env.PYTHON_VERSION }}
        enable-cache: true
        working-directory: asr-worker

    # System packages: ffmpeg plus the libav* development headers.
    - name: Install ffmpeg
      run: |
        sudo apt-get update
        sudo apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev

    # Step-level working-directory replaces an explicit `cd asr-worker`.
    - name: Run tests
      working-directory: asr-worker
      run: |
        uv sync --frozen --all-extras
        uv run --frozen python -m pytest --timeout=180 -vvv --cache-clear --show-capture=all -r A

test-translation-worker:
  # Runs the translation-worker pytest suite after bringing up the compose services.
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Setup Python project
      uses: actions/setup-python@v6
      with:
        python-version: ${{ env.PYTHON_VERSION }}

    # Cache key varies with the runner OS and the hash of docker-compose.yml.
    - name: Cache Docker images
      uses: ScribeMD/docker-cache@0.5.0
      with:
        key: docker-${{ runner.os }}-${{ hashFiles('docker-compose.yml') }}

    - name: Start test services
      run: docker compose up -d datashare temporal-post-init elasticsearch

    - name: Install uv
      uses: astral-sh/setup-uv@v7
      with:
        version: ${{ env.ASTRAL_VERSION }}
        python-version: ${{ env.PYTHON_VERSION }}
        enable-cache: true
        working-directory: translation-worker

    # Step-level working-directory replaces an explicit `cd translation-worker`.
    - name: Run tests
      working-directory: translation-worker
      run: |
        uv sync --frozen --all-extras
        uv run --frozen python -m pytest --timeout=180 -vvv --cache-clear --show-capture=all -r A

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
11 changes: 7 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
repos:
- repo: https://github.com/psf/black
rev: 26.1.0
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.5
hooks:
- id: black
language_version: python3.10
- id: ruff-check
args: [--fix]
types_or: [python, pyi]
- id: ruff-format
types_or: [python, pyi]
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ WORKDIR /app

# add task cli
ADD datashare-python/ ./datashare-python/
ADD worker-template/ ./worker-template/

# install python deps
RUN --mount=type=cache,target=~/.cache/uv \
uv pip install --system datashare-python/
RUN --mount=type=cache,target=~/.cache/uv uv pip install --system datashare-python/

# copy build-independent files
ADD scripts scripts
Expand Down Expand Up @@ -50,6 +50,8 @@ COPY --from=dp_builder /bin/uv /bin/uvx /bin/
WORKDIR /app

# add asr-worker
ADD worker-template/ ./worker-template/
ADD datashare-python/ ./datashare-python/
ADD asr-worker/ ./asr-worker/
ADD requirements_overrides.txt ./requirements_overrides.txt

Expand Down
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
create-venv:
[ -d .venv ] || uv venv --python 3.10
[ -d .venv ] || uv venv --python 3.13

install-deps:
uv pip install --override requirements_overrides.txt -e asr_worker/
uv pip install --override requirements_overrides.txt -e datashare_cli/
uv pip install -e translation-worker/
uv pip install -e datashare-python/
uv pip install -e worker-template/
uv pip install -r requirements_dev.txt

create-dirs:
Expand Down
17 changes: 11 additions & 6 deletions asr-worker/asr_worker/activities.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
import asyncio

import torchaudio
from caul.configs.parakeet import ParakeetConfig
from caul.model_handlers.helpers import ParakeetModelHandlerResult
from caul.tasks.preprocessing.helpers import PreprocessedInput
from caul.model_handlers.objects import ParakeetModelHandlerResult
from caul.tasks.preprocessing.objects import PreprocessedInput
from datashare_python.utils import ActivityWithProgress
from temporalio import activity
from temporalio.client import Client


class ASRActivities:
class ASRActivities(ActivityWithProgress):
"""Contains activity definitions as well as reference to models"""

def __init__(self):
def __init__(self, temporal_client: Client, event_loop: asyncio.AbstractEventLoop):
# TODO: Eventually this may include whisper, which will
# then require passing language_map
super().__init__(temporal_client, event_loop)
self.asr_handler = ParakeetConfig(return_tensors=False).handler_from_config()

# load models
Expand All @@ -21,7 +26,7 @@ async def preprocess(self, inputs: list[str]) -> list[list[PreprocessedInput]]:
"""Preprocess transcription inputs

:param inputs: list of file paths
:return: list of caul.tasks.preprocessing.helpers.PreprocessedInput
:return: list of caul.tasks.preprocessing.objects.PreprocessedInput
"""
return self.asr_handler.preprocessor.process(inputs)

Expand All @@ -42,7 +47,7 @@ async def infer(
# assign
item.tensor = tensor

return self.asr_handler.inference_handler.process(inputs)
return self.asr_handler.inference_handler.process([inputs])

@activity.defn(name="asr.transcription.postprocess")
async def postprocess(
Expand Down
10 changes: 4 additions & 6 deletions asr-worker/asr_worker/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@

_TEN_MINUTES = _ONE_MINUTE * 10

ASR_TASK_QUEUE = "transcription-tasks"
ASR_CPU_TASK_QUEUE = "asr-cpu-tasks"

PARAKEET = "parakeet"

DEFAULT_TEMPORAL_ADDRESS = "temporal:7233"
ASR_GPU_TASK_QUEUE = "asr-gpu-tasks"

RESPONSE_SUCCESS = "success"
PARAKEET = "parakeet"

RESPONSE_ERROR = "error"
ASR_WORKFLOW_NAME = "asr-workflow"
54 changes: 0 additions & 54 deletions asr-worker/asr_worker/models.py

This file was deleted.

47 changes: 47 additions & 0 deletions asr-worker/asr_worker/objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from enum import StrEnum

from datashare_python.objects import BasePayload, WorkerResponse
from pydantic import BaseModel, Field

from .constants import ASR_CPU_TASK_QUEUE, ASR_GPU_TASK_QUEUE, PARAKEET


class TaskQueues(StrEnum):
    """Names of the ASR task queues, one member per queue constant."""

    # Values are taken directly from .constants; as a StrEnum, each member
    # is itself a str equal to the underlying constant.
    CPU = ASR_CPU_TASK_QUEUE
    GPU = ASR_GPU_TASK_QUEUE


class BatchSize(BaseModel):
    """Batch size helper"""

    # Number of items per batch. Constrained to be strictly positive: a zero
    # or negative batch size is meaningless, so it is rejected when the model
    # is validated rather than surfacing later inside a pipeline stage.
    batch_size: int = Field(default=32, gt=0)


class PreprocessingConfig(BatchSize):
    """Preprocessing config"""

    # Declares no fields of its own; `batch_size` is inherited from BatchSize.


class InferenceConfig(BatchSize):
    """Inference config"""

    # `model_name` begins with pydantic's reserved `model_` prefix. Clearing
    # the protected namespaces prevents the field-name conflict warning that
    # pydantic v2 releases before 2.10 emit when this class is defined.
    model_config = {"protected_namespaces": ()}

    # Name of the ASR model to run; defaults to the PARAKEET constant.
    model_name: str = PARAKEET


class ASRPipelineConfig(BaseModel):
    """ASR pipeline config"""

    # Each stage config is built by its default_factory when omitted, so
    # ASRPipelineConfig() with no arguments yields an all-defaults pipeline.
    preprocessing: PreprocessingConfig = Field(default_factory=PreprocessingConfig)
    inference: InferenceConfig = Field(default_factory=InferenceConfig)


class ASRRequest(BasePayload):
    """Inputs to ASR workflow"""

    # File paths handed to the workflow (required).
    file_paths: list[str]
    # Every field of ASRPipelineConfig has a default, so the pipeline may be
    # omitted; a default configuration is then built per request. Callers
    # that pass `pipeline` explicitly are unaffected.
    pipeline: ASRPipelineConfig = Field(default_factory=ASRPipelineConfig)


class ASRResponse(WorkerResponse):
    """ASR workflow response"""

    # default_factory=list gives each instance its own empty list, so a
    # response can be constructed without any transcriptions.
    transcriptions: list[dict] = Field(default_factory=list)
Loading
Loading