Skip to content

vllm.v1.worker.gpu.attn_utils

prepare_dcp_local_seq_lens

prepare_dcp_local_seq_lens(
    dcp_local_seq_lens: Tensor,
    seq_lens: Tensor,
    num_reqs: int,
    dcp_size: int,
    dcp_rank: int,
    cp_kv_cache_interleave_size: int,
) -> None

Populate the persistent DCP local seq_lens buffer (CUDA graph safe).

Source code in vllm/v1/worker/gpu/attn_utils.py
def prepare_dcp_local_seq_lens(
    dcp_local_seq_lens: torch.Tensor,
    seq_lens: torch.Tensor,
    num_reqs: int,
    dcp_size: int,
    dcp_rank: int,
    cp_kv_cache_interleave_size: int,
) -> None:
    """Fill the persistent DCP local seq_lens buffer in place (CUDA graph safe).

    Does nothing when ``dcp_size <= 1``. Otherwise the first ``num_reqs``
    entries of ``dcp_local_seq_lens`` are overwritten with the result of
    ``get_dcp_local_seq_lens`` for ``seq_lens[:num_reqs]``, and every entry
    from index ``num_reqs`` onwards is reset to zero.

    Args:
        dcp_local_seq_lens: Destination buffer; modified in place.
        seq_lens: Source lengths; only the first ``num_reqs`` entries are read.
        num_reqs: Number of leading entries to populate.
        dcp_size: Forwarded to ``get_dcp_local_seq_lens``; a value of 1 or
            less turns this call into a no-op.
        dcp_rank: Forwarded to ``get_dcp_local_seq_lens``.
        cp_kv_cache_interleave_size: Forwarded to ``get_dcp_local_seq_lens``.
    """
    if dcp_size > 1:
        # Views into the destination buffer: populated prefix and unused tail.
        live_slots = dcp_local_seq_lens[:num_reqs]
        tail_slots = dcp_local_seq_lens[num_reqs:]

        per_rank_lens = get_dcp_local_seq_lens(
            seq_lens[:num_reqs],
            dcp_size=dcp_size,
            dcp_rank=dcp_rank,
            cp_kv_cache_interleave_size=cp_kv_cache_interleave_size,
        )

        live_slots.copy_(per_rank_lens, non_blocking=True)
        # Clear everything past the populated entries so no old values remain.
        tail_slots.zero_()