prepare_dcp_local_seq_lens(
dcp_local_seq_lens: Tensor,
seq_lens: Tensor,
num_reqs: int,
dcp_size: int,
dcp_rank: int,
cp_kv_cache_interleave_size: int,
) -> None
Populate the persistent DCP local seq_lens buffer (CUDA graph safe).
Source code in vllm/v1/worker/gpu/attn_utils.py
def prepare_dcp_local_seq_lens(
    dcp_local_seq_lens: torch.Tensor,
    seq_lens: torch.Tensor,
    num_reqs: int,
    dcp_size: int,
    dcp_rank: int,
    cp_kv_cache_interleave_size: int,
) -> None:
    """Fill the persistent DCP local seq_lens buffer in place (CUDA graph safe).

    When ``dcp_size`` is 1 or less the function is a no-op and the buffer is
    left exactly as it was. Otherwise the first ``num_reqs`` entries of
    ``seq_lens`` are passed to ``get_dcp_local_seq_lens`` and the result is
    written into the first ``num_reqs`` slots of ``dcp_local_seq_lens``; every
    remaining slot of the buffer is reset to zero.

    Args:
        dcp_local_seq_lens: Destination buffer, modified in place.
        seq_lens: Source sequence lengths; only ``seq_lens[:num_reqs]`` is read.
        num_reqs: Number of leading entries to compute and write.
        dcp_size: DCP group size; values <= 1 skip all work.
        dcp_rank: Forwarded unchanged to ``get_dcp_local_seq_lens``.
        cp_kv_cache_interleave_size: Forwarded unchanged to
            ``get_dcp_local_seq_lens``.

    Returns:
        None. The result is delivered through ``dcp_local_seq_lens``.
    """
    if dcp_size > 1:
        active_seq_lens = seq_lens[:num_reqs]
        per_rank_lens = get_dcp_local_seq_lens(
            active_seq_lens,
            dcp_size=dcp_size,
            dcp_rank=dcp_rank,
            cp_kv_cache_interleave_size=cp_kv_cache_interleave_size,
        )

        # Write through views of the existing buffer rather than rebinding it,
        # so the caller's tensor object is the one that gets updated.
        active_slots = dcp_local_seq_lens[:num_reqs]
        active_slots.copy_(per_rank_lens, non_blocking=True)

        # Clear the slots past the active requests so no earlier values
        # remain there.
        unused_slots = dcp_local_seq_lens[num_reqs:]
        unused_slots.zero_()
|