vllm.model_executor.layers.rotary_embedding.telechat3_scaling_rope ¶

TeleChat3RoPEScaledRotaryEmbedding ¶

Bases: YaRNScalingRotaryEmbedding

TeleChat3 uses a variant of the YaRN method.

To reuse as much of the existing YaRN code as possible, only the get_mscale computation is redefined, inside the initialization function.

Source code in vllm/model_executor/layers/rotary_embedding/telechat3_scaling_rope.py

class TeleChat3RoPEScaledRotaryEmbedding(YaRNScalingRotaryEmbedding):
    """Rotary embedding for TeleChat3, which uses a variant of the YaRN method.

    The class inherits from ``YaRNScalingRotaryEmbedding`` to reuse as much
    of its code as possible; only the magnitude-scale (``mscale``) formula is
    replaced, and that replacement lives inside ``__init__``.
    """

    def __init__(
        self,
        head_size: int,
        rotary_dim: int,
        max_position_embeddings: int,
        base: int,
        is_neox_style: bool,
        scaling_factor: float,
        dtype: torch.dtype,
        *,
        extrapolation_factor: float = 1,
        attn_factor: float = 1,
        beta_fast: int = 32,
        beta_slow: int = 1,
        truncate: bool = True,
    ) -> None:
        """Store the scaling settings, derive ``mscale``, then run the base init.

        Args:
            head_size: Forwarded unchanged to ``RotaryEmbedding.__init__``.
            rotary_dim: Forwarded unchanged to ``RotaryEmbedding.__init__``.
            max_position_embeddings: Forwarded unchanged to
                ``RotaryEmbedding.__init__``.
            base: Forwarded unchanged to ``RotaryEmbedding.__init__``.
            is_neox_style: Forwarded unchanged to ``RotaryEmbedding.__init__``.
            scaling_factor: Context-extension scale; stored on the instance
                and used to derive ``mscale``.
            dtype: Forwarded unchanged to ``RotaryEmbedding.__init__``.
            extrapolation_factor: Stored on the instance as-is.
            attn_factor: Stored on the instance and multiplied into
                ``mscale``.
            beta_fast: Stored on the instance as-is.
            beta_slow: Stored on the instance as-is.
            truncate: Stored on the instance as-is.
        """
        # Record every scaling knob on the instance before the base
        # initializer runs.
        self.scaling_factor = scaling_factor
        self.extrapolation_factor = extrapolation_factor
        self.attn_factor = attn_factor
        self.beta_fast = beta_fast
        self.beta_slow = beta_slow
        self.truncate = truncate

        # TeleChat3's magnitude correction: no correction when the scale is
        # at most 1, otherwise a logarithmic ramp with a 0.07 coefficient.
        if self.scaling_factor <= 1:
            magnitude = 1.0
        else:
            magnitude = 0.07 * math.log(self.scaling_factor) + 1.0
        self.mscale = float(magnitude * attn_factor)

        # The base initializer has to come last: mscale must already be set,
        # otherwise it would have no effect. RotaryEmbedding.__init__ is
        # called directly, so the YaRN parent's own __init__ is skipped
        # (presumably so it cannot overwrite mscale - confirm against
        # YaRNScalingRotaryEmbedding).
        RotaryEmbedding.__init__(
            self,
            head_size,
            rotary_dim,
            max_position_embeddings,
            base,
            is_neox_style,
            dtype,
        )