On the Beat：Rokid AR平台上的音乐节奏游戏开发

一、项目概述

1.1 项目背景

On the Beat：一款为ADHD 英语学习者打造的音乐游戏，将传统音乐节奏游戏与沉浸式AR体验相结合。玩家通过Rokid眼镜的手部追踪功能，在虚拟场景中与流动的歌词互动，通过准确的拍打手势击中音符，体验身临其境的音乐游戏。

作为一个学生作品，我和团队小伙伴打磨了将近两个月。我们团队都是初学者，但所幸Rokid的SDK非常强大，开发者社区的相关资料也很多，加上新时代AI的赋能，都帮助我们0基础上手。最后能收获到官方和孩子们的喜欢真的非常荣幸！！！

1.2 核心玩法

节奏判定：根据歌曲节拍，系统在特定时间点生成音符

手势识别：通过Rokid XR手部追踪，识别玩家的拍打、挥手等手势

实时反馈：立即显示判定等级（Perfect/Good/Miss）和得分

点歌体验：支持多首歌曲，可切换选择

歌词展示：歌词同步滚动，逐字变色跟随音乐进度

1.3 技术栈

引擎：Unity 2022+

AR框架：Rokid UXR SDK

核心技术：实时音频处理、手部碰撞检测、精确时间判定、动态UI渲染

二、整体架构设计

| 模块 | 职责 | 对应脚本 |
| --- | --- | --- |
| 输入模块 | 手部识别、手势检测、碰撞触发 | RokidHandCollisionDetector.cs |
| 游戏逻辑 | 音符管理、输入处理、游戏状态 | RhythmGameManager.cs |
| 判定系统 | 时间计算、精确判定、结果评级 | JudgmentSystem.cs |
| 评分系统 | 得分计算、数据统计、结果聚合 | ScoreManager.cs、ScoreData.cs |
| 显示层 | UI渲染、歌词同步、视觉反馈 | LyricScroller.cs、DJ.cs |

三、核心难点与实现方案

难点 1：AR环境下的实时手部碰撞检测

问题背景：Rokid XR设备的手部追踪在AR空间中存在以下挑战：

手部追踪数据有延迟（通常10-30ms）
空间定位误差（±5-10cm）
快速运动时的轨迹丢失
复杂手势识别的误判

传统的碰撞盒检测在AR场景中容易误触发或完全不触发。

解决方案：采用分层碰撞检测机制，结合距离判定和时间冷却：


/// <summary>
/// Converts the tracked hand position into button presses.
/// Each frame the closest button that is not "shadowed" by a neighbouring button is
/// selected; it fires when the hand is within <see cref="triggerDistance"/>.
/// A short cooldown plus a "last pressed" latch stop one touch from firing repeatedly.
/// </summary>
public class RokidHandCollisionDetector : MonoBehaviour
{
    [Header("碰撞检测设置")]
    public float triggerDistance = 0.25f;       // Hand-to-button distance below which the button fires
    public float exclusiveRadius = 0.20f;       // A button is ignored while the hand is this close to a different button
    public float handColliderSize = 0.05f;      // Hand collider size (not read by the code visible in this class)

    /// <summary>
    /// Raised right after a button's OnClick() has been invoked by <see cref="TriggerButton"/>.
    /// TapInputHandler subscribes to this event with a parameterless callback.
    /// </summary>
    public event System.Action OnButtonPressed;

    private List<WorkingButtonController> buttons;
    private WorkingButtonController lastTriggeredButton;
    private float lastTriggerTime = 0f;
    private float triggerCooldown = 0.05f;      // 50 ms cooldown between two accepted presses
    private Transform cameraTransform;

    // True when TriggerButton was called during the current Update, i.e. the hand is
    // currently on some button. Used to release the latch once the hand has left.
    private bool buttonTargetedThisFrame;

    void Update()
    {
        buttonTargetedThisFrame = false;

        UpdateHandColliders();           // Refresh the tracked hand position
        CheckExclusiveCollisions();      // Precise, neighbour-aware detection
        CheckProximityTrigger();         // Proximity trigger (NOTE(review): not defined in the visible code)

        // Without this release the latch below would block the same button forever:
        // a second press of button A was only possible after pressing some other button.
        if (!buttonTargetedThisFrame)
        {
            lastTriggeredButton = null;
        }
    }

    /// <summary>
    /// Updates the tracked positions of the left and right hands.
    /// </summary>
    void UpdateHandColliders()
    {
        // Read the hand positions from the Rokid XR SDK:
        // leftHand = uksLeftHandTrans.position
        // rightHand = uksRightHandTrans.position
        // NOTE(review): the checks below read `handPosition`, which is not declared
        // anywhere in the visible code - confirm where it is assigned.
    }

    /// <summary>
    /// Precise collision check that avoids accidental presses of neighbouring buttons:
    /// picks the closest non-shadowed button and fires it when it is within
    /// <see cref="triggerDistance"/>.
    /// </summary>
    void CheckExclusiveCollisions()
    {
        // The button list is never populated by the visible code; without it there is nothing to test.
        if (buttons == null)
        {
            return;
        }

        float closestDistance = float.MaxValue;
        WorkingButtonController closestButton = null;

        foreach (var button in buttons)
        {
            if (button == null || IsHandNearAnotherButton(button))
            {
                continue;
            }

            float distance = Vector3.Distance(handPosition, button.transform.position);
            if (distance < closestDistance)
            {
                closestDistance = distance;
                closestButton = button;
            }
        }

        // Fire only the single closest candidate.
        if (closestButton != null && closestDistance < triggerDistance)
        {
            TriggerButton(closestButton);
        }
    }

    /// <summary>
    /// Returns true when the hand is inside <see cref="exclusiveRadius"/> of any button
    /// other than <paramref name="button"/>.
    /// </summary>
    bool IsHandNearAnotherButton(WorkingButtonController button)
    {
        foreach (var other in buttons)
        {
            if (other == null || other == button)
            {
                continue;
            }

            if (Vector3.Distance(handPosition, other.transform.position) < exclusiveRadius)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Fires <paramref name="button"/> unless the cooldown is still running or the
    /// button is the one that was fired last and the hand has not left it since.
    /// </summary>
    void TriggerButton(WorkingButtonController button)
    {
        // Record that the hand is on a button even if this call ends up being suppressed.
        buttonTargetedThisFrame = true;

        if (Time.time - lastTriggerTime < triggerCooldown)
        {
            return;
        }

        if (lastTriggeredButton != button)
        {
            button.OnClick();
            lastTriggeredButton = button;
            lastTriggerTime = Time.time;

            // NOTE(review): the event carries no left/right hand information because
            // this class tracks a single handPosition.
            OnButtonPressed?.Invoke();
        }
    }
}

关键技术点：

距离判定：以手部到按钮的距离挑选最近的候选按钮，且仅当该距离小于 triggerDistance 时才触发
排他性检测：确保只有最近的按钮被触发，避免邻近误触
冷却机制：防止抖动导致的多次触发（50ms间隔）
手部追踪集成：直接从Rokid XR SDK获取实时手部位置

难点 2：音频时间同步与精确节奏判定

问题背景：节奏游戏的核心在于精确度。常见问题：

音频播放时刻与游戏逻辑不同步
不同平台的音频延迟差异（5-50ms）
判定窗口设定不合理导致体验差
浮点精度问题累积误差

解决方案：以音频播放时间作为统一时钟，并配合分级判定窗口（Perfect/Good/Miss）：



/// <summary>
/// Grades one player input by how far (in seconds) it lands from the note's scheduled time.
/// </summary>
public class JudgmentSystem : MonoBehaviour
{
    [Header("时间窗口设置（秒）")]
    [SerializeField] private float perfectWindow = 0.3f;  // Perfect when |input - note| <= 0.3 s
    [SerializeField] private float goodWindow = 0.6f;     // Good when |input - note| <= 0.6 s

    [Header("分数设置")]
    [SerializeField] private int perfectScore = 100;
    [SerializeField] private int goodScore = 80;

    /// <summary>
    /// Judges a note.
    /// </summary>
    /// <param name="noteTime">Scheduled time of the note.</param>
    /// <param name="inputTime">Time at which the player's input arrived.</param>
    /// <returns>
    /// A JudgmentInfo holding the grade, the awarded score (0 for a miss) and the
    /// absolute time difference between input and note.
    /// </returns>
    public JudgmentInfo Judge(float noteTime, float inputTime)
    {
        // Early and late inputs are treated the same: only the absolute offset matters.
        float offset = Mathf.Abs(inputTime - noteTime);

        if (offset <= perfectWindow)
        {
            return Report(JudgmentResult.Perfect, perfectScore, offset, $" Perfect! 时间差: {offset:F3}s");
        }

        if (offset <= goodWindow)
        {
            return Report(JudgmentResult.Good, goodScore, offset, $" Good! 时间差: {offset:F3}s");
        }

        return Report(JudgmentResult.Miss, 0, offset, $" Miss! 时间差: {offset:F3}s");
    }

    /// <summary>
    /// Logs the outcome and wraps it in a JudgmentInfo.
    /// </summary>
    private JudgmentInfo Report(JudgmentResult grade, int points, float offset, string logLine)
    {
        Debug.Log(logLine);
        return new JudgmentInfo(grade, points, offset);
    }
}

/// <summary>
/// Matches each player input to the closest not-yet-judged note of the current song,
/// has it graded by the judgment system and forwards the result to the score manager.
/// </summary>
public class RhythmGameManager : MonoBehaviour
{
    /// <summary>
    /// The manager that woke up last; input handlers reach the game through this.
    /// </summary>
    public static RhythmGameManager Instance { get; private set; }

    // Judge is an instance method of JudgmentSystem, so a component reference is required.
    [SerializeField] private JudgmentSystem judgmentSystem;

    private AudioSource audioSource;
    private SongData currentSongData;

    // Notes that have already received a judgment. Without this, repeated taps inside
    // the search window would score the same note again and again.
    private readonly HashSet<SongNote> judgedNotes = new HashSet<SongNote>();
    private SongData judgedNotesSong;   // Song the set above belongs to
    private float lastInputTime;        // Playback time of the previous input

    void Awake()
    {
        Instance = this;

        if (judgmentSystem == null)
        {
            judgmentSystem = FindObjectOfType<JudgmentSystem>();
        }
    }

    void OnDestroy()
    {
        if (Instance == this)
        {
            Instance = null;
        }
    }

    /// <summary>
    /// Handles one player input of the given type at the current audio playback time.
    /// Does nothing when no unjudged note of that type lies inside the search window.
    /// </summary>
    /// <param name="inputType">Kind of input the player produced.</param>
    public void ProcessInput(InputType inputType)
    {
        if (audioSource == null || currentSongData == null || judgmentSystem == null)
        {
            Debug.LogWarning("RhythmGameManager.ProcessInput ignored: audio source, song data or judgment system is not set.");
            return;
        }

        // The audio playback position is the clock notes are compared against.
        float currentTime = audioSource.time;
        ForgetJudgedNotesIfPlaybackRestarted(currentTime);

        // Find the best matching note.
        SongNote targetNote = FindNoteForInput(currentTime, inputType);
        if (targetNote == null)
        {
            return;
        }

        // One judgment per note: retire it before scoring.
        judgedNotes.Add(targetNote);

        var judgmentResult = judgmentSystem.Judge(
            targetNote.time,      // Scheduled time of the note
            currentTime           // Time of the player's input
        );

        // Forward the result to the score manager, if one exists.
        if (ScoreManager.Instance != null)
        {
            ScoreManager.Instance.AddJudgment(
                judgmentResult.result,
                judgmentResult.score
            );
        }
    }

    /// <summary>
    /// Clears the judged-note set when a different song is active or when the
    /// playback position moved backwards (restart, loop or seek).
    /// </summary>
    void ForgetJudgedNotesIfPlaybackRestarted(float currentTime)
    {
        if (!object.ReferenceEquals(judgedNotesSong, currentSongData) || currentTime < lastInputTime)
        {
            judgedNotes.Clear();
            judgedNotesSong = currentSongData;
        }

        lastInputTime = currentTime;
    }

    /// <summary>
    /// Finds the unjudged note of the given input type whose time is closest to
    /// <paramref name="currentTime"/>, or null when none lies inside the search window.
    /// </summary>
    SongNote FindNoteForInput(float currentTime, InputType inputType)
    {
        const float searchWindow = 0.8f; // Search window: +/-800 ms around the input

        if (currentSongData == null || currentSongData.notes == null)
        {
            return null;
        }

        SongNote closestNote = null;
        float closestTimeDiff = float.MaxValue;

        foreach (var note in currentSongData.notes)
        {
            if (note.type != inputType || judgedNotes.Contains(note))
            {
                continue;
            }

            float timeDiff = Mathf.Abs(note.time - currentTime);

            // Only notes inside the search window are candidates.
            if (timeDiff < searchWindow && timeDiff < closestTimeDiff)
            {
                closestNote = note;
                closestTimeDiff = timeDiff;
            }
        }

        return closestNote;
    }
}

关键技术点：

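时间戳精度：示例代码使用单精度 float 的 audioSource.time，对几分钟长度的歌曲已经足够；若需要更高精度，可改用 double 类型的 AudioSettings.dspTime 或基于采样数的 audioSource.timeSamples
范围搜索：遍历音符时只接受落在±800ms搜索窗口内的候选（当前实现仍是线性遍历；音符按时间排序后可进一步改为二分查找）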
最近音符匹配：在多个可能音符中选择时间最接近的
判定容错：通过合理的窗口大小（Perfect ±300ms）平衡游戏难度

难点 3：动态歌词渲染与逐字同步高亮

问题背景：高质量的卡拉OK体验需要：

歌词与音乐精确同步（误差<100ms）
逐字变色跟随播放进度
支持多种语言和特殊标记（重读音、连接音等）
高性能UI更新（每帧可能需要更新多个文本）
TextMesh Pro的富文本标签动态生成

解决方案：采用分层高亮系统和增量更新优化：



/// <summary>
/// Shows the lyric line that matches the audio playback position and rebuilds its
/// TextMesh Pro rich text every frame so each character is rendered as already sung,
/// currently being sung, or upcoming.
/// </summary>
/// <remarks>
/// NOTE(review): FindCurrentLine and the colour fields (sungColor, currentCharColor,
/// normalColor, stressUnderlineColor, linkingUnderlineColor) are referenced but not
/// declared in the visible code - confirm they exist in the real class.
/// </remarks>
public class LyricScroller : MonoBehaviour
{
    /// <summary>One lyric line together with its per-character timing.</summary>
    [System.Serializable]
    public class LyricLine
    {
        public string text;               // Full text of the line
        public float startTime;           // Start time of the line
        public CharacterInfo[] charInfos; // Per-character timing, in text order
    }

    /// <summary>Timing and mark information for a single character of a line.</summary>
    [System.Serializable]
    public class CharacterInfo
    {
        public char character;
        public float startTime;          // Time at which this character starts
        public float duration;           // How long this character lasts
        public MarkType markType;        // Pronunciation mark (stress / linking / none)
    }

    public enum MarkType { None, Stress, Linking }

    private TMP_Text currentLineText;
    private AudioSource audioSource;
    private List<LyricLine> lyricLines = new List<LyricLine>();

    // Cache of "#RRGGBB" strings so colours are not re-formatted for every character.
    private Dictionary<Color, string> colorHexCache = new Dictionary<Color, string>();

    // Reused across frames: the rich text is rebuilt in every Update.
    private readonly System.Text.StringBuilder richTextBuilder = new System.Text.StringBuilder();

    void Update()
    {
        if (audioSource == null || currentLineText == null)
        {
            return;
        }

        UpdateLyricDisplay(audioSource.time);
    }

    /// <summary>
    /// Refreshes the displayed line and highlights the character being sung.
    /// </summary>
    void UpdateLyricDisplay(float currentTime)
    {
        // Locate the line for the current playback time.
        LyricLine currentLine = FindCurrentLine(currentTime);
        if (currentLine == null) return;

        // Build the rich text that carries the per-character highlight.
        currentLineText.text = BuildRichTextWithHighlight(currentLine, currentTime);
    }

    /// <summary>
    /// Builds the rich-text string for <paramref name="line"/> at <paramref name="currentTime"/>.
    /// Returns the plain line text when the line has no per-character timing.
    /// </summary>
    string BuildRichTextWithHighlight(LyricLine line, float currentTime)
    {
        if (line.charInfos == null)
        {
            return line.text ?? string.Empty;
        }

        richTextBuilder.Clear();

        foreach (var charInfo in line.charInfos)
        {
            if (currentTime >= charInfo.startTime + charInfo.duration)
            {
                // Already sung.
                AppendSungCharacter(charInfo);
            }
            else if (currentTime >= charInfo.startTime)
            {
                // Currently being sung: highlight colour plus bold.
                richTextBuilder.Append("<color=").Append(ColorToHex(currentCharColor)).Append("><b>");
                richTextBuilder.Append(charInfo.character);
                richTextBuilder.Append("</b></color>");
            }
            else
            {
                // Not sung yet: default colour.
                richTextBuilder.Append("<color=").Append(ColorToHex(normalColor)).Append('>');
                richTextBuilder.Append(charInfo.character);
                richTextBuilder.Append("</color>");
            }
        }

        return richTextBuilder.ToString();
    }

    /// <summary>
    /// Appends one already-sung character, wrapped in an underline of the mark colour
    /// when it carries a stress or linking mark.
    /// </summary>
    void AppendSungCharacter(CharacterInfo charInfo)
    {
        string markHex = null;
        if (charInfo.markType == MarkType.Stress)
        {
            markHex = ColorToHex(stressUnderlineColor);
        }
        else if (charInfo.markType == MarkType.Linking)
        {
            markHex = ColorToHex(linkingUnderlineColor);
        }

        richTextBuilder.Append("<color=").Append(ColorToHex(sungColor)).Append('>');

        if (markHex != null)
        {
            richTextBuilder.Append("<u><color=").Append(markHex).Append('>');
        }

        richTextBuilder.Append(charInfo.character);

        // Close tags in reverse order of opening so the markup stays properly nested.
        if (markHex != null)
        {
            richTextBuilder.Append("</color></u>");
        }

        richTextBuilder.Append("</color>");
    }

    /// <summary>
    /// Wraps <paramref name="text"/> in the visual markup for a stress or linking mark.
    /// </summary>
    string WrapWithMark(string text, MarkType mark)
    {
        switch (mark)
        {
            case MarkType.Stress:
                // Stress: underline + blue highlight. The underline now encloses the text
                // (it used to be opened after the text and never closed).
                // NOTE(review): the mark colour has no alpha component - confirm the
                // text stays readable under the highlight.
                return $"<u><mark=#3399FF>{text}</mark></u>";
            case MarkType.Linking:
                // Linking: underline + yellow highlight.
                return $"<u><mark=#FFFF4D>{text}</mark></u>";
            default:
                return text;
        }
    }

    /// <summary>
    /// Returns <paramref name="color"/> as a "#RRGGBB" string, cached per colour.
    /// The leading '#' is required for a hexadecimal value in a colour tag;
    /// ColorUtility.ToHtmlStringRGB returns the digits without it.
    /// </summary>
    string ColorToHex(Color color)
    {
        string hex;
        if (!colorHexCache.TryGetValue(color, out hex))
        {
            hex = "#" + ColorUtility.ToHtmlStringRGB(color);
            colorHexCache[color] = hex;
        }

        return hex;
    }

    /// <summary>
    /// Replaces the loaded lyrics with the lines of <paramref name="songData"/>,
    /// deriving per-character timing for every line.
    /// </summary>
    public void LoadLyricsFromData(SongData songData)
    {
        lyricLines.Clear();

        if (songData == null || songData.lyrics == null)
        {
            return;
        }

        foreach (var lyricData in songData.lyrics)
        {
            var line = new LyricLine
            {
                text = lyricData.text,
                startTime = lyricData.startTime,
                charInfos = ParseCharacterTimings(lyricData)
            };

            lyricLines.Add(line);
        }
    }

    /// <summary>
    /// Derives per-character timing by splitting the line's time span evenly over its
    /// characters. Returns an empty array for a line without text.
    /// </summary>
    CharacterInfo[] ParseCharacterTimings(LyricData lyricData)
    {
        string text = lyricData.text;
        if (string.IsNullOrEmpty(text))
        {
            return new CharacterInfo[0];
        }

        int charCount = text.Length;

        // Time span of the line; an end time before the start time is treated as zero length
        // so the characters never get negative durations.
        float lineDuration = Mathf.Max(0f, lyricData.endTime - lyricData.startTime);
        float charDuration = lineDuration / charCount;

        var charInfos = new CharacterInfo[charCount];

        for (int i = 0; i < charCount; i++)
        {
            charInfos[i] = new CharacterInfo
            {
                character = text[i],
                startTime = lyricData.startTime + (i * charDuration),
                duration = charDuration,
                markType = DetectMarkType(text[i])
            };
        }

        return charInfos;
    }

    /// <summary>
    /// Determines the pronunciation mark of a character. Currently always MarkType.None.
    /// </summary>
    MarkType DetectMarkType(char ch)
    {
        // Placeholder: a rule based on a prefix character could go here,
        // e.g. '*' for stress and '^' for linking.
        return MarkType.None;
    }
}

关键技术点：

逐字时间映射：将行级时间映射到字级，支持精确同步
增量更新：每帧只更新变化的部分（虽然这里简化为全文重建，实际可优化）
富文本标签动态生成：使用TextMesh Pro的标签系统实现多样化效果
性能缓存：缓存颜色值避免重复转换
标记系统：支持多种视觉标记（重读音下划线、连接音背景等）

难点 4：多输入类型的灵活处理架构

问题背景：AR游戏需要支持多种输入：

手部手势（拍打、握拳等）
语音输入（某些歌曲行可能用语音判定）
控制器按键（备用输入）
各种输入的延迟和误判率不同
需要灵活切换或组合输入方式

解决方案：采用策略模式 + 工厂模式的输入系统设计：


/// <summary>
/// Kinds of player input the rhythm game distinguishes.
/// </summary>
public enum InputType
{
    /// <summary>Tap with the left hand.</summary>
    TapLeft = 0,

    /// <summary>Tap with the right hand.</summary>
    TapRight = 1,

    /// <summary>Gesture A (fist).</summary>
    GestureA = 2,

    /// <summary>Gesture B (open hand).</summary>
    GestureB = 3,

    /// <summary>Loud voice input.</summary>
    VoiceHigh = 4,

    /// <summary>Quiet voice input.</summary>
    VoiceLow = 5,
}

/// <summary>
/// Contract for one input source (hand tap, voice, ...) that feeds the rhythm game.
/// </summary>
public interface IInputHandler
{
    /// <summary>
    /// Runs this handler's input logic. InputManager.RegisterInputHandler calls it
    /// right after the handler has been created.
    /// </summary>
    void ProcessInput();

    /// <summary>
    /// Returns the input type this handler reports.
    /// </summary>
    InputType GetInputType();
}

/// <summary>
/// Hand-tap input handler: forwards button presses reported by the hand collision
/// detector to the rhythm game as a tap of a fixed input type.
/// </summary>
public class TapInputHandler : IInputHandler, System.IDisposable
{
    private readonly RokidHandCollisionDetector collisionDetector;
    private readonly InputType inputType;
    private bool subscribed;

    /// <param name="detector">Detector whose OnButtonPressed event drives this handler.</param>
    /// <param name="inputType">
    /// Input type reported for every press. Defaults to TapLeft, which is what the
    /// handler always reported before the parameter existed.
    /// </param>
    public TapInputHandler(RokidHandCollisionDetector detector, InputType inputType = InputType.TapLeft)
    {
        collisionDetector = detector;
        this.inputType = inputType;
    }

    /// <summary>
    /// Hooks the detector's press event. Safe to call repeatedly: the subscription is
    /// made only once, so polling this handler does not stack up callbacks.
    /// </summary>
    public void ProcessInput()
    {
        if (subscribed || collisionDetector == null)
        {
            return;
        }

        collisionDetector.OnButtonPressed += HandleButtonPressed;
        subscribed = true;
    }

    public InputType GetInputType()
    {
        return inputType;
    }

    /// <summary>
    /// Removes the press-event subscription so a retired handler stops producing input.
    /// </summary>
    public void Dispose()
    {
        if (!subscribed)
        {
            return;
        }

        if (collisionDetector != null)
        {
            collisionDetector.OnButtonPressed -= HandleButtonPressed;
        }

        subscribed = false;
    }

    // NOTE(review): OnButtonPressed carries no hand information, so a TapLeft and a
    // TapRight handler attached to the same detector both fire on every press.
    private void HandleButtonPressed()
    {
        if (RhythmGameManager.Instance != null)
        {
            RhythmGameManager.Instance.ProcessInput(inputType);
        }
    }
}

/// <summary>
/// Voice input handler: samples the loudness of an audio source and reports a
/// VoiceHigh or VoiceLow input when the loudness level changes.
/// </summary>
public class VoiceInputHandler : IInputHandler
{
    private const int SampleCount = 1024;
    private const float HighVolumeFactor = 1.5f;   // High = above threshold * 1.5

    private readonly AudioSource voiceSource;
    private readonly InputType? handledType;
    private readonly float[] samples = new float[SampleCount];   // Reused for every measurement
    private float volumeThreshold = 0.5f;
    private InputType? lastLevel;   // Level seen by the previous ProcessInput call (null = below threshold)

    /// <param name="source">Audio source whose output is measured.</param>
    /// <param name="handledType">
    /// When set, only this level (VoiceHigh or VoiceLow) is reported; when null, whichever
    /// level is detected is reported.
    /// </param>
    public VoiceInputHandler(AudioSource source, InputType? handledType = null)
    {
        voiceSource = source;
        this.handledType = handledType;
    }

    /// <summary>
    /// Measures the current loudness and reports an input when it has just entered a
    /// new above-threshold level. Reporting only on a level change keeps one sustained
    /// sound from producing an input on every call when the handler is polled.
    /// </summary>
    public void ProcessInput()
    {
        if (voiceSource == null)
        {
            return;
        }

        float rms = GetAudioRMS();

        InputType? level = null;
        if (rms > volumeThreshold)
        {
            level = rms > volumeThreshold * HighVolumeFactor
                ? InputType.VoiceHigh
                : InputType.VoiceLow;
        }

        bool levelChanged = level != lastLevel;
        lastLevel = level;

        if (!levelChanged || level == null)
        {
            return;
        }

        if (handledType != null && handledType != level)
        {
            return;
        }

        if (RhythmGameManager.Instance != null)
        {
            RhythmGameManager.Instance.ProcessInput(level.Value);
        }
    }

    // Root-mean-square of the source's current output samples.
    float GetAudioRMS()
    {
        voiceSource.GetOutputData(samples, 0);

        float sum = 0f;
        foreach (float sample in samples)
        {
            sum += sample * sample;
        }

        return Mathf.Sqrt(sum / samples.Length);
    }

    public InputType GetInputType()
    {
        return handledType ?? InputType.VoiceHigh;
    }
}

/// <summary>
/// Creates the input handler that serves a given input type.
/// </summary>
public class InputHandlerFactory
{
    /// <summary>
    /// Creates a handler for <paramref name="type"/>.
    /// </summary>
    /// <exception cref="System.ArgumentException">The input type has no handler.</exception>
    public static IInputHandler CreateHandler(InputType type)
    {
        switch (type)
        {
            case InputType.TapLeft:
            case InputType.TapRight:
            {
                // FindObjectOfType is a static member of UnityEngine.Object; this class does
                // not derive from it, so the call has to be qualified.
                var detector = UnityEngine.Object.FindObjectOfType<RokidHandCollisionDetector>();
                if (detector == null)
                {
                    Debug.LogWarning("InputHandlerFactory: no RokidHandCollisionDetector found; tap input will stay silent.");
                }

                return new TapInputHandler(detector, type);
            }

            case InputType.VoiceHigh:
            case InputType.VoiceLow:
            {
                // NOTE(review): this picks an arbitrary AudioSource in the scene - confirm
                // it is the one carrying the voice signal rather than the music.
                var source = UnityEngine.Object.FindObjectOfType<AudioSource>();
                if (source == null)
                {
                    Debug.LogWarning("InputHandlerFactory: no AudioSource found; voice input will stay silent.");
                }

                return new VoiceInputHandler(source, type);
            }

            default:
                throw new System.ArgumentException($"不支持的输入类型: {type}");
        }
    }
}

/// <summary>
/// Owns the active input handlers: registers and retires them and polls them every frame.
/// </summary>
public class InputManager : MonoBehaviour
{
    private Dictionary<InputType, IInputHandler> inputHandlers
        = new Dictionary<InputType, IInputHandler>();

    // Snapshot polled in Update, so a handler may change the registrations while it runs.
    private readonly List<IInputHandler> pollBuffer = new List<IInputHandler>();

    void Update()
    {
        if (inputHandlers.Count == 0)
        {
            return;
        }

        pollBuffer.Clear();
        pollBuffer.AddRange(inputHandlers.Values);

        // Sampling handlers (voice) need a call per frame; event-based handlers (tap)
        // treat repeated calls as a no-op.
        for (int i = 0; i < pollBuffer.Count; i++)
        {
            pollBuffer[i].ProcessInput();
        }
    }

    void OnDestroy()
    {
        ReleaseAllHandlers();
    }

    /// <summary>
    /// Creates and activates the handler for <paramref name="type"/>, replacing any
    /// handler already registered for it.
    /// </summary>
    public void RegisterInputHandler(InputType type)
    {
        // Create first: an unsupported type throws before any state is touched.
        var handler = InputHandlerFactory.CreateHandler(type);

        ReleaseHandler(type);
        inputHandlers[type] = handler;
        handler.ProcessInput();
    }

    /// <summary>
    /// Retires the handler registered for <paramref name="type"/>, if any.
    /// </summary>
    public void UnregisterInputHandler(InputType type)
    {
        ReleaseHandler(type);
    }

    /// <summary>
    /// Switches the active input types at runtime; several types may be active together.
    /// </summary>
    public void SetActiveInputTypes(params InputType[] types)
    {
        // Retire the old handlers, including their event subscriptions.
        ReleaseAllHandlers();

        if (types == null)
        {
            types = new InputType[0];
        }

        // Register the new handlers.
        foreach (var type in types)
        {
            RegisterInputHandler(type);
        }

        Debug.Log($"已切换输入类型: {string.Join(", ", types)}");
    }

    // Removes one handler and disposes it when it holds resources such as event subscriptions.
    private void ReleaseHandler(InputType type)
    {
        IInputHandler handler;
        if (!inputHandlers.TryGetValue(type, out handler))
        {
            return;
        }

        inputHandlers.Remove(type);
        DisposeHandler(handler);
    }

    private void ReleaseAllHandlers()
    {
        foreach (var handler in inputHandlers.Values)
        {
            DisposeHandler(handler);
        }

        inputHandlers.Clear();
    }

    private static void DisposeHandler(IInputHandler handler)
    {
        var disposable = handler as System.IDisposable;
        if (disposable != null)
        {
            disposable.Dispose();
        }
    }
}

关键设计模式：

策略模式：每种输入类型都有独立的处理策略
工厂模式：统一创建输入处理器，便于扩展
灵活切换：运行时动态注册/注销输入处理器
多输入支持：可同时启用多种输入方式

四、性能优化与最佳实践

4.1 CPU优化

对象池：复用歌词文本容器和UI元素，减少GC压力
范围查询优化：使用哈希表预处理音符，避免逐帧全表遍历
冷却机制：防止高频触发导致的重复计算

4.2 内存优化

分离数据和表现：歌词时序数据独立存储，UI组件只负责渲染
缓存机制：缓存颜色值、富文本标签等频繁使用的对象

4.3 网络与同步

考虑多人模式时，使用时间戳同步而非帧同步
使用差分同步减少网络带宽占用

五、总结与展望

On the Beat通过精确的时间判定、实时的手部追踪和动态的歌词渲染，为用户创造了沉浸式的音乐游戏体验。项目中的多个技术方案（分层碰撞检测、分级时间窗口、动态字高亮等）可以作为其他AR音乐类应用的参考。

六、后续优化方向

AI难度调整：根据玩家表现动态调整判定窗口
多人在线：实现联网对战、合作模式
歌曲编辑器：让用户自定义歌曲和音符
社区分享：用户可上传自己的成绩和视频

参考资源

Rokid UXR SDK 文档
Unity TextMesh Pro 富文本指南
节奏游戏设计最佳实践