自媒体人爆款标题自动分析工具,提高点击率。

1 阅读9分钟

自媒体人爆款标题自动分析工具

一、实际应用场景描述

场景:某数字文化公司的自媒体运营团队,每天需要生产大量内容投放到微信公众号、小红书、抖音等平台。新人小李负责标题创作,经常遇到:

  • 写了10个标题,不知道哪个更容易爆
  • 凭感觉写标题,缺乏数据支撑
  • 不了解不同平台的标题特征差异
  • 爆款标题的规律难以总结

典型工作流程:

选题 → 撰写内容 → 创作多个标题 → 人工筛选 → 发布 → 数据反馈 → 总结经验

二、引入痛点

graph TD
    A[内容创作完成] --> B[创作多个标题]
    B --> C{哪个标题更好?}
    C --> D[凭经验猜测]
    C --> E[团队投票]
    C --> F[AB测试]
    D --> G[点击率低]
    E --> H[效率低,主观性强]
    F --> I[周期长,成本高]
    G --> J[流量损失]
    H --> J
    I --> J

痛点分析表:

| 痛点 | 影响 | 解决思路 |
| --- | --- | --- |
| 缺乏科学评估标准 | 标题质量不稳定 | 建立多维度评分体系 |
| 平台特征不了解 | 标题与平台不匹配 | 平台特征库+规则引擎 |
| 爆款规律难总结 | 新人学习曲线陡峭 | 数据分析+模式识别 |
| 多标题比较困难 | 选择靠拍脑袋 | 自动化分析+排序推荐 |

三、核心逻辑讲解

3.1 系统架构

┌─────────────────────────────────────────────────────────────┐ │ 表现层 (View) │ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │ │ 标题输入 │ │ 分析结果 │ │ 爆款推荐 │ │ 报告生成 │ │ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ ├─────────────────────────────────────────────────────────────┤ │ 业务层 (Service) │ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │ │ 分析引擎 │ │ 规则引擎 │ │ 推荐系统 │ │ 学习引擎 │ │ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ ├─────────────────────────────────────────────────────────────┤ │ 数据层 (Data) │ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │ │ 标题库 │ │ 爆款案例 │ │ 平台规则 │ │ 词库 │ │ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ └─────────────────────────────────────────────────────────────┘

3.2 核心算法逻辑

标题评分公式:

总分 = 0.25×吸引力 + 0.20×相关性 + 0.20×传播力 + 0.15×平台匹配度 + 0.10×原创性 + 0.10×时效性

各维度详解:

| 维度 | 权重 | 评估要点 |
| --- | --- | --- |
| 吸引力 | 25% | 好奇心、痛点、利益点、情感共鸣 |
| 相关性 | 20% | 与内容匹配度、关键词覆盖 |
| 传播力 | 20% | 易记性、社交属性、分享动机 |
| 平台匹配度 | 15% | 符合平台算法偏好、用户习惯 |
| 原创性 | 10% | 新颖度、差异化程度 |
| 时效性 | 10% | 热点结合、时间敏感度 |

3.3 平台特征规则

平台特征矩阵

# Per-platform title heuristics: maximum title length, preferred keywords,
# discouraged words and the scoring weights that platform emphasises.
PLATFORM_RULES = {
    "wechat": {
        "title_max_len": 64,
        "keywords": ["揭秘", "深度", "重磅", "独家"],
        "avoid_words": ["震惊", "疯传"],  # easily flagged as clickbait
        "emotion_weight": 0.3,
        "curiosity_weight": 0.4,
    },
    "xiaohongshu": {
        "title_max_len": 20,
        "keywords": ["种草", "安利", "亲测", "宝藏"],
        "avoid_words": ["广告", "推广"],
        "emotion_weight": 0.5,
        "trend_weight": 0.3,
    },
    "douyin": {
        "title_max_len": 30,
        "keywords": ["挑战", "教程", "干货", "必看"],
        "avoid_words": ["点击", "链接"],
        "action_weight": 0.4,
        "result_weight": 0.3,
    },
}

四、代码模块化实现

4.1 项目结构

title_analyzer/
├── main.py                     # 程序入口
├── config/
│   └── settings.py             # 配置文件
├── models/
│   ├── __init__.py
│   ├── title_score.py          # 标题评分模型
│   ├── platform_rules.py       # 平台规则模型
│   └── hot_word.py             # 热词模型
├── services/
│   ├── __init__.py
│   ├── analyzer_engine.py      # 分析引擎
│   ├── rule_engine.py          # 规则引擎
│   ├── recommender.py          # 推荐系统
│   └── data_loader.py          # 数据加载器
├── utils/
│   ├── __init__.py
│   ├── text_processor.py       # 文本处理工具
│   ├── score_calculator.py     # 分数计算器
│   └── visualizer.py           # 可视化工具
├── data/
│   ├── hot_titles.json         # 爆款标题库
│   ├── platform_config.json    # 平台配置
│   └── stopwords.txt           # 停用词表
├── outputs/                    # 输出目录
└── README.md                   # 项目说明

4.2 核心代码实现

"models/title_score.py" - 标题评分模型

""" 标题评分模型模块 功能:定义标题评分的数据结构和计算逻辑 核心概念:将标题质量量化为多维度评分 """

from dataclasses import dataclass, field from enum import Enum from typing import Dict, List, Optional from datetime import datetime import uuid

class ScoreDimension(Enum):
    """Dimensions a title is scored on; each value is the Chinese display label."""

    ATTRACTION = "吸引力"  # ability to make readers want to click
    RELEVANCE = "相关性"  # how well the title matches the content
    SPREADABILITY = "传播力"  # how easily the title spreads
    PLATFORM_FIT = "平台匹配度"  # fit with the target platform's characteristics
    ORIGINALITY = "原创性"  # novelty and uniqueness
    TIMELINESS = "时效性"  # how much the title ties into trending topics

@dataclass
class DimensionScore:
    """Score of a title on one dimension.

    Attributes:
        dimension: The dimension this score belongs to.
        score: Numeric score; clamped into the 0-100 range on construction.
        details: Free-text explanation of how the score was reached.
        suggestions: Improvement suggestions for this dimension.
    """

    dimension: ScoreDimension
    score: float
    details: str = ""
    suggestions: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Clamp ``score`` so it never leaves the 0-100 range."""
        self.score = max(0, min(100, self.score))

@dataclass
class TitleScore:
    """Composite score of a single title across all scoring dimensions.

    Attributes:
        score_id: Short identifier, the first 8 characters of a UUID4 string.
        title: The title that was scored.
        scores: Per-dimension results keyed by ``ScoreDimension``.
        total_score: Weighted total, refreshed by ``calculate_total``.
        rank: Rank among the compared titles; ``None`` until assigned.
        analysis_time: Timestamp taken with ``datetime.now`` at creation.
        platform: Target platform identifier, ``"wechat"`` by default.
    """

    score_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
    title: str = ""
    scores: Dict[ScoreDimension, DimensionScore] = field(default_factory=dict)
    total_score: float = 0.0
    rank: Optional[int] = None
    analysis_time: datetime = field(default_factory=datetime.now)
    platform: str = "wechat"

    # Weight of each dimension in the total score. Left unannotated on purpose
    # so @dataclass keeps it as a shared class attribute instead of a field.
    WEIGHTS = {
        ScoreDimension.ATTRACTION: 0.25,
        ScoreDimension.RELEVANCE: 0.20,
        ScoreDimension.SPREADABILITY: 0.20,
        ScoreDimension.PLATFORM_FIT: 0.15,
        ScoreDimension.ORIGINALITY: 0.10,
        ScoreDimension.TIMELINESS: 0.10,
    }

    def calculate_total(self) -> float:
        """Compute, store and return the weighted total score.

        Every dimension score is multiplied by its weight in ``WEIGHTS``
        (a dimension without a weight contributes 0); the sum is rounded to
        two decimals and written to ``total_score``.

        Returns:
            The freshly stored ``total_score``.
        """
        total = 0.0
        for dimension, dim_score in self.scores.items():
            total += dim_score.score * self.WEIGHTS.get(dimension, 0)
        self.total_score = round(total, 2)
        return self.total_score

    def get_weakest_dimension(self) -> Optional[DimensionScore]:
        """Return the lowest-scoring dimension result, or ``None`` if there are no scores."""
        if not self.scores:
            return None
        return min(self.scores.values(), key=lambda ds: ds.score)

    def get_improvement_priority(self) -> List[tuple]:
        """Rank dimensions by how much the total could still gain from them.

        The potential gain of a dimension is ``(100 - score) * weight``,
        rounded to two decimals.

        Returns:
            ``(dimension, current_score, potential_gain)`` tuples sorted by
            potential gain, largest first.
        """
        improvements = [
            (
                dimension,
                dim_score.score,
                round((100 - dim_score.score) * self.WEIGHTS.get(dimension, 0), 2),
            )
            for dimension, dim_score in self.scores.items()
        ]
        return sorted(improvements, key=lambda item: item[2], reverse=True)

    def to_dict(self) -> dict:
        """Return a plain-dict view of the score, suitable for JSON serialisation."""
        return {
            "score_id": self.score_id,
            "title": self.title,
            "platform": self.platform,
            "total_score": self.total_score,
            "rank": self.rank,
            "analysis_time": self.analysis_time.isoformat(),
            "dimension_scores": {
                dim.value: {
                    "score": ds.score,
                    "details": ds.details,
                    "suggestions": ds.suggestions,
                }
                for dim, ds in self.scores.items()
            },
        }

    def to_markdown(self) -> str:
        """Render the analysis as a Markdown report.

        The report has a header, one section per scored dimension (with a
        20-cell bar and any suggestions) and a closing list of dimensions
        ordered by ``get_improvement_priority``.
        """
        lines = [
            "# 标题分析报告",
            "",
            f"**标题**: {self.title}",
            f"**平台**: {self.platform}",
            f"**综合得分**: {self.total_score}/100",
            f"**分析时间**: {self.analysis_time.strftime('%Y-%m-%d %H:%M')}",
            "",
            "---",
            "",
            "## 📊 各维度评分",
        ]

        for dimension, dim_score in self.scores.items():
            filled = int(dim_score.score / 5)  # 5 points per cell -> 20-cell bar
            bar = "█" * filled + "░" * (20 - filled)
            # list.append() accepts exactly one item, so the multi-line section
            # has to be added with extend().
            lines.extend([
                "",
                f"### {dimension.value}: {dim_score.score}分",
                f"`{bar}`",
                f"_{dim_score.details}_",
            ])
            if dim_score.suggestions:
                lines.append("**改进建议**:")
                lines.extend(f"- {suggestion}" for suggestion in dim_score.suggestions)

        # Closing section: dimensions ordered by potential gain.
        lines.extend([
            "",
            "## 🎯 优化建议(按优先级)",
        ])
        for i, (dim, current, potential) in enumerate(self.get_improvement_priority(), 1):
            lines.append(f"{i}. **{dim.value}**: 当前{current}分,可提升{potential}分")

        return "\n".join(lines)

"models/platform_rules.py" - 平台规则模型

""" 平台规则模型模块 功能:定义各平台的标题特征和规则 核心概念:不同平台有不同的算法偏好和用户习惯 """

from dataclasses import dataclass, field from typing import Dict, List, Set, Optional from enum import Enum import json import os

class Platform(Enum):
    """Supported publishing platforms; each value is the platform's string id."""

    WECHAT = "wechat"  # WeChat Official Accounts
    XIAOHONGSHU = "xiaohongshu"  # Xiaohongshu
    DOUYIN = "douyin"  # Douyin
    BILIBILI = "bilibili"  # Bilibili
    ZHIHU = "zhihu"  # Zhihu

@dataclass
class PlatformConfig:
    """Title rules and scoring weights of one platform.

    Attributes:
        platform: The platform this configuration belongs to.
        name: Display name; defaults to the upper-cased platform value.
        title_max_len: Maximum title length in characters.
        title_min_len: Minimum title length in characters.
        keywords: Keywords that are popular on the platform.
        avoid_words: Words to avoid; their presence only produces a warning.
        emotion_weight: Weight of emotional appeal.
        curiosity_weight: Weight of curiosity triggers.
        trend_weight: Weight of trending-topic tie-ins.
        action_weight: Weight of calls to action.
        result_weight: Weight of promised results.
        special_rules: Free-text rules; only rules containing ``不能包含``
            are enforced by ``validate_title``.
    """

    platform: Platform
    name: str = ""
    title_max_len: int = 64
    title_min_len: int = 5
    keywords: List[str] = field(default_factory=list)
    avoid_words: List[str] = field(default_factory=list)
    emotion_weight: float = 0.3
    curiosity_weight: float = 0.3
    trend_weight: float = 0.2
    action_weight: float = 0.2
    result_weight: float = 0.2
    special_rules: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Fall back to the upper-cased platform value when no name is given."""
        if not self.name:
            self.name = self.platform.value.upper()

    def validate_title(self, title: str) -> Dict:
        """Check a title against this platform's length and wording rules.

        Args:
            title: The title to validate.

        Returns:
            A dict with three keys:
            ``valid`` (bool) - ``False`` as soon as any error is recorded;
            ``errors`` (list of str) - length and special-rule violations;
            ``warnings`` (list of str) - discouraged words that were found.
        """
        result = {
            "valid": True,
            "errors": [],
            "warnings": [],
        }

        # Length limits.
        if len(title) < self.title_min_len:
            result["errors"].append(f"标题过短,最少{self.title_min_len}个字符")
            result["valid"] = False

        if len(title) > self.title_max_len:
            result["errors"].append(f"标题过长,最多{self.title_max_len}个字符")
            result["valid"] = False

        # Discouraged words only warn; they do not invalidate the title.
        found_avoid = [w for w in self.avoid_words if w in title]
        if found_avoid:
            result["warnings"].append(f"包含可能受限词汇: {', '.join(found_avoid)}")

        # Special rules: the text after "不能包含" is a "、"-separated list of
        # banned terms.
        for rule in self.special_rules:
            if "不能包含" not in rule:
                continue
            # Drop empty tokens: "" is a substring of every title and would
            # turn a rule with nothing after "不能包含" into a blanket failure.
            banned = [w for w in rule.split("不能包含")[1].split("、") if w]
            if any(w in title for w in banned):
                result["errors"].append(f"违反规则: {rule}")
                result["valid"] = False

        return result

    def get_feature_vector(self, title: str) -> Dict[str, float]:
        """Extract simple surface features of a title for this platform.

        Args:
            title: The title text.

        Returns:
            A dict with ``length_score`` (length / max length, capped at 1.0),
            ``keyword_count`` (share of platform keywords present),
            ``avoid_word_count`` (number of discouraged words present) and the
            0.0/1.0 flags ``has_question``, ``has_number``, ``has_emotion``
            and ``has_special_char``.
        """
        # Chinese titles normally use full-width punctuation, so both the
        # ASCII and the full-width forms are checked.
        question_marks = ("?", "?")
        emotion_markers = ("!", "!", "太", "最", "必", "一定")
        bracket_chars = ("【", "】", "[", "]", "《", "》")

        features = {
            "length_score": min(1.0, len(title) / self.title_max_len),
            "keyword_count": sum(1 for k in self.keywords if k in title) / max(len(self.keywords), 1),
            "avoid_word_count": sum(1 for w in self.avoid_words if w in title),
            "has_question": 1.0 if any(q in title for q in question_marks) else 0.0,
            "has_number": 1.0 if any(c.isdigit() for c in title) else 0.0,
            "has_emotion": 1.0 if any(e in title for e in emotion_markers) else 0.0,
            "has_special_char": 1.0 if any(c in title for c in bracket_chars) else 0.0,
        }
        return features

class PlatformRulesManager:
    """Registry of per-platform title configurations.

    Responsibilities:
        1. Build the built-in platform configurations.
        2. Hold them keyed by ``Platform``.
        3. Offer lookup helpers and JSON export.

    Note that ``config_path`` is only written to by ``save_configs``; the
    constructor always starts from the built-in defaults.
    """

    def __init__(self, config_path: str = "data/platform_config.json"):
        """Store the export path and populate the built-in configurations.

        Args:
            config_path: File that ``save_configs`` writes to.
        """
        self.config_path = config_path
        self.platforms: Dict[Platform, PlatformConfig] = {}
        self._load_default_configs()

    def _load_default_configs(self):
        """Replace ``self.platforms`` with the five built-in platform configs."""
        defaults = [
            # WeChat Official Accounts
            PlatformConfig(
                platform=Platform.WECHAT,
                name="微信公众号",
                title_max_len=64,
                title_min_len=5,
                keywords=["揭秘", "深度", "重磅", "独家", "解析", "盘点", "指南", "必读"],
                avoid_words=["震惊", "疯传", "速看", "紧急", "刚刚", "爆料"],
                emotion_weight=0.3,
                curiosity_weight=0.4,
                trend_weight=0.2,
                action_weight=0.1,
                result_weight=0.2,
                special_rules=[
                    "不能包含过多表情符号",
                    "不能包含诱导分享词汇如'不转不是中国人'",
                ],
            ),
            # Xiaohongshu
            PlatformConfig(
                platform=Platform.XIAOHONGSHU,
                name="小红书",
                title_max_len=20,
                title_min_len=4,
                keywords=["种草", "安利", "亲测", "宝藏", "绝绝子", "yyds", "真实", "分享"],
                avoid_words=["广告", "推广", "代购", "购买", "链接", "微信"],
                emotion_weight=0.5,
                curiosity_weight=0.3,
                trend_weight=0.3,
                action_weight=0.2,
                result_weight=0.2,
                special_rules=[
                    "不能包含硬广词汇",
                    "需要符合社区氛围,避免过于营销化",
                ],
            ),
            # Douyin
            PlatformConfig(
                platform=Platform.DOUYIN,
                name="抖音",
                title_max_len=30,
                title_min_len=3,
                keywords=["挑战", "教程", "干货", "必看", "神级", "逆天", "绝了", "爆款"],
                avoid_words=["点击", "链接", "关注", "私信", "加V", "电话"],
                emotion_weight=0.4,
                curiosity_weight=0.3,
                trend_weight=0.3,
                action_weight=0.4,
                result_weight=0.3,
                special_rules=[
                    "需要强行动号召",
                    "需要明确结果预期",
                ],
            ),
            # Bilibili
            PlatformConfig(
                platform=Platform.BILIBILI,
                name="哔哩哔哩",
                title_max_len=40,
                title_min_len=4,
                keywords=["硬核", "科普", "整活", "教学", "测评", "实况", "二创", "鬼畜"],
                avoid_words=["广告", "推广", "恰饭", "充值", "氪金"],
                emotion_weight=0.3,
                curiosity_weight=0.3,
                trend_weight=0.2,
                action_weight=0.1,
                result_weight=0.1,
                special_rules=[
                    "需要符合社区文化",
                    "避免过于标题党",
                ],
            ),
            # Zhihu
            PlatformConfig(
                platform=Platform.ZHIHU,
                name="知乎",
                title_max_len=50,
                title_min_len=6,
                keywords=["如何看待", "为什么", "如何评价", "有哪些", "求推荐", "分析", "总结"],
                avoid_words=["震惊", "必看", "速成", "躺赚", "暴富"],
                emotion_weight=0.2,
                curiosity_weight=0.4,
                trend_weight=0.2,
                action_weight=0.1,
                result_weight=0.1,
                special_rules=[
                    "需要问题导向",
                    "需要理性客观",
                ],
            ),
        ]
        self.platforms = {config.platform: config for config in defaults}

    def get_platform(self, platform: Platform) -> Optional[PlatformConfig]:
        """Return the configuration of ``platform``, or ``None`` if unknown."""
        return self.platforms.get(platform)

    def get_all_platforms(self) -> List[PlatformConfig]:
        """Return all registered configurations as a new list."""
        return list(self.platforms.values())

    def save_configs(self):
        """Write all configurations to ``config_path`` as UTF-8 JSON.

        The file maps each platform value to its settings. The parent
        directory is created first so a missing output folder does not make
        the export fail.
        """
        configs_dict = {}
        for platform, config in self.platforms.items():
            configs_dict[platform.value] = {
                "name": config.name,
                "title_max_len": config.title_max_len,
                "title_min_len": config.title_min_len,
                "keywords": config.keywords,
                "avoid_words": config.avoid_words,
                "emotion_weight": config.emotion_weight,
                "curiosity_weight": config.curiosity_weight,
                "trend_weight": config.trend_weight,
                "action_weight": config.action_weight,
                "result_weight": config.result_weight,
                "special_rules": config.special_rules,
            }

        directory = os.path.dirname(self.config_path)
        if directory:  # empty when config_path is a bare file name
            os.makedirs(directory, exist_ok=True)

        with open(self.config_path, 'w', encoding='utf-8') as f:
            json.dump(configs_dict, f, ensure_ascii=False, indent=2)

"services/analyzer_engine.py" - 分析引擎

""" 分析引擎模块 功能:核心标题分析逻辑 核心概念:基于规则和模型的标题质量评估 """

import re
from collections import Counter
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

from models.platform_rules import Platform, PlatformConfig, PlatformRulesManager
from models.title_score import DimensionScore, ScoreDimension, TitleScore
from utils.text_processor import TextProcessor

@dataclass
class AnalysisContext:
    """Inputs shared by all dimension analyzers for one title.

    Attributes:
        title: The title being analysed.
        platform: The target platform.
        platform_config: Configuration of the target platform.
        text_features: Features extracted from the title by the text processor.
        hot_words: Hot words extracted for the title.
    """

    title: str
    platform: Platform
    platform_config: PlatformConfig
    text_features: Dict
    hot_words: List[str]

class AnalyzerEngine: """ 标题分析引擎

职责:
    1. 协调各维度分析器
    2. 聚合分析结果
    3. 生成综合评分
    
设计模式:
    - 策略模式: 每个维度使用独立的分析策略
    - 装饰器模式: 可叠加不同的分析增强器
"""

def __init__(self, rules_manager: PlatformRulesManager):
    """Store the collaborators and build the dimension dispatch table.

    Args:
        rules_manager: Provider of per-platform configuration, kept as-is.
    """
    self.rules_manager = rules_manager
    self.text_processor = TextProcessor()

    # One bound analyzer per scoring dimension. The pair order is the dict's
    # insertion order, i.e. the order analyze() iterates the analyzers in.
    analyzer_pairs = (
        (ScoreDimension.ATTRACTION, self._analyze_attraction),
        (ScoreDimension.RELEVANCE, self._analyze_relevance),
        (ScoreDimension.SPREADABILITY, self._analyze_spreadability),
        (ScoreDimension.PLATFORM_FIT, self._analyze_platform_fit),
        (ScoreDimension.ORIGINALITY, self._analyze_originality),
        (ScoreDimension.TIMELINESS, self._analyze_timeliness),
    )
    self._dimension_analyzers = dict(analyzer_pairs)

def analyze(self, title: str, platform: Platform = Platform.WECHAT,
            content_keywords: Optional[List[str]] = None) -> TitleScore:
    """Score a title on every registered dimension and total the result.

    Args:
        title: The title to analyse.
        platform: Target platform; its configuration is looked up in the
            rules manager.
        content_keywords: Keywords of the underlying content, passed to every
            dimension analyzer; ``None`` is passed on as an empty list.

    Returns:
        A ``TitleScore`` holding one ``DimensionScore`` per registered
        dimension, with ``total_score`` already calculated.

    Raises:
        ValueError: If the rules manager has no configuration for ``platform``.
    """
    # Gather everything the dimension analyzers need.
    platform_config = self.rules_manager.get_platform(platform)
    if not platform_config:
        raise ValueError(f"不支持的平台: {platform}")

    text_features = self.text_processor.extract_features(title)
    hot_words = self._extract_hot_words(title, platform)

    context = AnalysisContext(
        title=title,
        platform=platform,
        platform_config=platform_config,
        text_features=text_features,
        hot_words=hot_words
    )

    score = TitleScore(
        title=title,
        platform=platform.value
    )

    # Run every dimension analyzer in registration order.
    for dimension, analyzer in self._dimension_analyzers.items():
        try:
            dim_score = analyzer(context, content_keywords or [])
            score.scores[dimension] = dim_score
        except Exception as e:
            # Deliberate best-effort: one failing dimension must not abort the
            # whole analysis, so it gets a neutral 50 and the error text is
            # kept in ``details``.
            score.scores[dimension] = DimensionScore(
                dimension=dimension,
                score=50.0,
                details=f"分析异常: {str(e)}",
                suggestions=["请联系技术支持"]
            )

    # Fold the dimension scores into the weighted total.
    score.calculate_total()

    return score

def _analyze_attraction(self, context: AnalysisContext, 
                       content_keywords: List[str]) -> DimensionScore:
    """
    分析标题吸引力
    
    评估要点:
        利用AI解决实际问题,如果你觉得这个工具好用,欢迎关注长安牧笛!