自媒体人爆款标题自动分析工具
一、实际应用场景描述
场景:某数字文化公司的自媒体运营团队,每天需要生产大量内容投放到微信公众号、小红书、抖音等平台。新人小李负责标题创作,经常遇到:
- 写了10个标题,不知道哪个更容易爆
- 凭感觉写标题,缺乏数据支撑
- 不了解不同平台的标题特征差异
- 爆款标题的规律难以总结
典型工作流程:
选题 → 撰写内容 → 创作多个标题 → 人工筛选 → 发布 → 数据反馈 → 总结经验
二、引入痛点
```mermaid
graph TD
    A[内容创作完成] --> B[创作多个标题]
    B --> C{哪个标题更好?}
    C --> D[凭经验猜测]
    C --> E[团队投票]
    C --> F[AB测试]
    D --> G[点击率低]
    E --> H[效率低,主观性强]
    F --> I[周期长,成本高]
    G --> J[流量损失]
    H --> J
    I --> J
```
痛点分析表:
| 痛点 | 影响 | 解决思路 |
| --- | --- | --- |
| 缺乏科学评估标准 | 标题质量不稳定 | 建立多维度评分体系 |
| 平台特征不了解 | 标题与平台不匹配 | 平台特征库+规则引擎 |
| 爆款规律难总结 | 新人学习曲线陡峭 | 数据分析+模式识别 |
| 多标题比较困难 | 选择靠拍脑袋 | 自动化分析+排序推荐 |
三、核心逻辑讲解
3.1 系统架构
```text
┌─────────────────────────────────────────────────────────────┐
│                        表现层 (View)                         │
│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐     │
│  │ 标题输入 │  │ 分析结果 │  │ 爆款推荐 │  │ 报告生成 │     │
│  └──────────┘  └──────────┘  └──────────┘  └──────────┘     │
├─────────────────────────────────────────────────────────────┤
│                       业务层 (Service)                       │
│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐     │
│  │ 分析引擎 │  │ 规则引擎 │  │ 推荐系统 │  │ 学习引擎 │     │
│  └──────────┘  └──────────┘  └──────────┘  └──────────┘     │
├─────────────────────────────────────────────────────────────┤
│                        数据层 (Data)                         │
│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐     │
│  │  标题库  │  │ 爆款案例 │  │ 平台规则 │  │   词库   │     │
│  └──────────┘  └──────────┘  └──────────┘  └──────────┘     │
└─────────────────────────────────────────────────────────────┘
```
3.2 核心算法逻辑
标题评分公式:
总分 = 0.25×吸引力 + 0.20×相关性 + 0.20×传播力 + 0.15×平台匹配度 + 0.10×原创性 + 0.10×时效性
各维度详解:
| 维度 | 权重 | 评估要点 |
| --- | --- | --- |
| 吸引力 | 25% | 好奇心、痛点、利益点、情感共鸣 |
| 相关性 | 20% | 与内容匹配度、关键词覆盖 |
| 传播力 | 20% | 易记性、社交属性、分享动机 |
| 平台匹配度 | 15% | 符合平台算法偏好、用户习惯 |
| 原创性 | 10% | 新颖度、差异化程度 |
| 时效性 | 10% | 热点结合、时间敏感度 |
3.3 平台特征规则
平台特征矩阵
# Per-platform title traits: length cap, favoured words, discouraged words,
# and the scoring weights that matter most on that platform.
PLATFORM_RULES = {
    "wechat": {
        "title_max_len": 64,
        "keywords": ["揭秘", "深度", "重磅", "独家"],
        "avoid_words": ["震惊", "疯传"],  # easily flagged as clickbait
        "emotion_weight": 0.3,
        "curiosity_weight": 0.4,
    },
    "xiaohongshu": {
        "title_max_len": 20,
        "keywords": ["种草", "安利", "亲测", "宝藏"],
        "avoid_words": ["广告", "推广"],
        "emotion_weight": 0.5,
        "trend_weight": 0.3,
    },
    "douyin": {
        "title_max_len": 30,
        "keywords": ["挑战", "教程", "干货", "必看"],
        "avoid_words": ["点击", "链接"],
        "action_weight": 0.4,
        "result_weight": 0.3,
    },
}
四、代码模块化实现
4.1 项目结构
```text
title_analyzer/
├── main.py                    # 程序入口
├── config/
│   └── settings.py            # 配置文件
├── models/
│   ├── __init__.py
│   ├── title_score.py         # 标题评分模型
│   ├── platform_rules.py      # 平台规则模型
│   └── hot_word.py            # 热词模型
├── services/
│   ├── __init__.py
│   ├── analyzer_engine.py     # 分析引擎
│   ├── rule_engine.py         # 规则引擎
│   ├── recommender.py         # 推荐系统
│   └── data_loader.py         # 数据加载器
├── utils/
│   ├── __init__.py
│   ├── text_processor.py      # 文本处理工具
│   ├── score_calculator.py    # 分数计算器
│   └── visualizer.py          # 可视化工具
├── data/
│   ├── hot_titles.json        # 爆款标题库
│   ├── platform_config.json   # 平台配置
│   └── stopwords.txt          # 停用词表
├── outputs/                   # 输出目录
└── README.md                  # 项目说明
```
4.2 核心代码实现
"models/title_score.py" - 标题评分模型
""" 标题评分模型模块 功能:定义标题评分的数据结构和计算逻辑 核心概念:将标题质量量化为多维度评分 """
from dataclasses import dataclass, field from enum import Enum from typing import Dict, List, Optional from datetime import datetime import uuid
class ScoreDimension(Enum):
    """The six dimensions along which a title is scored."""

    # Ability to trigger the urge to click.
    ATTRACTION = "吸引力"
    # How well the title matches the content.
    RELEVANCE = "相关性"
    # How easily the title spreads.
    SPREADABILITY = "传播力"
    # Fit with the target platform's traits.
    PLATFORM_FIT = "平台匹配度"
    # Novelty and distinctiveness.
    ORIGINALITY = "原创性"
    # Degree of tie-in with trending topics.
    TIMELINESS = "时效性"
@dataclass
class DimensionScore:
    """Score awarded to a title on a single dimension.

    Attributes:
        dimension: The dimension being scored.
        score: The score; clamped into the 0-100 range on construction.
        details: Free-text explanation of how the score was reached.
        suggestions: Improvement suggestions for this dimension.
    """

    dimension: ScoreDimension
    score: float
    details: str = ""
    suggestions: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Clamp ``score`` so it never leaves the 0-100 range."""
        capped = min(100, self.score)
        self.score = max(0, capped)
@dataclass
class TitleScore:
    """Aggregate multi-dimension score for one title.

    Attributes:
        score_id: Short unique id (first 8 characters of a UUID4 string).
        title: The title that was scored.
        scores: Per-dimension results keyed by ``ScoreDimension``.
        total_score: Weighted total; populated by ``calculate_total``.
        rank: Rank among the compared titles, or ``None`` when unranked.
        analysis_time: Timestamp taken when the object is created.
        platform: Identifier of the target platform.
    """

    score_id: str = field(default_factory=lambda: str(uuid.uuid4())[:8])
    title: str = ""
    scores: Dict[ScoreDimension, DimensionScore] = field(default_factory=dict)
    total_score: float = 0.0
    rank: Optional[int] = None
    analysis_time: datetime = field(default_factory=datetime.now)
    platform: str = "wechat"

    # Dimension weights. Left unannotated on purpose so that @dataclass treats
    # it as a shared class attribute and not as a per-instance field.
    WEIGHTS = {
        ScoreDimension.ATTRACTION: 0.25,
        ScoreDimension.RELEVANCE: 0.20,
        ScoreDimension.SPREADABILITY: 0.20,
        ScoreDimension.PLATFORM_FIT: 0.15,
        ScoreDimension.ORIGINALITY: 0.10,
        ScoreDimension.TIMELINESS: 0.10,
    }

    def calculate_total(self) -> float:
        """Compute, store and return the weighted total score.

        The total is the sum of ``score * weight`` over the dimensions present
        in ``scores``; a dimension without an entry in ``WEIGHTS`` contributes
        nothing. The result is rounded to two decimals.

        Returns:
            The value that was stored in ``total_score``.
        """
        total = sum(
            dim_score.score * self.WEIGHTS.get(dimension, 0)
            for dimension, dim_score in self.scores.items()
        )
        self.total_score = round(total, 2)
        return self.total_score

    def get_weakest_dimension(self) -> Optional[DimensionScore]:
        """Return the lowest-scoring dimension, or ``None`` if nothing is scored."""
        if not self.scores:
            return None
        return min(self.scores.values(), key=lambda x: x.score)

    def get_improvement_priority(self) -> List[tuple]:
        """Rank dimensions by how much total score they could still add.

        Returns:
            A list of ``(dimension, current_score, potential)`` tuples sorted
            by ``potential`` in descending order, where ``potential`` is
            ``(100 - current_score) * weight`` rounded to two decimals.
        """
        improvements = []
        for dimension, dim_score in self.scores.items():
            weight = self.WEIGHTS.get(dimension, 0)
            potential = (100 - dim_score.score) * weight
            improvements.append((dimension, dim_score.score, round(potential, 2)))
        return sorted(improvements, key=lambda x: x[2], reverse=True)

    def to_dict(self) -> dict:
        """Return a plain-dict view of the score suitable for JSON serialization."""
        return {
            "score_id": self.score_id,
            "title": self.title,
            "platform": self.platform,
            "total_score": self.total_score,
            "rank": self.rank,
            "analysis_time": self.analysis_time.isoformat(),
            "dimension_scores": {
                dim.value: {
                    "score": ds.score,
                    "details": ds.details,
                    "suggestions": ds.suggestions,
                }
                for dim, ds in self.scores.items()
            },
        }

    def to_markdown(self) -> str:
        """Render the analysis as a Markdown report.

        Returns:
            The report text: a header, one section per scored dimension with a
            20-character bar, and the improvement-priority list.
        """
        lines = [
            "# 标题分析报告",
            "",
            f"**标题**: {self.title}",
            f"**平台**: {self.platform}",
            f"**综合得分**: {self.total_score}/100",
            f"**分析时间**: {self.analysis_time.strftime('%Y-%m-%d %H:%M')}",
            "",
            "---",
            "",
            "## 📊 各维度评分",
        ]
        for dimension, dim_score in self.scores.items():
            bar_length = int(dim_score.score / 5)  # 20 characters wide, 5 points each
            bar = "█" * bar_length + "░" * (20 - bar_length)
            # list.append accepts exactly one item; passing four raised
            # TypeError, so the four section lines are added with extend.
            lines.extend([
                "",
                f"### {dimension.value}: {dim_score.score}分",
                f"`{bar}`",
                f"_{dim_score.details}_",
            ])
            if dim_score.suggestions:
                lines.append("**改进建议**:")
                lines.extend(f"- {suggestion}" for suggestion in dim_score.suggestions)

        # Improvement priorities, highest potential gain first.
        lines.extend([
            "",
            "## 🎯 优化建议(按优先级)",
        ])
        for i, (dim, current, potential) in enumerate(self.get_improvement_priority(), 1):
            lines.append(f"{i}. **{dim.value}**: 当前{current}分,可提升{potential}分")
        return "\n".join(lines)
"models/platform_rules.py" - 平台规则模型
""" 平台规则模型模块 功能:定义各平台的标题特征和规则 核心概念:不同平台有不同的算法偏好和用户习惯 """
from dataclasses import dataclass, field from typing import Dict, List, Set, Optional from enum import Enum import json import os
class Platform(Enum):
    """Platforms the analyzer supports."""

    # WeChat Official Accounts
    WECHAT = "wechat"
    # Xiaohongshu
    XIAOHONGSHU = "xiaohongshu"
    # Douyin
    DOUYIN = "douyin"
    # Bilibili
    BILIBILI = "bilibili"
    # Zhihu
    ZHIHU = "zhihu"
@dataclass
class PlatformConfig:
    """Title rules and scoring weights for one platform.

    Attributes:
        platform: The platform this configuration belongs to.
        name: Display name; defaults to the upper-cased platform value.
        title_max_len: Maximum title length in characters.
        title_min_len: Minimum title length in characters.
        keywords: Keywords popular on the platform.
        avoid_words: Words to avoid (they may get the title throttled).
        emotion_weight: Weight of emotional tone.
        curiosity_weight: Weight of curiosity triggers.
        trend_weight: Weight of trending-topic tie-ins.
        action_weight: Weight of calls to action.
        result_weight: Weight of result/outcome statements.
        special_rules: Free-text rules; only those containing "不能包含" are
            enforced by ``validate_title``.
    """

    platform: Platform
    name: str = ""
    title_max_len: int = 64
    title_min_len: int = 5
    keywords: List[str] = field(default_factory=list)
    avoid_words: List[str] = field(default_factory=list)
    emotion_weight: float = 0.3
    curiosity_weight: float = 0.3
    trend_weight: float = 0.2
    action_weight: float = 0.2
    result_weight: float = 0.2
    special_rules: List[str] = field(default_factory=list)

    def __post_init__(self):
        """Fall back to the upper-cased platform value when no name is given."""
        if not self.name:
            self.name = self.platform.value.upper()

    def validate_title(self, title: str) -> Dict:
        """Check a title against this platform's rules.

        Args:
            title: The title to validate.

        Returns:
            A dict with ``valid`` (bool), ``errors`` (list of str) and
            ``warnings`` (list of str). Length and special-rule violations are
            errors; words from ``avoid_words`` only produce a warning.
        """
        result = {
            "valid": True,
            "errors": [],
            "warnings": [],
        }

        # Length checks.
        title_len = len(title)
        if title_len < self.title_min_len:
            result["errors"].append(f"标题过短,最少{self.title_min_len}个字符")
            result["valid"] = False
        if title_len > self.title_max_len:
            result["errors"].append(f"标题过长,最多{self.title_max_len}个字符")
            result["valid"] = False

        # Discouraged words only warn; they do not invalidate the title.
        found_avoid = [w for w in self.avoid_words if w in title]
        if found_avoid:
            result["warnings"].append(f"包含可能受限词汇: {', '.join(found_avoid)}")

        # Special rules of the form "不能包含A、B": the text after the marker is
        # split on "、" and each piece is treated as a banned substring.
        for rule in self.special_rules:
            if "不能包含" not in rule:
                continue
            # Drop blank pieces: "" is a substring of every title, so a rule
            # ending in the marker or in "、" would otherwise reject everything.
            banned = [
                token
                for token in rule.split("不能包含")[1].split("、")
                if token.strip()
            ]
            if any(token in title for token in banned):
                result["errors"].append(f"违反规则: {rule}")
                result["valid"] = False

        return result

    def get_feature_vector(self, title: str) -> Dict[str, float]:
        """Extract simple surface features of a title for this platform.

        Args:
            title: The title text.

        Returns:
            A dict with these keys:
            ``length_score`` (length / ``title_max_len``, capped at 1.0),
            ``keyword_count`` (fraction of ``keywords`` present),
            ``avoid_word_count`` (number of ``avoid_words`` present), and the
            0.0/1.0 flags ``has_question``, ``has_number``, ``has_emotion``
            and ``has_special_char``.
        """
        # Both ASCII and full-width marks are checked; Chinese titles
        # normally use the full-width forms.
        question_marks = ("?", "?")
        emotion_markers = ("!", "!", "太", "最", "必", "一定")
        bracket_chars = ("【", "】", "[", "]", "《", "》")

        features = {
            # max(..., 1) guards against a zero max length, mirroring the
            # guard already used for keyword_count.
            "length_score": min(1.0, len(title) / max(self.title_max_len, 1)),
            "keyword_count": sum(1 for k in self.keywords if k in title) / max(len(self.keywords), 1),
            "avoid_word_count": sum(1 for w in self.avoid_words if w in title),
            "has_question": 1.0 if any(m in title for m in question_marks) else 0.0,
            "has_number": 1.0 if any(c.isdigit() for c in title) else 0.0,
            "has_emotion": 1.0 if any(e in title for e in emotion_markers) else 0.0,
            "has_special_char": 1.0 if any(c in title for c in bracket_chars) else 0.0,
        }
        return features
class PlatformRulesManager:
    """Holds the per-platform configurations and exposes lookup helpers.

    The built-in defaults are installed on construction. ``config_path`` is
    only used as the destination of ``save_configs``.
    """

    def __init__(self, config_path: str = "data/platform_config.json"):
        """Store the config path and install the built-in platform defaults.

        Args:
            config_path: File that ``save_configs`` writes to.
        """
        self.config_path = config_path
        self.platforms: Dict[Platform, PlatformConfig] = {}
        self._load_default_configs()

    def _load_default_configs(self):
        """Populate ``platforms`` with the built-in configuration of each platform."""
        # WeChat Official Accounts
        wechat_config = PlatformConfig(
            platform=Platform.WECHAT,
            name="微信公众号",
            title_max_len=64,
            title_min_len=5,
            keywords=["揭秘", "深度", "重磅", "独家", "解析", "盘点", "指南", "必读"],
            avoid_words=["震惊", "疯传", "速看", "紧急", "刚刚", "爆料"],
            emotion_weight=0.3,
            curiosity_weight=0.4,
            trend_weight=0.2,
            action_weight=0.1,
            result_weight=0.2,
            special_rules=[
                "不能包含过多表情符号",
                "不能包含诱导分享词汇如'不转不是中国人'",
            ],
        )
        # Xiaohongshu
        xhs_config = PlatformConfig(
            platform=Platform.XIAOHONGSHU,
            name="小红书",
            title_max_len=20,
            title_min_len=4,
            keywords=["种草", "安利", "亲测", "宝藏", "绝绝子", "yyds", "真实", "分享"],
            avoid_words=["广告", "推广", "代购", "购买", "链接", "微信"],
            emotion_weight=0.5,
            curiosity_weight=0.3,
            trend_weight=0.3,
            action_weight=0.2,
            result_weight=0.2,
            special_rules=[
                "不能包含硬广词汇",
                "需要符合社区氛围,避免过于营销化",
            ],
        )
        # Douyin
        douyin_config = PlatformConfig(
            platform=Platform.DOUYIN,
            name="抖音",
            title_max_len=30,
            title_min_len=3,
            keywords=["挑战", "教程", "干货", "必看", "神级", "逆天", "绝了", "爆款"],
            avoid_words=["点击", "链接", "关注", "私信", "加V", "电话"],
            emotion_weight=0.4,
            curiosity_weight=0.3,
            trend_weight=0.3,
            action_weight=0.4,
            result_weight=0.3,
            special_rules=[
                "需要强行动号召",
                "需要明确结果预期",
            ],
        )
        # Bilibili
        bilibili_config = PlatformConfig(
            platform=Platform.BILIBILI,
            name="哔哩哔哩",
            title_max_len=40,
            title_min_len=4,
            keywords=["硬核", "科普", "整活", "教学", "测评", "实况", "二创", "鬼畜"],
            avoid_words=["广告", "推广", "恰饭", "充值", "氪金"],
            emotion_weight=0.3,
            curiosity_weight=0.3,
            trend_weight=0.2,
            action_weight=0.1,
            result_weight=0.1,
            special_rules=[
                "需要符合社区文化",
                "避免过于标题党",
            ],
        )
        # Zhihu
        zhihu_config = PlatformConfig(
            platform=Platform.ZHIHU,
            name="知乎",
            title_max_len=50,
            title_min_len=6,
            keywords=["如何看待", "为什么", "如何评价", "有哪些", "求推荐", "分析", "总结"],
            avoid_words=["震惊", "必看", "速成", "躺赚", "暴富"],
            emotion_weight=0.2,
            curiosity_weight=0.4,
            trend_weight=0.2,
            action_weight=0.1,
            result_weight=0.1,
            special_rules=[
                "需要问题导向",
                "需要理性客观",
            ],
        )
        self.platforms = {
            Platform.WECHAT: wechat_config,
            Platform.XIAOHONGSHU: xhs_config,
            Platform.DOUYIN: douyin_config,
            Platform.BILIBILI: bilibili_config,
            Platform.ZHIHU: zhihu_config,
        }

    def get_platform(self, platform: Platform) -> Optional[PlatformConfig]:
        """Return the configuration for ``platform``, or ``None`` if unknown."""
        return self.platforms.get(platform)

    def get_all_platforms(self) -> List[PlatformConfig]:
        """Return every registered platform configuration."""
        return list(self.platforms.values())

    def save_configs(self):
        """Write all platform configurations to ``config_path`` as UTF-8 JSON.

        The top-level keys are the platform values. The parent directory is
        created when it does not exist yet, so saving works on a fresh
        checkout where ``data/`` has not been created.
        """
        configs_dict = {}
        for platform, config in self.platforms.items():
            configs_dict[platform.value] = {
                "name": config.name,
                "title_max_len": config.title_max_len,
                "title_min_len": config.title_min_len,
                "keywords": config.keywords,
                "avoid_words": config.avoid_words,
                "emotion_weight": config.emotion_weight,
                "curiosity_weight": config.curiosity_weight,
                "trend_weight": config.trend_weight,
                "action_weight": config.action_weight,
                "result_weight": config.result_weight,
                "special_rules": config.special_rules,
            }

        # A bare file name has no directory part; only create one when present.
        parent_dir = os.path.dirname(self.config_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(self.config_path, 'w', encoding='utf-8') as f:
            json.dump(configs_dict, f, ensure_ascii=False, indent=2)
"services/analyzer_engine.py" - 分析引擎
""" 分析引擎模块 功能:核心标题分析逻辑 核心概念:基于规则和模型的标题质量评估 """
import re
from collections import Counter
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

from models.title_score import TitleScore, DimensionScore, ScoreDimension
from models.platform_rules import PlatformRulesManager, Platform, PlatformConfig
from utils.text_processor import TextProcessor
@dataclass
class AnalysisContext:
    """Inputs shared by every dimension analyzer for one title.

    Attributes:
        title: The title being analyzed.
        platform: The target platform.
        platform_config: Configuration of the target platform. This
            annotation is evaluated when the class is created, so
            ``PlatformConfig`` must be imported from ``models.platform_rules``.
        text_features: Features extracted from the title by the text processor.
        hot_words: Hot words found for the title.
    """

    title: str
    platform: Platform
    platform_config: PlatformConfig
    text_features: Dict
    hot_words: List[str]
class AnalyzerEngine: """ 标题分析引擎
职责:
1. 协调各维度分析器
2. 聚合分析结果
3. 生成综合评分
设计模式:
- 策略模式: 每个维度使用独立的分析策略
- 装饰器模式: 可叠加不同的分析增强器
"""
def __init__(self, rules_manager: PlatformRulesManager):
    """Wire up the engine's collaborators and its per-dimension dispatch table.

    Args:
        rules_manager: Source of platform configurations.
    """
    self.rules_manager = rules_manager
    self.text_processor = TextProcessor()

    # One bound analyzer method per dimension; insertion order is the order
    # in which dimensions are later iterated.
    dispatch = {}
    dispatch[ScoreDimension.ATTRACTION] = self._analyze_attraction
    dispatch[ScoreDimension.RELEVANCE] = self._analyze_relevance
    dispatch[ScoreDimension.SPREADABILITY] = self._analyze_spreadability
    dispatch[ScoreDimension.PLATFORM_FIT] = self._analyze_platform_fit
    dispatch[ScoreDimension.ORIGINALITY] = self._analyze_originality
    dispatch[ScoreDimension.TIMELINESS] = self._analyze_timeliness
    self._dimension_analyzers = dispatch
def analyze(self, title: str, platform: Platform = Platform.WECHAT,
            content_keywords: List[str] = None) -> TitleScore:
    """Run every dimension analyzer on a title and aggregate the results.

    Args:
        title: The title to analyze.
        platform: The target platform.
        content_keywords: Keywords of the content, passed to each analyzer;
            an empty list is passed when this is ``None`` or empty.

    Returns:
        A ``TitleScore`` holding one ``DimensionScore`` per registered
        dimension, with its total already calculated.

    Raises:
        ValueError: If the rules manager has no configuration for ``platform``.
    """
    platform_config = self.rules_manager.get_platform(platform)
    if not platform_config:
        raise ValueError(f"不支持的平台: {platform}")

    # Shared, read-only inputs handed to every analyzer.
    context = AnalysisContext(
        title=title,
        platform=platform,
        platform_config=platform_config,
        text_features=self.text_processor.extract_features(title),
        hot_words=self._extract_hot_words(title, platform),
    )

    result = TitleScore(title=title, platform=platform.value)
    for dimension, run_analyzer in self._dimension_analyzers.items():
        try:
            outcome = run_analyzer(context, content_keywords or [])
        except Exception as exc:
            # Best effort: one failing analyzer must not abort the whole
            # analysis, so it is replaced by a neutral baseline score.
            outcome = DimensionScore(
                dimension=dimension,
                score=50.0,
                details=f"分析异常: {str(exc)}",
                suggestions=["请联系技术支持"],
            )
        result.scores[dimension] = outcome

    result.calculate_total()
    return result
def _analyze_attraction(self, context: AnalysisContext,
content_keywords: List[str]) -> DimensionScore:
"""
分析标题吸引力
评估要点:
利用AI解决实际问题,如果你觉得这个工具好用,欢迎关注长安牧笛!