写程序自动分析朋友圈文案情绪,给出乐观/丧/搞笑标签,帮你调整社交形象,告别靠感觉发朋友圈的时代。

预计阅读时长:11分钟

朋友圈情绪分析系统 - 让社交形象更精准

一、实际应用场景描述

场景背景:

小李是一名数字文化艺术创新创业课程的学生,最近在准备一个"智能社交形象管理"项目。他发现自己在朋友圈发的内容经常收到不同反应:

  • 发"今天又加班到深夜,好累啊" → 朋友评论"别太拼了"
  • 发"新作品终于完成了!超有成就感" → 获得大量点赞
  • 发"室友把我的代码删了,我选择原地爆炸" → 大家都在哈哈哈

痛点分析:

  1. 发布时机尴尬:不知道什么情绪的内容适合什么时间发
  2. 形象不统一:今天丧气明天搞笑,个人品牌混乱
  3. 缺乏数据支撑:全凭感觉发,无法优化内容策略
  4. 错失传播机会:积极内容被埋没,负面情绪影响形象

解决方案:

开发一个基于Python的朋友圈情绪分析系统,结合数字艺术中的情感计算技术,自动分析文案情绪,给出标签建议,帮助调整社交形象。

二、核心逻辑讲解

┌─────────────────────────────────────────────────────────────┐ │ 朋友圈情绪分析系统 │ ├─────────────────────────────────────────────────────────────┤ │ 输入: 朋友圈文案 (text) │ │ ↓ │ │ ┌─────────────────────────────────────────┐ │ │ │ 1. 文本预处理模块 │ │ │ │ - 去除特殊符号、表情 │ │ │ │ - 分词处理 (jieba) │ │ │ │ - 停用词过滤 │ │ │ └─────────────────────────────────────────┘ │ │ ↓ │ │ ┌─────────────────────────────────────────┐ │ │ │ 2. 特征提取模块 │ │ │ │ - TF-IDF 向量化 │ │ │ │ - 情感词典匹配 │ │ │ │ - 关键词权重计算 │ │ │ └─────────────────────────────────────────┘ │ │ ↓ │ │ ┌─────────────────────────────────────────┐ │ │ │ 3. 情绪分类引擎 │ │ │ │ - 朴素贝叶斯分类器 │ │ │ │ - 规则引擎兜底 │ │ │ │ - 置信度计算 │ │ │ └─────────────────────────────────────────┘ │ │ ↓ │ │ 输出: {标签: "乐观", 置信度: 0.87, 建议: "适合工作日中午发布"}│ └─────────────────────────────────────────────────────────────┘

三、完整代码实现

项目结构

emotion_analyzer/ ├── main.py # 主程序入口 ├── preprocessor.py # 文本预处理模块 ├── feature_extractor.py # 特征提取模块 ├── classifier.py # 情绪分类引擎 ├── utils.py # 工具函数 ├── sentiment_dict.json # 情感词典 ├── README.md # 项目说明 └── requirements.txt # 依赖清单

  1. requirements.txt

jieba==0.42.1 scikit-learn==1.3.0 numpy==1.24.3 pandas==2.0.3

  2. sentiment_dict.json(情感词典)

{ "positive_words": [ "开心", "快乐", "幸福", "棒", "赞", "厉害", "完美", "优秀", "惊喜", "感动", "温暖", "美好", "成功", "突破", "进步", "成就", "喜欢", "爱", "期待", "兴奋", "自豪", "骄傲", "满足", "充实", "阳光", "积极", "向上", "加油", "奋斗", "努力", "拼搏", "坚持", "感恩", "感谢", "幸运", "顺利", "圆满", "精彩", "美妙", "绝妙" ], "negative_words": [ "难过", "伤心", "失望", "累", "疲惫", "崩溃", "绝望", "无助", "愤怒", "生气", "烦躁", "焦虑", "抑郁", "孤独", "空虚", "迷茫", "失败", "挫折", "倒霉", "糟糕", "痛苦", "煎熬", "折磨", "难受", "讨厌", "恨", "后悔", "遗憾", "无奈", "无力", "放弃", "躺平", "压力", "负担", "重担", "苦涩", "心酸", "委屈", "憋屈", "郁闷" ], "humor_words": [ "哈哈", "嘿嘿", "嘻嘻", "笑死", "爆笑", "逗比", "沙雕", "梗", "神操作", "离谱", "绝了", "666", "yyds", "牛皮", "秀儿", "人才", "脑洞", "反转", "神转折", "猝不及防", "意想不到", "哭笑不得", "尴尬", "社死", "翻车", "打脸", "大型", "现场", "名场面", "表情包" ], "time_suggestions": { "optimistic": ["工作日中午12:00-13:30", "周五晚上18:00-21:00", "周末上午10:00-11:30"], "negative": ["深夜22:00-23:30", "雨天独处时"], "humorous": ["周五下午17:00-18:00", "周末聚餐前", "节假日全天"] } }

  3. utils.py(工具函数)

""" 工具函数模块 - 提供通用的辅助功能 作者:AI助手 用途:支持朋友圈情绪分析系统的通用工具 """

import json from datetime import datetime

def load_sentiment_dict(filepath='sentiment_dict.json'):
    """Load the sentiment dictionary from a JSON file.

    Args:
        filepath: Path to the dictionary JSON file.

    Returns:
        dict: Word lists for the three emotion classes plus the
        posting-time suggestions. Falls back to the built-in default
        dictionary when the file is missing or not valid JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"警告:未找到词典文件 {filepath},使用内置词典")
        return get_default_dict()
    except json.JSONDecodeError:
        # A corrupt dictionary file previously crashed the whole pipeline;
        # degrade gracefully to the built-in dictionary instead.
        print(f"警告:词典文件 {filepath} 格式错误,使用内置词典")
        return get_default_dict()


def get_default_dict():
    """Return the built-in fallback sentiment dictionary."""
    return {
        "positive_words": ["开心", "快乐", "幸福", "棒", "赞", "厉害"],
        "negative_words": ["难过", "伤心", "失望", "累", "疲惫", "崩溃"],
        "humor_words": ["哈哈", "嘿嘿", "笑死", "爆笑", "逗比", "沙雕"],
        "time_suggestions": {
            "optimistic": ["工作日中午", "周五晚上"],
            "negative": ["深夜时分"],
            "humorous": ["周五下午", "周末时间"]
        }
    }

def calculate_confidence(scores):
    """Compute the classification confidence.

    Args:
        scores: Mapping of category name -> raw score.

    Returns:
        float: Share of the winning category in the score total,
        rounded to two decimals; 0.0 when all scores are zero
        (avoids division by zero) or the mapping is empty.
    """
    total = sum(scores.values())
    if total == 0:
        return 0.0
    max_score = max(scores.values())
    return round(max_score / total, 2)

def get_current_time_suggestion(label, time_suggestions):
    """Advise whether the current hour suits posting content of this mood.

    Args:
        label: Emotion label ('乐观', '丧'; anything else is treated as
            humorous content).
        time_suggestions: Suggestion table from the sentiment dictionary.
            NOTE(review): currently unused — the good hours below are
            hard-coded; kept for interface compatibility.

    Returns:
        str: Either "post now" or "wait until these hours" advice.
    """
    current_hour = datetime.now().hour

    if label == "乐观":
        good_hours = [12, 13, 18, 19, 20]
    elif label == "丧":
        good_hours = [22, 23]
    else:  # humorous
        good_hours = [17, 18, 19, 20, 21]

    if current_hour in good_hours:
        return f"🎯 当前时间({current_hour}:00)很适合发布{label}内容!"
    else:
        return f"⏰ 建议等到 {', '.join([f'{h}:00' for h in good_hours])} 再发布"

class EmotionResult:
    """Container for one emotion-analysis result.

    Attributes:
        text: The analysed post text.
        label: Predicted emotion label ('乐观' / '丧' / '搞笑' / '未知').
        confidence: Confidence value in [0, 1].
        scores: Per-label score mapping.
        suggestion: Posting-time advice for this label.
    """

    def __init__(self, text, label, confidence, scores, suggestion):
        self.text = text
        self.label = label
        self.confidence = confidence
        self.scores = scores
        self.suggestion = suggestion

    def to_dict(self):
        """Return the result as a plain dict (e.g. for JSON output)."""
        return {
            "text": self.text,
            "label": self.label,
            "confidence": self.confidence,
            "scores": self.scores,
            "suggestion": self.suggestion
        }

    def __str__(self):
        # Truncate long posts so the report box stays readable.
        preview = self.text[:30] + ('...' if len(self.text) > 30 else '')
        # `confidence * 100` printed values like "87.0%"; the percent
        # format specifier renders 0.87 as "87%" instead.
        return (
            "\n"
            "╔══════════════════════════════════════╗\n"
            "║ 📱 朋友圈情绪分析结果\n"
            "╠══════════════════════════════════════╣\n"
            f"║ 原文: {preview}\n"
            f"║ 标签: 【{self.label}】 (置信度: {self.confidence:.0%})\n"
            "║ 得分详情:\n"
            f"║   😊 乐观: {self.scores.get('乐观', 0)}\n"
            f"║   😢 丧:   {self.scores.get('丧', 0)}\n"
            f"║   😂 搞笑: {self.scores.get('搞笑', 0)}\n"
            f"║ 建议: {self.suggestion}\n"
            "╚══════════════════════════════════════╝\n"
        )

  4. preprocessor.py(文本预处理模块)

""" 文本预处理模块 - 负责文本的清洗和标准化 作者:AI助手 用途:为情绪分析准备干净的文本数据 """

import re import jieba from utils import load_sentiment_dict

class TextPreprocessor:
    """Clean, tokenize and filter WeChat-Moments post text.

    Pipeline: clean_text -> jieba tokenization -> stopword removal.
    Also counts sentiment-dictionary hits for the rule engine.
    """

    def __init__(self):
        """Load the stopword set and the sentiment dictionary."""
        self.stopwords = self._load_stopwords()
        self.sentiment_dict = load_sentiment_dict()
        # BUGFIX: the stopword filter used to drop every single-character
        # token, which silently discarded one-character sentiment words
        # such as 棒/赞/累/爱, so they never contributed to any score.
        # Whitelist single-character words that appear in the dictionary.
        self._single_char_keep = {
            word
            for key in ('positive_words', 'negative_words', 'humor_words')
            for word in self.sentiment_dict.get(key, [])
            if len(word) == 1
        }

    def _load_stopwords(self):
        """Return the built-in Chinese stopword set.

        Returns:
            set: Stopwords to strip before feature extraction.
        """
        stopwords = set([
            '的', '了', '在', '是', '我', '有', '和', '就', '不', '人',
            '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去',
            '你', '会', '着', '没有', '看', '好', '自己', '这', '那', '他',
            '她', '它', '我们', '你们', '他们', '什么', '怎么', '为什么',
            '这个', '那个', '这些', '那些', '啊', '呀', '哦', '嗯', '吧'
        ])
        return stopwords

    def clean_text(self, text):
        """Clean raw text: strip URLs, @mentions, hashtags and noise chars.

        Args:
            text: Original post text.

        Returns:
            str: Cleaned text with whitespace collapsed.
        """
        # Strip URLs.
        text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)

        # Strip @username mentions.
        text = re.sub(r'@[\w\u4e00-\u9fa5]+', '', text)

        # Strip #hashtag# topics.
        text = re.sub(r'#[\w\u4e00-\u9fa5]+#', '', text)

        # Keep Chinese, ASCII letters/digits and basic punctuation only
        # (this also removes emoji, which the feature extractor counts
        # separately on the raw text).
        text = re.sub(r'[^\u4e00-\u9fa5a-zA-Z0-9,。!?、;:""''()\s]', '', text)

        # Collapse runs of whitespace.
        text = re.sub(r'\s+', ' ', text).strip()

        return text

    def tokenize(self, text):
        """Segment the cleaned text with jieba.

        Args:
            text: Cleaned text.

        Returns:
            list: Token list.
        """
        words = jieba.lcut(text)
        return words

    def remove_stopwords(self, words):
        """Drop stopwords and noise tokens.

        Single-character tokens are dropped unless they are known
        sentiment words (see ``_single_char_keep``).

        Args:
            words: Token list.

        Returns:
            list: Filtered token list.
        """
        return [
            word for word in words
            if word not in self.stopwords
            and (len(word) > 1 or word in self._single_char_keep)
        ]

    def preprocess(self, text):
        """Run the full preprocessing pipeline.

        Args:
            text: Original post text.

        Returns:
            tuple: (cleaned text, filtered token list)
        """
        cleaned_text = self.clean_text(text)
        tokens = self.tokenize(cleaned_text)
        filtered_tokens = self.remove_stopwords(tokens)

        return cleaned_text, filtered_tokens

    def extract_emotion_words(self, tokens):
        """Count sentiment-dictionary hits among the tokens.

        Args:
            tokens: Token list.

        Returns:
            dict: Hit counts per class ('positive' / 'negative' / 'humor').
        """
        emotion_counts = {
            'positive': 0,
            'negative': 0,
            'humor': 0
        }

        positive_set = set(self.sentiment_dict['positive_words'])
        negative_set = set(self.sentiment_dict['negative_words'])
        humor_set = set(self.sentiment_dict['humor_words'])

        for token in tokens:
            if token in positive_set:
                emotion_counts['positive'] += 1
            elif token in negative_set:
                emotion_counts['negative'] += 1
            elif token in humor_set:
                emotion_counts['humor'] += 1

        return emotion_counts

5. feature_extractor.py(特征提取模块)

""" 特征提取模块 - 将文本转换为机器学习可用的特征向量 作者:AI助手 用途:提取文本的关键特征用于情绪分类 """

import re
from collections import Counter

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

from preprocessor import TextPreprocessor

class FeatureExtractor:
    """Turn post text into numeric features (TF-IDF + hand-crafted rules)."""

    def __init__(self):
        """Create the underlying preprocessor; the TF-IDF model is lazy."""
        self.preprocessor = TextPreprocessor()
        self.tfidf_vectorizer = None
        self.vocabulary = []

    def build_vocabulary(self, texts):
        """Build a frequency-ranked vocabulary from a corpus.

        Args:
            texts: List of raw post texts.

        Returns:
            list: Up to the 500 most frequent tokens.
        """
        all_words = []
        for text in texts:
            _, tokens = self.preprocessor.preprocess(text)
            all_words.extend(tokens)

        # Keep the 500 most frequent tokens.
        word_counts = Counter(all_words)
        self.vocabulary = [word for word, count in word_counts.most_common(500)]

        return self.vocabulary

    def extract_tfidf_features(self, texts, fit=True):
        """Vectorize texts with TF-IDF.

        Args:
            texts: List of raw texts.
            fit: Fit a fresh vectorizer (True) or reuse the trained one.

        Returns:
            numpy.ndarray: Dense TF-IDF feature matrix.

        NOTE(review): the raw (unsegmented) texts are vectorized, but
        TfidfVectorizer's default tokenizer does not split Chinese, so
        tokens from the jieba-built vocabulary may rarely match. Consider
        pre-segmenting the texts with jieba before vectorizing — confirm.
        """
        if fit or self.tfidf_vectorizer is None:
            self.tfidf_vectorizer = TfidfVectorizer(
                vocabulary=self.vocabulary,
                max_features=300,
                ngram_range=(1, 2),  # unigrams and bigrams
                min_df=1
            )
            features = self.tfidf_vectorizer.fit_transform(texts)
        else:
            features = self.tfidf_vectorizer.transform(texts)

        return features.toarray()

    def extract_rule_features(self, text):
        """Hand-crafted statistical features for one raw text.

        Args:
            text: Original (uncleaned) text.

        Returns:
            dict: Feature name -> numeric value.
        """
        cleaned_text, tokens = self.preprocessor.preprocess(text)
        emotion_counts = self.preprocessor.extract_emotion_words(tokens)

        features = {
            'text_length': len(cleaned_text),
            'token_count': len(tokens),
            'positive_score': emotion_counts['positive'],
            'negative_score': emotion_counts['negative'],
            'humor_score': emotion_counts['humor'],
            # Count both full-width and ASCII punctuation variants.
            'exclamation_count': text.count('!') + text.count('!'),
            'question_count': text.count('?') + text.count('?'),
            'emoji_count': len(re.findall(r'[\U0001F600-\U0001F64F]', text)),
            'punctuation_ratio': self._calculate_punctuation_ratio(text),
            'repeat_char_ratio': self._calculate_repeat_ratio(text)
        }

        return features

    def _calculate_punctuation_ratio(self, text):
        """Fraction of characters that are punctuation (0 for empty text)."""
        punctuation = ',。!?、;:""''().,!?;:\'"()'
        punct_count = sum(1 for char in text if char in punctuation)
        return punct_count / len(text) if text else 0

    def _calculate_repeat_ratio(self, text):
        """Density of runs of 3+ identical characters (e.g. '哈哈哈')."""
        repeat_pattern = r'(.)\1{2,}'
        repeats = re.findall(repeat_pattern, text)
        return len(repeats) / len(text) if text else 0

    def combine_features(self, tfidf_features, rule_features_list):
        """Concatenate TF-IDF features with scaled rule features.

        Args:
            tfidf_features: Dense TF-IDF matrix, one row per text.
            rule_features_list: One rule-feature dict per text.

        Returns:
            numpy.ndarray: Combined feature matrix.
        """
        rule_features_list = list(rule_features_list)
        if not rule_features_list:
            return tfidf_features

        # BUGFIX: np.array() over a list of dicts builds an object array
        # of dicts, so .mean(axis=0)/.std(axis=0) crashed. Flatten each
        # dict into a numeric row using a stable key order first.
        keys = sorted(rule_features_list[0])
        rule_matrix = np.array(
            [[float(feats[k]) for k in keys] for feats in rule_features_list]
        )

        # Min-max scale to [0, 1] instead of z-scoring: the downstream
        # MultinomialNB rejects negative feature values. The epsilon
        # keeps constant columns from dividing by zero.
        col_min = rule_matrix.min(axis=0)
        col_span = rule_matrix.max(axis=0) - col_min + 1e-6
        rule_matrix = (rule_matrix - col_min) / col_span

        return np.hstack([tfidf_features, rule_matrix])

6. classifier.py(情绪分类引擎)

""" 情绪分类引擎 - 使用机器学习和规则结合的方法进行分类 作者:AI助手 用途:对朋友圈文案进行情绪分类 """

import numpy as np from collections import defaultdict from sklearn.naive_bayes import MultinomialNB from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, classification_report from feature_extractor import FeatureExtractor from preprocessor import TextPreprocessor from utils import EmotionResult, get_current_time_suggestion, load_sentiment_dict

class EmotionClassifier: """情绪分类器"""

def __init__(self, use_ml=True):
    """Set up the classifier pipeline.

    Args:
        use_ml: Whether to layer the machine-learning model on top of
            the rule engine.
    """
    self.use_ml = use_ml

    # Shared pipeline components.
    self.extractor = FeatureExtractor()
    self.preprocessor = TextPreprocessor()
    self.sentiment_dict = load_sentiment_dict()

    # Naive-Bayes model (light smoothing); untrained until train() runs.
    self.nb_classifier = MultinomialNB(alpha=0.1)
    self.is_trained = False

    # Numeric class ids <-> human-readable labels.
    self.label_map = {0: '乐观', 1: '丧', 2: '搞笑'}
    self.reverse_label_map = {v: k for k, v in self.label_map.items()}
    
def prepare_training_data(self):
    """Return a small built-in labelled corpus for training.

    (Mock data for the demo; a real deployment should use annotated posts.)

    Returns:
        tuple: (list of 24 texts, list of 24 int labels) where
        0 = optimistic, 1 = down, 2 = humorous.
    """
    optimistic = [
        "今天终于完成了项目,超级有成就感!感谢团队的努力💪",
        "新的一天开始了,阳光真好,心情美美哒~",
        "考试通过了!三个月的努力没有白费,太开心了!",
        "收到了理想大学的录取通知书,梦想成真啦🎉",
        "今天的夕阳特别美,生活处处有惊喜",
        "健身打卡第100天,体重下降10斤,为自己骄傲!",
        "创业项目拿到天使轮融资,感谢所有支持的人",
        "学会了弹吉他,第一次弹奏就很流畅,成就感爆棚",
    ]
    down = [
        "今天又被老板骂了,感觉做什么都不对...",
        "加班到凌晨两点,身心俱疲,看不到希望",
        "喜欢的女孩拒绝了我,整个人都不好了",
        "论文被拒稿了,三年的心血就这么没了",
        "房租又涨了,工资却没变,压力山大",
        "生病发烧躺在床上,觉得自己好没用",
        "投了50份简历都没回音,是不是我不行了",
        "室友又把我的东西弄坏了,真的很烦很难过",
    ]
    humorous = [
        "刚才差点把自己绊倒,还好反应快,不然就是大型社死现场😂",
        "室友说我睡觉打呼像拖拉机,我觉得他在夸我动力十足",
        "点了外卖备注不要香菜,结果送来一盘香菜拌饭哈哈哈",
        "试图早起跑步,结果睡过头到中午,计划再次宣告失败🤣",
        "朋友问我为什么单身,我说我在等一个像WiFi一样的人",
        "减肥第一天就破功,吃了三个汉堡安慰自己",
        "上课偷吃零食被老师发现,全班都在憋笑",
        "自拍修了半小时,发朋友圈两分钟,然后开始删照片",
    ]

    training_texts = optimistic + down + humorous
    training_labels = (
        [0] * len(optimistic) + [1] * len(down) + [2] * len(humorous)
    )

    return training_texts, training_labels

def train(self):
    """Train the Naive-Bayes classifier on the built-in demo corpus.

    Builds the vocabulary, extracts TF-IDF and rule features, fits the
    model on a stratified 80/20 split and prints an evaluation report.
    Sets ``self.is_trained`` on completion.
    """
    print("🔄 正在准备训练数据...")
    texts, labels = self.prepare_training_data()
    
    print("📊 正在提取特征...")
    # Build the token vocabulary from the training corpus.
    self.extractor.build_vocabulary(texts)
    
    # TF-IDF features over the raw texts.
    tfidf_features = self.extractor.extract_tfidf_features(texts)
    
    # Hand-crafted rule features, one dict per text.
    rule_features = [self.extractor.extract_rule_features(text) for text in texts]
    
    # Concatenate both feature families into one matrix.
    X = self.extractor.combine_features(tfidf_features, rule_features)
    y = np.array(labels)
    
    # Stratified 80/20 train/test split.
    # NOTE(review): with only 24 samples the test set holds ~5 posts,
    # so the reported metrics are indicative at best.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    
    print("🧠 正在训练朴素贝叶斯分类器...")
    self.nb_classifier.fit(X_train, y_train)
    
    # Evaluate on the held-out split.
    y_pred = self.nb_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"✅ 模型训练完成!测试准确率: {accuracy:.2%}")
    print("\n📋 详细评估报告:")
    print(classification_report(y_test, y_pred, target_names=['乐观', '丧', '搞笑']))
    
    self.is_trained = True
    
def classify_with_rules(self, text):
    """Score a text with the dictionary-based rule engine (fallback path).

    Args:
        text: Text to classify.

    Returns:
        dict: Normalized score per label ('乐观' / '丧' / '搞笑'),
        each rounded to two decimals.
    """
    _, tokens = self.preprocessor.preprocess(text)
    emotion_counts = self.preprocessor.extract_emotion_words(tokens)

    # Set lookups instead of list scans for the per-token bonus pass.
    pos_vocab = set(self.sentiment_dict['positive_words'])
    neg_vocab = set(self.sentiment_dict['negative_words'])
    fun_vocab = set(self.sentiment_dict['humor_words'])

    # Base weights from the dictionary hit counts.
    pos_score = emotion_counts['positive'] * 2
    neg_score = emotion_counts['negative'] * 2
    fun_score = emotion_counts['humor'] * 1.5

    # Per-token dictionary bonus: every hit adds one more point.
    for token in tokens:
        if token in pos_vocab:
            pos_score += 1
        elif token in neg_vocab:
            neg_score += 1
        elif token in fun_vocab:
            fun_score += 1

    # Surface-pattern bonuses run on the raw text (emoji survive there).
    if any(mark in text for mark in ('哈哈', '嘿嘿', '笑死', '😂', '🤣')):
        fun_score += 3

    if any(mark in text for mark in ('唉', '呜', '😢', '😭', '心碎')):
        neg_score += 2

    if any(mark in text for mark in ('开心', '快乐', '棒', '赞', '🌟', '🎉')):
        pos_score += 2

    # Normalize; the +1 keeps the denominator non-zero for neutral text.
    total = pos_score + neg_score + fun_score + 1

    return {
        '乐观': round(pos_score / total, 2),
        '丧': round(neg_score / total, 2),
        '搞笑': round(fun_score / total, 2),
    }

def predict(self, text):
    """
    预测文本的情绪标签
    
    Args:
        text: 待分析文本
        
    Returns:
        EmotionResult: 分析结果对象
    """
    if not text.strip():
        return EmotionResult(
            text=text,
            label="未知",
            confidence=0.0,
            scores={'乐观': 0, '丧': 0, '搞笑': 0},
            suggestion="请输入有效文本"
        )
    
    # 使用规则引擎获取基础分数
    rule_scores = self.classify_with_rules(text)
    
    if self.use_ml and self.is_trained:
        try:
            # 提取特征
            cleaned_text, tokens = self.preprocessor.preprocess(text)
            tfidf_feature = self.extractor.extract_tfidf_features([text], fit=False)
            rule_feature = [self.extractor.extract_rule_features(text)]
            combined_feature = self.extractor.combine_features(tfidf_feature, rule_feature)
            
            # ML预测
            ml_proba = self.nb_classifier.predict_proba(combined_feature)[0]
            
            # 融合规则和ML结果

利用AI解决实际问题,如果你觉得这个工具好用,欢迎关注长安牧笛!