给掘金做了一个数据统计分析工具 Plus 版

10,071 阅读4分钟

本文已参与好文召集令活动,点击查看:后端、大前端双赛道投稿,2万元奖池等你挑战!

本文只讲分析,数据来源请看 给掘金做了一个数据统计分析工具

前言

在原有基础上增加如下功能

  1. 今日升级名单
  2. 近3日7日30日升级名单
  3. 近3日7日30日浏览Top10
  4. 近3日7日30日获赞Top10
  5. 热门作者,每日数据折线图

看效果

  1. 今日升级名单 image.png

  2. 近3日7日30日升级名单 image.png

  3. 近3日7日30日浏览Top10 image.png

  4. 近3日7日30日获赞Top10 image.png

  5. 热门作者,每日数据折线图   由于不能嵌入 HTML,想看实际效果 -> 点我 image.png

分析数据

代码示例使用的仍然是 Scala

主要代码都有注释,有疑问欢迎评论提问

想要数据,可以评论找我要,也可以自己采集(参见《给掘金做了一个数据统计分析工具》)


import cn.hutool.core.io.IoUtil
import cn.hutool.core.lang.TypeReference
import cn.hutool.json.JSONUtil
import com.yeting.juejin.JueLI.Author

import java.io.{FileInputStream, FileOutputStream}
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.time.{LocalDate, LocalDateTime}
import java.time.format.DateTimeFormatter
import java.util
import scala.collection.JavaConverters._
import scala.collection.{immutable, mutable}
import scala.math.Ordering

/**
 * Offline analysis of author snapshots.
 *
 * Snapshots are read from JSON-lines files into [[J.map]], keyed by capture time
 * (`yyyyMMddHHmm`). From that data the program prints several Top-10 / upgrade tables to
 * stdout and writes an ECharts line chart to `./111.html`.
 */
object J {

    /** Format of the capture-time keys and of `Author.getTime` (e.g. `202107011230`). */
    val dateFormat: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMddHHmm")

    /** Day-level key; being fixed-width digits, its lexicographic order is chronological. */
    val yyyyMMdd: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd")

    /** Human-readable format used when printing time periods. */
    val dateFormatOut: DateTimeFormatter = DateTimeFormatter.ofPattern("MM-dd HH:mm")

    /** Capture time -> authors captured at that time; filled by [[load]]. */
    val map: mutable.Map[String, List[Author]] = mutable.ListMap()

    /** Aggregated figures of one author over one time window. */
    private final case class WindowStats(
        totalDigg: Int,
        totalView: Int,
        maxDigg: Int,
        maxDiggTime: String,
        maxView: Int,
        maxViewTime: String,
        upgraded: Boolean,
        levelDesc: String
    )

    /** One author's figures for the 1 / 3 / 7 / 30 day windows (all windows include today). */
    private final case class AuthorStats(
        userName: String,
        day1: WindowStats,
        day3: WindowStats,
        day7: WindowStats,
        day30: WindowStats
    )

    /** One chart line: display name plus likes gained per day (`yyyyMMdd` -> gain). */
    private final case class UserSeries(name: String, gainByDay: Map[String, Int]) {
        def total: Int = gainByDay.values.sum
    }

    /** Entry point: load the snapshot files, print the rankings, then write the chart. */
    def main(args: Array[String]): Unit = {
        load()
        top()
        userReport()
    }

    /**
     * Renders `text` as a single-quoted JavaScript string literal.
     *
     * User names come from scraped data, so characters that would terminate the literal
     * (or the surrounding `<script>` element) are escaped. A `null` is rendered as the
     * text `null`, which is what plain string interpolation would have produced.
     */
    private def jsString(text: String): String = {
        val escaped = Option(text).getOrElse("null").map {
            case '\\'  => "\\\\"
            case '\''  => "\\'"
            case '\n'  => "\\n"
            case '\r'  => "\\r"
            case '<'   => "\\x3C" // keeps "</script>" inside a name from closing the script block
            case other => other.toString
        }.mkString
        s"'${escaped}'"
    }

    /**
     * Writes `./111.html`, an ECharts line chart of likes gained per day for every author
     * whose summed daily gain exceeds 50.
     *
     * A day's gain is the like count of the author's last snapshot of that day minus the
     * like count of the first snapshot of that day.
     */
    private def userReport(): Unit = {
        // day (yyyyMMdd) -> every snapshot captured on that day
        val snapshotsByDay: Map[String, List[Author]] = map.toList
            .map { case (capturedAt, authors) =>
                (LocalDateTime.parse(capturedAt, dateFormat).format(yyyyMMdd), authors)
            }
            .groupBy(_._1)
            .map { case (day, batches) => day -> batches.flatMap(_._2) }

        // x axis: every day present in the data, oldest first
        val days: List[String] = snapshotsByDay.keys.toList.sortBy(_.toInt)
        val xAxisdata = days.map(day => s"'${day}'").mkString(",")

        // (user id, day, user name, likes gained that day)
        val dailyGains: List[(String, String, String, Int)] = days.flatMap { day =>
            snapshotsByDay(day)
                .groupBy(_.getUser_id)
                .toList
                .map { case (userId, snapshots) =>
                    val ordered = snapshots.sortBy(_.getTime)
                    val gained = ordered.last.getGot_digg_count.toInt - ordered.head.getGot_digg_count.toInt
                    (userId, day, ordered.head.getUser_name, gained)
                }
        }

        val chartUsers: List[UserSeries] = dailyGains
            .groupBy(_._1)
            .values
            .toList
            .map { rows =>
                val chronological = rows.sortBy(_._2)
                // the most recent name is used as the label
                UserSeries(chronological.last._3, chronological.map(r => r._2 -> r._4).toMap)
            }
            // without a threshold there are far too many lines to read
            .filter(_.total > 50)
            // biggest gainers first so they are easy to find in the legend
            .sortBy(_.total)(Ordering.Int.reverse)

        val legendData = chartUsers.map(u => jsString(u.name)).mkString(",")

        val series = chartUsers
            .map { user =>
                // One point per x-axis day. A day without data becomes `null` (ECharts treats
                // it as a missing point) so later values are not shifted onto the wrong date.
                val points = days.map(day => user.gainByDay.get(day).fold("null")(_.toString)).mkString(",")
                s"""
                   |{
                   |    name: ${jsString(user.name)},
                   |    type: 'line',
                   |    data: [${points}]
                   |}
                   |""".stripMargin
            }.mkString(",")

        val html =
            s"""
               |<!DOCTYPE html>
               |<html style="height: 100%">
               |	<head>
               |		<meta charset="utf-8">
               |	</head>
               |	<body style="height: 100%; margin: 0">
               |		<div id="container" style="height: 100%"></div>
               |		<script type="text/javascript" src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
               |		<script type="text/javascript">
               |			var dom = document.getElementById("container");
               |			var myChart = echarts.init(dom);
               |			var app = {};
               |			var option;
               |			option = {
               |				title: {
               |					text: ''
               |				},
               |				tooltip: {
               |					trigger: 'axis'
               |				},
               |				legend: {
               |					data: [${legendData}]
               |				},
               |				grid: {
               |					left: '3%',
               |					right: '4%',
               |					bottom: '3%',
               |					containLabel: true
               |				},
               |				toolbox: {
               |					feature: {
               |						saveAsImage: {}
               |					}
               |				},
               |				xAxis: {
               |					type: 'category',
               |					boundaryGap: false,
               |					data: [${xAxisdata}]
               |				},
               |				yAxis: {
               |					type: 'value'
               |				},
               |				series: [
               |                 ${series}
               |				]
               |			};
               |			if (option && typeof option === 'object') {
               |				myChart.setOption(option);
               |			}
               |		</script>
               |	</body>
               |</html>
               |
               |""".stripMargin
        // second argument asks Hutool to close the stream after writing
        IoUtil.writeUtf8(new FileOutputStream("./111.html"), true, html)
    }

    /**
     * Summarises a chronologically ordered list of one author's snapshots.
     *
     * - totals: last snapshot minus first snapshot;
     * - peaks: the largest positive gain between two consecutive snapshots, with its
     *   period (the earliest one wins on ties; `0` / `""` when nothing was gained);
     * - upgrade: whether the lowest and highest numeric level in the window differ.
     *
     * An empty list yields all-zero / empty figures.
     */
    private def summarize(snapshots: List[Author]): WindowStats = snapshots match {
        case Nil =>
            WindowStats(0, 0, 0, "", 0, "", upgraded = false, levelDesc = "")
        case first :: _ =>
            val last = snapshots.last
            val totalDigg = last.getGot_digg_count.toInt - first.getGot_digg_count.toInt
            val totalView = last.getGot_view_count.toInt - first.getGot_view_count.toInt

            // Every pair of consecutive snapshots is one period; stepping by two would
            // silently skip every second period.
            val periods = snapshots.zip(snapshots.tail)

            def peak(count: Author => Int): (Int, String) =
                periods.foldLeft((0, "")) { case (best, (from, to)) =>
                    val gained = count(to) - count(from)
                    if (gained > best._1) (gained, s"${getOutTime(from.getTime)} - ${getOutTime(to.getTime)}")
                    else best
                }

            val (maxDigg, maxDiggTime) = peak(_.getGot_digg_count.toInt)
            val (maxView, maxViewTime) = peak(_.getGot_view_count.toInt)

            // Compare levels numerically so that e.g. 10 is not ordered before 2.
            val levels = snapshots.map(_.getLevel.toInt)
            val lowest = levels.min
            val highest = levels.max
            val upgraded = highest != lowest
            val levelDesc = if (upgraded) s"${lowest} 升到 ${highest}" else "无升级"

            WindowStats(totalDigg, totalView, maxDigg, maxDiggTime, maxView, maxViewTime, upgraded, levelDesc)
    }

    /** Prints a two-column Top-10 table ordered by `metric`, largest first. */
    private def printRanking(title: String, metricLabel: String, rows: List[AuthorStats])(metric: AuthorStats => Int): Unit = {
        println("\n-----------------" + title + "------------------")
        printf("|%-12s\t|%-5s|\n", "用户", metricLabel)
        printf("|%-12s\t|%-5s|\n", "-" * 12, "-" * 5)
        rows.sortBy(metric)(Ordering.Int.reverse).take(10).foreach { row =>
            printf("|%-12s\t|%-5s|\n", row.userName, metric(row))
        }
    }

    /** Prints a three-column Top-10 table of single-period peaks; `peak` yields (count, period). */
    private def printPeaks(title: String, metricLabel: String, rows: List[AuthorStats])(peak: AuthorStats => (Int, String)): Unit = {
        println("\n-----------------" + title + "------------------")
        printf("|%-12s\t|%-25s\t|%-5s|\n", "用户", "时间段", metricLabel)
        printf("|%-12s\t|%-25s\t|%-5s|\n", "-" * 12, "-" * 25, "-" * 5)
        rows.sortBy(row => peak(row)._1)(Ordering.Int.reverse).take(10).foreach { row =>
            val (count, period) = peak(row)
            printf("|%-12s\t|%-25s\t|%-5s|\n", row.userName, period, count)
        }
    }

    /** Prints every author whose level changed inside the window selected by `window`. */
    private def printUpgrades(title: String, rows: List[AuthorStats])(window: AuthorStats => WindowStats): Unit = {
        println("\n-----------------" + title + "------------------")
        printf("|%-12s\t|%-5s|\n", "用户", "等级")
        printf("|%-12s\t|%-10s|\n", "-" * 12, "-" * 10)
        rows.filter(row => window(row).upgraded).foreach { row =>
            printf("|%-12s\t|%-5s|\n", row.userName, window(row).levelDesc)
        }
    }

    /**
     * Prints the like / view Top-10 tables, the single-period peak tables and the upgrade
     * lists, all relative to the current system date.
     */
    private def top(): Unit = {
        val today = LocalDate.now()
        val todayKey = today.format(yyyyMMdd).toInt

        // Exclusive lower bound of an n-day window; the window itself includes today, so
        // n = 1 selects today only.
        def lowerBound(n: Int): Int = today.minusDays(n).format(yyyyMMdd).toInt

        val from1 = lowerBound(1)
        val from3 = lowerBound(3)
        val from7 = lowerBound(7)
        val from30 = lowerBound(30)

        val rows: List[AuthorStats] = map
            .values
            .flatten
            .groupBy(_.getUser_id)
            .map { case (userId, snapshots) =>
                // capture times are fixed-width digit strings, so this sort is chronological
                val timeline = snapshots.toList.sortBy(_.getTime)
                val dated = timeline.map(a => (LocalDateTime.parse(a.getTime, dateFormat).format(yyyyMMdd).toInt, a))

                def window(from: Int): WindowStats =
                    summarize(dated.collect { case (day, author) if day > from && day <= todayKey => author })

                userId -> AuthorStats(
                    timeline.head.getUser_name,
                    window(from1),
                    window(from3),
                    window(from7),
                    window(from30)
                )
            }
            .values
            .toList

        printRanking("今日获赞Top10", "总获赞", rows)(_.day1.totalDigg)
        printRanking("近3日获赞Top10", "总获赞", rows)(_.day3.totalDigg)
        printRanking("近7日获赞Top10", "总获赞", rows)(_.day7.totalDigg)
        printRanking("近30日获赞Top10", "总获赞", rows)(_.day30.totalDigg)

        printRanking("今日浏览Top10", "总浏览", rows)(_.day1.totalView)
        printRanking("近3日浏览Top10", "总浏览", rows)(_.day3.totalView)

        printPeaks("今日单时间段获赞Top10", "获赞", rows)(row => (row.day1.maxDigg, row.day1.maxDiggTime))
        // This table previously showed the 3-day *like* peak under a "today's views" title.
        printPeaks("今日单时间段浏览Top10", "获浏览", rows)(row => (row.day1.maxView, row.day1.maxViewTime))

        printUpgrades("今日升级名单", rows)(_.day1)
        printUpgrades("近3日升级名单", rows)(_.day3)
    }

    /**
     * Reads the hard-coded JSON-lines files into [[map]].
     *
     * Each non-blank line is parsed as a JSON object mapping a capture time to the list of
     * authors captured at that time; a later entry with the same capture time replaces an
     * earlier one.
     */
    def load(): Unit = {
        // the target type is the same for every line, so build it once
        val snapshotType: Type = new TypeReference[util.Map[String, util.List[Author]]] {}.getType
        List(
            "./j-20210701.json",
            "./j-20210702.json",
            "./j-20210703.json"
        ).foreach { path =>
            val lines = new util.ArrayList[String]()
            val in = new FileInputStream(path)
            // Close the stream ourselves rather than relying on the helper to do it.
            try IoUtil.readLines(in, StandardCharsets.UTF_8, lines)
            finally in.close()

            lines.asScala.filter(_.trim.nonEmpty).foreach { line =>
                val bean: util.Map[String, util.List[Author]] = JSONUtil.toBean(line, snapshotType, true)
                bean.asScala.foreach { case (capturedAt, authors) =>
                    map.put(capturedAt, authors.asScala.toList)
                }
            }
        }
    }

    /**
     * Re-formats a capture time from `yyyyMMddHHmm` to `MM-dd HH:mm`.
     *
     * @param time capture time in [[dateFormat]]
     * @return the same instant rendered with [[dateFormatOut]]
     */
    def getOutTime(time: String): String = {
        LocalDateTime.parse(time, dateFormat).format(dateFormatOut)
    }

}