本文已参与好文召集令活动,点击查看:后端、大前端双赛道投稿,2万元奖池等你挑战!
本文只讲分析,数据来源请看 给掘金做了一个数据统计分析工具
前言
在原有基础上增加如下功能
- 今日升级名单
- 近3日7日30日升级名单
- 近3日7日30日浏览Top10
- 近3日7日30日获赞Top10
- 热门作者,每日数据折线图
看效果
-
今日升级名单
-
近3日7日30日升级名单
-
近3日7日30日浏览Top10
-
近3日7日30日获赞Top10
-
热门作者,每日数据折线图。由于不能嵌入 html,想看实际效果 -> 点我
分析数据
代码例子使用的还是 scala
主要代码都有注释,有疑问欢迎评论提问
想要数据,可以评论找我要,也可以自己采集(给掘金做了一个数据统计分析工具)
import cn.hutool.core.io.IoUtil
import cn.hutool.core.lang.TypeReference
import cn.hutool.json.JSONUtil
import com.yeting.juejin.JueLI.Author
import java.io.{FileInputStream, FileOutputStream}
import java.lang.reflect.Type
import java.nio.charset.StandardCharsets
import java.time.{LocalDate, LocalDateTime}
import java.time.format.DateTimeFormatter
import java.util
import scala.collection.JavaConverters._
import scala.collection.{immutable, mutable}
import scala.math.Ordering
object J {
// Format of the snapshot timestamps (map keys and Author.getTime are parsed with it).
val dateFormat: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMddHHmm")
// Calendar-day key used to bucket snapshots by day.
val yyyyMMdd: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyyMMdd")
// Display format used when printing time ranges.
val dateFormatOut: DateTimeFormatter = DateTimeFormatter.ofPattern("MM-dd HH:mm")
// Every loaded snapshot: timestamp key -> authors recorded under that key. Filled by load().
val map: mutable.Map[String, List[Author]] = mutable.ListMap.empty[String, List[Author]]
/** Entry point: loads the snapshot files, prints the rankings, then writes the chart page. */
def main(args: Array[String]): Unit = {
  load()     // read the JSON snapshot files into `map`
  top        // print the Top-10 and level-up tables
  userReport // generate the line-chart HTML file
}
/**
 * Builds an ECharts line chart of likes gained per day for the most active authors
 * and writes it to `./111.html`.
 *
 * Only authors whose summed daily gain exceeds 50 are charted; they are ordered by
 * that sum, highest first.
 *
 * Fixes over the previous version:
 *  - every series now has exactly one point per x-axis day (a day without data for
 *    an author is emitted as `null`), so points can no longer shift onto the wrong day;
 *  - author names are escaped before being embedded in the generated JavaScript;
 *  - the displayed name is taken from the author's earliest charted day, which makes
 *    the output deterministic.
 */
private def userReport: Unit = {
  // One author's gain on one calendar day.
  case class DailyGain(userId: String, day: String, userName: String, gained: Int)

  // Renders arbitrary text as a single-quoted JavaScript string literal.
  // Names come from scraped data, so quotes, backslashes, line breaks and '<'
  // (which could otherwise close the surrounding <script> tag) are escaped.
  def jsString(raw: String): String =
    raw.map {
      case '\\' => "\\\\"
      case '\'' => "\\'"
      case '\n' => "\\n"
      case '\r' => "\\r"
      case '<'  => "\\x3C"
      case c    => c.toString
    }.mkString("'", "", "'")

  // Bucket all snapshots by calendar day (yyyyMMdd).
  val snapshotsByDay: Map[String, List[Author]] = map.toList
    .map { case (time, authors) => (LocalDateTime.parse(time, dateFormat).format(yyyyMMdd), authors) }
    .groupBy(_._1)
    .map { case (day, entries) => (day, entries.flatMap(_._2)) }

  // Chronological list of days; this is the chart's x axis.
  val days: List[String] = snapshotsByDay.keys.toList.sortBy(_.toInt)

  // Per day and per author: likes at the last snapshot minus likes at the first one.
  // Iterating over `days` keeps each author's rows in chronological order.
  val dailyGains: List[DailyGain] = days.flatMap { day =>
    snapshotsByDay(day).groupBy(_.getUser_id).toList.map { case (userId, snapshots) =>
      val ordered = snapshots.sortBy(_.getTime)
      DailyGain(
        userId,
        day,
        ordered.head.getUser_name,
        ordered.last.getGot_digg_count.toInt - ordered.head.getGot_digg_count.toInt
      )
    }
  }

  def totalGain(rows: List[DailyGain]): Int = rows.map(_.gained).sum

  // Keep only sufficiently active authors (otherwise the chart is unreadable),
  // most active first so they are easy to find in the legend.
  val hotUsers: List[List[DailyGain]] = dailyGains
    .groupBy(_.userId)
    .values
    .filter(rows => totalGain(rows) > 50)
    .toList
    .sortBy(totalGain)(Ordering.Int.reverse)

  val xAxisdata = days.map(day => s"'${day}'").mkString(",")
  val legendData = hotUsers.map(rows => jsString(rows.head.userName)).mkString(",")

  // One series per author, aligned with the x axis.
  val series = hotUsers
    .map { rows =>
      val gainByDay: Map[String, Int] = rows.map(row => row.day -> row.gained).toMap
      // ECharts treats null as "no data" and leaves a gap in the line.
      val points = days.map(day => gainByDay.get(day).fold("null")(_.toString)).mkString(",")
      s"""
         |{
         | name: ${jsString(rows.head.userName)},
         | type: 'line',
         | data: [${points}]
         |}
         |""".stripMargin
    }
    .mkString(",")

  // Assemble the page.
  val html =
    s"""
       |<!DOCTYPE html>
       |<html style="height: 100%">
       | <head>
       | <meta charset="utf-8">
       | </head>
       | <body style="height: 100%; margin: 0">
       | <div id="container" style="height: 100%"></div>
       | <script type="text/javascript" src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
       | <script type="text/javascript">
       | var dom = document.getElementById("container");
       | var myChart = echarts.init(dom);
       | var app = {};
       | var option;
       | option = {
       | title: {
       | text: ''
       | },
       | tooltip: {
       | trigger: 'axis'
       | },
       | legend: {
       | data: [${legendData}]
       | },
       | grid: {
       | left: '3%',
       | right: '4%',
       | bottom: '3%',
       | containLabel: true
       | },
       | toolbox: {
       | feature: {
       | saveAsImage: {}
       | }
       | },
       | xAxis: {
       | type: 'category',
       | boundaryGap: false,
       | data: [${xAxisdata}]
       | },
       | yAxis: {
       | type: 'value'
       | },
       | series: [
       | ${series}
       | ]
       | };
       | if (option && typeof option === 'object') {
       | myChart.setOption(option);
       | }
       | </script>
       | </body>
       |</html>
       |
       |""".stripMargin

  // Persist the page; `true` is passed as in the original call
  // (presumably hutool's isCloseOut flag, so the stream is closed after writing — confirm).
  IoUtil.writeUtf8(new FileOutputStream("./111.html"), true, html)
}
/**
 * Prints the ranking tables: likes Top 10 (today / 3 / 7 / 30 days), views Top 10
 * (today / 3 days), today's busiest single interval for likes and for views, and
 * the level-up lists (today / 3 days).
 *
 * Every look-back window includes the current day.
 *
 * Fixes over the previous version:
 *  - the "今日单时间段浏览Top10" table really ranks today's peak *view* gain
 *    (it used to rank and print the 3-day peak *like* gain);
 *  - the busiest interval is searched over every pair of consecutive snapshots
 *    (`sliding(2, 2)` skipped every second interval);
 *  - level-up detection compares levels numerically;
 *  - "today" is read once, so all users are evaluated against the same date.
 */
private def top: Unit = {
  // Aggregated figures for one user over one look-back window.
  case class WindowReport(
    totalDigg: Int,
    totalView: Int,
    maxDigg: Int,
    maxDiggTime: String,
    maxView: Int,
    maxViewTime: String,
    levelUp: Boolean,
    levelDesc: String
  )
  // All windows of one user.
  case class UserStats(
    userName: String,
    day1: WindowReport,
    day3: WindowReport,
    day7: WindowReport,
    day30: WindowReport
  )

  val today = LocalDate.now()

  // Summarises a chronologically ordered run of snapshots of a single user.
  def report(authorList: List[Author]): WindowReport =
    if (authorList.isEmpty) {
      WindowReport(0, 0, 0, "", 0, "", levelUp = false, levelDesc = "")
    } else {
      val first = authorList.head
      val latest = authorList.last
      // Every pair of consecutive snapshots; empty when there is only one snapshot.
      val intervals: List[(Author, Author)] = authorList.zip(authorList.tail)

      // Largest strictly positive gain over a single interval and its time range;
      // (0, "") when nothing was gained. The earliest interval wins ties.
      def peak(count: Author => Int): (Int, String) =
        intervals.foldLeft((0, "")) { case (best, (from, to)) =>
          val gained = count(to) - count(from)
          if (gained > best._1) (gained, s"${getOutTime(from.getTime)} - ${getOutTime(to.getTime)}")
          else best
        }

      val (maxDigg, maxDiggTime) = peak(_.getGot_digg_count.toInt)
      val (maxView, maxViewTime) = peak(_.getGot_view_count.toInt)

      // A level-up happened when more than one distinct level was seen in the window.
      val levels = authorList.map(_.getLevel.toInt)
      val lowest = levels.min
      val highest = levels.max
      val levelUp = lowest != highest
      val levelDesc = if (levelUp) s"${lowest} 升到 ${highest}" else "无升级"

      WindowReport(
        totalDigg = latest.getGot_digg_count.toInt - first.getGot_digg_count.toInt,
        totalView = latest.getGot_view_count.toInt - first.getGot_view_count.toInt,
        maxDigg = maxDigg,
        maxDiggTime = maxDiggTime,
        maxView = maxView,
        maxViewTime = maxViewTime,
        levelUp = levelUp,
        levelDesc = levelDesc
      )
    }

  val stats: List[UserStats] = map.values.flatten
    .groupBy(_.getUser_id)
    .values
    .toList
    .map { snapshots =>
      val ordered: List[Author] = snapshots.toList.sortBy(_.getTime)
      val dated = ordered.map(a => (LocalDateTime.parse(a.getTime, dateFormat).toLocalDate, a))

      // Snapshots from the last `n` calendar days, today included.
      def lastDays(n: Int): List[Author] = {
        val cutoff = today.minusDays(n)
        dated.collect { case (day, a) if day.isAfter(cutoff) && !day.isAfter(today) => a }
      }

      UserStats(
        userName = ordered.head.getUser_name,
        day1 = report(lastDays(1)),
        day3 = report(lastDays(3)),
        day7 = report(lastDays(7)),
        day30 = report(lastDays(30))
      )
    }

  // Two-column Top-10 table ordered by `score`, highest first.
  def printRanking(title: String, valueHeader: String)(score: UserStats => Int): Unit = {
    println(s"\n-----------------${title}------------------")
    printf("|%-12s\t|%-5s|\n", "用户", valueHeader)
    printf("|%-12s\t|%-5s|\n", "-" * 12, "-" * 5)
    stats.sortBy(score)(Ordering.Int.reverse).take(10).foreach { s =>
      printf("|%-12s\t|%-5s|\n", s.userName, score(s))
    }
  }

  // Three-column Top-10 table of the busiest single interval; `peak` yields (gain, time range).
  def printPeaks(title: String, valueHeader: String)(peak: UserStats => (Int, String)): Unit = {
    println(s"\n-----------------${title}------------------")
    printf("|%-12s\t|%-25s\t|%-5s|\n", "用户", "时间段", valueHeader)
    printf("|%-12s\t|%-25s\t|%-5s|\n", "-" * 12, "-" * 25, "-" * 5)
    stats.sortBy(s => peak(s)._1)(Ordering.Int.reverse).take(10).foreach { s =>
      val (gained, range) = peak(s)
      printf("|%-12s\t|%-25s\t|%-5s|\n", s.userName, range, gained)
    }
  }

  // Lists every user whose level changed inside the selected window.
  def printLevelUps(title: String)(window: UserStats => WindowReport): Unit = {
    println(s"\n-----------------${title}------------------")
    printf("|%-12s\t|%-5s|\n", "用户", "等级")
    printf("|%-12s\t|%-10s|\n", "-" * 12, "-" * 10)
    stats.foreach { s =>
      val w = window(s)
      if (w.levelUp) printf("|%-12s\t|%-5s|\n", s.userName, w.levelDesc)
    }
  }

  printRanking("今日获赞Top10", "总获赞")(_.day1.totalDigg)
  printRanking("近3日获赞Top10", "总获赞")(_.day3.totalDigg)
  printRanking("近7日获赞Top10", "总获赞")(_.day7.totalDigg)
  printRanking("近30日获赞Top10", "总获赞")(_.day30.totalDigg)
  printRanking("今日浏览Top10", "总浏览")(_.day1.totalView)
  printRanking("近3日浏览Top10", "总浏览")(_.day3.totalView)
  printPeaks("今日单时间段获赞Top10", "获赞")(s => (s.day1.maxDigg, s.day1.maxDiggTime))
  printPeaks("今日单时间段浏览Top10", "获浏览")(s => (s.day1.maxView, s.day1.maxViewTime))
  printLevelUps("今日升级名单")(_.day1)
  printLevelUps("近3日升级名单")(_.day3)
}
/**
 * Reads the hard-coded snapshot files line by line and merges their content into `map`.
 *
 * Each line is parsed as a JSON object whose entries are put into `map` as
 * key -> list of [[Author]]; a key that is already present is overwritten.
 *
 * Fixes over the previous version: the input stream is now closed explicitly
 * (it was never closed before), and the binding type is built once instead of per line.
 */
def load(): Unit = {
  // Generic target type for the JSON binding; identical for every line.
  val snapshotType: Type = new TypeReference[util.Map[String, util.List[Author]]] {}.getType

  List(
    "./j-20210701.json",
    "./j-20210702.json",
    "./j-20210703.json",
  ).foreach { path =>
    val lineList = new util.ArrayList[String]()
    val in = new FileInputStream(path)
    // Release the file handle even if reading fails.
    try IoUtil.readLines(in, StandardCharsets.UTF_8, lineList)
    finally in.close()

    lineList.forEach { line =>
      // Third argument `true` is kept from the original call (hutool's ignoreError flag — confirm).
      val bean: util.Map[String, util.List[Author]] = JSONUtil.toBean(line, snapshotType, true)
      bean.asScala.foreach { case (time, authors) => map.put(time, authors.asScala.toList) }
    }
  }
}
/**
 * Converts a timestamp from the `dateFormat` pattern (yyyyMMddHHmm) to the
 * `dateFormatOut` pattern (MM-dd HH:mm).
 *
 * @param time timestamp text in the `dateFormat` pattern
 * @return the same instant rendered with `dateFormatOut`
 */
def getOutTime(time: String): String = {
  val parsed = LocalDateTime.parse(time, dateFormat)
  dateFormatOut.format(parsed)
}
}