本文介绍基于 GitLab 的定时自动化统计代码全流程解决方案
一、功能说明
很多机构和团队都通过内部搭建的 GitLab 实现开发协作,并存放源码及交付物。于是管理者很容易想到将 GitLab 的代码提交量作为考核的参考指标之一,但是由于 GitLab 并没有将提交文件统计功能集成在 UI 界面中,所以需要做定制化的统计开发,这就是本文要做的事情。
二、统计实现
前置准备
- 获取 GitLab 实例 root 账号的个人访问令牌(Personal Access Tokens)
统计代码方法
- 考虑到应用的便捷性,本文介绍 基于 Python 的 GitLab 统计代码方法
1、python-gitlab
- python-gitlab 是一个 Python 包,提供对 GitLab 服务器 API 的访问。
- 参考:python-gitlab.readthedocs.io/en/v3.15.0/…
安装 python-gitlab
pip install python-gitlab
访问 GitLab
import gitlab

# Base URL of the GitLab instance
gitlaburl = 'http://xxxx'
# Personal access token
gittoken = 'xxxxxxxx'
# Authenticated client used by the snippets that follow
gl = gitlab.Gitlab(url=gitlaburl, private_token=gittoken)
核心代码
# Reporting period (placeholders: replace with real dates)
t_from = 'yyyy-MM-dd'
t_end = 'yyyy-MM-dd'
# All projects
projects = gl.projects.list(all=True)
for p in projects:
    # A commit reachable from several branches must only be counted once.
    seen = set()
    # All branches of the project
    branches = p.branches.list(all=True)
    for b in branches:
        # All commits of the branch inside the reporting period
        commits = p.commits.list(all=True, query_parameters={'since': t_from, 'until': t_end, 'ref_name': b.name})
        # Walk every commit and read who committed it and how much changed
        for c in commits:
            if c.id in seen:
                continue
            seen.add(c.id)
            # The listing carries no statistics, so fetch the single commit
            commit = p.commits.get(c.id)
            # Committer
            cname = commit.committer_name
            # Line statistics of the commit
            cstats = commit.stats
            # Total changed lines
            total = cstats['total']
            # Added lines
            additions = cstats['additions']
            # Deleted lines
            deletions = cstats['deletions']
优化
- 通过配置文件加载:
conf.cfg:
# Settings shared by every server section
[global]
default = somewhere
ssl_verify = true
timeout = 100

# One section per GitLab server; python-gitlab reads `url` and `private_token`
[somewhere]
url = http://xxxx
private_token = xxxxxxxx
api_version = 4
# Build the client from the 'somewhere' section of conf.cfg
gl = gitlab.Gitlab.from_config('somewhere', ['conf.cfg'])
2、GitLab API
- 由 GitLab 提供的 API 支持。
- 参考:docs.gitlab.com/ee/api/comm…
查看 API 版本
- API 版本定义在 api.rb 中:
- 默认位置:/opt/gitlab/embedded/service/gitlab-rails/lib/api/api.rb
- 查找:
find / -name api.rb
- 以下基于 GitLab API v4 版本
访问 GitLab
# Base URL of the GitLab instance
gitlaburl = 'http://xxxx'
# Personal access token
gittoken = 'xxxxxxxx'
# All projects (a page holds at most 100 entries)
for i in range(1, 20):
    url = '%s/api/v4/projects?private_token=%s&per_page=100&page=%d&order_by=last_activity_at' % (gitlaburl, gittoken, i)
# All branches of a project
# Project id: projectid
# per_page is set explicitly because the default page size is only 20
url = '%s/api/v4/projects/%s/repository/branches?private_token=%s&per_page=100' % (gitlaburl, projectid, gittoken)
# All commits of a branch (a page holds at most 100 entries)
# Branch name: branchname; period start: sincedate; period end: untildate
for i in range(1, 20):
    url = '%s/api/v4/projects/%s/repository/commits?page=%d&per_page=100&ref_name=%s&since=%s&until=%s&private_token=%s' % (gitlaburl, projectid, i, branchname, sincedate, untildate, gittoken)
# A single commit, including its line statistics
# Commit id: commitid
url = '%s/api/v4/projects/%s/repository/commits/%s?private_token=%s' % (gitlaburl, projectid, commitid, gittoken)
核心代码
import datetime
import json

import requests

# Start of the reporting period.
# 'yyyy-MM-dd' is a placeholder: strptime cannot parse it, so replace it
# with a real date such as '2023-01-01' before running.
t_from = 'yyyy-MM-dd'
datefrom = datetime.datetime.strptime(t_from, '%Y-%m-%d')
# End of the reporting period (same placeholder convention)
t_end = 'yyyy-MM-dd'
dateend = datetime.datetime.strptime(t_end, '%Y-%m-%d')
# TODO 0...
# DM 0...
# Project record, shaped after the JSON returned by the projects API
class ProjectInfo(json.JSONEncoder):
    """Plain holder for the project fields the statistics need.

    Every field defaults to ``None`` at class level and is assigned per
    instance by the code that reads the API response.
    """

    projectid = None
    namespace = None
    group = None
    projectname = None
    stime = None
    projectdesc = None
    projecturl = None
    path = None
    name = None
    # Per-author statistics of the project: author email -> CommitDetails
    cmap = None
# Commit statistics record, shaped after the JSON returned by the commits API
class CommitDetails(json.JSONEncoder):
    """Line counters attributed to one author."""

    authorname = None
    authoremail = None
    additions = 0
    deletions = 0
    total = 0
# TODO 1...
class Worker:
    """Collect per-author line statistics for every project of a GitLab instance.

    The methods rely on names defined at module level by the surrounding
    script: ``gitlaburl``, ``gittoken``, ``datefrom``, ``dateend``,
    ``requests``, ``datetime``, ``ProjectInfo`` and ``get_commit_detail``.
    The ``# TODO n...`` / ``# DM n...`` markers are extension points that the
    article fills in later; they are kept in place.
    """

    def __init__(self):
        # project id -> ProjectInfo whose ``cmap`` holds the per-author totals
        self.totalMap = {}

    @staticmethod
    def _accumulate(commitmap, detail):
        """Add the counters of ``detail`` to ``commitmap[detail.authoremail]``.

        The first record of an author is stored as a shallow copy, so later
        additions never modify a record that another map still refers to.
        """
        import copy

        existdetail = commitmap.get(detail.authoremail)
        if existdetail is None:
            commitmap[detail.authoremail] = copy.copy(detail)
        else:
            existdetail.total += detail.total
            existdetail.additions += detail.additions
            existdetail.deletions += detail.deletions

    # All projects
    def get_projects(self):
        """Walk all projects, gather their statistics and merge them per author.

        Returns:
            dict mapping author email to the ``CommitDetails`` summed over
            every project found in ``self.totalMap``.
        """
        # TODO 2...
        # At most 19 pages of 100 projects are requested.
        for i in range(1, 20):
            url = '%s/api/v4/projects?private_token=%s&per_page=100&page=%d&order_by=last_activity_at' % (gitlaburl, gittoken, i)
            results = requests.get(url)
            results_json = results.json()
            # Stop on an empty page (listing exhausted) or on a non-list
            # payload (error object) instead of requesting further pages.
            if not isinstance(results_json, list) or not results_json:
                break
            for result in results_json:
                # NOTE(review): `value` and `days` are not used by the visible
                # code; presumably the elided TODO steps consume them.
                value = result['default_branch']
                # TODO 3...
                last_active_time = result['last_activity_at']
                days = datefrom - datetime.datetime.strptime(last_active_time, '%Y-%m-%dT%H:%M:%S.%fZ')
                projectinfo = ProjectInfo()
                projectinfo.projectid = result['id']
                projectinfo.namespace = result['path_with_namespace']
                projectinfo.name = result['name']
                projectinfo.projectdesc = result['description']
                projectinfo.projecturl = result['web_url']
                projectinfo.path = result['path']
                # TODO 4...
                self.get_branches(result['id'], projectinfo)
        # TODO 5...
        # Merge the per-project statistics into one map per author
        commitmap = {}
        for project in self.totalMap.values():
            for detail in project.cmap.values():
                self._accumulate(commitmap, detail)
        # TODO 6...
        return commitmap

    # All branches
    def get_branches(self, projectid, projectinfo):
        """Gather the statistics of every branch of one project.

        The merged per-author result is stored in ``projectinfo.cmap`` and the
        project is registered in ``self.totalMap``.
        """
        # per_page is set explicitly because the default page size is only 20
        url = '%s/api/v4/projects/%s/repository/branches?private_token=%s&per_page=100' % (gitlaburl, projectid, gittoken)
        results = requests.get(url)
        results_json = results.json()
        if not isinstance(results_json, list):
            # Error object instead of a branch list: treat as "no branches"
            results_json = []
        # Commit ids already counted for this project. A commit reachable from
        # several branches would otherwise be added once per branch.
        seen = set()
        branchmap = {}
        # Commits of the branches; 'master' is always queried first
        branchmap['master'] = self.get_commits(projectid, projectinfo.projecturl, 'master', seen)
        for result in results_json:
            branchname = result['name']
            if branchname in branchmap:
                # Already fetched above; do not request it a second time
                continue
            branchmap[branchname] = self.get_commits(projectid, projectinfo.projecturl, branchname, seen)
        # Merge the branches into one map per author
        commitmap = {}
        for value_map in branchmap.values():
            for detail in value_map.values():
                self._accumulate(commitmap, detail)
        projectinfo.cmap = commitmap
        # TODO 7...
        # Without this registration get_projects would have nothing to merge.
        self.totalMap[projectinfo.projectid] = projectinfo
        # TODO 8...
        # DM 1...

    # All commits
    def get_commits(self, projectid, projecturl, branchname, seen=None):
        """Sum the commit statistics of one branch per author.

        Args:
            projectid: id of the project.
            projecturl: web URL of the project (not used by this method).
            branchname: branch whose commits are listed.
            seen: optional set of commit ids that were already counted; ids
                found in it are skipped and newly counted ids are added.

        Returns:
            dict mapping author email to ``CommitDetails``; empty when the
            branch has no commits in the period or the API returns an error.
        """
        from urllib.parse import quote

        sincedate = datefrom.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        untildate = dateend.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        # Branch names may contain characters such as '#', '&' or '+' that
        # would otherwise corrupt the query string.
        refname = quote(branchname, safe='')
        detailmap = {}
        # At most 19 pages of 100 commits are requested.
        for i in range(1, 20):
            url = '%s/api/v4/projects/%s/repository/commits?page=%d&per_page=100&ref_name=%s&since=%s&until=%s&private_token=%s' % (gitlaburl, projectid, i, refname, sincedate, untildate, gittoken)
            results = requests.get(url)
            results_json = results.json()
            # Empty page: no more commits. Non-list payload: error object,
            # for example when the branch does not exist.
            if not isinstance(results_json, list) or not results_json:
                break
            for result in results_json:
                commitid = result['id']
                if seen is not None:
                    if commitid in seen:
                        continue
                    seen.add(commitid)
                detail = get_commit_detail(projectid, commitid)
                self._accumulate(detailmap, detail)
        return detailmap
# 提交信息
def get_commit_detail(projectid, commitid):
    """Fetch one commit and return its author and line statistics.

    Args:
        projectid: id of the project the commit belongs to.
        commitid: SHA of the commit.

    Returns:
        A ``CommitDetails`` filled from the ``author_name``, ``author_email``
        and ``stats`` fields of the API response.
    """
    url = '%s/api/v4/projects/%s/repository/commits/%s?private_token=%s' % (gitlaburl, projectid, commitid, gittoken)
    results = requests.get(url)
    results_json = results.json()
    stats = results_json['stats']
    # The former lookups in `useremailaliasmapping` / `useremailnamemapping`
    # were dropped: neither mapping is defined in the script and their
    # results were never used.
    details = CommitDetails()
    details.additions = stats['additions']
    details.deletions = stats['deletions']
    details.total = stats['total']
    details.authoremail = results_json['author_email']
    details.authorname = results_json['author_name']
    return details
# Script entry point: run the whole collection once
if __name__ == '__main__':
    gl = Worker()
    gl.get_projects()
优化
- (1)、通过配置文件加载:
conf.ini:
# Reporting period
[Time]
startTime = yyyy-MM-dd
endTime = yyyy-MM-dd

# GitLab connection; values are written without quotes
[Conf]
gitlaburl = http://xxxx
gittoken = xxxxxxxx
# Configuration file
conf = configparser.RawConfigParser()
# Path and encoding of the configuration file
conf.read('./conf/conf.ini', 'utf-8')
# GitLab base URL. The option names match the [Conf] section of the ini
# file; quotes around a value are stripped because RawConfigParser keeps them.
gitlaburl = conf.get('Conf', 'gitlaburl').strip('\'"')
# Access token
gittoken = conf.get('Conf', 'gittoken').strip('\'"')
# Aliases for code that still refers to the previous variable names
gitrooturl = gitlaburl
git_token = gittoken
# Start of the reporting period
t_from = conf.get('Time', 'startTime')
datefrom = datetime.datetime.strptime(t_from, '%Y-%m-%d')
# End of the reporting period
t_end = conf.get('Time', 'endTime')
dateend = datetime.datetime.strptime(t_end, '%Y-%m-%d')
- (2)、使用线程加速:
-
# TODO 1
# Lock guarding the state shared between the worker threads
lock = threading.RLock()

# TODO 2
threads = []
# Shared result map, filled by the worker threads at TODO 7
self.totalMap = {}

# TODO 4
# Takes the place of the direct self.get_branches(...) call
t = threading.Thread(target=self.get_branches, args=(result['id'], projectinfo))
threads.append(t)

# TODO 5
# Start every thread
for t in threads:
    t.start()
# Wait until every thread has finished
for t in threads:
    t.join()

# TODO 7
# totalMap is written by every thread; `with` releases the lock even when
# the assignment raises
with lock:
    self.totalMap[projectinfo.projectid] = projectinfo
-
- (3)、时区调整
- 如果 GitLab 实例修改了默认时区,`last_active_time` 的格式需要调整(对应 TODO 3):
# Drop the '+HH:MM' offset so the value matches the '...%fZ' parse format
last_active_time = result['last_activity_at'].split('+')[0] + 'Z'
- 如果 GitLab 实例修改了默认时区,
三、统计结果存储
1、本地文件存储
- 结果写入 CSV 文件:
def write2csv(filepath, commitmap):
    """Write the per-author statistics to a CSV file.

    Args:
        filepath: target file; missing parent directories are created.
        commitmap: mapping whose values expose ``authoremail``,
            ``authorname``, ``total``, ``additions`` and ``deletions``.
            Entries with ``total == 0`` are left out.
    """
    import csv
    import os

    parent = os.path.dirname(filepath)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # Explicit UTF-8 keeps the Chinese header independent of the platform
    # default encoding; newline='' lets the csv module control line endings.
    with open(filepath, 'w', encoding='utf-8', newline='') as out:
        # csv.writer quotes fields that contain commas or quotes
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(("提交人邮箱", "提交人姓名", "总行数", "增加行数", "删除行数"))
        for value in commitmap.values():
            if value.total != 0:
                writer.writerow((value.authoremail, value.authorname, value.total, value.additions, value.deletions))
# TODO 0...
# Directory the statistics are exported to. Set it to a real directory:
# with the empty placeholder the paths below start at '/'.
exportpath = ''
# TODO 6
# Totals over all projects
write2csv("%s/GitLabStatic%s_%s/%s%s_%s.csv" % (exportpath, t_from, t_end, 'total', t_from, t_end), commitmap)
# TODO 8
# Per-project file; '/' in the namespace is replaced so it stays one file name
write2csv("%s/GitLabStatic%s_%s/project/%s_%s.csv" % (exportpath, t_from, t_end, projectinfo.namespace.replace('/', '_'), t_end), commitmap)
2、写入达梦数据库
前置准备
- 本地安装达梦数据库
安装 dmPython
- dmPython 是达梦数据库提供的数据库访问接口,使 Python 应用程序能够对达梦数据库进行访问。
- 进入 dmPython 中
setup.py所在的源码目录:python setup.py install
测试 dmPython
import dmPython

# Connectivity check: run one query and print the rows
conn = dmPython.connect(user = 'xxx', password = 'xxx', server = 'xxx.xxx.xxx.xxx', port = 'xxxx')
try:
    cursor = conn.cursor()
    try:
        cursor.execute('select * from xxx')
        values = cursor.fetchall()
        print(values)
    finally:
        # Close the cursor even when the query fails
        cursor.close()
finally:
    # Close the connection even when the query fails
    conn.close()
- 输出数据表结果则测试通过
dmPython 应用
- 建表:
-- Result table: one row per author and project for one statistics run.
-- The quoted column names are the ones used by the insert statement of the
-- Python code that follows.
create table GITLABSTATIC (
"ID" bigint identity(1, 1) not null comment 'ID',
"authoremail" varchar(50) not null comment '提交人邮箱',
"authorname" varchar(50) comment '提交人名称',
"groupname" varchar(50) comment '群组名称',
"projectid" bigint comment '项目ID',
"projectname" varchar(50) comment '项目名称',
"total" bigint comment '总行数',
"additions" bigint comment '增加行数',
"deletions" bigint comment '删除行数',
"stime" timestamp comment '统计时间',
-- "ltime" is filled by its default when the insert omits it
"ltime" timestamp default current_timestamp() comment '入库时间',
primary key("ID")
);
- 代码:
# DM 0
conn = dmPython.connect(user = 'xxx', password = 'xxx', server = 'xxx.xxx.xxx.xxx', port = 'xxxx', autoCommit = True)
cursor = conn.cursor()
# DM 1
# Project information
namespace_parts = projectinfo.namespace.split('/')
info = {}
# First path segment is the top-level group
info["group"] = namespace_parts[0]
info["projectid"] = projectid
# Last path segment is the project, also for nested groups (group/sub/project)
info["projectname"] = namespace_parts[-1]
# Statistics date: the day before the end of the reporting period
info["stime"] = (dateend - datetime.timedelta(days = 1)).strftime('%Y-%m-%d')
for key, value in commitmap.items():
    # Authors without any changed line are not stored
    if value.total != 0:
        values = (value.authoremail, value.authorname, info["group"], info["projectid"], info["projectname"], value.total, value.additions, value.deletions, info["stime"])
        cursor.execute('insert into GITLABSTATIC("authoremail","authorname","groupname","projectid","projectname","total","additions","deletions","stime") values(?, ?, ?, ?, ?, ?, ?, ?, ?)', values)
四、统计程序自动化
- 程序自动化在服务器端借助 crond 服务工具实现
准备
- 服务器安装达梦数据库,并启动服务
- 如果达梦数据库为远程仓库,保证端口通畅
yum install crontabs
自动化实现
启动脚本
GitlabStatic.sh:
# Run from the directory of this script; the quoting keeps paths with spaces intact
cd "$(dirname "$0")" || exit 1
# Timestamp of this run
date +"%Y-%m-%d %H:%M:%S"
python ./GitlabStatic.py >> /xxx/GitlabStatic_run.log 2>&1
echo "定时任务执行完成~~~"
echo
配置定时任务
-
root用户下操作
-
crontab -e
0 0 * * * sh /xxx/GitlabStatic.sh >> /xxx/crontab-run_GitlabStatic.log 2>&1
-
-
重新加载配置:
service crond reload
-
重启cron服务:
service crond restart