9aab28b6f2
Our mailmapper script required Python 2 which is no longer maintained. A main difference when converting to Python 3 is that byte strings are not character strings. So add conversion and skip over conversion errors. Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
162 lines
4.8 KiB
Python
Executable File
162 lines
4.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# SPDX-License-Identifier: GPL-2.0+
|
|
#
|
|
# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
|
|
|
|
'''
|
|
A tool to create/update the mailmap file
|
|
|
|
The command 'git shortlog' summarizes git log output in a format suitable
|
|
for inclusion in release announcements. Each commit will be grouped by
|
|
author and title.
|
|
|
|
One problem is that the authors' name and/or email address is sometimes
|
|
spelled differently. The .mailmap feature can be used to coalesce together
|
|
commits by the same person.
|
|
(See 'man git-shortlog' for further information on this feature.)
|
|
|
|
This tool helps to create/update the mailmap file.
|
|
|
|
It runs 'git shortlog' internally and searches differently spelled author
|
|
names which share the same email address. The author name with the most
|
|
commits is assumed to be a canonical real name. If the number of commits
|
|
from the canonical name is equal to or greater than 'MIN_COMMITS',
|
|
the entry for the canonical name will be output. ('MIN_COMMITS' is used
|
|
here because we do not want to create a fat mailmap by adding every author
|
|
with only a few commits.)
|
|
|
|
If there exists a mailmap file specified by the mailmap.file configuration
|
|
options or '.mailmap' at the toplevel of the repository, it is used as
|
|
a base file. (The mailmap.file configuration takes precedence over the
|
|
'.mailmap' file if both exist.)
|
|
|
|
The base file and the newly added entries are merged together and sorted
|
|
alphabetically (but the comment block is kept untouched), and then printed
|
|
to standard output.
|
|
|
|
Usage
|
|
-----
|
|
|
|
scripts/mailmapper
|
|
|
|
prints the mailmapping to standard output.
|
|
|
|
scripts/mailmapper > tmp; mv tmp .mailmap
|
|
|
|
will be useful for updating '.mailmap' file.
|
|
'''
|
|
|
|
import sys
|
|
import os
|
|
import subprocess
|
|
|
|
# Only canonical names with MIN_COMMITS or more commits get an entry.
# This limitation is used so as not to create a too big mailmap file.
MIN_COMMITS = 50

# Ask git for the top-level directory of the repository we are in.
# check_output() returns bytes; git terminates the path with a newline.
try:
    toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
except subprocess.CalledProcessError:
    sys.exit('Please run in a git repository.')

# Strip only the line terminator.  A bare rstrip() would also eat trailing
# whitespace that is really part of the directory name and make the
# chdir() below fail.
toplevel = toplevel.rstrip(b'\r\n')

# Change the current working directory to the toplevel of the repository
# for our easier life.  (os.chdir() accepts a bytes path.)
os.chdir(toplevel)
|
|
|
|
# First, create the 'author name' vs 'number of commits' database.
# We assume the name with the most commits as the canonical real name.
#
# 'HEAD' is passed explicitly: without a revision, git shortlog reads the
# log from standard input whenever stdin is not a terminal.
shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', 'HEAD'])

# Maps author name (str) -> number of commits (int).
commits_per_name = {}

for line in shortlog.splitlines():
    try:
        # Decode to str: the names looked up in this dictionary later are
        # decoded strings, and bytes keys would never compare equal to them.
        commits, name = line.decode('utf-8').split(None, 1)
        count = int(commits)
    except ValueError:
        # Skip lines that are not valid UTF-8 (UnicodeDecodeError is a
        # ValueError), that have an empty author name, or whose commit
        # count is not a number.  Falling through here would record the
        # values left over from the previous line.
        continue
    commits_per_name[name] = count
|
|
|
|
# Next, coalesce the author names with the same email address.
#
# 'HEAD' is passed explicitly: without a revision, git shortlog reads the
# log from standard input whenever stdin is not a terminal.
shortlog = subprocess.check_output(
    ['git', 'shortlog', '-s', '-n', '-e', 'HEAD'])

# mail_vs_name: email address ('<...>') -> preferred author name seen so far
# output:       email address -> canonical name, for the entries to print
mail_vs_name = {}
output = {}

for line in shortlog.splitlines():
    try:
        line = line.decode('utf-8')
    except UnicodeDecodeError:
        # Skip over conversion errors; there is no usable name or address.
        continue

    # tmp, mail = line.rsplit(None, 1) is not safe
    # because weird email addresses might include whitespaces
    tmp, bracket, mail = line.partition('<')
    if not bracket:
        # No email address on this line, so there is nothing to key on.
        continue
    mail = '<' + mail.rstrip()

    # tmp is '<number of commits> <author name>'; the name may be empty.
    fields = tmp.rstrip().split(None, 1)
    name = fields[1] if len(fields) == 2 else ''

    if mail not in mail_vs_name:
        mail_vs_name[mail] = name
        continue

    # another name for the same email address
    prev_name = mail_vs_name[mail]

    # Take the name with more commits.  An empty name counts as zero.
    try:
        prev_commits = commits_per_name[prev_name] if prev_name else 0
        commits = commits_per_name[name] if name else 0
    except KeyError:
        # A name missing from the commit database cannot be ranked.
        continue

    # On a tie the name seen later wins.
    if commits >= prev_commits:
        major_name, major_commits = name, commits
    else:
        major_name, major_commits = prev_name, prev_commits

    mail_vs_name[mail] = major_name
    # "MIN_COMMITS or more", as stated where MIN_COMMITS is described.
    if major_commits >= MIN_COMMITS:
        output[mail] = major_name
|
|
|
|
# [1] If there exists a mailmap file at the location pointed to
#     by the mailmap.file configuration option, update it.
# [2] If the file .mailmap exists at the toplevel of the repository, update it.
# [3] Otherwise, create a new mailmap file.
mailmap_files = []

try:
    config_mailmap = subprocess.check_output(
        ['git', 'config', 'mailmap.file'])
except subprocess.CalledProcessError:
    # Treated as "no mailmap.file configured".
    config_mailmap = b''

# Drop the line terminator and turn the bytes from git into a str path, so
# that the candidate list does not mix bytes and str entries.
config_mailmap = os.fsdecode(config_mailmap.rstrip(b'\r\n'))
if config_mailmap:
    mailmap_files.append(config_mailmap)

mailmap_files.append('.mailmap')

# The first candidate that can be opened becomes the base file.
# infile stays None when none of them can be opened.
infile = None

for map_file in mailmap_files:
    try:
        infile = open(map_file)
    except OSError:
        # Failed to open. Try next.
        continue
    break
|
|
|
|
# comment_block: the leading run of comment and blank lines of the base
#                file, printed first and kept in its original order
# output_lines:  every other base-file line plus the new entries, sorted
comment_block = []
output_lines = []

if infile:
    # 'with' closes the base file even if reading it raises.
    with infile:
        in_comment_block = True
        for line in infile:
            # A last line without a newline would otherwise be glued to
            # whatever follows it once the lines are sorted and joined.
            if not line.endswith('\n'):
                line += '\n'
            if in_comment_block and line.startswith(('#', '\n')):
                comment_block.append(line)
            else:
                # The first real entry ends the comment block; comment and
                # blank lines after it are sorted together with the entries.
                in_comment_block = False
                output_lines.append(line)

# Add the newly found entries in mailmap format: 'Proper Name <email>'.
for mail, name in output.items():
    output_lines.append(name + ' ' + mail + '\n')

output_lines.sort()

sys.stdout.write(''.join(comment_block + output_lines))
|