cartography_history.py
Git history of repository.
GitHistory
¶
Build and plot the git history of a repository.
Source code in complaince/tools/cartography_history.py
class GitHistory:
"""Build and plot the git history of a repository."""
def __init__(self, removed_branches: list[str] = []):
self.removed_branches = ["gh-pages"] + removed_branches
def git_history_from_local(self, path: str = ".", n_commits: int = 20) -> pd.DataFrame:
"""
Fetch the history of a local repository.
Parameters
----------
path
Path to the local repository
n_commits
Number of commits to fetch
Returns
-------
DataFrame containing the history of the repository.
Examples
--------
>>> from complaince.tools.cartography_history import GitHistory
>>> repo = GitHistory()
>>> df = repo.git_history_from_local(n_commits=10)
"""
repo = Repo(path)
commit_data = []
for branch in repo.branches:
if branch not in self.removed_branches:
commits = list(repo.iter_commits(branch, max_count=n_commits))
for commit in commits:
sha = commit.hexsha[:7]
author = commit.author.name
date = commit.committed_datetime.strftime("%Y-%m-%d %H:%M")
message = commit.message.strip()
commit_data.append([message, date, author, sha, branch])
history = pd.DataFrame(commit_data, columns=["Description", "Date", "Author", "Commit", "Branch"])
ordered_history = history.sort_values("Date", ascending=False).reset_index(drop=True)
git_history = ordered_history.iloc[:n_commits, :]
return git_history
def git_history_from_github(self, repo_name: str, n_commits: int = 20) -> pd.DataFrame:
"""
Fetch the history of a GitHub repository.
Parameters
----------
repo_name
Full name of the repository in the format "owner/repo"
n_commits
Number of commits to fetch
Returns
-------
DataFrame containing the history of the repository.
Examples
--------
>>> from complaince.tools.cartography_history import GitHistory
>>> repo = GitHistory()
>>> history = repo.git_history_from_github("alexym1/booklet", n_commits=20)
"""
if os.getenv("GITHUB_TOKEN") is None:
github = Github()
else:
auth = Auth.Token(str(os.getenv("GITHUB_TOKEN")))
github = Github(auth=auth)
repo = github.get_repo(repo_name)
branches = list(repo.get_branches())
commit_data = []
for branch in branches:
if branch.name not in self.removed_branches:
commits = list(repo.get_commits(sha=branch.name))[:n_commits]
for commit in commits:
sha = commit.sha[:7]
author = commit.commit.author.name
date = commit.commit.committer.date.strftime("%Y-%m-%d %H:%M")
description = commit.commit.message.split("\n")[0]
branch_name = branch.name
commit_data.append([description, date, author, sha, branch_name])
history = pd.DataFrame(commit_data, columns=["Description", "Date", "Author", "Commit", "Branch"])
ordered_history = history.sort_values("Date", ascending=False).reset_index(drop=True)
git_history = ordered_history.iloc[:n_commits, :]
return git_history
def plot_history(self, history: pd.DataFrame, output_png: str) -> None:
"""
Plot the history of a repository.
Parameters
----------
history
DataFrame containing the history of the repository
output_png
Path to save the plot
Examples
--------
>>> import os
>>> from tempfile import TemporaryDirectory
>>> from complaince.tools.cartography_history import GitHistory
>>> temp_dir = TemporaryDirectory(prefix="history_")
>>> repo = GitHistory()
>>> git = repo.git_history_from_local(n_commits=10)
>>> repo.plot_history(git, output_png = os.path.join(temp_dir.name, "history.png"))
"""
branch_positions = {branch: i for i, branch in enumerate(history["Branch"].unique())}
history["y"] = history["Branch"].map(branch_positions)
_, ax = plt.subplots(figsize=(15, 10))
for _, row in history.iterrows():
ax.scatter(row["Date"], row["y"], s=100, label=row["Branch"], edgecolors="black", zorder=2)
ax.text(row["Date"], row["y"] + 0.15, row["Commit"], ha="center", fontsize=9, color="black")
for i in range(len(history) - 1):
ax.plot(
[history.iloc[i]["Date"], history.iloc[i + 1]["Date"]],
[history.iloc[i]["y"], history.iloc[i + 1]["y"]],
color="gray",
linestyle="-",
alpha=0.6,
zorder=1,
)
history.drop(columns="y", inplace=True)
ax.set_yticks(list(branch_positions.values()))
ax.set_yticklabels(list(branch_positions.keys()))
ax.set_xlabel("Date")
ax.grid(axis="x", linestyle="--", alpha=0.5)
plt.xticks(rotation=45)
plt.title("Gitflow of the Repository", fontsize=14)
plt.savefig(output_png)
git_history_from_github(self, repo_name, n_commits=20)
¶
Fetch the history of a GitHub repository.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
repo_name |
str |
Full name of the repository in the format "owner/repo" |
required |
n_commits |
int |
Number of commits to fetch |
20 |
Source code in complaince/tools/cartography_history.py
def git_history_from_github(self, repo_name: str, n_commits: int = 20) -> pd.DataFrame:
"""
Fetch the history of a GitHub repository.
Parameters
----------
repo_name
Full name of the repository in the format "owner/repo"
n_commits
Number of commits to fetch
Returns
-------
DataFrame containing the history of the repository.
Examples
--------
>>> from complaince.tools.cartography_history import GitHistory
>>> repo = GitHistory()
>>> history = repo.git_history_from_github("alexym1/booklet", n_commits=20)
"""
if os.getenv("GITHUB_TOKEN") is None:
github = Github()
else:
auth = Auth.Token(str(os.getenv("GITHUB_TOKEN")))
github = Github(auth=auth)
repo = github.get_repo(repo_name)
branches = list(repo.get_branches())
commit_data = []
for branch in branches:
if branch.name not in self.removed_branches:
commits = list(repo.get_commits(sha=branch.name))[:n_commits]
for commit in commits:
sha = commit.sha[:7]
author = commit.commit.author.name
date = commit.commit.committer.date.strftime("%Y-%m-%d %H:%M")
description = commit.commit.message.split("\n")[0]
branch_name = branch.name
commit_data.append([description, date, author, sha, branch_name])
history = pd.DataFrame(commit_data, columns=["Description", "Date", "Author", "Commit", "Branch"])
ordered_history = history.sort_values("Date", ascending=False).reset_index(drop=True)
git_history = ordered_history.iloc[:n_commits, :]
return git_history
git_history_from_local(self, path='.', n_commits=20)
¶
Fetch the history of a local repository.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
path |
str |
Path to the local repository |
'.' |
n_commits |
int |
Number of commits to fetch |
20 |
Source code in complaince/tools/cartography_history.py
def git_history_from_local(self, path: str = ".", n_commits: int = 20) -> pd.DataFrame:
"""
Fetch the history of a local repository.
Parameters
----------
path
Path to the local repository
n_commits
Number of commits to fetch
Returns
-------
DataFrame containing the history of the repository.
Examples
--------
>>> from complaince.tools.cartography_history import GitHistory
>>> repo = GitHistory()
>>> df = repo.git_history_from_local(n_commits=10)
"""
repo = Repo(path)
commit_data = []
for branch in repo.branches:
if branch not in self.removed_branches:
commits = list(repo.iter_commits(branch, max_count=n_commits))
for commit in commits:
sha = commit.hexsha[:7]
author = commit.author.name
date = commit.committed_datetime.strftime("%Y-%m-%d %H:%M")
message = commit.message.strip()
commit_data.append([message, date, author, sha, branch])
history = pd.DataFrame(commit_data, columns=["Description", "Date", "Author", "Commit", "Branch"])
ordered_history = history.sort_values("Date", ascending=False).reset_index(drop=True)
git_history = ordered_history.iloc[:n_commits, :]
return git_history
plot_history(self, history, output_png)
¶
Plot the history of a repository.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
history |
DataFrame |
DataFrame containing the history of the repository |
required |
output_png |
str |
Path to save the plot |
required |
Source code in complaince/tools/cartography_history.py
def plot_history(self, history: pd.DataFrame, output_png: str) -> None:
"""
Plot the history of a repository.
Parameters
----------
history
DataFrame containing the history of the repository
output_png
Path to save the plot
Examples
--------
>>> import os
>>> from tempfile import TemporaryDirectory
>>> from complaince.tools.cartography_history import GitHistory
>>> temp_dir = TemporaryDirectory(prefix="history_")
>>> repo = GitHistory()
>>> git = repo.git_history_from_local(n_commits=10)
>>> repo.plot_history(git, output_png = os.path.join(temp_dir.name, "history.png"))
"""
branch_positions = {branch: i for i, branch in enumerate(history["Branch"].unique())}
history["y"] = history["Branch"].map(branch_positions)
_, ax = plt.subplots(figsize=(15, 10))
for _, row in history.iterrows():
ax.scatter(row["Date"], row["y"], s=100, label=row["Branch"], edgecolors="black", zorder=2)
ax.text(row["Date"], row["y"] + 0.15, row["Commit"], ha="center", fontsize=9, color="black")
for i in range(len(history) - 1):
ax.plot(
[history.iloc[i]["Date"], history.iloc[i + 1]["Date"]],
[history.iloc[i]["y"], history.iloc[i + 1]["y"]],
color="gray",
linestyle="-",
alpha=0.6,
zorder=1,
)
history.drop(columns="y", inplace=True)
ax.set_yticks(list(branch_positions.values()))
ax.set_yticklabels(list(branch_positions.keys()))
ax.set_xlabel("Date")
ax.grid(axis="x", linestyle="--", alpha=0.5)
plt.xticks(rotation=45)
plt.title("Gitflow of the Repository", fontsize=14)
plt.savefig(output_png)