Analizing OpenStack code
Tools
- MySQL
List of repositories tracked
- keystone
- nova
- horizon
- quantum
- swift
- glance
- openstack-manuals
- compute-api
- identity-api
- image-api
- netconn-api
- object-api
- tempest
- python-glanceclient
- python-novaclient
- python-quantumclient
- python-keystoneclient
How to do it
Installing cvsanaly2 from https://projects.libresoft.es/projects/cvsanaly/wiki Clone all the repositories listed on https://github.com/openstack/:
$ git clone git://.... [glance|swift|nova...]
Then create the databases for CVSanaly2 to store the logs in
$ mysqladmin -u mlstats -p create openstackrepos
And run cvsanaly2 on each of the repositories:
~/projects/swift$ cvsanaly2 --extensions Metrics,FileTypes,CommitsLOC -u mlstats -p xxxxxxx -d openstackrepos Parsing log for swift/ Executing extensions Executing extension FileTypes Executing extension Metrics
Relevant queries
Exclude from results system authors Jenkins and Gerrit. Database schema and description of the tables.
History of changes per author
SELECT
DATE_FORMAT(scmlog.`date`,'%X-%v') AS Date,
sum(`commits_lines`.`added`) AS Added,
sum(`commits_lines`.`removed`) AS Removed,
`people`.`name` as Author,
`people`.`email` as Email
FROM
`commits_lines` INNER JOIN `scmlog` ON `commits_lines`.`commit_id` = `scmlog`.`id`
INNER JOIN `repositories` ON `scmlog`.`repository_id` = `repositories`.`id`
INNER JOIN `people` ON `scmlog`.`author_id` = `people`.`id`
WHERE
repositories.`name` IN (${REPOSITORY})
AND `people`.`name` NOT LIKE 'Tarmac'
AND `people`.`name` NOT LIKE 'Jenkins'
AND `people`.`name` NOT LIKE 'Gerrit'
AND `scmlog`.`date` <= ${ENDDATE}
AND `scmlog`.`date` >= ${STARTDATE}
AND Added > '0'
AND Removed > '0'
GROUP BY
`people`.`name`
ORDER BY
Date DESC,
Added DESC,
Removed DESCUsed for weekly report.
Changes in repositories (lines added/removed)
SELECT
date_format(s.date,'%Y-%v') AS Date,
sum(l.added) AS Added,
sum(l.removed) AS Removed,
p.`name` AS Author,
p.`email` AS Email
FROM
`scmlog` s INNER JOIN `people` p ON s.`author_id` = p.`id`
INNER JOIN `commits_lines` l ON s.`id` = l.`commit_id`
INNER JOIN `repositories` r ON s.`repository_id` = r.`id`
WHERE
s.id IN ( SELECT DISTINCT (actions.commit_id) FROM `actions` WHERE branch_id = 1 )
and r.name IN (${REPOSITORY})
and s.date <= ${ENDDATE}
and s.date >= ${STARTDATE}
and s.author_id NOT IN (6,31,32,153,271)
GROUP BY
Author
ORDER BY
date DESC,
added DESC,
Removed DESCUsed in weekly reports.
Top 10 authors per number of commits
SELECT
count(scmlog.`id`) AS commits,
`people`.`name` AS author
FROM
`scmlog` INNER JOIN `people` ON `scmlog`.`author_id` = `people`.`id`
INNER JOIN `repositories` repositories ON `scmlog`.`repository_id` = repositories.`id`
WHERE
repositories.`name` IN (${REPOSITORY})
AND `people`.`name` NOT LIKE 'Tarmac'
AND `people`.`name` NOT LIKE 'Jenkins'
AND `people`.`name` NOT LIKE 'Gerrit'
and `scmlog`.`date` <= ${ENDDATE}
And `scmlog`.`date` >= ${STARTDATE}
GROUP BY
committer_id
ORDER BY
commits DESC
LIMIT 10Used for monthly report.
Total number of commits per repository in past 30 days
- View: who commited to any repo, how many times, in the 30 days
SELECT
Count(scmlog.date) AS Count,
`people`.`name` AS Name,
`repositories`.`name` AS RepoName,
`people`.`email` AS email
FROM
`scmlog` INNER JOIN `people` ON `scmlog`.`author_id` = `people`.`id`
INNER JOIN `repositories` ON `scmlog`.`repository_id` = `repositories`.`id`
WHERE
date_sub(curdate(),interval 30 day) <= `scmlog`.`date`
AND `people`.`name` NOT LIKE 'jenkins%'
AND `people`.`name` NOT LIKE 'gerrit%'
GROUP BY
people.name
ORDER BY
Count DESCSee the demo report
Total number of commits for each repository, per month
SELECT
count(scmlog.rev) AS `Total_Commits`,
date_format(`scmlog`.`date`,'%Y/%M') AS Month,
`repositories`.`name` AS Repository,
`scmlog`.`date` AS Date
FROM
`scmlog` INNER JOIN `repositories` ON `scmlog`.`repository_id` = `repositories`.`id`
GROUP BY
month,
name
ORDER BY
date ASC
Total number of commits per month
SELECT
count(scmlog.rev),
date_format(`scmlog`.`date`, '%Y/%M') as Month,
`scmlog`.`date` as Date
FROM
`scmlog` INNER JOIN `repositories` ON `scmlog`.`repository_id` = `repositories`.`id`
group by
month
order by
date ASC
Total number of commits per author in total
SELECT
count(`scmlog`.`rev`) AS Commits,
`people`.`email` AS Email,
`people`.`name` AS Name
FROM
`scmlog` INNER JOIN `people` ON `scmlog`.`author_id` = `people`.`id`
WHERE
date_sub(curdate(),interval 30 day) <= `scmlog`.`date`
AND `people`.`name` NOT LIKE 'jenkins%'
AND `people`.`name` NOT LIKE 'gerrit%'
GROUP BY
name
ORDER BY
Commits DESC
LIMIT 10
Using GNU R to produce graphs
After starting R, connect to MySQL database:
library(RMySQL)
con <- dbConnect(dbDriver("MySQL"), user="mlstats", password="xxxxx", dbname="openstackrepos")
Number of commits per month
query <- "SELECT date_format(s.date, '%m/%Y') date, count(s.id) commits
FROM scmlog s group by date_format(s.date,'%Y%m');"
results <- dbGetQuery(con,query)
evol_commits <- ts(results$commits, start=c(2010,7), freq=12)
png("nova_commits_month.png")
plot(evol_commits, type="l", xlab="Date", ylab="Commits",
main="Number of commits per month - NOVA")
query_avg <- "SELECT AVG(g.numcommits)
FROM
( SELECT date_format(s.date, '%Y') myyear,
date_format(s.date, '%m') mymonth, count(s.id) numcommits
FROM scmlog s
GROUP BY date_format(s.date,'%Y%m') ) g;"
result_avg <- dbGetQuery(con,query_avg)
qqline(result_avg, col="blue", lty=2)
query_max_min <- "SELECT MAX(g.numcommits) as max, MIN(g.numcommits) as min
FROM
( SELECT date_format(s.date, '%Y') myyear,
date_format(s.date, '%m') mymonth,
count(s.id) numcommits
FROM scmlog s
GROUP BY date_format(s.date,'%Y%m') ) g;"
result_max_min <- dbGetQuery(con,query_max_min)
qqline(result_max_min$max, col="red", lty=2)
qqline(result_max_min$min, col="green", lty=2)
legend("topright", inset=.05, c("average","maximun","minimun"),
fill=c("blue","red","green"))
dev.off()
Number of Commits per author
query <- "SELECT p.name author, count(s.id) commits
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY committer_id ORDER BY commits;"
results <- dbGetQuery(con,query)
plot(results$commits, xlab="Author", ylab="Commits",
main="Number of commits by author")
Number of commits by author per year
query <- "
SELECT year, name, num
FROM
(SELECT date_format(s.date, '%Y') AS year, p.name AS name, count(s.id) AS num
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY year, name
ORDER BY year, num) g
WHERE g.num > 10;
"
results <- dbGetQuery(con,query)
results$year <- factor(results$year)
query_names <- "
SELECT DISTINCT(name)
FROM
(SELECT date_format(s.date, '%Y') AS year, p.name AS name, count(s.id) AS num
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY year, name
ORDER BY year, num) g
WHERE g.num > 10;
"
names <- dbGetQuery(con,query_names)
col <- 1
for (i in names$name) {
results$color[results$name==i] <- col
col <- col + 1
}
dotchart(results$num, groups=results$year, labels=results$name,
color=results$color, cex=.7, xlab="Number of commits",
main="Commits by author per year")
Aggregated number of commits up to now
query <- "SELECT g.myyear, g.mymonth, g.numcommits,
(@sumacu:=@sumacu+g.numcommits) aggregated_numcommits
FROM
(SELECT @sumacu:=0) r, (SELECT date_format(s.date, '%Y') myyear,
date_format(s.date, '%m') mymonth,
COUNT(s.id) numcommits
FROM scmlog s
GROUP BY date_format(s.date,'%Y%m')) g;"
results <- dbGetQuery(con,query)
evol_num_commits <- ts(results$aggregated_numcommits, start=c(2010,10), freq=12)
plot(evol_num_commits, type="h", xlab="Date", ylab="Commits",
main="Aggregated number of commits", col = "dark blue")In order to save an image of the plot use
png("<filename>.png")
plot <command>
dev.off()
Using Pentaho Report to produce graphs
Lots of useful info on http://wiki.meego.com/Metrics/Creating_a_report
Remember to modify $PENTAHO_HOME/biserver-ce/pentaho-solutions/system/publisher_config.xml and put the password you'll use to publish reports from the Report Designer.
Lots of info to install, configure, secure, customize Pentaho Server on https://interestingittips.wordpress.com/pentaho