CommunityMetrics/Code
< CommunityMetrics
Revision as of 21:34, 17 October 2011 by StefanoMaffulli (talk)
Analyzing OpenStack code
Tools
- cvsanaly
- Pentaho Report
- GNU R
Install cvsanaly2 from https://projects.libresoft.es/projects/cvsanaly/wiki. Then clone all the repositories listed on https://github.com/openstack/:
$ git clone git://.... [glance|swift|nova...]
Then create the database in which CVSanaly2 will store the logs:
$ mysqladmin -u mlstats -p create openstackrepos
And run cvsanaly2 on each of the repositories:
~/projects/swift$ cvsanaly2 --extensions Metrics,FileTypes,CommitsLOC -u mlstats -p xxxxxxx -d openstackrepos
Parsing log for swift/
Executing extensions
Executing extension FileTypes
Executing extension Metrics
Using GNU R to produce graphs
After starting R, connect to the MySQL database:
library(RMySQL)
# Open the connection to the database that cvsanaly2 populated; every query
# in the sections below runs through `con`.
con <- dbConnect(
  dbDriver("MySQL"),
  user = "mlstats",
  password = "xxxxx",
  dbname = "openstackrepos"
)
Number of commits per month
# Plot the number of commits per month, with dashed horizontal reference
# lines for the average, maximum and minimum monthly count, and save the
# figure to nova_commits_month.png.

# Monthly commit counts, oldest month first. The explicit ORDER BY matters:
# ts() below assumes the rows arrive in chronological order.
query <- "SELECT date_format(s.date, '%m/%Y') date, count(s.id) commits
FROM scmlog s group by date_format(s.date,'%Y%m')
ORDER BY date_format(s.date,'%Y%m');"
results <- dbGetQuery(con, query)
stopifnot(nrow(results) > 0)

# Start the series at the first month present in the data rather than at a
# hard-coded date. `date` is formatted as "mm/YYYY", so the split yields
# c(month, year).
# NOTE: ts() assumes one row per consecutive month; a month without commits
# is absent from the query result and would shift every later point.
first_month <- as.integer(
  strsplit(as.character(results$date[1]), "/", fixed = TRUE)[[1]]
)
evol_commits <- ts(results$commits,
                   start = c(first_month[2], first_month[1]),
                   frequency = 12)

png("nova_commits_month.png")
plot(evol_commits, type = "l", xlab = "Date", ylab = "Commits",
     main = "Number of commits per month - NOVA")

# Average number of commits per month.
query_avg <- "SELECT AVG(g.numcommits)
FROM
( SELECT date_format(s.date, '%Y') myyear,
date_format(s.date, '%m') mymonth, count(s.id) numcommits
FROM scmlog s
GROUP BY date_format(s.date,'%Y%m') ) g;"
result_avg <- dbGetQuery(con, query_avg)
# abline(h = ...) draws a horizontal line at the given height. qqline() is a
# helper for Q-Q plots and is not meant for reference lines.
abline(h = as.numeric(result_avg[[1]]), col = "blue", lty = 2)

# Highest and lowest monthly commit counts.
query_max_min <- "SELECT MAX(g.numcommits) as max, MIN(g.numcommits) as min
FROM
( SELECT date_format(s.date, '%Y') myyear,
date_format(s.date, '%m') mymonth,
count(s.id) numcommits
FROM scmlog s
GROUP BY date_format(s.date,'%Y%m') ) g;"
result_max_min <- dbGetQuery(con, query_max_min)
abline(h = result_max_min$max, col = "red", lty = 2)
abline(h = result_max_min$min, col = "green", lty = 2)

# The legend shows dashed line samples so that it matches the lines drawn.
legend("topright", inset = .05, c("average", "maximum", "minimum"),
       col = c("blue", "red", "green"), lty = 2)
dev.off()
Number of commits per author
# One row per committer with that committer's commit count, smallest count
# first; the counts are then plotted in that order.
query <- "SELECT p.name author, count(s.id) commits
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY committer_id ORDER BY commits;"
results <- dbGetQuery(con, query)
plot(
  results[["commits"]],
  xlab = "Author",
  ylab = "Commits",
  main = "Number of commits by author"
)
Number of commits by author per year
# Dot chart of commits per author, grouped by year. Only (year, author)
# pairs with more than 10 commits are shown, and each author keeps the same
# colour in every year.

# The ORDER BY sits on the outer query: an ORDER BY inside a derived table
# is not guaranteed to be preserved by the query that selects from it.
query <- "
SELECT year, name, num
FROM
(SELECT date_format(s.date, '%Y') AS year, p.name AS name, count(s.id) AS num
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY year, name) g
WHERE g.num > 10
ORDER BY year, num;
"
results <- dbGetQuery(con, query)
results$year <- factor(results$year)

# Distinct authors that appear in `results`; the position of an author in
# this list is used as the colour index below.
query_names <- "
SELECT DISTINCT(name)
FROM
(SELECT date_format(s.date, '%Y') AS year, p.name AS name, count(s.id) AS num
FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id
GROUP BY year, name) g
WHERE g.num > 10;
"
names <- dbGetQuery(con, query_names)

# Vectorized replacement for a loop with a manual counter: every row gets
# the position of its author in `names$name`. match() also pairs NA with NA,
# so rows with a NULL author name receive a colour as well.
results$color <- match(results$name, names$name)

dotchart(results$num, groups = results$year, labels = results$name,
         color = results$color, cex = .7, xlab = "Number of commits",
         main = "Commits by author per year")
Aggregated number of commits up to now
# Plot the cumulative number of commits over time.

# Commits per month, oldest month first. Year and month are zero-padded
# strings ('%Y', '%m'), so ordering by them is chronological.
query <- "SELECT date_format(s.date, '%Y') myyear,
date_format(s.date, '%m') mymonth,
COUNT(s.id) numcommits
FROM scmlog s
GROUP BY myyear, mymonth
ORDER BY myyear, mymonth;"
results <- dbGetQuery(con, query)
stopifnot(nrow(results) > 0)

# The running total is computed in R over the ordered rows. Accumulating it
# in SQL with a user variable (@v := @v + x) depends on the order in which
# the server evaluates rows, and that assignment form is deprecated in
# MySQL 8.0.
results$aggregated_numcommits <- cumsum(as.numeric(results$numcommits))

# Start the series at the first month present in the data rather than at a
# hard-coded date.
# NOTE: ts() assumes one row per consecutive month; a month without commits
# is absent from the query result and would shift every later point.
evol_num_commits <- ts(
  results$aggregated_numcommits,
  start = c(as.integer(as.character(results$myyear[1])),
            as.integer(as.character(results$mymonth[1]))),
  frequency = 12
)
plot(evol_num_commits, type = "h", xlab = "Date", ylab = "Commits",
     main = "Aggregated number of commits", col = "dark blue")
To save a plot as an image file, wrap the plotting command between png() and dev.off():
png("<filename>.png")
plot <command>
dev.off()