CommunityMetrics/Code
< CommunityMetrics
Revision as of 22:16, 17 October 2011 by StefanoMaffulli (talk)
Analyzing OpenStack code
Tools
- cvsanaly
- MySQL
- Pentaho Report
- GNU R
Install cvsanaly2 from https://projects.libresoft.es/projects/cvsanaly/wiki. Then clone all the repositories listed on https://github.com/openstack/:
$ git clone git://.... [glance|swift|nova...]
Then create the database in which CVSanaly2 will store the logs:
$ mysqladmin -u mlstats -p create openstackrepos
And run cvsanaly2 on each of the repositories:
~/projects/swift$ cvsanaly2 --extensions Metrics,FileTypes,CommitsLOC -u mlstats -p xxxxxxx -d openstackrepos Parsing log for swift/ Executing extensions Executing extension FileTypes Executing extension Metrics
Using GNU R to produce graphs
After starting R, connect to MySQL database:
# Load the MySQL DBI backend and open the connection used by every query
# on this page. Replace the placeholder password with the real one.
library(RMySQL)

mysql_driver <- dbDriver("MySQL")
con <- dbConnect(
  mysql_driver,
  user = "mlstats",
  password = "xxxxx",
  dbname = "openstackrepos"
)
Number of commits per month
# Plot the number of commits per month, with dashed reference lines for the
# average, maximum and minimum monthly counts, and save the figure as
# "nova_commits_month.png".
#
# Requires an open DBI connection `con` to the CVSAnalY database.

# One row per month that has at least one commit, oldest month first.
# The explicit ORDER BY matters: ts() below assigns months by row position.
query <- paste(
  "SELECT date_format(s.date, '%m/%Y') date, count(s.id) commits",
  "FROM scmlog s",
  "GROUP BY date_format(s.date,'%Y%m')",
  "ORDER BY MIN(s.date);"
)
results <- dbGetQuery(con, query)
stopifnot(nrow(results) > 0)

# Derive the start of the series from the first row ("mm/YYYY") instead of
# hard-coding it, so the time axis is correct for any repository.
# NOTE(review): a month without commits yields no row, which would shift all
# later points on the time axis -- confirm the history has no empty months.
first_month <- as.integer(strsplit(results$date[1], "/", fixed = TRUE)[[1]])
evol_commits <- ts(results$commits, start = rev(first_month), frequency = 12)

# The summary statistics are computed from the data already fetched; no
# additional SQL round trips are needed.
reference <- c(
  average = mean(results$commits),
  maximum = max(results$commits),
  minimum = min(results$commits)
)
reference_cols <- c("blue", "red", "green")

png("nova_commits_month.png")
plot(
  evol_commits,
  type = "l",
  xlab = "Date",
  ylab = "Commits",
  main = "Number of commits per month - NOVA"
)
# abline(h = ) draws horizontal lines at the given y values; qqline() is
# meant for Q-Q plots and only produced these lines by accident.
abline(h = reference, col = reference_cols, lty = 2)
legend(
  "topright",
  inset = 0.05,
  legend = names(reference),
  col = reference_cols,
  lty = 2
)
dev.off()
Number of Commits per author
# Count the commits of every committer (ascending by count) and plot the
# counts in that order. Requires an open DBI connection `con`.
query <- paste(
  "SELECT p.name author, count(s.id) commits",
  "FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id",
  "GROUP BY committer_id ORDER BY commits;"
)
results <- dbGetQuery(con, query)

# The x axis is the row index: one point per author, in query order.
plot(
  results[["commits"]],
  xlab = "Author",
  ylab = "Commits",
  main = "Number of commits by author"
)
Number of commits by author per year
# Dot chart of commits by author, grouped by year, restricted to
# (year, author) pairs with more than 10 commits. Every author keeps the
# same color across years. Requires an open DBI connection `con`.
query <- paste(
  "SELECT year, name, num FROM",
  "(SELECT date_format(s.date, '%Y') AS year, p.name AS name,",
  "count(s.id) AS num",
  "FROM scmlog s LEFT JOIN people p ON s.committer_id=p.id",
  "GROUP BY year, name ORDER BY year, num) g",
  "WHERE g.num > 10;"
)
results <- dbGetQuery(con, query)
results$year <- factor(results$year)

# Give each distinct author a color index (1, 2, 3, ...) in order of first
# appearance. The distinct authors are already in `results`, so no second
# query is needed, and match() replaces the per-author assignment loop.
results$color <- match(results$name, unique(results$name))

dotchart(
  results$num,
  groups = results$year,
  labels = results$name,
  color = results$color,
  cex = 0.7,
  xlab = "Number of commits",
  main = "Commits by author per year"
)
Aggregated number of commits up to now
# Plot the cumulative number of commits over time, one bar per month.
# Requires an open DBI connection `con` to the CVSAnalY database.

# Monthly commit counts, oldest month first. The ordering is explicit
# because both the running total and ts() depend on row order.
query <- paste(
  "SELECT date_format(s.date, '%Y') myyear,",
  "date_format(s.date, '%m') mymonth,",
  "COUNT(s.id) numcommits",
  "FROM scmlog s",
  "GROUP BY date_format(s.date,'%Y%m')",
  "ORDER BY MIN(s.date);"
)
results <- dbGetQuery(con, query)
stopifnot(nrow(results) > 0)

# Accumulate in R rather than with a MySQL user variable (@sumacu := ...),
# whose evaluation order inside a SELECT is not guaranteed by MySQL.
results$aggregated_numcommits <- cumsum(results$numcommits)

# Start the series at the first month actually present in the data rather
# than at a hard-coded date.
# NOTE(review): a month without commits yields no row, which would shift all
# later points on the time axis -- confirm the history has no empty months.
series_start <- c(
  as.integer(results$myyear[1]),
  as.integer(results$mymonth[1])
)
evol_num_commits <- ts(
  results$aggregated_numcommits,
  start = series_start,
  frequency = 12
)

plot(
  evol_num_commits,
  type = "h",
  xlab = "Date",
  ylab = "Commits",
  main = "Aggregated number of commits",
  col = "dark blue"
)
In order to save an image of the plot use
png("<filename>.png") plot <command> dev.off()