% Bibliography entries (three workshop/conference papers and one journal article).
% Citation keys are the numeric ids used by the original export and are kept
% unchanged so that existing \cite commands continue to resolve.
% The non-standard fields abstract, keywords and attachments are ignored by
% standard styles and are preserved verbatim as metadata.

% Entry 839: the day range of the event is recorded in eventdate (read by
% biblatex, ignored by classic styles); month holds only the standard macro.
@inproceedings{839,
  author      = {Mockus, Audris},
  title       = {Amassing and indexing a large sample of version control systems: towards the census of public source code history},
  booktitle   = {6th {IEEE} Working Conference on Mining Software Repositories},
  year        = {2009},
  month       = may,
  eventdate   = {2009-05-16/2009-05-17},
  abstract    = {The source code and its history represent the output and process of software development activities and are an invaluable resource for study and improvement of software development practice. While individual projects and groups of projects have been extensively analyzed, some fundamental questions, such as the spread of innovation or genealogy of the source code, can be answered only by considering the entire universe of publicly available source code and its history. We describe methods we developed over the last six years to gather, index, and update an approximation of such a universal repository for publicly accessible version control systems and for the source code inside a large corporation. While challenging, the task is achievable with limited resources. The bottlenecks in network bandwidth, processing, and disk access can be dealt with using inherent parallelism of the tasks and suitable tradeoffs between the amount of storage and computations, but a completely automated discovery of public version control systems may require enticing participation of the sampled projects. Such universal repository would allow studies of global properties and origins of the source code that are not possible through other means.},
  keywords    = {bazaar, cvs, flossmole, git, mercurial, source code, sourceforge, subversion, version control},
  attachments = {https://flossmole.org/system/files/11amassing.pdf},
}

% Entry 43: second author's surname corrected from "Samolades" to "Samoladas"
% (NOTE(review): confirm against the published paper).
@inproceedings{43,
  author    = {Sowe, S. and Samoladas, I. and Stamelos, I. and Angelis, L.},
  title     = {Are {FLOSS} developers committing to {CVS/SVN} as much as they are talking in mailing lists?},
  booktitle = {Workshop on Public Data about Software Development},
  year      = {2008},
  month     = sep,
  address   = {Milan, IT},
}

@inproceedings{44,
  author    = {Taylor, Q. and Stevenson, J. E. and Delorey, D. P. and Knutson, C. D.},
  title     = {Author entropy: A metric for characterization of software authorship patterns},
  booktitle = {Workshop on Public Data about Software Development},
  year      = {2008},
  month     = sep,
  address   = {Milan, IT},
}

@article{16,
  author  = {Crowston, Kevin and Howison, James},
  title   = {Assessing the Health of Open Source Communities},
  journal = {Computer},
  volume  = {39},
  year    = {2006},
  month   = may,
  pages   = {89--91},
  issn    = {0018-9162},
  doi     = {10.1109/MC.2006.152},
}