* Git utils.␊ |
*␊ |
*/␊ |
class IDF_Scm_Git␊ |
class IDF_Scm_Git extends IDF_Scm␊ |
{␊ |
public $repo = '';␊ |
public $mediumtree_fmt = 'commit %H%nAuthor: %an <%ae>%nTree: %T%nDate: %ai%n%n%s%n%n%b';␊ |
␊ |
␊ |
public function __construct($repo)␊ |
{␊ |
$this->repo = $repo;␊ |
}␊ |
␊ |
/* ============================================== *␊ |
* *␊ |
* Common Methods Implemented By All The SCMs *␊ |
* *␊ |
* ============================================== */ ␊ |
␊ |
public function isAvailable()␊ |
{␊ |
try {␊ |
$this->getBranches();␊ |
} catch (IDF_Scm_Exception $e) {␊ |
return false;␊ |
}␊ |
return true;␊ |
}␊ |
␊ |
public function getBranches()␊ |
{␊ |
if (isset($this->cache['branches'])) {␊ |
return $this->cache['branches'];␊ |
}␊ |
$cmd = Pluf::f('idf_exec_cmd_prefix', '')␊ |
.sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' branch', ␊ |
escapeshellarg($this->repo));␊ |
exec($cmd, $out, $return);␊ |
if ($return != 0) {␊ |
throw new IDF_Scm_Exception(sprintf($this->error_tpl,␊ |
$cmd, $return, ␊ |
implode("\n", $out)));␊ |
}␊ |
$res = array();␊ |
foreach ($out as $b) {␊ |
$res[] = substr($b, 2);␊ |
}␊ |
$this->cache['branches'] = $res;␊ |
return $res;␊ |
}␊ |
␊ |
public function getMainBranch()␊ |
{␊ |
return 'master';␊ |
}␊ |
␊ |
/**␊ |
* Git "tree" is not the same as the tree we get here.␊ |
*␊ |
* With git each commit object stores a related tree object. This␊ |
* tree is basically providing what is in the given folder at the␊ |
* given commit. It looks something like that:␊ |
*␊ |
* <pre>␊ |
* 100644 blob bcd155e609c51b4651aab9838b270cce964670af␉AUTHORS␊ |
* 100644 blob 87b44c5c7df3cc90c031317c1ac8efcfd8a13631␉COPYING␊ |
* 100644 blob 2a0f899cbfe33ea755c343b06a13d7de6c22799f␉INSTALL.mdtext␊ |
* 040000 tree 2f469c4c5318aa4ad48756874373370f6112f77b␉doc␊ |
* 040000 tree 911e0bd2706f0069b04744d6ef41353faf06a0a7␉logo␊ |
* </pre>␊ |
*␊ |
* You can then follow what is in the given folder (let say doc)␊ |
* by using the hash.␊ |
*␊ |
* This means that you will have not to confuse the git tree and␊ |
* the output tree in the following method.␊ |
*␊ |
* @see http://www.kernel.org/pub/software/scm/git/docs/git-ls-tree.html␊ |
*␊ |
*/␊ |
public function getTree($commit, $folder='/', $branch=null)␊ |
{␊ |
$folder = ($folder == '/') ? '' : $folder;␊ |
// now we grab the info about this commit including its tree.␊ |
$co = $this->getCommit($commit);␊ |
if ($folder) {␊ |
// As we are limiting to a given folder, we need to find␊ |
// the tree corresponding to this folder.␊ |
$tinfo = $this->getTreeInfo($commit, $folder); ␊ |
if (isset($tinfo[0]) and $tinfo[0]->type == 'tree') {␊ |
$tree = $tinfo[0]->hash;␊ |
} else {␊ |
throw new Exception(sprintf(__('Folder %1$s not found in commit %2$s.'), $folder, $commit));␊ |
}␊ |
} else {␊ |
$tree = $co->tree;␊ |
}␊ |
$res = array();␊ |
foreach ($this->getTreeInfo($tree) as $file) {␊ |
// Now we grab the files in the current tree with as much␊ |
// information as possible.␊ |
if ($file->type == 'blob') {␊ |
$file->date = $co->date;␊ |
$file->log = '----'; ␊ |
$file->author = 'Unknown';␊ |
}␊ |
$file->fullpath = ($folder) ? $folder.'/'.$file->file : $file->file;␊ |
if ($file->type == 'commit') {␊ |
// We have a submodule␊ |
$file = $this->getSubmodule($file, $commit);␊ |
}␊ |
$res[] = $file;␊ |
}␊ |
// Grab the details for each blob and return the list.␊ |
return $this->getTreeDetails($res);␊ |
}␊ |
␊ |
/**␊ |
* Given the string describing the author from the log find the␊ |
* author in the database.␊ |
|
*/␊ |
public function filesAtCommit($commit='HEAD', $folder='')␊ |
{␊ |
// now we grab the info about this commit including its tree.␊ |
$co = $this->getCommit($commit);␊ |
if ($folder) {␊ |
// As we are limiting to a given folder, we need to find␊ |
// the tree corresponding to this folder.␊ |
$found = false;␊ |
foreach ($this->getTreeInfo($co->tree, true, $folder) as $file) {␊ |
if ($file->type == 'tree' and $file->file == $folder) {␊ |
$found = true;␊ |
$tree = $file->hash;␊ |
break;␊ |
}␊ |
}␊ |
if (!$found) {␊ |
throw new Exception(sprintf(__('Folder %1$s not found in commit %2$s.'), $folder, $commit));␊ |
}␊ |
} else {␊ |
$tree = $co->tree;␊ |
}␊ |
$res = array();␊ |
// get the raw log corresponding to this commit to find the␊ |
// origin of each file.␊ |
$rawlog = array();␊ |
$cmd = sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' log --raw --abbrev=40 --pretty=oneline -5000 %s',␊ |
escapeshellarg($this->repo), escapeshellarg($commit));␊ |
IDF_Scm::exec($cmd, $rawlog);␊ |
// We reverse the log to be able to use a fixed efficient␊ |
// regex without back tracking.␊ |
$rawlog = implode("\n", array_reverse($rawlog));␊ |
foreach ($this->getTreeInfo($tree, false) as $file) {␊ |
// Now we grab the files in the current tree with as much␊ |
// information as possible.␊ |
$matches = array();␊ |
if ($file->type == 'blob' and preg_match('/^\:\d{6} \d{6} [0-9a-f]{40} '.$file->hash.' .*^([0-9a-f]{40})/msU',␊ |
$rawlog, $matches)) {␊ |
$fc = $this->getCommit($matches[1]);␊ |
$file->date = $fc->date;␊ |
$file->log = $fc->title;␊ |
$file->author = $fc->author;␊ |
} else if ($file->type == 'blob') {␊ |
$file->date = $co->date;␊ |
$file->log = '----'; ␊ |
$file->author = 'Unknown';␊ |
}␊ |
$file->fullpath = ($folder) ? $folder.'/'.$file->file : $file->file;␊ |
if ($file->type == 'commit') {␊ |
// We have a submodule␊ |
$file = $this->getSubmodule($file, $commit);␊ |
}␊ |
$res[] = $file;␊ |
}␊ |
return $res;␊ |
}␊ |
␊ |
/**␊ |
|
*␊ |
* @param string Tree hash ␊ |
* @param bool Do we recurse in subtrees (true)␊ |
* @param string Folder in which we want to get the info ('')␊ |
* @return array Array of file information.␊ |
*/␊ |
public function getTreeInfo($tree, $recurse=true, $folder='')␊ |
public function getTreeInfo($tree, $folder='')␊ |
{␊ |
if ('tree' != $this->testHash($tree)) {␊ |
if (!in_array($this->testHash($tree), array('tree', 'commit'))) {␊ |
throw new Exception(sprintf(__('Not a valid tree: %s.'), $tree));␊ |
}␊ |
$cmd_tmpl = 'GIT_DIR=%s '.Pluf::f('git_path', 'git').' ls-tree%s -t -l %s %s';␊ |
$cmd = sprintf($cmd_tmpl, ␊ |
escapeshellarg($this->repo), ␊ |
($recurse) ? ' -r' : '',␊ |
escapeshellarg($tree), escapeshellarg($folder));␊ |
$cmd_tmpl = 'GIT_DIR=%s '.Pluf::f('git_path', 'git').' ls-tree -l %s %s';␊ |
$cmd = Pluf::f('idf_exec_cmd_prefix', '')␊ |
.sprintf($cmd_tmpl, escapeshellarg($this->repo), ␊ |
escapeshellarg($tree), escapeshellarg($folder));␊ |
$out = array();␊ |
$res = array();␊ |
IDF_Scm::exec($cmd, $out);␊ |
exec($cmd, $out);␊ |
foreach ($out as $line) {␊ |
list($perm, $type, $hash, $size, $file) = preg_split('/ |\t/', $line, 5, PREG_SPLIT_NO_EMPTY);␊ |
$res[] = (object) array('perm' => $perm, 'type' => $type, ␊ |
|
escapeshellarg($request_file_info->hash)));␊ |
}␊ |
␊ |
/**␊ |
* Get the branches.␊ |
*␊ |
* @return array Branches.␊ |
*/␊ |
public function getBranches()␊ |
{␊ |
$out = array();␊ |
IDF_Scm::exec(sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' branch', ␊ |
escapeshellarg($this->repo)), $out);␊ |
$res = array();␊ |
foreach ($out as $b) {␊ |
$res[] = substr($b, 2);␊ |
}␊ |
return $res;␊ |
}␊ |
␊ |
/**␊ |
* Get commit details.␊ |
*␊ |
* @param string Commit ('HEAD').␊ |
* @param bool Get commit diff (false).␊ |
* @return array Changes.␊ |
* @param string Commit␊ |
* @param bool Get commit diff (false)␊ |
* @return array Changes␊ |
*/␊ |
public function getCommit($commit='HEAD', $getdiff=false)␊ |
public function getCommit($commit, $getdiff=false)␊ |
{␊ |
if ($getdiff) {␊ |
$cmd = sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' show --date=iso --pretty=format:%s %s',␊ |
|
escapeshellarg($commit));␊ |
}␊ |
$out = array();␊ |
IDF_Scm::exec($cmd, $out);␊ |
exec($cmd, $out);␊ |
$log = array();␊ |
$change = array();␊ |
$inchange = false;␊ |
|
public function getSubmodule($file, $commit)␊ |
{␊ |
$file->type = 'extern';␊ |
$file->extern = '';␊ |
$info = $this->getFileInfo('.gitmodules', $commit);␊ |
if ($info == false) {␊ |
return $file;␊ |
|
$gitmodules = $this->getBlob($info);␊ |
if (preg_match('#\[submodule\s+\"'.$file->fullpath.'\"\]\s+path\s=\s(\S+)\s+url\s=\s(\S+)#mi', $gitmodules, $matches)) {␊ |
$file->extern = $matches[2];␊ |
}␊ |
}␊ |
return $file;␊ |
}␊ |
␊ |
/**␊ |
* Foreach file in the tree, find the details.␊ |
*␊ |
* @param array Tree information␊ |
* @return array Updated tree information␊ |
*/␊ |
public function getTreeDetails($tree)␊ |
{␊ |
$n = count($tree);␊ |
$details = array();␊ |
for ($i=0;$i<$n;$i++) {␊ |
if ($tree[$i]->type == 'blob') {␊ |
$details[$tree[$i]->hash] = $i;␊ |
}␊ |
}␊ |
if (!count($details)) {␊ |
return $tree;␊ |
}␊ |
$res = $this->getCachedBlobInfo($details);␊ |
$toapp = array();␊ |
foreach ($details as $blob => $idx) {␊ |
if (isset($res[$blob])) {␊ |
$tree[$idx]->date = $res[$blob]->date;␊ |
$tree[$idx]->log = $res[$blob]->title;␊ |
$tree[$idx]->author = $res[$blob]->author;␊ |
} else {␊ |
$toapp[$blob] = $idx;␊ |
}␊ |
}␊ |
if (count($toapp)) {␊ |
$res = $this->appendBlobInfoCache($toapp);␊ |
foreach ($details as $blob => $idx) {␊ |
if (isset($res[$blob])) {␊ |
$tree[$idx]->date = $res[$blob]->date;␊ |
$tree[$idx]->log = $res[$blob]->title;␊ |
$tree[$idx]->author = $res[$blob]->author;␊ |
}␊ |
}␊ |
}␊ |
return $tree;␊ |
}␊ |
␊ |
/**␊ |
* Append build info cache.␊ |
*␊ |
* The append method tries to get only the necessary details, so␊ |
* instead of going through all the commits one at a time, it will␊ |
* try to find a smarter way with regex.␊ |
*␊ |
* @see self::buildBlobInfoCache␊ |
*␊ |
* @param array The blob for which we need the information␊ |
* @return array The information␊ |
*/␊ |
public function appendBlobInfoCache($blobs)␊ |
{␊ |
$rawlog = array();␊ |
$cmd = Pluf::f('idf_exec_cmd_prefix', '')␊ |
.sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' log --raw --abbrev=40 --pretty=oneline -5000 --skip=%%s',␊ |
escapeshellarg($this->repo));␊ |
$skip = 0;␊ |
$res = array();␊ |
exec(sprintf($cmd, $skip), $rawlog);␊ |
while (count($rawlog) and count($blobs)) {␊ |
$rawlog = implode("\n", array_reverse($rawlog));␊ |
foreach ($blobs as $blob => $idx) {␊ |
if (preg_match('/^\:\d{6} \d{6} [0-9a-f]{40} '␊ |
.$blob.' .*^([0-9a-f]{40})/msU',␊ |
$rawlog, $matches)) {␊ |
$fc = $this->getCommit($matches[1]);␊ |
$res[$blob] = (object) array('hash' => $blob,␊ |
'date' => $fc->date,␊ |
'title' => $fc->title,␊ |
'author' => $fc->author);␊ |
unset($blobs[$blob]);␊ |
}␊ |
}␊ |
$rawlog = array();␊ |
$skip += 5000;␊ |
if ($skip > 20000) {␊ |
// We are in the case of the import of a big old␊ |
// repository, we can store as unknown the commit info␊ |
// not to try to retrieve them each time.␊ |
foreach ($blobs as $blob => $idx) {␊ |
$res[$blob] = (object) array('hash' => $blob,␊ |
'date' => '0',␊ |
'title' => '----',␊ |
'author' => 'Unknown');␊ |
}␊ |
break;␊ |
}␊ |
exec(sprintf($cmd, $skip), $rawlog);␊ |
}␊ |
$this->cacheBlobInfo($res);␊ |
return $res;␊ |
}␊ |
␊ |
/**␊ |
* Build the blob info cache.␊ |
*␊ |
* We build the blob info cache 500 commits at a time. ␊ |
*/␊ |
public function buildBlobInfoCache()␊ |
{␊ |
$rawlog = array();␊ |
$cmd = Pluf::f('idf_exec_cmd_prefix', '')␊ |
.sprintf('GIT_DIR=%s '.Pluf::f('git_path', 'git').' log --raw --abbrev=40 --pretty=oneline -500 --skip=%%s',␊ |
escapeshellarg($this->repo));␊ |
$skip = 0;␊ |
exec(sprintf($cmd, $skip), $rawlog);␊ |
while (count($rawlog)) {␊ |
$commit = '';␊ |
$data = array();␊ |
foreach ($rawlog as $line) {␊ |
if (substr($line, 0, 1) != ':') {␊ |
$commit = $this->getCommit(substr($line, 0, 40));␊ |
continue;␊ |
}␊ |
$blob = substr($line, 56, 40);␊ |
$data[] = (object) array('hash' => $blob,␊ |
'date' => $commit->date,␊ |
'title' => $commit->title,␊ |
'author' => $commit->author);␊ |
}␊ |
$this->cacheBlobInfo($data);␊ |
$rawlog = array();␊ |
$skip += 500;␊ |
exec(sprintf($cmd, $skip), $rawlog);␊ |
}␊ |
}␊ |
␊ |
/**␊ |
* Get blob info.␊ |
*␊ |
* When we display the tree, we want to know when a given file was␊ |
* created, who was the author and at which date. This is a very␊ |
* slow operation for git as we need to go through the full␊ |
* history, find when then blob was introduced, then grab the␊ |
* corresponding commit. This is why we need a cache.␊ |
*␊ |
* @param array List as keys of blob hashs to get info for␊ |
* @return array Hash indexed results, when not found not set␊ |
*/␊ |
public function getCachedBlobInfo($hashes)␊ |
{␊ |
$res = array();␊ |
$cache = Pluf::f('tmp_folder').'/IDF_Scm_Git-'.md5($this->repo).'.cache.db';␊ |
if (!file_exists($cache)) {␊ |
return $res;␊ |
}␊ |
$data = file_get_contents($cache);␊ |
if (false === $data) {␊ |
return $res;␊ |
}␊ |
$data = split(chr(30), $data);␊ |
foreach ($data as $rec) {␊ |
if (isset($hashes[substr($rec, 0, 40)])) {␊ |
//$tmp = split(chr(31), gzinflate(substr($rec, 40)), 3);␊ |
$tmp = split(chr(31), substr($rec, 40), 3);␊ |
$res[substr($rec, 0, 40)] = ␊ |
(object) array('hash' => substr($rec, 0, 40),␊ |
'date' => $tmp[0],␊ |
'title' => $tmp[2],␊ |
'author' => $tmp[1]);␊ |
}␊ |
}␊ |
return $res;␊ |
}␊ |
␊ |
/**␊ |
* Cache blob info.␊ |
* ␊ |
* Given a series of blob info, cache them.␊ |
*␊ |
* @param array Blob info␊ |
* @return bool Success␊ |
*/␊ |
public function cacheBlobInfo($info)␊ |
{␊ |
// Prepare the data␊ |
$data = array();␊ |
foreach ($info as $file) {␊ |
//$data[] = $file->hash.gzdeflate($file->date.chr(31).$file->author.chr(31).$file->title, 9);␊ |
$data[] = $file->hash.$file->date.chr(31).$file->author.chr(31).$file->title;␊ |
}␊ |
$data = implode(chr(30), $data).chr(30);␊ |
$cache = Pluf::f('tmp_folder').'/IDF_Scm_Git-'.md5($this->repo).'.cache.db';␊ |
$fp = fopen($cache, 'ab'); ␊ |
if ($fp) {␊ |
flock($fp, LOCK_EX); ␊ |
fwrite($fp, $data, strlen($data));␊ |
fclose($fp); // releases the lock too␊ |
return true;␊ |
}␊ |
return false;␊ |
}␊ |
} |