Different Angles
Many roads lead to Rome
If you happen to know more than one programming language it can be a fun challange to solve the same programming task with more than one programming language. It can be both, a mental challenge and a chance to look differently at the same thing.
The Task
We have the following situation: A folder with subfolders with subfolders. What we want is to iterate over all the subfolders of every subfolder of the original folder and only keep the N newest, the others shall be deleted.
In my concrete real life situation the subfolders were Git branches and the subsubfolders were the commits of these branches.
The contestents will be: Python, C, Rust and Haskell
Disclaimer: I do not have equal skill levels for each of these languages, so it can be that my solutions are far from ideal in some cases. But that is not part of the challenge. We won’t look at speed, performance, memory footprint or alike. The only criterion is that the implementation does the job. Error handling is optional.
Solutions
Python
import sys
import os
from os import path
from shutil import rmtree
from argparse import ArgumentParser
def delete_subs(subs):
for sub in subs:
rmtree(sub)
def snd(tpl):
return tpl[1]
def fst(tpl):
return tpl[0]
def clean_dir(d, limit):
subs = []
for sub in os.listdir(d):
sub = path.join(d, sub)
try:
mtime = path.getctime(sub)
except OSError:
print("Couldn't find mtime for {} - skipping...".format(sub))
continue
subs.append((mtime,sub))
subs.sort(key=fst, reverse=True)
subs = map(snd, subs)
delete_subs(subs[limit:])
if __name__ == '__main__':
parser = ArgumentParser()
parser.add_argument('-n', '--number', type=int, nargs='?', metavar='N', default=5)
parser.add_argument('root', nargs=1)
args = parser.parse_args()
limit = args.number
root = args.root[0]
if not path.exists(root):
print("Folder does not exist: {}".format(root))
sys.exit(1)
for name in os.listdir(root):
d = path.join(root, name)
if path.isdir(d):
clean_dir(d, limit)
C-Code
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ftw.h>
enum {
RET_OK=0,
RET_BAD_ARGUMENT,
RET_OPEN_ERR,
RET_NOT_FOUND,
RET_NO_PERMISSION,
RET_MALLOC_ERROR,
RET_LSTAT_ERROR
};
#define PATH_BUFFER 1024
#define ARRAY_SIZE 128
#define KEEP_LIMIT 3
static char* curr_path;
typedef struct entry {
long int timestamp;
char dir_name[PATH_BUFFER];
} entry_t;
static entry_t commits[ARRAY_SIZE];
static int entry_comp(const void*, const void*);
static int lookat(char *);
static int clean(char*, unsigned short);
static int append(char* path, size_t pos, char* add);
typedef struct FTW sFTW;
static int ftw_deleter(const char* fpath, const struct stat* sb, int typeflag, sFTW *ftwbuf);
static int delete_dir(const char* path);
extern int nftw(const char* dirpath, int(*fn) (const char* fpath, const struct stat* sb, int typeflag, sFTW* ftwbuf), int nopenfd, int flags);
int main(int argc, char* argv[])
{
int ret;
if (argc < 2) {
printf("Usage: %s dir_name\n", argv[0]);
exit(RET_BAD_ARGUMENT);
}
curr_path = malloc(PATH_BUFFER);
if (curr_path != NULL) {
strncpy(curr_path, argv[1], PATH_BUFFER);
ret = lookat(curr_path);
}
else {
ret = RET_MALLOC_ERROR;
}
return ret;
}
static int entry_comp(const void *elem1, const void *elem2) {
/*
* Custom sorting function
* Sort the entries by their modification timestamp
* The bigger the timestamp, the newer the commit.
* Sort by descending order.
*/
unsigned int t1 = ((entry_t*)elem1)->timestamp;
unsigned int t2 = ((entry_t*)elem2)->timestamp;
if (t1 > t2) return -1;
if (t1 < t2) return 1;
return 0;
}
static int ftw_deleter(const char* fpath, const struct stat* sb, int typeflag, sFTW* ftwbuf) {
int ret = 0;
if (typeflag == FTW_F) {
ret = remove(fpath);
}
return ret;
}
static int delete_dir(const char* path) {
int ret;
ret = nftw(path, ftw_deleter, 25, 0);
if ( ret == 0 ) {
ret = rmdir(path);
}
return ret;
}
static int lookat(char *path) {
int ret = RET_OK;
int r;
DIR* root;
struct dirent* dirp;
size_t path_len;
root = opendir(path);
if (root == NULL) {
ret = RET_OPEN_ERR;
goto end;
}
path_len = strlen(curr_path);
while ((dirp = readdir(root)) != NULL) {
if ( strcmp(dirp->d_name, ".") == 0 || strcmp(dirp->d_name, "..") == 0 ) {
continue;
}
#ifdef DEBUG
printf("\nFound branch %s\n", dirp->d_name);
#endif
append(curr_path, path_len, dirp->d_name);
r = clean(curr_path, KEEP_LIMIT);
if (r != RET_OK) {
ret = r;
goto end;
}
}
end:
closedir(root);
return ret;
}
static int clean(char* dirname, unsigned short limit) {
int ret = RET_OK;
DIR* dp;
struct dirent* dirp;
struct stat statbuf;
entry_t* commit;
size_t commit_idx;
size_t path_len;
dp = opendir(dirname);
if (dp == NULL) {
ret = RET_OPEN_ERR;
goto end;
}
path_len = strlen(curr_path);
commit_idx = 0;
while ((dirp = readdir(dp)) != NULL) {
if ( strcmp(dirp->d_name, ".") == 0 || strcmp(dirp->d_name, "..") == 0 ) {
continue;
}
append(curr_path, path_len, dirp->d_name);
if (lstat(curr_path, &statbuf) < 0) {
ret = RET_LSTAT_ERROR;
goto end;
}
commit = &commits[commit_idx++];
strncpy(commit->dir_name, curr_path, PATH_BUFFER);
commit->timestamp = statbuf.st_mtim.tv_sec;
#ifdef DEBUG
printf("\tFound commit %s with mtime %ld\n", dirp->d_name, statbuf.st_mtim.tv_sec);
#endif
}
qsort(commits, commit_idx, sizeof(entry_t), entry_comp);
int i;
#ifdef DEBUG
printf("After sorting\n");
for(i=0; i<commit_idx; i++) {
printf("\tCommit %s with mtime %ld\n", commits[i].dir_name, commits[i].timestamp);
}
#endif
if (commit_idx > KEEP_LIMIT) {
for (i=KEEP_LIMIT; i<commit_idx; i++) {
#ifdef DEBUG
printf("Deleting %s\n", commits[i].dir_name);
#endif
delete_dir(commits[i].dir_name);
}
}
end:
closedir(dp);
return ret;
}
static int append(char* path, size_t pos, char* add) {
path[pos++] = '/';
path[pos] = 0;
strncpy(&path[pos], add, PATH_BUFFER-pos);
return 0;
}
Rust
use std::fs::{self, DirEntry};
use std::path::Path;
use std::error::Error;
use std::io;
use std::time;
use std::fmt;
#[derive(Debug)]
enum UnstageError {
IO(io::Error),
SystemTime(time::SystemTimeError),
BadPath,
NoDirectory,
}
impl From<io::Error> for UnstageError {
fn from(err: io::Error) -> UnstageError {
UnstageError::IO(err)
}
}
impl From<time::SystemTimeError> for UnstageError {
fn from(err: time::SystemTimeError) -> UnstageError {
UnstageError::SystemTime(err)
}
}
impl fmt::Display for UnstageError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
UnstageError::IO(ref err) => err.fmt(f),
UnstageError::SystemTime(ref err) => err.fmt(f),
UnstageError::BadPath => write!(f, "Problems converting Path to String."),
UnstageError::NoDirectory => write!(f, "Specified path is not a directory."),
}
}
}
impl Error for UnstageError {
fn description(&self) -> &str {
match *self {
UnstageError::IO(ref err) => err.description(),
UnstageError::SystemTime(ref err) => err.description(),
UnstageError::BadPath => "Couldn't convert path",
UnstageError::NoDirectory => "Not a directory",
}
}
fn cause(&self) -> Option<&Error> {
match *self {
UnstageError::IO(ref err) => Some(err),
UnstageError::SystemTime(ref err) => Some(err),
UnstageError::BadPath => None,
UnstageError::NoDirectory => None,
}
}
}
#[derive(Debug)]
struct StageInfo {
time: u64,
path: String,
}
fn main() {
let root = Path::new("test/test_folder");
if root.is_dir() {
for dir in root.read_dir().expect("failed to read directory") {
match dir {
Ok(dir) => {
let result = clear(&dir,3);
match result {
Ok(_) => {},
Err(e) => println!("{}", e.description()),
}
},
Err(e) => { println!("Error: {}", e.description()); },
}
}
} else {
println!("The provided root path does not point to a directory!");
}
}
fn clear(directory: &DirEntry, limit: usize) -> Result<(),UnstageError> {
let mut v:Vec<StageInfo> = Vec::new();
let path = directory.path();
let path = path.as_path();
if path.is_dir() {
for dir in path.read_dir().expect("Unable to read sub directory") {
let d = try!(dir);
let si = try!(get_stage_info(&d));
v.push(si);
}
v.sort_by_key(|ref x| {x.time});
let mut i = v.iter().skip(limit);
while let Some(si) = i.next() {
let s = &si.path;
let p = Path::new(s);
try!(fs::remove_dir_all(&p));
}
Ok(())
} else {
Err(UnstageError::NoDirectory)
}
}
fn get_stage_info(directory: &DirEntry) -> Result<StageInfo,UnstageError> {
let meta = try!(directory.metadata());
let modified = try!(meta.modified());
let t = try!(modified.elapsed()).as_secs();
let p = directory.path();
if let Some(p) = p.to_str() {
Ok(StageInfo {time: t, path: p.to_string()})
} else {
Err(UnstageError::BadPath)
}
}
Haskell
module Main where
import Control.Monad
import System.Directory
import System.Environment
import System.FilePath
import Data.Time.Clock
import Data.List
main = do
args <- getArgs
case args of
[] -> putStrLn "Usage: unstage directory"
(d:xs) -> do
putStrLn d
dirs <- getDirectoryContents d
let abs_dirs = map (d </>) (filter noDot dirs)
forM_ abs_dirs (sortAndDelete 3)
sortAndDelete limit branch = do
putStrLn branch
commit_infos <- getInfo branch
let delete_candidates = drop limit (sortBy sortFunc commit_infos)
forM_ delete_candidates (removeDirectoryRecursive . fst)
where
sortFunc a b = compare (snd b) (snd a)
getInfo branch = do
commits <- getDirectoryContents branch
let abs_commits = map (branch </>) (filter noDot commits)
mod_times <- mapM getModificationTime abs_commits
return (zip abs_commits mod_times)
noDot :: FilePath -> Bool
noDot "." = False
noDot ".." = False
noDot _ = True
Conclusion
The code snippets shown above should not be directly compared. The focus was on getting the job done. The maturity of the different versions is not identical. Differences can be found in argument handling, output and most significantly error handling. Many things are hard-coded.
Error handling wise the Rust version is the most complete. All kinds of things that could possibly go wrong are taken care of and the program will finish gracefully with a meaningfull error message. Thus the lengths of the programming snippets shouldn’t be taken too serious.
Yet it is obvious that different languages require a different level of detail from the programmer. As an example, Rust exposes all the possibilities of failure to the programmer due to the need of error handling, while somebody coding in Python probably is never aware of the fact that those kinda things could happen. In C many things need to be explicitly provided, while Python comes in its typical “batteries included”.
The choice of programming language, if one has the choice, should depend on what is aked from the application. Speed, reliability or fast development are some of the key factors. Ot maybe just for fun :)