/*
 *
 * Swabber - post processing of a strace log
 *
 * This program is designed to process large log data quickly.
 *
 * 1. Data structures
 *
 * There are five major data structures:
 *
 * a) file records
 *
 *    This is a bi-directional linked list of files that appear in the
 *    file log. Nodes are defined by the file_record structure.
 *
 *    Files can be found on the list using find_filename(), which
 *    performs a binary search on the filename.
 *
 *    Files can be added to the list using add_filename().
 *
 * b) tags
 *
 *    A tag is a string that is used to mark different parts of a build
 *    (e.g. "config", "apache", etc). Files contain a pointer to a list
 *    of tags, to track what phases of the build accessed that file.
 *
 *    Tags are stored in an unordered linked list of type tag_record.
 *    Tags can be added using add_tag(). They can be found based on name
 *    using find_tag().
 *
 * c) packages
 *
 *    Packages are host packages that were referenced. This is represented
 *    as a fixed size array.
 *
 * d) Package files
 *
 *    This is a pre-sorted array of files, and an associated package
 *    number.
 *
 * e) filters
 *
 *    Filters are patterns that are used to skip filenames (e.g. "/etc*"
 *    or the build directory).
 *
 *
 * 2. Operation
 *
 * load_distro()
 *    Reads a .blob file to load up packages and package_files arrays.
 *
 * read_lines()
 *    Reads the log files sequentially, and for each line terminating
 *    with a '\n', calls process_line(). This then parses the line (in
 *    the format of strace output) and for each relevant file access,
 *    processes the file.
 *
 * process_filename()
 *    Skips files that match a filter, found with filename_filter(). The
 *    file is then added to the list using add_filename(), which first
 *    does a binary search to see if the file has already been recorded.
 *    The tag is added to the file record.
 *
 * A report is created using print_results().
* */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "swabber.h" #include "packages.h" #include "lists.h" #include "wandering.h" #include "canonicalize.h" #define FUNCTION_ACCESS 1 #define FUNCTION_OPEN 2 #define FUNCTION_EXEC 3 #define FUNCTION_STAT 4 #define FUNCTION_LSTAT 5 struct top_tag_node * top_tag_base = NULL; static struct package_record * packages; static int max_packages = 0; /* * package_files is the pointer to the array * of all the file records. * pfr_tagged is used to create a linked list * of just those file records that have tags. */ static struct package_file_record * package_files; static struct package_file_record * pfr_tagged; static int max_files = 0; struct file_record * file_record_base = NULL; static char * project_path = NULL; static char * filter_path = NULL; static char distro_name[DISTRO_NAME_LENGTH]; struct { unsigned int execve; unsigned int stat; unsigned int open; unsigned int skipped; unsigned int lines; unsigned int unparsed_lines; unsigned int non_zero; unsigned int packages; unsigned int packages_not_found; unsigned int log_not_started; unsigned int file_not_found; unsigned int filtered; unsigned int not_absolute; unsigned int signal; unsigned int unfinished; } stats = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; struct { unsigned int error_codes; unsigned int not_in_distro; unsigned int file_detail; unsigned int load_whitelist; unsigned int load_blacklist; unsigned int wandering; } task_list = {0, 0, 0, 0, 0, 0}; static struct list_item_base * whitelist_base = NULL; static struct list_item_base * blacklist_base = NULL; static struct list_item_base * filter_base = NULL; /* options */ static int start_logging = 0; static int ignore_until_tag = 1; static int verbose = 0; static int skip_distro = 0; static char global_tag[TAG_LENGTH]; static int have_global_tag = 0; static struct timeval first_time; static unsigned long elapsed; /* * Detect the 
host distribution. * cmd is the path of the path swabber itself, which helsp find the * shell script 'detect_distro'. */ static int detect_distro(const char * swabber_exec) { FILE *fpipe; char command[1024]; char * p; strncpy(command, swabber_exec, 1024); if ((p = strrchr(command, '/'))) { *p = '\0'; strncat(command, "/detect_distro", 1024); } else { strncpy(command, "detect_distro", 1024); } if ( !(fpipe = (FILE*)popen(command, "r")) ) { /* If fpipe is NULL */ perror("Problems with pipe"); goto error; } if ((fgets(distro_name, 1024, fpipe)) == NULL) goto error; if ((p = distro_name + strlen(distro_name))) *(p-1) = '\0'; pclose(fpipe); return 0; error: strncpy(distro_name, "Unknown", 1024); return -1; } static const char * basename(const char * s) { char *p; p = strrchr(s, '/'); if (p == NULL) return s; if (p == s + strlen(s)-1) /* Last char is / */ return NULL; return p+1; } void shrink_path_dirname(char * p) { unsigned int l = FILENAME_LENGTH; char p2[FILENAME_LENGTH]; char * p3; if (strlen(p) < l) l = strlen(p); if (strlen(project_path) < l) l = strlen(project_path); if (memcmp(p, project_path, l) == 0) { strncpy(p2, p+l, FILENAME_LENGTH); snprintf(p, FILENAME_LENGTH, "%s", p2); if ((p3 = strrchr(p, '/'))) *p3 = '\0'; } } static void report_title(FILE * f, unsigned int num, const char * s) { unsigned int i; char title_string[201]; snprintf(title_string, 200, "%d. 
Report for %s", num, s); fprintf(f, "%s\n", title_string); for (i = 0; i < strlen(title_string); i++) putc('-', f); putc('\n', f); } static void start_timer(void) { gettimeofday(&first_time, NULL); } static void stop_timer(void) { struct timeval second_time; unsigned long t1, t2; gettimeofday(&second_time, NULL); t1 = (first_time.tv_usec/1000) + (1000*first_time.tv_sec); t2 = (second_time.tv_usec/1000) + (1000*second_time.tv_sec); elapsed = (unsigned long) (t2-t1); } static void print_timer(FILE *f) { if ((elapsed/1000)> 60) { fprintf(f, "%ldm%02ld.%03lds", (elapsed / 60000), elapsed/60000, elapsed%1000); } else { fprintf(f, "%ld.%03lds", elapsed/1000, elapsed%1000); } } #if 0 static struct file_record * add_file_record_unique(struct file_record **base, const char * filename) { struct file_record * p; for (p = *base; p; p = p->next) { if (strcmp(filename, p->filename) == 0) return p; } p = malloc(sizeof(struct file_record)); p->count = 1; p->next = *base; sprintf(p->filename, "%s", filename); *base = p; return p; } #endif struct top_tag_node * add_top_tag(const char * tag_name, unsigned int chop) { struct top_tag_node * new_tag = malloc(sizeof(*new_tag)); memset(new_tag, '\0', sizeof(*new_tag)); strncpy(new_tag->tag_name, tag_name, strlen(tag_name)-chop); new_tag->next = top_tag_base; top_tag_base = new_tag; return new_tag; } static void print_tags(FILE * file, unsigned int package_number, unsigned int print_details) { struct tag_node { struct tag_node * next; struct tag_record * tag; struct tag_file_node * tfn; }; struct tag_file_node { struct package_file_record * file; struct tag_file_node * next; } * p_tfn, *new_tfn; struct tag_node * new, * base = NULL, *tn; struct tag_record * tr; struct package_file_record * pfr; int firsttime = 1; int tagnum = 0; #define tags_to_print 10 int printed = 0; struct package_record * p = &packages[package_number]; /* prints a sorted, unique set of tags from a package */ /* For all the files... 
*/ for (pfr = pfr_tagged; pfr; pfr = pfr->file_next) { if (package_number != pfr->package_num) continue; /* For all its tags... */ for (tr = pfr->tag_base; tr; tr = tr->next) { /* first, find the tag */ for (tn = base; tn; tn = tn->next) { if (strcmp(tn->tag->tag, tr->tag) == 0) goto dontadd; } if (tr->checked) continue; /* Record the tag */ tr->checked = 1; new = malloc(sizeof(struct tag_node)); new->tag = tr; new->next = base; new->tfn = NULL; base = new; tn = new; dontadd: if (task_list.file_detail) { new_tfn = malloc(sizeof(struct tag_file_node)); new_tfn->file = pfr; new_tfn->next = tn->tfn; tn->tfn = new_tfn; } } } for (tn = base; tn; tn = tn->next) { if ((task_list.load_whitelist) && (list_filter(p->package_name, p->package_version, whitelist_base, 0))) continue; if (firsttime) { fprintf(file, "%s %s\n", p->package_name, p->package_version); firsttime = 0; tagnum = 0; } if (task_list.file_detail) { fprintf(file, "# %s:\n", tn->tag->tag); for (p_tfn = tn->tfn; p_tfn; p_tfn = p_tfn->next) { fprintf(file, "# %s\n", p_tfn->file->filename); } } else if (print_details) { if (tagnum == 0) { fprintf(file, "# Used by: %s, ", tn->tag->tag); } else { if (tagnum < tags_to_print) fprintf(file, " %s, ", tn->tag->tag); } tagnum++; } printed = 1; } if (print_details && printed) { if (tagnum >= tags_to_print) { fprintf(file, "and %d others\n", tagnum-tags_to_print+1); } else putc('\n', file); } } void print_extra_report(FILE * f) { struct tag_package_record { unsigned int package_num; void * whitelist, * blacklist; struct tag_package_record * next; } * tpr; #if 0 struct tag_package_record *next; #endif struct error_node * te; struct error_file_node * tf; struct top_tag_node *tag; struct file_record *p; struct tag_file_record * tfr; unsigned int tag_num = 0; unsigned int any_found = 0; unsigned char subsection = 'a'; for (tag = top_tag_base; tag; tag = tag->next) { /* Resolve the package/file list per tag */ for (tfr = tag->file_base; tfr; tfr = tfr->next) { for (tpr = 
tag->package_record_base; tpr; tpr = tpr->next) { if (tpr->package_num == tfr->file->package_num) { goto cont; } } tpr = malloc(sizeof(*tpr)); memset(tpr, 0, sizeof(*tpr)); tpr->next = tag->package_record_base; tpr->package_num = tfr->file->package_num; tag->package_record_base = tpr; cont: {} } /* Setup white and black lists */ if (task_list.load_blacklist) for (tpr = tag->package_record_base; tpr; tpr = tpr->next) { tpr->blacklist = list_filter( packages[tpr->package_num].package_name, packages[tpr->package_num].package_version, blacklist_base, 0); } if (task_list.load_whitelist) for (tpr = tag->package_record_base; tpr; tpr = tpr->next) { tpr->whitelist = list_filter( packages[tpr->package_num].package_name, packages[tpr->package_num].package_version, whitelist_base, 0); } putc('\n', f); report_title(f, ++tag_num, tag->tag_name); fprintf(f, "A. Dangerous errors\n"); subsection = 'a'; any_found = 0; if (task_list.load_blacklist) { for (tpr = tag->package_record_base; tpr; tpr = tpr->next) { if (tpr->blacklist) { if (any_found == 0) { fprintf(f, "%c. The following " "packages were referenced " "and are blacklisted:\n", subsection++); any_found = 1; } fprintf(f, " * %s\n", packages[tpr->package_num].package_name); } } if (any_found == 0) fprintf(f,"%c. No blacklisted packages found\n", subsection++); } if (task_list.wandering) { fprintf(f, "%c. Search on host, then in project\n", subsection++); wandering_print_report(f, tag, WANDERING_HOST_THEN_PROJECT); } fprintf(f, "\nB. Warnings\n"); subsection = 'a'; any_found = 0; for (tpr = tag->package_record_base; tpr; tpr = tpr->next) { if (tpr->whitelist == 0) { if (any_found == 0) { fprintf(f, "%c The following packages " "were referenced:\n", subsection++); any_found++; } fprintf(f, " - %s\n", packages[tpr->package_num].package_name); } } if (any_found == 0) fprintf(f, "%c. 
No packages were referenced\n", subsection++); if (task_list.not_in_distro) { any_found = 0; for (p = tag->not_in_distro_base; p; p = p->next) { if (any_found == 0) { fprintf(f, "%c. The following files " "were accessed but not in " "packages\n", subsection++); any_found = 1; } fprintf(f, " - %s\n", p->filename); } if (any_found == 0) fprintf(f, "%c. No unpackaged files were " "accessed\n", subsection++); } fprintf(f, "\nC. Info\n"); subsection = 'a'; fprintf(f, "%c. The following packages were in the " "whitelist:\n", subsection++); for (tpr = tag->package_record_base; tpr; tpr = tpr->next) { if (tpr->whitelist) fprintf(f, " %s\n", packages[tpr->package_num].package_name); } if (task_list.error_codes) { fprintf(f, "%c. Files that could not be accessed " "because of errors\n", subsection++); for (te = tag->error_base; te; te = te->next) { fprintf(f, " %s (%d):\n", te->error_name, te->error_number); for (tf = te->file_base; tf; tf = tf->next) { fprintf(f, " %s (%d)\n", tf->filename, tf->count); } } } putc('\n', f); #if 0 for (tpr = tag->package_record_base; tpr; tpr = next) { next = tpr->next; free(tpr); } #endif } } static int print_results(const char * output_filename, FILE * extra_report_filename) { FILE * file; time_t t; struct tm * tmp; char time_str[100]; unsigned int print_details = 1; int i; struct package_file_record * pfr, ** prev_pfr ; if ((file = fopen(output_filename, "w+")) == NULL) { perror("Creating output file"); return -1; } /* setup the header */ t = time(NULL); tmp = localtime(&t); strftime(time_str, sizeof(time_str), "%c", tmp); fprintf(file, "#\n# Automatically generated by swabber on %s\n", time_str); fprintf(file, "# Distro: %s\n", distro_name); distro_name[strlen(distro_name)-1] = '\0'; if (have_global_tag) { fprintf(file, "# Requirements for %s\n", global_tag); print_details = 0; } /* Make a linked list of just the files that have tags */ prev_pfr = &pfr_tagged; i = 0; for (pfr = package_files; pfr < &package_files[max_files]; pfr++) { if 
(pfr->tag_base == NULL) continue; *prev_pfr = pfr; prev_pfr = &pfr->file_next; i++; } *prev_pfr = NULL; printf("Reduced file list from %d to %d\n", max_files, i); for (i = 0; i < max_packages; i++) { print_tags(file, i, print_details); } fclose(file); print_extra_report(extra_report_filename); return -1; } static void print_stats(FILE *f) { struct list_item * l; fprintf(f, "Stats:\n"); fprintf(f, "Distro:\n"); fprintf(f, " Name: %s\n", distro_name); fprintf(f, " Packages: %d\n", stats.packages); fprintf(f, "Log file:\n"); fprintf(f, " number of lines: %d\n", stats.lines); fprintf(f, " Syscalls:\n"); fprintf(f, " stat: %d\n", stats.stat); fprintf(f, " execv: %d\n", stats.execve); fprintf(f, " open: %d\n", stats.open); fprintf(f, " skipped syscalls: %d\n", stats.skipped); fprintf(f, " Tossed because:\n"); fprintf(f, " before start marker: %d\n", stats.log_not_started); fprintf(f, " unparseable: %d\n", stats.unparsed_lines); fprintf(f, " non-zero return: %d\n", stats.non_zero); fprintf(f, " file not found in distro: %d\n", stats.file_not_found); fprintf(f, " signal: %d\n", stats.signal); fprintf(f, " not absolute path: %d\n", stats.not_absolute); fprintf(f, "File filters:\n"); fprintf(f, " filtered: %d\n", stats.filtered); if (filter_base != NULL) { for (l = filter_base->start; l; l = l->next) { fprintf(f, " %s: %d\n", l->name, l->count); } } if (task_list.load_whitelist) { struct list_item * t; fprintf(f, "Whitelist filters:\n"); if (whitelist_base) { for (t = whitelist_base->start; t; t = t->next) { fprintf(f, " %s %s: %d\n", t->name, t->version, t->count); } } } } static unsigned int filename_filter(const char * filename) { /* Returns 1 if it is to be filtered */ /* Filter everything that doesn't have a full path */ if (filename[0] != '/') { stats.not_absolute++; return 1; } /* Filter everything with the base dirs */ if (list_filter(filename, "", filter_base, 1)) return 1; return 0; } static struct tag_record * add_tag(struct tag_record * base, const char * tag) { 
struct tag_record * new; new = malloc(sizeof(struct tag_record)); memcpy(new->tag, tag, TAG_LENGTH); new->next = base; return new; } static int find_tag(struct tag_record * base, const char * tag) { struct tag_record * p; for (p = base; p; p = p->next) { if ((strncmp(p->tag, tag, TAG_LENGTH)) == 0) return 1; } return 0; } static struct package_file_record * find_filename(const char * filename) { struct package_file_record * p; int size, ret, loc; if (max_files == 0) { return NULL; } /* Start at halfway */ size = (max_files)/2; loc = size; while (1) { p = &package_files[loc]; size = (size < 4) ? 1 : size/2; ret = strcmp(filename, p->filename); if (ret == 0) { return p; } if (ret > 0) { if (loc == max_files) /* We're at the end */ goto done; ret = strcmp(filename, package_files[loc+1].filename); if (ret == 0) return &package_files[loc+1]; if (ret > 0) { /* Advance */ loc += size; continue; } if (ret < 0) { goto done; } } if (ret < 0) { if (loc == 0) { /* we are a the start */ goto done; } ret = strcmp(filename, package_files[loc-1].filename); if (ret == 0) { return &package_files[loc-1]; } else if (ret < 0) { /* rewind */ loc -= size; continue; } else if (ret > 0) { goto done; } } } done: return NULL; } static void process_filename(char * filename, unsigned int pid, unsigned int function, struct top_tag_node * tag) { struct package_file_record * pfr ; struct file_record * p, ** prev_p; struct tag_file_record * tfr; /* * Find if it is in our filter; doing this first saves us a lot * of time. 
*/ if (filename_filter(filename)) { stats.filtered++; return; } if (task_list.wandering) { wandering_found(tag, filename); } if ((pfr = find_filename(filename)) == NULL) { stats.file_not_found++; if (task_list.not_in_distro) { prev_p = &tag->not_in_distro_base; for (p = tag->not_in_distro_base; p; p = p->next) { if (strcmp(filename, p->filename) == 0) { if (tag->not_in_distro_base != p) { /* Move to front of queue */ *prev_p = p->next; p->next = tag->not_in_distro_base; tag->not_in_distro_base = p; } p->count++; return; } prev_p = &p->next; } p = malloc(sizeof(struct file_record)); p->count = 1; p->next = tag->not_in_distro_base; sprintf(p->filename, "%s", filename); tag->not_in_distro_base = p; } return; } if (find_tag(pfr->tag_base, tag->tag_name)) { /* yes, file was already accessed for this tag */ } else { /* we found the filename, so add the tag */ pfr->tag_base = add_tag(pfr->tag_base, tag->tag_name); } /* Add the file to the tag */ for (tfr = tag->file_base; tfr; tfr = tfr->next) { if (tfr->file == pfr) return; } tfr = malloc(sizeof(*tfr)); memset(tfr, 0, sizeof(*tfr)); tfr->file = pfr; tfr->next = tag->file_base; tag->file_base = tfr; return; } static int add_access_error(struct top_tag_node * tag, const char * filename, int error_number, const char * error_name) { /* Find to see if we already have the error accessed */ struct error_node * te; struct error_file_node * tf, ** tf_prev; if (filename_filter(filename)) { return 0 ; } for (te = tag->error_base; te; te = te->next) { if (te->error_number == error_number) goto found_error; } te = malloc(sizeof(struct error_node)); te->file_base = NULL; strncpy(te->error_name, error_name, ERROR_NAME_LENGTH); te->error_number = error_number; te->next = tag->error_base; tag->error_base = te; found_error: /* See if there's a file already */ tf_prev = &te->file_base; for (tf = te->file_base; tf; tf = tf->next) { if (strncmp(tf->filename, filename, FILENAME_LENGTH) == 0) { /* When we find a match, move it to the front */ 
if (te->file_base != tf) { *tf_prev = tf->next; tf->next = te->file_base; te->file_base = tf; } goto found_file; } tf_prev = &tf->next; } tf = malloc(sizeof(struct error_file_node)); strncpy(tf->filename, filename, FILENAME_LENGTH); tf->count = 0; tf->next = te->file_base; te->file_base = tf; wandering_add_error(tag, tf); found_file: tf->count++; return 0; } /* * These structures are used to create a * linked list of incomplete entries */ struct incomplete_node { unsigned int pid; char function[LINE_LENGTH]; char filename[LINE_LENGTH]; struct incomplete_node * next; }; static struct incomplete_node * incomplete_base = NULL; static void process_line(const char * line, struct top_tag_node * tag) { unsigned int pid; char function[LINE_LENGTH]; char filename[LINE_LENGTH]; char error_string[ERROR_NAME_LENGTH]; unsigned int function_number; int ret; int track = 0; int result; memset(filename, '\0', LINE_LENGTH); stats.lines++; fflush(stdout); ret = sscanf(line, "%u %250[^(](\"%250[^\"]\", %*[^)]) = %d%s\n", &pid, function, filename, &result, error_string); canonicalize(filename); switch (ret) { case 5: /* Okay, we probably have an error */ break; case 4: /* everything is okay */ break; case 3: /* no return value, assume the file is okay */ { if (strstr(line, "")) { struct incomplete_node * new; stats.unfinished++; new = malloc(sizeof(struct incomplete_node)); memset(new, 0, sizeof(struct incomplete_node)); new->next = incomplete_base; incomplete_base = new; new->pid = pid; strcpy(new->function, function); strcpy(new->filename, filename); goto out; } result = 0; } break; case 2: /* maybe it was a signal */ if (strncmp(function, "--- SIG", 7) == 0) { stats.signal++; goto out; } else if (strstr(function, " resumed>")) { struct incomplete_node * n, *prev; char * p = strstr(function, " = "); if (p == NULL) break; p += 3; if ((ret = sscanf(p, "%d\n", &result)) != 1) break; prev = incomplete_base; for (n = incomplete_base; n; n = n->next) { if (n->pid != pid) { prev = n; 
continue; } strcpy(function, n->function); strcpy(filename, n->filename); if (prev == incomplete_base) { incomplete_base = n->next; } else { prev->next = n->next; } free(n); goto process; break; } goto out; } break; default: /* not a valid line */ stats.unparsed_lines++; return; } process: if (strncmp(function, "open", LINE_LENGTH) == 0) { char * p; if (result < 0) { if (task_list.error_codes || task_list.wandering) add_access_error(tag, filename, result, error_string); stats.non_zero++; return; } #define tag_suffix ".rpm.log" if ((ignore_until_tag) && ((p = strstr(filename, tag_suffix)))) { /* We're starting to record for the first time */ tag = add_top_tag(filename, strlen(filename)-strlen(p)); start_logging = 1; stats.open++; goto out; } if ((ignore_until_tag == 0) || (start_logging)) { function_number = FUNCTION_OPEN; stats.open++; track++; } #if 0 if (!start_logging) { stats.log_not_started++; goto out; } #endif } else if (strncmp(function, "execve", LINE_LENGTH) == 0) { if (result < 0) { if (task_list.error_codes || task_list.wandering) add_access_error(tag, filename, result, error_string); stats.non_zero++; return; } function_number = FUNCTION_EXEC; stats.execve++; track++; } else if ((strncmp(function, "stat", LINE_LENGTH) == 0) || (strncmp(function, "fstat", LINE_LENGTH) == 0)) { if (result < 0) { if (task_list.error_codes || task_list.wandering) add_access_error(tag, filename, result, error_string); stats.non_zero++; return; } function_number = FUNCTION_STAT; stats.stat++; track++; } else { stats.skipped++; goto out; } if (strlen(filename) == 0) goto out; process_filename(filename, pid, function_number, tag); out: return; } static int read_logfile(const char * filename, struct top_tag_node * tag) { char buf[20480]; FILE * file; if ((file = fopen(filename, "r")) == NULL) { perror("Opening log file"); return 1; } while (fgets(buf, sizeof(buf), file)) { process_line(buf, tag); } fclose(file); return 0; } static int load_distro(const char * list_dir, FILE * 
f) { char filename[1024]; int fd; unsigned int total = 0; int ret; unsigned int rest; /* Open the packages file */ snprintf(filename, 1024, "%s/distro.blob", list_dir); fd = open(filename, O_RDONLY); read(fd, &distro_name, sizeof(distro_name)); read(fd, &max_packages, sizeof(max_packages)); read(fd, &max_files, sizeof(max_files)); packages = malloc(max_packages*sizeof(struct package_record)); package_files = malloc(max_files* sizeof(struct package_file_record)); rest = max_packages*sizeof(struct package_record); while (rest > 0) { ret = read(fd, packages+total, rest); if (ret < 0) { return -1; } total += ret; rest -= ret; } rest = max_files*sizeof(struct package_file_record); total = 0; while (rest > 0) { ret = read(fd, package_files+total, rest); if (ret < 0) { fprintf(f, "Error loading files of distro blob\n"); return -1; } total += ret; rest -= ret; } fprintf(f, "Done loading\n"); total = 0; close(fd); return 0; } static int read_logfiles(const char * log_path, FILE * f) { struct dirent * dp; DIR *dirp; char total_filename[2048]; struct top_tag_node * tag; struct stat mstat; if (stat(log_path, &mstat) < 0) { fprintf(f, "Could not open log path %s: %s\n", log_path, strerror(errno)); } if ((S_ISREG(mstat.st_mode)) && (strncmp(log_path+strlen(log_path)-4, ".log", 4) == 0)){ tag = add_top_tag(basename(log_path), 4); printf("processing tag %s\n", tag->tag_name); if ((read_logfile(log_path, tag)) < 0) return -1; } else if (mstat.st_mode & S_IFDIR) { if ((dirp = opendir(log_path)) == NULL) { printf("Opening log path %s, error is %s\n", log_path, strerror(errno)); return -1; } while ((dp = readdir(dirp)) != NULL) { if ((strcmp(dp->d_name, ".") == 0) || (strcmp(dp->d_name, "..") == 0)) continue; snprintf(total_filename, 2048, "%s/%s", log_path, dp->d_name); read_logfiles(total_filename, f); } closedir(dirp); } else { char * b = (char *) basename(log_path); if (strncmp(b+strlen(b)-4, ".log", 4) == 0) tag = add_top_tag(b, 4); else tag = add_top_tag(b, 0); if 
((read_logfile(log_path, tag)) < 0) return -1; } return 0; } static void process_data(void) { if (task_list.wandering) { wandering_process(top_tag_base, project_path); } } static void usage(void) { fprintf(stderr, "Usage: swabber [-v] [-v] [-a] [-e]\n" " -l ] -o ...\n" "\n" " Options:\n" " -v: verbose, use -v -v for more detail\n" " -a: print progress (not implemented)\n" " -l : strace logfile or directory of log files to read\n" " -d : distro directory\n" " -n : force the name of the distribution\n" " -r : where to dump extra data (leave empty for stdout)\n" " -t : use one tag for all packages\n" " -o : file to write output to\n" " -p : directory were the build is being done\n" " -f : directory where to find filters for whitelist,\n" " blacklist, filters\n" " -c ,...: perform various tasks, choose from:\n" " error_codes: show report of files whose access returned an error\n" " whitelist: remove packages that are in the whitelist\n" " blacklist: highlight packages that are in the blacklist as\n" " being dangerous\n" " file_detail: add file-level detail when listing packages\n" " not_in_distro: list host files that are not in the package\n" " database\n" " wandering: check for the case where the build searches for a\n" " file on the host, then finds it in the project.\n" " all: all the above\n" ); exit(1); /* NOT_REACHED */ } static void load_lists(FILE * f, const char * distro_subdir) { char list_filename[FILENAME_LENGTH]; struct list_item_base * t; if (filter_path == NULL) { fprintf(f, "No filter path specified... 
skipping whitelist," " blacklist, filter files\n"); return; } /* Read white and black lists */ int whitelist_loaded = 0, blacklist_loaded = 0, filters_loaded = 0; if (task_list.load_whitelist) { snprintf(list_filename, FILENAME_LENGTH, "%s/%s/whitelist", filter_path, distro_subdir); if ((t = list_load(whitelist_base, list_filename, 0))) { whitelist_base = t; whitelist_loaded++; } else whitelist_loaded--; } if (task_list.load_blacklist) { snprintf(list_filename, FILENAME_LENGTH, "%s/%s/blacklist", filter_path, distro_subdir); if ((t = list_load(blacklist_base, list_filename, 0))) { blacklist_base = t; blacklist_loaded++; } else blacklist_loaded--; } /* Load filter dir */ if (filter_path) { snprintf(list_filename, 2048, "%s/%s/%s", filter_path, distro_subdir, "filters"); if ((t = list_load(filter_base, list_filename, 0))) { filter_base = t; filters_loaded++; } else filters_loaded--; } fprintf(f, "For distribution %s ", distro_subdir); if ((filters_loaded > 0) || (blacklist_loaded > 0) || (whitelist_loaded > 0)) { fprintf(f, "loaded "); if (whitelist_loaded > 0) fprintf(f, "whitelist, "); if (blacklist_loaded > 0) fprintf(f, "blacklist, "); if (filters_loaded > 0) fprintf(f, "filters, "); } if ((filters_loaded < 0) || (blacklist_loaded < 0) || (whitelist_loaded < 0)) { fprintf(f, "did not load "); if (whitelist_loaded < 0) fprintf(f, "whitelist, "); if (blacklist_loaded < 0) fprintf(f, "blacklist, "); if (filters_loaded < 0) fprintf(f, "filters, "); } putc('\n', f); } /* * Code to parse task names and set/clear the appropriate * value associated with that task name. 
*/ struct task_name { char *name; unsigned int *valp; } task_names[] = { { "error_codes", &task_list.error_codes }, { "whitelist", &task_list.load_whitelist }, { "file_detail", &task_list.file_detail }, { "blacklist", &task_list.load_blacklist }, { "not_in_distro", &task_list.not_in_distro }, { "wandering", &task_list.wandering }, { NULL } }; /* * parses foo,bar,baz */ void parse_tasks(char *optarg) { char *p; struct task_name *tnp; int val; for (p = strtok(optarg, ","); p != NULL; p = strtok(NULL, ",")) { /* * If the name begins with '-', then * we'll clear the task, otherwise we * set the task. */ if (*p == '-') { if (*++p == '\0') { warnx("Missing task name after '-'"); usage(); /* NOT_REACHED */ } val = 0; } else val = 1; /* First check for "all" */ if (strcmp(p, "all") == 0) { for (tnp = task_names; tnp->name; tnp++) *tnp->valp = val; continue; } /* Look for a specific match */ for (tnp = task_names; tnp->name; tnp++) { if (strcmp(p, tnp->name) == 0) { *tnp->valp = val; break; } } if (!tnp->name) { warnx("Unknown task %s", p); usage(); /* NOT_REACHED */ } } } int main(int argc, char * argv[]) { char * log_path = NULL, * output_filename = NULL, *extra_report_filename = NULL, *distrolist_dir = NULL; FILE * extra_report_file; char base_distro_name[1024]; char * p; int opt; memset(global_tag, 0, TAG_LENGTH); while ((opt = getopt(argc, argv, "ac:d:f:l:n:o:p:r:s:t:v")) != -1) { switch (opt) { case 'c': parse_tasks(optarg); break; case 'd': distrolist_dir = optarg; break; case 'f': filter_path = optarg; break;; case 'l': log_path = optarg; break; case 'n': strncpy(distro_name, optarg, sizeof(distro_name)); break; case 'o': output_filename = optarg; break; case 'p': project_path = optarg; break; case 'r': extra_report_filename = optarg; break; case 's': skip_distro = 1; break; case 't': snprintf(global_tag, TAG_LENGTH, "%s", optarg); have_global_tag = 1; ignore_until_tag = 0; start_logging = 1; break; case 'v': verbose++; break; default: usage(); /* NOT_REACHED */ } } 
if (optind < argc) { if (!filter_base) filter_base = list_setup_base(); while (optind < argc) { insert_into_list(argv[optind], "", filter_base); optind++; } } /* Fill in the distribution */ if (distro_name[0] != '\0') detect_distro(argv[0]); if (log_path == NULL) { warnx("log filename missing"); usage(); /* NOT_REACHED */ } if (output_filename == NULL) { warnx("output filename missing"); usage(); /* NOT_REACHED */ } if (distrolist_dir == NULL) { warnx("distrolist directory name missing"); usage(); /* NOT_REACHED */ } if (extra_report_filename) { extra_report_file = fopen(extra_report_filename, "w+"); } else { extra_report_file = fdopen( fcntl(STDOUT_FILENO, F_DUPFD, 0), "a+"); } if (extra_report_file == NULL) err(1, "Could not open report file"); if (filter_base) fprintf(extra_report_file, "Added %d filters\n", filter_base->number_items); else fprintf(extra_report_file, "No filters added\n"); fprintf(extra_report_file, "Distribution name is %s\n", distro_name); load_lists(extra_report_file, "generic"); load_lists(extra_report_file, distro_name); /* Load base distro name */ strncpy(base_distro_name, distro_name, 1024); if ((p = strchr(base_distro_name, '-'))) { *p = '\0'; load_lists(extra_report_file, base_distro_name); } if (skip_distro == 0) { fprintf(extra_report_file, "Reading host distro blob\n"); start_timer(); if (load_distro(distrolist_dir, extra_report_file)) { fprintf(extra_report_file, "Giving up\n"); exit(1); } stop_timer(); fprintf(extra_report_file, " elapsed: "); print_timer(extra_report_file); fprintf(extra_report_file, "\n"); } else fprintf(extra_report_file, "Skipping distro reading. 
This will break, you know.\n"); fprintf(extra_report_file, "Reading log file\n"); /* Read lines */ start_timer(); if (read_logfiles(log_path, extra_report_file)) { fprintf(extra_report_file, "Error reading log file, could not continue\n"); }; stop_timer(); fprintf(extra_report_file, " elapsed: "); print_timer(extra_report_file); fprintf(extra_report_file, "\n"); process_data(); fprintf(extra_report_file, "Writing results\n"); start_timer(); print_results(output_filename, extra_report_file); stop_timer(); fprintf(extra_report_file, " elapsed: "); print_timer(extra_report_file); putc('\n', extra_report_file); print_stats(extra_report_file); if (extra_report_filename) fclose(extra_report_file); exit(0); }