1#include <assert.h>2#include <err.h>3#include <errno.h>4#include <fcntl.h>5#include <ftw.h>6#include <libgen.h>7#include <regex.h>8#include <stdbool.h>9#include <stdio.h>10#include <stdlib.h>11#include <string.h>12#include <unistd.h>13#include <limits.h>1415#include <sys/stat.h>16#include <sys/types.h>1718#ifdef HAVE_SENDFILE19#include <sys/sendfile.h>20#else21#include "compat/sendfile.h"22#endif2324static regex_t reg;25static bool eflag;26static double keep = 0.5;2728static int current;29static int archive;30static char *basefp;3132enum {33 MAXFD = 256,34};3536/* TODO: Consider using copy_file_range(2) instead of sendfile(2)?37 *38 * See:39 * * https://dev.to/albertzeyer/difference-of-ficlone-vs-ficlonerange-vs-copyfilerange-for-copy-on-write-support-41lm40 * * https://github.com/golang/go/issues/3681741 */42static int43sendfileall(int out, int in, off_t *offset, size_t count)44{45 size_t total;46 ssize_t ret;4748 total = 0;49 do {50 assert(count >= total);51 ret = sendfile(out, in, offset, count - total);52 if (ret < 0)53 return -1;5455 total += count;56 } while (total < count);5758 return 0;59}6061/* TODO: Evaluate if this is significantly faster with mmap(2)62 * and the posix_fadvise(2) POSIX_FADV_WILLNEED flag. */63static ssize_t64getcount(FILE *stream)65{66 int ch;67 ssize_t count;68 size_t lines;69 struct stat st;7071 if (keep == 0) { /* keep zero percent → copy everything */72 if (fstat(fileno(stream), &st) == -1)73 return -1;74 assert(st.st_size >= 0 && st.st_size <= SSIZE_MAX);75 return (ssize_t)st.st_size;76 }7778 lines = 0;79 while ((ch = getc(stream)) != EOF)80 if (ch == '\n')81 lines++;82 rewind(stream);8384 lines = lines - (lines * keep);85 count = 0;8687 while (lines > 0) {88 ch = getc(stream);89 if (ch == EOF) {90 errno = EBADFD;91 return -1;92 } else if (ch == '\n') {93 lines--;94 }95 count++;96 }9798 return count;99}100101/* TODO: Consider using fsync102 *103 * See:104 * * https://github.com/google/renameio/issues/11105 * * https://github.com/martanne/vis/blob/b3bec56d86f602da418763d3521aad87d5eb6b25/text.c#L901-L935106 */107static int108trimfile(int fd, const char *fn, const struct stat *st, off_t offset)109{110 char tempfn[] = ".archive-logsXXXXXX";111 int r, tempfd;112113 r = -1;114115 if ((tempfd = mkstemp(tempfn)) == -1)116 goto ret0;117 if (fchmod(tempfd, st->st_mode))118 goto ret2;119120 if (sendfileall(tempfd, fd, &offset, st->st_size - offset) == -1)121 goto ret2;122 if (renameat(AT_FDCWD, tempfn, current, fn))123 goto ret2;124125 r = 0;126 goto ret1;127ret2:128 remove(tempfn);129ret1:130 close(tempfd);131ret0:132 return r;133}134135static int136arfile(FILE *instream, const char *fn, const struct stat *st)137{138 off_t off;139 int r, infd, outfd;140 ssize_t count;141142 r = -1;143144 /* Calculate amount of bytes to archive */145 count = getcount(instream);146 if (count == -1) {147 goto ret0;148 } else if (count == 0) { /* no data to archive */149 r = 0;150 goto ret0;151 }152153 /* Can't use O_APPEND as it is not supported by sendfile, we154 * "emulate" it by seeking to the end of file after openat. */155 if ((outfd = openat(archive, fn, O_CREAT|O_WRONLY, st->st_mode)) == -1)156 goto ret0;157 if (lseek(outfd, 0, SEEK_END) == -1)158 goto ret1;159160 off = 0;161 infd = fileno(instream);162 if (sendfileall(outfd, infd, &off, count) == -1)163 goto ret1;164 if (trimfile(infd, fn, st, count) == -1)165 goto ret1;166167 r = 0;168ret1:169 close(outfd);170ret0:171 return r;172}173174static int175walkfn(const char *fp, const struct stat *st, int flags, struct FTW *ftw)176{177 int fd;178 FILE *stream;179 const char *fn;180181 if (!strcmp(fp, "."))182 return 0;183 if (ftw->level == 0)184 return 0; /* skip base directory itself */185186 /* Convert potentially absolute path to a path relative to basefp */187 assert(strlen(fp) > strlen(basefp));188 fn = fp + strlen(basefp);189 if (*fn == '/')190 fn++;191192 if (eflag && !regexec(®, fn, 0, NULL, 0))193 return 0;194195 if (flags == FTW_D) {196 if (mkdirat(archive, fn, st->st_mode) && errno != EEXIST)197 err(EXIT_FAILURE, "mkdirat failed");198 return 0;199 } else if (flags != FTW_F) {200 return 0;201 }202203 if ((fd = openat(current, fn, O_RDWR)) == -1)204 err(EXIT_FAILURE, "openat failed");205 if (!(stream = fdopen(fd, "r+")))206 err(EXIT_FAILURE, "fdopen failed");207 if (arfile(stream, fn, st) == -1)208 err(EXIT_FAILURE, "archive failed");209210 if (fclose(stream))211 errx(EXIT_FAILURE, "fclose failed");212 return 0;213}214215static void216usage(char *prog)217{218 char *usage = "[-e REGEX] [-k PERCENTAGE] LOGS_CURRENT LOGS_ARCHIVE";219220 fprintf(stderr, "USAGE: %s %s\n", basename(prog), usage);221 exit(EXIT_FAILURE);222}223224int225main(int argc, char **argv)226{227 unsigned long num;228 int opt;229230 while ((opt = getopt(argc, argv, "e:k:")) != -1) {231 switch (opt) {232 case 'e':233 eflag = true;234 if (regcomp(®, optarg, REG_EXTENDED|REG_NOSUB))235 errx(EXIT_FAILURE, "invalid regex");236 break;237 case 'k':238 errno = 0;239 num = strtoul(optarg, (char **)NULL, 10);240 if (num == 0 && errno != 0)241 err(EXIT_FAILURE, "strtoul failed");242 else if (num > 100)243 errx(EXIT_FAILURE, "invalid percentage");244245 keep = (double)num * 0.01;246 break;247 default:248 usage(argv[0]);249 }250 }251252 if (argc <= 2 || optind >= argc)253 usage(argv[0]);254 basefp = argv[optind++];255256 /* Can't use O_SEARCH as glibc doesn't support it.257 * See: https://sourceware.org/bugzilla/show_bug.cgi?id=18228 */258 if ((current = open(basefp, O_RDONLY|O_DIRECTORY)) == -1)259 err(EXIT_FAILURE, "couldn't open current");260 if ((archive = open(argv[optind], O_RDONLY|O_DIRECTORY)) == -1)261 err(EXIT_FAILURE, "couldn't open archive");262263 if (nftw(basefp, walkfn, MAXFD, FTW_PHYS|FTW_CHDIR))264 errx(EXIT_FAILURE, "nftw failed");265266 close(current);267 close(archive);268269 if (eflag) regfree(®);270 return EXIT_SUCCESS;271}