1#include <assert.h>
2#include <err.h>
3#include <errno.h>
4#include <fcntl.h>
5#include <ftw.h>
6#include <libgen.h>
7#include <regex.h>
8#include <stdbool.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <unistd.h>
13#include <limits.h>
14
15#include <sys/stat.h>
16#include <sys/types.h>
17
18#ifdef HAVE_SENDFILE
19#include <sys/sendfile.h>
20#else
21#include "compat/sendfile.h"
22#endif
23
/* Exclusion pattern given with -e; only meaningful while eflag is set. */
static bool eflag;
static regex_t reg;

/* Fraction set with -k (percentage / 100); defaults to one half. */
static double keep = 0.5;

/* Descriptors of the LOGS_CURRENT and LOGS_ARCHIVE directories. */
static int current, archive;

/* LOGS_CURRENT exactly as it was spelled on the command line. */
static char *basefp;

/* Upper bound on descriptors handed to nftw(3). */
enum { MAXFD = 256 };
35
/*
 * Copy exactly count bytes from in to out with sendfile(2).
 *
 * A single sendfile(2) call may transfer fewer bytes than requested,
 * so the call is repeated with the outstanding amount until everything
 * has been copied. The offset pointer is handed to sendfile(2) as-is
 * on every call; per sendfile(2) the kernel advances it by the number
 * of bytes transferred.
 *
 * Returns 0 on success and -1 on failure with errno set. Reaching the
 * end of in before count bytes were copied is reported as EBADFD, the
 * same error getcount() uses for a file that changed underneath us.
 *
 * TODO: Consider using copy_file_range(2) instead of sendfile(2)?
 *
 * See:
 *   * https://dev.to/albertzeyer/difference-of-ficlone-vs-ficlonerange-vs-copyfilerange-for-copy-on-write-support-41lm
 *   * https://github.com/golang/go/issues/36817
 */
static int
sendfileall(int out, int in, off_t *offset, size_t count)
{
	size_t total;
	ssize_t ret;

	total = 0;
	while (total < count) {
		ret = sendfile(out, in, offset, count - total);
		if (ret < 0)
			return -1;
		if (ret == 0) {
			/* No progress means EOF; bail out instead of
			 * spinning forever or claiming success. */
			errno = EBADFD;
			return -1;
		}

		/* Account for what was actually transferred. */
		total += (size_t)ret;
	}

	return 0;
}
60
61/* TODO: Evaluate if this is significantly faster with mmap(2)
62 * and the posix_fadvise(2) POSIX_FADV_WILLNEED flag. */
63static ssize_t
64getcount(FILE *stream)
65{
66 int ch;
67 ssize_t count;
68 size_t lines;
69 struct stat st;
70
71 if (keep == 0) { /* keep zero percent → copy everything */
72 if (fstat(fileno(stream), &st) == -1)
73 return -1;
74 assert(st.st_size >= 0 && st.st_size <= SSIZE_MAX);
75 return (ssize_t)st.st_size;
76 }
77
78 lines = 0;
79 while ((ch = getc(stream)) != EOF)
80 if (ch == '\n')
81 lines++;
82 rewind(stream);
83
84 lines = lines - (lines * keep);
85 count = 0;
86
87 while (lines > 0) {
88 ch = getc(stream);
89 if (ch == EOF) {
90 errno = EBADFD;
91 return -1;
92 } else if (ch == '\n') {
93 lines--;
94 }
95 count++;
96 }
97
98 return count;
99}
100
101/* TODO: Consider using fsync
102 *
103 * See:
104 * * https://github.com/google/renameio/issues/11
105 * * https://github.com/martanne/vis/blob/b3bec56d86f602da418763d3521aad87d5eb6b25/text.c#L901-L935
106 */
107static int
108trimfile(int fd, const char *fn, const struct stat *st, off_t offset)
109{
110 char tempfn[] = ".archive-logsXXXXXX";
111 int r, tempfd;
112
113 r = -1;
114
115 if ((tempfd = mkstemp(tempfn)) == -1)
116 goto ret0;
117 if (fchmod(tempfd, st->st_mode))
118 goto ret2;
119
120 if (sendfileall(tempfd, fd, &offset, st->st_size - offset) == -1)
121 goto ret2;
122 if (renameat(AT_FDCWD, tempfn, current, fn))
123 goto ret2;
124
125 r = 0;
126 goto ret1;
127ret2:
128 remove(tempfn);
129ret1:
130 close(tempfd);
131ret0:
132 return r;
133}
134
135static int
136arfile(FILE *instream, const char *fn, const struct stat *st)
137{
138 off_t off;
139 int r, infd, outfd;
140 ssize_t count;
141
142 r = -1;
143
144 /* Calculate amount of bytes to archive */
145 count = getcount(instream);
146 if (count == -1) {
147 goto ret0;
148 } else if (count == 0) { /* no data to archive */
149 r = 0;
150 goto ret0;
151 }
152
153 /* Can't use O_APPEND as it is not supported by sendfile, we
154 * "emulate" it by seeking to the end of file after openat. */
155 if ((outfd = openat(archive, fn, O_CREAT|O_WRONLY, st->st_mode)) == -1)
156 goto ret0;
157 if (lseek(outfd, 0, SEEK_END) == -1)
158 goto ret1;
159
160 off = 0;
161 infd = fileno(instream);
162 if (sendfileall(outfd, infd, &off, count) == -1)
163 goto ret1;
164 if (trimfile(infd, fn, st, count) == -1)
165 goto ret1;
166
167 r = 0;
168ret1:
169 close(outfd);
170ret0:
171 return r;
172}
173
174static int
175walkfn(const char *fp, const struct stat *st, int flags, struct FTW *ftw)
176{
177 int fd;
178 FILE *stream;
179 const char *fn;
180
181 if (!strcmp(fp, "."))
182 return 0;
183 if (ftw->level == 0)
184 return 0; /* skip base directory itself */
185
186 /* Convert potentially absolute path to a path relative to basefp */
187 assert(strlen(fp) > strlen(basefp));
188 fn = fp + strlen(basefp);
189 if (*fn == '/')
190 fn++;
191
192 if (eflag && !regexec(®, fn, 0, NULL, 0))
193 return 0;
194
195 if (flags == FTW_D) {
196 if (mkdirat(archive, fn, st->st_mode) && errno != EEXIST)
197 err(EXIT_FAILURE, "mkdirat failed");
198 return 0;
199 } else if (flags != FTW_F) {
200 return 0;
201 }
202
203 if ((fd = openat(current, fn, O_RDWR)) == -1)
204 err(EXIT_FAILURE, "openat failed");
205 if (!(stream = fdopen(fd, "r+")))
206 err(EXIT_FAILURE, "fdopen failed");
207 if (arfile(stream, fn, st) == -1)
208 err(EXIT_FAILURE, "archive failed");
209
210 if (fclose(stream))
211 errx(EXIT_FAILURE, "fclose failed");
212 return 0;
213}
214
/*
 * Write the command synopsis, prefixed with the base name of prog, to
 * standard error and terminate the process with EXIT_FAILURE.
 */
static void
usage(char *prog)
{
	static const char synopsis[] =
	    "[-e REGEX] [-k PERCENTAGE] LOGS_CURRENT LOGS_ARCHIVE";
	const char *name;

	name = basename(prog);
	fprintf(stderr, "USAGE: %s %s\n", name, synopsis);
	exit(EXIT_FAILURE);
}
223
224int
225main(int argc, char **argv)
226{
227 unsigned long num;
228 int opt;
229
230 while ((opt = getopt(argc, argv, "e:k:")) != -1) {
231 switch (opt) {
232 case 'e':
233 eflag = true;
234 if (regcomp(®, optarg, REG_EXTENDED|REG_NOSUB))
235 errx(EXIT_FAILURE, "invalid regex");
236 break;
237 case 'k':
238 errno = 0;
239 num = strtoul(optarg, (char **)NULL, 10);
240 if (num == 0 && errno != 0)
241 err(EXIT_FAILURE, "strtoul failed");
242 else if (num > 100)
243 errx(EXIT_FAILURE, "invalid percentage");
244
245 keep = (double)num * 0.01;
246 break;
247 default:
248 usage(argv[0]);
249 }
250 }
251
252 if (argc <= 2 || optind >= argc)
253 usage(argv[0]);
254 basefp = argv[optind++];
255
256 /* Can't use O_SEARCH as glibc doesn't support it.
257 * See: https://sourceware.org/bugzilla/show_bug.cgi?id=18228 */
258 if ((current = open(basefp, O_RDONLY|O_DIRECTORY)) == -1)
259 err(EXIT_FAILURE, "couldn't open current");
260 if ((archive = open(argv[optind], O_RDONLY|O_DIRECTORY)) == -1)
261 err(EXIT_FAILURE, "couldn't open archive");
262
263 if (nftw(basefp, walkfn, MAXFD, FTW_PHYS|FTW_CHDIR))
264 errx(EXIT_FAILURE, "nftw failed");
265
266 close(current);
267 close(archive);
268
269 if (eflag) regfree(®);
270 return EXIT_SUCCESS;
271}