archive-logs

Iteratively archive newline-separated log files

git clone https://git.8pit.net/archive-logs.git

  1#include <assert.h>
  2#include <err.h>
  3#include <errno.h>
  4#include <fcntl.h>
  5#include <ftw.h>
  6#include <libgen.h>
  7#include <regex.h>
  8#include <stdbool.h>
  9#include <stdio.h>
 10#include <stdlib.h>
 11#include <string.h>
 12#include <unistd.h>
 13#include <limits.h>
 14
 15#include <sys/stat.h>
 16#include <sys/types.h>
 17
 18#ifdef HAVE_SENDFILE
 19#include <sys/sendfile.h>
 20#else
 21#include "compat/sendfile.h"
 22#endif
 23
 24static regex_t reg;        /* pattern compiled from -e; matching paths are skipped by walkfn */
 25static bool eflag;         /* true once -e was given, i.e. reg holds a compiled pattern */
 26static double keep = 0.5;  /* fraction of lines left in the current file (-k percentage * 0.01) */
 27
 28static int current;        /* directory fd of LOGS_CURRENT, opened in main */
 29static int archive;        /* directory fd of LOGS_ARCHIVE, opened in main */
 30static char *basefp;       /* LOGS_CURRENT path as given on the command line */
 31
 32enum {
 33	MAXFD = 256, /* passed to nftw(3) as its file descriptor limit argument */
 34};
 35
/* TODO: Consider using copy_file_range(2) instead of sendfile(2)?
 *
 * See:
 *  * https://dev.to/albertzeyer/difference-of-ficlone-vs-ficlonerange-vs-copyfilerange-for-copy-on-write-support-41lm
 *  * https://github.com/golang/go/issues/36817
 */

/* Transfer exactly count bytes from in to out, calling sendfile(2)
 * again after every short transfer until everything was copied.
 *
 * offset is handed to sendfile(2) unchanged on every call; sendfile(2)
 * is expected to advance it by the amount it transferred.
 *
 * Returns 0 after count bytes were copied (immediately if count is
 * zero). Returns -1 with errno set if sendfile(2) fails, or with errno
 * set to EIO if the input ends before count bytes could be copied. */
static int
sendfileall(int out, int in, off_t *offset, size_t count)
{
	size_t total;
	ssize_t ret;

	total = 0;
	while (total < count) {
		ret = sendfile(out, in, offset, count - total);
		if (ret < 0)
			return -1;
		if (ret == 0) {
			/* Premature end of input: retrying would loop
			 * forever, so report it as an I/O error. */
			errno = EIO;
			return -1;
		}

		/* Account for what was actually transferred, not for
		 * what was requested. */
		total += (size_t)ret;
	}

	return 0;
}
 60
 61/* TODO: Evaluate if this is significantly faster with mmap(2)
 62 * and the posix_fadvise(2) POSIX_FADV_WILLNEED flag. */
 63static ssize_t
 64getcount(FILE *stream)
 65{
 66	int ch;
 67	ssize_t count;
 68	size_t lines;
 69	struct stat st;
 70
 71	if (keep == 0) { /* keep zero percent → copy everything */
 72		if (fstat(fileno(stream), &st) == -1)
 73			return -1;
 74		assert(st.st_size >= 0 && st.st_size <= SSIZE_MAX);
 75		return (ssize_t)st.st_size;
 76	}
 77
 78	lines = 0;
 79	while ((ch = getc(stream)) != EOF)
 80		if (ch == '\n')
 81			lines++;
 82	rewind(stream);
 83
 84	lines = lines - (lines * keep);
 85	count = 0;
 86
 87	while (lines > 0) {
 88		ch = getc(stream);
 89		if (ch == EOF) {
 90			errno = EBADFD;
 91			return -1;
 92		} else if (ch == '\n') {
 93			lines--;
 94		}
 95		count++;
 96	}
 97
 98	return count;
 99}
100
101/* TODO: Consider using fsync
102 *
103 * See:
104 *  * https://github.com/google/renameio/issues/11
105 *  * https://github.com/martanne/vis/blob/b3bec56d86f602da418763d3521aad87d5eb6b25/text.c#L901-L935
106 */
107static int
108trimfile(int fd, const char *fn, const struct stat *st, off_t offset)
109{
110	char tempfn[] = ".archive-logsXXXXXX";
111	int r, tempfd;
112
113	r = -1;
114
115	if ((tempfd = mkstemp(tempfn)) == -1)
116		goto ret0;
117	if (fchmod(tempfd, st->st_mode))
118		goto ret2;
119
120	if (sendfileall(tempfd, fd, &offset, st->st_size - offset) == -1)
121		goto ret2;
122	if (renameat(AT_FDCWD, tempfn, current, fn))
123		goto ret2;
124
125	r = 0;
126	goto ret1;
127ret2:
128	remove(tempfn);
129ret1:
130	close(tempfd);
131ret0:
132	return r;
133}
134
135static int
136arfile(FILE *instream, const char *fn, const struct stat *st)
137{
138	off_t off;
139	int r, infd, outfd;
140	ssize_t count;
141
142	r = -1;
143
144	/* Calculate amount of bytes to archive */
145	count = getcount(instream);
146	if (count == -1) {
147		goto ret0;
148	} else if (count == 0) { /* no data to archive */
149		r = 0;
150		goto ret0;
151	}
152
153	/* Can't use O_APPEND as it is not supported by sendfile, we
154	 * "emulate" it by seeking to the end of file after openat. */
155	if ((outfd = openat(archive, fn, O_CREAT|O_WRONLY, st->st_mode)) == -1)
156		goto ret0;
157	if (lseek(outfd, 0, SEEK_END) == -1)
158		goto ret1;
159
160	off = 0;
161	infd = fileno(instream);
162	if (sendfileall(outfd, infd, &off, count) == -1)
163		goto ret1;
164	if (trimfile(infd, fn, st, count) == -1)
165		goto ret1;
166
167	r = 0;
168ret1:
169	close(outfd);
170ret0:
171	return r;
172}
173
174static int
175walkfn(const char *fp, const struct stat *st, int flags, struct FTW *ftw)
176{
177	int fd;
178	FILE *stream;
179	const char *fn;
180
181	if (!strcmp(fp, "."))
182		return 0;
183	if (ftw->level == 0)
184		return 0; /* skip base directory itself */
185
186	/* Convert potentially absolute path to a path relative to basefp */
187	assert(strlen(fp) > strlen(basefp));
188	fn = fp + strlen(basefp);
189	if (*fn == '/')
190		fn++;
191
192	if (eflag && !regexec(&reg, fn, 0, NULL, 0))
193		return 0;
194
195	if (flags == FTW_D) {
196		if (mkdirat(archive, fn, st->st_mode) && errno != EEXIST)
197			err(EXIT_FAILURE, "mkdirat failed");
198		return 0;
199	} else if (flags != FTW_F) {
200		return 0;
201	}
202
203	if ((fd = openat(current, fn, O_RDWR)) == -1)
204		err(EXIT_FAILURE, "openat failed");
205	if (!(stream = fdopen(fd, "r+")))
206		err(EXIT_FAILURE, "fdopen failed");
207	if (arfile(stream, fn, st) == -1)
208		err(EXIT_FAILURE, "archive failed");
209
210	if (fclose(stream))
211		errx(EXIT_FAILURE, "fclose failed");
212	return 0;
213}
214
/* Print the command line synopsis, prefixed with the basename of prog,
 * to stderr and terminate with EXIT_FAILURE. Does not return. */
static void
usage(char *prog)
{
	static const char synopsis[] =
	    "[-e REGEX] [-k PERCENTAGE] LOGS_CURRENT LOGS_ARCHIVE";
	char *name;

	name = basename(prog);
	fprintf(stderr, "USAGE: %s %s\n", name, synopsis);
	exit(EXIT_FAILURE);
}
223
224int
225main(int argc, char **argv)
226{
227	unsigned long num;
228	int opt;
229
230	while ((opt = getopt(argc, argv, "e:k:")) != -1) {
231		switch (opt) {
232		case 'e':
233			eflag = true;
234			if (regcomp(&reg, optarg, REG_EXTENDED|REG_NOSUB))
235				errx(EXIT_FAILURE, "invalid regex");
236			break;
237		case 'k':
238			errno = 0;
239			num = strtoul(optarg, (char **)NULL, 10);
240			if (num == 0 && errno != 0)
241				err(EXIT_FAILURE, "strtoul failed");
242			else if (num > 100)
243				errx(EXIT_FAILURE, "invalid percentage");
244
245			keep = (double)num * 0.01;
246			break;
247		default:
248			usage(argv[0]);
249		}
250	}
251
252	if (argc <= 2 || optind >= argc)
253		usage(argv[0]);
254	basefp = argv[optind++];
255
256	/* Can't use O_SEARCH as glibc doesn't support it.
257	 * See: https://sourceware.org/bugzilla/show_bug.cgi?id=18228 */
258	if ((current = open(basefp, O_RDONLY|O_DIRECTORY)) == -1)
259		err(EXIT_FAILURE, "couldn't open current");
260	if ((archive = open(argv[optind], O_RDONLY|O_DIRECTORY)) == -1)
261		err(EXIT_FAILURE, "couldn't open archive");
262
263	if (nftw(basefp, walkfn, MAXFD, FTW_PHYS|FTW_CHDIR))
264		errx(EXIT_FAILURE, "nftw failed");
265
266	close(current);
267	close(archive);
268
269	if (eflag) regfree(&reg);
270	return EXIT_SUCCESS;
271}