/* libbcache/error.h */

#ifndef _BCACHE_ERROR_H
#define _BCACHE_ERROR_H

#include <linux/printk.h>

struct cache;
struct cache_set;

/*
 * XXX: separate out errors that indicate on disk data is inconsistent, and flag
 * superblock as such
 */

/* Error messages: */

/*
 * Log an error for cache device @ca through bch_err() on (ca)->set, prefixing
 * the message with the name bdevname() reports for (ca)->disk_sb.bdev.
 *
 * @fmt must be a string literal - it is concatenated onto "%s: " at compile
 * time. This macro itself only logs; the callers in this header follow it
 * with the appropriate error handling call.
 */
#define __bch_cache_error(ca, fmt, ...)					\
do {									\
	char _devname[BDEVNAME_SIZE];					\
									\
	bch_err((ca)->set, "%s: " fmt,					\
		bdevname((ca)->disk_sb.bdev, _devname),			\
		##__VA_ARGS__);						\
} while (0)

/*
 * Very fatal logic/inconsistency errors: these indicate that we've majorly
 * screwed up at runtime, i.e. it's not likely that the problem was just caused
 * by the data on disk being inconsistent. These macros call BUG():
 *
 * XXX: audit and convert to inconsistent() checks
 */

/*
 * Log a message for cache set @c via bch_err(), then unconditionally BUG().
 * The variadic arguments (format string first) are handed to bch_err()
 * unchanged.
 */
#define cache_set_bug(c, ...)						\
do {									\
	bch_err(c, __VA_ARGS__);					\
	BUG();								\
} while (0)

/*
 * cache_set_bug() if @cond is true. Expands to a statement; unlike the
 * *_inconsistent_on()/*_err_on() macros in this header it yields no value.
 */
#define cache_set_bug_on(cond, c, ...)					\
do {									\
	if (cond)							\
		cache_set_bug(c, __VA_ARGS__);				\
} while (0)

/*
 * Inconsistency errors: The on disk data is inconsistent. If these occur during
 * initial recovery, they don't indicate a bug in the running code - we walk all
 * the metadata before modifying anything. If they occur at runtime, they
 * indicate either a bug in the running code or (less likely) data is being
 * silently corrupted under us.
 *
 * XXX: audit all inconsistent errors and make sure they're all recoverable, in
 * BCH_ON_ERROR_CONTINUE mode
 */

/*
 * Invoked by cache_set_inconsistent() and cache_inconsistent() after the
 * message has been logged; defined outside this header.
 */
void bch_inconsistent_error(struct cache_set *);

/*
 * Log a message for cache set @c via bch_err(), then call
 * bch_inconsistent_error() on it. Note that @c is expanded twice.
 */
#define cache_set_inconsistent(c, ...)					\
do {									\
	bch_err(c, __VA_ARGS__);					\
	bch_inconsistent_error(c);					\
} while (0)

/*
 * Conditional form of cache_set_inconsistent(): @cond is evaluated exactly
 * once and, if it is true, the error is reported against @c.
 *
 * Evaluates to 1 if @cond was true and 0 otherwise, so it can be used directly
 * inside an if ().
 */
#define cache_set_inconsistent_on(cond, c, ...)				\
({									\
	int _inconsistent = !!(cond);					\
									\
	if (_inconsistent)						\
		cache_set_inconsistent(c, __VA_ARGS__);			\
	_inconsistent;							\
})

/*
 * Later we might want to mark only the particular device inconsistent, not the
 * entire cache set:
 */

/*
 * Device-level variant: log through __bch_cache_error() (which prefixes the
 * device name), then call bch_inconsistent_error() on the owning cache set,
 * (ca)->set.
 */
#define cache_inconsistent(ca, ...)					\
do {									\
	__bch_cache_error(ca, __VA_ARGS__);				\
	bch_inconsistent_error((ca)->set);				\
} while (0)

/*
 * Conditional form of cache_inconsistent(): @cond is evaluated exactly once
 * and, if it is true, the error is reported against device @ca.
 *
 * Evaluates to 1 if @cond was true and 0 otherwise.
 */
#define cache_inconsistent_on(cond, ca, ...)				\
({									\
	int _inconsistent = !!(cond);					\
									\
	if (_inconsistent)						\
		cache_inconsistent(ca, __VA_ARGS__);			\
	_inconsistent;							\
})

/*
 * Fsck errors: inconsistency errors we detect at mount time, and should ideally
 * be able to repair:
 */

/*
 * Fsck result codes. Within this header only two of them are produced; the
 * remaining values are used elsewhere.
 */
enum {
	BCH_FSCK_OK			= 0,
	/* stored in 'ret' by fsck_err() when opts.fix_errors is not set */
	BCH_FSCK_ERRORS_NOT_FIXED	= 1,
	/* stored in 'ret' by unfixable_fsck_err() */
	BCH_FSCK_REPAIR_UNIMPLEMENTED	= 2,
	BCH_FSCK_REPAIR_IMPOSSIBLE	= 3,
	BCH_FSCK_UNKNOWN_VERSION	= 4,
};

/*
 * Report an fsck error that cannot be repaired: log @msg with
 * " (repair unimplemented)" appended, set ret to
 * BCH_FSCK_REPAIR_UNIMPLEMENTED and jump to the fsck_err label.
 *
 * Not self-contained: the calling function must have a variable named 'ret'
 * in scope and a label named 'fsck_err'. @msg must be a string literal since
 * the suffix is attached by literal concatenation.
 */
#define unfixable_fsck_err(c, msg, ...)					\
do {									\
	bch_err(c, msg " (repair unimplemented)", ##__VA_ARGS__);	\
	ret = BCH_FSCK_REPAIR_UNIMPLEMENTED;				\
	goto fsck_err;							\
} while (0)

/*
 * unfixable_fsck_err() if @cond is true. Expands to a statement and, when the
 * condition holds, does not fall through: control leaves via goto fsck_err.
 */
#define unfixable_fsck_err_on(cond, c, ...)				\
do {									\
	if (cond)							\
		unfixable_fsck_err(c, __VA_ARGS__);			\
} while (0)

/*
 * Report a repairable fsck error on cache set @c.
 *
 * If (c)->opts.fix_errors is set: set the CACHE_SET_FSCK_FIXED_ERRORS bit in
 * (c)->flags, log @msg with ", fixing" appended, and fall through so the
 * caller can carry out the repair.
 *
 * Otherwise: log @msg, set ret to BCH_FSCK_ERRORS_NOT_FIXED and jump to the
 * fsck_err label.
 *
 * Not self-contained: the calling function must have a variable named 'ret'
 * in scope and a label named 'fsck_err'. @msg must be a string literal.
 */
#define fsck_err(c, msg, ...)						\
do {									\
	if ((c)->opts.fix_errors) {					\
		set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags);	\
		bch_err(c, msg ", fixing", ##__VA_ARGS__);		\
	} else {							\
		bch_err(c, msg, ##__VA_ARGS__);				\
		ret = BCH_FSCK_ERRORS_NOT_FIXED;			\
		goto fsck_err;						\
	}								\
} while (0)

/*
 * Conditional form of fsck_err(): @cond is evaluated exactly once and, if it
 * is true, fsck_err() is invoked for @c.
 *
 * Evaluates to the truth value of @cond. Because fsck_err() may leave via
 * goto fsck_err, a true result is only observed when fsck_err() fell through.
 * The caller needs the same 'ret' variable and 'fsck_err' label that
 * fsck_err() requires.
 */
#define fsck_err_on(cond, c, ...)					\
({									\
	bool _found = (cond);						\
									\
	if (_found)							\
		fsck_err(c, __VA_ARGS__);				\
	_found;								\
})

/*
 * Fatal errors: these don't indicate a bug, but we can't continue running in RW
 * mode - pretty much just due to metadata IO errors:
 */

/*
 * Invoked by the *_fatal_error() macros below after the message has been
 * logged; defined outside this header.
 */
void bch_fatal_error(struct cache_set *);

/*
 * Log a message for cache set @c via bch_err(), then call bch_fatal_error()
 * on it. Note that @c is expanded twice.
 */
#define cache_set_fatal_error(c, ...)					\
do {									\
	bch_err(c, __VA_ARGS__);					\
	bch_fatal_error(c);						\
} while (0)

/*
 * Conditional form of cache_set_fatal_error(): @cond is evaluated exactly once
 * and, if it is true, the fatal error is reported against @c.
 *
 * Evaluates to 1 if @cond was true and 0 otherwise.
 */
#define cache_set_fatal_err_on(cond, c, ...)				\
({									\
	int _fatal = !!(cond);						\
									\
	if (_fatal)							\
		cache_set_fatal_error(c, __VA_ARGS__);			\
	_fatal;								\
})

/*
 * Device-level fatal error: log through __bch_cache_error() (which prefixes
 * the device name), then call bch_fatal_error() on the owning cache set.
 *
 * The cache set is taken from (ca)->set, as in the other per-device macros in
 * this header, so the macro does not depend on the caller having a variable
 * named 'c' in scope.
 */
#define cache_fatal_error(ca, ...)					\
do {									\
	__bch_cache_error(ca, __VA_ARGS__);				\
	bch_fatal_error((ca)->set);					\
} while (0)

/*
 * Fatal IO error on device @ca: emit a rate-limited KERN_ERR message of the
 * form "fatal IO error on <device name> for <fmt...>" (wrapped by bch_fmt()
 * for the owning cache set), then call bch_fatal_error() on (ca)->set.
 *
 * @fmt must be a string literal - it is concatenated onto the fixed prefix at
 * compile time.
 */
#define cache_fatal_io_error(ca, fmt, ...)				\
do {									\
	char _devname[BDEVNAME_SIZE];					\
									\
	printk_ratelimited(KERN_ERR bch_fmt((ca)->set,			\
		"fatal IO error on %s for " fmt),			\
		bdevname((ca)->disk_sb.bdev, _devname),			\
		##__VA_ARGS__);						\
	bch_fatal_error((ca)->set);					\
} while (0)

/*
 * Conditional form of cache_fatal_io_error(): @cond is evaluated exactly once
 * and, if it is true, the fatal IO error is reported against device @ca.
 *
 * Evaluates to 1 if @cond was true and 0 otherwise.
 */
#define cache_fatal_io_err_on(cond, ca, ...)				\
({									\
	int _io_failed = !!(cond);					\
									\
	if (_io_failed)							\
		cache_fatal_io_error(ca, __VA_ARGS__);			\
	_io_failed;							\
})

/*
 * Nonfatal IO errors: either recoverable metadata IO (because we have
 * replicas), or data IO - we need to log it and print out a message, but we
 * don't (necessarily) want to shut down the fs:
 */

/*
 * Per-device IO accounting and nonfatal IO error entry points; all are defined
 * outside this header.
 *
 * NOTE(review): the meaning of the unsigned/int arguments of
 * bch_account_io_completion_time() is not visible from here - check the
 * definition before calling.
 */
void bch_account_io_completion(struct cache *);
void bch_account_io_completion_time(struct cache *, unsigned, int);

/*
 * NOTE(review): struct work_struct is neither forward-declared nor included by
 * this header, so the includer presumably has to pull in its declaration
 * first - confirm.
 */
void bch_nonfatal_io_error_work(struct work_struct *);

/* Does the error handling without logging a message */
void bch_nonfatal_io_error(struct cache *);

#if 0
/*
 * Compiled out. NOTE(review): as written this hands @c - presumably a
 * struct cache_set *, going by the macro name and the bch_err() call - to
 * bch_nonfatal_io_error(), which is declared as taking a struct cache *.
 * It would need a cache-set-level handler before it could be re-enabled.
 */
#define cache_set_nonfatal_io_error(c, ...)				\
do {									\
	bch_err(c, __VA_ARGS__);					\
	bch_nonfatal_io_error(c);					\
} while (0)
#endif

/*
 * Logs a message and handles the error: emits a rate-limited KERN_ERR message
 * of the form "IO error on <device name> for <fmt...>" (wrapped by bch_fmt()
 * for the owning cache set), then calls bch_nonfatal_io_error() on @ca.
 *
 * @fmt must be a string literal - it is concatenated onto the fixed prefix at
 * compile time.
 */
#define cache_nonfatal_io_error(ca, fmt, ...)				\
do {									\
	char _devname[BDEVNAME_SIZE];					\
									\
	printk_ratelimited(KERN_ERR bch_fmt((ca)->set,			\
		"IO error on %s for " fmt),				\
		bdevname((ca)->disk_sb.bdev, _devname),			\
		##__VA_ARGS__);						\
	bch_nonfatal_io_error(ca);					\
} while (0)

/*
 * Conditional form of cache_nonfatal_io_error(): @cond is evaluated exactly
 * once and, if it is true, the IO error is logged and handled for device @ca.
 *
 * Evaluates to the truth value of @cond.
 */
#define cache_nonfatal_io_err_on(cond, ca, ...)				\
({									\
	bool _io_failed = (cond);					\
									\
	if (_io_failed)							\
		cache_nonfatal_io_error(ca, __VA_ARGS__);		\
	_io_failed;							\
})

/* kill? */

/*
 * Rate-limited KERN_ERR log of "IO error: <fmt...>", wrapped by bch_fmt() for
 * cache set @c. Logging only: no error handling function is called and no
 * device name is added. @fmt must be a string literal.
 */
#define __bcache_io_error(c, fmt, ...)					\
	printk_ratelimited(KERN_ERR bch_fmt(c,				\
			"IO error: " fmt), ##__VA_ARGS__)

/*
 * Log an IO error for cache set @c via __bcache_io_error() and mark @bio as
 * failed by setting (bio)->bi_error to -EIO.
 */
#define bcache_io_error(c, bio, fmt, ...)				\
do {									\
	__bcache_io_error(c, fmt, ##__VA_ARGS__);			\
	(bio)->bi_error = -EIO;						\
} while (0)

#endif /* _BCACHE_ERROR_H */