Home | History | Annotate | Download | only in configd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 /*
     30  * sqlite is not compatible with _FILE_OFFSET_BITS=64, but we need to
     31  * be able to statvfs(2) possibly large systems.  This define gives us
     32  * access to the transitional interfaces.  See lfcompile64(5) for how
     33  * _LARGEFILE64_SOURCE works.
     34  */
     35 #define	_LARGEFILE64_SOURCE
     36 
     37 #include <assert.h>
     38 #include <door.h>
     39 #include <dirent.h>
     40 #include <errno.h>
     41 #include <fcntl.h>
     42 #include <limits.h>
     43 #include <pthread.h>
     44 #include <stdarg.h>
     45 #include <stdio.h>
     46 #include <stdlib.h>
     47 #include <string.h>
     48 #include <sys/stat.h>
     49 #include <sys/statvfs.h>
     50 #include <unistd.h>
     51 #include <zone.h>
     52 #include <libscf_priv.h>
     53 
     54 #include "configd.h"
     55 #include "repcache_protocol.h"
     56 
     57 #include <sqlite.h>
     58 #include <sqlite-misc.h>
     59 
     60 /*
     61  * This file has two purposes:
     62  *
     63  * 1. It contains the database schema, and the code for setting up our backend
     64  *    databases, including installing said schema.
     65  *
     66  * 2. It provides a simplified interface to the SQL database library, and
     67  *    synchronizes MT access to the database.
     68  */
     69 
/*
 * Running totals for one category of backend activity.  The fields are
 * accumulated by UPDATE_TOTALS_WR().
 */
typedef struct backend_spent {
	uint64_t bs_count;	/* number of samples accumulated */
	hrtime_t bs_time;	/* sum of gethrtime() deltas */
	hrtime_t bs_vtime;	/* sum of gethrvtime() deltas */
} backend_spent_t;
     75 
/*
 * Statistics kept per backend, split by where the time went.  The `field'
 * argument of UPDATE_TOTALS()/UPDATE_TOTALS_WR() names one of these members.
 */
typedef struct backend_totals {
	backend_spent_t	bt_lock;	/* waiting for lock */
	backend_spent_t	bt_exec;	/* time spent executing SQL */
} backend_totals_t;
     80 
/*
 * State for one backend database.  Instances live in be_info[] and are
 * reached through bes[], both indexed by backend type.
 */
typedef struct sqlite_backend {
	pthread_mutex_t	be_lock;
	pthread_t	be_thread;	/* thread holding lock */
	struct sqlite	*be_db;
	const char	*be_path;	/* path to db */
	int		be_readonly;	/* readonly at start, and still is */
	int		be_writing;	/* held for writing */
	backend_type_t	be_type;	/* type of db */
	hrtime_t	be_lastcheck;	/* time of last read-only check */
	/* indexed by !!writing: [0] is for reading, [1] is for writing */
	backend_totals_t be_totals[2];	/* one for reading, one for writing */
} sqlite_backend_t;
     92 
/*
 * Handle for one backend transaction.
 */
struct backend_tx {
	sqlite_backend_t	*bt_be;		/* backend the tx runs on */
	int			bt_readonly;
	/* compared against BACKEND_TYPE_* by backend_is_upgraded() */
	int			bt_type;
	int			bt_full;	/* SQLITE_FULL during tx */
};
     99 
    100 #define	UPDATE_TOTALS_WR(sb, writing, field, ts, vts) { \
    101 	backend_spent_t *__bsp = &(sb)->be_totals[!!(writing)].field; \
    102 	__bsp->bs_count++;						\
    103 	__bsp->bs_time += (gethrtime() - ts);				\
    104 	__bsp->bs_vtime += (gethrvtime() - vts);			\
    105 }
    106 
    107 #define	UPDATE_TOTALS(sb, field, ts, vts) \
    108 	UPDATE_TOTALS_WR(sb, (sb)->be_writing, field, ts, vts)
    109 
/*
 * A query under construction.
 * NOTE(review): presumably bq_buf is the SQL text and bq_size its length or
 * allocation size -- the users are not visible here; confirm against the
 * backend_query_*() routines.
 */
struct backend_query {
	char	*bq_buf;
	size_t	bq_size;
};
    114 
/*
 * One table of the schema.  Arrays of these are terminated by an entry
 * whose members are both NULL.
 */
struct backend_tbl_info {
	const char *bti_name;	/* table name */
	const char *bti_cols;	/* comma-separated SQL column definitions */
};
    119 
/*
 * One index of the schema.  Arrays of these are terminated by an all-NULL
 * entry.
 */
struct backend_idx_info {
	const char *bxi_tbl;	/* table the index is built on */
	/*
	 * Short index name.  The same short name recurs across tables, so it
	 * is presumably combined with bxi_tbl when the index is created --
	 * TODO confirm against the schema setup code.
	 */
	const char *bxi_idx;
	const char *bxi_cols;	/* comma-separated list of indexed columns */
};
    125 
/*
 * Panic coordination.  backend_panic_thread is nonzero once some thread has
 * entered backend_panic(); backend_panic() reads and sets it while holding
 * backend_panic_lock.  Any later caller of backend_panic() parks itself on
 * backend_panic_cv.
 */
static pthread_mutex_t backend_panic_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t backend_panic_cv = PTHREAD_COND_INITIALIZER;
pthread_t backend_panic_thread = 0;

/* Debugging knobs. */
int backend_do_trace = 0;		/* invoke tracing callback */
int backend_print_trace = 0;		/* tracing callback prints SQL */
int backend_panic_abort = 0;		/* abort when panicking */
    133 
/*
 * Interval between read-only checks while starting up, as an hrtime_t:
 * 2 * NANOSEC.
 */
#define	BACKEND_READONLY_CHECK_INTERVAL	(2 * (hrtime_t)NANOSEC)
    136 
/*
 * Any incompatible change to the below schema should bump the version number.
 * The schema has been changed to support value ordering, but this change
 * is backwards-compatible - i.e. a previous svc.configd can use a
 * repository database with the new schema perfectly well.  As a result,
 * the schema version has not been updated, allowing downgrade of systems
 * without losing repository data.
 */
#define	BACKEND_SCHEMA_VERSION		5
    146 
/*
 * Tables specific to BACKEND_TYPE_NORMAL databases.  Each entry pairs a
 * table name with its column definitions; the list ends with { NULL, NULL }.
 */
static struct backend_tbl_info tbls_normal[] = { /* BACKEND_TYPE_NORMAL */
	/*
	 * service_tbl holds all services.  svc_id is the identifier of the
	 * service.
	 */
	{
		"service_tbl",
		"svc_id          INTEGER PRIMARY KEY,"
		"svc_name        CHAR(256) NOT NULL"
	},

	/*
	 * instance_tbl holds all of the instances.  The parent service id
	 * is instance_svc.
	 */
	{
		"instance_tbl",
		"instance_id     INTEGER PRIMARY KEY,"
		"instance_name   CHAR(256) NOT NULL,"
		"instance_svc    INTEGER NOT NULL"
	},

	/*
	 * snapshot_lnk_tbl links (instance, snapshot name) with snapshots.
	 */
	{
		"snapshot_lnk_tbl",
		"lnk_id          INTEGER PRIMARY KEY,"
		"lnk_inst_id     INTEGER NOT NULL,"
		"lnk_snap_name   CHAR(256) NOT NULL,"
		"lnk_snap_id     INTEGER NOT NULL"
	},

	/*
	 * snaplevel_tbl maps a snapshot id to a set of named, ordered
	 * snaplevels.
	 */
	{
		"snaplevel_tbl",
		"snap_id                 INTEGER NOT NULL,"
		"snap_level_num          INTEGER NOT NULL,"
		"snap_level_id           INTEGER NOT NULL,"
		"snap_level_service_id   INTEGER NOT NULL,"
		"snap_level_service      CHAR(256) NOT NULL,"
		"snap_level_instance_id  INTEGER NULL,"
		"snap_level_instance     CHAR(256) NULL"
	},

	/*
	 * snaplevel_lnk_tbl links snaplevels to property groups.
	 * snaplvl_pg_* is identical to the original property group,
	 * and snaplvl_gen_id overrides the generation number.
	 * The service/instance ids are as in the snaplevel.
	 */
	{
		"snaplevel_lnk_tbl",
		"snaplvl_level_id INTEGER NOT NULL,"
		"snaplvl_pg_id    INTEGER NOT NULL,"
		"snaplvl_pg_name  CHAR(256) NOT NULL,"
		"snaplvl_pg_type  CHAR(256) NOT NULL,"
		"snaplvl_pg_flags INTEGER NOT NULL,"
		"snaplvl_gen_id   INTEGER NOT NULL"
	},

	{ NULL, NULL }
};
    213 
/*
 * Indexes on the tbls_normal tables: { table, short index name, columns }.
 * The list ends with an all-NULL entry.
 */
static struct backend_idx_info idxs_normal[] = { /* BACKEND_TYPE_NORMAL */
	{ "service_tbl",	"name",	"svc_name" },
	{ "instance_tbl",	"name",	"instance_svc, instance_name" },
	{ "snapshot_lnk_tbl",	"name",	"lnk_inst_id, lnk_snap_name" },
	{ "snapshot_lnk_tbl",	"snapid", "lnk_snap_id" },
	{ "snaplevel_tbl",	"id",	"snap_id" },
	{ "snaplevel_lnk_tbl",	"id",	"snaplvl_pg_id" },
	{ "snaplevel_lnk_tbl",	"level", "snaplvl_level_id" },
	{ NULL, NULL, NULL }
};
    224 
/*
 * Tables specific to BACKEND_TYPE_NONPERSIST databases: none, so the list
 * holds only its terminator.
 */
static struct backend_tbl_info tbls_np[] = { /* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL }
};
    228 
/*
 * Indexes specific to BACKEND_TYPE_NONPERSIST databases: none, so the list
 * holds only its terminator.
 */
static struct backend_idx_info idxs_np[] = {	/* BACKEND_TYPE_NONPERSIST */
	{ NULL, NULL, NULL }
};
    232 
/*
 * Tables present in every backend type.  Each entry pairs a table name with
 * its column definitions; the list ends with { NULL, NULL }.
 */
static struct backend_tbl_info tbls_common[] = { /* all backend types */
	/*
	 * pg_tbl defines property groups.  They are associated with a single
	 * service or instance.  The pg_gen_id links them with the latest
	 * "edited" version of its properties.
	 */
	{
		"pg_tbl",
		"pg_id           INTEGER PRIMARY KEY,"
		"pg_parent_id    INTEGER NOT NULL,"
		"pg_name         CHAR(256) NOT NULL,"
		"pg_type         CHAR(256) NOT NULL,"
		"pg_flags        INTEGER NOT NULL,"
		"pg_gen_id       INTEGER NOT NULL"
	},

	/*
	 * prop_lnk_tbl links a particular pg_id and gen_id to a set of
	 * (prop_name, prop_type, val_id) trios.
	 */
	{
		"prop_lnk_tbl",
		"lnk_prop_id     INTEGER PRIMARY KEY,"
		"lnk_pg_id       INTEGER NOT NULL,"
		"lnk_gen_id      INTEGER NOT NULL,"
		"lnk_prop_name   CHAR(256) NOT NULL,"
		"lnk_prop_type   CHAR(2) NOT NULL,"
		"lnk_val_id      INTEGER"
	},

	/*
	 * value_tbl maps a value_id to a set of values.  For any given
	 * value_id, value_type is constant.  The table definition here
	 * is repeated in backend_check_upgrade(), and must be kept in sync.
	 */
	{
		"value_tbl",
		"value_id        INTEGER NOT NULL,"
		"value_type      CHAR(1) NOT NULL,"
		"value_value     VARCHAR NOT NULL,"
		"value_order     INTEGER DEFAULT 0"
	},

	/*
	 * id_tbl has one row per id space
	 */
	{
		"id_tbl",
		"id_name         STRING NOT NULL,"
		"id_next         INTEGER NOT NULL"
	},

	/*
	 * schema_version has a single row, which contains
	 * BACKEND_SCHEMA_VERSION at the time of creation.
	 */
	{
		"schema_version",
		"schema_version  INTEGER"
	},
	{ NULL, NULL }
};
    295 
/*
 * Indexes on the tbls_common tables: { table, short index name, columns }.
 * The list ends with an all-NULL entry.
 *
 * The indexing of value_tbl is repeated in backend_check_upgrade() and
 * must be kept in sync with the indexing specification here.
 */
static struct backend_idx_info idxs_common[] = { /* all backend types */
	{ "pg_tbl",		"parent", "pg_parent_id" },
	{ "pg_tbl",		"name",	"pg_parent_id, pg_name" },
	{ "pg_tbl",		"type",	"pg_parent_id, pg_type" },
	{ "prop_lnk_tbl",	"base",	"lnk_pg_id, lnk_gen_id" },
	{ "prop_lnk_tbl",	"val",	"lnk_val_id" },
	{ "value_tbl",		"id",	"value_id" },
	{ "id_tbl",		"id",	"id_name" },
	{ NULL, NULL, NULL }
};
    310 
/*
 * Argument block for run_single_int_callback().
 */
struct run_single_int_info {
	uint32_t	*rs_out;	/* where the parsed value is stored */
	/* set to REP_PROTOCOL_SUCCESS once a value has been stored */
	int		rs_result;
};
    315 
    316 /*ARGSUSED*/
    317 static int
    318 run_single_int_callback(void *arg, int columns, char **vals, char **names)
    319 {
    320 	struct run_single_int_info *info = arg;
    321 	uint32_t val;
    322 
    323 	char *endptr = vals[0];
    324 
    325 	assert(info->rs_result != REP_PROTOCOL_SUCCESS);
    326 	assert(columns == 1);
    327 
    328 	if (vals[0] == NULL)
    329 		return (BACKEND_CALLBACK_CONTINUE);
    330 
    331 	errno = 0;
    332 	val = strtoul(vals[0], &endptr, 10);
    333 	if ((val == 0 && endptr == vals[0]) || *endptr != 0 || errno != 0)
    334 		backend_panic("malformed integer \"%20s\"", vals[0]);
    335 
    336 	*info->rs_out = val;
    337 	info->rs_result = REP_PROTOCOL_SUCCESS;
    338 	return (BACKEND_CALLBACK_CONTINUE);
    339 }
    340 
/*
 * Row callback that unconditionally returns BACKEND_CALLBACK_ABORT; all of
 * its arguments are ignored.
 */
/*ARGSUSED*/
int
backend_fail_if_seen(void *arg, int columns, char **vals, char **names)
{
	return (BACKEND_CALLBACK_ABORT);
}
    347 
    348 /*
    349  * check to see if we can successfully start a transaction;  if not, the
    350  * filesystem is mounted read-only.
    351  */
    352 static int
    353 backend_is_readonly(struct sqlite *db, const char *path)
    354 {
    355 	int r;
    356 	statvfs64_t stat;
    357 
    358 	if (statvfs64(path, &stat) == 0 && (stat.f_flag & ST_RDONLY))
    359 		return (SQLITE_READONLY);
    360 
    361 	r = sqlite_exec(db,
    362 	    "BEGIN TRANSACTION; "
    363 	    "UPDATE schema_version SET schema_version = schema_version; ",
    364 	    NULL, NULL, NULL);
    365 	(void) sqlite_exec(db, "ROLLBACK TRANSACTION", NULL, NULL, NULL);
    366 	return (r);
    367 }
    368 
    369 static void
    370 backend_trace_sql(void *arg, const char *sql)
    371 {
    372 	sqlite_backend_t *be = arg;
    373 
    374 	if (backend_print_trace) {
    375 		(void) fprintf(stderr, "%d: %s\n", be->be_type, sql);
    376 	}
    377 }
    378 
/*
 * Backend storage (be_info) and the per-type lookup table (bes).  A NULL
 * bes[] slot is treated as "no such backend" by backend_panic().
 */
static sqlite_backend_t be_info[BACKEND_TYPE_TOTAL];
static sqlite_backend_t *bes[BACKEND_TYPE_TOTAL];

/*
 * For a native build, repositories are created from scratch, so upgrade
 * is not an issue.  This variable is implicitly protected by
 * bes[BACKEND_TYPE_NORMAL]->be_lock.
 */
#ifdef NATIVE_BUILD
static boolean_t be_normal_upgraded = B_TRUE;
#else
static boolean_t be_normal_upgraded = B_FALSE;
#endif	/* NATIVE_BUILD */
    392 
    393 /*
    394  * Has backend been upgraded? In nonpersistent case, answer is always
    395  * yes.
    396  */
    397 boolean_t
    398 backend_is_upgraded(backend_tx_t *bt)
    399 {
    400 	if (bt->bt_type == BACKEND_TYPE_NONPERSIST)
    401 		return (B_TRUE);
    402 	return (be_normal_upgraded);
    403 }
    404 
    405 #define	BACKEND_PANIC_TIMEOUT	(50 * MILLISEC)
    406 /*
    407  * backend_panic() -- some kind of database problem or corruption has been hit.
    408  * We attempt to quiesce the other database users -- all of the backend sql
    409  * entry points will call backend_panic(NULL) if a panic is in progress, as
    410  * will any attempt to start a transaction.
    411  *
    412  * We give threads holding a backend lock 50ms (BACKEND_PANIC_TIMEOUT) to
    413  * either drop the lock or call backend_panic().  If they don't respond in
    414  * time, we'll just exit anyway.
    415  */
    416 void
    417 backend_panic(const char *format, ...)
    418 {
    419 	int i;
    420 	va_list args;
    421 	int failed = 0;
    422 
    423 	(void) pthread_mutex_lock(&backend_panic_lock);
    424 	if (backend_panic_thread != 0) {
    425 		(void) pthread_mutex_unlock(&backend_panic_lock);
    426 		/*
    427 		 * first, drop any backend locks we're holding, then
    428 		 * sleep forever on the panic_cv.
    429 		 */
    430 		for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
    431 			if (bes[i] != NULL &&
    432 			    bes[i]->be_thread == pthread_self())
    433 				(void) pthread_mutex_unlock(&bes[i]->be_lock);
    434 		}
    435 		(void) pthread_mutex_lock(&backend_panic_lock);
    436 		for (;;)
    437 			(void) pthread_cond_wait(&backend_panic_cv,
    438 			    &backend_panic_lock);
    439 	}
    440 	backend_panic_thread = pthread_self();
    441 	(void) pthread_mutex_unlock(&backend_panic_lock);
    442 
    443 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
    444 		if (bes[i] != NULL && bes[i]->be_thread == pthread_self())
    445 			(void) pthread_mutex_unlock(&bes[i]->be_lock);
    446 	}
    447 
    448 	va_start(args, format);
    449 	configd_vcritical(format, args);
    450 	va_end(args);
    451 
    452 	for (i = 0; i < BACKEND_TYPE_TOTAL; i++) {
    453 		timespec_t rel;
    454 
    455 		rel.tv_sec = 0;
    456 		rel.tv_nsec = BACKEND_PANIC_TIMEOUT;
    457 
    458 		if (bes[i] != NULL && bes[i]->be_thread != pthread_self()) {
    459 			if (pthread_mutex_reltimedlock_np(&bes[i]->be_lock,
    460 			    &rel) != 0)
    461 				failed++;
    462 		}
    463 	}
    464 	if (failed) {
    465 		configd_critical("unable to quiesce database\n");
    466 	}
    467 
    468 	if (backend_panic_abort)
    469 		abort();
    470 
    471 	exit(CONFIGD_EXIT_DATABASE_BAD);
    472 }
    473 
    474 /*
    475  * Returns
    476  *   _SUCCESS
    477  *   _DONE - callback aborted query
    478  *   _NO_RESOURCES - out of memory (_FULL & _TOOBIG?)
    479  */
    480 static int
    481 backend_error(sqlite_backend_t *be, int error, char *errmsg)
    482 {
    483 	if (error == SQLITE_OK)
    484 		return (REP_PROTOCOL_SUCCESS);
    485 
    486 	switch (error) {
    487 	case SQLITE_ABORT:
    488 		free(errmsg);
    489 		return (REP_PROTOCOL_DONE);
    490 
    491 	case SQLITE_NOMEM:
    492 	case SQLITE_FULL:
    493 	case SQLITE_TOOBIG:
    494 		free(errmsg);
    495 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    496 
    497 	default:
    498 		backend_panic("%s: db error: %s", be->be_path, errmsg);
    499 		/*NOTREACHED*/
    500 	}
    501 }
    502 
    503 static void
    504 backend_backup_cleanup(const char **out_arg, ssize_t out_sz)
    505 {
    506 	char **out = (char **)out_arg;
    507 
    508 	while (out_sz-- > 0)
    509 		free(*out++);
    510 	free(out_arg);
    511 }
    512 
/*
 * Builds an inverse-time-sorted (newest first) array of the backup files
 * that already exist for a given backup base name.  The path is a single
 * buffer, and the pointers look like:
 *
 *	/this/is/a/full/path/to/repository-name-YYYYMMDD_HHMMSS
 *	^pathname		^	       ^(pathname+pathlen)
 *				basename
 *
 * dirname will either be pathname, or ".".
 *
 * The buffer is modified temporarily (it is cut at pathname+pathlen, and
 * at basename for the opendir()) and restored before returning.
 *
 * On success *out_arg is a malloc()ed array of strdup()ed file names
 * (names only, no directory part), or NULL if there were none; it is
 * released with backend_backup_cleanup().  On error *out_arg is NULL.
 *
 * Returns the number of elements in the array, 0 if there are no previous
 * backups, or -1 on error.
 */
static ssize_t
backend_backup_get_prev(char *pathname, size_t pathlen, const char ***out_arg)
{
	char b_start, b_end;
	DIR *dir;
	char **out = NULL;
	char *name, *p;
	char *dirname, *basename;
	char *pathend;
	struct dirent *ent;

	size_t count = 0;
	size_t baselen;

	/*
	 * year, month, day, hour, min, sec, plus an '_'.
	 */
	const size_t ndigits = 4 + 5*2 + 1;
	const size_t baroffset = 4 + 2*2;

	size_t idx;

	/* cut the path at pathlen, remembering the byte we overwrite */
	pathend = pathname + pathlen;
	b_end = *pathend;
	*pathend = '\0';

	basename = strrchr(pathname, '/');

	if (basename != NULL) {
		assert(pathend > pathname && basename < pathend);
		basename++;
		dirname = pathname;
	} else {
		basename = pathname;
		dirname = ".";
	}

	baselen = strlen(basename);

	/*
	 * munge the string temporarily for the opendir(), then restore it.
	 */
	b_start = basename[0];

	basename[0] = '\0';
	dir = opendir(dirname);
	basename[0] = b_start;		/* restore path */

	if (dir == NULL)
		goto fail;


	while ((ent = readdir(dir)) != NULL) {
		/*
		 * Must match:
		 *	basename-YYYYMMDD_HHMMSS
		 * or we ignore it.
		 */
		if (strncmp(ent->d_name, basename, baselen) != 0)
			continue;

		name = ent->d_name;
		if (name[baselen] != '-')
			continue;

		p = name + baselen + 1;

		/* digits everywhere except the '_' at baroffset */
		for (idx = 0; idx < ndigits; idx++) {
			char c = p[idx];
			if (idx == baroffset && c != '_')
				break;
			if (idx != baroffset && (c < '0' || c > '9'))
				break;
		}
		if (idx != ndigits || p[idx] != '\0')
			continue;

		/*
		 * We have a match.  insertion-sort it into our list.
		 */
		name = strdup(name);
		if (name == NULL)
			goto fail_closedir;
		p = strrchr(name, '-');

		/*
		 * Walk the list comparing "-timestamp" suffixes (then whole
		 * names on a tie).  Whenever the entry in hand sorts after
		 * the one in the slot, it takes the slot and the displaced
		 * entry is carried forward.  An exact duplicate is dropped.
		 */
		for (idx = 0; idx < count; idx++) {
			char *tmp = out[idx];
			char *tp = strrchr(tmp, '-');

			int cmp = strcmp(p, tp);
			if (cmp == 0)
				cmp = strcmp(name, tmp);

			if (cmp == 0) {
				free(name);
				name = NULL;
				break;
			} else if (cmp > 0) {
				out[idx] = name;
				name = tmp;
				p = tp;
			}
		}

		/* whatever is still in hand goes on the end of the list */
		if (idx == count) {
			char **new_out = realloc(out,
			    (count + 1) * sizeof (*out));

			if (new_out == NULL) {
				free(name);
				goto fail_closedir;
			}

			out = new_out;
			out[count++] = name;
		} else {
			assert(name == NULL);
		}
	}
	(void) closedir(dir);

	/* basename + baselen is pathend: put back the byte we cut */
	basename[baselen] = b_end;

	*out_arg = (const char **)out;
	return (count);

fail_closedir:
	(void) closedir(dir);
fail:
	basename[0] = b_start;
	*pathend = b_end;

	backend_backup_cleanup((const char **)out, count);

	*out_arg = NULL;
	return (-1);
}
    663 
    664 /*
    665  * Copies the repository path into out, a buffer of out_len bytes,
    666  * removes the ".db" (or whatever) extension, and, if name is non-NULL,
    667  * appends "-name" to it.  If name is non-NULL, it can fail with:
    668  *
    669  *	_TRUNCATED	will not fit in buffer.
    670  *	_BAD_REQUEST	name is not a valid identifier
    671  */
    672 static rep_protocol_responseid_t
    673 backend_backup_base(sqlite_backend_t *be, const char *name,
    674     char *out, size_t out_len)
    675 {
    676 	char *p, *q;
    677 	size_t len;
    678 
    679 	/*
    680 	 * for paths of the form /path/to/foo.db, we truncate at the final
    681 	 * '.'.
    682 	 */
    683 	(void) strlcpy(out, be->be_path, out_len);
    684 
    685 	p = strrchr(out, '/');
    686 	q = strrchr(out, '.');
    687 
    688 	if (p != NULL && q != NULL && q > p)
    689 		*q = 0;
    690 
    691 	if (name != NULL) {
    692 		len = strlen(out);
    693 		assert(len < out_len);
    694 
    695 		out += len;
    696 		out_len -= len;
    697 
    698 		len = strlen(name);
    699 
    700 		/*
    701 		 * verify that the name tag is entirely alphabetic,
    702 		 * non-empty, and not too long.
    703 		 */
    704 		if (len == 0 || len >= REP_PROTOCOL_NAME_LEN ||
    705 		    uu_check_name(name, UU_NAME_DOMAIN) < 0)
    706 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
    707 
    708 		if (snprintf(out, out_len, "-%s", name) >= out_len)
    709 			return (REP_PROTOCOL_FAIL_TRUNCATED);
    710 	}
    711 
    712 	return (REP_PROTOCOL_SUCCESS);
    713 }
    714 
    715 /*
    716  * See if a backup is needed.  We do a backup unless both files are
    717  * byte-for-byte identical.
    718  */
    719 static int
    720 backend_check_backup_needed(const char *rep_name, const char *backup_name)
    721 {
    722 	int repfd = open(rep_name, O_RDONLY);
    723 	int fd = open(backup_name, O_RDONLY);
    724 	struct stat s_rep, s_backup;
    725 	int c1, c2;
    726 
    727 	FILE *f_rep = NULL;
    728 	FILE *f_backup = NULL;
    729 
    730 	if (repfd < 0 || fd < 0)
    731 		goto fail;
    732 
    733 	if (fstat(repfd, &s_rep) < 0 || fstat(fd, &s_backup) < 0)
    734 		goto fail;
    735 
    736 	/*
    737 	 * if they are the same file, we need to do a backup to break the
    738 	 * hard link or symlink involved.
    739 	 */
    740 	if (s_rep.st_ino == s_backup.st_ino && s_rep.st_dev == s_backup.st_dev)
    741 		goto fail;
    742 
    743 	if (s_rep.st_size != s_backup.st_size)
    744 		goto fail;
    745 
    746 	if ((f_rep = fdopen(repfd, "r")) == NULL ||
    747 	    (f_backup = fdopen(fd, "r")) == NULL)
    748 		goto fail;
    749 
    750 	do {
    751 		c1 = getc(f_rep);
    752 		c2 = getc(f_backup);
    753 		if (c1 != c2)
    754 			goto fail;
    755 	} while (c1 != EOF);
    756 
    757 	if (!ferror(f_rep) && !ferror(f_backup)) {
    758 		(void) fclose(f_rep);
    759 		(void) fclose(f_backup);
    760 		(void) close(repfd);
    761 		(void) close(fd);
    762 		return (0);
    763 	}
    764 
    765 fail:
    766 	if (f_rep != NULL)
    767 		(void) fclose(f_rep);
    768 	if (f_backup != NULL)
    769 		(void) fclose(f_backup);
    770 	if (repfd >= 0)
    771 		(void) close(repfd);
    772 	if (fd >= 0)
    773 		(void) close(fd);
    774 	return (1);
    775 }
    776 
    777 /*
    778  * This interface is called to perform the actual copy
    779  *
    780  * Return:
    781  *	_FAIL_UNKNOWN		read/write fails
    782  *	_FAIL_NO_RESOURCES	out of memory
    783  *	_SUCCESS		copy succeeds
    784  */
    785 static rep_protocol_responseid_t
    786 backend_do_copy(const char *src, int srcfd, const char *dst,
    787     int dstfd, size_t *sz)
    788 {
    789 	char *buf;
    790 	off_t nrd, nwr, n, r_off = 0, w_off = 0;
    791 
    792 	if ((buf = malloc(8192)) == NULL)
    793 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    794 
    795 	while ((nrd = read(srcfd, buf, 8192)) != 0) {
    796 		if (nrd < 0) {
    797 			if (errno == EINTR)
    798 				continue;
    799 
    800 			configd_critical(
    801 			    "Backend copy failed: fails to read from %s "
    802 			    "at offset %d: %s\n", src, r_off, strerror(errno));
    803 			free(buf);
    804 			return (REP_PROTOCOL_FAIL_UNKNOWN);
    805 		}
    806 
    807 		r_off += nrd;
    808 
    809 		nwr = 0;
    810 		do {
    811 			if ((n = write(dstfd, &buf[nwr], nrd - nwr)) < 0) {
    812 				if (errno == EINTR)
    813 					continue;
    814 
    815 				configd_critical(
    816 				    "Backend copy failed: fails to write to %s "
    817 				    "at offset %d: %s\n", dst, w_off,
    818 				    strerror(errno));
    819 				free(buf);
    820 				return (REP_PROTOCOL_FAIL_UNKNOWN);
    821 			}
    822 
    823 			nwr += n;
    824 			w_off += n;
    825 
    826 		} while (nwr < nrd);
    827 	}
    828 
    829 	if (sz)
    830 		*sz = w_off;
    831 
    832 	free(buf);
    833 	return (REP_PROTOCOL_SUCCESS);
    834 }
    835 
    836 /*
    837  * Can return:
    838  *	_BAD_REQUEST		name is not valid
    839  *	_TRUNCATED		name is too long for current repository path
    840  *	_UNKNOWN		failed for unknown reason (details written to
    841  *				console)
    842  *	_BACKEND_READONLY	backend is not writable
    843  *	_NO_RESOURCES		out of memory
    844  *	_SUCCESS		Backup completed successfully.
    845  */
    846 static rep_protocol_responseid_t
    847 backend_create_backup_locked(sqlite_backend_t *be, const char *name)
    848 {
    849 	const char **old_list;
    850 	ssize_t old_sz;
    851 	ssize_t old_max = max_repository_backups;
    852 	ssize_t cur;
    853 	char *finalname;
    854 	char *finalpath;
    855 	char *tmppath;
    856 	int infd, outfd;
    857 	size_t len;
    858 	time_t now;
    859 	struct tm now_tm;
    860 	rep_protocol_responseid_t result;
    861 
    862 	if ((finalpath = malloc(PATH_MAX)) == NULL)
    863 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    864 
    865 	if ((tmppath = malloc(PATH_MAX)) == NULL) {
    866 		free(finalpath);
    867 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
    868 	}
    869 
    870 	if (be->be_readonly) {
    871 		result = REP_PROTOCOL_FAIL_BACKEND_READONLY;
    872 		goto out;
    873 	}
    874 
    875 	result = backend_backup_base(be, name, finalpath, PATH_MAX);
    876 	if (result != REP_PROTOCOL_SUCCESS)
    877 		goto out;
    878 
    879 	if (!backend_check_backup_needed(be->be_path, finalpath)) {
    880 		result = REP_PROTOCOL_SUCCESS;
    881 		goto out;
    882 	}
    883 
    884 	/*
    885 	 * remember the original length, and the basename location
    886 	 */
    887 	len = strlen(finalpath);
    888 	finalname = strrchr(finalpath, '/');
    889 	if (finalname != NULL)
    890 		finalname++;
    891 	else
    892 		finalname = finalpath;
    893 
    894 	(void) strlcpy(tmppath, finalpath, PATH_MAX);
    895 	if (strlcat(tmppath, "-tmpXXXXXX", PATH_MAX) >= PATH_MAX) {
    896 		result = REP_PROTOCOL_FAIL_TRUNCATED;
    897 		goto out;
    898 	}
    899 
    900 	now = time(NULL);
    901 	if (localtime_r(&now, &now_tm) == NULL) {
    902 		configd_critical(
    903 		    "\"%s\" backup failed: localtime(3C) failed: %s\n", name,
    904 		    be->be_path, strerror(errno));
    905 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    906 		goto out;
    907 	}
    908 
    909 	if (strftime(finalpath + len, PATH_MAX - len,
    910 	    "-%Y""%m""%d""_""%H""%M""%S", &now_tm) >= PATH_MAX - len) {
    911 		result = REP_PROTOCOL_FAIL_TRUNCATED;
    912 		goto out;
    913 	}
    914 
    915 	infd = open(be->be_path, O_RDONLY);
    916 	if (infd < 0) {
    917 		configd_critical("\"%s\" backup failed: opening %s: %s\n", name,
    918 		    be->be_path, strerror(errno));
    919 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    920 		goto out;
    921 	}
    922 
    923 	outfd = mkstemp(tmppath);
    924 	if (outfd < 0) {
    925 		configd_critical("\"%s\" backup failed: mkstemp(%s): %s\n",
    926 		    name, tmppath, strerror(errno));
    927 		(void) close(infd);
    928 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    929 		goto out;
    930 	}
    931 
    932 	if ((result = backend_do_copy((const char *)be->be_path, infd,
    933 	    (const char *)tmppath, outfd, NULL)) != REP_PROTOCOL_SUCCESS)
    934 		goto fail;
    935 
    936 	/*
    937 	 * grab the old list before doing our re-name.
    938 	 */
    939 	if (old_max > 0)
    940 		old_sz = backend_backup_get_prev(finalpath, len, &old_list);
    941 
    942 	if (rename(tmppath, finalpath) < 0) {
    943 		configd_critical(
    944 		    "\"%s\" backup failed: rename(%s, %s): %s\n",
    945 		    name, tmppath, finalpath, strerror(errno));
    946 		result = REP_PROTOCOL_FAIL_UNKNOWN;
    947 		goto fail;
    948 	}
    949 
    950 	tmppath[len] = 0;	/* strip -XXXXXX, for reference symlink */
    951 
    952 	(void) unlink(tmppath);
    953 	if (symlink(finalname, tmppath) < 0) {
    954 		configd_critical(
    955 		    "\"%s\" backup completed, but updating "
    956 		    "\"%s\" symlink to \"%s\" failed: %s\n",
    957 		    name, tmppath, finalname, strerror(errno));
    958 	}
    959 
    960 	if (old_max > 0 && old_sz > 0) {
    961 		/* unlink all but the first (old_max - 1) files */
    962 		for (cur = old_max - 1; cur < old_sz; cur++) {
    963 			(void) strlcpy(finalname, old_list[cur],
    964 			    PATH_MAX - (finalname - finalpath));
    965 			if (unlink(finalpath) < 0)
    966 				configd_critical(
    967 				    "\"%s\" backup completed, but removing old "
    968 				    "file \"%s\" failed: %s\n",
    969 				    name, finalpath, strerror(errno));
    970 		}
    971 
    972 		backend_backup_cleanup(old_list, old_sz);
    973 	}
    974 
    975 	result = REP_PROTOCOL_SUCCESS;
    976 
    977 fail:
    978 	(void) close(infd);
    979 	(void) close(outfd);
    980 	if (result != REP_PROTOCOL_SUCCESS)
    981 		(void) unlink(tmppath);
    982 
    983 out:
    984 	free(finalpath);
    985 	free(tmppath);
    986 
    987 	return (result);
    988 }
    989 
    990 /*
    991  * Check if value_tbl has been upgraded in the main database,  and
    992  * if not (if the value_order column is not present),  and do_upgrade is true,
    993  * upgrade value_tbl in repository to contain the additional value_order
    994  * column. The version of sqlite used means ALTER TABLE is not
    995  * available, so we cannot simply use "ALTER TABLE value_tbl ADD COLUMN".
    996  * Rather we need to create a temporary table with the additional column,
    997  * import the value_tbl, drop the original value_tbl, recreate the value_tbl
    998  * with the additional column, import the values from value_tbl_tmp,
    999  * reindex and finally drop value_tbl_tmp.  During boot, we wish to check
   1000  * if the repository has been upgraded before it is writable,  so that
   1001  * property value retrieval can use the appropriate form of the SELECT
   1002  * statement that retrieves property values.  As a result, we need to check
   1003  * if the repository has been upgraded prior to the point when we can
   1004  * actually carry out the update.
   1005  */
   1006 void
   1007 backend_check_upgrade(sqlite_backend_t *be, boolean_t do_upgrade)
   1008 {
   1009 	char *errp;
   1010 	int r;
   1011 
   1012 	if (be_normal_upgraded)
   1013 		return;
   1014 	/*
   1015 	 * Test if upgrade is needed. If value_order column does not exist,
   1016 	 * we need to upgrade the schema.
   1017 	 */
   1018 	r = sqlite_exec(be->be_db, "SELECT value_order FROM value_tbl LIMIT 1;",
   1019 	    NULL, NULL, NULL);
   1020 	if (r == SQLITE_ERROR && do_upgrade) {
   1021 		/* No value_order column - needs upgrade */
   1022 		configd_info("Upgrading SMF repository format...");
   1023 		r = sqlite_exec(be->be_db,
   1024 		    "BEGIN TRANSACTION; "
   1025 		    "CREATE TABLE value_tbl_tmp ( "
   1026 		    "value_id   INTEGER NOT NULL, "
   1027 		    "value_type CHAR(1) NOT NULL, "
   1028 		    "value_value VARCHAR NOT NULL, "
   1029 		    "value_order INTEGER DEFAULT 0); "
   1030 		    "INSERT INTO value_tbl_tmp "
   1031 		    "(value_id, value_type, value_value) "
   1032 		    "SELECT value_id, value_type, value_value FROM value_tbl; "
   1033 		    "DROP TABLE value_tbl; "
   1034 		    "CREATE TABLE value_tbl( "
   1035 		    "value_id   INTEGER NOT NULL, "
   1036 		    "value_type CHAR(1) NOT NULL, "
   1037 		    "value_value VARCHAR NOT NULL, "
   1038 		    "value_order INTEGER DEFAULT 0); "
   1039 		    "INSERT INTO value_tbl SELECT * FROM value_tbl_tmp; "
   1040 		    "CREATE INDEX value_tbl_id ON value_tbl (value_id); "
   1041 		    "DROP TABLE value_tbl_tmp; "
   1042 		    "COMMIT TRANSACTION; "
   1043 		    "VACUUM; ",
   1044 		    NULL, NULL, &errp);
   1045 		if (r == SQLITE_OK) {
   1046 			configd_info("SMF repository upgrade is complete.");
   1047 		} else {
   1048 			backend_panic("%s: repository upgrade failed: %s",
   1049 			    be->be_path, errp);
   1050 			/* NOTREACHED */
   1051 		}
   1052 	}
   1053 	if (r == SQLITE_OK)
   1054 		be_normal_upgraded = B_TRUE;
   1055 	else
   1056 		be_normal_upgraded = B_FALSE;
   1057 }
   1058 
   1059 static int
   1060 backend_check_readonly(sqlite_backend_t *be, int writing, hrtime_t t)
   1061 {
   1062 	char *errp;
   1063 	struct sqlite *new;
   1064 	int r;
   1065 
   1066 	assert(be->be_readonly);
   1067 	assert(be == bes[BACKEND_TYPE_NORMAL]);
   1068 
   1069 	/*
   1070 	 * If we don't *need* to be writable, only check every once in a
   1071 	 * while.
   1072 	 */
   1073 	if (!writing) {
   1074 		if ((uint64_t)(t - be->be_lastcheck) <
   1075 		    BACKEND_READONLY_CHECK_INTERVAL)
   1076 			return (REP_PROTOCOL_SUCCESS);
   1077 		be->be_lastcheck = t;
   1078 	}
   1079 
   1080 	new = sqlite_open(be->be_path, 0600, &errp);
   1081 	if (new == NULL) {
   1082 		backend_panic("reopening %s: %s\n", be->be_path, errp);
   1083 		/*NOTREACHED*/
   1084 	}
   1085 	r = backend_is_readonly(new, be->be_path);
   1086 
   1087 	if (r != SQLITE_OK) {
   1088 		sqlite_close(new);
   1089 		if (writing)
   1090 			return (REP_PROTOCOL_FAIL_BACKEND_READONLY);
   1091 		return (REP_PROTOCOL_SUCCESS);
   1092 	}
   1093 
   1094 	/*
   1095 	 * We can write!  Swap the db handles, mark ourself writable,
   1096 	 * upgrade if necessary,  and make a backup.
   1097 	 */
   1098 	sqlite_close(be->be_db);
   1099 	be->be_db = new;
   1100 	be->be_readonly = 0;
   1101 
   1102 	if (be->be_type == BACKEND_TYPE_NORMAL)
   1103 		backend_check_upgrade(be, B_TRUE);
   1104 
   1105 	if (backend_create_backup_locked(be, REPOSITORY_BOOT_BACKUP) !=
   1106 	    REP_PROTOCOL_SUCCESS) {
   1107 		configd_critical(
   1108 		    "unable to create \"%s\" backup of \"%s\"\n",
   1109 		    REPOSITORY_BOOT_BACKUP, be->be_path);
   1110 	}
   1111 
   1112 	return (REP_PROTOCOL_SUCCESS);
   1113 }
   1114 
   1115 /*
   1116  * If t is not BACKEND_TYPE_NORMAL, can fail with
   1117  *   _BACKEND_ACCESS - backend does not exist
   1118  *
   1119  * If writing is nonzero, can also fail with
   1120  *   _BACKEND_READONLY - backend is read-only
   1121  */
   1122 static int
   1123 backend_lock(backend_type_t t, int writing, sqlite_backend_t **bep)
   1124 {
   1125 	sqlite_backend_t *be = NULL;
   1126 	hrtime_t ts, vts;
   1127 
   1128 	*bep = NULL;
   1129 
   1130 	assert(t == BACKEND_TYPE_NORMAL ||
   1131 	    t == BACKEND_TYPE_NONPERSIST);
   1132 
   1133 	be = bes[t];
   1134 	if (t == BACKEND_TYPE_NORMAL)
   1135 		assert(be != NULL);		/* should always be there */
   1136 
   1137 	if (be == NULL)
   1138 		return (REP_PROTOCOL_FAIL_BACKEND_ACCESS);
   1139 
   1140 	if (backend_panic_thread != 0)
   1141 		backend_panic(NULL);		/* don't proceed */
   1142 
   1143 	ts = gethrtime();
   1144 	vts = gethrvtime();
   1145 	(void) pthread_mutex_lock(&be->be_lock);
   1146 	UPDATE_TOTALS_WR(be, writing, bt_lock, ts, vts);
   1147 
   1148 	if (backend_panic_thread != 0) {
   1149 		(void) pthread_mutex_unlock(&be->be_lock);
   1150 		backend_panic(NULL);		/* don't proceed */
   1151 	}
   1152 	be->be_thread = pthread_self();
   1153 
   1154 	if (be->be_readonly) {
   1155 		int r;
   1156 		assert(t == BACKEND_TYPE_NORMAL);
   1157 
   1158 		r = backend_check_readonly(be, writing, ts);
   1159 		if (r != REP_PROTOCOL_SUCCESS) {
   1160 			be->be_thread = 0;
   1161 			(void) pthread_mutex_unlock(&be->be_lock);
   1162 			return (r);
   1163 		}
   1164 	}