// file $Id: gdbmtext.c 9 2005-05-25 20:52:23Z basile $
/// on most Linux system compile it with
///  gcc -Wall -O -g  gdbmtext.c -o gdbmtext -lgdbm

/* Copyright (C) 2005 by Basile STARYNKEVITCH 
   <basile@starynkevitch.net>  France (92340 Bourg La Reine)

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA or get it from http://www.gnu.org/licenses/

________________________________________________________________

  This small utility dumps or loads GDBM files in a textual format, where
  each datum is represented using hex-encoding for funny characters
  (non-printable, or space, quote, double quote, percent, hash) like
  in URLs, e.g. %0a for the newline character (0xa hex, 10 decimal), etc...

  When dumping a GDBM file, the keys are sorted, hence the dump doesn't
  depend on the order of insertions, and a small change in the GDBM
  associations should give a small change of the dumped textual file.

  When loading a GDBM file, lines starting with # are skipped


 */

#define _GNU_SOURCE

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <gdbm.h>
#include <ctype.h>
#include <string.h>


/* Parse one whitespace-delimited token from the text at *ppc and return
   it as a freshly allocated datum.

   Leading whitespace is skipped.  Inside the token, every "%xx" sequence
   (a percent sign followed by two hexadecimal digits) is decoded into the
   single byte it denotes; every other character is copied verbatim.

   On return *ppc points just past the token (at the terminating
   whitespace or NUL), also when the allocation failed.  The returned
   dptr is obtained from calloc and must be released by the caller with
   free; when the allocation fails the result is { 0, 0 }.  */
datum
indatum (char **ppc)
{
  datum res = { 0, 0 };
  char hex[3];
  char *pc, *start, *end, *wp;
  int siz = 0;

  /* The <ctype.h> classifiers need an unsigned char value (or EOF), so
     every character is cast before being tested.  */
  pc = *ppc;
  while (*pc && isspace ((unsigned char) *pc))
    pc++;
  start = pc;

  /* First pass: find the end of the token and its decoded length; an
     escape takes three input characters but yields one output byte.  */
  for (; *pc && !isspace ((unsigned char) *pc); pc++, siz++) {
    if (*pc == '%' && isxdigit ((unsigned char) pc[1])
        && isxdigit ((unsigned char) pc[2]))
      pc += 2;
  }
  end = pc;
  *ppc = end;

  wp = calloc (siz, 1);
  if (!wp)
    return res;
  res.dptr = wp;

  /* Second pass: decode.  The two hex digits of an escape must be
     skipped here exactly as in the first pass, otherwise they would be
     copied as well and written past the end of the buffer.  */
  for (pc = start; pc < end; pc++) {
    if (*pc == '%' && isxdigit ((unsigned char) pc[1])
        && isxdigit ((unsigned char) pc[2])) {
      hex[0] = pc[1];
      hex[1] = pc[2];
      hex[2] = 0;
      *(wp++) = (char) strtol (hex, 0, 16);
      pc += 2;
    } else
      *(wp++) = *pc;
  }
  res.dsize = siz;
  return res;
}

/* Write the bytes of datum d to stream f in the textual dump format.

   Printable ASCII characters above space (up to 126) are written as is,
   except for percent, single quote, double quote and hash.  Every other
   byte is written as "%xx" with exactly two lowercase hexadecimal
   digits.  Nothing is written when d.dptr is null.  */
void
outdatum (FILE * f, datum d)
{
  const unsigned char *pc, *end;
  if (!d.dptr)
    return;
  /* Bytes are read as unsigned char: with a signed char, a byte >= 0x80
     would be sign-extended when passed to fprintf and come out as
     "%ffffffxx" instead of "%xx".  */
  pc = (const unsigned char *) d.dptr;
  end = pc + d.dsize;
  for (; pc < end; pc++) {
    unsigned int c = *pc;
    if (c > ' ' && c <= 126 && c != '%' && c != '\'' && c != '\"'
        && c != '#')
      putc ((int) c, f);
    else
      fprintf (f, "%%%02x", c);
  }
}

/* qsort comparison callback for two datum elements.

   A datum of size zero sorts after any non-empty one (two empty data
   compare equal).  Non-empty data are ordered by memcmp over their
   common length; when one is a prefix of the other, the shorter one
   comes first.  */
static int
cmpdatum (const void *p1, const void *p2)
{
  const datum *lhs = p1;
  const datum *rhs = p2;
  int llen = lhs->dsize;
  int rlen = rhs->dsize;
  int common, order;

  /* Empty data are handled first so memcmp is never called for them.  */
  if (llen == 0 || rlen == 0) {
    if (llen == rlen)
      return 0;
    return (llen == 0) ? 1 : -1;
  }

  common = (llen < rlen) ? llen : rlen;
  order = memcmp (lhs->dptr, rhs->dptr, common);
  if (order != 0)
    return order;

  /* Equal over the common prefix: the length decides.  */
  if (llen < rlen)
    return -1;
  return llen - rlen;
}

/* Dump every association of the database dbf to stream f, one per line,
   as "key value" with both parts encoded by outdatum.

   All keys are first collected into a growable table, then sorted with
   cmpdatum so the output order does not follow the database traversal
   order.  Each key's data and each fetched value are freed once
   written.  The stream is flushed before returning but not closed.
   The process exits with status 1 when the key table cannot be
   allocated or grown.  */
void
dumpgdbm (GDBM_FILE dbf, FILE * f)
{
  int capacity = 1024;
  int count = 0;
  int pos;
  datum cur;
  datum *keys = calloc (capacity, sizeof (datum));

  if (keys == NULL) {
    fprintf (stderr, "gdbmtext failed to allocate table of %d data\n",
             capacity);
    exit (1);
  }

  /* Collect all the keys, enlarging the table when it is nearly full.  */
  for (cur = gdbm_firstkey (dbf); cur.dptr != NULL;
       cur = gdbm_nextkey (dbf, cur)) {
    if (count + 1 >= capacity) {
      int bigger = ((5 * capacity / 4 + 2048) | 0x3ff) + 1;
      datum *grown = calloc (bigger, sizeof (datum));
      if (grown == NULL) {
        fprintf (stderr, "gdbmtext failed to grow table of %d data\n",
                 bigger);
        exit (1);
      }
      memcpy (grown, keys, capacity * sizeof (datum));
      free (keys);
      keys = grown;
      capacity = bigger;
    }
    keys[count++] = cur;
  }

  qsort (keys, count, sizeof (datum), cmpdatum);

  /* Emit the sorted associations, releasing each key as we go.  */
  for (pos = 0; pos < count; pos++) {
    datum *kp = &keys[pos];
    datum content;

    outdatum (f, *kp);
    fputc (' ', f);
    content = gdbm_fetch (dbf, *kp);
    if (content.dptr != NULL) {
      outdatum (f, content);
      free (content.dptr);
    }
    free (kp->dptr);
    kp->dptr = 0;
    kp->dsize = 0;
    fputc ('\n', f);
  }

  free (keys);
  fflush (f);
}				/* end of dumpgdbm */



/* Load associations from the text stream f into the database dbf.

   Each line is read with getline.  Lines whose first character is '#'
   are skipped as comments.  Otherwise the first two whitespace-separated
   tokens of the line are decoded with indatum as key and value, and
   stored with GDBM_REPLACE when the key is not empty.  A failed store is
   reported on stderr and loading continues with the next line.

   Reading stops as soon as getline fails, whether at end of file or on a
   read error.  The stream is not closed.  */
void
loadgdbm (GDBM_FILE dbf, FILE * f)
{
  char *line = NULL;		/* buffer owned by getline, reused per line */
  size_t lsiz = 0;

  /* Looping on the result of getline (instead of testing feof) ends the
     loop on a read error too; feof alone would stay false forever.  */
  while (getline (&line, &lsiz, f) > 0) {
    char *pc = line;
    datum key, val;

    // skip any comment lines starting with #
    if (pc[0] == '#')
      continue;

    key = indatum (&pc);
    val = indatum (&pc);
    if (key.dsize > 0 && gdbm_store (dbf, key, val, GDBM_REPLACE) != 0)
      fprintf (stderr, "gdbmtext failed to store an association: %s\n",
               gdbm_strerror (gdbm_errno));
    free (key.dptr);
    free (val.dptr);
  }

  /* getline may have allocated the buffer even when its last call
     failed, so it is released unconditionally.  */
  free (line);
}				/* end of loadgdbm */


/* Print the command line help on stderr and exit with EXIT_FAILURE.  */
static void
show_usage (const char *progname)
{
  fprintf (stderr,
           "usage: %s\n"
           "   -b GDBMfile ## the GDBM base dumped or loaded\n"
           "   #### either -i or -o where\n"
           "   -i input ## the (loaded) text file or - for stdin\n"
           "   -o output ## the (dumped) text file or - for stdout\n"
           "   -s blocksize ## default is 1024 bytes\n"
           "   #### if the text file starts with a | it is a piped command\n"
           "   -h ## this help\n"
           "### version $Id: gdbmtext.c 9 2005-05-25 20:52:23Z basile $ built "
           __DATE__ "@" __TIME__ "\n", progname);
  exit (EXIT_FAILURE);
}

/* Return the value of the option found at argv[*pix]: either the text
   glued to the option letter ("-bfile") or the following argument
   ("-b file"), in which case *pix is advanced to it.  Shows the usage
   and exits when the value is missing.  */
static char *
option_value (int argc, char **argv, int *pix)
{
  char *curarg = argv[*pix];
  if (curarg[2])
    return curarg + 2;
  if (*pix + 1 >= argc)
    show_usage (argv[0]);
  *pix += 1;
  return argv[*pix];
}

/* Entry point: parse the options, then dump the GDBM file to the text
   output (-o) and/or load the text input (-i) into the GDBM file.
   A text file name starting with '|' is run as a piped command and a
   lone "-" means stdout (resp. stdin).  Arguments not starting with '-'
   are ignored.  Returns EXIT_SUCCESS, or EXIT_FAILURE when the text
   output could not be closed cleanly; other errors exit directly with
   EXIT_FAILURE.  */
int
main (int argc, char **argv)
{
  char *gdbmname = 0;
  char *inputname = 0;
  char *outputname = 0;
  int ix;
  int blksiz = 1024;
  int status = EXIT_SUCCESS;

  for (ix = 1; ix < argc; ix++) {
    char *curarg = argv[ix];
    if (curarg[0] != '-')
      continue;
    switch (curarg[1]) {
    case 'b':
      gdbmname = option_value (argc, argv, &ix);
      break;
    case 'i':
      inputname = option_value (argc, argv, &ix);
      break;
    case 'o':
      outputname = option_value (argc, argv, &ix);
      break;
    case 's':
      blksiz = atoi (option_value (argc, argv, &ix));
      break;
    case 'h':
    default:
      show_usage (argv[0]);
      break;
    }
  }
  if (!gdbmname || (!inputname && !outputname))
    show_usage (argv[0]);

  if (outputname) {
    FILE *outf;
    int closefailed = 0;
    GDBM_FILE dbf = gdbm_open (gdbmname, blksiz, GDBM_READER, 0, 0);
    if (!dbf) {
      fprintf (stderr,
               "%s failed to open dumped GDBM file %s for read : %s\n",
               argv[0], gdbmname, gdbm_strerror (gdbm_errno));
      exit (EXIT_FAILURE);
    }
    if (outputname[0] == '|')
      outf = popen (outputname + 1, "w");
    else if (outputname[0] == '-' && !outputname[1])
      outf = stdout;
    else
      outf = fopen (outputname, "w");
    if (!outf) {
      fprintf (stderr, "failed to open text output %s: %s\n",
               outputname, strerror (errno));
      exit (EXIT_FAILURE);
    }
    dumpgdbm (dbf, outf);
    /* Close the output like the input below: pclose waits for the piped
       command, and a failing close reveals an incomplete dump.  */
    if (outputname[0] == '|')
      closefailed = (pclose (outf) == -1);
    else if (outf != stdout)
      closefailed = (fclose (outf) != 0);
    if (closefailed) {
      fprintf (stderr, "failed to close text output %s: %s\n",
               outputname, strerror (errno));
      status = EXIT_FAILURE;
    }
    gdbm_close (dbf);
  }

  if (inputname) {
    FILE *inf;
    GDBM_FILE dbf = gdbm_open (gdbmname, blksiz, GDBM_WRCREAT, 0640, 0);
    if (!dbf) {
      fprintf (stderr,
               "%s failed to open loaded GDBM file %s for write : %s\n",
               argv[0], gdbmname, gdbm_strerror (gdbm_errno));
      exit (EXIT_FAILURE);
    }
    if (inputname[0] == '|')
      inf = popen (inputname + 1, "r");
    else if (inputname[0] == '-' && !inputname[1])
      inf = stdin;
    else
      inf = fopen (inputname, "r");
    if (!inf) {
      fprintf (stderr, "failed to open text input %s: %s\n",
               inputname, strerror (errno));
      exit (EXIT_FAILURE);
    }
    loadgdbm (dbf, inf);
    if (inputname[0] == '|')
      pclose (inf);
    else if (inf != stdin)
      fclose (inf);
    gdbm_close (dbf);
  }
  return status;
}

// eof $Id: gdbmtext.c 9 2005-05-25 20:52:23Z basile $
