/*
 *   cddbd - CD Database Protocol Server
 *
 *   Copyright (C) 1996  Steve Scherf
 *   Email: steve@moonsoft.com
 *   Moondog Software Productions - makers of fine public domain software.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#ifndef LINT
/* Revision identification string for this source file; omitted when LINT is defined. */
static char *_fuzzy_c_ident_ = "@(#)$Id: fuzzy.c,v 1.5 1996/12/20 09:26:41 steve Exp $";
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <dirent.h>
#include <stdio.h>
#include <time.h>
#include "access.h"
#include "list.h"
#include "cddbd.h"


/* Preprocessor definitions. */

#define FUZZY_MAGIC	0x32FD91C4
#define FUZZY_VERSION	1


/* structure definitions. */
/*
 * Header stored at the start of the fuzzy matching hash file.
 *
 * The structure is written with fwrite() and read back with fread() as
 * raw bytes, so its layout is part of the file format.
 */
typedef struct fuzzy_hdr {
	int magic;		/* FUZZY_MAGIC in a completely written file. */
	int version;		/* FUZZY_VERSION of the file format. */
	int count;		/* Total number of hash entries in the file. */
	int tcount[CDDBMAXTRK];	/* tcount[n - 1]: number of entries with n tracks. */
	char categ[CDDBMAXDBDIR][CDDBBUFSIZ];	/* Category names, indexed by catind. */
} fhdr_t;


/*
 * One record of the fuzzy matching hash file, describing a single
 * database entry.  Records follow the header and are written as raw
 * structures, sorted by track count and then by disc length.
 */
typedef struct fuzzy_hash {
	unsigned int discid;	/* Disc ID (parsed from the hex file name). */
	int trks;		/* Track count. */
	short disclen;		/* Length of disc in seconds. */
	short catind;		/* Category index into fhdr_t.categ. */
} fhash_t;


/*
 * Node of the singly linked list that collects hash records while the
 * hash file is being built.  fhash must stay the first member: the
 * builder frees nodes through the address of this field.
 */
typedef struct fuzzy_list {
	fhash_t fhash;			/* The record itself. */
	struct fuzzy_list *next;	/* Next node, or 0 at the end of the list. */
} flist_t;


/* Variable declarations. */

/* Name of the fuzzy matching hash file inside hashdir. */
char *fuzzfile = "fuzzy_index";
/* Name of the temporary file (inside hashdir) a new hash is built in. */
char *tfuzzfile = "fuzzy_tmp";

/* Hash file header: filled in while building, read into while querying. */
fhdr_t fhdr;
/* Head of the list of records collected by cddbd_build_fuzzy(). */
flist_t *flist;


/* Prototypes. */

/* qsort() comparator over fhash_t pointers: by track count, then disc length. */
int comp_fhash(void *, void *);
/* Reads one database file into a hash record, optional offset table and title. */
db_errno_t read_db_fuzzy(char *, fhash_t *, int *, char *, char *);


/*
 * do_cddb_query_fuzzy - answer a query with inexact (fuzzy) matches.
 *
 * The arguments are taken from args->arg[], relative to args->nextarg:
 *   +1              disc ID (hex)
 *   +2              number of tracks (ntrks)
 *   +3 .. +ntrks+2  track offsets
 *   +ntrks+3        disc length in seconds
 *
 * The hash file is searched among the records with the same track count
 * for entries whose length is within tolerance, and each candidate's
 * database file is then compared offset by offset with is_fuzzy_match().
 *
 * Responses written to stdout:
 *   500  malformed arguments
 *   503  the hash file could not be opened, read or validated
 *   211  list of matches ("categ discid title"), terminated by "."
 *   202  no match
 */
/* ARGSUSED */
void
do_cddb_query_fuzzy(arg_t *args)
{
	FILE *fp;
	int i;
	int x;
	int ntrks;
	int nsecs;
	int found;
	int catind;
	long off;
	int offtab1[CDDBMAXTRK];
	int offtab2[CDDBMAXTRK];
	unsigned int discid;
	char tit[CDDBBUFSIZ];
	char file[CDDBBUFSIZ];
	char errstr[CDDBBUFSIZ];
	fhash_t *fh;
	fhash_t fhash;

	/*
	 * Shouldn't be syntax errors here, but check anyway.  The track
	 * count is range checked before it is used to locate the length
	 * argument and to fill offtab1[], which holds CDDBMAXTRK entries.
	 */
	if(sscanf(args->arg[args->nextarg + 1], "%x", &discid) != 1 ||
	    sscanf(args->arg[args->nextarg + 2], "%d", &ntrks) != 1 ||
	    ntrks < 1 || ntrks > CDDBMAXTRK ||
	    sscanf(args->arg[args->nextarg + ntrks + 3], "%d", &nsecs)
	    != 1) {
		printf("500 Command syntax error.\r\n");
		return;
	}

	for(i = 0; i < ntrks; i++) {
		if(sscanf(args->arg[args->nextarg + i + 3], "%d",
		    &offtab1[i]) != 1 || offtab1[i] < 0) {
			printf("500 Command syntax error.\r\n");
			return;
		}
	}

	/* Open the hash file. */
	cddbd_snprintf(file, sizeof(file), "%s/%s", hashdir, fuzzfile);

	(void)cddbd_lock(lock_hash, 1);

	if((fp = fopen(file, "r")) == NULL) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't open hash file %s (%d).", file, errno);

		/* Don't leave the hash lock held on the error path. */
		cddbd_unlock(lock_hash);

		printf("503 Server error performing fuzzy matching.\r\n");
		return;
	}

	/* Free the lock, now that we have a handle on the file. */
	cddbd_unlock(lock_hash);

	/* Read the hash file header. */
	if(fread(&fhdr, sizeof(fhdr), 1, fp) != 1) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't read hash file header (%d).", errno);
		printf("503 Server error performing fuzzy matching.\r\n");
		fclose(fp);
		return;
	}

	/* Validate the hash file. */
	if(fhdr.magic != FUZZY_MAGIC || fhdr.version != FUZZY_VERSION) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Bad fuzzy matching hash file: %s.", file);

		/* The client must get a response on this path as well. */
		printf("503 Server error performing fuzzy matching.\r\n");
		fclose(fp);
		return;
	}

	/*
	 * Compute the offset of the first entry we want to examine:
	 * skip the records of all discs with fewer tracks.
	 */
	for(i = 0, off = 0; i < (ntrks - 1); i++)
		off += fhdr.tcount[i];

	off *= (long)sizeof(fhash_t);

	if(fseek(fp, off, SEEK_CUR) != 0) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Corrupt fuzzy matching hash file: %s.", file);
		printf("503 Server error performing fuzzy matching.\r\n");
		fclose(fp);
		return;
	}

	/* Check entries. */
	found = 0;
	fh = &fhash;

	/* Compute the max tolerance for the CD length. */
	x = fuzzy_factor * (ntrks + fuzzy_div - 1) / fuzzy_div
	    / CDDBFRAMEPERSEC;
	if(x < 1)
		x = 1;

	while(fread(fh, sizeof(fhash_t), 1, fp) == 1) {
		/* Ran into the records of discs with more tracks. */
		if(fh->trks != ntrks)
			break;

		catind = fh->catind;

		/* Check to see if length is within tolerance. */
		i = fh->disclen - nsecs;

		/*
		 * All entries beyond this point are not matches: the
		 * records are sorted by length within a track count.
		 */
		if(i > x)
			break;

		/* Find abs. */
		if(i < 0)
			i *= -1;

		/* Not a match if the allowable diff is < than the actual. */
		if(i > x)
			continue;

		/* Never index the category table with a bogus value. */
		if(catind < 0 || catind >= CDDBMAXDBDIR)
			continue;

		cddbd_snprintf(file, sizeof(file), "%s/%s/%08x",
		    cddbdir, fhdr.categ[catind], fh->discid);

		/* Parse the database file (this refreshes fh->trks). */
		if(read_db_fuzzy(file, fh, offtab2, tit, errstr) != DE_NO_ERROR)
			continue;

		/* Sanity check. */
		if(fh->trks != ntrks)
			continue;

		if(!is_fuzzy_match(offtab1, offtab2, ntrks))
			continue;

		/* Found a match. Now print it. */
		if(!found) {
			printf("211 Found inexact matches, list ");
			printf("follows (until terminating `.')\r\n");
		}

		found++;

		printf("%s %08x %s\r\n", fhdr.categ[catind], fh->discid, tit);
	}

	fclose(fp);

	if(found) {
		printf(".\r\n");

		cddbd_log(LOG_ACCESS | LOG_FUZZY,
		    "Query: %08x found %d fuzzy %s",
		    discid, found, (found == 1) ? "match" : "matches");
	}
	else {
		printf("202 No match for disc ID %08x.\r\n", discid);
		cddbd_log(LOG_ACCESS | LOG_UQUERY,
		    "Query: %08x unsuccessful", discid);
	}

	return;
}


void
cddbd_build_fuzzy(void)
{
	int i;
	int bad;
	int links;
	int noread;
	int nohash;
	int entries;
	FILE *fp;
	DIR *dirp;
	flist_t *fl;
	fhash_t **ftab;
	lhead_t *lh;
	struct stat sbuf;
	struct dirent *dp;
	char file[CDDBBUFSIZ];
	char file2[CDDBBUFSIZ];
	char errstr[CDDBBUFSIZ];

	if(hashdir[0] == '\0') {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "No hash dir defined in access file.");
		quit(QUIT_ERR);
	}

	/* Check for hashdir, and create if it doesn't exist. */
	if(stat(hashdir, &sbuf)) {
		if(mkdir(hashdir, (mode_t)dir_mode)) {
			cddbd_log(LOG_ERR | LOG_HASH,
			    "Failed to create hash dir %s (%d).",
			    hashdir, errno);
			quit(QUIT_ERR);
		}

		(void)cddbd_fix_file(hashdir, dir_mode, uid, gid);
	}
	else if(!S_ISDIR(sbuf.st_mode)) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "%s is not a directory.", hashdir);
		quit(QUIT_ERR);
	}

	if(!cddbd_lock(lock_tfuzz, 0)) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Build of fuzzy matching hash file already in progress.");
		quit(QUIT_RETRY);
	}

	/* Open a temporary hash file. */
	cddbd_snprintf(file, sizeof(file), "%s/%s", hashdir, tfuzzfile);

	if((fp = fopen(file, "w")) == NULL) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't open %s for writing (%d).", file, errno);
		quit(QUIT_ERR);
	}

	(void)cddbd_fix_file(file, dir_mode, uid, gid);

	cddbd_log(LOG_INFO, "Generating the fuzzy matching hash file.");

	bad = 0;
	links = 0;
	entries = 0;
	nohash = 0;
	noread = 0;

	for(i = 0; categlist[i] != 0; i++) {
		/* Put the category name in the header. */
		strcpy(fhdr.categ[i], categlist[i]);

		cddbd_snprintf(file2, sizeof(file2), "%s/%s", cddbdir,
		    categlist[i]);

		cddbd_log(LOG_INFO, "Scanning %s.", file2);

		if((dirp = opendir(file2)) == NULL) {
			cddbd_log(LOG_ERR | LOG_HASH,
			    "Can't open %s for reading.", file2);
			quit(QUIT_ERR);
		}

		lh = list_init(0, 0, 0, 0);
		if(lh == 0) {
			cddbd_log(LOG_ERR | LOG_HASH,
			    "Can't malloc linked list.");
			quit(QUIT_ERR);
		}

		while((dp = readdir(dirp)) != NULL) {
			/* Make sure this is a database file. */
			if(strlen(dp->d_name) != CDDBDISCIDLEN)
				continue;

			entries++;

			cddbd_snprintf(file, sizeof(file), "%s/%s", file2,
			    dp->d_name);

			if(stat(file, &sbuf)) {
				cddbd_log(LOG_ERR, 
				    "Warning: can't stat CDDB file: %s", file);
				continue;
			}

			if(sbuf.st_nlink > 1 && list_find(lh,
			    (void *)(int)sbuf.st_ino) != 0) {
				links++;
				continue;
			}

			fl = (flist_t *)malloc(sizeof(flist_t));
			if(fl == 0) {
				cddbd_log(LOG_ERR | LOG_HASH,
				    "Can't malloc hash list entry (%d).",
				    errno);
				quit(QUIT_ERR);
			}

			/* Parse the database file. */
			switch(read_db_fuzzy(file, &fl->fhash, 0, 0, errstr)) {
			case DE_NO_ERROR:
				/* Note the category. */
				fl->fhash.catind = (short)i;

				/* Count the database entry. */
				fhdr.count++;
				fhdr.tcount[fl->fhash.trks - 1]++;

				sscanf(dp->d_name, "%08x", &fl->fhash.discid);

				fl->next = flist;
				flist = fl;

				break;

			case DE_INVALID:
				cddbd_log(LOG_ERR, 
				    "Warning: invalid DB file: %s: %s",
				    file, errstr);

				bad++;
				free(fl);

				break;

			case DE_FILE:
			default:
				cddbd_log(LOG_ERR,
				    "Warning: Can't read %s: %s (%d)",
				    file, errstr, errno);

				noread++;
				free(fl);

				break;

			}

			if(sbuf.st_nlink > 1 &&
			    list_add_cur(lh, (void *)(int)sbuf.st_ino) == 0) {
				cddbd_log(LOG_ERR | LOG_HASH,
				    "Can't malloc linked list entry.");
				quit(QUIT_ERR);
			}
		}

		list_free(lh);
		closedir(dirp);
	}

	if(fhdr.count == 0) {
		cddbd_log(LOG_ERR | LOG_HASH, "No valid database entries.");
		quit(QUIT_ERR);
	}

	ftab = (fhash_t **)malloc(sizeof(fhash_t *) * fhdr.count);
	if(ftab == NULL) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't malloc hash table (%d).", errno);
		quit(QUIT_ERR);
	}

	for(i = 0, fl = flist; fl != 0; i++, fl = fl->next)
		ftab[i] = &fl->fhash;

	/* Sort the entries. */
	qsort(ftab, fhdr.count, sizeof(fhash_t *), comp_fhash);

	/* Write the header out. */
	if(fwrite(&fhdr, sizeof(fhdr_t), 1, fp) != 1) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't write hash table header (%d).", errno);
		quit(QUIT_ERR);
	}

	/* Write the records out. */
	for(i = 0; i < fhdr.count; i++) {
		if(fwrite(ftab[i], sizeof(fhash_t), 1, fp) != 1) {
			cddbd_log(LOG_ERR | LOG_HASH,
			    "Can't write hash table entry (%d).", errno);
			quit(QUIT_ERR);
		}

		free(ftab[i]);
	}

	free(ftab);

	/* Write the header out again with the magic number. */
	rewind(fp);
	fhdr.magic = FUZZY_MAGIC;
	fhdr.version = FUZZY_VERSION;

	if(fwrite(&fhdr, sizeof(fhdr_t), 1, fp) != 1) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't write hash table header (%d).", errno);
		quit(QUIT_ERR);
	}

	fclose(fp);

	cddbd_snprintf(file, sizeof(file), "%s/%s", hashdir, fuzzfile);
	cddbd_snprintf(file2, sizeof(file2), "%s/%s", hashdir, tfuzzfile);

	(void)cddbd_lock(lock_hash, 1);

	if(unlink(file) != 0 && errno != ENOENT) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't unlink %s (%d).", file, errno);
		quit(QUIT_ERR);
	}

	if(cddbd_link(file2, file) != 0) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Can't link %s to %s (%d).", file2, file, errno);
		quit(QUIT_ERR);
	}

	if(unlink(file2) != 0) {
		cddbd_log(LOG_ERR | LOG_HASH,
		    "Warning: can't unlink %s (%d).", file2, errno);
	}

	cddbd_unlock(lock_hash);
	cddbd_unlock(lock_tfuzz);

	cddbd_log(LOG_INFO, "Hashed %d database entries out of %d.",
	    fhdr.count, entries);
	cddbd_log(LOG_INFO,
	    "Ignored %d files: %d invalid, %d unhashable, %d unreadable, %d links.",
	    (entries - fhdr.count), bad, nohash, noread, links);
	cddbd_log(LOG_INFO, "Done creating hash file.");
}


int
comp_fhash(void *c1, void *c2)
{
	fhash_t *h1;
	fhash_t *h2;

	h1 = *(fhash_t **)c1;
	h2 = *(fhash_t **)c2;

	if(h1->trks != h2->trks)
		return (h1->trks - h2->trks);

	if(h1->disclen != h2->disclen)
		return (h1->disclen - h2->disclen);

	return 0;
}


/*
 * read_db_fuzzy - pull the fuzzy matching data out of one database file.
 *
 * discid  name of the file to open (callers in this file pass a full
 *         path to the database entry).
 * fh      receives the entry's disc length and track count.
 * offtab  if not 0, receives one offset per track of the entry.
 * dtitle  if not 0, receives the DP_DTITLE string via db_strcpy(),
 *         limited to CDDBBUFSIZ - 1.
 * err     buffer for an error description; written here when the file
 *         cannot be opened, and otherwise handed to db_read().
 *
 * Returns DE_FILE if the file cannot be opened, db_errno if db_read()
 * yields no entry, and DE_NO_ERROR otherwise.
 */
db_errno_t
read_db_fuzzy(char *discid, fhash_t *fh, int *offtab, char *dtitle, char *err)
{
	FILE *dbfp;
	db_t *entry;
	int trk;

	dbfp = fopen(discid, "r");
	if(dbfp == NULL) {
		cddbd_snprintf(err, CDDBBUFSIZ, "can't open %s for reading",
		    discid);

		return DE_FILE;
	}

	/* The stream is only needed for the duration of the parse. */
	entry = db_read(dbfp, err, 0);
	fclose(dbfp);

	if(entry == 0)
		return db_errno;

	fh->trks = entry->db_trks;
	fh->disclen = entry->db_disclen;

	/* Copy out the track offsets when the caller asked for them. */
	if(offtab != 0) {
		trk = 0;
		while(trk < entry->db_trks) {
			offtab[trk] = entry->db_offset[trk];
			trk++;
		}
	}

	/* Likewise for the disc title. */
	if(dtitle != 0)
		db_strcpy(entry, DP_DTITLE, 0, dtitle, (CDDBBUFSIZ - 1));

	db_free(entry);

	return DE_NO_ERROR;
}


/*
 * is_fuzzy_match - compare two track offset tables for a fuzzy match.
 *
 * offtab1, offtab2  offset tables holding at least ntrks entries each.
 * ntrks             number of tracks to compare.
 *
 * For each track, the distance from the previous offset (0 for the
 * first track) is computed in both tables and the two distances are
 * compared.  The tables match when no track differs by more than
 * fuzzy_factor and the average difference does not exceed
 * fuzzy_factor / fuzzy_div.
 *
 * Returns 1 on a match and 0 otherwise.  A track count of zero or less
 * never matches; this also keeps the average from dividing by zero.
 * NOTE(review): fuzzy_div is assumed to be non-zero - confirm where it
 * is configured.
 */
int
is_fuzzy_match(int *offtab1, int *offtab2, int ntrks)
{
	int i;
	int diff;
	int prev1;
	int prev2;
	int total;

	if(ntrks <= 0)
		return 0;

	prev1 = 0;
	prev2 = 0;
	total = 0;

	/* Check the difference between track offsets. */
	for(i = 0; i < ntrks; i++) {
		diff = (offtab1[i] - prev1) - (offtab2[i] - prev2);
		if(diff < 0)
			diff = -diff;

		/* Track diff too great. */
		if(diff > fuzzy_factor)
			return 0;

		total += diff;

		prev1 = offtab1[i];
		prev2 = offtab2[i];
	}

	return ((total / ntrks) <= (fuzzy_factor / fuzzy_div));
}
