/*
This file is part of BOP.
Copyright (C) 2004 Patrick Davalan
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
The GNU General Public License text is also available at
http://www.gnu.org/
or on the Copyright holder web site :
http://patrick.davalan.free.fr/gnu-gpl.html
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#define DEBUG 0
// change the include to #include <bop.2/bop.h>
#include "bop.h"
#include "bopmakeh.h"
//
// This program compares 2 files, let's call them file1 and file2, and
// outputs 3 files, let's call them out1, out2 and outi.
// file1 and file2 are assumed to contain lines made of a single word
// followed by a newline.
// out1 will contain the words that appear in file1 but do not appear
// in file2.
// out2 will contain the words that appear in file2 but do not appear
// in file1.
// outi will contain the words that appear in both file1 and file2.
// The files do not need to be sorted.
//
// For this purpose, the files are loaded into 2 hashes, which are
// afterwards scanned.
//
// It is intended to be an example of the bop API.
//
// Context handed to the scan callbacks (scan1 / scan2) through their
// first argument.
typedef struct
{
    // the other hash: the one compared against the hash being scanned
    BophHandle * hash ;
    // output stream for the words exclusive to the scanned hash
    FILE * fx ;
    // output stream for the words shared by both hashes
    FILE * fi ;
} ScanData ;
// Scan callback for the first pass; called for each entry of the hash
// being scanned.
//
// arg1  : points to a ScanData; its `hash` member is the other hash.
// entry : the current entry of the scanned hash.
//
// The entry's key is deleted from the other hash. If the delete succeeds,
// the word belonged to both hashes and is written to `fi`; otherwise it
// was exclusive to the scanned hash and is written to `fx`.
// Once the whole scan is finished, the other hash therefore contains only
// the words that were exclusive to it.
//
// Always returns false so that the hash scan is not stopped.
//
static int
scan1 ( void * arg1 , BophEntry * entry )
{
    const ScanData * ctx = arg1 ;
    char * key ;
    size_t keyLen ;
    FILE * out ;

    bopdEnter( ) ;

    key = bophGetKey( entry ) ;
    keyLen = bophGetKeyLength( entry ) ;

    // found and deleted -> shared word, not found -> exclusive word
    out = bophDelByKey( ctx->hash, key, keyLen ) ? ctx->fi : ctx->fx ;
    fprintf( out, "%s\n", key ) ;

    bopdBack( ) ;
    return( false ) ; // don't stop the hash scan
}
// Scan callback for the second pass: writes the key of the current entry,
// followed by a newline, to the `fx` stream of the ScanData pointed to by
// arg1. It is used to dump the words remaining in hash2.
//
// Always returns false so that the hash scan is not stopped.
static int
scan2 ( void * arg1 , BophEntry * entry )
{
    const ScanData * ctx = arg1 ;
    const char * key = (char *) bophGetKey( entry ) ;

    fprintf( ctx->fx, "%s\n", key ) ;
    return( false ) ; // don't stop the hash scan
}
// Compare the two hashes and write the three result files.
//
// hash1, hash2 : the hashes built from the first and second input file.
// prefix       : common start of the output file names; the files written
//                are prefix + "1", prefix + "i" and prefix + "2".
//
// Pass 1 scans hash1 with scan1, which writes the words exclusive to
// hash1 to file "1", the shared words to file "i", and removes the shared
// words from hash2. Pass 2 scans what is left of hash2 with scan2 and
// writes it to file "2".
//
// Always returns true.
static int
compare( BophHandle * hash1, BophHandle * hash2, char * prefix )
{
    ScanData scan ;
    char * path ;
    size_t stemLen ;
    size_t pathBytes ;

    bopdEnter( ) ;

    // room for the prefix, one suffix character and the terminating 0
    stemLen = strlen( prefix ) ;
    pathBytes = stemLen + 2 ;
    path = bopmMalloc( pathBytes ) ;
    bopdTrace( "filename allocated at %p\n", path ) ;
    memcpy( path, prefix, stemLen ) ;
    path[ stemLen + 1 ] = 0 ;

    // pass 1 : output files "1" (exclusive to hash1) and "i" (shared)
    path[ stemLen ] = '1' ;
    scan.fx = bopxFopen( path, "w" ) ;
    path[ stemLen ] = 'i' ;
    scan.fi = bopxFopen( path, "w" ) ;

    fprintf( stderr, "scan 1\n" ) ;
    scan.hash = hash2 ;
    bophScan( &scan , hash1, scan1 );

    bopxFclose( scan.fx ) ;
    bopxFclose( scan.fi ) ;

    // pass 2 : output file "2" (what remains in hash2)
    path[ stemLen ] = '2' ;
    scan.fx = bopxFopen( path, "w" ) ;
    scan.fi = NULL ; // should not be used in second scan

    fprintf( stderr, "scan 2\n" ) ;
    scan.hash = hash1 ;
    bophScan( &scan , hash2, scan2 );

    bopxFclose( scan.fx ) ;

    bopdTrace( "freeing filename at %p\n", path ) ;
    bopmFree( path ) ;

    bopdReturn( true ) ;
    return ( true ) ;
}
// Program entry point.
//
// usage : boprel file1 file2 result-file-prefix
//
// Sizes one hash per input file from the file size, fills each hash from
// its file with bopMakeH, calls compare() to write the result files, then
// deletes both hashes.
//
// Exits with EXIT_FAILURE when arguments are missing or when bophNew
// fails; stat and bopMakeH failures are reported through bopxAbort.
// Exits with EXIT_SUCCESS otherwise.
int
main( int argc, char **argv )
{
    BophHandle * hash1 ;
    BophHandle * hash2 ;
    struct stat info ;
    char * pathA ;
    char * pathB ;
    char * outStem ;
    int slots1 ;
    int slots2 ;

    bopmTrace( ) ;

    if ( argc < 4 )
    {
        fprintf( stderr, " missing args\n" ) ;
        fprintf( stderr, "usage : boprel file1 file2 result-file-prefix\n" ) ;
        exit ( EXIT_FAILURE ) ;
    }
    pathA = argv[1] ;
    pathB = argv[2] ;
    outStem = argv[3] ;

    // hash sizes are derived from the input file sizes with an arbitrary
    // heuristic : 1023 plus one per 57 bytes of input
    if ( stat( pathA, &info ) != 0 )
    {
        bopxAbort( "cannot stat on input file 1" ) ;
    }
    slots1 = 1023 + ( info.st_size / 57 ) ;

    if ( stat( pathB, &info ) != 0 )
    {
        bopxAbort( "cannot stat on input file 2" ) ;
    }
    slots2 = 1023 + ( info.st_size / 57 ) ;

#if ( DEBUG > 0 )
    fprintf( stderr, "hash 1 size : %d\n", slots1 ) ;
    fprintf( stderr, "hash 2 size : %d\n", slots2 ) ;
#endif

    // hash 1 : create, then load the first file
    fprintf( stderr, "creating hash 1\n" ) ;
    hash1 = bophNew( NULL, "hash 1", slots1, NULL, NULL) ;
    if ( hash1 == NULL )
    {
        fprintf( stderr,
                 "bophNew failed to create a size %d hash\n",
                 slots1 ) ;
        exit ( EXIT_FAILURE ) ;
    }
    fprintf( stderr, "filling hash 1\n" ) ;
    if ( ! bopMakeH( hash1, pathA ) )
    {
        bopxAbort( "while filling hash 1" ) ;
    }

    // hash 2 : create, then load the second file
    fprintf( stderr, "creating hash 2\n" ) ;
    hash2 = bophNew( NULL, "hash 2", slots2, NULL, NULL) ;
    if ( hash2 == NULL )
    {
        fprintf( stderr,
                 "bophNew failed to create a size %d hash\n",
                 slots2 ) ;
        exit ( EXIT_FAILURE ) ;
    }
    fprintf( stderr, "filling hash 2\n" ) ;
    if ( ! bopMakeH( hash2, pathB ) )
    {
        bopxAbort( "while filling hash 2" ) ;
    }

    // write the three result files
    fprintf( stderr, "comparing hashes\n" ) ;
    compare( hash1, hash2, outStem ) ;

    // release both hashes
    bopdTrace( "delete hash 1\n" ) ;
    bophDelete( NULL, hash1 ) ;
    bopdTrace( "delete hash 2\n" ) ;
    bophDelete( NULL, hash2 ) ;

    fprintf( stderr, "exiting\n" ) ;
    bopmMem( ) ;
    exit(EXIT_SUCCESS) ;
}