/* This file is part of BOP. Copyright (C) 2004 Patrick Davalan This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA The GNU General Public License text is also available at http://www.gnu.org/ or on the Copyright holder web site : http://patrick.davalan.free.fr/gnu-gpl.html */ #include <sys/types.h> #include <sys/stat.h> #include <sys/unistd.h> #define DEBUG 0 // change the include to #include <bop.2/bop.h> #include "bop.h" #include "bopmakeh.h" // // This program compares 2 files, let's call them file1 and file2 and // output 3 files, let's call them out1, out2 and outi. // file1 and file2 are assumed to contain lines made of a single word // followed by a newline. // out1 will contain the words which appears in file1 but don't appears // in file2 // out2 will contain the words which appears in file2 but don't appears // in file1 // outi will contain the words which appears in file1 and file2 // The files do not need to be sorted. // // For this purpose, the files are loaded in 2 hashs which are afterward // scanned. // // it is intended to be an example of the bop API. // // structure passed to the scanning functions typedef struct { BophHandle * hash ; // hash to compare with the one scanned. FILE * fx ; // where to put the words exclusive to scanned hash FILE * fi ; // ...................... shared } ScanData ; // executed for each entry in hash // try to delete the same word in the other hash, successful delete // mean this word belonged to the 2 hashs, otherwise it was exclusive // to the scanned hash. // when the scan is finished, the other hash will contain only the // words which were exclusive to it // static int scan1 ( void * arg1 , BophEntry * entry ) { #define data ( (ScanData *) arg1 ) char * word ; size_t len ; bopdEnter( ) ; word = bophGetKey( entry ) ; len = bophGetKeyLength( entry ) ; if ( bophDelByKey( data->hash, word, len ) ) { // the entry was found and deleted fprintf( data->fi,"%s\n",word ) ; } else { // the entry was not found fprintf( data->fx,"%s\n",word ) ; } bopdBack( ) ; return( false ) ; // don't stop the hash scan #undef data } // dump remaining words in hash2 static int scan2 ( void * arg1 , BophEntry * entry ) { #define data ( (ScanData *) arg1 ) fprintf( data->fx,"%s\n", (char *)bophGetKey( entry ) ) ; return( false ) ; // don't stop the hash scan #undef data } static int compare( BophHandle * hash1, BophHandle * hash2, char * prefix ) { char * fileName ; ScanData data ; size_t allocSize ; size_t prefixSize ; bopdEnter( ) ; prefixSize = strlen( prefix ) ; allocSize = prefixSize + 2 ; fileName = bopmMalloc( allocSize ) ; bopdTrace( "filename allocated at %p\n", fileName ) ; memcpy( fileName, prefix, prefixSize ) ; *( fileName + prefixSize + 1 ) = 0 ; // open output file 1 *( fileName + prefixSize ) = '1' ; data.fx = bopxFopen( fileName, "w" ) ; // open output file i *( fileName + prefixSize ) = 'i' ; data.fi = bopxFopen( fileName, "w" ) ; // fill files 1 et i fprintf( stderr, "scan 1\n" ) ; data.hash = hash2 ; bophScan( &data , hash1, scan1 ); // close files 1 et i bopxFclose( data.fx ) ; bopxFclose( data.fi ) ; // open output file 2 *( fileName + prefixSize ) = '2' ; data.fx = bopxFopen( fileName, "w" ) ; data.fi = NULL ; // should not be used in second scan // fill output file 2 fprintf( stderr, "scan 2\n" ) ; data.hash = hash1 ; bophScan( &data , hash2, scan2 ); // close files 2 bopxFclose( data.fx ) ; bopdTrace( "freeing filename at %p\n", fileName ) ; bopmFree( fileName ) ; bopdReturn( true ) ; return ( true ) ; } int main( int argc, char **argv ) { BophHandle * hash1 ; BophHandle * hash2 ; struct stat statBuf ; #define file1 argv[1] #define file2 argv[2] #define prefix argv[3] int size1, size2 ; bopmTrace( ) ; if ( argc < 4 ) { fprintf( stderr, " missing args\n" ) ; fprintf( stderr, "usage : boprel file1 file2 result-file-prefix\n" ) ; exit ( EXIT_FAILURE ) ; } // try to choose a hash size for file1 if ( stat( file1, &statBuf ) != 0 ) { bopxAbort( "cannot stat on input file 1" ) ; } size1 = 1023 + ( statBuf.st_size / 57 ) ; // why not ! // try to choose a hash size for file2 if ( stat( file2, &statBuf ) != 0 ) { bopxAbort( "cannot stat on input file 2" ) ; } size2 = 1023 + ( statBuf.st_size / 57 ) ; // why not ! #if ( DEBUG > 0 ) fprintf( stderr, "hash 1 size : %d\n", size1 ) ; fprintf( stderr, "hash 2 size : %d\n", size2 ) ; #endif // create Hash 1 fprintf( stderr, "creating hash 1\n" ) ; if ( (hash1 = bophNew( NULL, "hash 1", size1, NULL, NULL) ) == NULL ) { fprintf( stderr, "bophNew failed to create a size %d hash\n", size1 ) ; exit ( EXIT_FAILURE ) ; } // fill hash 1 fprintf( stderr, "filling hash 1\n" ) ; if ( ! bopMakeH( hash1, file1 ) ) { bopxAbort( "while filling hash 1" ) ; } // create Hash 2 fprintf( stderr, "creating hash 2\n" ) ; if ( (hash2 = bophNew( NULL, "hash 2", size2, NULL, NULL) ) == NULL ) { fprintf( stderr, "bophNew failed to create a size %d hash\n", size2 ) ; exit ( EXIT_FAILURE ) ; } // fill hash 2 fprintf( stderr, "filling hash 2\n" ) ; if ( ! bopMakeH( hash2, file2 ) ) { bopxAbort( "while filling hash 2" ) ; } // compare hashes fprintf( stderr, "comparing hashes\n" ) ; compare( hash1, hash2, prefix ) ; bopdTrace( "delete hash 1\n" ) ; bophDelete( NULL, hash1 ) ; bopdTrace( "delete hash 2\n" ) ; bophDelete( NULL, hash2 ) ; fprintf( stderr, "exiting\n" ) ; bopmMem( ) ; exit(EXIT_SUCCESS) ; }