// boprel.c


/*
    This file is part of BOP.
    Copyright (C) 2004  Patrick Davalan

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    The GNU General Public License text is also available at
    http://www.gnu.org/
    or on the Copyright holder web site :
    http://patrick.davalan.free.fr/gnu-gpl.html
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/unistd.h>

#define DEBUG 0
// change the include to #include <bop.2/bop.h>
#include "bop.h"
#include "bopmakeh.h"

//
// This program compares 2 files, let's call them file1 and file2, and
// outputs 3 files, let's call them out1, out2 and outi.
// file1 and file2 are assumed to contain lines made of a single word
// followed by a newline.
// out1 will contain the words which appear in file1 but do not appear
// in file2.
// out2 will contain the words which appear in file2 but do not appear
// in file1.
// outi will contain the words which appear in both file1 and file2.
// The files do not need to be sorted.
// The output file names are the result-file-prefix given on the command
// line followed by '1', '2' and 'i' respectively.
//
// For this purpose, the files are loaded into 2 hashes which are
// afterwards scanned.
//
// It is intended to be an example of the bop API.
//

// Context handed to the hash-scanning callbacks (scan1 and scan2) through
// their untyped first argument.
typedef struct
{
    BophHandle *hash;   // the hash to compare against the one being scanned
    FILE *fx;           // receives the words exclusive to the scanned hash
    FILE *fi;           // receives the words shared by both hashes
} ScanData;

// Scan callback run once for every entry of the scanned hash.
//
// arg1 points to a ScanData; entry is the current entry of the scanned hash.
// The entry's key is looked up and deleted in the other hash (ScanData.hash):
//   - delete succeeded: the word belongs to both hashes, it goes to `fi`;
//   - delete failed:    the word is exclusive to the scanned hash, it goes
//                       to `fx`.
// Once the whole scan is over, the other hash therefore only holds the
// words that were exclusive to it.
//
// Always returns false so that the hash scan is not stopped.
static int
scan1(void *arg1, BophEntry *entry)
{
    ScanData *ctx = (ScanData *) arg1;
    char *key;
    size_t keyLen;
    FILE *out;

    bopdEnter();

    key = bophGetKey(entry);
    keyLen = bophGetKeyLength(entry);

    // a successful delete means the key was present in the other hash too
    out = bophDelByKey(ctx->hash, key, keyLen) ? ctx->fi : ctx->fx;
    fprintf(out, "%s\n", key);

    bopdBack();
    return (false);
}

// Scan callback used to dump the words still present in the second hash.
//
// arg1 points to a ScanData; only its `fx` stream is used. The key of
// `entry` is written to that stream, followed by a newline.
//
// Always returns false so that the hash scan is not stopped.
static int
scan2(void *arg1, BophEntry *entry)
{
    ScanData *ctx = (ScanData *) arg1;
    char *key = (char *) bophGetKey(entry);

    fprintf(ctx->fx, "%s\n", key);

    return (false);
}

// Write the three result files for the comparison of hash1 and hash2.
//
// Each output file name is `prefix` followed by a single character:
//   '1' : keys found only in hash1
//   'i' : keys found in both hashes
//   '2' : keys found only in hash2
//
// First pass : hash1 is scanned with scan1, which deletes from hash2 every
//              key shared with hash1 while filling files '1' and 'i'.
// Second pass: what is left in hash2 is dumped to file '2' with scan2.
//
// hash2 is modified by the first pass. Always returns true.
static int
compare(BophHandle *hash1, BophHandle *hash2, char *prefix)
{
    ScanData scan;
    char *outName;
    size_t stemLen;

    bopdEnter();

    // room for the prefix, the one-character suffix and the terminating NUL
    stemLen = strlen(prefix);
    outName = bopmMalloc(stemLen + 2);
    bopdTrace("filename allocated at %p\n", outName);
    memcpy(outName, prefix, stemLen);
    outName[stemLen + 1] = 0;

    // first pass: open files 1 and i ...
    outName[stemLen] = '1';
    scan.fx = bopxFopen(outName, "w");
    outName[stemLen] = 'i';
    scan.fi = bopxFopen(outName, "w");

    // ... fill them ...
    fprintf(stderr, "scan 1\n");
    scan.hash = hash2;
    bophScan(&scan, hash1, scan1);

    // ... and close them
    bopxFclose(scan.fx);
    bopxFclose(scan.fi);

    // second pass: open file 2 ...
    outName[stemLen] = '2';
    scan.fx = bopxFopen(outName, "w");
    scan.fi = NULL;     // not meant to be used during the second scan

    // ... fill it ...
    fprintf(stderr, "scan 2\n");
    scan.hash = hash1;
    bophScan(&scan, hash2, scan2);

    // ... and close it
    bopxFclose(scan.fx);

    bopdTrace("freeing filename at %p\n", outName);
    bopmFree(outName);

    bopdReturn(true);
    return (true);
}

// Program entry point.
//
// Usage: boprel file1 file2 result-file-prefix
//
// Each input file is loaded into its own hash with bopMakeH(); the size
// requested for each hash is derived from the byte size of the file as
// reported by stat(). The two hashes are then handed to compare(), which
// writes the result files. Progress messages are written to stderr.
//
// Exit status: EXIT_FAILURE when arguments are missing, when a hash cannot
// be created or when compare() reports a failure; EXIT_SUCCESS otherwise.
// A failing stat() or bopMakeH() is reported through bopxAbort().
int
main(int argc, char **argv)
{
    BophHandle *hash1;
    BophHandle *hash2;
    struct stat statBuf;
    char *file1;
    char *file2;
    char *prefix;
    int size1, size2;
    int compared;

    bopmTrace();

    if (argc < 4)
    {
        fprintf(stderr, " missing args\n");
        fprintf(stderr, "usage : boprel file1 file2 result-file-prefix\n");
        exit(EXIT_FAILURE);
    }

    // Plain locals rather than argv-aliasing macros: macros named file1,
    // file2 and prefix would stay defined for the rest of the file.
    // They are only read once argc has been validated.
    file1 = argv[1];
    file2 = argv[2];
    prefix = argv[3];

    // try to choose a hash size for file1
    // NOTE(review): st_size is an off_t and the result is narrowed to int;
    // a very large input file could yield a wrong size - confirm whether
    // such inputs need to be supported.
    if (stat(file1, &statBuf) != 0)
    {
        bopxAbort("cannot stat on input file 1");
    }
    size1 = 1023 + (statBuf.st_size / 57);  // empirical heuristic

    // try to choose a hash size for file2 (same heuristic)
    if (stat(file2, &statBuf) != 0)
    {
        bopxAbort("cannot stat on input file 2");
    }
    size2 = 1023 + (statBuf.st_size / 57);

#if ( DEBUG > 0 )
    fprintf(stderr, "hash 1 size : %d\n", size1);
    fprintf(stderr, "hash 2 size : %d\n", size2);
#endif

    // create hash 1
    fprintf(stderr, "creating hash 1\n");
    if ((hash1 = bophNew(NULL, "hash 1", size1, NULL, NULL)) == NULL)
    {
        fprintf(stderr,
            "bophNew failed to create a size %d hash\n",
            size1);
        exit(EXIT_FAILURE);
    }

    // fill hash 1
    fprintf(stderr, "filling hash 1\n");
    if (!bopMakeH(hash1, file1))
    {
        bopxAbort("while filling hash 1");
    }

    // create hash 2
    fprintf(stderr, "creating hash 2\n");
    if ((hash2 = bophNew(NULL, "hash 2", size2, NULL, NULL)) == NULL)
    {
        fprintf(stderr,
            "bophNew failed to create a size %d hash\n",
            size2);
        exit(EXIT_FAILURE);
    }

    // fill hash 2
    fprintf(stderr, "filling hash 2\n");
    if (!bopMakeH(hash2, file2))
    {
        bopxAbort("while filling hash 2");
    }

    // compare hashes; keep the status so it is reflected in the exit code
    fprintf(stderr, "comparing hashes\n");
    compared = compare(hash1, hash2, prefix);
    if (!compared)
    {
        fprintf(stderr, "comparison failed\n");
    }

    // release both hashes whatever the outcome of the comparison
    bopdTrace("delete hash 1\n");
    bophDelete(NULL, hash1);
    bopdTrace("delete hash 2\n");
    bophDelete(NULL, hash2);

    fprintf(stderr, "exiting\n");

    bopmMem();

    exit(compared ? EXIT_SUCCESS : EXIT_FAILURE);
}