added equals and hashCode

Former-commit-id: 9a7aa87fdaff6395dd6c65d406aa054d925c0d6e
2026-03-02 14:10:19 +01:00 · 2012-12-30 16:50:19 -05:00
parent f6cdf34b25
commit 30ea512dcc
5 changed files with 88 additions and 717 deletions
--- a/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java
@@ -18,6 +18,7 @@ package org.codesecure.dependencycheck.data.cpe;
 * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 */

+import java.io.Serializable;
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
 import java.util.logging.Level;
@@ -25,12 +26,13 @@ import java.util.logging.Logger;
 import org.apache.lucene.document.Document;

 /**
- * A single CPE entry from the cpe.xml downloaded from <a
- * href="http://nvd.nist.gov/cpe.cfm">http://nvd.nist.gov/cpe.cfm</a>.
+ * A CPE entry containing the name, vendor, product, and version.
 *
 * @author Jeremy Long (jeremy.long@gmail.com)
 */
-public class Entry {
+public class Entry implements Serializable {
+
+    static final long serialVersionUID = 8011924485946326934L;

    /**
     * This parse method does not fully convert a Lucene Document into a CPE
--- a/src/main/java/org/codesecure/dependencycheck/dependency/Reference.java
+++ b/src/main/java/org/codesecure/dependencycheck/dependency/Reference.java
@@ -1,7 +1,4 @@
 package org.codesecure.dependencycheck.dependency;
-
-import java.io.Serializable;
-
 /*
 * This file is part of DependencyCheck.
 *
@@ -20,6 +17,9 @@ import java.io.Serializable;
 *
 * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 */
+
+import java.io.Serializable;
+
 /**
 * An external reference for a vulnerability. This contains a name, URL, and a
 * source.
@@ -95,4 +95,35 @@ public class Reference implements Serializable {
    public void setSource(String source) {
        this.source = source;
    }
+
+    /**
+     * Determines whether the given object is a Reference carrying the same
+     * name, url, and source as this one. Fields are compared in that order
+     * and the comparison stops at the first mismatch.
+     *
+     * @param obj the object to compare with this reference
+     * @return true when obj is non-null, of exactly this class, and its name,
+     * url, and source all match (a null field only matches null)
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        final Reference that = (Reference) obj;
+        // a field matches when both sides are null or the values are equal
+        return ((this.name == null) ? (that.name == null) : this.name.equals(that.name))
+                && ((this.url == null) ? (that.url == null) : this.url.equals(that.url))
+                && ((this.source == null) ? (that.source == null) : this.source.equals(that.source));
+    }
+
+    /** Returns a hash built from name, url, and source; a null field contributes 0. */
+    @Override
+    public int hashCode() {
+        final int nameHash = (this.name == null) ? 0 : this.name.hashCode();
+        final int urlHash = (this.url == null) ? 0 : this.url.hashCode();
+        final int sourceHash = (this.source == null) ? 0 : this.source.hashCode();
+        return 67 * (67 * (67 * 5 + nameHash) + urlHash) + sourceHash;
+    }
+
 }
--- a/src/main/java/org/codesecure/dependencycheck/dependency/Vulnerability.java
+++ b/src/main/java/org/codesecure/dependencycheck/dependency/Vulnerability.java
@@ -29,7 +29,7 @@ import java.util.List;
 */
 public class Vulnerability implements Serializable {

-    private static final long serialVersionUID = -9197349868449482427L;
+    private static final long serialVersionUID = 307319490326651052L;
    /**
     * The name of the vulnerability
     */
@@ -162,4 +162,26 @@ public class Vulnerability implements Serializable {
    public void addVulnerableSoftware(String cpe) {
        addVulnerableSoftware(cpe, null);
    }
+
+    /**
+     * Compares vulnerabilities by name only; no other field is consulted.
+     *
+     * @param obj the object to compare with this vulnerability
+     * @return true when obj is a Vulnerability of exactly this class with an equal name
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        final Vulnerability that = (Vulnerability) obj;
+        return (this.name == null) ? (that.name == null) : this.name.equals(that.name);
+    }
+
+    /** Returns a hash derived from the name alone; a null name contributes 0. */
+    @Override
+    public int hashCode() {
+        final int nameHash = (this.name == null) ? 0 : this.name.hashCode();
+        return 41 * 5 + nameHash;
+    }
 }
--- a/src/main/java/org/codesecure/dependencycheck/dependency/VulnerableSoftware.java
+++ b/src/main/java/org/codesecure/dependencycheck/dependency/VulnerableSoftware.java
@@ -30,31 +30,9 @@ import org.codesecure.dependencycheck.data.cpe.Entry;
 *
 * @author Jeremy Long (jeremy.long@gmail.com)
 */
-public class VulnerableSoftware implements Serializable {
+public class VulnerableSoftware extends Entry implements Serializable {

    private static final long serialVersionUID = 307319490326651052L;
-    /**
-     * a cpe entry
-     */
-    protected Entry cpe;
-
-    /**
-     * Get the value of cpe
-     *
-     * @return the value of cpe
-     */
-    public Entry getCpe() {
-        return cpe;
-    }
-
-    /**
-     * Set the value of cpe
-     *
-     * @param cpe new value of cpe
-     */
-    public void setCpe(Entry cpe) {
-        this.cpe = cpe;
-    }

    /**
     * Parse a CPE entry from the cpe string repesentation
@@ -62,22 +40,14 @@ public class VulnerableSoftware implements Serializable {
     * @param cpe a cpe entry (e.g. cpe:/a:vendor:software:version)
     */
    public void setCpe(String cpe) {
-        this.cpe = new Entry();
        try {
-            this.cpe.parseName(cpe);
+            parseName(cpe);
        } catch (UnsupportedEncodingException ex) {
            Logger.getLogger(VulnerableSoftware.class.getName()).log(Level.SEVERE, null, ex);
-            this.cpe.setName(cpe);
+            setName(cpe);
        }
    }

-    /**
-     * Returns the CPE entry name
-     * @return te CPE entry name
-     */
-    public String getName() {
-        return this.cpe.getName();
-    }
    /**
     * If present, indicates that previous version are vulnerable
     */
@@ -109,4 +79,27 @@ public class VulnerableSoftware implements Serializable {
    public void setPreviousVersion(String previousVersion) {
        this.previousVersion = previousVersion;
    }
+
+    /**
+     * Compares vulnerable software entries by name only; previousVersion is not compared.
+     *
+     * @param obj the object to compare with this entry
+     * @return true when obj is a VulnerableSoftware of exactly this class with an equal name
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+        final VulnerableSoftware that = (VulnerableSoftware) obj;
+        return (this.name == null) ? (that.name == null) : this.name.equals(that.name);
+    }
+
+    /** Hashes the name only: equals ignores previousVersion, so the hash must ignore it too. */
+    @Override
+    public int hashCode() {
+        int hash = 7;
+        hash = 83 * hash + (this.name != null ? this.name.hashCode() : 0);
+        return hash;
+    }
 }
--- a/src/main/java/org/codesecure/dependencycheck/utils/SSDeep.java
+++ b/src/main/java/org/codesecure/dependencycheck/utils/SSDeep.java
@@ -1,677 +0,0 @@
-/* ssdeep
-   Copyright (C) 2006 ManTech International Corporation
-
-   $Id: fuzzy.c 97 2010-03-19 15:10:06Z jessekornblum $
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2 of the License, or
-   (at your option) any later version.
-   
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-   
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-   The code in this file, and this file only, is based on SpamSum, part 
-   of the Samba project: 
-         http://www.samba.org/ftp/unpacked/junkcode/spamsum/
-
-   Because of where this file came from, any program that contains it
-   must be licensed under the terms of the General Public License (GPL).
-   See the file COPYING for details. The author's original comments
-   about licensing are below:
-
-
-
-  this is a checksum routine that is specifically designed for spam. 
-  Copyright Andrew Tridgell <tridge@samba.org> 2002
-
-  This code is released under the GNU General Public License version 2
-  or later.  Alteratively, you may also use this code under the terms
-  of the Perl Artistic license.
-
-  If you wish to distribute this code under the terms of a different
-  free software license then please ask me. If there is a good reason
-  then I will probably say yes.
-  
-*/
-
-//package eu.scape_project.bitwiser.utils;
-//https://raw.github.com/openplanets/bitwiser/master/src/main/java/eu/scape_project/bitwiser/utils/SSDeep.java
-package org.codesecure.dependencycheck.utils;
-
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.commons.lang.StringUtils;
-
-/**
- * SSDeep
- *
- * <p>
- * A Java version of the ssdeep algorithm, based on the fuzzy.c source 
- * code, taken from version 2.6 of the ssdeep package.
- * 
- * <p>
- * Transliteration/port to Java from C by...
- * 
- * @author Andrew Jackson <Andrew.Jackson@bl.uk>
- *
- */
-public class SSDeep {
-	
-	public class FuzzyHash { 
-		/** the blocksize used by the program, */
-		int blocksize;
-		/** the hash for this blocksize */
-		String hash;
-		/** the hash for twice the blocksize, */
-		String hash2;
-		/** the filename. */
-		String filename;
-	}
-
-	/// Length of an individual fuzzy hash signature component
-	public static final int SPAMSUM_LENGTH = 64;
-	
-	/// The longest possible length for a fuzzy hash signature (without the filename)
-	public static final int FUZZY_MAX_RESULT = (SPAMSUM_LENGTH + (SPAMSUM_LENGTH/2 + 20));
-
-	
-	public static final int MIN_BLOCKSIZE  = 3;
-	public static final int ROLLING_WINDOW = 7;
-
-	public static final int HASH_PRIME     = 0x01000193;
-	public static final int HASH_INIT      = 0x28021967;
-
-	// Our input buffer when reading files to hash
-	public static final int BUFFER_SIZE  = 8192;
-
-	static class roll_state_class {
-	  int[] window = new int[ROLLING_WINDOW];
-	  int h1, h2, h3;
-	  int n;
-	}
-	private static roll_state_class roll_state = new roll_state_class();
-
-
-	/*
-	  a rolling hash, based on the Adler checksum. By using a rolling hash
-	  we can perform auto resynchronisation after inserts/deletes
-
-	  internally, h1 is the sum of the bytes in the window and h2
-	  is the sum of the bytes times the index
-
-	  h3 is a shift/xor based rolling hash, and is mostly needed to ensure that
-	  we can cope with large blocksize values
-	*/
-	static int roll_hash(int c)
-	{
-		
-//		System.out.println(""+roll_state.h1+","+roll_state.h2+","+roll_state.h3);
-	  roll_state.h2 -= roll_state.h1;
-	  //roll_state.h2 = roll_state.h2 & 0x7fffffff;
-	  roll_state.h2 += ROLLING_WINDOW * c;
-	  //roll_state.h2 = roll_state.h2 & 0x7fffffff;
-	  
-	  roll_state.h1 += c;
-	  //roll_state.h1 = roll_state.h1 & 0x7fffffff;
-	  roll_state.h1 -= roll_state.window[(roll_state.n % ROLLING_WINDOW)];
-	  //roll_state.h1 = roll_state.h1 & 0x7fffffff;
-	  
-	  roll_state.window[roll_state.n % ROLLING_WINDOW] = (char)c;
-	  roll_state.n = (roll_state.n+1)%ROLLING_WINDOW;
-	  
-	  /* The original spamsum AND'ed this value with 0xFFFFFFFF which
-	     in theory should have no effect. This AND has been removed 
-	     for performance (jk) */
-	  roll_state.h3 = (roll_state.h3 << 5);// & 0xFFFFFFFF;
-	  roll_state.h3 ^= c;
-	  //roll_state.h3 = roll_state.h3 & 0x7FFFFFFF;
-	  //if( roll_state.h3 > 0xEFFFFFFF ) roll_state.h3 -= 0xEFFFFFFF;
-	  
-	  long result = ((roll_state.h1 + roll_state.h2 + roll_state.h3));//&0x7FFFFFFF;
-	  //System.out.println("Result: "+result);
-	  //System.out.println("Result2: "+(result&0xFFFFFFFF));
-	  //System.out.println("Result3: "+(result&0x7FFFFFFF));
-	  
-	  return (int) result;//&0xFFFFFFFF;
-	}
-
-	/*
-	  reset the state of the rolling hash and return the initial rolling hash value
-	*/
-	static void roll_reset()
-	{	
-		  roll_state.h1 = 0;
-		  roll_state.h2 = 0;
-		  roll_state.h3 = 0;
-		  roll_state.n = 0;
-		  Arrays.fill(roll_state.window,(char)0);
-	}
-
-	/* a simple non-rolling hash, based on the FNV hash */
-	static int sum_hash(int c, int h)
-	{
-	  h *= HASH_PRIME;
-	  //h = h & 0xFFFFFFFF;
-	  h ^= c;
-	  //h = h & 0xFFFFFFFF;
-	  return h;
-	}
-
-	class ss_context {
-		  char[] ret;
-		  char[] p;
-	  long total_chars;
-	  int h, h2, h3;
-	  int j, n, i, k;
-	  int block_size;
-	  char[] ret2 = new char[SPAMSUM_LENGTH/2 + 1];
-	}
-
-
-	static void ss_destroy(ss_context ctx)
-	{
-	  if (ctx.ret != null)
-		  ctx.ret = null;
-		 //free(ctx.ret);
-	}
-
-
-	static boolean ss_init(ss_context ctx, File handle)
-	{
-	  if ( ctx == null )
-	    return true;
-
-	  ctx.ret = new char[FUZZY_MAX_RESULT];
-	  if (ctx.ret == null)
-	    return true;
-
-	  if (handle != null)
-	    ctx.total_chars = handle.length();
-
-	  ctx.block_size = MIN_BLOCKSIZE;
-	  while (ctx.block_size * SPAMSUM_LENGTH < ctx.total_chars) {
-	    ctx.block_size = ctx.block_size * 2;
-	  }
-	  
-	  System.out.println("bs:"+ctx.block_size);
-
-	  return false;
-	}
-
-	static char[] b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
-
-	static void ss_engine(ss_context ctx, 
-			      byte[] buffer, 
-			      int buffer_size)
-	{
-	  if (null == ctx || null == buffer)
-	    return;
-
-	  for ( int i = 0 ; i < buffer_size ; ++i)
-	  {
-
-	    /* 
-	       at each character we update the rolling hash and
-	       the normal hash. When the rolling hash hits the
-	       reset value then we emit the normal hash as a
-	       element of the signature and reset both hashes
-	    */
-		  
-	    System.out.println(""+ctx.h+","+ctx.h2+","+ctx.h3);
-	    ctx.h  = roll_hash(buffer[i]);// & 0x7FFFFFFF;
-	    ctx.h2 = sum_hash(buffer[i], ctx.h2);// & 0x7FFFFFFF;
-	    ctx.h3 = sum_hash(buffer[i], ctx.h3);// & 0x7FFFFFFF;
-	    
-	    if (((0xFFFFFFFFl & ctx.h) % ctx.block_size) == (ctx.block_size-1)) {
-	      /* we have hit a reset point. We now emit a
-		 hash which is based on all chacaters in the
-		 piece of the message between the last reset
-		 point and this one */
-	      ctx.p[ctx.j] = b64[(int)((ctx.h2&0xFFFF) % 64)];
-	      System.out.println("::"+ctx.j+":"+new String(ctx.p));
-//	      for( char c : ctx.p ) {
-//	    	  System.out.print(c);
-//	      }
-//    	  System.out.println();	      
-	      if (ctx.j < SPAMSUM_LENGTH-1) {
-		/* we can have a problem with the tail
-		   overflowing. The easiest way to
-		   cope with this is to only reset the
-		   second hash if we have room for
-		   more characters in our
-		   signature. This has the effect of
-		   combining the last few pieces of
-		   the message into a single piece */
-
-		ctx.h2 = HASH_INIT;
-		(ctx.j)++;
-	      }
-	    }
-	    
-	    /* this produces a second signature with a block size
-	       of block_size*2. By producing dual signatures in
-	       this way the effect of small changes in the message
-	       size near a block size boundary is greatly reduced. */
-	    if (((0xFFFFFFFFl & ctx.h) % (ctx.block_size*2)) == ((ctx.block_size*2)-1)) {
-	      ctx.ret2[ctx.k] = b64[(int) (ctx.h3&0xFFFF % 64)];
-	      if (ctx.k < SPAMSUM_LENGTH/2-1) {
-		ctx.h3 = HASH_INIT;
-		(ctx.k)++;
-	      }
-	    }
-	  }
-	}
-
-	static boolean ss_update(ss_context ctx, File handle) throws IOException
-	{
-	  int bytes_read = 0;
-	  byte[] buffer; 
-
-	  if (null == ctx || null == handle)
-	    return true;
-
-	  buffer = new byte[BUFFER_SIZE];
-	  if (buffer == null)
-	    return true;
-
-	  // snprintf(ctx.ret, 12, "%u:", ctx.block_size);
-	  ctx.ret = (ctx.block_size + ":").toCharArray();
-	  // ctx.p = ctx.ret + strlen(ctx.ret);  
-	  ctx.p = new char[SPAMSUM_LENGTH];
-	  
-	  //memset(ctx.p, 0, SPAMSUM_LENGTH+1);
-	  Arrays.fill(ctx.p, (char)0 );
-	  //memset(ctx.ret2, 0, sizeof(ctx.ret2.length));
-	  Arrays.fill(ctx.ret2, (char)0 );
-	  
-	  ctx.k  = ctx.j  = 0;
-	  ctx.h3 = ctx.h2 = HASH_INIT;
-	  ctx.h  = 0;
-	  roll_reset();
-
-	  System.out.println("Opening file:"+handle);
-	  FileInputStream in = new FileInputStream(handle);
-	  // while ((bytes_read = fread(buffer,sizeof(byte),BUFFER_SIZE,handle)) > 0)
-	  while (in.available() > 0 )
-	  {
-		  bytes_read = in.read(buffer);
-	      ss_engine(ctx,buffer,bytes_read);
-	  }
-
-	  if (ctx.h != 0) 
-	  {
-	    ctx.p[ctx.j] = b64[(int) ((ctx.h2 & 0xFFFF) % 64)];
-	    ctx.ret2[ctx.k] = b64[(int) ((ctx.h3 &0xFFFF) % 64)];
-	  }
-	  
-	//  strcat(ctx.p+ctx.j, ":");
-	//  strcat(ctx.p+ctx.j, ctx.ret2);
-	  ctx.ret = (new String(ctx.ret) + new String(ctx.p) + ":" + new String(ctx.ret2)).toCharArray();
-
-	//  free(buffer);
-	  return false;
-	}
-
-
-	boolean fuzzy_hash_file(File handle) throws IOException
-	{
-	  ss_context ctx;  
-	  int filepos;
-	  boolean done = false;
-	  
-	  if (null == handle)
-	    return true;
-	  
-	  ctx = new ss_context();
-	  if (ctx == null)
-	    return true;
-
-	//  filepos = ftello(handle);
-
-	  ss_init(ctx, handle);
-	  System.out.println("bs-pre:"+ctx.block_size);
-
-	  while (!done)
-	  {
-		//  if (fseeko(handle,0,SEEK_SET))
-		//    return true;
-
-	    ss_update(ctx,handle);
-	    
-		System.out.println("RESULT:"+new String(ctx.ret));
-
-	    // our blocksize guess may have been way off - repeat if necessary
-	    if (ctx.block_size > MIN_BLOCKSIZE && ctx.j < SPAMSUM_LENGTH/2) 
-	      ctx.block_size = ctx.block_size / 2;
-	    else
-	      done = true;
-	  }
-
-	  System.out.println("bs-post:"+ctx.block_size);
-	// strncpy(result,ctx.ret,FUZZY_MAX_RESULT);
-	  
-	  System.out.println("RESULT:"+new String(ctx.ret));
-
-	  ss_destroy(ctx);
-	//  free(ctx);
-
-	//  if (fseeko(handle,filepos,SEEK_SET))
-	//      return true;
-
-	  return false;
-	}
-
-
-	public boolean fuzzy_hash_filename(String filename) throws IOException
-	{
-	  boolean status;
-
-	  if (null == filename)
-	    return true;
-
-	  File handle = new File(filename);//,"rb");
-	  if (null == handle)
-	    return true;
-
-	  status = fuzzy_hash_file(handle);
-	  
-	//  fclose(handle);
-
-	  return status;
-	}
-
-
-	boolean fuzzy_hash_buf(byte[] buf,
-			   int      buf_len,
-			   char[]          result)
-	{
-	  ss_context ctx = new ss_context();
-	  boolean done = false;
-
-	  if (buf == null)
-	    return true;
-
-	  ctx.total_chars = buf_len;
-	  ss_init(ctx, null);
-
-	  System.out.println("total_chars: "+ctx.total_chars);
-
-	  while (!done)
-	  {
-		//  snprintf(ctx.ret, 12, "%u:", ctx.block_size);
-		//  ctx.p = ctx.ret + strlen(ctx.ret);
-		  ctx.p = new char[SPAMSUM_LENGTH+1]; // TODO Duplication!
-	    
-		//  memset(ctx.p, 0, SPAMSUM_LENGTH+1);
-		//  memset(ctx.ret2, 0, sizeof(ctx.ret2));
-	    
-	    ctx.k  = ctx.j  = 0;
-	    ctx.h3 = ctx.h2 = HASH_INIT;
-	    ctx.h  = 0;
-	    roll_reset();
-
-	    System.out.println("h:"+ctx.h);
-	    System.out.println("h2:"+ctx.h2);
-
-	    ss_engine(ctx,buf,buf_len);
-
-	    /* our blocksize guess may have been way off - repeat if necessary */
-	    if (ctx.block_size > MIN_BLOCKSIZE && ctx.j < SPAMSUM_LENGTH/2) 
-	      ctx.block_size = ctx.block_size / 2;
-	    else
-	      done = true;
-
-	    System.out.println("h:"+ctx.h);
-	    System.out.println("h2:"+ctx.h2);
-	    System.out.println("h3:"+ctx.h3);
-		  System.out.println("bs:"+ctx.block_size);
-		  System.out.println("ret:"+new String(ctx.ret));
-		  System.out.println("p:"+new String(ctx.p));
-		  System.out.println("ret2:"+new String(ctx.ret2));
-		    if (ctx.h != 0) 
-	      {
-		ctx.p[ctx.j] = b64[(int) ((ctx.h2&0xFFFF) % 64)];
-		ctx.ret2[ctx.k] = b64[(int) ((ctx.h3&0xFFFF) % 64)];
-	      }
-	    
-	 //  strcat(ctx.p+ctx.j, ":");
-	 //  strcat(ctx.p+ctx.j, ctx.ret2);
-	  }
-
-
-	//  strncpy(result,ctx.ret,FUZZY_MAX_RESULT);
-	  System.out.println("bs:"+ctx.block_size);
-	  System.out.println("ret:"+new String(ctx.ret));
-	  System.out.println("p:"+new String(ctx.p));
-	  System.out.println("ret2:"+new String(ctx.ret2));
-	  System.out.println("h3:"+ctx.h3);
-	  result = ctx.ret;
-
-	  ss_destroy(ctx);
-	//  free(ctx);
-	  return false;
-	}
-
-
-
-
-	/* 
-	   we only accept a match if we have at least one common substring in
-	   the signature of length ROLLING_WINDOW. This dramatically drops the
-	   false positive rate for low score thresholds while having
-	   negligable affect on the rate of spam detection.
-
-	   return 1 if the two strings do have a common substring, 0 otherwise
-	*/
-	static int has_common_substring(char[] s1, char[] s2)
-	{
-	  int i, j;
-	  int num_hashes;
-	  long[] hashes = new long[SPAMSUM_LENGTH];
-	  
-	  /* there are many possible algorithms for common substring
-	     detection. In this case I am re-using the rolling hash code
-	     to act as a filter for possible substring matches */
-	  
-	  roll_reset();
-	//  memset(hashes, 0, sizeof(hashes));
-	  
-	  /* first compute the windowed rolling hash at each offset in
-	     the first string */
-	  for (i=0;s1[i] != 0;i++) 
-	  {
-	    hashes[i] = roll_hash((char)s1[i]);
-	  }
-	  num_hashes = i;
-	  
-	  roll_reset();
-	  
-	  /* now for each offset in the second string compute the
-	     rolling hash and compare it to all of the rolling hashes
-	     for the first string. If one matches then we have a
-	     candidate substring match. We then confirm that match with
-	     a direct string comparison */
-	  for (i=0;s2[i] != 0;i++) {
-	    long h = roll_hash((char)s2[i]);
-	    if (i < ROLLING_WINDOW-1) continue;
-	    for (j=ROLLING_WINDOW-1;j<num_hashes;j++) 
-	    {
-	      if (hashes[j] != 0 && hashes[j] == h) 
-	      {
-		/* we have a potential match - confirm it */
-	    	  /*FIXME
-		if (strlen(s2+i-(ROLLING_WINDOW-1)) >= ROLLING_WINDOW && 
-		    strncmp(s2+i-(ROLLING_WINDOW-1), 
-			    s1+j-(ROLLING_WINDOW-1), 
-			    ROLLING_WINDOW) == 0) 
-		{
-		  return 1;
-		}
-		*/
-	      }
-	    }
-	  }
-	  
-	  return 0;
-	}
-
-
-	// eliminate sequences of longer than 3 identical characters. These
-	// sequences contain very little information so they tend to just bias
-	// the result unfairly
-	static char[] eliminate_sequences(String string)
-	{
-		char[] str = string.toCharArray();
-	  StringBuffer ret = new StringBuffer();
-	  
-	  // Do not include repeats:
-	  for (int i=3;i<str.length;i++) {
-	    if (str[i] != str[i-1] ||
-		    str[i] != str[i-2] ||
-		    str[i] != str[i-3]) {
-	      ret.append(str[i]);
-	    }
-	  }
-	  
-	  return ret.toString().toCharArray();
-	}
-
-	/*
-	  this is the low level string scoring algorithm. It takes two strings
-	  and scores them on a scale of 0-100 where 0 is a terrible match and
-	  100 is a great match. The block_size is used to cope with very small
-	  messages.
-	*/
-	static int score_strings(char[] s1, char[] s2, int block_size)
-	{
-	  int score = 0;
-	  int len1, len2;
-	  
-	  len1 = s1.length;
-	  len2 = s2.length;
-	  
-	  if (len1 > SPAMSUM_LENGTH || len2 > SPAMSUM_LENGTH) {
-	    /* not a real spamsum signature? */
-	    return 0;
-	  }
-	  
-	  /* the two strings must have a common substring of length
-	     ROLLING_WINDOW to be candidates */
-	  if (has_common_substring(s1, s2) == 0) {
-	    return 0;
-	  }
-	  
-	  /* compute the edit distance between the two strings. The edit distance gives
-	     us a pretty good idea of how closely related the two strings are */
-	  score = StringUtils.getLevenshteinDistance(new String(s1), new String(s2));
-	 
-	  /* scale the edit distance by the lengths of the two
-	     strings. This changes the score to be a measure of the
-	     proportion of the message that has changed rather than an
-	     absolute quantity. It also copes with the variability of
-	     the string lengths. */
-	  score = (score * SPAMSUM_LENGTH) / (len1 + len2);
-	  
-	  /* at this stage the score occurs roughly on a 0-64 scale,
-	   * with 0 being a good match and 64 being a complete
-	   * mismatch */
-	  
-	  /* rescale to a 0-100 scale (friendlier to humans) */
-	  score = (100 * score) / 64;
-	  
-	  /* it is possible to get a score above 100 here, but it is a
-	     really terrible match */
-	  if (score >= 100) return 0;
-	  
-	  /* now re-scale on a 0-100 scale with 0 being a poor match and
-	     100 being a excellent match. */
-	  score = 100 - score;
-
-	  //  printf ("len1: %"PRIu32"  len2: %"PRIu32"\n", len1, len2);
-	  
-	  /* when the blocksize is small we don't want to exaggerate the match size */
-	  if (score > block_size/MIN_BLOCKSIZE * Math.min(len1, len2)) {
-	    score = block_size/MIN_BLOCKSIZE * Math.min(len1, len2);
-	  }
-	  return score;
-	}
-
-	/*
-	  given two spamsum strings return a value indicating the degree to which they match.
-	*/
-	int fuzzy_compare(FuzzyHash fh1, FuzzyHash fh2 )
-	{
-	  int score = 0;
-	  char[] s1_1, s1_2;
-	  char[] s2_1, s2_2;
-	  
-	  // if the blocksizes don't match then we are comparing
-	  // apples to oranges. This isn't an 'error' per se. We could
-	  // have two valid signatures, but they can't be compared. 
-	  if (fh1.blocksize != fh2.blocksize && 
-	      fh1.blocksize != fh2.blocksize*2 &&
-	      fh2.blocksize != fh1.blocksize*2) {
-	    return 0;
-	  }
-	  
-	  // there is very little information content is sequences of
-	  // the same character like 'LLLLL'. Eliminate any sequences
-	  // longer than 3. This is especially important when combined
-	  // with the has_common_substring() test below. 
-	  s1_1 = eliminate_sequences(fh1.hash+1);
-	  s2_1 = eliminate_sequences(fh2.hash+1);
-	  
-	  s1_2 = eliminate_sequences(fh1.hash2+1);
-	  s2_2 = eliminate_sequences(fh1.hash2+1);
-	  
-	  // each signature has a string for two block sizes. We now
-	  // choose how to combine the two block sizes. We checked above
-	  // that they have at least one block size in common 
-	  if (fh1.blocksize == fh2.blocksize) {
-	    int score1, score2;
-	    score1 = score_strings(s1_1, s2_1, fh1.blocksize);
-	    score2 = score_strings(s1_2, s2_2, fh2.blocksize);
-
-	    //    s.block_size = fh1.blocksize;
-
-	    score = Math.max(score1, score2);
-	  } else if (fh1.blocksize == fh2.blocksize*2) {
-
-	    score = score_strings(s1_1, s2_2, fh1.blocksize);
-	    //    s.block_size = fh1.blocksize;
-	  } else {
-
-	    score = score_strings(s1_2, s2_1, fh2.blocksize);
-	    //    s.block_size = fh2.blocksize;
-	  }
-	  
-	  return (int)score;
-	}
-
-	/**
-	 * Main class for quick testing.
-	 * @param args
-	 * @throws IOException 
-	 */
-	public static void main( String[] args ) throws IOException {
-		SSDeep ssd = new SSDeep();
-		byte[] b2 = "Hello World how are you today...\n".getBytes();
-		byte[] b3 = "Helli".getBytes();
-		char[] h1 = null;
-		boolean t1 = ssd.fuzzy_hash_buf(b2, b2.length, h1);
-		System.out.println("Got "+h1);
-		ssd.fuzzy_hash_file(new File("test"));
-		//ssd.fuzzy_hash_file(new File("pom.xml"));
-	}
-}