From ff4e40a910349650910bc4faf49422f35576a3e4 Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Sun, 30 Dec 2012 16:50:19 -0500 Subject: [PATCH] added equals and hashCode Former-commit-id: 065a80852277add47d259f8f96fe9ed64c84ffe3 --- .../dependencycheck/data/cpe/Entry.java | 8 +- .../dependencycheck/dependency/Reference.java | 37 +- .../dependency/Vulnerability.java | 24 +- .../dependency/VulnerableSoftware.java | 59 +- .../dependencycheck/utils/SSDeep.java | 677 ------------------ 5 files changed, 88 insertions(+), 717 deletions(-) delete mode 100644 src/main/java/org/codesecure/dependencycheck/utils/SSDeep.java diff --git a/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java b/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java index ce3a1c8cd..ad0165f23 100644 --- a/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java +++ b/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java @@ -18,6 +18,7 @@ package org.codesecure.dependencycheck.data.cpe; * Copyright (c) 2012 Jeremy Long. All Rights Reserved. */ +import java.io.Serializable; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.logging.Level; @@ -25,12 +26,13 @@ import java.util.logging.Logger; import org.apache.lucene.document.Document; /** - * A single CPE entry from the cpe.xml downloaded from http://nvd.nist.gov/cpe.cfm. + * A CPE entry containing the name, vendor, product, and version. * * @author Jeremy Long (jeremy.long@gmail.com) */ -public class Entry { +public class Entry implements Serializable { + + static final long serialVersionUID = 8011924485946326934L; /** * This parse method does not fully convert a Lucene Document into a CPE diff --git a/src/main/java/org/codesecure/dependencycheck/dependency/Reference.java b/src/main/java/org/codesecure/dependencycheck/dependency/Reference.java index 9e90e3622..55a074213 100644 --- a/src/main/java/org/codesecure/dependencycheck/dependency/Reference.java +++ b/src/main/java/org/codesecure/dependencycheck/dependency/Reference.java @@ -1,7 +1,4 @@ package org.codesecure.dependencycheck.dependency; - -import java.io.Serializable; - /* * This file is part of DependencyCheck. * @@ -20,6 +17,9 @@ import java.io.Serializable; * * Copyright (c) 2012 Jeremy Long. All Rights Reserved. */ + +import java.io.Serializable; + /** * An external reference for a vulnerability. This contains a name, URL, and a * source. @@ -95,4 +95,35 @@ public class Reference implements Serializable { public void setSource(String source) { this.source = source; } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Reference other = (Reference) obj; + if ((this.name == null) ? (other.name != null) : !this.name.equals(other.name)) { + return false; + } + if ((this.url == null) ? (other.url != null) : !this.url.equals(other.url)) { + return false; + } + if ((this.source == null) ? (other.source != null) : !this.source.equals(other.source)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 5; + hash = 67 * hash + (this.name != null ? this.name.hashCode() : 0); + hash = 67 * hash + (this.url != null ? this.url.hashCode() : 0); + hash = 67 * hash + (this.source != null ? this.source.hashCode() : 0); + return hash; + } + } diff --git a/src/main/java/org/codesecure/dependencycheck/dependency/Vulnerability.java b/src/main/java/org/codesecure/dependencycheck/dependency/Vulnerability.java index b5f7dd26b..b149917f6 100644 --- a/src/main/java/org/codesecure/dependencycheck/dependency/Vulnerability.java +++ b/src/main/java/org/codesecure/dependencycheck/dependency/Vulnerability.java @@ -29,7 +29,7 @@ import java.util.List; */ public class Vulnerability implements Serializable { - private static final long serialVersionUID = -9197349868449482427L; + private static final long serialVersionUID = 307319490326651052L; /** * The name of the vulnerability */ @@ -162,4 +162,26 @@ public class Vulnerability implements Serializable { public void addVulnerableSoftware(String cpe) { addVulnerableSoftware(cpe, null); } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Vulnerability other = (Vulnerability) obj; + if ((this.name == null) ? (other.name != null) : !this.name.equals(other.name)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 5; + hash = 41 * hash + (this.name != null ? this.name.hashCode() : 0); + return hash; + } } diff --git a/src/main/java/org/codesecure/dependencycheck/dependency/VulnerableSoftware.java b/src/main/java/org/codesecure/dependencycheck/dependency/VulnerableSoftware.java index 074b310d6..d03f93340 100644 --- a/src/main/java/org/codesecure/dependencycheck/dependency/VulnerableSoftware.java +++ b/src/main/java/org/codesecure/dependencycheck/dependency/VulnerableSoftware.java @@ -30,31 +30,9 @@ import org.codesecure.dependencycheck.data.cpe.Entry; * * @author Jeremy Long (jeremy.long@gmail.com) */ -public class VulnerableSoftware implements Serializable { +public class VulnerableSoftware extends Entry implements Serializable { private static final long serialVersionUID = 307319490326651052L; - /** - * a cpe entry - */ - protected Entry cpe; - - /** - * Get the value of cpe - * - * @return the value of cpe - */ - public Entry getCpe() { - return cpe; - } - - /** - * Set the value of cpe - * - * @param cpe new value of cpe - */ - public void setCpe(Entry cpe) { - this.cpe = cpe; - } /** * Parse a CPE entry from the cpe string repesentation @@ -62,22 +40,14 @@ public class VulnerableSoftware implements Serializable { * @param cpe a cpe entry (e.g. cpe:/a:vendor:software:version) */ public void setCpe(String cpe) { - this.cpe = new Entry(); try { - this.cpe.parseName(cpe); + parseName(cpe); } catch (UnsupportedEncodingException ex) { Logger.getLogger(VulnerableSoftware.class.getName()).log(Level.SEVERE, null, ex); - this.cpe.setName(cpe); + setName(cpe); } } - /** - * Returns the CPE entry name - * @return te CPE entry name - */ - public String getName() { - return this.cpe.getName(); - } /** * If present, indicates that previous version are vulnerable */ @@ -109,4 +79,27 @@ public class VulnerableSoftware implements Serializable { public void setPreviousVersion(String previousVersion) { this.previousVersion = previousVersion; } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final VulnerableSoftware other = (VulnerableSoftware) obj; + if ((this.name == null) ? (other.name != null) : !this.name.equals(other.name)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 7; + hash = 83 * hash + (this.name != null ? this.name.hashCode() : 0); + hash = 83 * hash + (this.previousVersion != null ? this.previousVersion.hashCode() : 0); + return hash; + } } diff --git a/src/main/java/org/codesecure/dependencycheck/utils/SSDeep.java b/src/main/java/org/codesecure/dependencycheck/utils/SSDeep.java deleted file mode 100644 index 37572917a..000000000 --- a/src/main/java/org/codesecure/dependencycheck/utils/SSDeep.java +++ /dev/null @@ -1,677 +0,0 @@ -/* ssdeep - Copyright (C) 2006 ManTech International Corporation - - $Id: fuzzy.c 97 2010-03-19 15:10:06Z jessekornblum $ - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - - The code in this file, and this file only, is based on SpamSum, part - of the Samba project: - http://www.samba.org/ftp/unpacked/junkcode/spamsum/ - - Because of where this file came from, any program that contains it - must be licensed under the terms of the General Public License (GPL). - See the file COPYING for details. The author's original comments - about licensing are below: - - - - this is a checksum routine that is specifically designed for spam. - Copyright Andrew Tridgell 2002 - - This code is released under the GNU General Public License version 2 - or later. Alteratively, you may also use this code under the terms - of the Perl Artistic license. - - If you wish to distribute this code under the terms of a different - free software license then please ask me. If there is a good reason - then I will probably say yes. - -*/ - -//package eu.scape_project.bitwiser.utils; -//https://raw.github.com/openplanets/bitwiser/master/src/main/java/eu/scape_project/bitwiser/utils/SSDeep.java -package org.codesecure.dependencycheck.utils; - - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.Arrays; - -import org.apache.commons.lang.StringUtils; - -/** - * SSDeep - * - *

- * A Java version of the ssdeep algorithm, based on the fuzzy.c source - * code, taken from version 2.6 of the ssdeep package. - * - *

- * Transliteration/port to Java from C by... - * - * @author Andrew Jackson - * - */ -public class SSDeep { - - public class FuzzyHash { - /** the blocksize used by the program, */ - int blocksize; - /** the hash for this blocksize */ - String hash; - /** the hash for twice the blocksize, */ - String hash2; - /** the filename. */ - String filename; - } - - /// Length of an individual fuzzy hash signature component - public static final int SPAMSUM_LENGTH = 64; - - /// The longest possible length for a fuzzy hash signature (without the filename) - public static final int FUZZY_MAX_RESULT = (SPAMSUM_LENGTH + (SPAMSUM_LENGTH/2 + 20)); - - - public static final int MIN_BLOCKSIZE = 3; - public static final int ROLLING_WINDOW = 7; - - public static final int HASH_PRIME = 0x01000193; - public static final int HASH_INIT = 0x28021967; - - // Our input buffer when reading files to hash - public static final int BUFFER_SIZE = 8192; - - static class roll_state_class { - int[] window = new int[ROLLING_WINDOW]; - int h1, h2, h3; - int n; - } - private static roll_state_class roll_state = new roll_state_class(); - - - /* - a rolling hash, based on the Adler checksum. By using a rolling hash - we can perform auto resynchronisation after inserts/deletes - - internally, h1 is the sum of the bytes in the window and h2 - is the sum of the bytes times the index - - h3 is a shift/xor based rolling hash, and is mostly needed to ensure that - we can cope with large blocksize values - */ - static int roll_hash(int c) - { - -// System.out.println(""+roll_state.h1+","+roll_state.h2+","+roll_state.h3); - roll_state.h2 -= roll_state.h1; - //roll_state.h2 = roll_state.h2 & 0x7fffffff; - roll_state.h2 += ROLLING_WINDOW * c; - //roll_state.h2 = roll_state.h2 & 0x7fffffff; - - roll_state.h1 += c; - //roll_state.h1 = roll_state.h1 & 0x7fffffff; - roll_state.h1 -= roll_state.window[(roll_state.n % ROLLING_WINDOW)]; - //roll_state.h1 = roll_state.h1 & 0x7fffffff; - - roll_state.window[roll_state.n % ROLLING_WINDOW] = (char)c; - roll_state.n = (roll_state.n+1)%ROLLING_WINDOW; - - /* The original spamsum AND'ed this value with 0xFFFFFFFF which - in theory should have no effect. This AND has been removed - for performance (jk) */ - roll_state.h3 = (roll_state.h3 << 5);// & 0xFFFFFFFF; - roll_state.h3 ^= c; - //roll_state.h3 = roll_state.h3 & 0x7FFFFFFF; - //if( roll_state.h3 > 0xEFFFFFFF ) roll_state.h3 -= 0xEFFFFFFF; - - long result = ((roll_state.h1 + roll_state.h2 + roll_state.h3));//&0x7FFFFFFF; - //System.out.println("Result: "+result); - //System.out.println("Result2: "+(result&0xFFFFFFFF)); - //System.out.println("Result3: "+(result&0x7FFFFFFF)); - - return (int) result;//&0xFFFFFFFF; - } - - /* - reset the state of the rolling hash and return the initial rolling hash value - */ - static void roll_reset() - { - roll_state.h1 = 0; - roll_state.h2 = 0; - roll_state.h3 = 0; - roll_state.n = 0; - Arrays.fill(roll_state.window,(char)0); - } - - /* a simple non-rolling hash, based on the FNV hash */ - static int sum_hash(int c, int h) - { - h *= HASH_PRIME; - //h = h & 0xFFFFFFFF; - h ^= c; - //h = h & 0xFFFFFFFF; - return h; - } - - class ss_context { - char[] ret; - char[] p; - long total_chars; - int h, h2, h3; - int j, n, i, k; - int block_size; - char[] ret2 = new char[SPAMSUM_LENGTH/2 + 1]; - } - - - static void ss_destroy(ss_context ctx) - { - if (ctx.ret != null) - ctx.ret = null; - //free(ctx.ret); - } - - - static boolean ss_init(ss_context ctx, File handle) - { - if ( ctx == null ) - return true; - - ctx.ret = new char[FUZZY_MAX_RESULT]; - if (ctx.ret == null) - return true; - - if (handle != null) - ctx.total_chars = handle.length(); - - ctx.block_size = MIN_BLOCKSIZE; - while (ctx.block_size * SPAMSUM_LENGTH < ctx.total_chars) { - ctx.block_size = ctx.block_size * 2; - } - - System.out.println("bs:"+ctx.block_size); - - return false; - } - - static char[] b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray(); - - static void ss_engine(ss_context ctx, - byte[] buffer, - int buffer_size) - { - if (null == ctx || null == buffer) - return; - - for ( int i = 0 ; i < buffer_size ; ++i) - { - - /* - at each character we update the rolling hash and - the normal hash. When the rolling hash hits the - reset value then we emit the normal hash as a - element of the signature and reset both hashes - */ - - System.out.println(""+ctx.h+","+ctx.h2+","+ctx.h3); - ctx.h = roll_hash(buffer[i]);// & 0x7FFFFFFF; - ctx.h2 = sum_hash(buffer[i], ctx.h2);// & 0x7FFFFFFF; - ctx.h3 = sum_hash(buffer[i], ctx.h3);// & 0x7FFFFFFF; - - if (((0xFFFFFFFFl & ctx.h) % ctx.block_size) == (ctx.block_size-1)) { - /* we have hit a reset point. We now emit a - hash which is based on all chacaters in the - piece of the message between the last reset - point and this one */ - ctx.p[ctx.j] = b64[(int)((ctx.h2&0xFFFF) % 64)]; - System.out.println("::"+ctx.j+":"+new String(ctx.p)); -// for( char c : ctx.p ) { -// System.out.print(c); -// } -// System.out.println(); - if (ctx.j < SPAMSUM_LENGTH-1) { - /* we can have a problem with the tail - overflowing. The easiest way to - cope with this is to only reset the - second hash if we have room for - more characters in our - signature. This has the effect of - combining the last few pieces of - the message into a single piece */ - - ctx.h2 = HASH_INIT; - (ctx.j)++; - } - } - - /* this produces a second signature with a block size - of block_size*2. By producing dual signatures in - this way the effect of small changes in the message - size near a block size boundary is greatly reduced. */ - if (((0xFFFFFFFFl & ctx.h) % (ctx.block_size*2)) == ((ctx.block_size*2)-1)) { - ctx.ret2[ctx.k] = b64[(int) (ctx.h3&0xFFFF % 64)]; - if (ctx.k < SPAMSUM_LENGTH/2-1) { - ctx.h3 = HASH_INIT; - (ctx.k)++; - } - } - } - } - - static boolean ss_update(ss_context ctx, File handle) throws IOException - { - int bytes_read = 0; - byte[] buffer; - - if (null == ctx || null == handle) - return true; - - buffer = new byte[BUFFER_SIZE]; - if (buffer == null) - return true; - - // snprintf(ctx.ret, 12, "%u:", ctx.block_size); - ctx.ret = (ctx.block_size + ":").toCharArray(); - // ctx.p = ctx.ret + strlen(ctx.ret); - ctx.p = new char[SPAMSUM_LENGTH]; - - //memset(ctx.p, 0, SPAMSUM_LENGTH+1); - Arrays.fill(ctx.p, (char)0 ); - //memset(ctx.ret2, 0, sizeof(ctx.ret2.length)); - Arrays.fill(ctx.ret2, (char)0 ); - - ctx.k = ctx.j = 0; - ctx.h3 = ctx.h2 = HASH_INIT; - ctx.h = 0; - roll_reset(); - - System.out.println("Opening file:"+handle); - FileInputStream in = new FileInputStream(handle); - // while ((bytes_read = fread(buffer,sizeof(byte),BUFFER_SIZE,handle)) > 0) - while (in.available() > 0 ) - { - bytes_read = in.read(buffer); - ss_engine(ctx,buffer,bytes_read); - } - - if (ctx.h != 0) - { - ctx.p[ctx.j] = b64[(int) ((ctx.h2 & 0xFFFF) % 64)]; - ctx.ret2[ctx.k] = b64[(int) ((ctx.h3 &0xFFFF) % 64)]; - } - - // strcat(ctx.p+ctx.j, ":"); - // strcat(ctx.p+ctx.j, ctx.ret2); - ctx.ret = (new String(ctx.ret) + new String(ctx.p) + ":" + new String(ctx.ret2)).toCharArray(); - - // free(buffer); - return false; - } - - - boolean fuzzy_hash_file(File handle) throws IOException - { - ss_context ctx; - int filepos; - boolean done = false; - - if (null == handle) - return true; - - ctx = new ss_context(); - if (ctx == null) - return true; - - // filepos = ftello(handle); - - ss_init(ctx, handle); - System.out.println("bs-pre:"+ctx.block_size); - - while (!done) - { - // if (fseeko(handle,0,SEEK_SET)) - // return true; - - ss_update(ctx,handle); - - System.out.println("RESULT:"+new String(ctx.ret)); - - // our blocksize guess may have been way off - repeat if necessary - if (ctx.block_size > MIN_BLOCKSIZE && ctx.j < SPAMSUM_LENGTH/2) - ctx.block_size = ctx.block_size / 2; - else - done = true; - } - - System.out.println("bs-post:"+ctx.block_size); - // strncpy(result,ctx.ret,FUZZY_MAX_RESULT); - - System.out.println("RESULT:"+new String(ctx.ret)); - - ss_destroy(ctx); - // free(ctx); - - // if (fseeko(handle,filepos,SEEK_SET)) - // return true; - - return false; - } - - - public boolean fuzzy_hash_filename(String filename) throws IOException - { - boolean status; - - if (null == filename) - return true; - - File handle = new File(filename);//,"rb"); - if (null == handle) - return true; - - status = fuzzy_hash_file(handle); - - // fclose(handle); - - return status; - } - - - boolean fuzzy_hash_buf(byte[] buf, - int buf_len, - char[] result) - { - ss_context ctx = new ss_context(); - boolean done = false; - - if (buf == null) - return true; - - ctx.total_chars = buf_len; - ss_init(ctx, null); - - System.out.println("total_chars: "+ctx.total_chars); - - while (!done) - { - // snprintf(ctx.ret, 12, "%u:", ctx.block_size); - // ctx.p = ctx.ret + strlen(ctx.ret); - ctx.p = new char[SPAMSUM_LENGTH+1]; // TODO Duplication! - - // memset(ctx.p, 0, SPAMSUM_LENGTH+1); - // memset(ctx.ret2, 0, sizeof(ctx.ret2)); - - ctx.k = ctx.j = 0; - ctx.h3 = ctx.h2 = HASH_INIT; - ctx.h = 0; - roll_reset(); - - System.out.println("h:"+ctx.h); - System.out.println("h2:"+ctx.h2); - - ss_engine(ctx,buf,buf_len); - - /* our blocksize guess may have been way off - repeat if necessary */ - if (ctx.block_size > MIN_BLOCKSIZE && ctx.j < SPAMSUM_LENGTH/2) - ctx.block_size = ctx.block_size / 2; - else - done = true; - - System.out.println("h:"+ctx.h); - System.out.println("h2:"+ctx.h2); - System.out.println("h3:"+ctx.h3); - System.out.println("bs:"+ctx.block_size); - System.out.println("ret:"+new String(ctx.ret)); - System.out.println("p:"+new String(ctx.p)); - System.out.println("ret2:"+new String(ctx.ret2)); - if (ctx.h != 0) - { - ctx.p[ctx.j] = b64[(int) ((ctx.h2&0xFFFF) % 64)]; - ctx.ret2[ctx.k] = b64[(int) ((ctx.h3&0xFFFF) % 64)]; - } - - // strcat(ctx.p+ctx.j, ":"); - // strcat(ctx.p+ctx.j, ctx.ret2); - } - - - // strncpy(result,ctx.ret,FUZZY_MAX_RESULT); - System.out.println("bs:"+ctx.block_size); - System.out.println("ret:"+new String(ctx.ret)); - System.out.println("p:"+new String(ctx.p)); - System.out.println("ret2:"+new String(ctx.ret2)); - System.out.println("h3:"+ctx.h3); - result = ctx.ret; - - ss_destroy(ctx); - // free(ctx); - return false; - } - - - - - /* - we only accept a match if we have at least one common substring in - the signature of length ROLLING_WINDOW. This dramatically drops the - false positive rate for low score thresholds while having - negligable affect on the rate of spam detection. - - return 1 if the two strings do have a common substring, 0 otherwise - */ - static int has_common_substring(char[] s1, char[] s2) - { - int i, j; - int num_hashes; - long[] hashes = new long[SPAMSUM_LENGTH]; - - /* there are many possible algorithms for common substring - detection. In this case I am re-using the rolling hash code - to act as a filter for possible substring matches */ - - roll_reset(); - // memset(hashes, 0, sizeof(hashes)); - - /* first compute the windowed rolling hash at each offset in - the first string */ - for (i=0;s1[i] != 0;i++) - { - hashes[i] = roll_hash((char)s1[i]); - } - num_hashes = i; - - roll_reset(); - - /* now for each offset in the second string compute the - rolling hash and compare it to all of the rolling hashes - for the first string. If one matches then we have a - candidate substring match. We then confirm that match with - a direct string comparison */ - for (i=0;s2[i] != 0;i++) { - long h = roll_hash((char)s2[i]); - if (i < ROLLING_WINDOW-1) continue; - for (j=ROLLING_WINDOW-1;j= ROLLING_WINDOW && - strncmp(s2+i-(ROLLING_WINDOW-1), - s1+j-(ROLLING_WINDOW-1), - ROLLING_WINDOW) == 0) - { - return 1; - } - */ - } - } - } - - return 0; - } - - - // eliminate sequences of longer than 3 identical characters. These - // sequences contain very little information so they tend to just bias - // the result unfairly - static char[] eliminate_sequences(String string) - { - char[] str = string.toCharArray(); - StringBuffer ret = new StringBuffer(); - - // Do not include repeats: - for (int i=3;i SPAMSUM_LENGTH || len2 > SPAMSUM_LENGTH) { - /* not a real spamsum signature? */ - return 0; - } - - /* the two strings must have a common substring of length - ROLLING_WINDOW to be candidates */ - if (has_common_substring(s1, s2) == 0) { - return 0; - } - - /* compute the edit distance between the two strings. The edit distance gives - us a pretty good idea of how closely related the two strings are */ - score = StringUtils.getLevenshteinDistance(new String(s1), new String(s2)); - - /* scale the edit distance by the lengths of the two - strings. This changes the score to be a measure of the - proportion of the message that has changed rather than an - absolute quantity. It also copes with the variability of - the string lengths. */ - score = (score * SPAMSUM_LENGTH) / (len1 + len2); - - /* at this stage the score occurs roughly on a 0-64 scale, - * with 0 being a good match and 64 being a complete - * mismatch */ - - /* rescale to a 0-100 scale (friendlier to humans) */ - score = (100 * score) / 64; - - /* it is possible to get a score above 100 here, but it is a - really terrible match */ - if (score >= 100) return 0; - - /* now re-scale on a 0-100 scale with 0 being a poor match and - 100 being a excellent match. */ - score = 100 - score; - - // printf ("len1: %"PRIu32" len2: %"PRIu32"\n", len1, len2); - - /* when the blocksize is small we don't want to exaggerate the match size */ - if (score > block_size/MIN_BLOCKSIZE * Math.min(len1, len2)) { - score = block_size/MIN_BLOCKSIZE * Math.min(len1, len2); - } - return score; - } - - /* - given two spamsum strings return a value indicating the degree to which they match. - */ - int fuzzy_compare(FuzzyHash fh1, FuzzyHash fh2 ) - { - int score = 0; - char[] s1_1, s1_2; - char[] s2_1, s2_2; - - // if the blocksizes don't match then we are comparing - // apples to oranges. This isn't an 'error' per se. We could - // have two valid signatures, but they can't be compared. - if (fh1.blocksize != fh2.blocksize && - fh1.blocksize != fh2.blocksize*2 && - fh2.blocksize != fh1.blocksize*2) { - return 0; - } - - // there is very little information content is sequences of - // the same character like 'LLLLL'. Eliminate any sequences - // longer than 3. This is especially important when combined - // with the has_common_substring() test below. - s1_1 = eliminate_sequences(fh1.hash+1); - s2_1 = eliminate_sequences(fh2.hash+1); - - s1_2 = eliminate_sequences(fh1.hash2+1); - s2_2 = eliminate_sequences(fh1.hash2+1); - - // each signature has a string for two block sizes. We now - // choose how to combine the two block sizes. We checked above - // that they have at least one block size in common - if (fh1.blocksize == fh2.blocksize) { - int score1, score2; - score1 = score_strings(s1_1, s2_1, fh1.blocksize); - score2 = score_strings(s1_2, s2_2, fh2.blocksize); - - // s.block_size = fh1.blocksize; - - score = Math.max(score1, score2); - } else if (fh1.blocksize == fh2.blocksize*2) { - - score = score_strings(s1_1, s2_2, fh1.blocksize); - // s.block_size = fh1.blocksize; - } else { - - score = score_strings(s1_2, s2_1, fh2.blocksize); - // s.block_size = fh2.blocksize; - } - - return (int)score; - } - - /** - * Main class for quick testing. - * @param args - * @throws IOException - */ - public static void main( String[] args ) throws IOException { - SSDeep ssd = new SSDeep(); - byte[] b2 = "Hello World how are you today...\n".getBytes(); - byte[] b3 = "Helli".getBytes(); - char[] h1 = null; - boolean t1 = ssd.fuzzy_hash_buf(b2, b2.length, h1); - System.out.println("Got "+h1); - ssd.fuzzy_hash_file(new File("test")); - //ssd.fuzzy_hash_file(new File("pom.xml")); - } -} \ No newline at end of file