/*
* Copyright (c) 2010-2020 Founder Ltd. All Rights Reserved.
*
* This software is the confidential and proprietary information of
* Founder. You shall not disclose such Confidential Information
* and shall use it only in accordance with the terms of the agreements
* you entered into with Founder.
*
*/
package com.founder.mysql;
import java.sql.Blob;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.BitSet;
/**
 * Static helpers for converting a {@link BitSet} to a SQL {@link Blob} and back.
 *
 * <p>Byte layout: the LAST byte of the blob holds bits 0-7 (bit 0 is the least
 * significant bit of that byte), the byte before it holds bits 8-15, and so on.
 * Note that this is the reverse byte order of {@link BitSet#toByteArray()}.
 */
public class MySQLUtil {

    /** Largest blob size, in bytes, whose every bit is still addressable by an int bit index. */
    private static final int MAX_BLOB_BYTES = Integer.MAX_VALUE / 8;

    /**
     * Creates a new blob on the given connection and fills it with the encoded bit set.
     *
     * @param myBitSet the bits to store
     * @param con      connection used to create the blob
     * @return the blob obtained from {@code con.createBlob()}, holding the encoded bits
     * @throws SQLException if creating or writing the blob fails
     */
    public static Blob bitsetToBlob(BitSet myBitSet, Connection con) throws SQLException {
        byte[] byteArray = toByteArray(myBitSet);
        Blob blob = con.createBlob();
        blob.setBytes(1, byteArray);
        return blob;
    }

    /**
     * Encodes a bit set into the byte layout described on the class.
     *
     * @param bits the bits to encode
     * @return an array of {@code bits.length() / 8 + 1} bytes
     */
    private static byte[] toByteArray(BitSet bits) {
        // The "+ 1" sizing yields one leading zero byte when length() is a multiple of 8.
        // It is kept on purpose so blobs written now are byte-identical to those written before.
        byte[] bytes = new byte[bits.length() / 8 + 1];
        // Visit only the set bits instead of testing every index.
        for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
            bytes[bytes.length - i / 8 - 1] |= 1 << (i % 8);
        }
        return bytes;
    }

    /**
     * Reads the whole blob and decodes it into a bit set.
     *
     * @param blob the blob to decode
     * @return the decoded bits; an empty bit set when the blob has length 0
     * @throws SQLException if reading the blob fails, or if the blob is larger than
     *                      {@code Integer.MAX_VALUE / 8} bytes and therefore cannot be
     *                      indexed by a {@link BitSet}
     */
    public static BitSet blobToBitSet(Blob blob) throws SQLException {
        long length = blob.length();
        if (length == 0) {
            // Some Blob implementations reject getBytes(1, 0) on an empty blob,
            // so the empty case is answered without touching the blob contents.
            return new BitSet();
        }
        if (length > MAX_BLOB_BYTES) {
            // A plain (int) cast would silently truncate and decode only part of the data.
            throw new SQLException("Blob of " + length + " bytes is too large to convert to a BitSet");
        }
        byte[] bytes = blob.getBytes(1, (int) length);
        return fromByteArray(bytes);
    }

    /**
     * Decodes bytes in the layout described on the class into a bit set.
     *
     * @param bytes the encoded bits
     * @return a bit set with bit {@code i} set exactly when the corresponding encoded bit is 1
     */
    private static BitSet fromByteArray(byte[] bytes) {
        // Size the set from the input rather than a fixed guess.
        BitSet bits = new BitSet(bytes.length * 8);
        for (int byteIndex = 0; byteIndex < bytes.length; byteIndex++) {
            // Mask to an unsigned value so the sign bit of the byte is treated like any other bit.
            int value = bytes[bytes.length - byteIndex - 1] & 0xFF;
            for (int bit = 0; value != 0; bit++, value >>>= 1) {
                if ((value & 1) != 0) {
                    bits.set(byteIndex * 8 + bit);
                }
            }
        }
        return bits;
    }
}
通過以上代碼,我們就可以把fingerprint的值計算出來,然後存儲到MySQL數據庫中了。
進行相似度搜索的時候,只需要取出已經存儲的值進行比對就可以了。
float coefficient = Tanimoto.calculate(query, MySQLUtil.blobToBitSet(results.getBlob("bits")));
筆者測試了187586條結構數據,大概需要12秒左右,基本滿足一般需求。