posts - 431,  comments - 344,  trackbacks - 0

          /*  $RCSfile$
           *  $Author$
           *  $Date$
           *  $Revision$
           *
           *  Copyright (C) 1997-2007  The Chemistry Development Kit (CDK) project
           *
           *  Contact: cdk-devel@lists.sourceforge.net
           *
           *  This program is free software; you can redistribute it and/or
           *  modify it under the terms of the GNU Lesser General Public License
           *  as published by the Free Software Foundation; either version 2.1
           *  of the License, or (at your option) any later version.
           *  All we ask is that proper credit is given for our work, which includes
           *  - but is not limited to - adding the above copyright notice to the beginning
           *  of your source code files, and to any copyright notice that you may distribute
           *  with programs based on this work.
           *
           *  This program is distributed in the hope that it will be useful,
           *  but WITHOUT ANY WARRANTY; without even the implied warranty of
           *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
           *  GNU Lesser General Public License for more details.
           *
           *  You should have received a copy of the GNU Lesser General Public License
           *  along with this program; if not, write to the Free Software
           *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
           *
           */
          package org.openscience.cdk.similarity;


          import org.openscience.cdk.annotations.TestClass;
          import org.openscience.cdk.annotations.TestMethod;
          import org.openscience.cdk.exception.CDKException;

          import java.util.BitSet;

          /**
           *  Calculates the Tanimoto coefficient for a given pair of two
           *  fingerprint bitsets or real valued feature vectors.
           *
           *  The Tanimoto coefficient is one way to
           *  quantitatively measure the "distance" or similarity of
           *  two chemical structures.
           *
           *  <p>You can use the FingerPrinter class to retrieve two fingerprint bitsets.
           *  We assume that you have two structures stored in cdk.Molecule objects.
           *  A tanimoto coefficient can then be calculated like:
           *  <pre>
           *   BitSet fingerprint1 = Fingerprinter.getFingerprint(molecule1);
           *   BitSet fingerprint2 = Fingerprinter.getFingerprint(molecule2);
           *   float tanimoto_coefficient = Tanimoto.calculate(fingerprint1, fingerprint2);
           *  </pre>
           *
           *  <p>The FingerPrinter assumes that hydrogens are explicitely given, if this
           *  is desired!
           *  <p>Note that the continuous Tanimoto coefficient does not lead to a metric space
           *
           *@author         steinbeck
           * @cdk.githash
           *@cdk.created    2005-10-19
           *@cdk.keyword    jaccard
           *@cdk.keyword    similarity, tanimoto
           * @cdk.module fingerprint
           */
          @TestClass("org.openscience.cdk.similarity.TanimotoTest")
          public class Tanimoto
          {

              /**
               * Evaluates Tanimoto coefficient for two bit sets.
               *
               * @param bitset1 A bitset (such as a fingerprint) for the first molecule
               * @param bitset2 A bitset (such as a fingerprint) for the second molecule
               * @return The Tanimoto coefficient
               * @throws org.openscience.cdk.exception.CDKException  if bitsets are not of the same length
               */
              @TestMethod("testTanimoto1,testTanimoto2")
              public static float calculate(BitSet bitset1, BitSet bitset2) throws CDKException
              {
                  float _bitset1_cardinality = bitset1.cardinality();
                  float _bitset2_cardinality = bitset2.cardinality();
                  if (bitset1.size() != bitset2.size()) {
                      throw new CDKException("Bisets must have the same bit length");
                  }
                  BitSet one_and_two = (BitSet)bitset1.clone();
                  one_and_two.and(bitset2);
                  float _common_bit_count = one_and_two.cardinality();
                  return _common_bit_count/(_bitset1_cardinality + _bitset2_cardinality - _common_bit_count);
              }
             
              /**
               * Evaluates the continuous Tanimoto coefficient for two real valued vectors.
               *
               * @param features1 The first feature vector
               * @param features2 The second feature vector
               * @return The continuous Tanimoto coefficient
               * @throws org.openscience.cdk.exception.CDKException  if the features are not of the same length
               */
              @TestMethod("testTanimoto3")
              public static float calculate(double[] features1, double[] features2) throws CDKException {

                  if (features1.length != features2.length) {
                      throw new CDKException("Features vectors must be of the same length");
                  }

                  int n = features1.length;
                  double ab = 0.0;
                  double a2 = 0.0;
                  double b2 = 0.0;

                  for (int i = 0; i < n; i++) {
                      ab += features1[i] * features2[i];
                      a2 += features1[i]*features1[i];
                      b2 += features2[i]*features2[i];
                  }
                  return (float)ab/(float)(a2+b2-ab);
              }
          }

          通過源碼可以看出,calculate(BitSet bitset1, BitSet bitset2) 方法是通過比較兩個分子的 fingerprint 的位來計算相似度。先通過 BitSet 的 and 操作得到兩個 fingerprint 共同為 true 的位數,然後再除以至少在其中一個 fingerprint 中為 true 的位數(即兩者各自為 true 的位數之和減去共同的位數),這樣就得到相似值。

          posted on 2009-10-18 13:36 周銳 閱讀(489) 評論(0)  編輯  收藏 所屬分類: ChemistryJavaCDK
          主站蜘蛛池模板: 辰溪县| 大厂| 鹿邑县| 石泉县| 沁水县| 沅江市| 察雅县| 侯马市| 礼泉县| 周宁县| 库车县| 兖州市| 碌曲县| 利川市| 阿瓦提县| 海口市| 会东县| 哈密市| 晋城| 阜阳市| 南召县| 新津县| 麟游县| 都江堰市| 中江县| 丽水市| 淮南市| 漠河县| 镇赉县| 平乡县| 河间市| 芷江| 麻栗坡县| 洛阳市| 清水河县| 蓬溪县| 石林| 瓦房店市| 鲁山县| 三亚市| 井陉县|