posts - 431,  comments - 344,  trackbacks - 0

          /*  $RCSfile$
           *  $Author$
           *  $Date$
           *  $Revision$
           *
           *  Copyright (C) 1997-2007  The Chemistry Development Kit (CDK) project
           *
           *  Contact: cdk-devel@lists.sourceforge.net
           *
           *  This program is free software; you can redistribute it and/or
           *  modify it under the terms of the GNU Lesser General Public License
           *  as published by the Free Software Foundation; either version 2.1
           *  of the License, or (at your option) any later version.
           *  All we ask is that proper credit is given for our work, which includes
           *  - but is not limited to - adding the above copyright notice to the beginning
           *  of your source code files, and to any copyright notice that you may distribute
           *  with programs based on this work.
           *
           *  This program is distributed in the hope that it will be useful,
           *  but WITHOUT ANY WARRANTY; without even the implied warranty of
           *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
           *  GNU Lesser General Public License for more details.
           *
           *  You should have received a copy of the GNU Lesser General Public License
           *  along with this program; if not, write to the Free Software
           *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
           *
           */
          package org.openscience.cdk.similarity;


          import org.openscience.cdk.annotations.TestClass;
          import org.openscience.cdk.annotations.TestMethod;
          import org.openscience.cdk.exception.CDKException;

          import java.util.BitSet;

          /**
           *  Calculates the Tanimoto coefficient for a given pair of two
           *  fingerprint bitsets or real valued feature vectors.
           *
           *  The Tanimoto coefficient is one way to
           *  quantitatively measure the "distance" or similarity of
           *  two chemical structures.
           *
           *  <p>You can use the FingerPrinter class to retrieve two fingerprint bitsets.
           *  We assume that you have two structures stored in cdk.Molecule objects.
           *  A tanimoto coefficient can then be calculated like:
           *  <pre>
           *   BitSet fingerprint1 = Fingerprinter.getFingerprint(molecule1);
           *   BitSet fingerprint2 = Fingerprinter.getFingerprint(molecule2);
           *   float tanimoto_coefficient = Tanimoto.calculate(fingerprint1, fingerprint2);
           *  </pre>
           *
           *  <p>The FingerPrinter assumes that hydrogens are explicitely given, if this
           *  is desired!
           *  <p>Note that the continuous Tanimoto coefficient does not lead to a metric space
           *
           *@author         steinbeck
           * @cdk.githash
           *@cdk.created    2005-10-19
           *@cdk.keyword    jaccard
           *@cdk.keyword    similarity, tanimoto
           * @cdk.module fingerprint
           */
          @TestClass("org.openscience.cdk.similarity.TanimotoTest")
          public class Tanimoto
          {

              /**
               * Evaluates Tanimoto coefficient for two bit sets.
               *
               * @param bitset1 A bitset (such as a fingerprint) for the first molecule
               * @param bitset2 A bitset (such as a fingerprint) for the second molecule
               * @return The Tanimoto coefficient
               * @throws org.openscience.cdk.exception.CDKException  if bitsets are not of the same length
               */
              @TestMethod("testTanimoto1,testTanimoto2")
              public static float calculate(BitSet bitset1, BitSet bitset2) throws CDKException
              {
                  float _bitset1_cardinality = bitset1.cardinality();
                  float _bitset2_cardinality = bitset2.cardinality();
                  if (bitset1.size() != bitset2.size()) {
                      throw new CDKException("Bisets must have the same bit length");
                  }
                  BitSet one_and_two = (BitSet)bitset1.clone();
                  one_and_two.and(bitset2);
                  float _common_bit_count = one_and_two.cardinality();
                  return _common_bit_count/(_bitset1_cardinality + _bitset2_cardinality - _common_bit_count);
              }
             
              /**
               * Evaluates the continuous Tanimoto coefficient for two real valued vectors.
               *
               * @param features1 The first feature vector
               * @param features2 The second feature vector
               * @return The continuous Tanimoto coefficient
               * @throws org.openscience.cdk.exception.CDKException  if the features are not of the same length
               */
              @TestMethod("testTanimoto3")
              public static float calculate(double[] features1, double[] features2) throws CDKException {

                  if (features1.length != features2.length) {
                      throw new CDKException("Features vectors must be of the same length");
                  }

                  int n = features1.length;
                  double ab = 0.0;
                  double a2 = 0.0;
                  double b2 = 0.0;

                  for (int i = 0; i < n; i++) {
                      ab += features1[i] * features2[i];
                      a2 += features1[i]*features1[i];
                      b2 += features2[i]*features2[i];
                  }
                  return (float)ab/(float)(a2+b2-ab);
              }
          }

          通過源碼可以看出，calculate(BitSet bitset1, BitSet bitset2) 方法是通過比較兩個分子的 fingerprint 的位來計算相似度：先通過 BitSet 的 and 操作得到兩者共同為 true 的位的個數，然後再除以兩者中至少有一個為 true 的位的總個數（即 bitset1 的個數 + bitset2 的個數 − 共同的個數），這樣就得到相似值。

          posted on 2009-10-18 13:36 周銳 閱讀(494) 評論(0)  編輯  收藏 所屬分類: ChemistryJavaCDK
          主站蜘蛛池模板: 子洲县| 宁津县| 错那县| 中宁县| 滦南县| 措勤县| 宝应县| 中山市| 贵港市| 屯门区| 老河口市| 宣威市| 永靖县| 安康市| 泽普县| 宾阳县| 钦州市| 松阳县| 安庆市| 万全县| 彭水| 临湘市| 阳信县| 兴和县| 六盘水市| 沂南县| 若羌县| 章丘市| 治多县| 贵定县| 吉林省| 鹤峰县| 墨玉县| 永济市| 安乡县| 鹤山市| 营山县| 红原县| 曲麻莱县| 卢龙县| 赞皇县|