import java.awt.image.BufferedImage;
import java.io.File;
import javax.imageio.ImageIO;
/**
 * Generates proportionally scaled, high-quality thumbnails by downscaling with a
 * separable (horizontal pass, then vertical pass) Lanczos filter.
 *
 * <p>The filter state is kept in static fields, so concurrent calls to
 * {@link #imageZoomOut(BufferedImage, int, int)} interfere with each other;
 * callers must serialize access.
 */
public class ThumbnailUtil {
    /** Width of the source image currently being scaled. */
    private static int width;
    /** Height of the source image currently being scaled. */
    private static int height;
    /** Requested output width; together with {@link #width} it fixes the kernel size. */
    private static int scaleWidth;
    /** Lanczos support (number of lobes) used by the kernel. */
    static double support = (double) 3.0;
    static double PI = (double) 3.14159265358978;
    /** Raw kernel weights, indexed 0..nDots-1 with the center at nHalfDots. */
    static double[] contrib;
    /** Kernel weights normalized so that the full kernel sums to 1. */
    static double[] normContrib;
    /** Scratch weights renormalized over a truncated kernel (used at image edges). */
    static double[] tmpContrib;
    static int startContrib, stopContrib;
    /** Total number of kernel taps (2 * nHalfDots + 1). */
    static int nDots;
    /** Number of kernel taps on each side of the center. */
    static int nHalfDots;

    /**
     * Path-based convenience overload of {@link #saveImage(File, File, int, int)}.
     *
     * @param fromFileStr   path of the source image
     * @param saveToFileStr path of the JPEG file to write
     * @param formatWidth   requested width, or a value &lt;= 0 to derive it from the height
     * @param formatHeight  requested height, or a value &lt;= 0 to derive it from the width
     * @throws Exception if the image cannot be read or written
     */
    public static void saveImage(String fromFileStr, String saveToFileStr, int formatWidth, int formatHeight) throws Exception {
        File saveFile = new File(saveToFileStr);
        File fromFile = new File(fromFileStr);
        saveImage(fromFile, saveFile, formatWidth, formatHeight);
    }

    /**
     * Reads {@code fromFile}, scales it down and writes the result to {@code saveFile} as JPEG.
     *
     * <p>Target size:
     * <ul>
     * <li>both dimensions &gt; 0: the image is scaled to exactly that size;</li>
     * <li>only one dimension &gt; 0: the image is scaled proportionally to fit that
     *     dimension, and is left at its original size if it is already small enough;</li>
     * <li>neither dimension &gt; 0: the original size is kept.</li>
     * </ul>
     * The image is never enlarged (see {@link #imageZoomOut(BufferedImage, int, int)}).
     *
     * @param fromFile     source image file
     * @param saveFile     destination JPEG file
     * @param formatWidth  requested width, or a value &lt;= 0 to derive it from the height
     * @param formatHeight requested height, or a value &lt;= 0 to derive it from the width
     * @throws Exception if the source cannot be decoded or the JPEG cannot be written
     */
    public static void saveImage(File fromFile , File saveFile, int formatWidth, int formatHeight) throws Exception {
        BufferedImage srcImage = javax.imageio.ImageIO.read(fromFile);
        if (srcImage == null) {
            // ImageIO.read returns null when no registered reader understands the file.
            throw new java.io.IOException("Unsupported or unreadable image file: " + fromFile);
        }
        int imageWidth = srcImage.getWidth();
        int imageHeight = srcImage.getHeight();
        int changeToWidth = imageWidth;
        int changeToHeight = imageHeight;
        if (formatWidth > 0 && formatHeight > 0) {
            changeToWidth = formatWidth;
            changeToHeight = formatHeight;
        } else if (formatWidth > 0) {
            if (imageWidth > formatWidth) {
                changeToWidth = formatWidth;
                // long arithmetic avoids int overflow; never collapse below one pixel.
                changeToHeight = Math.max(1, (int) ((long) imageHeight * formatWidth / imageWidth));
            }
        } else if (formatHeight > 0) {
            if (imageHeight > formatHeight) {
                changeToHeight = formatHeight;
                changeToWidth = Math.max(1, (int) ((long) imageWidth * formatHeight / imageHeight));
            }
        }
        srcImage = imageZoomOut(srcImage, changeToWidth, changeToHeight);
        if (!ImageIO.write(srcImage, "JPEG", saveFile)) {
            // write() returns false (without throwing) when no writer accepts the image type.
            throw new java.io.IOException("No JPEG writer available for image: " + fromFile);
        }
    }

    /**
     * Scales {@code srcBufferImage} down to {@code w} x {@code h} pixels.
     *
     * <p>If neither dimension would shrink, the source image itself is returned unchanged.
     * The kernel size is derived from the horizontal scale only and reused for the
     * vertical pass.
     *
     * @param srcBufferImage image to scale
     * @param w              target width in pixels, must be &gt; 0
     * @param h              target height in pixels, must be &gt; 0
     * @return the scaled image (TYPE_INT_RGB), or the source image when no downscaling is needed
     * @throws IllegalArgumentException if {@code w} or {@code h} is not positive
     */
    public static BufferedImage imageZoomOut(BufferedImage srcBufferImage, int w, int h) {
        if (w <= 0 || h <= 0) {
            throw new IllegalArgumentException("Target size must be positive: " + w + "x" + h);
        }
        width = srcBufferImage.getWidth();
        height = srcBufferImage.getHeight();
        scaleWidth = w;
        if (DetermineResultSize(w, h) == 1) {
            return srcBufferImage;
        }
        CalContrib();
        BufferedImage pbOut = HorizontalFiltering(srcBufferImage, w);
        BufferedImage pbFinalOut = VerticalFiltering(pbOut, h);
        return pbFinalOut;
    }

    /**
     * Decides whether scaling is needed.
     *
     * @return 1 when both scale factors are &gt;= 1 (no enlargement is performed), otherwise 0
     */
    private static int DetermineResultSize(int w, int h) {
        double scaleH, scaleV;
        scaleH = (double) w / (double) width;
        scaleV = (double) h / (double) height;
        // Check scaleH and scaleV: never enlarge the image.
        if (scaleH >= 1.0 && scaleV >= 1.0) {
            return 1;
        }
        return 0;
    }

    /**
     * Lanczos kernel value sinc(x) * sinc(x / Support) at x = i * outWidth / inWidth.
     * Only called with i &gt;= 1, so x is never 0.
     */
    private static double Lanczos(int i, int inWidth, int outWidth, double Support) {
        double x;
        x = (double) i * (double) outWidth / (double) inWidth;
        return Math.sin(x * PI) / (x * PI) * Math.sin(x * PI / Support)
                / (x * PI / Support);
    }

    /** Builds the symmetric kernel ({@link #contrib}) and its normalized form ({@link #normContrib}). */
    private static void CalContrib() {
        nHalfDots = (int) ((double) width * support / (double) scaleWidth);
        nDots = nHalfDots * 2 + 1;
        contrib = new double[nDots];
        normContrib = new double[nDots];
        tmpContrib = new double[nDots];
        int center = nHalfDots;
        contrib[center] = 1.0;
        double weight = 0.0;
        int i = 0;
        for (i = 1; i <= center; i++) {
            contrib[center + i] = Lanczos(i, width, scaleWidth, support);
            weight += contrib[center + i];
        }
        // Mirror the right half onto the left half.
        for (i = center - 1; i >= 0; i--) {
            contrib[i] = contrib[center * 2 - i];
        }
        weight = weight * 2 + 1.0;
        for (i = 0; i <= center; i++) {
            normContrib[i] = contrib[i] / weight;
        }
        for (i = center + 1; i < nDots; i++) {
            normContrib[i] = normContrib[center * 2 - i];
        }
    }

    /** Edge handling: renormalizes the kernel taps start..stop into {@link #tmpContrib}. */
    private static void CalTempContrib(int start, int stop) {
        double weight = 0;
        int i = 0;
        for (i = start; i <= stop; i++) {
            weight += contrib[i];
        }
        for (i = start; i <= stop; i++) {
            tmpContrib[i] = contrib[i] / weight;
        }
    }

    private static int GetRedValue(int rgbValue) {
        int temp = rgbValue & 0x00ff0000;
        return temp >> 16;
    }

    private static int GetGreenValue(int rgbValue) {
        int temp = rgbValue & 0x0000ff00;
        return temp >> 8;
    }

    private static int GetBlueValue(int rgbValue) {
        return rgbValue & 0x000000ff;
    }

    /** Packs three 0..255 channel values into a 0xRRGGBB integer. */
    private static int ComRGB(int redValue, int greenValue, int blueValue) {
        return (redValue << 16) + (greenValue << 8) + blueValue;
    }

    /** Horizontal filter for one output pixel: weighted sum of source pixels startX..stopX on row y. */
    private static int HorizontalFilter(BufferedImage bufImg, int startX, int stopX,
            int start, int stop, int y, double[] pContrib) {
        double valueRed = 0.0;
        double valueGreen = 0.0;
        double valueBlue = 0.0;
        int valueRGB = 0;
        int i, j;
        for (i = startX, j = start; i <= stopX; i++, j++) {
            valueRGB = bufImg.getRGB(i, y);
            valueRed += GetRedValue(valueRGB) * pContrib[j];
            valueGreen += GetGreenValue(valueRGB) * pContrib[j];
            valueBlue += GetBlueValue(valueRGB) * pContrib[j];
        }
        valueRGB = ComRGB(Clip((int) valueRed), Clip((int) valueGreen),
                Clip((int) valueBlue));
        return valueRGB;
    }

    /** Horizontal pass over the whole image: produces an image of width iOutW and unchanged height. */
    private static BufferedImage HorizontalFiltering(BufferedImage bufImage, int iOutW) {
        int dwInW = bufImage.getWidth();
        int dwInH = bufImage.getHeight();
        int value = 0;
        BufferedImage pbOut = new BufferedImage(iOutW, dwInH,
                BufferedImage.TYPE_INT_RGB);
        for (int x = 0; x < iOutW; x++) {
            int startX;
            int start;
            // Source column that corresponds to output column x.
            int X = (int) (((double) x) * ((double) dwInW) / ((double) iOutW) + 0.5);
            int y = 0;
            startX = X - nHalfDots;
            if (startX < 0) {
                startX = 0;
                start = nHalfDots - X;
            } else {
                start = 0;
            }
            int stop;
            int stopX = X + nHalfDots;
            if (stopX > (dwInW - 1)) {
                stopX = dwInW - 1;
                stop = nHalfDots + (dwInW - 1 - X);
            } else {
                stop = nHalfDots * 2;
            }
            if (start > 0 || stop < nDots - 1) {
                // Kernel is truncated by the image border: renormalize the remaining taps.
                CalTempContrib(start, stop);
                for (y = 0; y < dwInH; y++) {
                    value = HorizontalFilter(bufImage, startX, stopX, start,
                            stop, y, tmpContrib);
                    pbOut.setRGB(x, y, value);
                }
            } else {
                for (y = 0; y < dwInH; y++) {
                    value = HorizontalFilter(bufImage, startX, stopX, start,
                            stop, y, normContrib);
                    pbOut.setRGB(x, y, value);
                }
            }
        }
        return pbOut;
    }

    /** Vertical filter for one output pixel: weighted sum of source pixels startY..stopY in column x. */
    private static int VerticalFilter(BufferedImage pbInImage, int startY, int stopY,
            int start, int stop, int x, double[] pContrib) {
        double valueRed = 0.0;
        double valueGreen = 0.0;
        double valueBlue = 0.0;
        int valueRGB = 0;
        int i, j;
        for (i = startY, j = start; i <= stopY; i++, j++) {
            valueRGB = pbInImage.getRGB(x, i);
            valueRed += GetRedValue(valueRGB) * pContrib[j];
            valueGreen += GetGreenValue(valueRGB) * pContrib[j];
            valueBlue += GetBlueValue(valueRGB) * pContrib[j];
        }
        valueRGB = ComRGB(Clip((int) valueRed), Clip((int) valueGreen), Clip((int) valueBlue));
        return valueRGB;
    }

    /** Vertical pass over the whole image: produces an image of height iOutH and unchanged width. */
    private static BufferedImage VerticalFiltering(BufferedImage pbImage, int iOutH) {
        int iW = pbImage.getWidth();
        int iH = pbImage.getHeight();
        int value = 0;
        BufferedImage pbOut = new BufferedImage(iW, iOutH,
                BufferedImage.TYPE_INT_RGB);
        for (int y = 0; y < iOutH; y++) {
            int startY;
            int start;
            // Source row that corresponds to output row y.
            int Y = (int) (((double) y) * ((double) iH) / ((double) iOutH) + 0.5);
            startY = Y - nHalfDots;
            if (startY < 0) {
                startY = 0;
                start = nHalfDots - Y;
            } else {
                start = 0;
            }
            int stop;
            int stopY = Y + nHalfDots;
            if (stopY > (int) (iH - 1)) {
                stopY = iH - 1;
                stop = nHalfDots + (iH - 1 - Y);
            } else {
                stop = nHalfDots * 2;
            }
            if (start > 0 || stop < nDots - 1) {
                // Kernel is truncated by the image border: renormalize the remaining taps.
                CalTempContrib(start, stop);
                for (int x = 0; x < iW; x++) {
                    value = VerticalFilter(pbImage, startY, stopY, start, stop,
                            x, tmpContrib);
                    pbOut.setRGB(x, y, value);
                }
            } else {
                for (int x = 0; x < iW; x++) {
                    value = VerticalFilter(pbImage, startY, stopY, start, stop,
                            x, normContrib);
                    pbOut.setRGB(x, y, value);
                }
            }
        }
        return pbOut;
    }

    /** Clamps a channel value to the range 0..255. */
    static int Clip(int x) {
        if (x < 0)
            return 0;
        if (x > 255)
            return 255;
        return x;
    }
}
首先在PL/Sql中分別執行:
create or replace and compile java source named TestJava1 as
public class TestJava1
{
  /** Writes the line "Hello" to standard output. */
  public static void test()
  {
    final String greeting = "Hello";
    System.out.println(greeting);
  }
}
-- Call spec: publishes the static Java method TestJava1.test() as the PL/SQL procedure testJava1.
create or replace procedure testJava1 as language java name 'TestJava1.test()';
---------------------------------------------------------------------------------------------------------
在SQLPlus中
C:\Windows\System32>sqlplus nc5520110105/nc5520110105@192.168.10.87
SQL*Plus: Release 11.2.0.1.0 Production on Fri Apr 1 14:06:02 2011
Copyright (c) 1982, 2010, Oracle. All rights reserved.
Connected to:
Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 - 64bit Production
With the Partitioning, OLAP and Data Mining options
SQL> set serveroutput on;
SQL> show serveroutput;
serveroutput ON SIZE UNLIMITED FORMAT WORD_WRAPPED
SQL> call dbms_java.set_output(2000);
Call completed.
SQL>
SQL> show serveroutput;
serveroutput ON SIZE UNLIMITED FORMAT WORD_WRAPPED
SQL> exec testJava1();
Hello
PL/SQL procedure successfully completed.
SQL>
---------------------------------------------------------------------------------------------------------
再看一個例子:
在PL/Sql中執行:
--用Java編寫Oracle存儲過程。
create or replace and compile java source named test as
public class MyTest
{
  /** Stores the sum of a and b in ret[0]. */
  public static void myProc(int a,int b,int[] ret){
    final int sum = a + b;
    ret[0] = sum;
  }

  /** Returns the sum of a and b. */
  public static int myFunc(int a,int b){
    int total = a;
    total += b;
    return total;
  }
}
-- Create the stored procedure: call spec publishing MyTest.myProc; the OUT parameter ret is bound to the int[] argument.
create or replace procedure myProc(a in number, b in number, ret out number) as
language java name 'MyTest.myProc(int,int,int[])';
-- Create the function: call spec publishing MyTest.myFunc, whose int return value becomes the function result.
create or replace function myFunc(a in number, b in number) return number is
language java name 'MyTest.myFunc(int,int) return int';
然后在SqlPlus中測試存儲過程——
SQL> set serveroutput on
SQL> DECLARE a INTEGER;
2 BEGIN
3 myProc(1, 2, a);
4 DBMS_OUTPUT.PUT_LINE(a);
5 END;
6 /
3
PL/SQL procedure successfully completed.
SQL> select myFunc(1,2) from dual;
MYFUNC(1,2)
-----------
3
SQL>
The basic steps you'll need to follow to configure Ehcache for web page caching are (note that these steps assume you already have Ehcache installed in your application):
The following settings should help you setup web caching for your application.
The first thing you'll need to do is add a filter to enable page caching.
The following web.xml settings will enable a servlet filter for page caching:
<web-app xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd " version="2.5"> <filter> <filter-name>SimplePageCachingFilter</filter-name> <filter-class>net.sf.ehcache.constructs.web.filter.SimplePageCachingFilter </filter-class> </filter> <!-- This is a filter chain. They are executed in the order below. Do not change the order. --> <filter-mapping> <filter-name>SimplePageCachingFilter</filter-name> <url-pattern>/*</url-pattern> </filter-mapping> </web-app>
The second step to enabling web page caching is to configure ehcache with an appropriate ehcache.xml.
The following ehcache.xml file should configure a reasonable default ehcache:
<ehcache xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../../main/config/ehcache.xsd"> <cache name="SimplePageCachingFilter" maxElementsInMemory="10000" maxElementsOnDisk="1000" eternal="false" overflowToDisk="true" timeToIdleSeconds="300" timeToLiveSeconds="600" memoryStoreEvictionPolicy="LFU" /> </ehcache>
package com.founder.opsin;
import nu.xom.Element;
import uk.ac.cam.ch.wwmm.opsin.NameToInchi;
import uk.ac.cam.ch.wwmm.opsin.NameToStructure;
import uk.ac.cam.ch.wwmm.opsin.NameToStructureConfig;
import uk.ac.cam.ch.wwmm.opsin.NameToStructureException;
import uk.ac.cam.ch.wwmm.opsin.OpsinResult;
public class OpsinTest {

    /**
     * Parses the chemical name "acetonitrile" with OPSIN and prints, in order,
     * the parse status, the SMILES string and the InChI string.
     *
     * @param args unused
     * @author Zhou Rui
     * @throws NameToStructureException declared by the OPSIN calls used here
     */
    public static void main(String[] args) throws NameToStructureException {
        final String chemicalName = "acetonitrile";
        NameToStructure parser = NameToStructure.getInstance();
        NameToStructureConfig config = new NameToStructureConfig();
        OpsinResult parsed = parser.parseChemicalName(chemicalName, config);

        System.out.println(parsed.getStatus());

        String smiles = parsed.getSmiles();
        String inchi = NameToInchi.convertResultToInChI(parsed);
        System.out.println(smiles);
        System.out.println(inchi);
    }
}
輸出結果如下:
SUCCESS
C(C)#N
InChI=1/C2H3N/c1-2-3/h1H3
計算插件(脂水分配系數/考慮電解時的脂水分配系數、極性表面積、溶解性、電解常數、Lipinski五規則)
All are supported except solubility, in JChemBase, Cartridge, Knime, Pipeline pilot, Instant JChem, Jchem for Excel and in Marvin. See full list of our property predictors. Calculating Lipinski rule of 5:
2. Build and maintain project data viewer (SAR understanding)
SAR: structure-activity relationship, 結構與活性關系,簡稱構效關系
We have R-group decomposition, also a viewer in JChem for Excel, LibMCS GUI. That can be used for SAR understanding.
3. Library enumeration, cleanup, profile and analysis
Reactor, Screen, Calculator plugins, Markush Enumeration, Instant JChem, JChem for Excel, KNIME, Pipeline pilot
Some presentations on the topic:
Virtual Libraries and Virtual Screening in Drug Discovery Processes using KNIME
Library Compound Design Methods for CustomLibrary Synthesis
4. Customized Spotfire view
Yes this is the TIBCO Spotfire tool. Marvin is integrated into Spotfire, I think even JChem Cartridge can communicate with Spotfire, our new project is Instant JChem Integration which is under development
5.Similarity search
Yes, JChemBase, Cartridge, Instant JChem, JChem for Excel Similarity search in databases
For a more sophisticated approach of similarity, we provide the Screen package.
6.Clustering
JKlustor, LibMCS
7.Generate SAR Tables
生成構效關系表格
We do not support directly but we have Rgroup decomposition, Fragmentation toolkit that can be visualized and analysed later.
8.Ligand binding Efficiency
配體結合效果
LE can be calculated if the database contains the activity value, heavy atom counts can be calculated in JChem for Excel, Instant Jchem
9.Structure visualization
結構可視化
Marvin
10.Overlay/Docking
疊合/對接
No, we do not support docking. Alignment can be done in Marvin, Screen3D, and a standalone GUI for low throughput screening.
11.Build predictive ADMET models
建立預測ADMET模型。ADMET分別代表吸收、分布、代謝、排泄和毒性。
We do not support directly, although we have some calculation plugins that can be further used for these property calculations such as pKa, logP/D, Atom counts, PSA.
import org.rosuda.REngine.REXP;
import org.rosuda.REngine.REXPMismatchException;
import org.rosuda.REngine.Rserve.RConnection;
import org.rosuda.REngine.Rserve.RserveException;
public class RTest {

    /**
     * Connects to an Rserve instance using the default RConnection settings,
     * evaluates {@code R.version.string} and prints the result.
     * The connection is closed even when the evaluation fails.
     *
     * @param args unused
     * @author Zhou Rui
     * @throws RserveException if the connection or the evaluation fails
     * @throws REXPMismatchException if the result cannot be read as a string
     */
    public static void main(String[] args) throws RserveException, REXPMismatchException {
        RConnection c = new RConnection();
        try {
            REXP x = c.eval("R.version.string");
            System.out.println(x.asString());
        } finally {
            // Release the socket held by the connection.
            c.close();
        }
    }
}
解決方案:
JavaEE版本和JavaMail的版本不一致,請將sun公司上下載最新版本.http://java.sun.com/products/javamail/downloads/index.html
例如:javaMail 1.3以下的如果在javaEE5上就會出現上面的錯誤,
如果還出現此問題,則是因為javaEE5中包含有javaMail的類但是卻不全面,所以出本身的JavaMail
包沖突.用rar打開X:/Program Files/MyEclipse 6.0/myeclipse/eclipse/plugins/com.genuitec.eclipse.j2eedt.core_x.x.x.zmyeclipsexxxxxxxxx/data/libraryset/EE_5/javaee.jar
,然后刪除mail,一切就ok了.
寫完部署什么都沒問題,可當我寫了測試類進行測試時發現主鍵的初始值竟然是50,其步長亦是50,在同事的幫助下發現原來是Hibernate在做鬼,@SequenceGenerator中添加兩個參數(allocationSize = 1, initialValue = 1)就OK。通過查找Hibernate的資料發現原來是因為allocationSize的默認值是50.具體請參考http://www.oracle.com/technology/global/cn/products/ias/toplink/jpa/resources/toplink-jpa-annotations.html#SequenceGenerator
只需要增加allocationSize = 1就可以
select * from dba_registry where comp_id = 'JAVAVM'
為空,則未安裝,請執行 $ORACLE_HOME/javavm/install/initjvm.sql安裝.
創建函數
-- Call spec publishing the static Java method Caller.call as a SQL function.
-- Java type names in the signature are case-sensitive and must be fully qualified
-- (java.lang.String / java.lang.Integer).
-- NOTE(review): assumes Caller.call is declared as call(String, Integer, Integer); confirm against Caller.java.
create or replace function fn_oraclecall(mArea in VARCHAR2,mDevID in Number,mPORT in Number)
return varchar2
as
language java name 'Caller.call(java.lang.String,java.lang.Integer,java.lang.Integer) return java.lang.String';
創建存儲過程
-- Wrapper procedure: passes its three inputs to fn_oraclecall and returns the result through v_out.
create or replace procedure CHK_SETCAB_NUM
(mArea in VARCHAR2,mDevID in Number,mPORT in Number,v_out out varchar2) is
begin
v_out := fn_oraclecall(mArea,mDevID,mPORT);
end CHK_SETCAB_NUM;
loadjava
loadjava -u sys/sys@sid -oci8 -verbose -grant user -synonym -resolve -schema user D:\Caller.jar
--這里也可以是class文件,注意兼容oracle的jre版本
注意編寫的java文件里,即Caller.java的call()方法,需要是static
import java.util.BitSet;
import org.openscience.cdk.DefaultChemObjectBuilder;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.exception.InvalidSmilesException;
import org.openscience.cdk.fingerprint.ExtendedFingerprinter;
import org.openscience.cdk.smiles.SmilesParser;

public class FingerprinterTest {

    /**
     * Parses the SMILES string "c2ccc1ccccc1c2" and computes its extended fingerprint.
     * The fingerprint is only computed, not printed.
     *
     * @param args unused
     * @throws CDKException if the fingerprint calculation fails
     * @throws InvalidSmilesException if the SMILES string cannot be parsed
     */
    public static void main(String[] args) throws InvalidSmilesException, CDKException {
        ExtendedFingerprinter fingerprinter = new ExtendedFingerprinter();
        SmilesParser sp = new SmilesParser(DefaultChemObjectBuilder.getInstance());
        BitSet bt = fingerprinter.getFingerprint(sp.parseSmiles("c2ccc1ccccc1c2"));
    }
}
mol3 = pybel.readstring('smi', 'C1CCCCC1')
fp3 = mol3.calcfp()
print fp3.__or__(fp2) //計算相似度值
4. 讀取sdf文件
#encoding=utf-8
import pybel
for mymol in pybel.readfile("sdf", "structures_all.sdf"):
fp = mymol.calcfp("FP2")
print fp
5. 輸出txt文件和sdf文件
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.Iterator;
import javax.servlet.http.HttpServletResponse;
import javax.vecmath.Point2d;
import org.apache.log4j.Logger;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IMolecule;
import org.openscience.cdk.io.MDLReader;
import org.openscience.cdk.layout.StructureDiagramGenerator;
import org.openscience.cdk.renderer.Renderer2DModel;
import org.openscience.cdk.renderer.SimpleRenderer2D;

public class ImageTypeExporterUtil {
    private static final Logger logger = Logger.getLogger(ImageTypeExporterUtil.class);

    /**
     * Renders a molecule structure as a square PNG image and writes it to the servlet response.
     *
     * <p>Existing atom coordinates are discarded, a fresh 2D layout is generated, and the
     * layout is scaled and centred to fit a {@code length} x {@code length} canvas.
     *
     * @param stucture molecule structure text, read with CDK's MDLReader
     * @param length   width and height of the image in pixels, must be positive
     * @param response HttpServletResponse object the PNG is written to
     * @throws IllegalArgumentException if {@code length} is null or not positive
     * @throws Exception if reading the structure, generating the 2D layout or writing the image fails
     */
    public static void showAsImage(String stucture, Integer length, HttpServletResponse response) throws Exception {
        logger.debug("ImageTypeExporterUtil.showAsImage..");
        if (length == null || length.intValue() <= 0) {
            throw new IllegalArgumentException("length must be a positive number of pixels: " + length);
        }
        final int size = length.intValue();

        StringReader mdl = new StringReader(stucture);
        MDLReader cdkMDL = new MDLReader(mdl);
        Molecule mol = new Molecule();
        cdkMDL.read(mol);

        // null coordinates
        Iterator<IAtom> itatoms = mol.atoms();
        while (itatoms.hasNext()) {
            IAtom atom = itatoms.next();
            atom.setPoint2d(null);
            atom.setPoint3d(null);
        }

        // generate 2D coordinates
        StructureDiagramGenerator sdg = new StructureDiagramGenerator();
        sdg.setMolecule(mol);
        try {
            sdg.generateCoordinates();
        } catch (Exception ex) {
            // The coordinates were cleared above, so continuing without a layout would
            // only fail later with a NullPointerException; report the real cause instead.
            logger.error("Failed to generate 2D coordinates", ex);
            throw ex;
        }
        IMolecule layedOutMol = sdg.getMolecule();

        // scale molecule: compute the bounding box (UNDEF_POS marks "not set yet")
        final double UNDEF_POS = 100000;
        double minX = UNDEF_POS, minY = UNDEF_POS, maxX = UNDEF_POS, maxY = UNDEF_POS;
        itatoms = layedOutMol.atoms();
        while (itatoms.hasNext()) {
            IAtom atom = itatoms.next();
            Point2d point2d = atom.getPoint2d();
            if (minX == UNDEF_POS || minX > point2d.x)
                minX = point2d.x;
            if (minY == UNDEF_POS || minY > point2d.y)
                minY = point2d.y;
            if (maxX == UNDEF_POS || maxX < point2d.x)
                maxX = point2d.x;
            if (maxY == UNDEF_POS || maxY < point2d.y)
                maxY = point2d.y;
        }
        double scaleX = size / (maxX - minX + 1);
        double scaleY = size / (maxY - minY + 1);
        // Use the smaller factor so the molecule fits in both directions.
        double scale = scaleX > scaleY ? scaleY : scaleX;
        double centreX = scale * (maxX + minX) / 2.;
        double centreY = scale * (maxY + minY) / 2.;
        double offsetX = size / 2. - centreX;
        double offsetY = size / 2. - centreY;
        itatoms = layedOutMol.atoms();
        while (itatoms.hasNext()) {
            IAtom atom = itatoms.next();
            Point2d a = atom.getPoint2d();
            Point2d b = new Point2d();
            b.x = a.x * scale + offsetX;
            b.y = a.y * scale + offsetY;
            atom.setPoint2d(b);
        }

        // set rendering properties
        Renderer2DModel r2dm = new Renderer2DModel();
        r2dm.setDrawNumbers(false);
        r2dm.setUseAntiAliasing(true);
        r2dm.setColorAtomsByType(true);
        r2dm.setShowAtomTypeNames(false);
        r2dm.setShowAromaticity(true);
        r2dm.setShowImplicitHydrogens(false);
        r2dm.setShowReactionBoxes(false);
        r2dm.setKekuleStructure(false);
        Dimension dim = new Dimension();
        dim.setSize(size, size);
        r2dm.setBackgroundDimension(dim);
        r2dm.setBackColor(java.awt.Color.WHITE);

        // render the image
        SimpleRenderer2D renderer = new SimpleRenderer2D();
        renderer.setRenderer2DModel(r2dm);
        BufferedImage bufferedImage = new BufferedImage(size, size,
                BufferedImage.TYPE_INT_RGB);
        Graphics2D graphics = bufferedImage.createGraphics();
        try {
            graphics.setPaint(java.awt.Color.WHITE);
            Rectangle2D.Float rectangle = new Rectangle2D.Float(0, 0, size, size);
            graphics.fill(rectangle);
            renderer.paintMolecule(layedOutMol, graphics);
        } finally {
            // Release the native resources held by the graphics context.
            graphics.dispose();
        }

        // write the image to response
        response.setContentType("image/png");
        OutputStream out = response.getOutputStream();
        try {
            javax.imageio.ImageIO.write(bufferedImage, "png", out);
        } finally {
            out.close();
        }
    }
}
package com.founder.cdk;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.openscience.cdk.ChemFile;
import org.openscience.cdk.ChemObject;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.smiles.smarts.SMARTSQueryTool;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

public class SMARTSQueryToolTest {
    /** Shared query tool; its pattern is replaced in {@link #main(String[])} before matching. */
    static SMARTSQueryTool sqt;

    static {
        try {
            sqt = new SMARTSQueryTool("c2ccc1ccccc1c2");
        } catch (CDKException e) {
            // Fail at class-load time with the real cause instead of leaving sqt null
            // and failing later with a NullPointerException.
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Reads all structures from an SDF file, keeps those matching the SMARTS pattern,
     * then prints the number of matches followed by the "ID" property of each match.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String filename = "H:\\molecules.sdf";
        MDLV2000Reader reader = null;
        try {
            reader = new MDLV2000Reader(new FileReader(new File(filename)));
            ChemFile chemFile = (ChemFile) reader.read((ChemObject) new ChemFile());
            List<IAtomContainer> containersList = ChemFileManipulator.getAllAtomContainers(chemFile);
            List<IAtomContainer> substructureList = new ArrayList<IAtomContainer>();
            sqt.setSmarts("c1ccc3c(c1)ccc4c2ccccc2ccc34"); // reset the pattern to match against
            for (IAtomContainer molecule : containersList) {
                if (sqt.matches(molecule)) {
                    substructureList.add(molecule);
                }
            }
            System.out.println(substructureList.size());
            for (IAtomContainer molecule : substructureList) {
                System.out.println(molecule.getProperty("ID"));
            }
        } catch (CDKException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } finally {
            // Always release the underlying file handle.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
通過測試, matches方法速度很慢, 一般一個結構需要200ms-1000ms左右.
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import org.openscience.cdk.ChemFile;
import org.openscience.cdk.ChemObject;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.io.MDLReader;
import org.openscience.cdk.io.MDLV2000Reader;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

public class ReadSDFTest {

    /**
     * Reads every structure from an SDF file and prints, for each one, its full
     * property map followed by its "CD_MOLWEIGHT" property.
     *
     * @param args unused
     * @throws CDKException if the file content cannot be parsed
     * @throws FileNotFoundException if the SDF file does not exist
     */
    public static void main(String[] args) throws CDKException, FileNotFoundException {
        String filename = "H:\\molecules.sdf";
        // InputStream ins = ReadSDFTest.class.getClassLoader().getResourceAsStream(filename);
        // MDLReader reader = new MDLReader(ins);
        // alternatively, you can specify a file directly
        MDLV2000Reader reader = new MDLV2000Reader(new FileReader(new File(filename)));
        try {
            ChemFile chemFile = (ChemFile) reader.read((ChemObject) new ChemFile());
            List<IAtomContainer> containersList = ChemFileManipulator.getAllAtomContainers(chemFile);
            Molecule molecule = null;
            for (IAtomContainer mol : containersList) {
                molecule = (Molecule) mol;
                System.out.println(molecule.getProperties());
                System.out.println(molecule.getProperty("CD_MOLWEIGHT"));
                // Fingerprinter fp = new Fingerprinter();
                // BitSet bt = fp.getFingerprint(molecule);
                // System.out.println(bt);
            }
        } finally {
            // Always release the file handle; a close failure is reported but does not
            // change the declared exceptions of main.
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
import java.io.StringReader;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import org.openscience.cdk.Molecule;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.fingerprint.Fingerprinter;
import org.openscience.cdk.io.MDLReader;
import org.openscience.cdk.similarity.Tanimoto;

public class CDKTest {

    /**
     * Loads all structures from the MySQL table {@code structure}, uses the row with
     * id 1220 as the query, and counts the structures whose Tanimoto similarity to the
     * query exceeds 0.9. Timing information is printed for loading and searching.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // MySQL
        long t1 = System.currentTimeMillis();
        Connection con = null;
        Statement statement = null;
        ResultSet results = null;
        try {
            Class.forName("com.mysql.jdbc.Driver").newInstance();
            con = java.sql.DriverManager
                    .getConnection(
                            "jdbc:mysql://localhost/coocoo?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull",
                            "root", "root");
            String querySQL = "select id, structure from structure ";
            statement = con.createStatement();
            results = statement.executeQuery(querySQL);
            // dump out the results
            List<Molecule> list = new ArrayList<Molecule>();
            Fingerprinter fp = new Fingerprinter();
            BitSet bt = null;
            while (results.next()) {
                long id = results.getLong("id");
                // build a molecule object from the stored structure data
                StringReader mdl = new StringReader(results.getString("structure"));
                MDLReader cdkMDL = new MDLReader(mdl);
                Molecule molecule = new Molecule();
                cdkMDL.read(molecule);
                if (id == 1220) {
                    bt = fp.getFingerprint(molecule);
                }
                list.add(molecule);
            }
            System.out.println("size:=" + list.size());
            List<Molecule> resultList = new ArrayList<Molecule>();
            long t2 = System.currentTimeMillis();
            System.out.println("Thread: collection data in " + (t2 - t1) + " ms.");
            if (bt == null) {
                // Without the query fingerprint Tanimoto.calculate would fail with a NullPointerException.
                System.err.println("Query structure (id 1220) not found; similarity search skipped.");
            } else {
                for (Molecule molecule : list) {
                    try {
                        float coefficient = Tanimoto.calculate(fp.getFingerprint(molecule), bt); // compute the similarity
                        if (coefficient > 0.9) {
                            resultList.add(molecule);
                        }
                    } catch (CDKException e) {
                        // Skip molecules that cannot be fingerprinted or compared, but report them.
                        System.err.println("Skipping molecule: " + e.getMessage());
                    }
                }
            }
            long t3 = System.currentTimeMillis();
            System.out.println(resultList.size());
            System.out.println("Thread: Search in " + (t3 - t2) + " ms.");
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (CDKException e) {
            e.printStackTrace();
        } finally {
            // Release JDBC resources in reverse order of acquisition, even after a failure.
            if (results != null) {
                try {
                    results.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if (statement != null) {
                try {
                    statement.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            if (con != null) {
                try {
                    con.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
        long t4 = System.currentTimeMillis();
        System.out.println("Thread: all in " + (t4 - t1) + " ms.");
    }
}
Rich Apodaca wrote a great series of posts named Fast Substructure Search Using Open Source Tools, providing details on substructure search with MySQL. However, MySQL's poor functions for operating on binary data limit the implementation of similar-structure search, which typically depends on calculating the Tanimoto coefficient. We are going to use Java & CDK to add this feature.
As the default output of the CDK fingerprinter, java.util.BitSet, which implements the Serializable interface, is a perfect data format for fingerprint storage. Java itself provides several collections such as the ArrayList, LinkedList and Vector classes in package java.util. To provide web access to the search engine, the thread-unsafe ArrayList and LinkedList have to be ruled out. How about Vector? Once all the fingerprint data is prepared, the only collection operation we need for similarity search is iteration. No add, no delete. So a lightweight array is enough.
Most of the molecule information is stored in MySQL database, so we are going to map fingerprint to corresponding row in data table. Here is the MolDFData class, we use a long variable to store corresponding primary key in data table.
/**
 * Pairs a fingerprint with the primary key of the database row it was computed from.
 */
public class MolDFData implements Serializable {

    /** Primary key of the corresponding row in the data table. */
    private long id;

    /** Fingerprint bits of the molecule. */
    private BitSet fingerprint;

    /**
     * @param id          primary key of the corresponding row
     * @param fingerprint fingerprint bits (stored by reference, not copied)
     */
    public MolDFData(long id, BitSet fingerprint) {
        this.fingerprint = fingerprint;
        this.id = id;
    }

    /** @return the primary key */
    public long getId() {
        return this.id;
    }

    /** @return the stored fingerprint instance */
    public BitSet getFingerprint() {
        return this.fingerprint;
    }

    /** @param id the new primary key */
    public void setId(long id) {
        this.id = id;
    }

    /** @param fingerprint the new fingerprint (stored by reference, not copied) */
    public void setFingerprint(BitSet fingerprint) {
        this.fingerprint = fingerprint;
    }
}
This is how we store our fingerprints.
/** All fingerprints to search, held in a plain array because the search only iterates. */
private MolDFData[] arrayData;
No big deal with similarity search. Just calculate the Tanimoto coefficient, if it’s bigger than minimal similarity you set, add this one into result.
/**
 * Returns every stored fingerprint whose Tanimoto coefficient with {@code bt} is
 * greater than {@code minSimlarity}, as a sorted list of SearchResultData.
 *
 * @param bt           query fingerprint
 * @param minSimlarity exclusive lower bound for the Tanimoto coefficient
 * @return the matching results, sorted with {@code Collections.sort}
 */
public List searchTanimoto(BitSet bt, float minSimlarity) {
    List resultList = new LinkedList();
    for (int i = 0; i < arrayData.length; i++) {
        MolDFData aListData = arrayData[i];
        try {
            float coefficient = Tanimoto.calculate(aListData.getFingerprint(), bt);
            if (coefficient > minSimlarity) {
                resultList.add(new SearchResultData(aListData.getId(), coefficient));
            }
        } catch (CDKException e) {
            // Best effort: an entry that cannot be compared with the query is skipped.
        }
    }
    // Sort once after collecting all hits instead of re-sorting on every iteration.
    Collections.sort(resultList);
    return resultList;
}
Pretty ugly code? Maybe. But it really works, at an acceptable speed.
Tests were done using the code below on a MacBook (Intel Core Duo 1.83 GHz, 2 GB RAM).
// Time one similarity search with a minimum Tanimoto similarity of 0.8 and print the elapsed milliseconds.
// NOTE(review): se (the search engine) and bs (the query fingerprint) are defined outside this fragment.
long t3 = System.currentTimeMillis();
List<SearchResultData> listResult = se.searchTanimoto(bs, 0.8f);
long t4 = System.currentTimeMillis();
System.out.println("Thread: Search done in " + (t4 - t3) + " ms.");
In my database of 87364 commercial compounds, it takes 335 ms.
import org.openscience.cdk.annotations.TestClass;
import org.openscience.cdk.annotations.TestMethod;
import org.openscience.cdk.exception.CDKException;
import java.util.BitSet;

/**
 * Calculates the Tanimoto coefficient for a given pair of two
 * fingerprint bitsets or real valued feature vectors.
 *
 * The Tanimoto coefficient is one way to
 * quantitatively measure the "distance" or similarity of
 * two chemical structures.
 *
 * <p>You can use the FingerPrinter class to retrieve two fingerprint bitsets.
 * We assume that you have two structures stored in cdk.Molecule objects.
 * A tanimoto coefficient can then be calculated like:
 * <pre>
 *   BitSet fingerprint1 = Fingerprinter.getFingerprint(molecule1);
 *   BitSet fingerprint2 = Fingerprinter.getFingerprint(molecule2);
 *   float tanimoto_coefficient = Tanimoto.calculate(fingerprint1, fingerprint2);
 * </pre>
 *
 * <p>The FingerPrinter assumes that hydrogens are explicitely given, if this
 * is desired!
 * <p>Note that the continuous Tanimoto coefficient does not lead to a metric space
 *
 * @author         steinbeck
 * @cdk.githash
 * @cdk.created    2005-10-19
 * @cdk.keyword    jaccard
 * @cdk.keyword    similarity, tanimoto
 * @cdk.module     fingerprint
 */
@TestClass("org.openscience.cdk.similarity.TanimotoTest")
public class Tanimoto
{

    /**
     * Evaluates Tanimoto coefficient for two bit sets: the number of bits set in both,
     * divided by the number of bits set in at least one of them.
     *
     * <p>If neither bit set has any bit set, the division is 0/0 and the result is NaN.
     *
     * @param bitset1 A bitset (such as a fingerprint) for the first molecule
     * @param bitset2 A bitset (such as a fingerprint) for the second molecule
     * @return The Tanimoto coefficient
     * @throws org.openscience.cdk.exception.CDKException if bitsets are not of the same length
     */
    @TestMethod("testTanimoto1,testTanimoto2")
    public static float calculate(BitSet bitset1, BitSet bitset2) throws CDKException
    {
        float _bitset1_cardinality = bitset1.cardinality();
        float _bitset2_cardinality = bitset2.cardinality();
        if (bitset1.size() != bitset2.size()) {
            throw new CDKException("Bisets must have the same bit length");
        }
        // Work on a copy so the caller's bit set is not modified by and().
        BitSet one_and_two = (BitSet)bitset1.clone();
        one_and_two.and(bitset2);
        float _common_bit_count = one_and_two.cardinality();
        return _common_bit_count/(_bitset1_cardinality + _bitset2_cardinality - _common_bit_count);
    }

    /**
     * Evaluates the continuous Tanimoto coefficient for two real valued vectors:
     * a.b / (a.a + b.b - a.b).
     *
     * @param features1 The first feature vector
     * @param features2 The second feature vector
     * @return The continuous Tanimoto coefficient
     * @throws org.openscience.cdk.exception.CDKException if the features are not of the same length
     */
    @TestMethod("testTanimoto3")
    public static float calculate(double[] features1, double[] features2) throws CDKException {
        if (features1.length != features2.length) {
            throw new CDKException("Features vectors must be of the same length");
        }
        int n = features1.length;
        double ab = 0.0;
        double a2 = 0.0;
        double b2 = 0.0;
        for (int i = 0; i < n; i++) {
            ab += features1[i] * features2[i];
            a2 += features1[i]*features1[i];
            b2 += features2[i]*features2[i];
        }
        // Divide in double precision and narrow once: casting the operands to float
        // first loses precision and overflows to Infinity for large feature values.
        return (float) (ab / (a2 + b2 - ab));
    }
}
通過源碼可以看出calculate(BitSet bitset1, BitSet bitset2)方法,是通過比較兩個分子的fingerprint的位,來計算相似度.通過BitSet的and操作得到共同的個數,然后在除以總共為true的個數,這樣就得到相似值.