Skynet

---------- ---------- 我的新 blog : liukaiyi.cublog.cn ---------- ----------

:: 管理

112 Posts :: 1 Stories :: 49 Comments :: 0 Trackbacks

代碼
參考：http://www.python.org/dev/peps/pep-0318/

def singleton(cls):
    """Class decorator that makes ``cls`` hand out one shared instance.

    The decorated name is rebound to a factory function.  The first call
    builds the instance (printing "new" as a trace) and every later call
    returns that same object.

    Args:
        cls: the class to wrap.

    Returns:
        A callable returning the single cached instance of ``cls``.
        Arguments are forwarded to ``cls`` on the first call only; on later
        calls they are ignored because the instance already exists.
    """
    instances = {}

    def getinstance(*args, **kwargs):
        if cls not in instances:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("new")
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return getinstance

@singleton
class MyClass:
    # Empty demo class; after decoration the name MyClass refers to the
    # factory function returned by singleton(), not to the class itself.
    pass

x1=MyClass()
# the call above is the one expected to print "new" (instance is created)
x2=MyClass()
# no "new" is expected here: the cached instance is handed back

posted @ 2009-04-16 23:20 劉凱毅閱讀(1050) | 評(píng)論 (0) | 編輯收藏

python - chomp

轉自：http://markbieda.wordpress.com/2008/08/13/python-and-bioinformatics-and-perl-chomp-in-python/

def chomppy(k):
    r"""Return ``k`` with one trailing line terminator removed.

    Written in the spirit of Perl's ``chomp``: a trailing ``"\r\n"`` is
    removed as a unit; otherwise a single trailing ``"\n"`` or ``"\r"`` is
    removed.  A string without a trailing terminator (including the empty
    string) is returned unchanged.

    Args:
        k: the string to trim.

    Returns:
        The trimmed string.
    """
    # Test the two-character ending first so "\r\n" is never half-stripped.
    if k.endswith("\r\n"):
        return k[:-2]
    if k.endswith("\n") or k.endswith("\r"):
        return k[:-1]
    return k
>>> chomppy('cow\n')
'cow'
>>> chomppy('')
''
>>> chomppy('hat')
'hat'
>>> chomppy('cat\r\n')
'cat'
>>> chomppy('\n')
''
>>> chomppy('\r\n')
''
>>> chomppy('cat\r')
'cat'
>>> chomppy('\r')
''

posted @ 2009-04-15 17:47 劉凱毅閱讀(1320) | 評(píng)論 (0) | 編輯收藏

python aop (metaclass)

代碼可以直接運行，看結果
如果命令調試 python -m pdb pyaop.py
(Pdb)b pyaop:10
(Pdb)c
(Pdb)n .....自己來吧
調試參考： python pdb 基礎調試

源文件： pyaop.py

#!/usr/bin/python
# -*- coding: utf8 -*-
# 參考：http://www.cnblogs.com/Alexander-Lee/archive/2008/12/06/pythonaop.html

"""
py aop 代理類 ( metaclass 特性 )
   由于使用 __metaclass__ = <type 'type'>
   pyaop 繼承 type
"""
class pyaop(type):
    """Metaclass that wraps every plain function of a class with two hooks.

    Each function found in the class body is replaced by a wrapper that
    calls ``pyaop.beforeop(target)``, then the original function, then
    ``pyaop.afterop(target)``, and returns the original function's result.
    ``target`` is the class object created by this metaclass.

    The hooks are shared by every class that uses this metaclass and are
    looked up at call time, so ``setbefore`` / ``setafter`` also affect
    classes that were created earlier.
    """

    # Default hooks do nothing; replace them via setbefore() / setafter().
    beforeop = lambda e: None
    afterop = lambda e: None

    @classmethod
    def setbefore(cls, func):
        """Install ``func(target)`` as the hook run before each wrapped call."""
        # Stored on pyaop itself because the wrappers read pyaop.beforeop.
        pyaop.beforeop = func

    @classmethod
    def setafter(cls, func):
        """Install ``func(target)`` as the hook run after each wrapped call."""
        # Stored on pyaop itself because the wrappers read pyaop.afterop.
        pyaop.afterop = func

    # Debugging hint: run ``python -m pdb pyaop.py``, set a breakpoint on the
    # first line of __new__ below and use ``a`` to list the arguments, e.g.
    #     mcl   = <class '__main__.pyaop'>
    #     name  = A
    #     bases = (<type 'object'>,)
    #     dict  = {'__module__': '__main__', 'foo': <function foo ...>,
    #              '__metaclass__': <class '__main__.pyaop'>, ...}
    def __new__(mcl, name, bases, dict):
        """Create the class after wrapping each function in its namespace.

        Args:
            mcl: the metaclass being instantiated.
            name: name of the class being created.
            bases: tuple of base classes.
            dict: the class namespace.  The parameter name shadows the
                builtin; it is kept unchanged for interface compatibility.

        Returns:
            The newly created class.
        """
        import functools
        from types import FunctionType

        # Placeholder only: ``obj`` is rebound to the finished class below.
        # The wrappers read it when they are called, so they see the class.
        obj = object()

        def aop(func):
            @functools.wraps(func)  # keep __name__ / __doc__ of the original
            def wrapper(*args, **kwds):
                pyaop.beforeop(obj)
                value = func(*args, **kwds)
                pyaop.afterop(obj)
                return value
            return wrapper

        # Iterate over a snapshot so the namespace can be updated safely;
        # items() exists on both Python 2 and 3 (iteritems() does not).
        for attr, value in list(dict.items()):
            if isinstance(value, FunctionType):
                dict[attr] = aop(value)

        obj = super(pyaop, mcl).__new__(mcl, name, bases, dict)
        return obj


class A(object):
    """Demo class whose methods are routed through the pyaop metaclass."""

    # Python 2 style metaclass declaration: class creation goes via pyaop.
    __metaclass__ = pyaop

    def foo(self):
        """Count up to 100000 one step at a time and print the total."""
        counter = 0
        for _ in range(100000):
            counter += 1
        print(counter)

    def foo2(self):
        """Same counting loop as foo(); the optional sleep stays disabled."""
        from time import sleep
        counter = 0
        for _ in range(100000):
            counter += 1
            #sleep(0.0001)
        print(counter)

"""#####################################################################################
#   測(cè)試
#####################################################################################"""

def beforep(self):
    """Hook for pyaop.setbefore(): print a marker before the wrapped call."""
    marker = "before"
    print(marker)
def afterp(self):
    """Hook for pyaop.setafter(): print a marker after the wrapped call."""
    marker = "after"
    print(marker)

if __name__ == "__main__":
    # Register the two hooks on the metaclass, then exercise both methods.
    # Under Python 2 (where __metaclass__ is honoured) each call is expected
    # to print "before", the total, and "after".
    pyaop.setbefore(beforep)
    pyaop.setafter(afterp)
    a=A()
    a.foo()
    a.foo2()

其他aop：
使用 @

def addspam(fn):
    """Decorator that prints a fixed banner before every call to ``fn``.

    Args:
        fn: the callable to wrap.

    Returns:
        A wrapper that prints ``spam, spam, spam``, then calls ``fn`` with
        the same positional and keyword arguments and returns its result.
    """
    import functools

    @functools.wraps(fn)  # keep the wrapped function's name and docstring
    def new(*args, **kwargs):
        # Single-argument print() behaves the same on Python 2 and 3.
        print("spam, spam, spam")
        return fn(*args, **kwargs)

    return new

@addspam
def useful(a, b):
    """Print the sum of the squares of ``a`` and ``b``."""
    sum_of_squares = a**2 + b**2
    print(sum_of_squares)

useful(3,4)
# Output:
# spam, spam, spam
# 25

晚綁定！

def attrs(**kwds):
    """Build a decorator that copies the keyword arguments onto a function.

    Every ``name=value`` pair passed here is set as an attribute of the
    decorated function, which is then returned unchanged otherwise.
    """
    def decorate(f):
        for attr_name, attr_value in kwds.items():
            setattr(f, attr_name, attr_value)
        return f

    return decorate

@attrs(versionadded="2.2",author="Guido van Rossum")
def mymethod(f):
    # Ignores its argument and returns the (decorated) function object
    # itself, so the caller can inspect the attributes set by @attrs.
    return mymethod

x=mymethod(1)
x.versionadded
# -> 2.2  (the attribute attached by the decorator is readable on the function)

posted @ 2009-04-08 15:18 劉凱毅閱讀(1258) | 評(píng)論 (0) | 編輯收藏

python 最簡單的 cgi webserver

mkdir -p cgi/cgi-bin ;
gvim time.py

#!/usr/bin/env python
# -*- coding: utf8 -*-
"""Minimal CGI script: answers with the server's local time as text/html."""
import cgi
import cgitb
import time

# Show tracebacks in the browser instead of an opaque server error.
cgitb.enable()

# Header line, then the blank line that terminates the CGI header block.
# Single-argument print() behaves the same on Python 2 and 3.
print("Content-type: text/html")
print("")
print(time.strftime('%Y-%m-%d %X', time.localtime()))

# The original used cgi.FieldStorage() without importing cgi (NameError).
form = cgi.FieldStorage()
# Get data from field 'name'
name = form.getvalue('name')

cgi 服務器建立
cd cgi
python -m CGIHTTPServer

到 firefox 中
http://127.0.0.1:8000/cgi-bin/time.py

2009-04-07 23:26:03

posted @ 2009-04-07 23:27 劉凱毅閱讀(2235) | 評(píng)論 (2) | 編輯收藏

python 抓取分析 SGMLParser 實例

數據：
希望抓取
div > p id='da' > a text
和 div > p id='da' > html

<div>
   <p id="tt">
     <a href=/tag/php>no no</a>
  </p>
  <p id='da'>
    <a href=/tag/php>php</a>
    <a href=/tag/python>python</a>
    <a href=/tag/vim>vim</a>
    <a href=/tag/windows>windows</a>
   <a href=/tag/wingide>wingide</a>
  </p>
</div>
<p id='da'>
   <a href=/tag/wingide>hehe</a>
</p>

希望結果為

$ python t.py
a_text: ["'php'", "'python'", "'vim'", "'windows'", "'wingide'"]

div_html[0]:
<p id="da">
    <a href="/tag/php">php</a>
    <a href="/tag/python">python</a>
    <a href="/tag/vim">vim</a>
    <a href="/tag/windows">windows</a>
   <a href="/tag/wingide">劉凱毅</a>
  </p>

#說明
其實 SGMLParser 我感覺最關鍵的是

#/usr/lib/python2.5/sgmllib.py
# Internal -- finish parsing of <tag/data/ (same as <tag>data</tag>)
    def finish_shorttag(self, tag, data):
        # Handles the <tag/data/ short form as start tag + data + end tag.
        # finish_starttag / finish_endtag are the calls that end up invoking
        # the start_* / end_* handlers during parsing.
        self.finish_starttag(tag, [])
        self.handle_data(data)
        self.finish_endtag(tag)

代碼：

#!python
#coding=UTF-8

from sgmllib import SGMLParser
class TestSGMLParser(SGMLParser):
    """Extract link texts and rebuilt HTML from ``div > p[id="da"]`` blocks.

    After a document has been fed:

    * ``a_text`` holds ``repr()`` of the text of every ``<a>`` found inside
      a ``<p id="da">`` that is itself inside a ``<div>``;
    * ``div_html`` gets one entry per closed ``<div>``: the HTML rebuilt so
      far for the matching ``<p>`` sections.
    """

    def reset(self, verbose=0):
        """Reset the base parser and all extraction state."""
        SGMLParser.reset(self)

        # Results: texts of matching <a> elements; HTML captured per <div>.
        self.a_text = []
        self.div_html = []

        # Scratch buffers filled while parsing.
        self.data_text = ""
        self.data_html = ""

        # State flags for the div > p[id="da"] > a walk.
        #   test_div:     currently inside a <div>
        #   test_div_p:   0 = not capturing,
        #                 2 = inside a matching <p> (capture everything),
        #                 1 = matching <p> just closed (write its end tag,
        #                     then stop capturing)
        #   test_div_p_a: inside an <a> within a matching <p>
        self.test_div = False
        self.test_div_p = 0
        self.test_div_p_a = False

    def handle_data(self, data):
        """Append character data to the scratch buffers."""
        # data_text is restarted by start_a(), so when end_a() reads it the
        # buffer holds only the text of the current link.
        self.data_text = self.data_text + data
        if self.test_div_p:
            self.data_html = self.data_html + data

    def finish_starttag(self, tag, attrs):
        """Dispatch the start tag, then record it while capturing."""
        # The base call runs start_p() first, so the opening <p id="da">
        # itself is already part of the capture.
        SGMLParser.finish_starttag(self, tag, attrs)
        if self.test_div_p:
            strattrs = "".join([' %s="%s"' % (key, value) for key, value in attrs])
            self.data_html = self.data_html + "<%s%s>" % (tag, strattrs)

    def finish_endtag(self, tag):
        """Dispatch the end tag, then record it while capturing."""
        SGMLParser.finish_endtag(self, tag)
        if self.test_div_p == 2:
            self.data_html = self.data_html + "</%s>" % tag
        elif self.test_div_p == 1:
            # This is the closing </p> of the matching paragraph: write it
            # and stop capturing.
            self.data_html = self.data_html + "</%s>" % tag
            self.test_div_p = 0

    def start_div(self, attrs):
        """Mark that a <div> is open."""
        self.test_div = True

    def end_div(self):
        """Store the captured HTML for the <div> that just closed."""
        # NOTE(review): data_html is never cleared, so a later <div> would
        # also contain the HTML captured for earlier ones - confirm intent.
        if self.test_div:
            self.div_html.append(self.data_html)
        self.test_div = False

    def start_p(self, attrs):
        """Start capturing when a <p id="da"> opens inside a <div>."""
        if self.test_div and any(key == 'id' and value == 'da'
                                 for key, value in attrs):
            self.test_div_p = 2

    def end_p(self):
        """Switch to 'write the final end tag only' when the <p> closes."""
        if self.test_div_p == 2:
            self.test_div_p = 1

    def start_a(self, attrs):
        """Restart the link-text buffer; flag links inside a matching <p>."""
        self.data_text = ""
        if self.test_div_p:
            self.test_div_p_a = True

    def end_a(self):
        """Store the text of a link that sat inside div > p[id="da"]."""
        if self.test_div_p and self.test_div and self.test_div_p_a:
            self.a_text.append(repr(self.data_text))
        self.test_div_p_a = False

    def close(self):
        """Finish parsing; delegates to the base class."""
        SGMLParser.close(self)

if __name__ == '__main__':
    # Parse google.html and print the collected link texts plus the HTML
    # captured for the first <div>.
    filename = 'google.html'
    try:
        f = open(filename, 'r')
        try:
            data = f.read()
        finally:
            # Close the handle even when read() fails.
            f.close()
        x = TestSGMLParser()
        x.feed(data)
        x.close()
        # The source file is UTF-8 (gvim) while the cygwin console expects
        # GBK, hence unicode(str, 'utf8').encode('gbk') for the HTML part.
        # NOTE(review): a_text[:-1] drops the last collected link text -
        # confirm that this is intended.
        print("a_text: %s \n div_html[0]: \n %s" % (
            x.a_text[:-1], unicode(x.div_html[0], 'utf8').encode('gbk')))

    except IOError as msg:
        # Report the file name; the original printed the builtin ``file``
        # type here instead of the path.
        print("%s : %s" % (filename, msg))

頁面抓取
抓取 pycurl + 分析用 SGMLParser + 驗證碼用 pytesser
下面就差算法了，抓取的準備工作終于要完成了。

posted @ 2009-04-07 18:27 劉凱毅閱讀(3669) | 評(píng)論 (0) | 編輯收藏

python pdb 基礎調試

當手邊沒有IDE,面對著python調試犯愁時，你就可以參考下本文；（pdb 命令調試）
參考：http://docs.python.org/library/pdb.html
和 (pdb)help

首先你選擇運行的 py
python -m pdb myscript.py
(Pdb) 會自動停在第一行，等待調試,這時你可以看看幫助
(Pdb) h
    說明下這幾個關鍵命令

>斷點設置
   (Pdb)b 10 #斷點設置在本py的第10行
   或(Pdb)b ots.py:20 #斷點設置到 ots.py第20行
   刪除斷點（Pdb）b #查看斷點編號
            (Pdb)cl 2 #刪除第2個斷點

>運行
    (Pdb)n #單步運行
    (Pdb)s #細點運行，也就是會下到方法內
    (Pdb)c #跳到下個斷點
>查看
    (Pdb)p param #查看當前變量值
    (Pdb)l #查看運行到某處代碼
    (Pdb)a #查看全部棧內變量

>如果是在命令行里的調試為：

import pdb

def tt():
    """Enter the pdb debugger, then print the integers 1 through 4."""
    pdb.set_trace()
    for number in range(1, 5):
        print(number)

>>> tt()
#這里支持 n p c 而已
> <stdin>(3)tt()
(Pdb) n

上面一般的調試工具大體上都能解決了，還有其他什么調試時修改變量值，回到某斷點等，可以在 pdb 中 help 下（其實我也不太明白）
望哪位知道的，能一起補全這篇文章。

posted @ 2009-04-07 14:59 劉凱毅閱讀(12919) | 評(píng)論 (0) | 編輯收藏

python Eric4 IDE 安裝

嘗試下使用 vim 以外 python 的工具
感覺 eric4 不錯(cuò)

去官方下載 eric4

windows 傻瓜安裝
先安裝 pyqt
再到 eric4 的安裝目錄下 python install.py

debian 系統
root 權限
apt-get install python-qt4
apt-get install python-QScintilla2
python install.py

posted @ 2009-04-03 17:17 劉凱毅閱讀(1034) | 評(píng)論 (0) | 編輯收藏

perl 使用

對應腳本運用：
1. shell 統籌管理腳本的運行。合理結合 crontab , ps -ef ,kill 等命令。
2. perl 處理短小快。
3. python 有比較復雜結構和邏輯的。

本文主要介紹 perl 的行級命令使用，力求短小快

：

#最簡(jiǎn)單的

$ perl -e 'print "Hello World\n"'

#處理文件行

$ perl -n -e 'print $_' file1

#編碼轉換

#如果有需要再使用下 encode("UTF-8", decode("GBK",$_));在 linux 下默認 utf-8

perl -MEncode -ne 'print decode("GBK",$_);' file.txt

#正則使用
# if($_=~/.*\/(.*)$/){ print $1 ;} 這是perl 巨方便的地方 $1 xx

# next LINE 跳到下一個循環

$ perl -n -e 'next LINE unless /pattern/; print $_'

#去除換行 chomp

perl -e 'print split(/\n/,"asg\n");'

#像 awk 一樣 Begin End

$ perl -ne 'END { print $t } @w = /(\w+)/g; $t += @w' file.txt

#像 awk -F"x" 一樣切割行

#-a 打開自動分離 (split) 模式

#@F 為切割后的數組

perl -F'\t' -ane '

if($F[1]=~/侃侃/ and $F[2]=~/愛情啊/){

print "$F[3]\t$F[4]\t$F[5]\n"

}

' all2_data.sort.st

實際處理：

# Build a sorted, de-duplicated "actor-music" list from ring.utf8.txt,
# whose fields are separated by "||".
perl -F'\|\|' -ane '
# Lines with a 4th field carry music/actor in fields 3 and 4,
# all other lines carry them in fields 1 and 2.
my ($actor, $music);
if  ( $F[3] ){
  $music=$F[2];
  $actor=$F[3];
}else{
  $music=$F[0];
  $actor=$F[1];
}
# foreach aliases its list, so both variables are normalised in place.
for my $field ($music, $actor) {
  $field =~ tr/A-Z/a-z/;       # lowercase ASCII letters only
  $field =~ s/^\s+|\s+$//g;    # trim both ends, including the newline
}
print "$actor-$music\n";
' ring.utf8.txt  |sort -u  > ring.actor_music.sort.utf8.txt
# The pipeline now runs in the foreground, so the count below is taken
# only after sort has finished writing the file.
wc -l ring.actor_music.sort.utf8.txt

#像 sed 一樣替換
# -i 和 sed 一樣 perl 輸出會替換掉 源 file.txt

$ perl -i -pe 's/\bPHP\b/Perl/g' file.txt

#外部傳參

perl -ne'print "$ARGV[0]\t$ARGV[1]\n" ' file.txt 'par1' 'par2'
#結果 .. par1 par2 ..

# 查詢出重復列次數，并列舉出來
cut -d"     "  -f 2 .collection_mobile.data |perl -ne '
   END{
     while (($key,$value)=each(%a)){print $key,"=",$value,"\n";};
   }BEGIN{ %a =(); }
   chomp;
   $a{$_}+=1;
'

結果
Ring=532895
CRBT=68500
RingBoxes=880
Song=96765

#一些實際使用 :)

find . -name "*.mp3" | perl -pe 's/.\/\w+-(\w+)-.*/$1/' | sort | uniq
perl -F'\t' -ane 'if($F[1]=~/$ARGV[0]/ and $F[2]=~/$ARGV[1]/){print "$F[3]\t$F[4]\t$F[5]\n"}' all2_data.sort.st '侃侃' '愛情啊'

#與 find 合用 -e $ARGV[0] 批量把 excel 導成文本格式
find . -maxdepth 1 -name "*xls" -exec perl -e '

require("/home/xj_liukaiyi/src/perl/excel/excelUtil.pl");
my $file=$ARGV[0];
# Append rows directly from Perl. Passing cell text through a shell
# echo command breaks on quotes, backticks and dollar signs in the data.
open(my $out, ">>", "$file.data") or die "cannot open $file.data: $!";
# Callback handed to parse_excel (defined in excelUtil.pl, not shown here;
# presumably invoked once per row with the cell values - verify there).
sub myRead{
  my $str = "";
  for my $tmp (@_){
    $str="$str$tmp\t";
  }
  # Same bytes as the former echo call: the row, a newline, and the extra
  # newline that echo used to append.
  print {$out} "$str\n\n";
}
&parse_excel("$file",0,\&myRead);
close($out);
print "$file\n";

' {} \;

參考：
http://www.ibm.com/developerworks/cn/linux/sdk/perl/l-p101/index.html
http://bbs.chinaunix.net/viewthread.php?tid=499434

posted @ 2009-04-01 14:12 劉凱毅閱讀(1959) | 評(píng)論 (2) | 編輯收藏

驗證碼識別(2)

各位“蜘蛛俠”們大家可能在抓取頁面中的驗證碼而耿耿于懷,關于這點我想我目前可能能幫助下大家，在python中找到最接近與“殺手級別”的工具（源于開源，報與開源，好東西不敢獨享）。

調下大家的積極性，上圖

python 包>> pytesser| http://code.google.com/p/pytesser/（其實 py包使用很簡單的，關鍵在安裝）
1. 首先安裝 pil ： easy_install --find-links http://www.pythonware.com/products/pil/ Imaging
2. pytesser 依賴包 tesseract-ocr| http://code.google.com/p/tesseract-ocr/
tesseract-ocr 依賴庫| http://tesseract-ocr.googlecode.com/files/tesseract-2.00.eng.tar.gz 解壓到你安裝的 tesseract-ocr 的識別學習庫下

posted @ 2009-03-27 10:21 劉凱毅閱讀(3240) | 評(píng)論 (3) | 編輯收藏

python 正則 (簡約但不簡單的demo)

re.search('(?ix)(?<=l)ike.*(?=s)','as Like as').group()
# Result: 'ike a'
# Explanation
# 1. (?..) sets matching flags (only the useful ones are listed)
#        >i makes the match case-insensitive
#        >x makes whitespace inside the pattern be ignored,
#           e.g.  (?x)li k e  can match "like"
# 2. (?<=l) is a lookbehind: must be preceded by "l", which is not consumed,
#        e.g. above (?<=l)(.*) matches l(ike..)
# 3. (?=s) is a lookahead: must be followed by "s", which is not consumed,
#         e.g. above (?<=l)(.*)(?=s) matches l(ike a)s

re.search('(?ix)(as)?(.*)(?(1)as)','As like as').group(2)
# Result: ' like '
# (as)? is the optional group used as the condition
# (?(1)as) means: if group 1 matched, then also require "as" here
#
# The condition group can be named: (?P<name>....) (?(name)....)
#re.search('(?ix)(?P<rid>as)(.*)(?(rid)as)','As Like as').group(2)
# Result: ' Like '

詳細參考 python >>> help(re)
或 http://www.python.org/doc/2.5/lib/re-syntax.html

posted @ 2009-03-24 17:46 劉凱毅閱讀(1460) | 評(píng)論 (1) | 編輯收藏

僅列出標(biāo)題

Skynet

常用鏈接

留言簿(13)

我參與的團(tuán)隊(duì)

隨筆分類

隨筆檔案

相冊(cè)

搜索

最新評(píng)論

閱讀排行榜

評(píng)論排行榜