??xml version="1.0" encoding="utf-8" standalone="yes"?> (tng) (tng) (tng) (tng) (tng) (tng) (tng) 生成代码有许多方法,比如可以用脚本语aQ个人喜Ƣ用perlQ,也可以用模板技术。发现java里面已经有很多模板技术可以直接用了(jin)Q比如velocity、freemaker{。我一开始是直接用perl来生成代码的Q方法比较原始,是字符串拼凑在一赗?后来发现有许多的模板技术可以利用。现在打用velocity来生成代码。说不定可以直接生成DAO、Biz、Bean、XML{一大堆东西Q呵c(din)等有空要好好研I一下?/p> (tng) (tng) (tng) 下面是我在就业网重构时用到的一个javac,其中是对BeanUtilsq行?jin)简单的装?br /> (tng) (tng) (tng)import java.sql.Date; (tng) (tng) import org.apache.commons.beanutils.BeanUtils; (tng) (tng) public class NullSafeBeanUtils { (tng) (tng) public final static String EMPTY_STRING = ""; (tng) (tng) public static String getProperty(Object bean, String property) { (tng) (tng)public static void populate(Object bean, Map props) { (tng) (tng) (tng) (tng) 在这里,poplulateҎ(gu)是我用来自动填充参数的。要实现自动填充Q只需单调用此Ҏ(gu)p?jin)。看一个例子:(x) (tng) (tng) (tng) JobExperience jobExp = new JobExperience(); (tng) (tng) (tng) NullSafeBeanUtils.populate(jobExp, request.getParameterMap()); (tng) (tng) 是不是简单了(jin)许多Q要注意的是表单的各输入字段名要和bean的各属性名对应才能自动填充。另外NullSafeBeanUtils 的getPropertyҎ(gu)也很有用Q可以避免写 (tng) (tng) if (bean != null) { (tng) (tng) (tng) (tng) (tng) yyy = bean.getXXX()Q=null?"":bean.getXXX() (tng) (tng) (tng)} (tng) (tng) q样的代码,直接写NullSafeBeanUtils.getProperty(bean, "XXX")可以了(jin)?br />
废话说Q我q里q代码来代a吧,看看怎样化我们的JDBC~程Q可以和以前q行Ҏ(gu)?br />
(1) JdbcTemplate:
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import javax.sql.DataSource;
import org.winter.util.DBUtil;
/**
(tng)* a simple JDBC template 模仿spring的JdbcTemplate
(tng)*
(tng)* @author bluestone
(tng)* @version 1.0 2006-8-8
(tng)*
(tng)*/
public class JdbcTemplate {
(tng)private DataSource dataSource = null;
(tng)public JdbcTemplate(DataSource ds) {
(tng) (tng)this.dataSource = ds;
(tng)}
/**
(tng) * 执行更新操作
(tng) *
(tng) * @param sql
(tng) * @param setter
(tng) * @return
(tng) * @throws SQLException
(tng) */
(tng)public int update(String sql, PreparedStatementSetter setter)
(tng) (tng) (tng)throws SQLException {
(tng) (tng)Connection conn = null;
(tng) (tng)reparedStatement ps = null;
(tng) (tng)try {
(tng) (tng) (tng)conn = dataSource.getConnection();
(tng) (tng) (tng)ps = conn.prepareStatement(sql);
(tng) (tng) (tng)setter.setValues(ps);
(tng) (tng) (tng)return ps.executeUpdate();
(tng) (tng)} finally {
(tng) (tng) (tng)DBUtil.colseConnection(conn, ps, null);
(tng) (tng)}
(tng)}
(tng)/**
(tng) *
(tng) * @param sql
(tng) * @return
(tng) * @throws SQLException
(tng) */
(tng)public boolean execute(String sql) throws SQLException {
(tng) (tng)Connection conn = null;
(tng) (tng)Statement stmt = null;
(tng) (tng)try {
(tng) (tng) (tng)conn = dataSource.getConnection();
(tng) (tng) (tng)stmt = conn.createStatement();
(tng) (tng) (tng)return stmt.execute(sql);
(tng) (tng)} finally {
(tng) (tng) (tng)DBUtil.colseConnection(conn, stmt, null);
(tng) (tng)}
(tng)}
(tng)/**
(tng) *
(tng) * @param sql
(tng) * @param setter
(tng) * @param extractor
(tng) * @return
(tng) * @throws SQLException
(tng) */
(tng)public Object query(String sql, PreparedStatementSetter setter,
(tng) (tng) (tng)ResultSetExtractor extractor) throws SQLException {
(tng) (tng)Connection conn = null;
(tng) (tng)reparedStatement ps = null;
(tng) (tng)ResultSet rs = null;
(tng) (tng)try {
(tng) (tng) (tng)conn = dataSource.getConnection();
(tng) (tng) (tng)ps = conn.prepareStatement(sql);
(tng) (tng) (tng)setter.setValues(ps);
(tng) (tng) (tng)rs = ps.executeQuery();
(tng) (tng) (tng)return extractor.extractData(rs);
(tng) (tng)} finally {
(tng) (tng) (tng)DBUtil.colseConnection(conn, ps, rs);
(tng) (tng)}
(tng)}
(tng)// .........................
}
(2) PreparedStatementSetter
/**
 * Callback that binds parameter values on a {@link PreparedStatement}.
 */
public interface PreparedStatementSetter {

    /**
     * Sets the parameter values on the given statement.
     *
     * @param ps the prepared statement to bind values on
     * @throws SQLException if binding a value fails
     */
    void setValues(PreparedStatement ps) throws SQLException;
}
(3) ResultSetExtractor
/**
 * Callback that converts a {@link ResultSet} into a result object.
 */
public interface ResultSetExtractor {

    /**
     * Reads the given result set and builds the result object.
     *
     * @param rs the result set to read
     * @return the object built from the result set
     * @throws SQLException if reading the result set fails
     */
    Object extractData(ResultSet rs) throws SQLException;
}
(4) 可以参考spring自己定义其他接口。。?br />
用了(jin)q些辅助c,我们可以像用spring那样~程?jin)(当然q只能用在对事务要求不高的应用环境中Q。看看怎么使用Q?br />
(tng)private JdbcTemplate template;
(tng)public JobManageDao() throws BusinessException {
(tng) (tng)try {
(tng) (tng) (tng)template = new JdbcTemplate(DBHelper.getDataSource());
(tng) (tng)} catch (NamingException e) {
(tng) (tng) (tng)throw new BusinessException(e);
(tng) (tng)}
(tng)}
public long saveJobInfo(final JobInfo info) throws BusinessException {
(tng) (tng)final long id = IdGenerator.getIdLong();
(tng) (tng)try {
(tng) (tng) (tng)int j = template.update(INSERT_JOB_SQL, new PreparedStatementSetter() {
(tng) (tng)public void setValues(PreparedStatement ps) throws SQLException {
(tng) (tng) (tng) (tng) (tng)int i = 1;
(tng) (tng) (tng) (tng) (tng)ps.setLong(i++, id);
(tng) (tng) //......
(tng) (tng) (tng)}
(tng) (tng) (tng)});
(tng) (tng) (tng) (tng) (tng) return j > 0 ? id : 0L;
(tng) (tng)} catch (SQLException e) {
(tng) (tng) (tng) (tng) (tng)throw new BusinessException(e);
(tng) (tng)}
(tng)}
]]>
(tng) (tng) (tng)import java.util.Map;
(tng) (tng) import org.apache.commons.beanutils.ConvertUtils;
(tng) (tng) import org.apache.commons.beanutils.converters.SqlDateConverter;
(tng) (tng)
(tng)
(tng) (tng) public static boolean isNull(Object obj) {
(tng) (tng) (tng) (tng) (tng) (tng) return obj == null;
(tng) (tng) }
(tng) (tng) (tng) (tng) (tng) (tng)if (bean == null) {
(tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng)return EMPTY_STRING;
(tng) (tng) (tng) (tng) (tng) (tng)}
(tng) (tng) (tng) (tng)try {
(tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng)String str = BeanUtils.getProperty(bean, property);
(tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng)if (str == null) {
(tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng) return EMPTY_STRING;
(tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng)}
(tng) (tng) (tng) (tng) (tng) (tng) (tng) return str;
(tng) (tng) (tng) (tng) } catch (Exception e) {
(tng) (tng) (tng) (tng) (tng) (tng) (tng) (tng)return EMPTY_STRING;
(tng) (tng) }
(tng) }
(tng) (tng) (tng) (tng)if (bean == null) {
(tng) (tng) (tng) (tng) (tng) (tng) (tng) return;
(tng) (tng) (tng) (tng) }
(tng) (tng) (tng)try {
(tng) (tng) (tng) (tng) SqlDateConverter con = new SqlDateConverter(new Date(System.currentTimeMillis()));
(tng) (tng) (tng) (tng) (tng) ConvertUtils.register(con, java.sql.Date.class);
(tng) (tng) (tng) (tng) (tng) BeanUtils.populate(bean, props);
(tng) (tng) } catch (Exception e) {
(tng) (tng) (tng) (tng)e.printStackTrace();
(tng) (tng) }
(tng)}
(tng)// (tng)此处省略?jin)一些其他代?br /> (tng)}
]]>
(tng)
NAME
       uniq - remove duplicate lines from a sorted file
SYNOPSIS
       uniq [OPTION]... [INPUT [OUTPUT]]
DESCRIPTION
       Discard all but one of successive identical lines from INPUT (or
       standard input), writing to OUTPUT (or standard output).
       Mandatory arguments to long options are mandatory for short options
       too.
       -c, --count
              prefix lines by the number of occurrences
       -d, --repeated
              only print duplicate lines
       -D, --all-repeated[=delimit-method] print all duplicate lines
              delimit-method={none(default),prepend,separate} Delimiting is
              done with blank lines.
       -f, --skip-fields=N
              avoid comparing the first N fields
       -i, --ignore-case
              ignore differences in case when comparing
       -s, --skip-chars=N
              avoid comparing the first N characters
       -u, --unique
              only print unique lines
       -w, --check-chars=N
              compare no more than N characters in lines
       --help display this help and exit
       --version
              output version information and exit
       A field is a run of whitespace, then non-whitespace characters.
       Fields are skipped before chars.
AUTHOR
       Written by Richard Stallman and David MacKenzie.
REPORTING BUGS
       Report bugs to <bug-coreutils@gnu.org>.
COPYRIGHT
       Copyright © 2004 Free Software Foundation, Inc.
       This is free software; see the source for copying conditions. There
       is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
       PARTICULAR PURPOSE.
SEE ALSO
       The full documentation for uniq is maintained as a Texinfo manual. If
       the info and uniq programs are properly installed at your site, the
       command
              info coreutils uniq
       should give you access to the complete manual.
(tng) (tng) (tng) a w k是一U程序语aQ对文档资料的处理具有很强的功能。awk 名称是由它三个最初设计者的姓氏的第一个字母而命名的Q?Alfred V. Aho、Peter J. We i n b e rg e r、Brian W. Kernighan?br /> (tng) (tng) (tng)a w k最初在1 9 7 7q完成? 9 8 5q发表了(jin)一个新版本的a w kQ它的功能比旧版本增Z(jin)不少。a w k能够用很短的E序Ҏ(gu)档里的资料做修改、比较、提取、打印等处理。如果用C 或P a s c a l{语a~写E序完成上述的Q务会(x)十分不方便而且很花Ҏ(gu)_(d)所写的E序也会(x)很大?br /> (tng) (tng) (tng)a w k不仅仅是一个编E语aQ它q是L i n u xpȝ理员和E序员的一个不可缺的工具。a w k语言本n十分好学Q易于掌握,q且特别的灵zR?br /> (tng) (tng) (tng)gawk 是G N U计划下所做的a w kQgawk 最初在1 9 8 6q完成,之后不断地被改进、更新。gawk 包含awk 的所有功能?br />
6.1 gawk的主要功?br />
(tng) (tng) (tng)gawk 的主要功能是针对文g的每一? l i n e )Q也是每一条记录,搜寻指定的格式。当某一行符合指定的格式Ӟgawk ׃(x)在此行执行被指定的动作。gawk 依此方式自动处理输入文g的每一行直到输入文件档案结束?br /> (tng) (tng) (tng)g a w kl常用在如下的几个方面:(x)
(tng) (tng) (tng)?Ҏ(gu)要求选择文g的某几行Q几列或部分字段以供昄输出?br /> (tng) (tng) (tng)?分析文档中的某一个字出现的频率、位|等?br /> (tng) (tng) (tng)?Ҏ(gu)某一个文档的信息准备格式化输出?br /> (tng) (tng) (tng)?以一个功能十分强大的方式qo(h)输出文档?br /> (tng) (tng) (tng)?Ҏ(gu)文档中的数D行计?br />
6.2 如何执行gawkE序
(tng) (tng) (tng)基本上有两种Ҏ(gu)可以执行g a w kE序?br /> (tng) (tng) (tng)如果gawk E序很短Q则可以gawk 直接写在命o(h)行,如下所C:(x)
(tng) (tng) (tng) (tng) (tng) (tng)gawk 'program' input-file1 input-file2 ...
(tng) (tng) (tng)
(tng) (tng) 其中program 包括一些pattern 和a c t i o n?br /> (tng) (tng) (tng)如果gawk E序较长Q较为方便的做法是将gawk E序存在一个文件中Qgawk 的格式如下所C:(x)
(tng) (tng) (tng)gawk -f program-file input-file1 input-file2 ...
(tng) (tng) (tng)gawk E序的文件不止一个时Q执行gawk 的格式如下所C:(x)
(tng) (tng) (tng)
(tng) (tng) (tng)gawk -f program-file1 -f program-file2 ... input-file1 input-file2 ...
6.3 文g、记录和字段
(tng) (tng) (tng)一般情况下Qg a w k可以处理文g中的数值数据,但也可以处理字符串信息。如果数据没有存储在文g中,可以通过道命o(h)和其他的重定向方法给g a w k提供输入。当?dng)?g a w k只能处理文本文gQA S C I I码文Ӟ(j)。?!--StartFragment -->?sh)话L(fng)本就是一个g a w k可以处理的文件的单例子。电(sh)话号码本由很多条目组成,每一个条目都有同L(fng)格式Q姓、名、地址、电(sh)话号码。每一个条目都是按字母序排列。在g a w k中,每一个这L(fng)条目叫做一个记录。它是一个完整的数据的集合。例如,?sh)话L(fng)本中的Smith Johnq个条目Q包括他的地址和电(sh)话号码,是一条记录?br /> (tng) (tng) (tng)记录中的每一叫做一个字Dc(din)在g a w k中,字段是最基本的单位。多个记录的集合l成?jin)一个文件?br /> (tng) (tng) (tng)大多数情况下Q字D之间由一个特D的字符分开Q像I格、TA B、分L(fng)。这些字W叫做字D分隔符。请看下面这? e t c / p a s s w d文gQ?br />t p a r k e r ; t 3 6 s 6 2 h s h ; 5 0 1 ; 1 0 1 ; Tim Parker;/home/tparker;/bin/bash
etreijs;2ys639dj3h;502;101;Ed Treijs;/home/etreijs;/bin/tcsh
ychow;1h27sj;503;101;Yvonne Chow;/home/ychow;/bin/bash
(tng) (tng) (tng)你可以看? e t c / p a s s w d文g使用分号作ؓ(f)字段分隔W? e t c / p a s s w d文g中的每一行都包括七个字段Q用户名Q口令;用户I DQ工作组I DQ注释; h o m e目录Q启始的外壳。如果你惌查找W六个字D,只需数过五个分号卛_?br /> (tng) (tng) (tng)但考虑C下电(sh)话号码本的例子,你就?x)发C些问题:(x)
Smith John 13 Wilson St. 555-1283
Smith John 2736 Artside Dr Apt 123 555-2736
Smith John 125 Westmount Cr 555-1726
(tng) (tng) (tng)虽然我们能够分L出每个记录包括四个字D,但g a w k却无能ؓ(f)力。电(sh)话号码本使用I格作ؓ(f)分隔W,所以g a w k认ؓ(f)S m i t h是第一个字D, John 是第二个字段Q? 3是第三个字段Q依ơ类推。就g a w k而言Q如果用I格作ؓ(f)字段分隔W的话,则第一个记录有六个字段Q而第二个记录有八个字Dc(din)?br /> (tng) (tng) (tng)所以,我们必须扑և一个更好的字段分隔W。例如,像下面一样用斜杠作为字D分隔符Q?br />Smith/John/13 Wilson St./555-1283
Smith/John/2736 Artside Dr/Apt/123/555-2736
Smith/John/125 Westmount Cr/555-1726
(tng) (tng) (tng)如果你没有指定其他的字符作ؓ(f)字段分隔W,那么g a w k缺省地使用I格或TA B作ؓ(f)字段分隔W?br />
6.4 模式和动?br />
(tng) (tng) (tng)在g a w k语言中每一个命令都׃部分l成Q一个模式( p a t t e r nQ和一个相应的动作Qa c t i o nQ。只要模式符合,g a w k׃(x)执行相应的动作。其中模式部分用两个斜杠括v来,而动作部分用一对花括号括v来。例如:(x)
/pattern1/ {action1}
/pattern2/ {action2}
/pattern3/ {action3}
(tng) (tng) (tng)所有的g a w kE序都是pL(fng)一对对的模式和动作l成的。其中模式或动作都能够被省略Q但是两个不能同时被省略。如果模式被省略Q则对于作ؓ(f)输入的文仉面的每一行,动作都会(x)被执行。如果动作被省略Q则~省的动作被执行Q既昄出所有符合模式的输入行而不做Q何的改动?br /> (tng) (tng) (tng)下面是一个简单的例子Q因为gawk E序很短Q所以将gawk E序直接写在外壳命o(h)行:(x)
gawk '/tparker/' /etc/passwd
(tng) (tng) (tng) (tng)此程序在上面提到? e t c / p a s s w d文g中寻扄合t p a r k e r模式的记录ƈ昄Q此例中没有动作Q所以缺省的动作被执行)(j)?br /> (tng) (tng) (tng)让我们再看一个例子:(x)
(tng) (tng) (tng)gawk '/UNIX/{print $2}' file2.data
(tng) (tng) (tng)此命令将逐行查找f i l e 2 . d a t a文g中包含U N I X的记录,q打印这些记录的W二个字Dc(din)你也可以在一个命令中使用多个模式和动作对Q例如:(x)
gawk '/scandal/{print $1} /rumor/{print $2}' gossip_file
(tng) (tng) (tng)此命令搜索文件g o s s i p _ f i l e中包括s c a n d a l的记录,q打印第一个字Dc(din)然后再从头搜烦(ch)g o s s i p _ f i l e中包括r u m o r的记录,q打印第二个字段?br />
6.5 比较q算和数D?br />
(tng) (tng) (tng)g a w k有很多比较运符Q下面列出重要的几个Q?br />= = 相等
! = 不相{?br />> 大于
< 于
> = 大于{于
< = 于{于
(tng) (tng) (tng)例如: (tng) (tng)gawk '$4 > 100' testfile
(tng) (tng) (tng)会(x)昄文gtestfile 中那些第四个字段大于1 0 0的记录?br /> (tng) (tng) (tng)下表列出?jin)g a w k中基本的数D符?br /> (tng) (tng) (tng)q算W说明示?br /> (tng) (tng) (tng)+ 加法q算2+6
(tng) (tng) (tng)- 减法q算6-3
(tng) (tng) (tng)* 乘法q算2*5
(tng) (tng) (tng)/ 除法q算8/4
(tng) (tng) (tng)^ 乘方q算3^2 (=9)
(tng) (tng) (tng)% 求余?%4 (=1)
(tng) (tng) (tng)例如:{print $3/2} 昄W三个字D被2除的l果?br /> (tng) (tng) (tng)在g a w k中,q算W的优先权和一般的数学q算的优先权一栗例如:(x){print $1+$2*$3}
(tng) (tng) (tng)昄W二个字D和W三个字D늛乘,然后和第一个字D늛加的l果?br /> (tng) (tng) (tng)你也可以用括h变(sh)先次序。例如:(x)
(tng) (tng) (tng){print ($1+$2)*$3}
(tng) (tng) (tng)昄W一个字D和W二个字D늛加,然后和第三个字段怹的结果?br />
6.6 内部函数
g a w k中有各种的内部函敎ͼ现在介绍如下Q?br />
6.6.1 随机数和数学函数
sqrt(x) 求x 的^Ҏ(gu)
sin(x) 求x 的正弦函?br />cos(x) 求x 的余弦函?br />a t a n 2 ( xQy) 求x / y的余切函?br />log(x) 求x 的自然对?br />exp(x) 求x 的e ơ方
int(x) 求x 的整数部?br />rand() ? ?之间的随机数
srand(x) x 讄为r a n d ( )的种子数
6.6.2 字符串的内部函数
?i n d e x ( i nQfind) 在字W串in 中寻扑֭W串find W一ơ出现的地方Q返回值是字符串find 出现在字W串in 里面的位|。如果在字符串in 里面找不到字W串f i n dQ则q回gؓ(f)0?br />例如Q?br />print index("peanut"Q? a n " )
昄l果3?br />?length(string) 求出string 有几个字W?br />例如Q?br />l e n g t h ( " a b c d e " )
昄l果5?br />?m a t c h ( s t r i n gQr e g e x p ) 在字W串string 中寻扄合regexp 的最ѝ最靠左边的子字W串。返回值是regexp 在string 的开始位|,即i n d e x倹{match 函数会(x)讄pȝ变量R S TA RT {于i n d e x的|pȝ变量RLENGTH {于W合的字W个数。如果不W合Q则?x)设|R S TA RT ?、RLENGTH ? 1?br />?s p r i n t f ( f o r m a tQe x p r e s s i o n 1Q? . . ) 和printf cMQ但是sprintf q不昄Q而是q回字符丌Ӏ例如:(x)
sprintf("pi = %.2f (approx.)", 22/7)
q回的字W串为pi = 3.14 (approx.)
?s u b ( r e g e x pQr e p l a c e m e n tQt a rg e t ) 在字W串t a rget 中寻扄合regexp 的最ѝ最靠左的地方,以字串replacement 代替最左边的r e g e x p?br />例如Q?br />str = "waterQw a t e rQe v e r y w h e r e "
sub(/at/, "ith", str)
l果字符串s t r?x)变?br />w i t h e rQw a t e rQe v e r y w h e r e
?g s u b ( r e g e x pQr e p l a c e m e n tQt a rget) 与前面的s u bcM。在字符串t a rget 中寻扄合r e g e x p的所有地方,以字W串replacement 代替所有的r e g e x p。例如:(x)
str = "water, water, everywhere"
gsub(/at/, "ith", str)
l果字符串s t r?x)变?br />w i t h e rQw i t h e rQe v e r y w h e r e
?s u b s t r ( s t r i n gQs t a r tQlength) q回字符串string 的子字符Ԍq个子字W串的长度ؓ(f)l e n g t hQ从Wstart 个位|开始。例如:(x)
s u b s t r ( " w a s h i n g t o n "Q?Q? )q回gؓ(f)i n g
如果没有length Q则q回的子字符串是从第start 个位|开始至l束?br />例如Q?br />s u b s t r ( " w a s h i n g t o n "Q? )
q回gؓ(f)i n g t o n?br />?tolower(string) 字W串s t r i n g的大写字母改为小写字母?br />例如Q?br />tolower("MiXeD cAsE 123")
q回gؓ(f)mixed case 123?br />?toupper(string) 字W串s t r i n g的小写字母改为大写字母?br />例如Q?br />toupper("MiXeD cAsE 123")
q回gؓ(f)MIXED CASE 123?br />
6.6.3 输入输出的内部函?br />
?close(filename) 输入或输出的文件filename 关闭?br />?system(command) 此函数允许用h行操作系l的指o(h)Q执行完毕后回到g a w kE序。例如:(x)
BEGIN {system("ls")}
6.7 字符串和数字
字符串就是一q串的字W,它可以被g a w k逐字地翻译。字W串用双引号括v来。数字不能用双引hhQƈ且g a w k它当作一个数倹{例如:(x)
gawk '$1 != "Tim" {print}' testfile
此命令将昄W一个字D和Ti m不相同的所有记录。如果命令中Ti m两边不用双引Pg a w k不能正执行。再如:(x)
gawk '$1 == "50" {print}' testfile
(tng) (tng) (tng)此命令将昄所有第一个字D和5 0q个字符串相同的记录。g a w k不管W一字段中的数值的大小Q而只是逐字地比较。这Ӟ字符? 0和数? 0q不相等?br />
6.8 格式化输?br />
(tng) (tng) (tng)我们可以让动作显CZ些比较复杂的l果。例如:(x)
gawk '$1 != "Tim" {print $1, $5, $6, $2}' testfile
显Ct e s t f i l e文g中所有第一个字D和Ti m不相同的记录的第一、第五、第六和W二个字Dc(din)进一步,你可以在p r i n t动作中加入字W串Q例如:(x)
gawk '$1 != "Tim" {print "The entry for ", $1, " is not Tim. ", $2}' testfile
(tng) (tng) (tng)p r i n t动作的每一部分用逗号隔开?br /> (tng) (tng) (tng)借用C语言的格式化输出指o(h)Q可以让g a w k的输出Ş式更为多栗这Ӟ应该用p r i n t f而不是p r i n t。例如:(x)
{printf "%5s likes this language\n", $2}
p r i n t f中的%5s 部分告诉gawk 如何格式化输出字W串Q也是输出5个字W长。它的值由printf 的最后部分指出,在此是第二个字段。\ n是回车换行符。如果第二个字段中存储的是h名,则输出结果大致如下:(x)
Tim likes this language
Geoff likes this language
Mike likes this language
Joe likes this language
(tng) (tng) (tng)gawk 语言支持的其他格式控制符号如下:(x)
?c 如果是字W串Q则昄W一个字W;如果是整敎ͼ则将数字以ASCII 字符的Ş式显C?br />例如Q?br />printf ? c”,6 5
l果显C字母A?br />?d 昄十进制的整数?br />?i 昄十进制的整数?br />?e QҎ(gu)以科学记数法的Ş式显C?br />例如Q?br />print ? 4 . 3 e”,1 9 5 0
l果显C? . 9 5 0 e + 0 3?br />?f 数字以点的Ş式显C?br />?g 数字以U学记数法的形式或Q点的形式昄。数字的l对值如果大于等? . 0 0 0 1?br />以Q点的形式昄Q否则以U学记数法的形式昄?br />?o 昄无符L(fng)八进制整数?br />?s 昄一个字W串?br />?x 昄无符L(fng)十六q制整数? 0? 5以a至f表示?br />?X 昄无符L(fng)十六q制整数? 0? 5以A至F表示?br />?% 它ƈ不是真正的格式控制字W,% %显C??br />当你使用q些格式控制字符Ӟ你可以在控制字符前给出数字,以表CZ用的几位或几个字符。例如,6 d表示一个整数有6位。再L(fng)下面的例子:(x)
{printf "%5s works for %5s and earns %2d an hour", $1, $2, $3}
会(x)产生cM如下的输出:(x)
Joe works for Mike and earns 12 an hour
当处理数据时Q你可以指定数据的精位?br />{printf "%5s earns $%.2f an hour"Q? 3Q? 6 }
其输出将cM于:(x)
Joe earns $12.17 an hour
你也可以使用一些换码控制符格式化整行的输出。之所以叫做换码控制符Q是因ؓ(f)g a w k对这些符hҎ(gu)的解释。下面列出常用的换码控制W:(x)
\a 警告或响铃字W?br />\b 后退一根{?br />\f 换页?br />\n 换行?br />\r 回R?br />\t Ta b?br />\v 垂直的t a b?br />
6.9 改变字段分隔W?br />
(tng) (tng) (tng)在g a w k中,~省的字D分隔符一般是I格W或TA B。但你可以在命o(h)行? F选项改变字符分隔W,只需? F后面跟着你想用的分隔W即可?br />gawk -F" ;"'/tparker/{print}' /etc/passwd
(tng) (tng) (tng)在此例中Q你字W分隔符讄成分受注意:(x) - F必须是大写的Q而且必须在第一个引号之前?br />
6.10 元字W?br />
g a w k语言在格式匹配时有其Ҏ(gu)的规则。例如, c a t能够和记录中M位置有这三个字符的字D匹配。但有时你需要一些更为特D的匚w。如果你惌c a t只和c o n c a t e n a t e匚wQ则需要在格式两端加上I格Q?br />/ cat / {print}
再例如,你希望既和c a t又和C AT匚wQ则可以使用?|)Q?br />/ cat | CAT / {print}
在g a w k中,有几个字W有Ҏ(gu)意义。下面列出可以用在g a w k格式中的q些字符Q?br />?^ 表示字段的开始?br />例如Q?3 ~ /^b/
(tng) (tng) (tng)如果W三个字D以字符b开始,则匹配?br />?$ 表示字段的结束?br />例如Q?3 ~ /b$/
如果W三个字D以字符bl束Q则匚w?br />?. 表示和Q何单字符m匚w?br />例如Q?3 ~ /i.m/
如果W三个字D|字符iQ则匚w?br />?| 表示“或”?br />例如Q? c a t | C AT/
和cat 或C AT字符匚w?br />?* 表示字符的零到多ơ重复?br />例如Q?UNI*X/
和U N X、U N I X、U N I I X、U N I I I X{匹配?br />?+ 表示字符的一ơ到多次重复?br />例如Q?br />/UNI+X/
和U N I X、U N I I X{匹配?br />?\{aQb\} 表示字符aơ到bơ之间的重复?br />例如Q?br />/ U N I \ { 1Q? \ } X
和U N I X、U N I I X和U N I I I X匚w?br />?? 表示字符零次和一ơ的重复?br />例如Q?br />/UNI?X/
和UNX 和U N I X匚w?br />?[] 表示字符的范围?br />例如Q?br />/I[BDG]M/
和I B M、I D M和I G M匚w
?[^] 表示不在[ ]中的字符?br />例如Q?br />/I[^DE]M/
和所有的以I开始、Ml束的包括三个字W的字符串匹配,除了(jin)I D M和I E M之外?br />
6.11 调用gawkE序
当需要很多对模式和动作时Q你可以~写一个g a w kE序Q也叫做g a w k脚本Q。在g a w kE序中,你可以省略模式和动作两边的引P因ؓ(f)在g a w kE序中,模式和动作从哪开始和从哪l束时是很显然的。你可以使用如下命o(h)调用g a w kE序Q?br />gawk -f script filename
此命令g a w kҎ(gu)件f i l e n a m e执行名ؓ(f)s c r i p t的g a w kE序?br />如果你不希望使用~省的字D分隔符Q你可以在f选项后面跟着F选项指定新的字段分隔W(当然你也可以在g a w kE序中指定)(j)Q例如,使用分号作ؓ(f)字段分隔W:(x)
gawk -f script -F";" filename
如果希望gawk E序处理多个文gQ则把各个文件名|列其后Q?br />gawk -f script filename1 filename2 filename3 ...
~省情况下, g a w k的输出将送往(xin)屏幕。但你可以用L i n u x的重定向命o(h)使g a w k的输出送往(xin)一个文Ӟ(x)
gawk -f script filename > save_file
6.12 BEGIN和END
(tng) (tng) (tng)有两个特D的模式在g a w k中非常有用。B E G I N模式用来指明g a w k开始处理一个文件之前执行一些动作。B E G I Nl常用来初始化数|讄参数{。E N D模式用来在文件处理完成后执行一些指令,一般用作ȝ或注释?br />BEGIN 和E N D中所有要执行的指令都应该用花括号括v来。BEGIN 和E N D必须使用大写?br />L(fng)下面的例子:(x)
BEGIN { print "Starting the process the file" }
$1 == "UNIX" {print}
$2 > 10 {printf "This line has a value of %d", $2}
END { print "Finished processing the file. Bye!"}
此程序中Q先昄一条信息:(x) Starting the process the fileQ然后将所有第一个字D늭于U N I X的整条记录显C出来,然后再显C第二个字段大于10 的记录,最后显CZ息:(x) F i n i s h e dprocessing the file. Bye!?br />
6.13 变量
在g a w k中,可以用等? = )l一个变量赋|(x)
var1 = 10
在g a w k中,你不必事先声明变量类型?br />L(fng)下面的例子:(x)
$1 == "Plastic" { count = count + 1 }
如果W一个字D|P l a s t i cQ则c o u n t的值加1。在此之前,我们应当lc o u n t赋予q初|一般是在B E G I N部分?br />下面是比较完整的例子Q?br />BEGIN { count = 0 }
$5 == "UNIX" { count = count + 1 }
END { printf "%d occurrences of UNIX were found", count }
变量可以和字D和数g起用,所以,下面的表辑ּ均ؓ(f)合法Q?br />count = count + $6
count = $5 - 8
count = $5 + var1
变量也可以是格式的一部分Q例如:(x)
$2 > max_value {print "Max value exceeded by ", $2 - max_value}
$4 - var1 < min_value {print "Illegal value of ", $4}
6.14 内置变量
g a w k语言中有几个十分有用的内|变量,现在列于下面Q?br />
NR 已经dq的记录数?br />FNR 从当前文件中d的记录数?br />F I L E N A M E 输入文g的名字?br />FS 字段分隔W(~省为空|(j)?br />RS 记录分隔W(~省为换行)(j)?br />OFMT 数字的输出格式(~省? gQ?br />OFS 输出字段分隔W?br />ORS 输出记录分隔W?br />NF 当前记录中的字段数?br />
如果你只处理一个文Ӟ则NR 和FNR 的值是一L(fng)。但如果是多个文Ӟ N R是对所有的文g来说的,而FNR 则只是针对当前文件而言。例如:(x)
NR <= 5 {print "Not enough fields in the record"}
(g)查记录数是否于5Q如果小?Q则昄出错信息?br />F S十分有用Q因为F S控制输入文g的字D分隔符。例如,在B E G I N格式中,使用如下?br />命o(h)Q?br />F S = " : "
6.15 控制l构
6.15.1 if 表达?br />
if 表达式的语法如下Q?br />if (expression){
commands
}
else {
commands
}
例如Q?br /># a simple if loop
(if ($1 == 0){
print "This cell has a value of zero"
}
else {
printf "The value is %d\n", $1
} )
再看下一个例子:(x)
# a nicely formatted if loop
(if ($1 > $2){
print "The first column is larger"
} else {
print "The second column is larger"
} )
6.15.2 while 循环
while 循环的语法如下:(x)
while (expression){
commands
}
例如Q?br /># interest calculation computes compound interest
# inputs from a file are the amount, interest_rate and years
{var = 1
while (var <= $3) {
printf("%f\n", $1*(1+$2)^var)
var++}
}
6.15.3 for 循环
for 循环的语法如下:(x)
for (initialization; expression; increment) {
command
}
例如Q?br /># interest calculation computes compound interest
# inputs from a file are the amount, interest_rate and years
{for (var=1; var <= $3; var++) {
printf("%f\n", $1*(1+$2)^var)
}
}
6.15.4 next 和exit
next 指o(h)用来告诉gawk 处理文g中的下一个记录, 而不现在正在做什么。语法如下:(x)
{ command1
command2
command3
next
command4
}
(tng) (tng) (tng)E序只要执行到n e x t指o(h)Q就跛_下一个记录从头执行命令。因此,本例中, c o m m a n d 4指o(h)永远不会(x)被执行?br /> (tng) (tng) (tng)E序遇到e x i t指o(h)后,p{到程序的末尾L行E N DQ如果有E N D的话?br />
6.16 数组
g a w k语言支持数组l构。数l不必事先初始化。声明一个数l的Ҏ(gu)如下Q?br />a r r a y n a m e [ n u m ] = v a l u e
L(fng)下面的例子:(x)
# reverse lines in a file
{line[NR] = $0 } # remember each line
END {var=NR # output lines in reverse order
while (var > 0){
print line[var]
var--
}
}
此段E序d一个文件的每一行,q用相反的顺序显C出来。我们用N R作ؓ(f)数组的下标来存储文g的每一条记录,然后在从最后一条记录开始,文仉条地显C出来?br />
6.17 用户自定义函?br />
复杂的gawk E序常常可以使用自己定义的函数来化。调用用戯定义函数与调用内部函数的Ҏ(gu)一栗函数的定义可以攑֜gawk E序的Q何地斏V?br />用户自定义函数的格式如下Q?br />function name (parameter-list) {
body-of-function
}
name 是所定义的函数的名称。一个正的函数名称可包括一序列的字母、数字、下标线( u n d e r s c o r e s )Q但是不可用数字做开头。p a r a m e t e r-list 是函数的全部参数的列表,各个参数之间以逗点隔开。body-of-function 包含gawk 的表辑ּQ它是函数定义里最重要的部分,它决定函数实际要做的事情?br />下面q个例子Q会(x)每个记录的W一个字D늚值的qx(chng)与第二个字段的值的qx(chng)加v来?br />{print "sum ="QS q u a r e S u m ( $ 1Q? 2 ) }
# Return the sum of the squares of x and y.
# `sum` is not listed as an extra parameter, so it is a global variable and
# keeps its value after the call.
function SquareSum(x, y) {
    sum = x * x + y * y
    return sum
}
(tng) (tng) (tng)到此Q我们已l知道了(jin)g a w k的基本用法。g a w k语言十分易学好用Q例如,你可以用g a w k~写一D小E序来计一个目录中所有文件的个数和容量。如果用其他的语aQ如C语言Q则?x)十分的ȝ?ch)Q相反,g a w k只需要几行就可以完成此工作?br />
6.18 几个实例
最后,再D几个g a w k的例子:(x)
gawk '{if (NF > max) max = NF}
END {print max}'
此程序会(x)昄所有输入行之中字段的最大个数?br />gawk 'length($0) > 80'
此程序会(x)昄?gu)q?0 个字W的每一行。此处只有模式被列出Q动作是采用~省值显C整个记录?br />gawk 'NF > 0'
昄拥有臛_一个字D늚所有行。这是一个简单的Ҏ(gu)Q将一个文仉的所有空白行删除?br />gawk 'BEGIN {for (i = 1; i <= 7; i++)
print int(101 * rand())}'
此程序会(x)昄?gu)围? ?00 之间? 个随机数?br />ls -l files | gawk '{x += $4}; END {print "total bytes: " x}'
此程序会(x)昄出所有指定的文g的d节数?br />expand file | gawk '{if (x < length()) x = length()}
END {print "maximum line length is " x}'
此程序会(x)指定文仉最长一行的长度昄出来。expand ?x)将tab Ҏ(gu)s p a c eQ所以是用实际的双界来做长度的比较?br />gawk 'BEGIN {FS = ":"}
{print $1 | "sort"}' /etc/passwd
此程序会(x)所有用L(fng)d名称Q依照字母的序昄出来?br />gawk '{nlines++}
END {print nlines}'
此程序会(x)一个文件的总行数显C出来?br />gawk 'END {print NR}'
此程序也?x)将一个文件的总行数显C出来,但是计算行数的工作由g a w k来做?br />gawk '{print NRQ? 0 } '
此程序显C出文g的内Ҏ(gu)Q会(x)在每行的最前面昄?gu)P它的函数与?cat -n’类伹{?