专注于互联网--专注于架构

最新标签
网站地图
文章索引
Rss订阅

首页 » Java教程 » 正则表达式:正则表达式数据抽取 regular expression advanced use » 正文

正则表达式:正则表达式数据抽取 regular expression advanced use

来源: 发布时间:星期六, 2008年12月6日 浏览:39次 评论:0
以下是程序的输出:

Aim String:buffer size1=0x1234 buffer size2=1024 buffer size3=9999

[src pattern]:buffer size1=VAR{HEX_NUM=>N1} buffer size2=VAR{STRING=>N2} buffer size3=VAR{DEC_NUM=>N3}
[dst pattern]:buffer size1=(0x\d{1,}) buffer size2=(.*?) buffer size3=(\d{1,})
[var List]
N1 0x1234
N2 1024
N3 9999

本意是分析Aim String,获取size1 size2 size3的数据,并且与3个变量N1 N2 N3关联。

为此,先在 src pattern 中定义一个带变量占位符(形如 VAR{类型=>变量名})的模板串;程序首先解析 src pattern,得到目标正则表达式(dst pattern),

然后用该正则表达式匹配数据,并在匹配的过程中将数据与关键字(变量名)关联。

程序代码如下:

class UserStringPattern

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Carries a user-written template string together with the regular expression
 * derived from it and the list of variables declared in the template.
 *
 * <p>The fields are public and are filled in by {@code RegExpUtil}: this class
 * itself only stores them, renders them in {@link #toString()} and looks up a
 * variable's value by name.
 */
public class UserStringPattern {
    /** The template exactly as passed to the constructor. */
    public StringBuffer userPattern;

    /**
     * Working copy of the template. It starts out as a copy of
     * {@link #userPattern}; {@code RegExpUtil.creatRegExp} rewrites it in place.
     */
    public StringBuffer destPattern;

    /** Compiled pattern; never assigned by this class, so {@code null} until set externally. */
    public Pattern pattern;

    /** Variables collected for this template, in insertion order. */
    public List<VarValue> varList;

    /**
     * Creates a holder for the given template.
     *
     * @param userPattern the template text; it is stored as-is and also copied
     *                    into {@link #destPattern}
     */
    public UserStringPattern(StringBuffer userPattern) {
        this.userPattern = userPattern;
        this.destPattern = new StringBuffer(userPattern);
        // Explicit type argument instead of a raw ArrayList (no unchecked warning).
        this.varList = new ArrayList<VarValue>();
    }

    /**
     * Renders the source template, the derived pattern text and one line per
     * variable (each prefixed with a tab).
     *
     * @return a multi-line, human-readable dump of this object
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("[src pattern]:").append(userPattern).append("\n");
        sb.append("[dst pattern]:").append(destPattern).append("\n");
        sb.append("[var List]\n");
        for (VarValue entry : varList) {
            sb.append("\t").append(entry).append("\n");
        }
        return sb.toString();
    }

    /**
     * Looks up the value of the first variable whose name equals {@code name}.
     *
     * @param name the variable name to search for
     * @return the stored value of the first matching variable (which may itself
     *         be {@code null} if no value has been assigned to it), or the empty
     *         string when no variable has that name
     */
    public String getValueByName(String name) {
        if (name == null) {
            // No entry can equal a null name; same result the lookup loop would give.
            return "";
        }
        for (VarValue entry : varList) {
            // Compare from the non-null side so an entry without a name cannot throw.
            if (entry != null && name.equals(entry.varName)) {
                return entry.value;
            }
        }
        return "";
    }
}

class RegExpUtil

package com.flylb.util;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Simple mutable pair of a variable name and the text associated with it.
 * Both fields are public and may be reassigned after construction.
 */
class VarValue {
    /** Name of the variable. */
    public String varName;

    /** Text currently associated with {@link #varName}; may be {@code null}. */
    public String value;

    /**
     * Creates a pair from the given name and value; neither is validated.
     *
     * @param varName the variable name
     * @param value   the initial value, possibly {@code null}
     */
    public VarValue(String varName, String value) {
        this.value = value;
        this.varName = varName;
    }

    /**
     * @return the name and the value separated by a single tab character
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(varName);
        text.append("\t");
        text.append(value);
        return text.toString();
    }
}

@SuppressWarnings("unchecked")
public class RegExpUtil {
/** Logger shared by the static helpers of this class. */
private static final Log log = LogFactory.getLog(RegExpUtil.class);

/**
 * Lookup table from a placeholder type name to the capturing-group regular
 * expression that is substituted for a placeholder of that type.
 */
private static final Map<String, String> regExpMap = new HashMap<String, String>();

static {
    // One or more decimal digits.
    regExpMap.put("DEC_NUM", "(\\d{1,})");
    // Literal "0x" followed by one or more decimal digits.
    // NOTE(review): \d does not match a-f/A-F, so a value such as 0x1A2B is
    // not accepted as HEX_NUM - confirm whether that restriction is intended.
    regExpMap.put("HEX_NUM", "(0x\\d{1,})");
    // Reluctant match of any run of characters (possibly empty).
    regExpMap.put("STRING", "(.*?)");
}

/** Matches one {@code VAR{...}} placeholder; group 1 is the text between the braces. */
private static final Pattern VAR_PLACEHOLDER = Pattern.compile("VAR\\{(.*?)\\}");

/**
 * Builds the regular expression for a template and stores it in the given holder.
 *
 * <p>Every {@code VAR{TYPE=>NAME}} placeholder found in
 * {@code userStringPattern.userPattern} is replaced, inside
 * {@code userStringPattern.destPattern}, by the regex registered for
 * {@code TYPE} in {@code regExpMap}; a {@code VarValue} named {@code NAME}
 * with a {@code null} value is appended to {@code varList} for each
 * placeholder. Text outside placeholders is copied unchanged, so it is
 * interpreted as regex syntax when the result is compiled.
 *
 * <p>The method may be called repeatedly on the same holder: the variable
 * list, the destination buffer and the compiled pattern are reset on entry,
 * so an earlier call never leaks into the result. When the method returns
 * {@code false}, {@code pattern} is {@code null}, while {@code varList} and
 * {@code destPattern} hold whatever was processed before the failure.
 *
 * @param userStringPattern holder supplying the template and receiving the
 *                          variable list, pattern text and compiled pattern
 * @return {@code true} if the pattern was built and compiled; {@code false}
 *         if a placeholder lacks {@code =>}, names an unknown type, or the
 *         resulting text is not a valid regular expression
 */
public static boolean creatRegExp(UserStringPattern userStringPattern) {
    List<VarValue> varList = userStringPattern.varList;
    StringBuffer destPattern = userStringPattern.destPattern;
    // Snapshot the template so scanning is unaffected by edits to the buffers.
    String sourcePattern = userStringPattern.userPattern.toString();

    // Start from a clean state. Match offsets below are relative to the source
    // template, so the destination must begin as an exact copy of it; without
    // this reset a second call would edit an already-rewritten buffer.
    varList.clear();
    userStringPattern.pattern = null;
    destPattern.setLength(0);
    destPattern.append(sourcePattern);

    Matcher m = VAR_PLACEHOLDER.matcher(sourcePattern);
    // Accumulated length difference between destPattern and sourcePattern
    // caused by the replacements performed so far.
    int offset = 0;
    while (m.find()) {
        String varExp = m.group(1);
        int pos = varExp.indexOf("=>");
        if (pos == -1) {
            return false;
        }
        String varType = varExp.substring(0, pos);
        String varName = varExp.substring(pos + 2);
        varList.add(new VarValue(varName, null));

        String type2Regexp = regExpMap.get(varType);
        if (type2Regexp == null) {
            return false;
        }
        destPattern.replace(m.start() + offset, m.end() + offset, type2Regexp);
        offset += type2Regexp.length() - (m.end() - m.start());
    }

    try {
        userStringPattern.pattern = Pattern.compile(destPattern.toString());
        return true;
    } catch (PatternSyntaxException e) {
        log.info("Pattern error:" + destPattern.toString());
        return false;
    }
}

public static void matchPattern(UserStringPattern userStringPattern, String aimString) {
if (userStringPattern.pattern == null) {
log.info("pattern is null!");
return;
}
Matcher m = userStringPattern.pattern.matcher(aimString);

相关文章

读者评论

  • 共0条 分0页

发表评论

  • 昵称:
  • 内容: