正则表达式:正则表达式数据抽取 regular expression advanced usage 来源: 发布时间:星期六, 2008年12月6日 浏览:39次 评论:0
以下是程序的输出:
Aim String:buffer size1=0x1234 buffer size2=1024 buffer size3=9999 [src pattern]:buffer size1=VAR{HEX_NUM=>N1} buffer size2=VAR{STRING=>N2} buffer size3=VAR{DEC_NUM=>N3} [dst pattern]:buffer size1=(0x\d{1,}) buffer size2=(.*?) buffer size3=(\d{1,}) [var List] N1 0x1234 N2 1024 N3 9999 本意是分析Aim String,获取size1 size2 size3的数据,并且与3个变量N1 N2 N3关联。 为此定义了一个串在 src pattern中, 首先分析src pattern并得到目标的正则表达式 然后匹配数据,在匹配的过程中将数据与关键字关联。 程序代码如下:
class UserStringPattern
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Bundles a user-written pattern with the data derived from it: the rewritten
 * regular-expression text, the compiled {@link Pattern}, and the list of
 * variables declared in the pattern.
 *
 * <p>All fields are public and are read and written directly by
 * {@code RegExpUtil}.
 */
public class UserStringPattern {
    /** The pattern text as supplied by the caller (stored by reference, not copied). */
    public StringBuffer userPattern;
    /** Working copy of the pattern; {@code RegExpUtil.creatRegExp} rewrites it in place. */
    public StringBuffer destPattern;
    /** Compiled form of {@link #destPattern}; not assigned here, so it is {@code null} until set elsewhere. */
    public Pattern pattern;
    /** Variables found in the pattern, in the order they were added. */
    public List<VarValue> varList;

    /**
     * Creates a holder for the given user pattern.
     *
     * @param userPattern the source pattern; kept by reference as
     *        {@link #userPattern} and copied into a new buffer for
     *        {@link #destPattern}
     */
    public UserStringPattern(StringBuffer userPattern) {
        super();
        this.userPattern = userPattern;
        // Separate buffer with the same contents, so later edits to destPattern
        // do not touch the caller's userPattern.
        this.destPattern = new StringBuffer(userPattern);
        // NOTE(review): raw ArrayList; new ArrayList<VarValue>() would avoid the
        // unchecked-assignment warning.
        varList = new ArrayList();
    }

    /**
     * Returns a multi-line dump: the source pattern, the destination pattern,
     * then a "[var List]" header followed by one tab-indented line per entry
     * of {@link #varList}.
     */
    @Override
    public String toString() {
        StringBuffer sb = new StringBuffer();
        sb.append("[src pattern]:" + userPattern);
        sb.append("\n");
        sb.append("[dst pattern]:" + destPattern);
        sb.append("\n");
        sb.append("[var List]\n");
        for (int i = 0; i < varList.size(); i++) {
            sb.append("\t" + varList.get(i).toString());
            sb.append("\n");
        }
        return sb.toString();
    }

    /**
     * Looks up a variable by name with a linear scan of {@link #varList}.
     *
     * @param name the variable name to search for
     * @return the {@code value} of the first entry whose {@code varName}
     *         equals {@code name} (this may be {@code null} when the entry
     *         was stored without a value), or the empty string when no entry
     *         matches
     */
    public String getValueByName(String name) {
        for (int i = 0; i < varList.size(); i++) {
            VarValue varValue = varList.get(i);
            if (varValue.varName.equals(name)) {
                return varValue.value;
            }
        }
        // No entry with that name: the empty string is returned rather than null.
        return "";
    }
}
class RegExpUtil
package com.flylb.util;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * A mutable name/value pair describing one variable of a user pattern.
 */
class VarValue {
    /** Name of the variable. */
    public String varName;
    /** Text associated with the variable; may be {@code null}. */
    public String value;

    /**
     * @param varName the variable name
     * @param value the associated text; stored as given, including {@code null}
     */
    public VarValue(String varName, String value) {
        super();
        this.varName = varName;
        this.value = value;
    }

    /** Returns the name and the value separated by a tab character. */
    @Override
    public String toString() {
        return varName + "\t" + value;
    }
}

@SuppressWarnings("unchecked") public
class RegExpUtil { private static Log log = LogFactory.getLog(RegExpUtil.class); private static Map<String, String> regExpMap = new HashMap(); static { regExpMap.put("DEC_NUM", "(\\d{1,})"); regExpMap.put("HEX_NUM", "(0x\\d{1,})"); regExpMap.put("STRING", "(.*?)"); } public static boolean creatRegExp(UserStringPattern userStringPattern) { List varList = userStringPattern.varList; StringBuffer sourcePattern = userStringPattern.userPattern; StringBuffer destPattern = userStringPattern.destPattern; varList.clear(); Matcher m = Pattern.compile("VAR\\{(.*?)\\}").matcher(sourcePattern); String varToReplace = null, varExp = null, varName = null, varType = null; String type2Regexp = null; int pos = 0; int offset = 0; while (m.find()) { int start, end; start = m.start(); end = m.end(); varToReplace = sourcePattern.substring(start, end); // System.out.println("varToReplace:" + varToReplace); varExp = m.group(1); pos = varExp.indexOf("=>"); if (pos == -1) return false; varType = varExp.substring(0, pos); varName = varExp.substring(pos + 2); varList.add(new VarValue(varName, null)); // log.info(varType); // log.info(offset); // log.info(start + offset); // log.info(end + offset); // log.info(destPattern); type2Regexp = regExpMap.get(varType); if (type2Regexp == null) { return false; } destPattern.replace(start + offset, end + offset, type2Regexp); offset += type2Regexp.length() - varToReplace.length(); } try { userStringPattern.pattern = Pattern.compile(userStringPattern.destPattern.toString()); return true; } catch (PatternSyntaxException e) { log.info("Pattern error:" + userStringPattern.destPattern.toString()); return false; } } public static void matchPattern(UserStringPattern userStringPattern, String aimString) { if (userStringPattern.pattern == null) { log.info("pattern is null!"); return; } Matcher m = userStringPattern.pattern.matcher(aimString); 0
相关文章
读者评论
发表评论 |