在 C# 中使用正则表达式进行匹配时,有时会遇到这种情况:CPU 使用率达到 100%,但正则表达式并没有抛出异常,而是一直处于匹配过程中。这将导致系统资源被耗尽,应用程序被卡住。这是由于被匹配的内容与正则不完全匹配,而 Regex(.NET Framework 4.5 之前的版本)中又没有 Timeout 属性,使正则处理器陷入了死循环。这种情况尤其容易发生在对不可靠的被匹配对象进行匹配的过程中,例如在我的个人网站 www.eahan.com 项目中,对多个网站页面进行自动采集匹配时,就经常发生该问题。为了避免资源耗尽的情况发生,我写了一个 AsynchronousRegex 类,顾名思义,即异步的 Regex。给该类设置一个 Timeout 属性,将 Regex 匹配的动作置于单独的线程中,由 AsynchronousRegex 监控该线程,当 Regex 匹配超过 Timeout 限定的时间时销毁线程。
using System;
using System.Text.RegularExpressions; using System.Threading;
namespace LZT.Eahan.Common
{
    /// <summary>
    /// Runs a <see cref="Regex"/> match on a dedicated worker thread and abandons it
    /// when it has not finished within a caller-supplied timeout, so that a
    /// pathological pattern/input pair cannot pin the calling thread indefinitely.
    /// </summary>
    /// <remarks>
    /// An instance keeps the state of its most recent <see cref="Matchs"/> call and
    /// is therefore not meant to be shared by concurrent callers.
    /// </remarks>
    public class AsynchronousRegex
    {
        private MatchCollection mc;
        private readonly int _timeout;   // Maximum time to wait for the worker, in milliseconds.
        private bool _isTimeout;

        /// <summary>
        /// True when the most recent <see cref="Matchs"/> call gave up because the
        /// timeout elapsed before the match finished.
        /// </summary>
        public bool IsTimeout
        {
            get { return this._isTimeout; }
        }

        /// <summary>Creates a matcher with the given timeout.</summary>
        /// <param name="timeout">
        /// Maximum time, in milliseconds, a single <see cref="Matchs"/> call may take.
        /// Zero or negative values are treated as zero (give up immediately unless
        /// the worker has already finished).
        /// </param>
        public AsynchronousRegex(int timeout)
        {
            this._timeout = timeout;
            this._isTimeout = false;
            this.mc = null;
        }

        /// <summary>
        /// Matches <paramref name="input"/> against <paramref name="regex"/> on a
        /// worker thread, waiting at most the configured timeout.
        /// </summary>
        /// <param name="regex">The expression to evaluate.</param>
        /// <param name="input">The text to search.</param>
        /// <returns>
        /// The matches when at least one was found in time; <c>null</c> when nothing
        /// matched or when the timeout elapsed (check <see cref="IsTimeout"/> to
        /// tell the two apart).
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="regex"/> or <paramref name="input"/> is null.
        /// </exception>
        public MatchCollection Matchs(Regex regex, string input)
        {
            // Fail on the caller's thread; otherwise the null would surface as an
            // unhandled exception on the worker thread.
            if (regex == null)
            {
                throw new ArgumentNullException("regex");
            }
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            // Reset per-call state so an earlier call's result or timeout flag can
            // never leak into this one.
            this.mc = null;
            this._isTimeout = false;

            Reg r = new Reg(regex, input);
            r.OnMatchComplete += new Reg.MatchCompleteHandler(this.MatchCompleteHandler);
            Thread t = new Thread(new ThreadStart(r.Matchs));
            t.Start();

            if (!this.WaitForWorker(t))
            {
                return null;
            }
            return this.mc;
        }

        /// <summary>
        /// Blocks until the worker finishes or the timeout elapses; on timeout the
        /// worker is aborted and <see cref="IsTimeout"/> is set.
        /// </summary>
        /// <returns>True when the worker finished in time; false on timeout.</returns>
        private bool WaitForWorker(Thread t)
        {
            // Thread.Join rejects negative values other than Timeout.Infinite, and
            // -1 would mean "wait forever", which defeats the purpose of this class.
            int waitMilliseconds = Math.Max(0, this._timeout);
            if (t.Join(waitMilliseconds))
            {
                return true;
            }

            // Record the timeout first so the flag is correct even if Abort throws.
            this._isTimeout = true;

            // NOTE(review): Thread.Abort is only supported on .NET Framework. On
            // runtimes where it is unsupported, the Regex constructor overload that
            // takes a matchTimeout is the replacement for this whole class.
            t.Abort();
            return false;
        }

        /// <summary>Receives the worker's result.</summary>
        private void MatchCompleteHandler(MatchCollection mc)
        {
            this.mc = mc;
        }

        /// <summary>
        /// Pairs a regex with its input so the match can run as a parameterless
        /// thread entry point.
        /// </summary>
        class Reg
        {
            internal delegate void MatchCompleteHandler(MatchCollection mc);

            /// <summary>Raised on the worker thread when at least one match was found.</summary>
            internal event MatchCompleteHandler OnMatchComplete;

            private string _input;
            private Regex _regex;

            public Reg(Regex regex, string input)
            {
                this._regex = regex;
                this._input = input;
            }

            public string Input
            {
                get { return this._input; }
                set { this._input = value; }
            }

            public Regex Regex
            {
                get { return this._regex; }
                set { this._regex = value; }
            }

            /// <summary>
            /// Thread entry point: runs the match and reports a non-empty result
            /// through <see cref="OnMatchComplete"/>.
            /// </summary>
            internal void Matchs()
            {
                MatchCollection mc = this._regex.Matches(this._input);

                // Reading Count is the step that may exhaust the CPU: it forces the
                // collection to be fully evaluated here, on the worker thread.
                if (mc != null && mc.Count > 0)
                {
                    // Copy the delegate first so a missing subscriber cannot cause
                    // a NullReferenceException on the worker thread.
                    MatchCompleteHandler handler = this.OnMatchComplete;
                    if (handler != null)
                    {
                        handler(mc);
                    }
                }
            }
        }
    }
}