分享

VBA基础知识整理(正则表达式)

 hdzgx 2019-10-26

'一 正则表达式

   '正则表达式是处理字符串的外部工具,它可以根据设置的字符串对比规则,进行字符串的对比、替换等操作。
   
   '正则表达式的作用:
     '1、完成复杂的字符串判断
     '2、在字符串判断时,可以最大限度的避开循环,从而达到提高运行效率的目的。

'二 使用方法

'1、引用法

   '点击VBE编辑器菜单:工具 - 引用,选取: Microsoft VBScript Regular Expressions 5.5,引用后在程序开始进行如下声明
     'Dim regex As New RegExp
     Sub t1()
       ' Early binding demo: declares an auto-instancing RegExp variable.
       ' Compiles only when the "Microsoft VBScript Regular Expressions 5.5"
       ' reference has been ticked under Tools - References.
       Dim rx As New RegExp
     End Sub

'2、直接建法

'     Dim regex As Object
'     Set regex = CreateObject("VBScript.RegExp") '创建正则对象

     Sub t2()
       ' Late binding demo: the regex object is created from its ProgID at run time,
       ' so the variable is typed as a generic Object.
       Dim rx As Object
       Set rx = CreateObject("VBScript.RegExp")
     End Sub

'三 常用属性

'1 Global属性:

       '如果值为true,则搜索全部字符
       '如果值为False,则搜索到第1个即停止
       '1 例:
       Sub t3()
             ' Global property demo: with Global = True every "A" in the sample is
             ' replaced, not just the first one.
             Dim rx As New RegExp
             Dim sample As String

             sample = "ABCEA"
             rx.Global = True
             rx.Pattern = "A"

             Debug.Print rx.Replace(sample, "")   ' prints BCE
       End Sub

'2 IgnoreCase 属性
'区分大小写时为False(缺省值);设为True则不区分大小写

'3 Pattern 属性
' 一个字符串,用来定义正则表达式。缺省值为空文本。
'4 Multiline 属性:字符串是否按多行处理。为True时,^ 和 $ 分别匹配每一行的开头和结尾,而不只是整个字符串的开头和结尾

       Sub t4()
         ' MultiLine property demo: the sample holds two lines separated by Chr(10).
         ' With MultiLine = True the ^ anchor applies at the start of each line,
         ' so the leading "A" of both lines is removed.
         Dim rx As New RegExp
         Dim twoLines As String

         twoLines = "AEA" & Chr(10) & "ABCA"

         rx.Global = True
         rx.MultiLine = True
         ' Alternative to try: rx.Pattern = "A$" (trailing "A" of each line)
         rx.Pattern = "^A"

         Debug.Print rx.Replace(twoLines, "")
       End Sub

'5 Execute 方法

         '返回一个 MatchCollection 对象,该对象包含每个成功匹配的 Match 对象,
         '返回的信息包括:
           'FirstIndex:开始位置
           'Length:匹配到的文本的长度
           'Value:匹配到的文本内容
       Sub t5()
             ' Execute method demo: collects every "A" followed by digits, then
             ' halts with Stop so the returned MatchCollection can be inspected
             ' in the debugger.
             Dim rx As New RegExp
             Dim sample, hits

             sample = "A454BCEA5"

             rx.Global = True
             rx.Pattern = "A\d+"
             Set hits = rx.Execute(sample)

             Stop
       End Sub
     
       Function ns(rg)
             ' Sums every number (integer or decimal) that appears inside rg.
             '
             ' rg      : the text to scan; it is handed straight to RegExp.Execute.
             ' Returns : the total of all numbers found, or 0 when there are none.
             Dim reg As New RegExp
             Dim ma, m, s

             ' Start from an explicit Double zero so the result is numeric even
             ' when the text contains no number at all.
             s = 0#

             With reg
               .Global = True
               ' Require at least one digit per match. The previous pattern
               ' "\d*\.?\d*" also matched the empty string, which produced a
               ' zero-length match at almost every position of the input.
               .Pattern = "\d+(?:\.\d+)?|\.\d+"
               Set ma = .Execute(rg)
             End With

             For Each m In ma
                s = s + Val(m.Value)
             Next m

             ns = s
       End Function

'6、Test方法

        '返回一个布尔值,该值指示正则表达式是否与字符串成功匹配。其实就是判断两个字符串是否匹配成功
        Sub t7()
             ' Test method demo: shows a message box when the sample contains
             ' at least one digit.
             Dim rx As New RegExp
             Dim sample As String

             sample = "BCR6EA"

             rx.Global = True
             rx.Pattern = "\d+"

             If rx.test(sample) Then
                 MsgBox "字符串中含有数字"
             End If
        End Sub
        ----------------------------------------------------


Function 提取中文(rg As String, k As Integer)
      ' Removes a class of characters from rg and returns what is left.
      '
      ' rg : the text to filter.
      ' k  : 1 -> delete every non-digit (\D), so only digits remain
      '      2 -> delete every word character (\w), so only non-word characters remain
      '      any other value leaves Pattern at its default.
      ' NOTE(review): the name means "extract Chinese", which only describes mode 2
      ' (and only if Chinese characters are not covered by \w) - confirm intent.
      Dim stripper As New RegExp

      stripper.Global = True

      Select Case k
          Case 1
              stripper.Pattern = "\D"
          Case 2
              stripper.Pattern = "\w"
      End Select

      提取中文 = stripper.Replace(rg, "")
End Function

'常用符号

'正则表达式的核心是设置对比的规则,也就是设置Pattern属性,而组成这些规则除了字符本身以外,是具有特定含义的符号。
'下面介绍的是正规表达式中常用符号的第一部分。

'\号

'1.放在不便书写的字符前面,如回车符(\r),换行符(\n),制表符(\t),\自身(\\)

'2.放在有特殊意义字符的前面,表示它自身,"$","^","."

'3.放在可以匹配多个字符的前面

       '\d 0~9的数字
       '\w 任意一个字母或数字或下划线,也就是 A~Z,a~z,0~9,_ 中任意一个
       '\s 包括空格、制表符、换页符等空白字符的其中任意一个
       
       '以上改为大写时,为相反的意思,如\D 表示非数字类型
       
        Sub t1()
           ' \d demo: every digit is replaced with an empty string, so only the
           ' non-digit characters of the sample are printed.
           Dim rx As New RegExp
           Dim sample As String

           sample = "AE45B646C"

           rx.Global = True
           rx.Pattern = "\d"   ' matches each single digit

           Debug.Print rx.Replace(sample, "")   ' prints AEBC
        End Sub

'.(点)

   '可以匹配除换行符以外的所有字符

'+号
'+表示前面的字符(或表达式)至少出现1次,可以重复任意多次,相当于 {1,}。

   Sub t11()
     ' + quantifier demo: removes each "A" that is followed by one or more digits,
     ' together with those digits.
     Dim rx As New RegExp
     Dim sample As String

     sample = "A234CA7A"

     rx.Global = True
     rx.Pattern = "A\d+"

     Debug.Print rx.Replace(sample, "")   ' prints CA
   End Sub

'{}号
'可以设置重复次数

    '1 {n} 重复n次
        Sub t16()
           ' {n} quantifier demo: removes every run of exactly two consecutive digits.
           ' The pattern previously asked for five digits, which the sample never
           ' contains, so nothing was replaced and the demo showed no effect.
           Dim regx As New RegExp
           Dim sr

           sr = "A234CA7A67"

           With regx
            .Global = True
            .Pattern = "\d{2}" ' two consecutive digits
            Debug.Print .Replace(sr, "")   ' prints A4CA7A
           End With

         End Sub
         ---------------------------------------------------
   '2  {m,n}最小重复m次,最多重复n次
     
        Sub t22()
           ' {m,n} quantifier demo: removes runs of four to five consecutive digits.
           ' Shorter runs such as "234" and "7" are left untouched.
           Dim rx As New RegExp
           Dim sample As String

           sample = "A234CA7A6789"

           rx.Global = True
           rx.Pattern = "\d{4,5}"   ' at least four, at most five digits

           Debug.Print rx.Replace(sample, "")   ' prints A234CA7A
         End Sub
         ------------------------------------------------------------
    '3 {m,} 最少重复m次,次数没有上限({1,} 相当于 +)
         Sub t23()
           ' {m,} quantifier demo: removes every run of two or more consecutive
           ' digits; single digits stay in place.
           Dim rx As New RegExp
           Dim sample As String

           sample = "A2348t6CA7A67"

           rx.Global = True
           rx.Pattern = "\d{2,}"   ' two or more digits, no upper limit

           Debug.Print rx.Replace(sample, "")   ' prints At6CA7A
         End Sub

'* 可以出现0次或任意多次,相当于 {0,},比如:"\^*b"可以匹配 "b","^^^b"…

' ?号
'1 匹配表达式0次或者1次,相当于 {0,1},比如:"a[cd]?"可以匹配 “a”,“ac”,“ad”

        Sub t24()
           ' ? quantifier demo: matches digits, an optional single dot, then more
           ' digits, and removes each such match from the sample.
           Dim rx As New RegExp
           Dim sample As String

           sample = "A23.48CA7A6..7"

           rx.Global = True
           rx.Pattern = "\d+\.?\d+"   ' the dot may appear at most once

           Debug.Print rx.Replace(sample, "")
         End Sub
         ----------------------------------------------------

'2 利用 *? 或 +? 的格式(非贪婪匹配,尽可能少地匹配)可以分段匹配

      Sub t87()
        ' Non-greedy demo: ".*?" stops at the first closing </td>, so each table
        ' cell is returned as its own match instead of one match spanning both.
        Dim rx As New RegExp
        Dim html As String
        Dim cells As Object, cell As Object

        html = "<td><p>aa</p></td> <td><p>bb</p></td>"

        rx.Global = True
        rx.Pattern = "<td>.*?</td>"
        Set cells = rx.Execute(html)

        For Each cell In cells
          Debug.Print cell.Value
        Next cell
      End Sub
      --------------------------------------------------------
     Sub t88()
        ' Non-greedy demo: prints each shortest piece of text that starts and
        ' ends with a whitespace character.
        Dim rx As New RegExp
        Dim sample As String
        Dim hits As Object, hit As Object

        sample = " aba  aca  ada "

        rx.Global = True
        rx.Pattern = "\s.+?\s"
        Set hits = rx.Execute(sample)

        For Each hit In hits
          Debug.Print hit.Value
        Next hit
     End Sub

'^符号:限制的字符在最前面,如^\d表示以数字开头

Sub T34()
    ' ^ anchor demo: prints the run of digits (possibly empty) found at the very
    ' start of the sample.
    Dim rx As New RegExp
    Dim sample As String
    Dim hits As Object, hit As Object

    sample = "d234我345d43"

    rx.Global = True
    rx.Pattern = "^\d*"
    Set hits = rx.Execute(sample)

    For Each hit In hits
      Debug.Print hit.Value
    Next hit
  End Sub

'$符号:限制的字符在最后面,如 A$表示最后一个字符是A

Sub T3433()
    ' ^ and $ anchor demo: prints the sample only when it both begins and ends
    ' with a non-digit character.
    Dim rx As New RegExp
    Dim sample As String
    Dim hits As Object, hit As Object

    sample = "R243r"

    rx.Global = True
    rx.Pattern = "^\D.*\D$"
    Set hits = rx.Execute(sample)

    For Each hit In hits
      Debug.Print hit.Value
    Next hit
  End Sub

'\b
'单词边界:单词字符(\w)与非单词字符之间的位置(包含字符串的开头和结尾),本身不匹配任何字符

    Sub t26()
       ' \b demo: removes "A" plus digits only where the "A" sits at a word
       ' boundary; the "A56" that directly follows "d" is kept.
       Dim rx As New RegExp
       Dim sample As String

       sample = "A12dA56 A4"

       rx.Global = True
       rx.Pattern = "\bA\d+"

       Debug.Print rx.Replace(sample, "")
    End Sub
  --------------------------------------------------
Sub T272()
    ' \b demo: splits the sample at word boundaries with a non-greedy match and
    ' prints every piece except those that are exactly one space.
    Dim rx As New RegExp
    Dim sample As String
    Dim pieces As Object, piece As Object

    sample = "ad bf cr de ee"

    rx.Global = True
    rx.Pattern = ".+?\b"
    Set pieces = rx.Execute(sample)

    For Each piece In pieces
      If piece.Value <> " " Then
        Debug.Print piece.Value
      End If
    Next piece
  End Sub

'|
'可以设置两个条件,匹配左边或右边的

  Sub t27()
       ' | (alternation) demo: removes "A" plus digits or "B" plus digits.
       Dim rx As New RegExp
       Dim sample As String

       sample = "A12DA56 A4B34D"

       rx.Global = True
       rx.Pattern = "A\d+|B\d+"

       Debug.Print rx.Replace(sample, "")   ' prints D D
    End Sub

'\un 匹配 n,其中 n 是以四位十六进制数表示的 Unicode 字符。
'汉字一的编码是4e00,最后一个代码是9fa5

 Sub t2722()
       ' \uNNNN demo: removes every character in the range U+4E00..U+9FA5,
       ' leaving the rest of the sample.
       Dim rx As New RegExp
       Dim sample As String

       sample = "A12d我A爱56你 A4"

       rx.Global = True
       rx.Pattern = "[\u4e00-\u9fa5]"

       Debug.Print rx.Replace(sample, "")
    End Sub

'()
'可以让括号内作为一个整体产生重复

    Sub t29()
       ' Grouping demo: (A3){2} repeats the whole group, so the pattern is
       ' equivalent to the literal A3A3; each occurrence is removed.
       Dim rx As New RegExp
       Dim sample As String

       sample = "A3A3QA3A37BDFE87A8"

       rx.Global = True
       rx.Pattern = "((A3){2})"

       Debug.Print rx.Replace(sample, "")   ' prints Q7BDFE87A8
    End Sub

'取匹配结果的时候,括号中的表达式可以用 \数字引用

    Sub t30()
       ' Back-reference demo: \1 must repeat the text captured by the first
       ' (outer) group, so the pattern matches A3A3 + Q + A3A3.
       Dim rx As New RegExp
       Dim sample As String

       sample = "A3A3QA3A37BDFE87A8"

       rx.Global = True
       rx.Pattern = "((A3){2})Q\1"

       Debug.Print rx.Replace(sample, "")   ' prints 7BDFE87A8
    End Sub
    -----------------------------------------
    Sub t31()
       ' Back-reference numbering demo: groups are numbered by their opening
       ' parenthesis, so \4 refers to the inner (B4) group.
       Dim rx As New RegExp
       Dim sample As String

       sample = "A3A3B4B4QB4B47BDFE87A8"

       rx.Global = True
       rx.Pattern = "((A3){2})((B4){2})Q\4"

       Debug.Print rx.Replace(sample, "")
    End Sub

'用(?=字符)可以先进行预测查找,到一个匹配项后,将在匹配文本之前开始搜索下一个匹配项。 不会保存匹配项以备将来之用。

 '例:截取某个字符之前的数据
      Sub t343()
        ' Positive lookahead demo: prints each run of digits that is immediately
        ' followed by "元". The lookahead only checks for "元" and does not
        ' include it in the match, so just the digits are printed.
        Dim rx As New RegExp
        Dim sample As String
        Dim amounts As Object, amount As Object

        sample = "100元8000元57元"

        rx.Global = True
        rx.Pattern = "\d+(?=元)"
        Set amounts = rx.Execute(sample)

        For Each amount In amounts
          Debug.Print amount.Value
        Next amount
      End Sub
      -----------------------------------------
   '例:验证密码,条件是4-8位,必须包含一个数字
      Sub t355()
        ' Password check demo: the lookahead demands a digit somewhere in the
        ' text, and .{4,8} between ^ and $ limits the total length to 4-8
        ' characters. The sample is printed when it passes.
        Dim rx As New RegExp
        Dim candidate As String
        Dim hits As Object, hit As Object

        candidate = "A8ayaa"

        rx.Global = True
        rx.Pattern = "^(?=.*\d).{4,8}$"
        Set hits = rx.Execute(candidate)

        For Each hit In hits
          Debug.Print hit.Value
        Next hit
      End Sub

'用(?!字符)可以先进行负预测查找,到一个匹配项后,将在匹配文本之前开始搜索下一个匹配项。 不会保存匹配项以备将来之用。

Sub t356()
    ' Negative lookahead demo: matches only text that does NOT start with "中国".
    ' The sample does start with it, so no match is printed.
    Dim rx As New RegExp
    Dim sample As String
    Dim hits As Object, hit As Object

    sample = "中国建筑集团公司"

    rx.Global = True
    rx.Pattern = "^(?!中国).*"
    Set hits = rx.Execute(sample)

    For Each hit In hits
      Debug.Print hit.Value
    Next hit
End Sub

'()与|一起使用可以表示or

  Sub t344()
    ' Group + alternation demo: prints each run of digits followed by either
    ' "元" or "块", unit included.
    Dim rx As New RegExp
    Dim sample As String
    Dim amounts As Object, amount As Object

    sample = "100元800块7元"

    rx.Global = True
    rx.Pattern = "\d+(元|块)"
    ' Alternative to try: rx.Pattern = "\d+(?=元|块)" (digits only, unit excluded)
    Set amounts = rx.Execute(sample)

    For Each amount In amounts
      Debug.Print amount.Value
    Next amount
  End Sub

'[]
'使用方括号 [ ] 包含一系列字符,能够匹配其中任意一个字符。用 [^ ] 不包含一系列字符,
'则能够匹配其中字符之外的任意一个字符。同样的道理,虽然可以匹配其中任意一个,但是只能是一个,不是多个

'1 和括号内的其中一个匹配

 Sub t29()
       ' Character class demo: removes every character that is "B" or "C".
       Dim rx As New RegExp
       Dim sample As String

       sample = "ABDC"

       rx.Global = True
       rx.Pattern = "[BC]"

       Debug.Print rx.Replace(sample, "")   ' prints AD
End Sub

'2 非括号内的字符

    Sub T35()
       ' Negated character class demo: removes every character that is neither
       ' "B" nor "C", so only B and C characters remain.
       Dim rx As New RegExp
       Dim sample As String

       sample = "ABCDBDC"

       rx.Global = True
       rx.Pattern = "[^BC]"

       Debug.Print rx.Replace(sample, "")   ' prints BCBC
    End Sub

'3 在一个区间

Sub t38()
       ' Character range demo: removes every letter from A to H.
       ' The range is written in upper case to match the upper-case sample;
       ' matching is case-sensitive here because IgnoreCase is not set, so the
       ' previous lower-case range "[a-h]" matched nothing.
       Dim regx As New RegExp
       Dim sr

       sr = "ABCDGWDFUFE"

       With regx
        .Global = True
        .Pattern = "[A-H]"
        Debug.Print .Replace(sr, "")   ' prints WU
       End With

End Sub
----------------------------------------------
 Sub t40()
       ' Multiple ranges demo: one class can hold several ranges; this removes
       ' the digits 1-4 and 7-9, leaving only 5 and 6.
       Dim rx As New RegExp
       Dim sample As String

       sample = "124325436789"

       rx.Global = True
       rx.Pattern = "[1-47-9]"

       Debug.Print rx.Replace(sample, "")   ' prints 56
End Sub

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多