小能豆

使用正则表达式替换文件数据

py

我已经编写了一个正则表达式来选择我想要删除的精确文本,但是我无法使用当前代码实现它。

以下是三个主要函数:

# Matches a line that starts with "hostname", one or more spaces, then the name.
HOSTNAME_RE = re.compile(r'hostname +(\S+)')


def get_file_info_from_lines(filename, file_lines):
    """Hash the given lines and pick out the configured hostname.

    Args:
        filename: Name to report back in the result; it is not opened here.
        file_lines: Iterable of text lines making up the file.

    Returns:
        A ``(hostname, filename, sha1_hexdigest)`` tuple. ``hostname`` is the
        name captured from the last line matching ``HOSTNAME_RE``, or ``None``
        when no line matches. The digest covers every line, UTF-8 encoded.
    """
    digest = hashlib.sha1()
    found_hostname = None
    for text in file_lines:
        digest.update(text.encode('utf-8'))
        hit = HOSTNAME_RE.match(text)
        if hit is not None:
            # Later matches overwrite earlier ones.
            found_hostname = hit.group(1)
    return found_hostname, filename, digest.hexdigest()

def get_file_info(filename):
    """Return the info tuple for a configuration file, or ``None``.

    Only files whose name ends in ``.cfg``, ``.startup`` or ``.confg`` are
    read; any other name is ignored.

    Args:
        filename: Path of the file to inspect.

    Returns:
        Whatever ``get_file_info_from_lines`` returns for the file's lines,
        or ``None`` when the name does not have one of the extensions above.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Open read-only: the file is only read here, and "r+" would require
    # write permission and fail on read-only files.
    # NOTE: calling re.sub(REMOVE_RE, subst, filename, ...) at this point
    # would only rewrite the path string, not the file's contents; any
    # filtering has to be applied to the text that is read from the file.
    with open(filename, "r") as in_file:
        return get_file_info_from_lines(filename, in_file.readlines())

def hostname_parse(directory):
    """Collect file info for every entry in *directory*.

    Each entry is passed to ``get_file_info``; entries for which it returns
    ``None`` are skipped. A progress bar is updated once per entry.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A dict mapping the first element of each info tuple to the tuple
        itself. If two files yield the same first element, the later one
        replaces the earlier one.
    """
    results = {}
    # List the directory once so the progress total and the loop below are
    # guaranteed to describe the same set of entries.
    entries = os.listdir(directory)
    total = len(entries)
    for done, entry in enumerate(entries, start=1):
        filename = os.path.join(directory, entry)
        # Short pause kept from the original code, presumably so the
        # progress bar is visible -- confirm it is still wanted.
        sleep(0.001)
        progress_bar(done, total, prefix='Progress:', suffix='Complete', barLength=50)
        info = get_file_info(filename)
        if info is not None:
            results[info[0]] = info
    return results

这是用于查找要删除的字符串的正则表达式。

# Pattern for the text to drop: the phrase "Current configuration" (starting
# at a word boundary) followed by six repetitions of ".*\n?", i.e. the rest of
# a line plus an optional newline each time. Because it spans several lines,
# it has to be applied to multi-line text rather than to one line at a time.
REMOVE_RE = r"((?:\bCurrent configuration)(?:.*\n?){6})"
# Replacement string; being empty, it deletes a match when passed to re.sub.
subst = ""

数据删除之前的文件示例:

Building configuration...

Current configuration : 45617 bytes
!
! Last configuration change at 00:22:36 UTC Sun Jan 22 2017 by user
! NVRAM config last updated at 00:22:43 UTC Sun Jan 22 2017 by user
!
version 15.0
no service pad
!
no logging console
enable secret 5 ***encrypted password***
!
username admin privilege 15 password 7 ***encrypted password***
username sadmin privilege 15 secret 5 ***encrypted password***
aaa new-model
!
ip ftp username ***encrypted password***
ip ftp password 7 ***encrypted password***
ip ssh version 2
!
line con 0
 password 7 ***encrypted password***
 login authentication maint
line vty 0 4
 password 7 ***encrypted password***
 length 0
 transport input ssh
line vty 5 15
 password 7 ***encrypted password***
 transport input ssh
!

数据删除后文件示例:

Building configuration...

!
no service pad
!
no logging console
enable 
!
username admin privilege 15 
username gisadmin privilege 15 
aaa new-model
!
ip ftp username cfgftp
ip ftp 
ip ssh version 2
!
line con 0

 login authentication maint
line vty 0 4

 length 0
 transport input ssh
line vty 5 15

 transport input ssh
!

我尝试在 get_file_info 和 get_file_info_from_lines 中执行类似 #filename = re.sub(REMOVE_RE, subst, filename, 0, re.MULTILINE) 的操作,但显然我没有正确执行它。

由于我刚刚学习,因此任何帮助都将不胜感激。

运行比较:

results1 = hostname_parse('test1.txt')
results2 = hostname_parse('test2.txt')

# Report every hostname found in both result sets whose file hashes differ.
for hostname, filename, filehash in results1.values():
    if hostname not in results2:
        continue
    _, filename2, filehash2 = results2[hostname]
    if filehash == filehash2:
        continue
    hash_pair = (hostname, filehash, filehash2)
    print("%s has a change (%s, %s)" % hash_pair)
    print(filename)
    print(filename2)
    print()

我不想修改当前文件。如果所有这些都可以在内存或临时文件中完成,那就太好了。

完整代码:

import hashlib
import os
import re


# Matches a line that starts with "hostname", one or more spaces, then the name.
HOSTNAME_RE = re.compile(r'hostname +(\S+)')
# "Current configuration" followed by six repetitions of "rest of line plus
# optional newline" -- a multi-line span.
REMOVE_RE = re.compile(r"((?:\bCurrent configuration)(?:.*\n?){6})")


def get_file_info_from_lines(filename, file_lines):
    """Hash a file's lines with the ``REMOVE_RE`` span removed.

    The removal happens on an in-memory copy of the text only; nothing is
    written back to disk.

    Args:
        filename: Name to report back in the result; it is not opened here.
        file_lines: Iterable of text lines making up the file.

    Returns:
        A ``(hostname, filename, sha1_hexdigest)`` tuple. ``hostname`` is the
        name captured from the last remaining line matching ``HOSTNAME_RE``,
        or ``None`` when no line matches. The digest covers the UTF-8
        encoding of the text left after every ``REMOVE_RE`` match is deleted.
    """
    # REMOVE_RE spans several lines, so it must be applied to the joined
    # text. Matched against one line at a time it can only ever drop the
    # single "Current configuration" line, never the lines that follow it.
    text = REMOVE_RE.sub('', ''.join(file_lines))

    hostname = None
    for line in text.splitlines():
        match = HOSTNAME_RE.match(line)
        if match:
            # Later matches overwrite earlier ones.
            hostname = match.group(1)

    a_hash = hashlib.sha1(text.encode('utf-8'))
    return hostname, filename, a_hash.hexdigest()

def get_file_info(filename):
    """Return the info tuple for a configuration file, or ``None``.

    Only files whose name ends in ``.cfg``, ``.startup`` or ``.confg`` are
    read; any other name is ignored.

    Args:
        filename: Path of the file to inspect.

    Returns:
        Whatever ``get_file_info_from_lines`` returns for the file's lines,
        or ``None`` when the name does not have one of the extensions above.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Open read-only: the file is only read here, and "r+" would require
    # write permission and fail on read-only files.
    with open(filename, "r") as in_file:
        return get_file_info_from_lines(filename, in_file.readlines())

def hostname_parse(directory):
    """Build a lookup of file info for the entries of *directory*.

    Every entry name is joined onto *directory* and handed to
    ``get_file_info``; entries for which it returns ``None`` are left out.

    Returns:
        A dict mapping the first element of each info tuple to the tuple
        itself; a later entry with the same first element replaces the
        earlier one.
    """
    by_key = {}
    for entry in os.listdir(directory):
        info = get_file_info(os.path.join(directory, entry))
        if info is None:
            continue
        by_key[info[0]] = info
    return by_key


# Each argument is a directory of test files.
results1 = hostname_parse('test1')
results2 = hostname_parse('test2')

# Report every hostname found in both result sets whose file hashes differ.
for hostname, filename, filehash in results1.values():
    if hostname not in results2:
        continue
    _, filename2, filehash2 = results2[hostname]
    if filehash == filehash2:
        continue
    hash_pair = (hostname, filehash, filehash2)
    print("%s has a change (%s, %s)" % hash_pair)
    print(filename)
    print(filename2)
    print()

阅读 14

收藏
2024-12-17

共1个答案

小能豆

我找到了一种绕过正则表达式的方法:逐行检查,把包含特定标记的行清空(相当于删除),这样这些行就不会计入哈希。

def get_file_info_from_lines(filename, file_lines):
    """Hash a file's lines, ignoring the "! Last " and "! NVRAM " lines.

    Args:
        filename: Name to report back in the result; it is not opened here.
        file_lines: Iterable of text lines making up the file.

    Returns:
        A ``(hostname, filename, sha1_hexdigest)`` tuple. ``hostname`` is the
        name captured from the last hashed line matching ``HOSTNAME_RE``, or
        ``None`` when no line matches. The digest covers the UTF-8 encoding
        of every line that does not contain one of the skip markers.
    """
    # Lines containing either marker are left out of the hash; in the sample
    # configuration they are the ones carrying "last changed/updated"
    # timestamps.
    skip_markers = ("! Last ", "! NVRAM ")
    hostname = None
    a_hash = hashlib.sha1()
    for line in file_lines:
        if any(marker in line for marker in skip_markers):
            continue
        a_hash.update(line.encode('utf-8'))
        match = HOSTNAME_RE.match(line)
        if match:
            # Later matches overwrite earlier ones.
            hostname = match.group(1)
    # The result tuple must be returned; without it every caller gets None.
    return hostname, filename, a_hash.hexdigest()
2024-12-17