Python Data extraction fails

小能豆

Python Data extraction fails

Hi I was trying to parse the complex csv. Below is the sample csv:

start/stop,read/write,address,data
 Start,,,
, Write,,
,,22,
,,,0
 Start repeat,,,
, Read,,
,,22,
,,, E8
,,,22
 Stop,,,

I want the output achieved like below fromt the above sample csv:

{
    'Start': {
        'Write': {
            'Address': ['22'],
            'Data': ['0']
        },
        'Read': {
            'Address': ['22'],
            'Data': ['E8', '22']
        }
    },
    'Start repeat': {
        'Read': {
            'Address': ['22'],
            'Data': []
        }
    }
}

Logic tried:

import pandas as pd
import csv

def extract_start_stop_lines(input_csv_path, output_txt_path):
    # Read the original CSV file into a DataFrame
    df = pd.read_csv(input_csv_path)

    # Extract the desired columns
    selected_columns = ['start/stop', 'read/write', 'address', 'data']
    selected_data = df[selected_columns]

    # Display header information
    header = selected_data.columns.tolist()
    print("Header:", header)

    # Write header information to the output text file
    with open(output_txt_path, 'w', newline='', encoding='utf-8') as outputfile:
        outputfile.write(f"Header: {', '.join(header)}\n")

        # Iterate through rows in the DataFrame
        for _, row in selected_data.iterrows():
            # Extract relevant information
            start_stop_value = str(row['start/stop']).strip() if pd.notna(row['start/stop']) else None
            operation = str(row['read/write']).strip() if pd.notna(row['read/write']) else None
            address = str(row['address']).strip() if pd.notna(row['address']) else None
            data = str(row['data']).strip() if pd.notna(row['data']) else None

            # Display extracted information
            print(f"Start/Stop: {start_stop_value}, Operation: {operation}, Address: {address}, Data: {data}")

            # Write the information to the output text file
            outputfile.write(f"Start/Stop: {start_stop_value}, Operation: {operation}, Address: {address}, Data: {data}\n")

# Example usage:
input_csv_path = 'C:\\Test\\test.csv'
output_txt_path = 'start_stop_lines.txt'
extract_start_stop_lines(input_csv_path, output_txt_path)

After the above code new text file will be generated here are the sample lines of files:

Header: start/stop, read/write, address, data
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 0
Start/Stop: Start repeat, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Read, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: E8
Start/Stop: None, Operation: None, Address: None, Data: 22
Start/Stop: Stop, Operation: None, Address: None, Data: None
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 2
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: Stop, Operation: None, Address: None, Data: None

Usually what i want to achieve here is whenever there is “Start” then find its operation whether it is “Write” or “Read” and its address and then its data values.

阅读 100

2023-12-16

共1个答案

小能豆

To achieve the desired output, you can iterate through the DataFrame and build a nested dictionary structure based on the “Start” and “Stop” events. Here’s a modified version of your code to achieve this:

import pandas as pd

def extract_start_stop_lines(input_csv_path):
    # Read the original CSV file into a DataFrame
    df = pd.read_csv(input_csv_path)

    # Create a dictionary to store the extracted information
    result_dict = {}
    current_start_stop = None
    current_operation = None

    # Iterate through rows in the DataFrame
    for _, row in df.iterrows():
        # Extract relevant information
        start_stop_value = str(row['start/stop']).strip() if pd.notna(row['start/stop']) else None
        operation = str(row['read/write']).strip() if pd.notna(row['read/write']) else None
        address = str(row['address']).strip() if pd.notna(row['address']) else None
        data = str(row['data']).strip() if pd.notna(row['data']) else None

        # If "Start" is encountered, update the current start/stop and operation
        if start_stop_value == 'Start':
            current_start_stop = {}
            current_operation = None

        # If "Stop" is encountered, reset the current start/stop and operation
        elif start_stop_value == 'Stop':
            if current_start_stop:
                result_dict[current_start_stop['Start']] = current_start_stop
                current_start_stop = None
                current_operation = None

        # If an operation is encountered (Write/Read), update the current operation
        elif operation:
            current_operation = {
                'Address': [address] if address else [],
                'Data': [data] if data else []
            }

            if operation not in current_start_stop:
                current_start_stop[operation] = current_operation
            else:
                current_start_stop[operation]['Address'].extend(current_operation['Address'])
                current_start_stop[operation]['Data'].extend(current_operation['Data'])

    return result_dict

# Example usage:
input_csv_path = 'C:\\Test\\test.csv'
output_dict = extract_start_stop_lines(input_csv_path)
print(output_dict)

This code should produce a nested dictionary (output_dict) with the desired structure. Note that this code assumes that the “Start” and “Stop” events are properly paired in the CSV file. If the CSV file has a different structure or there are other considerations, you might need to adjust the code accordingly.

2023-12-16