小能豆

Python Data extraction fails

py

Hi I was trying to parse the complex csv. Below is the sample csv:

start/stop,read/write,address,data
 Start,,,
, Write,,
,,22,
,,,0
 Start repeat,,,
, Read,,
,,22,
,,, E8
,,,22
 Stop,,,

I want the output achieved like below fromt the above sample csv:

{
    'Start': {
        'Write': {
            'Address': ['22'],
            'Data': ['0']
        },
        'Read': {
            'Address': ['22'],
            'Data': ['E8', '22']
        }
    },
    'Start repeat': {
        'Read': {
            'Address': ['22'],
            'Data': []
        }
    }
}

Logic tried:

import pandas as pd
import csv

def extract_start_stop_lines(input_csv_path, output_txt_path):
    # Read the original CSV file into a DataFrame
    df = pd.read_csv(input_csv_path)

    # Extract the desired columns
    selected_columns = ['start/stop', 'read/write', 'address', 'data']
    selected_data = df[selected_columns]

    # Display header information
    header = selected_data.columns.tolist()
    print("Header:", header)

    # Write header information to the output text file
    with open(output_txt_path, 'w', newline='', encoding='utf-8') as outputfile:
        outputfile.write(f"Header: {', '.join(header)}\n")

        # Iterate through rows in the DataFrame
        for _, row in selected_data.iterrows():
            # Extract relevant information
            start_stop_value = str(row['start/stop']).strip() if pd.notna(row['start/stop']) else None
            operation = str(row['read/write']).strip() if pd.notna(row['read/write']) else None
            address = str(row['address']).strip() if pd.notna(row['address']) else None
            data = str(row['data']).strip() if pd.notna(row['data']) else None

            # Display extracted information
            print(f"Start/Stop: {start_stop_value}, Operation: {operation}, Address: {address}, Data: {data}")

            # Write the information to the output text file
            outputfile.write(f"Start/Stop: {start_stop_value}, Operation: {operation}, Address: {address}, Data: {data}\n")

# Example usage:
input_csv_path = 'C:\\Test\\test.csv'
output_txt_path = 'start_stop_lines.txt'
extract_start_stop_lines(input_csv_path, output_txt_path)

After the above code new text file will be generated here are the sample lines of files:

Header: start/stop, read/write, address, data
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 0
Start/Stop: Start repeat, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Read, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: E8
Start/Stop: None, Operation: None, Address: None, Data: 22
Start/Stop: Stop, Operation: None, Address: None, Data: None
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 2
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: Stop, Operation: None, Address: None, Data: None

Usually what i want to achieve here is whenever there is “Start” then find its operation whether it is “Write” or “Read” and its address and then its data values.


阅读 100

收藏
2023-12-16

共1个答案

小能豆

To achieve the desired output, you can iterate through the DataFrame and build a nested dictionary structure based on the “Start” and “Stop” events. Here’s a modified version of your code to achieve this:

import pandas as pd

def extract_start_stop_lines(input_csv_path):
    # Read the original CSV file into a DataFrame
    df = pd.read_csv(input_csv_path)

    # Create a dictionary to store the extracted information
    result_dict = {}
    current_start_stop = None
    current_operation = None

    # Iterate through rows in the DataFrame
    for _, row in df.iterrows():
        # Extract relevant information
        start_stop_value = str(row['start/stop']).strip() if pd.notna(row['start/stop']) else None
        operation = str(row['read/write']).strip() if pd.notna(row['read/write']) else None
        address = str(row['address']).strip() if pd.notna(row['address']) else None
        data = str(row['data']).strip() if pd.notna(row['data']) else None

        # If "Start" is encountered, update the current start/stop and operation
        if start_stop_value == 'Start':
            current_start_stop = {}
            current_operation = None

        # If "Stop" is encountered, reset the current start/stop and operation
        elif start_stop_value == 'Stop':
            if current_start_stop:
                result_dict[current_start_stop['Start']] = current_start_stop
                current_start_stop = None
                current_operation = None

        # If an operation is encountered (Write/Read), update the current operation
        elif operation:
            current_operation = {
                'Address': [address] if address else [],
                'Data': [data] if data else []
            }

            if operation not in current_start_stop:
                current_start_stop[operation] = current_operation
            else:
                current_start_stop[operation]['Address'].extend(current_operation['Address'])
                current_start_stop[operation]['Data'].extend(current_operation['Data'])

    return result_dict

# Example usage:
input_csv_path = 'C:\\Test\\test.csv'
output_dict = extract_start_stop_lines(input_csv_path)
print(output_dict)

This code should produce a nested dictionary (output_dict) with the desired structure. Note that this code assumes that the “Start” and “Stop” events are properly paired in the CSV file. If the CSV file has a different structure or there are other considerations, you might need to adjust the code accordingly.

2023-12-16