Hi I was trying to parse the complex csv. Below is the sample csv:
start/stop,read/write,address,data
Start,,,
, Write,,
,,22,
,,,0
Start repeat,,,
, Read,,
,,22,
,,, E8
,,,22
Stop,,,
I want the output achieved like below fromt the above sample csv:
{
'Start': {
'Write': {
'Address': ['22'],
'Data': ['0']
},
'Read': {
'Address': ['22'],
'Data': ['E8', '22']
}
},
'Start repeat': {
'Read': {
'Address': ['22'],
'Data': []
}
}
}
Logic tried:
import pandas as pd
import csv
def extract_start_stop_lines(input_csv_path, output_txt_path):
# Read the original CSV file into a DataFrame
df = pd.read_csv(input_csv_path)
# Extract the desired columns
selected_columns = ['start/stop', 'read/write', 'address', 'data']
selected_data = df[selected_columns]
# Display header information
header = selected_data.columns.tolist()
print("Header:", header)
# Write header information to the output text file
with open(output_txt_path, 'w', newline='', encoding='utf-8') as outputfile:
outputfile.write(f"Header: {', '.join(header)}\n")
# Iterate through rows in the DataFrame
for _, row in selected_data.iterrows():
# Extract relevant information
start_stop_value = str(row['start/stop']).strip() if pd.notna(row['start/stop']) else None
operation = str(row['read/write']).strip() if pd.notna(row['read/write']) else None
address = str(row['address']).strip() if pd.notna(row['address']) else None
data = str(row['data']).strip() if pd.notna(row['data']) else None
# Display extracted information
print(f"Start/Stop: {start_stop_value}, Operation: {operation}, Address: {address}, Data: {data}")
# Write the information to the output text file
outputfile.write(f"Start/Stop: {start_stop_value}, Operation: {operation}, Address: {address}, Data: {data}\n")
# Example usage:
input_csv_path = 'C:\\Test\\test.csv'
output_txt_path = 'start_stop_lines.txt'
extract_start_stop_lines(input_csv_path, output_txt_path)
After the above code new text file will be generated here are the sample lines of files:
Header: start/stop, read/write, address, data
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 0
Start/Stop: Start repeat, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Read, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: E8
Start/Stop: None, Operation: None, Address: None, Data: 22
Start/Stop: Stop, Operation: None, Address: None, Data: None
Start/Stop: Start, Operation: None, Address: None, Data: None
Start/Stop: None, Operation: Write, Address: None, Data: None
Start/Stop: None, Operation: None, Address: 22, Data: None
Start/Stop: None, Operation: None, Address: None, Data: 2
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: None, Operation: None, Address: None, Data: FF
Start/Stop: Stop, Operation: None, Address: None, Data: None
Usually what i want to achieve here is whenever there is “Start” then find its operation whether it is “Write” or “Read” and its address and then its data values.
To achieve the desired output, you can iterate through the DataFrame and build a nested dictionary structure based on the “Start” and “Stop” events. Here’s a modified version of your code to achieve this:
import pandas as pd
def extract_start_stop_lines(input_csv_path):
# Read the original CSV file into a DataFrame
df = pd.read_csv(input_csv_path)
# Create a dictionary to store the extracted information
result_dict = {}
current_start_stop = None
current_operation = None
# Iterate through rows in the DataFrame
for _, row in df.iterrows():
# Extract relevant information
start_stop_value = str(row['start/stop']).strip() if pd.notna(row['start/stop']) else None
operation = str(row['read/write']).strip() if pd.notna(row['read/write']) else None
address = str(row['address']).strip() if pd.notna(row['address']) else None
data = str(row['data']).strip() if pd.notna(row['data']) else None
# If "Start" is encountered, update the current start/stop and operation
if start_stop_value == 'Start':
current_start_stop = {}
current_operation = None
# If "Stop" is encountered, reset the current start/stop and operation
elif start_stop_value == 'Stop':
if current_start_stop:
result_dict[current_start_stop['Start']] = current_start_stop
current_start_stop = None
current_operation = None
# If an operation is encountered (Write/Read), update the current operation
elif operation:
current_operation = {
'Address': [address] if address else [],
'Data': [data] if data else []
}
if operation not in current_start_stop:
current_start_stop[operation] = current_operation
else:
current_start_stop[operation]['Address'].extend(current_operation['Address'])
current_start_stop[operation]['Data'].extend(current_operation['Data'])
return result_dict
# Example usage:
input_csv_path = 'C:\\Test\\test.csv'
output_dict = extract_start_stop_lines(input_csv_path)
print(output_dict)
This code should produce a nested dictionary (output_dict
) with the desired structure. Note that this code assumes that the “Start” and “Stop” events are properly paired in the CSV file. If the CSV file has a different structure or there are other considerations, you might need to adjust the code accordingly.