Summary
Sure! Here's the modified code, which applies the tag_issues function directly to each RTSK Worknote instead of grouping by RTSK Number, so that every worknote is tagged individually:
python
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
# `dataset` is not defined in this script; it is assumed to be supplied by
# whatever environment runs it.
# NOTE(review): presumably a pandas DataFrame with an "RTSK Worknote" column - confirm.
# `df` is bound to the same object (no copy is made), so the column
# assignments made through `df` also modify `dataset`.
df = dataset
# Cast every worknote to str so the substring checks (`in`) performed later
# by parse_html and tag_issues do not fail on non-string values.
# NOTE(review): missing values presumably become literal text (e.g. "nan")
# after this cast - confirm that is acceptable for the output.
df["RTSK Worknote"] = df["RTSK Worknote"].astype(str)
# Function to parse HTML content
def parse_html(content):
    """Return the visible text of ``content`` with any HTML markup removed.

    Args:
        content: A worknote value. A string containing both ``<`` and ``>``
            is treated as HTML and parsed with BeautifulSoup; anything else
            is returned unchanged.

    Returns:
        The extracted text, with the individual text fragments stripped and
        joined by single spaces, or ``content`` itself when it is not a
        string or does not look like HTML.
    """
    # Non-string values (e.g. None or NaN) carry no markup; pass them through
    # instead of raising TypeError on the `in` checks below.
    if not isinstance(content, str):
        return content
    # Cheap pre-check: only pay for a parse when both angle brackets appear.
    if '<' not in content or '>' not in content:
        return content
    soup = BeautifulSoup(content, 'html.parser')
    # Join fragments with a space. An empty separator glues the text of
    # adjacent nodes together ("<b>HW</b> issue" -> "HWissue"), which would
    # hide multi-word keywords from any later substring matching.
    return soup.get_text(separator=" ", strip=True)
# Function to tag issues in the worknote
def tag_issues(text, issue_keywords):
    """Pick the tag for one worknote.

    Args:
        text: The worknote text to search.
        issue_keywords: Iterable of keyword strings, checked in order.

    Returns:
        The first keyword that occurs as a (case-sensitive) substring of
        ``text``, or ``"No issue"`` when none of them occurs.
    """
    # Lazily scan the keywords in order; `next` stops at the first hit and
    # falls back to the default tag when the scan is exhausted.
    hits = (candidate for candidate in issue_keywords if candidate in text)
    return next(hits, "No issue")
# Strip HTML markup from every worknote before any keyword matching happens.
df['RTSK Worknote'] = df['RTSK Worknote'].apply(parse_html)

# Phrases searched for in each worknote. Order matters: tag_issues reports
# the first phrase in this list that it finds.
issue_keywords = [
    "HSPASS related issue",
    "Restricted tenant related issue",
    "HW issue",
    "Network related issue"
]

# Tag each worknote on its own (no grouping); the keyword list is handed to
# tag_issues as its second positional argument via `args`.
df['Issue Tag'] = df['RTSK Worknote'].apply(tag_issues, args=(issue_keywords,))

# Preview the first rows of the tagged frame.
print(df.head())

# Expose the result under the name `dataset`.
dataset = df
This modified code first parses the HTML content of each RTSK Worknote, then applies the tag_issues function to the resulting text, and stores the result in a new column named Issue Tag.
No comments