Summary

Sure! Here's the modified code, which applies the tag_issues function directly to each RTSK Worknote instead of grouping by RTSK Number first, so every worknote is tagged individually:

python
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

# 'dataset' is expected to already exist when this script runs (it is not
# defined here). 'df' is a second name for the same object, not a copy, so
# the column assignments below also modify 'dataset'.
df = dataset

# Coerce every worknote to str so the substring checks below work on any cell.
# NOTE(review): astype(str) presumably also stringifies missing values
# (e.g. NaN -> "nan") - confirm that is acceptable for downstream consumers.
df["RTSK Worknote"] = df["RTSK Worknote"].astype(str)

# Function to parse HTML content
def parse_html(content):
    """Return the visible text of *content*, stripping HTML markup if present.

    Args:
        content: One worknote value. A string containing both ``<`` and ``>``
            is treated as HTML; anything else is returned unchanged.

    Returns:
        The plain text of the worknote, with the text of separate elements
        joined by a single space, or *content* itself when it is not a string
        or does not look like HTML.
    """
    # Non-text cells (None, NaN, numbers) cannot hold markup; pass them
    # through instead of raising TypeError on the membership tests below.
    if not isinstance(content, str):
        return content
    if '<' in content and '>' in content:
        soup = BeautifulSoup(content, 'html.parser')
        # Join fragments with a space: combined with strip=True, an empty
        # separator glues neighbouring elements together
        # ("<p>HW</p><p>issue</p>" -> "HWissue"), which hides multi-word
        # keywords from the later substring matching.
        return soup.get_text(separator=" ", strip=True)
    return content

# Function to tag issues in the worknote
def tag_issues(text, issue_keywords):
    """Return the first keyword that occurs in *text*, or ``"No issue"``.

    Args:
        text: The worknote text to search.
        issue_keywords: Keywords to look for, tried in iteration order.

    Returns:
        The earliest entry of *issue_keywords* found as a case-sensitive
        substring of *text*; ``"No issue"`` when none of them occurs.
    """
    # Lazily scan the keywords so the search stops at the first hit.
    hits = (candidate for candidate in issue_keywords if candidate in text)
    return next(hits, "No issue")

# Phrases searched for in each worknote, listed in the order they are checked
issue_keywords = [
    "HSPASS related issue",
    "Restricted tenant related issue",
    "HW issue",
    "Network related issue",
]

# Replace each worknote with its plain-text form before keyword matching
df['RTSK Worknote'] = df['RTSK Worknote'].apply(parse_html)

# Tag every worknote row on its own; the keyword list is forwarded to
# tag_issues as its second positional argument
df['Issue Tag'] = df['RTSK Worknote'].apply(tag_issues, args=(issue_keywords,))

# Preview the first rows of the tagged frame
print(df.head())

# Publish the result under the name 'dataset'
dataset = df

This modified code first strips any HTML from each RTSK Worknote, then applies the tag_issues function to every worknote individually and stores the result (the first matching keyword, or "No issue" when none matches) in a new column named Issue Tag.

No comments

Theme images by tjasam. Powered by Blogger.