# Comments_Summarization_Sumy: summarize grouped worknotes with sumy's LSA summarizer.
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
def summarize_text(text: str, sentence_count: int = 3) -> str:
    """Return an extractive LSA summary of *text* built with sumy.

    Args:
        text: Plain English text to summarize.
        sentence_count: Number of sentences requested from the summarizer;
            adjust as needed.

    Returns:
        The sentences chosen by the summarizer, joined by single spaces.
        An empty string is returned when *text* is not a string or is
        blank. The original text is returned unchanged when the sumy
        pipeline raises ``ValueError``.
    """
    # Missing values (e.g. None or NaN coming out of a DataFrame column) and
    # blank strings have nothing to summarize; bail out before parsing.
    if not isinstance(text, str) or not text.strip():
        return ""

    try:
        parser = PlaintextParser.from_string(text, Tokenizer("english"))
        summarizer = LsaSummarizer()
        sentences = summarizer(parser.document, sentence_count)
    except ValueError:
        # Return original text if it's too short to summarize
        return text
    return " ".join(str(sentence) for sentence in sentences)
# Concatenate the worknotes for each RTSK Number into a single string.
# Missing worknotes are dropped and remaining values coerced to str so that
# ' '.join does not fail on NaN or other non-string entries.
# NOTE(review): `df` is expected to be defined earlier with the columns
# 'RTSK Number' and 'RTSK Worknote' - confirm against the loading code.
grouped_worknotes = (
    df.groupby('RTSK Number')['RTSK Worknote']
    .apply(lambda x: ' '.join(x.dropna().astype(str)))
    .reset_index()
)
# Apply summarization to the grouped worknotes
grouped_worknotes['RTSK Worknote Summary'] = grouped_worknotes['RTSK Worknote'].apply(summarize_text)
# Merge the summary back to the original dataframe, if needed
# df1 = pd.merge(df, grouped_worknotes[['RTSK Number', 'RTSK Worknote Summary']], on='RTSK Number', how='left')
grouped_worknotes.head()
# No comments