# Comments_Summarization_Sumy

from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lsa import LsaSummarizer


def summarize_text(text, sentence_count=3):
    """Return an extractive LSA summary of *text* built with sumy.

    Args:
        text: Plain-text input to summarize.
        sentence_count: Summary size handed to the summarizer; adjust the
            number of sentences as needed (default 3).

    Returns:
        The selected sentences joined with single spaces. An empty string is
        returned when *text* is not a string (e.g. ``None`` or a NaN cell) or
        contains only whitespace. If summarization raises ``ValueError`` the
        original *text* is returned unchanged.
    """
    # When this function is applied over a DataFrame column, missing cells may
    # arrive as None/NaN. There is nothing to summarize in that case, so
    # return early without invoking the parser at all.
    if not isinstance(text, str) or not text.strip():
        return ""

    try:
        parser = PlaintextParser.from_string(text, Tokenizer("english"))
        summarizer = LsaSummarizer()
        sentences = summarizer(parser.document, sentence_count)
    except ValueError:
        # Best-effort fallback: return the original text if it's too short
        # to summarize.
        return text

    return " ".join(str(sentence) for sentence in sentences)


# Concatenate the worknotes for each RTSK into one string per RTSK Number.
# Missing worknotes are dropped and the remaining values are cast to str so
# that ' '.join does not fail on non-string cells.
grouped_worknotes = (
    df.groupby('RTSK Number')['RTSK Worknote']
    .apply(lambda notes: ' '.join(notes.dropna().astype(str)))
    .reset_index()
)


# Apply summarization to the grouped worknotes

grouped_worknotes['RTSK Worknote Summary'] = grouped_worknotes['RTSK Worknote'].apply(summarize_text)


# Merge the summary back to the original dataframe, if needed

# df1 = pd.merge(df, grouped_worknotes[['RTSK Number', 'RTSK Worknote Summary']], on='RTSK Number', how='left')

# Preview the first rows (displays only in an interactive/notebook session).
grouped_worknotes.head()

# No comments

# Theme images by tjasam. Powered by Blogger.