# Comments summarization with spaCy

import spacy

from spacy.lang.en.stop_words import STOP_WORDS

from string import punctuation

from collections import Counter

from heapq import nlargest

import pandas as pd

# Load the large English pipeline once at import time; every call to
# summarize_text reuses this shared object.
nlp = spacy.load('en_core_web_lg')

# Held as a set so the per-token stop-word membership test is a hash lookup
# instead of a linear scan over a list.
stopwords = set(STOP_WORDS)

# Extend the punctuation string with a newline so single line-break tokens
# are filtered out along with punctuation when counting word frequencies.
punctuation += '\n'

# Function to summarize text

def summarize_text(text, ratio=0.3):
    """Return an extractive summary of *text*.

    Each sentence is scored by summing the normalised frequencies of the
    non-stop-word, non-punctuation tokens it contains; the highest-scoring
    sentences are joined with single spaces, best-scoring first.

    Args:
        text: The text to summarise.
        ratio: Fraction of the sentences to keep (default 0.3). At least
            one sentence is kept whenever any sentence received a score.

    Returns:
        The summary string, or an empty string when *text* contains no
        scorable words (empty input, or only stop words / punctuation).
    """
    doc = nlp(text)

    # Key the counts by the lower-cased token text. The scoring loop below
    # looks words up in lower case, so counting under the original casing
    # would make capitalised words invisible to the scorer.
    word_frequencies = Counter(
        token.text.lower()
        for token in doc
        if token.text.lower() not in stopwords
        and token.text.lower() not in punctuation
    )

    # Nothing to score: avoid max() on an empty sequence.
    if not word_frequencies:
        return ''

    # Normalise so the most frequent word has weight 1.0.
    max_frequency = max(word_frequencies.values())
    weights = {
        word: count / max_frequency
        for word, count in word_frequencies.items()
    }

    sentences = list(doc.sents)
    sentence_scores = {}
    for sent in sentences:
        for token in sent:
            weight = weights.get(token.text.lower())
            if weight is not None:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + weight

    # int() truncates, so short inputs (fewer than four sentences at the
    # default ratio) would otherwise select zero sentences.
    select_length = max(1, int(len(sentences) * ratio))

    summary = nlargest(select_length, sentence_scores, key=sentence_scores.get)
    return ' '.join(sent.text for sent in summary)

# Function to summarize grouped worknotes

def summarize_comments(comments):
    """Summarise a group of work notes as a single text.

    Args:
        comments: An iterable of work-note values (for example the pandas
            Series handed over by ``groupby(...).apply``).

    Returns:
        The summary produced by ``summarize_text`` for the notes joined
        with single spaces.
    """
    # Missing work notes show up as NaN/None in a pandas column and would
    # make str.join raise TypeError, so only real strings are combined.
    text_notes = [note for note in comments if isinstance(note, str)]
    combined_comments = ' '.join(text_notes)
    return summarize_text(combined_comments)

# Sample data: two work notes for each of two RTSK numbers.
data = {
    'RTSK Number': ['001', '001', '002', '002'],
    'RTSK Worknote': [
        'Worknote 1 for RTSK 001. This is a detailed note explaining various aspects of the task.',
        'Worknote 2 for RTSK 001. Additional comments and updates.',
        'Worknote 1 for RTSK 002. Initial setup and configuration details.',
        'Worknote 2 for RTSK 002. Follow-up and final remarks.',
    ],
}

df = pd.DataFrame(data)

# Collect the work notes per RTSK number, summarise each group, and name the
# resulting column 'Summary' while turning the group key back into a column.
notes_by_task = df.groupby('RTSK Number')['RTSK Worknote']
summarized_df = notes_by_task.apply(summarize_comments).reset_index(name='Summary')

# Show one summary row per RTSK number.
print(summarized_df)

Data Science & Machine Learning

Comments_summarization_spacy

No comments

Popular

Recent

Comments

Search This Blog

Blog Archive

About Us

Recent in Spirituality

Recent in Tourism

Popular Posts