"""
Semantic Understanding Module
Provides semantic analysis and topic extraction

Developer: RSK World
Website: https://rskworld.in
Email: help@rskworld.in
Phone: +91 93305 39277
Year: 2026
"""

import spacy
from collections import Counter
from text_preprocessing import TextPreprocessor

class SemanticAnalyzer:
    """
    Keyword, topic, phrase and similarity analysis built on spaCy.

    When the ``en_core_web_sm`` model cannot be loaded, ``self.nlp`` is
    ``None`` and every method degrades gracefully: keyword/topic extraction
    falls back to token frequencies from ``TextPreprocessor``, phrase
    extraction returns an empty list and similarity returns ``0.0``.

    Developer: RSK World - https://rskworld.in
    """

    # Coarse part-of-speech tags that qualify a token as a keyword candidate.
    _KEYWORD_POS = ('NOUN', 'PROPN', 'ADJ')
    # Upper bound on the number of phrases returned by phrase extraction.
    _MAX_PHRASES = 10

    def __init__(self):
        """Load the spaCy English model (if installed) and the preprocessor."""
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            # Missing model is not fatal: methods check for ``nlp is None``.
            print("Warning: spaCy English model not found.")
            print("Please install it using: python -m spacy download en_core_web_sm")
            self.nlp = None

        self.preprocessor = TextPreprocessor()

    def _keywords_from_doc(self, doc, top_n):
        """Rank lower-cased lemmas of noun/proper-noun/adjective tokens by frequency."""
        lemma_counts = Counter(
            token.lemma_.lower()
            for token in doc
            if (token.pos_ in self._KEYWORD_POS
                and not token.is_stop
                and not token.is_punct
                and len(token.text) > 2)  # drop very short tokens
        )
        return [lemma for lemma, _ in lemma_counts.most_common(top_n)]

    def _phrases_from_doc(self, doc):
        """Return the first ``_MAX_PHRASES`` multi-word noun chunks of *doc*."""
        multi_word = [
            chunk.text for chunk in doc.noun_chunks
            if len(chunk.text.split()) > 1
        ]
        return multi_word[:self._MAX_PHRASES]

    def extract_keywords(self, text, top_n=10):
        """
        Extract the most frequent keywords from text

        With a spaCy model, keywords are the lower-cased lemmas of nouns,
        proper nouns and adjectives that are not stop words or punctuation
        and are longer than two characters. Without a model, the
        ``'filtered_tokens'`` produced by the preprocessor are counted
        instead.

        Args:
            text (str): Input text
            top_n (int): Number of top keywords to return

        Returns:
            list: Up to ``top_n`` keywords, most frequent first
        """
        if self.nlp is None:
            # Fallback to simple frequency-based extraction
            preprocessed = self.preprocessor.preprocess(text)
            token_counts = Counter(preprocessed['filtered_tokens'])
            return [word for word, _ in token_counts.most_common(top_n)]

        return self._keywords_from_doc(self.nlp(text), top_n)

    def extract_topics(self, text, top_n=5):
        """
        Extract main topics from text

        Topics are the highest-ranked keywords of the text.

        Args:
            text (str): Input text
            top_n (int): Number of topics to return

        Returns:
            list: Up to ``top_n`` topics, most frequent first
        """
        # most_common(k) is a stable prefix of most_common(2 * k), so asking
        # for exactly ``top_n`` keywords yields the same list without
        # over-fetching and slicing.
        return self.extract_keywords(text, top_n)

    def analyze_semantic_similarity(self, text1, text2):
        """
        Calculate semantic similarity between two texts

        Args:
            text1 (str): First text
            text2 (str): Second text

        Returns:
            float: The value of spaCy's ``Doc.similarity`` for the two parsed
            texts (nominally 0-1; it is returned as-is and not clamped), or
            ``0.0`` when no spaCy model is loaded.
        """
        if self.nlp is None:
            return 0.0

        # NOTE(review): per the spaCy docs the small English model ships
        # without static word vectors, so these scores may be of limited
        # quality - confirm whether a larger model should be loaded.
        first_doc = self.nlp(text1)
        second_doc = self.nlp(text2)
        return first_doc.similarity(second_doc)

    def extract_phrases(self, text):
        """
        Extract multi-word noun chunks from text

        Args:
            text (str): Input text

        Returns:
            list: At most 10 multi-word noun chunks in document order
            (empty when no spaCy model is loaded)
        """
        if self.nlp is None:
            return []

        return self._phrases_from_doc(self.nlp(text))

    def analyze(self, text):
        """
        Complete semantic analysis

        The text is parsed only once; keywords and phrases are derived from
        the same parsed document, and topics/key concepts are prefixes of the
        keyword ranking.

        Args:
            text (str): Input text

        Returns:
            dict: ``'keywords'`` (top 10), ``'topics'`` (top 5),
            ``'phrases'`` (up to 10 noun chunks) and ``'key_concepts'``
            (top 3 topics)
        """
        if self.nlp is None:
            keywords = self.extract_keywords(text)
            phrases = []
        else:
            doc = self.nlp(text)  # single parse shared by all extractors
            keywords = self._keywords_from_doc(doc, 10)
            phrases = self._phrases_from_doc(doc)

        # Same ranking as extract_topics(text) with its default of 5.
        topics = keywords[:5]

        return {
            'keywords': keywords,
            'topics': topics,
            'phrases': phrases,
            'key_concepts': topics[:3],  # Top 3 concepts
        }

