#!/usr/bin/env python3
"""
Grammar Helper - Simple pattern-based grammar checker

Checks for common grammar errors in student essays.
Educational focus - teaches rules, not just corrections.

Usage:
    python3 check_grammar.py "Your text here"
    python3 check_grammar.py --file essay.txt
    python3 check_grammar.py --tips
"""

import sys
import re
from typing import List, Dict, Tuple


class GrammarChecker:
    """Pattern-based grammar and style checker for student essays.

    Each ``check_*`` method scans the text with simple regular expressions or
    substring tests and appends dict records to ``self.errors`` (likely
    mistakes) or ``self.warnings`` (style advice). The checks are heuristics:
    false positives and missed cases are expected.

    Attributes:
        text: The raw text being checked.
        errors: Error records collected so far.
        warnings: Warning records collected so far.
        sentences: Non-empty, stripped pieces of ``text`` split on ``.``,
            ``!`` and ``?``.
        words: Whitespace-separated tokens of ``text``.
    """

    # (regex, suggested replacement, explanation); matched case-insensitively.
    # Apostrophes accept both the straight (') and typographic (’) form.
    _HOMOPHONE_RULES: List[Tuple[str, str, str]] = [
        # Their/There/They're
        (r'\bthere\s+(\w+ing)\b', 'their', 'Use "their" (possessive) before gerunds'),
        (r'\bthere\s+(house|car|book|family|children)\b', 'their', 'Use "their" (possessive) for ownership'),
        (r"\bthey['’]re\s+(house|car|book)\b", 'their', 'Use "their" (possessive), not "they\'re" (they are)'),

        # Your/You're
        (r'\byour\s+going\b', "you're", 'Use "you\'re" (you are) going'),
        (r'\byour\s+wrong\b', "you're", 'Use "you\'re" (you are) wrong'),

        # Its/It's
        (r'\bits\s+a\b', "it's", 'Use "it\'s" (it is) before "a"'),
        (r"\bit['’]s\s+(tail|fur|purpose)\b", 'its', 'Use "its" (possessive) for ownership'),

        # To/Too/Two
        (r'\bto\s+(much|many|soon|late)\b', 'too', 'Use "too" (excessive) before much/many'),
    ]

    # Sentence openers that usually call for a comma. The trailing \b keeps
    # longer words ("Afterwards", "Whenever") from matching their prefix.
    _INTRO_PHRASE_RE = re.compile(
        r'^(In \d+|During|After|Before|When|While|Although|Because)\b'
    )

    # "to be" form followed by a word ending in -ed / -en.
    _PASSIVE_PATTERNS: Tuple[str, ...] = (
        r'\b(was|were|is|are|been|be)\s+\w+ed\b',
        r'\b(was|were|is|are)\s+\w+en\b',
    )

    # Weak/overused word or phrase -> advice shown to the student.
    _WEAK_WORDS: Dict[str, str] = {
        'very': 'Often unnecessary. Find a stronger adjective.',
        'really': 'Avoid in formal writing. Use specific language.',
        'thing': 'Vague. Be specific.',
        'stuff': 'Too informal. Name the specific items.',
        'a lot': 'Vague quantity. Use specific numbers or "many".',
        'good': 'Generic. Use "effective", "beneficial", "valuable".',
        'bad': 'Generic. Use "harmful", "detrimental", "problematic".',
    }

    # (regex, advice) for first-person phrasing; matched case-insensitively.
    _FIRST_PERSON_RULES: List[Tuple[str, str]] = [
        (r'\bI think\b', 'Remove "I think" - state claims directly'),
        (r'\bI believe\b', 'Remove "I believe" - focus on evidence'),
        (r'\bIn my opinion\b', 'Remove "In my opinion" - unnecessary in argumentative writing'),
        (r'\bI feel\b', 'Use "evidence suggests" or "research shows" instead'),
        (r'\bWe can see\b', 'Use "This demonstrates" or "Evidence shows"'),
    ]

    # Real contractions only. A bare "<word>'s" is usually a possessive
    # ("John's book"), so 's is accepted only after a short list of pronouns
    # and similar function words.
    _CONTRACTION_RE = re.compile(
        r"\b(?:"
        r"\w+n['’]t"                     # don't, can't, isn't
        r"|\w+['’](?:re|ve|ll|d|m)"      # they're, I've, we'll, she'd, I'm
        r"|(?:it|that|there|here|he|she|what|who|where|how|let)['’]s"
        r")\b",
        re.IGNORECASE,
    )

    def __init__(self, text: str):
        """Store ``text`` and pre-compute its sentences and words."""
        self.text = text
        self.errors = []
        self.warnings = []
        self.sentences = self._split_sentences()
        self.words = text.split()

    def _split_sentences(self) -> List[str]:
        """Split the text on runs of ``.``, ``!`` or ``?``.

        Returns the non-empty pieces with surrounding whitespace removed.
        Terminal punctuation is not kept. Any period splits, so abbreviations
        and decimal numbers also produce sentence breaks.
        """
        pieces = re.split(r'[.!?]+', self.text)
        return [piece.strip() for piece in pieces if piece.strip()]

    def check_all(self) -> Dict:
        """Run every check and return the combined result.

        Returns:
            A dict with ``'errors'`` and ``'warnings'`` (the instance's own
            lists, not copies) and ``'stats'`` from ``_calculate_stats``.
            Records accumulate, so calling this twice on the same instance
            duplicates them.
        """
        self.check_homophones()
        self.check_punctuation()
        self.check_passive_voice()
        self.check_word_choice()
        self.check_first_person()
        self.check_contractions()
        self.check_sentence_length()

        return {
            'errors': self.errors,
            'warnings': self.warnings,
            'stats': self._calculate_stats()
        }

    def check_homophones(self):
        """Record an error for each match of a known homophone mix-up."""
        for pattern, correct, explanation in self._HOMOPHONE_RULES:
            for match in re.finditer(pattern, self.text, re.IGNORECASE):
                self.errors.append({
                    'type': 'homophone',
                    'found': match.group(0),
                    'should_be': correct,
                    'explanation': explanation,
                    'position': match.start()
                })

    def check_punctuation(self):
        """Warn about possible run-on sentences and missing intro commas."""
        # Run-on heuristic: a long sentence (> 25 words) containing "they"
        # but no "and"/"but". All three tests use the lowercased sentence so
        # capitalisation cannot change the outcome.
        for sentence in self.sentences:
            lowered = sentence.lower()
            has_conjunction = ' and ' in lowered or ' but ' in lowered
            if ' they ' in lowered and not has_conjunction and len(sentence.split()) > 25:
                self.warnings.append({
                    'type': 'run-on',
                    'sentence': sentence[:100],
                    'explanation': 'Possible run-on sentence. Consider splitting or adding punctuation.'
                })

        # Missing comma: the sentence opens with an introductory word and has
        # no comma within its first 30 characters.
        for sentence in self.sentences:
            if self._INTRO_PHRASE_RE.match(sentence) and ',' not in sentence[:30]:
                self.warnings.append({
                    'type': 'missing-comma',
                    'sentence': sentence[:80],
                    'explanation': 'Consider adding a comma after the introductory phrase.'
                })

    def check_passive_voice(self):
        """Warn on each passive-looking phrase and on heavy overall use."""
        passive_count = 0
        for pattern in self._PASSIVE_PATTERNS:
            for match in re.finditer(pattern, self.text, re.IGNORECASE):
                passive_count += 1
                self.warnings.append({
                    'type': 'passive-voice',
                    'found': match.group(0),
                    'explanation': 'Passive voice. Consider rewriting with active voice for stronger writing.'
                })

        # The figure is matches per sentence, so it can exceed 100% when a
        # sentence contains several passive constructions.
        if self.sentences:
            passive_pct = (passive_count / len(self.sentences)) * 100
            if passive_pct > 20:
                self.warnings.append({
                    'type': 'passive-overall',
                    'explanation': f'High passive voice usage ({passive_pct:.0f}%). Aim for <10% in formal essays.'
                })

    def check_word_choice(self):
        """Warn once per weak or overused word, with its occurrence count."""
        lowered_text = self.text.lower()
        for weak, advice in self._WEAK_WORDS.items():
            count = len(re.findall(r'\b' + re.escape(weak) + r'\b', lowered_text))
            if count > 0:
                self.warnings.append({
                    'type': 'weak-word',
                    'word': weak,
                    'count': count,
                    'advice': advice
                })

    def check_first_person(self):
        """Warn on first-person phrases that weaken formal arguments."""
        for pattern, advice in self._FIRST_PERSON_RULES:
            for match in re.finditer(pattern, self.text, re.IGNORECASE):
                self.warnings.append({
                    'type': 'first-person',
                    'found': match.group(0),
                    'advice': advice
                })

    def check_contractions(self):
        """Add one warning summarising the contractions found, if any.

        Possessives such as "John's" are not counted. The warning carries the
        total count and up to three examples.
        """
        contractions = self._CONTRACTION_RE.findall(self.text)
        if contractions:
            self.warnings.append({
                'type': 'contractions',
                'count': len(contractions),
                'examples': contractions[:3],
                'advice': 'Avoid contractions in formal essays. Write out full words.'
            })

    def check_sentence_length(self):
        """Warn when the average sentence is under 10 or over 30 words."""
        lengths = [len(sentence.split()) for sentence in self.sentences]
        if not lengths:
            return

        avg = sum(lengths) / len(lengths)
        if avg < 10:
            self.warnings.append({
                'type': 'sentence-length',
                'advice': 'Average sentence length is short. Combine some sentences for variety.'
            })
        elif avg > 30:
            self.warnings.append({
                'type': 'sentence-length',
                'advice': 'Average sentence length is long. Consider breaking up complex sentences.'
            })

    def _calculate_stats(self) -> Dict:
        """Return word/sentence counts and sentence-length statistics.

        With no sentences, the length statistics are computed over a single
        zero so the average, longest and shortest values are all 0.
        """
        sentence_lengths = [len(sentence.split()) for sentence in self.sentences] or [0]

        return {
            'words': len(self.words),
            'sentences': len(self.sentences),
            'avg_sentence_length': sum(sentence_lengths) / len(sentence_lengths),
            'longest_sentence': max(sentence_lengths),
            'shortest_sentence': min(sentence_lengths),
        }


def format_report(result: Dict):
    """Print a human-readable report of a grammar check to stdout.

    Args:
        result: A dict with ``'stats'``, ``'errors'`` and ``'warnings'``
            keys, as returned by ``GrammarChecker.check_all``.

    At most the first 10 errors and first 5 warnings are printed in full;
    any remainder is summarised by count. Nothing is returned.
    """
    stats = result['stats']
    errors = result['errors']
    warnings = result['warnings']

    print("\n=== Grammar Check Results ===\n")

    print(f"Words: {stats['words']}")
    print(f"Sentences: {stats['sentences']}")
    # Sentence-length figures are meaningless for text with no sentences.
    if stats['sentences'] > 0:
        print(f"Average sentence length: {stats['avg_sentence_length']:.1f} words")
        print(f"Longest sentence: {stats['longest_sentence']} words")
        print(f"Shortest sentence: {stats['shortest_sentence']} words")

    print(f"\nERRORS FOUND: {len(errors)}")
    print(f"WARNINGS: {len(warnings)}\n")

    # Errors (first 10 only)
    for i, error in enumerate(errors[:10], 1):
        print(f"❌ Error {i}: {error.get('type', 'unknown')}")
        print(f"   Found: {error.get('found', 'N/A')}")
        if 'should_be' in error:
            print(f"   Should be: {error['should_be']}")
        print(f"   {error.get('explanation', '')}\n")

    if len(errors) > 10:
        print(f"... and {len(errors) - 10} more errors\n")

    # Warnings (first 5 only). Every piece of context a warning carries is
    # shown so the student can locate the problem in the text.
    for i, warning in enumerate(warnings[:5], 1):
        print(f"⚠️  Warning {i}: {warning.get('type', 'unknown')}")
        if 'found' in warning:
            print(f"   Found: {warning['found']}")
        if 'word' in warning:
            count = warning.get('count', 0)
            times = 'time' if count == 1 else 'times'
            print(f"   Word: {warning['word']} (used {count} {times})")
        if 'sentence' in warning:
            print(f"   Sentence: {warning['sentence']}")
        if 'examples' in warning:
            print(f"   Examples: {', '.join(map(str, warning['examples']))}")
        print(f"   {warning.get('advice', warning.get('explanation', ''))}\n")

    if len(warnings) > 5:
        print(f"... and {len(warnings) - 5} more warnings\n")

    # Summary
    if not errors and not warnings:
        print("✓ No errors or warnings found! Nice work!")


def show_tips():
    """Print the grammar quick-reference sheet to stdout."""
    # One entry per output line; empty strings are the blank separator lines.
    reference_lines = [
        "",
        "=== Grammar Quick Reference ===",
        "",
        "HOMOPHONES:",
        "  their = possessive (their book)",
        "  there = location (over there)",
        "  they're = they are",
        "",
        "  your = possessive (your book)",
        "  you're = you are",
        "",
        "  its = possessive (its tail)",
        "  it's = it is",
        "",
        "COMMA RULES:",
        "  Use comma after introductory phrase: 'In 1776, ...'",
        "  Use comma before 'and'/'but' in compound sentence",
        "  Use commas around non-essential info: 'The book, published in 1950, ...'",
        "",
        "AVOID IN FORMAL ESSAYS:",
        "  ❌ Contractions (don't, can't, won't)",
        "  ❌ First person (I think, I believe)",
        "  ❌ Second person (you, your)",
        "  ❌ Passive voice (was written by...)",
        "  ❌ Weak words (very, really, thing, stuff)",
    ]
    print("\n".join(reference_lines))


def main():
    """Command-line entry point: parse ``sys.argv`` and run the checker.

    Modes:
        ``--help`` / ``-h``: print usage and exit with status 0.
        ``--tips``: print the quick-reference sheet and exit with status 0.
        ``--file PATH``: check the contents of PATH.
        otherwise: check the first positional argument as literal text.

    Exits with status 1 when no text is given, when ``--file`` has no path
    after it, or when the file cannot be read.
    """
    args = sys.argv

    if '--help' in args or '-h' in args:
        print("Grammar Helper - Check your essay for common errors")
        print("\nUsage:")
        print("  python3 check_grammar.py 'Your text here'")
        print("  python3 check_grammar.py --file essay.txt")
        print("  python3 check_grammar.py --tips")
        sys.exit(0)

    if '--tips' in args:
        show_tips()
        sys.exit(0)

    # Get text
    if '--file' in args:
        idx = args.index('--file')
        # "--file" as the last argument has no path after it; report that
        # instead of handing None to the checker.
        if idx + 1 >= len(args):
            print("Error: --file requires a file path")
            print("Usage: python3 check_grammar.py --file essay.txt")
            sys.exit(1)

        filepath = args[idx + 1]
        try:
            with open(filepath, 'r') as f:
                text = f.read()
        except FileNotFoundError:
            print(f"Error: File '{filepath}' not found")
            sys.exit(1)
        except (OSError, UnicodeDecodeError) as exc:
            # e.g. the path is a directory, is not readable, or is not text.
            print(f"Error: Could not read '{filepath}': {exc}")
            sys.exit(1)
    elif len(args) > 1:
        text = args[1]
    else:
        print("Error: No text provided")
        print("Usage: python3 check_grammar.py 'Your text here'")
        sys.exit(1)

    # Check grammar
    checker = GrammarChecker(text)
    result = checker.check_all()

    # Display results
    format_report(result)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
