
Fake News Classification

In an era of information overload, distinguishing fact from fiction has become increasingly challenging. The rise of digital media has democratized information sharing, but it has also created unprecedented opportunities for the spread of misinformation.

The Python script below provides a framework for detecting potential fake-news indicators. No script can definitively prove an article is fake, but this tool surfaces several signals that may suggest unreliable content.

Key Features
1. Sensationalist Language Detection
○ Checks for emotionally charged, attention-grabbing keywords such as "breaking", "shocking", and "bombshell"
○ Flags terms that are often used in clickbait or misleading articles
2. Stylistic Red Flags
○ Measures the share of fully capitalized words and the density of exclamation marks
○ Helps identify articles that lean on typographic emphasis and emotional manipulation rather than substance
3. Source Reliability Check
○ Inspects the article URL for suspicious top-level domains and other credibility indicators
○ Can be expanded with more sophisticated source verification methods
4. Political Bias Detection
○ Counts loaded terms associated with either political extreme
○ Helps identify one-sided framing
5. Quote and Complexity Metrics
○ Calculates the ratio of quoted material to total text, along with basic readability measures
○ Little direct quotation can signal opinion writing rather than sourced reporting
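
Each of these signals contributes to a single weighted score, clamped to the range [0, 1], computed by the _calculate_fake_news_probability method in the script below. As a quick worked example of the weighting: an article containing three propaganda keywords (3 × 0.1 = 0.3), with 10% of its words fully capitalized (0.10 × 0.2 = 0.02), hosted on a domain with a suspicious TLD (+0.1), already scores 0.3 + 0.02 + 0.1 = 0.42 before the exclamation-density, external-verification, and bias terms are added.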

                            
 import re
 import urllib.parse
 from typing import Any, Dict, List, Optional
 import numpy as np
                            
 class FakeNewsAnalyzer:
    def __init__(self):
        # Predefined lists of keywords and patterns
        self.propaganda_indicators = [
            'exclusive', 'breaking', 'shocking', 'secret', 'hidden',
            'world exclusive', 'must read', 'revelation', 'bombshell'
        ]
                            
        self.biased_language_markers = {
            'extreme_left': ['capitalism', 'oppression', 'systemic', 'resist'],
            'extreme_right': ['patriot', 'traditional', 'liberty', 'freedom']
        }
                            
        # External fact-checking API endpoint (simulated)
        self.fact_check_api = "https://api.factcheck.org/verify"
                            
    def analyze_article(self, article_text: str, article_url: Optional[str] = None) -> Dict[str, Any]:
        """ 
        Comprehensive analysis of potential fake news indicators. 
        :param article_text: Full text of the article. 
        :param article_url: Optional URL of the article for additional checks. 
        :return: Detailed analysis dictionary. 
        """
        if not article_text.strip():
            return {"error": "Article text is empty or invalid."}
                            
        analysis = {
            'text_analysis': self._analyze_text_characteristics(article_text),
            'url_analysis': self._analyze_url(article_url) if article_url else {},
            'external_verification': self._check_external_sources(article_text),
            'bias_indicators': self._detect_political_bias(article_text),
            'fake_news_probability': 0.0
        }
                            
        # Calculate overall fake news probability
        analysis['fake_news_probability'] = self._calculate_fake_news_probability(analysis)
                            
        return analysis
                            
    def _analyze_text_characteristics(self, text: str) -> Dict[str, Any]:
        """ 
        Analyze textual characteristics that might indicate unreliable content.                      
        :param text: Article text. 
        :return: Dictionary of text analysis metrics. 
        """
        cleaned_text = self._preprocess_text(text)
                            
        return {
            'propaganda_words': self._count_propaganda_words(cleaned_text),
            'capitalization_ratio': self._check_capitalization(text),
            'exclamation_density': self._count_exclamation_marks(text),
            'quote_to_text_ratio': self._calculate_quote_ratio(text),
            'text_complexity': self._measure_text_complexity(cleaned_text)
        }
                            
    def _preprocess_text(self, text: str) -> str:
        """ 
        Normalize and clean the text.                      
        :param text: Input text. 
        :return: Cleaned text. 
        """
        text = re.sub(r'[^a-zA-Z\s]', '', text.lower())  # Lowercase and remove non-alphabet characters.
        return re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace.
                            
    def _count_propaganda_words(self, text: str) -> List[str]:
        """Identify propaganda-style keywords."""
        return [word for word in self.propaganda_indicators if word in text]
                            
    def _check_capitalization(self, text: str) -> float:
        """Calculate the ratio of capitalized words."""
        words = text.split()
        capitalized_words = [word for word in words if word.isupper()]
        return len(capitalized_words) / len(words) if words else 0
                            
    def _count_exclamation_marks(self, text: str) -> float:
        """Count exclamation mark density."""
        words = text.split()
        return (text.count('!') / len(words)) * 100 if words else 0
                            
    def _calculate_quote_ratio(self, text: str) -> float:
        """Calculate the ratio of quoted text to total text."""
        quotes = re.findall(r'"([^"]*)"', text)
        total_quote_length = sum(len(quote) for quote in quotes)
        return total_quote_length / len(text) if text else 0
                            
    def _measure_text_complexity(self, text: str) -> float:
        """Measure text complexity using basic linguistic metrics."""
        words = text.split()
        avg_word_length = np.mean([len(word) for word in words]) if words else 0
        sentences = [s for s in text.split('.') if s.strip()]  # Ignore empty fragments from trailing periods.
        avg_sentence_length = np.mean([len(s.split()) for s in sentences]) if sentences else 0
        return (avg_word_length + avg_sentence_length) / 2
                            
    def _analyze_url(self, url: str) -> Dict[str, Any]:
        """Analyze article URL for credibility indicators."""
        if not url:
            return {}
                            
        parsed_url = urllib.parse.urlparse(url)
                            
        return {
            'domain': parsed_url.netloc,
            'suspicious_tld': self._check_suspicious_tld(parsed_url.netloc),
            'url_length': len(url),
            'contains_numbers': bool(re.search(r'\d', parsed_url.netloc))
        }
                            
    def _check_suspicious_tld(self, domain: str) -> bool:
        """Check for suspicious top-level domains."""
        suspicious_tlds = ['.xyz', '.top', '.loan', '.click']
        return any(tld in domain for tld in suspicious_tlds)
                            
    def _detect_political_bias(self, text: str) -> Dict[str, float]:
        """Detect potential political bias in the text."""
        cleaned_text = self._preprocess_text(text)
                            
        return {
            bias: sum(cleaned_text.count(word) for word in words)
            for bias, words in self.biased_language_markers.items()
        }
                            
    def _check_external_sources(self, text: str) -> Dict[str, Any]:
        """Simulate checking external fact-checking sources."""
        try:
            keywords = self._extract_key_claims(text)
            return {
                'verified_claims': len(keywords),
                'unverified_claims': len(keywords) // 2,
                'potentially_false': len(keywords) // 4
            }
        except Exception:
            return {'error': 'External verification failed'}
                            
    def _extract_key_claims(self, text: str) -> List[str]:
        """Extract potential key claims from the text."""
        sentences = text.split('.')
        return [sentence.strip() for sentence in sentences if len(sentence.split()) > 5][:5]
                            
    def _calculate_fake_news_probability(self, analysis: Dict[str, Any]) -> float:
        """Calculate overall probability of fake news."""
        score = 0.0
                            
        text_analysis = analysis['text_analysis']
        score += len(text_analysis['propaganda_words']) * 0.1
        score += text_analysis['capitalization_ratio'] * 0.2
        score += text_analysis['exclamation_density'] * 0.05
                            
        if analysis['url_analysis'].get('suspicious_tld'):
            score += 0.1
                            
        ext_verification = analysis['external_verification']
        if ext_verification.get('potentially_false'):
            score += 0.2

        bias_indicators = analysis['bias_indicators']
        score += max(bias_indicators.values()) * 0.1

        return min(max(score, 0), 1)
                            
                            
 def main():
    analyzer = FakeNewsAnalyzer()
                            
    sample_article = """ 
    BREAKING: Shocking Revelation That Will Change Everything!  
    Our exclusive investigation EXPOSES the hidden truth about a secret government  
    program that threatens your freedom. You won't believe what's really happening! 
    """
    sample_url = "https://example.suspicious.xyz/news"
                            
    results = analyzer.analyze_article(sample_article, sample_url)
                            
    print("Fake News Analysis Results:")
    for key, value in results.items():
        print(f"{key}: {value}")
                            
                            
 if __name__ == "__main__":
    main()
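
The indicator set could be extended with an emotional tone analysis. TextBlob, for example, computes a polarity score between -1 (negative) and 1 (positive) and a subjectivity score between 0 (objective) and 1 (subjective). Below is a minimal sketch of such an extension, assuming the third-party textblob package is installed; the 0.5 cutoff for flagging an "extreme" tone is an arbitrary illustration, not a calibrated threshold.

 from textblob import TextBlob

 def emotional_tone(text: str) -> dict:
     """Score the emotional tone of an article using TextBlob sentiment."""
     sentiment = TextBlob(text).sentiment
     return {
         'polarity': sentiment.polarity,          # -1 (negative) to 1 (positive)
         'subjectivity': sentiment.subjectivity,  # 0 (objective) to 1 (subjective)
         # Strong polarity in either direction suggests emotionally
         # charged writing rather than neutral reporting.
         'extreme_tone': abs(sentiment.polarity) > 0.5
     }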
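
Similarly, the _check_external_sources method only simulates verification, and the fact_check_api attribute defined in __init__ is never actually called. The sketch below shows what a live lookup might look like using the requests library; the endpoint is the simulated placeholder from the class, and the request and response formats here are pure assumptions rather than a real API.

 import requests

 def check_claims_live(analyzer: FakeNewsAnalyzer, text: str) -> dict:
     """Hypothetical live verification; the endpoint and JSON schema are assumed."""
     claims = analyzer._extract_key_claims(text)
     try:
         response = requests.post(
             analyzer.fact_check_api,   # placeholder URL, not a real service
             json={'claims': claims},   # assumed request format
             timeout=5,
         )
         response.raise_for_status()
         return response.json()         # assumed to contain per-claim verdicts
     except requests.RequestException:
         # Fall back to the simulated check if the service is unreachable.
         return analyzer._check_external_sources(text)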
                            

