# File listing metadata: 407 lines, 13 KiB, Python
"""
Keyword analysis module for App Store Optimization.

Analyzes keyword search volume, competition, and relevance for app discovery.
"""
|
|
|
|
from typing import Dict, List, Any, Optional, Tuple
|
|
import re
|
|
from collections import Counter
|
|
|
|
|
|
class KeywordAnalyzer:
    """Analyzes keywords for ASO effectiveness.

    Every keyword scored through ``analyze_keyword`` is also stored on the
    instance in ``analyzed_keywords`` (keyword -> analysis dict). Analysing
    the same keyword again overwrites the earlier entry.
    """

    # Competition level thresholds (based on number of competing apps).
    # Each value is the exclusive upper bound of its level; counts at or
    # above the 'high' bound are reported as 'very_high'.
    COMPETITION_THRESHOLDS = {
        'low': 1000,
        'medium': 5000,
        'high': 10000,
    }

    # Search volume categories (monthly searches estimate).
    # _categorize_search_volume treats the first four values as exclusive
    # upper bounds; the 'very_high' entry is never compared against and
    # 'very_high' is simply the fall-through category.
    VOLUME_CATEGORIES = {
        'very_low': 1000,
        'low': 5000,
        'medium': 20000,
        'high': 100000,
        'very_high': 500000,
    }

    # Words dropped by extract_keywords_from_text before counting.
    _STOP_WORDS = frozenset({
        'the', 'and', 'for', 'with', 'this', 'that', 'from', 'have',
        'but', 'not', 'you', 'all', 'can', 'are', 'was', 'were', 'been',
    })

    def __init__(self):
        """Initialize keyword analyzer with an empty analysis cache."""
        self.analyzed_keywords: Dict[str, Dict[str, Any]] = {}

    def analyze_keyword(
        self,
        keyword: str,
        search_volume: int = 0,
        competing_apps: int = 0,
        relevance_score: float = 0.0
    ) -> Dict[str, Any]:
        """
        Analyze a single keyword for ASO potential.

        Args:
            keyword: The keyword to analyze
            search_volume: Estimated monthly search volume
            competing_apps: Number of apps competing for this keyword
            relevance_score: Relevance to your app (0.0-1.0)

        Returns:
            Dictionary with the inputs echoed back plus 'volume_category',
            'competition_level', 'difficulty_score', 'potential_score',
            'recommendation', 'keyword_length' (number of whitespace-separated
            words) and 'is_long_tail' (True for three or more words).
            The same dictionary is stored in ``self.analyzed_keywords``.
        """
        difficulty_score = self._calculate_keyword_difficulty(
            search_volume,
            competing_apps
        )

        # Calculate potential score (0-100)
        potential_score = self._calculate_potential_score(
            search_volume,
            competing_apps,
            relevance_score
        )

        word_count = len(keyword.split())

        analysis = {
            'keyword': keyword,
            'search_volume': search_volume,
            'volume_category': self._categorize_search_volume(search_volume),
            'competing_apps': competing_apps,
            'competition_level': self._calculate_competition_level(competing_apps),
            'relevance_score': relevance_score,
            'difficulty_score': difficulty_score,
            'potential_score': potential_score,
            'recommendation': self._generate_recommendation(
                potential_score,
                difficulty_score,
                relevance_score
            ),
            'keyword_length': word_count,
            'is_long_tail': word_count >= 3
        }

        self.analyzed_keywords[keyword] = analysis
        return analysis

    def compare_keywords(self, keywords_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Compare multiple keywords and rank by potential.

        Args:
            keywords_data: List of dicts with keyword, search_volume,
                competing_apps, relevance_score. Only 'keyword' is required;
                the numeric fields default to 0 when absent.

        Returns:
            Comparison report with ranked keywords. The category lists are
            truncated (top 5 primary, top 10 secondary, top 10 long-tail),
            while the counts quoted in 'summary' refer to the untruncated
            lists.

        Raises:
            KeyError: If an entry has no 'keyword' key.
        """
        analyses = [
            self.analyze_keyword(
                keyword=kw_data['keyword'],
                search_volume=kw_data.get('search_volume', 0),
                competing_apps=kw_data.get('competing_apps', 0),
                relevance_score=kw_data.get('relevance_score', 0.0)
            )
            for kw_data in keywords_data
        ]

        # Sort by potential score (descending); ties keep input order
        # because the sort is stable.
        ranked_keywords = sorted(
            analyses,
            key=lambda x: x['potential_score'],
            reverse=True
        )

        # Categorize keywords
        primary_keywords = [
            kw for kw in ranked_keywords
            if kw['potential_score'] >= 70 and kw['relevance_score'] >= 0.8
        ]

        secondary_keywords = [
            kw for kw in ranked_keywords
            if 50 <= kw['potential_score'] < 70 and kw['relevance_score'] >= 0.6
        ]

        # Long-tail membership is independent of the score bands above, so a
        # keyword may appear here and in one of the other lists.
        long_tail_keywords = [
            kw for kw in ranked_keywords
            if kw['is_long_tail'] and kw['relevance_score'] >= 0.7
        ]

        return {
            'total_keywords_analyzed': len(analyses),
            'ranked_keywords': ranked_keywords,
            'primary_keywords': primary_keywords[:5],  # Top 5
            'secondary_keywords': secondary_keywords[:10],  # Top 10
            'long_tail_keywords': long_tail_keywords[:10],  # Top 10
            'summary': self._generate_comparison_summary(
                primary_keywords,
                secondary_keywords,
                long_tail_keywords
            )
        }

    def find_long_tail_opportunities(
        self,
        base_keyword: str,
        modifiers: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Generate long-tail keyword variations.

        For each modifier two variations are produced ("modifier base" and
        "base modifier"), followed by four fixed question-style prefixes.
        The competition labels are fixed strings, not measured values.

        Args:
            base_keyword: Core keyword (e.g., "task manager")
            modifiers: List of modifiers (e.g., ["free", "simple", "team"])

        Returns:
            List of long-tail keyword suggestions, each a dict with
            'keyword', 'pattern', 'estimated_competition' and 'rationale'.
        """
        long_tail_keywords = []

        # Generate combinations
        for modifier in modifiers:
            # Modifier + base
            long_tail_keywords.append({
                'keyword': f"{modifier} {base_keyword}",
                'pattern': 'modifier_base',
                'estimated_competition': 'low',
                'rationale': f"Less competitive variation of '{base_keyword}'"
            })

            # Base + modifier
            long_tail_keywords.append({
                'keyword': f"{base_keyword} {modifier}",
                'pattern': 'base_modifier',
                'estimated_competition': 'low',
                'rationale': f"Specific use-case variation of '{base_keyword}'"
            })

        # Add question-based long-tail
        for q_word in ('how', 'what', 'best', 'top'):
            long_tail_keywords.append({
                'keyword': f"{q_word} {base_keyword}",
                'pattern': 'question_based',
                'estimated_competition': 'very_low',
                'rationale': "Informational search query"
            })

        return long_tail_keywords

    def extract_keywords_from_text(
        self,
        text: str,
        min_word_length: int = 3
    ) -> List[Tuple[str, int]]:
        """
        Extract potential keywords from text (descriptions, reviews).

        Args:
            text: Text to analyze
            min_word_length: Minimum word length to consider

        Returns:
            List of (keyword, frequency) tuples covering single words and
            2-word phrases, sorted by descending frequency and limited to
            the top 50. On equal frequency single words come before phrases.
        """
        # Clean and normalize text: lowercase, punctuation becomes a space
        cleaned = re.sub(r'[^\w\s]', ' ', text.lower())

        # Keep words that are long enough and are not stop words
        words = [
            w for w in cleaned.split()
            if len(w) >= min_word_length and w not in self._STOP_WORDS
        ]

        # Count frequency
        word_counts = Counter(words)

        # Extract 2-word phrases. Pairs are built from the already-filtered
        # word list, so a phrase can join two words that were separated by
        # a removed stop word or punctuation in the original text.
        phrase_counts = Counter(
            f"{first} {second}" for first, second in zip(words, words[1:])
        )

        # Combine and sort (stable, so ties keep words-then-phrases order)
        all_keywords = list(word_counts.items()) + list(phrase_counts.items())
        all_keywords.sort(key=lambda x: x[1], reverse=True)

        return all_keywords[:50]  # Top 50

    def calculate_keyword_density(
        self,
        text: str,
        target_keywords: List[str]
    ) -> Dict[str, float]:
        """
        Calculate keyword density in text.

        Matching is case-insensitive and whole-word: "app" is not counted
        inside "apple" or "happy". Words of a multi-word keyword may be
        separated by any run of whitespace in the text.

        Args:
            text: Text to analyze (title, description)
            target_keywords: Keywords to check density for

        Returns:
            Dictionary of keyword: density (percentage), where density is
            occurrences / total whitespace-separated words * 100, rounded to
            2 decimals. Blank keywords and empty text give a density of 0.
        """
        text_lower = text.lower()
        total_words = len(text_lower.split())

        densities = {}
        for keyword in target_keywords:
            keyword_words = keyword.lower().split()
            if not keyword_words or total_words == 0:
                # An empty pattern would match at every position; report 0.
                densities[keyword] = 0.0
                continue

            # Lookarounds instead of \b so keywords that start or end with a
            # non-word character (e.g. "c++") are still bounded correctly.
            pattern = (
                r'(?<!\w)'
                + r'\s+'.join(re.escape(word) for word in keyword_words)
                + r'(?!\w)'
            )
            occurrences = len(re.findall(pattern, text_lower))
            densities[keyword] = round((occurrences / total_words) * 100, 2)

        return densities

    def _calculate_competition_level(self, competing_apps: int) -> str:
        """Determine competition level based on number of competing apps."""
        if competing_apps < self.COMPETITION_THRESHOLDS['low']:
            return 'low'
        if competing_apps < self.COMPETITION_THRESHOLDS['medium']:
            return 'medium'
        if competing_apps < self.COMPETITION_THRESHOLDS['high']:
            return 'high'
        return 'very_high'

    def _categorize_search_volume(self, search_volume: int) -> str:
        """Categorize search volume."""
        if search_volume < self.VOLUME_CATEGORIES['very_low']:
            return 'very_low'
        if search_volume < self.VOLUME_CATEGORIES['low']:
            return 'low'
        if search_volume < self.VOLUME_CATEGORIES['medium']:
            return 'medium'
        if search_volume < self.VOLUME_CATEGORIES['high']:
            return 'high'
        return 'very_high'

    def _calculate_keyword_difficulty(
        self,
        search_volume: int,
        competing_apps: int
    ) -> float:
        """
        Calculate keyword difficulty score (0-100).
        Higher score = harder to rank.

        Competition (saturating at 50,000 apps) contributes 70% and search
        volume (saturating at 1,000,000) contributes 30%. A keyword with no
        competing apps scores 0.0 regardless of volume.
        """
        # Non-positive counts mean "no competition"; this also keeps bad
        # (negative) input from producing a negative score.
        if competing_apps <= 0:
            return 0.0

        # Competition factor (0-1)
        competition_factor = min(competing_apps / 50000, 1.0)

        # Volume factor (0-1) - higher volume = more difficulty
        volume_factor = min(max(search_volume, 0) / 1000000, 1.0)

        # Difficulty score (weighted average)
        difficulty = (competition_factor * 0.7 + volume_factor * 0.3) * 100

        return round(difficulty, 1)

    def _calculate_potential_score(
        self,
        search_volume: int,
        competing_apps: int,
        relevance_score: float
    ) -> float:
        """
        Calculate overall keyword potential (0-100).
        Higher score = better opportunity.

        Out-of-range inputs are clamped (negative volume to 0, relevance to
        0.0-1.0) so the result always stays within 0-100.
        """
        # Volume score (0-40 points), full marks at 100,000 searches
        volume_score = min((max(search_volume, 0) / 100000) * 40, 40)

        # Competition score (0-30 points) - inverse relationship,
        # one point lost per 500 competing apps
        if competing_apps > 0:
            competition_score = max(30 - (competing_apps / 500), 0)
        else:
            competition_score = 30

        # Relevance score (0-30 points)
        relevance_points = min(max(relevance_score, 0.0), 1.0) * 30

        total_score = volume_score + competition_score + relevance_points

        return round(min(total_score, 100), 1)

    def _generate_recommendation(
        self,
        potential_score: float,
        difficulty_score: float,
        relevance_score: float
    ) -> str:
        """Generate actionable recommendation for keyword."""
        # Relevance is a hard gate: an irrelevant keyword is rejected no
        # matter how good its other numbers are.
        if relevance_score < 0.5:
            return "Low relevance - avoid targeting"

        if potential_score >= 70:
            return "High priority - target immediately"
        if potential_score >= 50:
            if difficulty_score < 50:
                return "Good opportunity - include in metadata"
            return "Competitive - use in description, not title"
        if potential_score >= 30:
            return "Secondary keyword - use for long-tail variations"
        return "Low potential - deprioritize"

    def _generate_comparison_summary(
        self,
        primary_keywords: List[Dict[str, Any]],
        secondary_keywords: List[Dict[str, Any]],
        long_tail_keywords: List[Dict[str, Any]]
    ) -> str:
        """Generate summary of keyword comparison."""
        summary_parts = [
            f"Identified {len(primary_keywords)} high-priority primary keywords."
        ]

        if primary_keywords:
            top = primary_keywords[0]
            summary_parts.append(
                f"Top recommendation: '{top['keyword']}' (potential score: {top['potential_score']})."
            )

        summary_parts.append(
            f"Found {len(secondary_keywords)} secondary keywords for description and metadata."
        )
        summary_parts.append(
            f"Discovered {len(long_tail_keywords)} long-tail opportunities with lower competition."
        )

        return " ".join(summary_parts)
|
|
|
|
|
|
def analyze_keyword_set(keywords_data: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyze a batch of keywords with a throwaway ``KeywordAnalyzer``.

    Args:
        keywords_data: List of keyword data dictionaries, passed unchanged
            to ``KeywordAnalyzer.compare_keywords``

    Returns:
        The comparison report produced by ``compare_keywords``
    """
    # A fresh analyzer per call: nothing is cached between invocations.
    return KeywordAnalyzer().compare_keywords(keywords_data)
|