"""
Localization helper module for App Store Optimization.
Manages multi-language ASO optimization strategies.
"""

from typing import Dict, List, Any, Optional, Tuple


class LocalizationHelper:
    """Helps manage multi-language ASO optimization.

    The helper is a collection of heuristics over static lookup tables: it
    ranks markets by their share figures, estimates translated text length
    and translation cost, validates metadata against store character limits
    and projects a simple payback period. It does not translate any text.
    """

    # Priority markets by language (based on app store revenue and user base)
    PRIORITY_MARKETS = {
        'tier_1': [
            {'language': 'en-US', 'market': 'United States', 'revenue_share': 0.25},
            {'language': 'zh-CN', 'market': 'China', 'revenue_share': 0.20},
            {'language': 'ja-JP', 'market': 'Japan', 'revenue_share': 0.10},
            {'language': 'de-DE', 'market': 'Germany', 'revenue_share': 0.08},
            {'language': 'en-GB', 'market': 'United Kingdom', 'revenue_share': 0.06}
        ],
        'tier_2': [
            {'language': 'fr-FR', 'market': 'France', 'revenue_share': 0.05},
            {'language': 'ko-KR', 'market': 'South Korea', 'revenue_share': 0.05},
            {'language': 'es-ES', 'market': 'Spain', 'revenue_share': 0.03},
            {'language': 'it-IT', 'market': 'Italy', 'revenue_share': 0.03},
            {'language': 'pt-BR', 'market': 'Brazil', 'revenue_share': 0.03}
        ],
        'tier_3': [
            {'language': 'ru-RU', 'market': 'Russia', 'revenue_share': 0.02},
            {'language': 'es-MX', 'market': 'Mexico', 'revenue_share': 0.02},
            {'language': 'nl-NL', 'market': 'Netherlands', 'revenue_share': 0.02},
            {'language': 'sv-SE', 'market': 'Sweden', 'revenue_share': 0.01},
            {'language': 'pl-PL', 'market': 'Poland', 'revenue_share': 0.01}
        ]
    }

    # Character limit multipliers by language (some languages need more/less space)
    CHAR_MULTIPLIERS = {
        'en': 1.0,
        'zh': 0.6,  # Chinese characters are more compact
        'ja': 0.7,  # Japanese uses kanji
        'ko': 0.8,  # Korean is relatively compact
        'de': 1.3,  # German words are typically longer
        'fr': 1.2,  # French tends to be longer
        'es': 1.1,  # Spanish slightly longer
        'pt': 1.1,  # Portuguese similar to Spanish
        'ru': 1.1,  # Russian similar length
        'ar': 1.0,  # Arabic varies
        'it': 1.1   # Italian similar to Spanish
    }

    # Store character limits per metadata field. Any platform value other
    # than 'apple' is checked against the 'google' limits.
    PLATFORM_LIMITS = {
        'apple': {'title': 30, 'subtitle': 30, 'description': 4000, 'keywords': 100},
        'google': {'title': 50, 'short_description': 80, 'description': 4000}
    }

    def __init__(self, app_category: str = 'general'):
        """
        Initialize localization helper.

        Args:
            app_category: App category to prioritize relevant markets
                (stored on the instance; not consulted by the methods below)
        """
        self.app_category = app_category
        self.localization_plans: List[Dict[str, Any]] = []

    @staticmethod
    def _primary_language(language: str) -> str:
        """Return the lowercase primary subtag of a code ('zh-CN' -> 'zh')."""
        # Accept both 'zh-CN' and 'zh_CN' spellings, in any letter case.
        return language.replace('_', '-').split('-')[0].lower()

    def _limits_for_platform(self, platform: str) -> Dict[str, int]:
        """Return the field character limits for 'apple', else for 'google'."""
        key = 'apple' if platform == 'apple' else 'google'
        return self.PLATFORM_LIMITS[key]

    def identify_target_markets(
        self,
        current_market: str = 'en-US',
        budget_level: str = 'medium',
        target_market_count: int = 5
    ) -> Dict[str, Any]:
        """
        Recommend priority markets for localization.

        Args:
            current_market: Current/primary market (language code, excluded
                from the recommendations)
            budget_level: 'low', 'medium', or 'high'; any other value is
                treated as 'high'
            target_market_count: Number of markets to target; capped at 3
                for a low budget and 8 for a medium budget, and a negative
                value yields no markets

        Returns:
            Prioritized market recommendations
        """
        # Determine tier priorities based on budget
        if budget_level == 'low':
            priority_tiers = ['tier_1']
            max_markets = min(target_market_count, 3)
        elif budget_level == 'medium':
            priority_tiers = ['tier_1', 'tier_2']
            max_markets = min(target_market_count, 8)
        else:  # high budget
            priority_tiers = ['tier_1', 'tier_2', 'tier_3']
            max_markets = target_market_count

        # A negative count would turn the slice below into "all but the
        # last N" rather than "none".
        max_markets = max(0, max_markets)

        # Collect markets from priority tiers
        recommended_markets = []
        for tier in priority_tiers:
            for market in self.PRIORITY_MARKETS[tier]:
                if market['language'] == current_market:
                    continue
                recommended_markets.append({
                    **market,
                    'tier': tier,
                    'estimated_translation_cost': self._estimate_translation_cost(
                        market['language']
                    )
                })

        # Sort by revenue share and limit (the sort is stable, so markets
        # with equal shares keep their tier order)
        recommended_markets.sort(key=lambda m: m['revenue_share'], reverse=True)
        recommended_markets = recommended_markets[:max_markets]

        # The reported "lift" is simply the sum of the selected shares
        total_potential_revenue_share = sum(
            m['revenue_share'] for m in recommended_markets
        )

        return {
            'recommended_markets': recommended_markets,
            'total_markets': len(recommended_markets),
            'estimated_total_revenue_lift': f"{total_potential_revenue_share*100:.1f}%",
            'estimated_cost': self._estimate_total_localization_cost(recommended_markets),
            'implementation_priority': self._prioritize_implementation(recommended_markets)
        }

    def translate_metadata(
        self,
        source_metadata: Dict[str, str],
        source_language: str,
        target_language: str,
        platform: str = 'apple'
    ) -> Dict[str, Any]:
        """
        Generate localized metadata with character limit considerations.

        No text is translated: each known field is annotated with an
        estimated translated length and whether that fits the store limit.

        Args:
            source_metadata: Original metadata (title, description, etc.);
                fields without a limit on the platform are skipped
            source_language: Source language code (e.g., 'en')
            target_language: Target language code (e.g., 'es')
            platform: 'apple' or 'google'

        Returns:
            Localized metadata with character limit validation
        """
        # Get character multiplier (unknown languages assume no change)
        char_multiplier = self.CHAR_MULTIPLIERS.get(
            self._primary_language(target_language), 1.0
        )

        limits = self._limits_for_platform(platform)

        localized_metadata = {}
        warnings = []

        for field, text in source_metadata.items():
            if field not in limits:
                continue

            # Estimate target length
            estimated_length = int(len(text) * char_multiplier)
            limit = limits[field]

            localized_metadata[field] = {
                'original_text': text,
                'original_length': len(text),
                'estimated_target_length': estimated_length,
                'character_limit': limit,
                'fits_within_limit': estimated_length <= limit,
                'translation_notes': self._get_translation_notes(
                    field,
                    target_language,
                    estimated_length,
                    limit
                )
            }

            if estimated_length > limit:
                warnings.append(
                    f"{field}: Estimated length ({estimated_length}) may exceed limit ({limit}) - "
                    f"condensing may be required"
                )

        return {
            'source_language': source_language,
            'target_language': target_language,
            'platform': platform,
            'localized_fields': localized_metadata,
            'character_multiplier': char_multiplier,
            'warnings': warnings,
            'recommendations': self._generate_translation_recommendations(
                target_language,
                warnings
            )
        }

    def adapt_keywords(
        self,
        source_keywords: List[str],
        source_language: str,
        target_language: str,
        target_market: str
    ) -> Dict[str, Any]:
        """
        Adapt keywords for target market (not just direct translation).

        Args:
            source_keywords: Original keywords; the first three are marked
                as high priority
            source_language: Source language code
            target_language: Target language code
            target_market: Target market (e.g., 'France', 'Japan')

        Returns:
            Adapted keyword recommendations
        """
        # Cultural adaptation considerations (market-level, so the same
        # notes apply to every keyword)
        cultural_notes = self._get_cultural_keyword_considerations(target_market)

        # Search behavior differences
        search_patterns = self._get_search_patterns(target_market)

        top_keywords = set(source_keywords[:3])

        adapted_keywords = [
            {
                'source_keyword': keyword,
                'adaptation_strategy': self._determine_adaptation_strategy(
                    keyword,
                    target_market
                ),
                # Copy so callers can edit one entry without touching the rest
                'cultural_considerations': list(cultural_notes),
                'priority': 'high' if keyword in top_keywords else 'medium'
            }
            for keyword in source_keywords
        ]

        return {
            'source_language': source_language,
            'target_language': target_language,
            'target_market': target_market,
            'adapted_keywords': adapted_keywords,
            'search_behavior_notes': search_patterns,
            'recommendations': [
                'Use native speakers for keyword research',
                'Test keywords with local users before finalizing',
                'Consider local competitors\' keyword strategies',
                'Monitor search trends in target market'
            ]
        }

    def validate_translations(
        self,
        translated_metadata: Dict[str, str],
        target_language: str,
        platform: str = 'apple'
    ) -> Dict[str, Any]:
        """
        Validate translated metadata for character limits and quality.

        Args:
            translated_metadata: Translated text fields; fields without a
                limit on the platform are not length-checked
            target_language: Target language code
            platform: 'apple' or 'google'

        Returns:
            Validation report
        """
        limits = self._limits_for_platform(platform)

        validation_results = {
            'is_valid': True,
            'field_validations': {},
            'errors': [],
            'warnings': []
        }

        for field, text in translated_metadata.items():
            if field not in limits:
                continue

            actual_length = len(text)
            limit = limits[field]
            is_within_limit = actual_length <= limit

            validation_results['field_validations'][field] = {
                'text': text,
                'length': actual_length,
                'limit': limit,
                'is_valid': is_within_limit,
                'usage_percentage': round((actual_length / limit) * 100, 1)
            }

            if not is_within_limit:
                validation_results['is_valid'] = False
                validation_results['errors'].append(
                    f"{field} exceeds limit: {actual_length}/{limit} characters"
                )

        # Quality checks run on every field and only produce warnings;
        # they never flip 'is_valid'.
        quality_issues = self._check_translation_quality(
            translated_metadata,
            target_language
        )

        validation_results['quality_checks'] = quality_issues

        if quality_issues:
            validation_results['warnings'].extend(
                [f"Quality issue: {issue}" for issue in quality_issues]
            )

        return validation_results

    def calculate_localization_roi(
        self,
        target_markets: List[str],
        current_monthly_downloads: int,
        localization_cost: float,
        expected_lift_percentage: float = 0.15
    ) -> Dict[str, Any]:
        """
        Estimate ROI of localization investment.

        Args:
            target_markets: List of market codes (language codes from
                PRIORITY_MARKETS); unknown codes are skipped
            current_monthly_downloads: Current monthly downloads
            localization_cost: Total cost to localize
            expected_lift_percentage: Expected download increase (default 15%)

        Returns:
            ROI analysis. 'annual_roi' is 'Negative' when no extra revenue
            is expected and 'N/A' when the cost is zero or less.
        """
        markets_by_language = {
            market['language']: market
            for tier_markets in self.PRIORITY_MARKETS.values()
            for market in tier_markets
        }

        # Estimate market-specific lift
        market_data = []
        total_expected_lift = 0

        for market_code in target_markets:
            market_info = markets_by_language.get(market_code)
            if market_info is None:
                continue

            # Estimate downloads from this market
            market_downloads = int(current_monthly_downloads * market_info['revenue_share'])
            expected_increase = int(market_downloads * expected_lift_percentage)
            total_expected_lift += expected_increase

            market_data.append({
                'market': market_info['market'],
                'current_monthly_downloads': market_downloads,
                'expected_increase': expected_increase,
                'revenue_potential': market_info['revenue_share']
            })

        # Calculate payback period (assuming $2 revenue per download)
        revenue_per_download = 2.0
        monthly_additional_revenue = total_expected_lift * revenue_per_download
        has_return = monthly_additional_revenue > 0
        payback_months = (
            localization_cost / monthly_additional_revenue if has_return else float('inf')
        )

        if not has_return:
            annual_roi = 'Negative'
        elif localization_cost <= 0:
            # A percentage return on a zero (or negative) outlay is undefined
            annual_roi = 'N/A'
        else:
            annual_return = monthly_additional_revenue * 12 - localization_cost
            annual_roi = f"{(annual_return / localization_cost * 100):.1f}%"

        return {
            'markets_analyzed': len(market_data),
            'market_breakdown': market_data,
            'total_expected_monthly_lift': total_expected_lift,
            'expected_monthly_revenue_increase': f"${monthly_additional_revenue:,.2f}",
            'localization_cost': f"${localization_cost:,.2f}",
            'payback_period_months': round(payback_months, 1) if has_return else 'N/A',
            'annual_roi': annual_roi,
            'recommendation': self._generate_roi_recommendation(payback_months)
        }

    def _estimate_translation_cost(self, language: str) -> Dict[str, float]:
        """Estimate translation cost for a language."""
        # Base cost per word (professional translation)
        base_cost_per_word = 0.12

        # Language-specific multipliers
        multipliers = {
            'zh-CN': 1.5,  # Chinese requires specialist
            'ja-JP': 1.5,  # Japanese requires specialist
            'ko-KR': 1.3,
            'ar-SA': 1.4,  # Arabic (right-to-left)
            'default': 1.0
        }

        multiplier = multipliers.get(language, multipliers['default'])

        # Typical word counts for app store metadata
        typical_word_counts = {
            'title': 5,
            'subtitle': 5,
            'description': 300,
            'keywords': 20,
            'screenshots': 50  # Caption text
        }

        total_words = sum(typical_word_counts.values())
        estimated_cost = total_words * base_cost_per_word * multiplier

        return {
            'cost_per_word': base_cost_per_word * multiplier,
            'total_words': total_words,
            'estimated_cost': round(estimated_cost, 2)
        }

    def _estimate_total_localization_cost(self, markets: List[Dict[str, Any]]) -> str:
        """Estimate total cost for multiple markets, formatted as dollars."""
        total = sum(m['estimated_translation_cost']['estimated_cost'] for m in markets)
        return f"${total:,.2f}"

    def _prioritize_implementation(self, markets: List[Dict[str, Any]]) -> List[Dict[str, str]]:
        """Create phased implementation plan.

        Markets are taken in the order given: the first three form phase 1,
        the next three phase 2 and the rest phase 3. Empty phases are omitted.
        """
        phase_specs = [
            # Phase 1: Top revenue markets
            ('Phase 1 (First 30 days)', markets[:3], 'Highest revenue potential markets'),
            # Phase 2: Remaining tier 1 and top tier 2
            ('Phase 2 (Days 31-60)', markets[3:6], 'Strong revenue markets with good ROI'),
            # Phase 3: Remaining markets
            ('Phase 3 (Days 61-90)', markets[6:], 'Complete global coverage')
        ]

        return [
            {
                'phase': phase,
                'markets': ', '.join(m['market'] for m in phase_markets),
                'rationale': rationale
            }
            for phase, phase_markets, rationale in phase_specs
            if phase_markets
        ]

    def _get_translation_notes(
        self,
        field: str,
        target_language: str,
        estimated_length: int,
        limit: int
    ) -> List[str]:
        """Get translation-specific notes for field."""
        notes = []
        language = self._primary_language(target_language)

        if estimated_length > limit:
            notes.append(f"Condensing required - aim for {limit - 10} characters to allow buffer")

        if field == 'title' and language == 'zh':
            notes.append("Chinese characters convey more meaning - may need fewer characters")

        if field == 'keywords' and language == 'de':
            notes.append("German compound words may be longer - prioritize shorter keywords")

        return notes

    def _generate_translation_recommendations(
        self,
        target_language: str,
        warnings: List[str]
    ) -> List[str]:
        """Generate translation recommendations."""
        recommendations = [
            "Use professional native speakers for translation",
            "Test translations with local users before finalizing"
        ]

        if warnings:
            recommendations.append("Work with translator to condense text while preserving meaning")

        if self._primary_language(target_language) in ('zh', 'ja'):
            recommendations.append("Consider cultural context and local idioms")

        return recommendations

    def _get_cultural_keyword_considerations(self, target_market: str) -> List[str]:
        """Get cultural considerations for keywords by market.

        Returns the list for the market, or the generic 'default' list when
        the market has no entry.
        """
        # Simplified example - real implementation would be more comprehensive
        considerations = {
            'China': ['Avoid politically sensitive terms', 'Consider local alternatives to blocked services'],
            'Japan': ['Honorific language important', 'Technical terms often use katakana'],
            'Germany': ['Privacy and security terms resonate', 'Efficiency and quality valued'],
            'France': ['French language protection laws', 'Prefer French terms over English'],
            'default': ['Research local search behavior', 'Test with native speakers']
        }

        return considerations.get(target_market, considerations['default'])

    def _get_search_patterns(self, target_market: str) -> List[str]:
        """Get search pattern notes for market."""
        patterns = {
            'China': ['Use both simplified characters and romanization', 'Brand names often romanized'],
            'Japan': ['Mix of kanji, hiragana, and katakana', 'English words common in tech'],
            'Germany': ['Compound words common', 'Specific technical terminology'],
            'default': ['Research local search trends', 'Monitor competitor keywords']
        }

        return patterns.get(target_market, patterns['default'])

    def _determine_adaptation_strategy(self, keyword: str, target_market: str) -> str:
        """Determine how to adapt keyword for market."""
        # Simplified logic: the strategy depends on the market only.
        # 'South Korea' is the name used in PRIORITY_MARKETS; 'Korea' is
        # kept for callers that pass the short form.
        if target_market in ('China', 'Japan', 'Korea', 'South Korea'):
            return 'full_localization'  # Complete translation needed
        if target_market in ('Germany', 'France', 'Spain'):
            return 'adapt_and_translate'  # Some adaptation needed
        return 'direct_translation'  # Direct translation usually sufficient

    def _check_translation_quality(
        self,
        translated_metadata: Dict[str, str],
        target_language: str
    ) -> List[str]:
        """Basic quality checks for translations."""
        issues = []

        # Check for untranslated placeholders
        for field, text in translated_metadata.items():
            if '[' in text or '{' in text or 'TODO' in text.upper():
                issues.append(f"{field} contains placeholder text")

        # Check for excessive punctuation (separate pass so all placeholder
        # issues are listed before punctuation issues)
        for field, text in translated_metadata.items():
            if text.count('!') > 3:
                issues.append(f"{field} has excessive exclamation marks")

        return issues

    def _generate_roi_recommendation(self, payback_months: float) -> str:
        """Generate ROI recommendation from the payback period in months."""
        if payback_months <= 3:
            return "Excellent ROI - proceed immediately"
        if payback_months <= 6:
            return "Good ROI - recommended investment"
        if payback_months <= 12:
            return "Moderate ROI - consider if strategic market"
        return "Low ROI - reconsider or focus on higher-priority markets first"


def plan_localization_strategy(
    current_market: str,
    budget_level: str,
    monthly_downloads: int
) -> Dict[str, Any]:
    """
    Convenience function to plan localization strategy.

    Picks target markets for the given budget with a default
    LocalizationHelper, then runs the ROI estimate for those markets using
    their summed translation cost estimates.

    Args:
        current_market: Current market code
        budget_level: Budget level
        monthly_downloads: Current monthly downloads

    Returns:
        Complete localization plan with 'target_markets' and 'roi_analysis'
    """
    helper = LocalizationHelper()

    target_markets = helper.identify_target_markets(
        current_market=current_market,
        budget_level=budget_level
    )
    recommended = target_markets['recommended_markets']

    # Extract market codes
    market_codes = [m['language'] for m in recommended]

    # Sum the numeric per-market estimates instead of parsing the formatted
    # '$1,234.56' display string back into a number. Rounding to cents keeps
    # the value identical to the displayed total.
    estimated_cost = round(
        sum((m['estimated_translation_cost']['estimated_cost'] for m in recommended), 0.0),
        2
    )

    # Calculate ROI
    roi_analysis = helper.calculate_localization_roi(
        market_codes,
        monthly_downloads,
        estimated_cost
    )

    return {
        'target_markets': target_markets,
        'roi_analysis': roi_analysis
    }