#!/usr/bin/env python3
"""
CSV to JSON Converter

Converts dialect CSV files from sheets_output/ to JSON format for index.html
"""

import csv
import json
import os
from pathlib import Path
from collections import defaultdict

# Paths
BASE_DIR = Path(__file__).parent.parent
SHEETS_OUTPUT_DIR = BASE_DIR / "sheets_output"
JSON_OUTPUT_DIR = BASE_DIR / "data" / "processed"

# States that get a top-level key in digiwords_grouped.json; rows for any
# other state are ignored.
_STATES = ("Telangana", "Andhra Pradesh")


def _cell(row, column, default=''):
    """Return the cell for *column*, or *default* when the cell is absent.

    csv.DictReader fills cells that are missing from a short row with None,
    so ``row.get(column, default)`` alone can still return None.
    """
    value = row.get(column)
    return default if value is None else value


def _parse_coordinate(raw):
    """Return *raw* as a float, or None when it is blank or not numeric."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return None


def _open_csv(csv_file):
    """Open *csv_file* for reading by the csv module.

    'utf-8-sig' strips a leading byte-order mark (and reads BOM-less UTF-8
    unchanged) so the first header is not read as '\\ufeffDistrict'.
    newline='' lets the csv module handle newlines inside quoted fields.
    """
    return open(csv_file, 'r', encoding='utf-8-sig', newline='')


def _write_json(payload, json_file):
    """Write *payload* to *json_file* as UTF-8 JSON, creating parent dirs."""
    json_file.parent.mkdir(parents=True, exist_ok=True)
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)


def convert_processed_dialects(csv_file=None, json_file=None):
    """Convert processed_dialects.csv to JSON format.

    Rows are grouped by their ``District`` column. Each district becomes one
    object with ``name``, ``lat``, ``lng``, ``region``, ``history`` (taken
    from the district's first row) and a ``words`` list of
    ``{'t', 'm', 's'}`` entries. Rows without a district are skipped, and
    rows without a Telugu word contribute no word entry.

    Args:
        csv_file: Input CSV path. Defaults to
            ``SHEETS_OUTPUT_DIR / "processed_dialects.csv"``.
        json_file: Output JSON path. Defaults to
            ``JSON_OUTPUT_DIR / "processed_dialects.json"``.

    Returns:
        True when the JSON file was written, False when the CSV file is
        missing or the conversion failed (a message is printed either way).
    """
    csv_file = (Path(csv_file) if csv_file is not None
                else SHEETS_OUTPUT_DIR / "processed_dialects.csv")
    json_file = (Path(json_file) if json_file is not None
                 else JSON_OUTPUT_DIR / "processed_dialects.json")

    if not csv_file.exists():
        print(f"⚠️ CSV file not found: {csv_file}")
        return False

    try:
        # Plain dict: insertion order keeps districts in first-seen order.
        by_district = {}

        with _open_csv(csv_file) as f:
            for row in csv.DictReader(f):
                district_name = _cell(row, 'District').strip()
                if not district_name:
                    continue

                entry = by_district.get(district_name)
                if entry is None:
                    # Set district metadata (from first occurrence)
                    entry = {'name': district_name}
                    for key, column in (('lat', 'Latitude'),
                                        ('lng', 'Longitude')):
                        raw = row.get(column, 0)
                        value = _parse_coordinate(raw)
                        if value is None:
                            # One bad cell should not discard the whole
                            # file; report it and use the default instead.
                            print(f"⚠️ Invalid {column} {raw!r} for district "
                                  f"{district_name}; using 0")
                            value = 0.0
                        entry[key] = value
                    entry['region'] = _cell(row, 'Region')
                    entry['history'] = _cell(row, 'History')
                    entry['words'] = []
                    by_district[district_name] = entry

                # Add word entry
                word_entry = {
                    't': _cell(row, 'Telugu_Word'),
                    'm': _cell(row, 'Meaning'),
                    's': _cell(row, 'Source'),
                }
                if word_entry['t']:  # Only add if Telugu word exists
                    entry['words'].append(word_entry)

        # Convert to list
        districts = list(by_district.values())

        # Write JSON
        _write_json(districts, json_file)

        print(f"✅ Converted {csv_file.name} → {json_file.name}")
        print(f" {len(districts)} districts, {sum(len(d['words']) for d in districts)} words")
        return True

    except Exception as e:
        # Boundary of a CLI step: report the failure and let the caller
        # carry on with the remaining files.
        print(f"❌ Error converting {csv_file.name}: {e}")
        return False


def convert_digiwords_grouped(csv_file=None, json_file=None):
    """Convert digiwords_grouped.csv to JSON format.

    Produces ``{state: {district: [word, ...]}}`` for the states in
    ``_STATES``; both state keys are always present, even when empty. Rows
    missing a state or district, rows for any other state, and rows without
    a Telugu word are skipped. A missing or blank ``Source`` becomes
    ``'Crowd'``.

    Args:
        csv_file: Input CSV path. Defaults to
            ``SHEETS_OUTPUT_DIR / "digiwords_grouped.csv"``.
        json_file: Output JSON path. Defaults to
            ``JSON_OUTPUT_DIR / "digiwords_grouped.json"``.

    Returns:
        True when the JSON file was written, False when the CSV file is
        missing or the conversion failed (a message is printed either way).
    """
    csv_file = (Path(csv_file) if csv_file is not None
                else SHEETS_OUTPUT_DIR / "digiwords_grouped.csv")
    json_file = (Path(json_file) if json_file is not None
                 else JSON_OUTPUT_DIR / "digiwords_grouped.json")

    if not csv_file.exists():
        print(f"⚠️ CSV file not found: {csv_file}")
        return False

    try:
        output = {state: {} for state in _STATES}

        with _open_csv(csv_file) as f:
            for row in csv.DictReader(f):
                state = _cell(row, 'State').strip()
                district = _cell(row, 'District').strip()
                if not state or not district:
                    continue

                word_entry = {
                    't': _cell(row, 'Telugu_Word'),
                    'm': _cell(row, 'Meaning'),
                    # `or` so that a blank cell, not only an absent column,
                    # falls back to the default source.
                    's': row.get('Source') or 'Crowd',
                }
                if word_entry['t'] and state in output:
                    output[state].setdefault(district, []).append(word_entry)

        # Write JSON
        _write_json(output, json_file)

        tg_count = sum(len(words) for words in output.get("Telangana", {}).values())
        ap_count = sum(len(words) for words in output.get("Andhra Pradesh", {}).values())

        print(f"✅ Converted {csv_file.name} → {json_file.name}")
        print(f" Telangana: {len(output.get('Telangana', {}))} districts, {tg_count} words")
        print(f" Andhra Pradesh: {len(output.get('Andhra Pradesh', {}))} districts, {ap_count} words")
        return True

    except Exception as e:
        # Boundary of a CLI step: report the failure and let the caller
        # carry on with the remaining files.
        print(f"❌ Error converting {csv_file.name}: {e}")
        return False


def main():
    """Convert all CSV files to JSON and print a summary."""
    print("🔄 Starting CSV to JSON conversion...")
    print(f"📂 Input: {SHEETS_OUTPUT_DIR}")
    print(f"📂 Output: {JSON_OUTPUT_DIR}\n")

    converters = (convert_processed_dialects, convert_digiwords_grouped)
    # Every converter runs even when an earlier one fails.
    success_count = sum(1 for convert in converters if convert())

    print(f"\n✨ Conversion complete: {success_count}/{len(converters)} files successfully converted")


if __name__ == "__main__":
    main()