#!/usr/bin/env python3
"""
CSV to JSON Converter
Converts dialect CSV files from sheets_output/ to JSON format for index.html
"""

import csv
import json
import os
from pathlib import Path
from collections import defaultdict

# Paths
# BASE_DIR is the directory one level above the one containing this script.
BASE_DIR = Path(__file__).parent.parent
# Directory the input CSV files are read from.
SHEETS_OUTPUT_DIR = BASE_DIR / "sheets_output"
# Directory the generated JSON files are written to.
JSON_OUTPUT_DIR = BASE_DIR / "data" / "processed"
def _parse_coordinate(raw_value):
    """Return ``raw_value`` as a float, or 0.0 if it is missing, blank or not numeric."""
    try:
        return float(raw_value)
    except (TypeError, ValueError):
        # TypeError: None from a missing/short cell; ValueError: '' or text.
        return 0.0


def convert_processed_dialects(csv_file=None, json_file=None):
    """Convert processed_dialects.csv to JSON format.

    Rows are grouped by their ``District`` column. Each district becomes one
    object with ``name``, ``lat``, ``lng``, ``region``, ``history`` (taken from
    the first row seen for that district) and a ``words`` list of
    ``{'t': Telugu_Word, 'm': Meaning, 's': Source}`` entries. Rows without a
    district are skipped; rows without a Telugu word add no word entry.

    Args:
        csv_file: Input CSV path. Defaults to
            ``SHEETS_OUTPUT_DIR / "processed_dialects.csv"``.
        json_file: Output JSON path. Defaults to
            ``JSON_OUTPUT_DIR / "processed_dialects.json"``.

    Returns:
        True if the JSON file was written, False if the CSV is missing or
        the conversion failed (the reason is printed).
    """
    if csv_file is None:
        csv_file = SHEETS_OUTPUT_DIR / "processed_dialects.csv"
    if json_file is None:
        json_file = JSON_OUTPUT_DIR / "processed_dialects.json"
    csv_file = Path(csv_file)
    json_file = Path(json_file)

    if not csv_file.exists():
        print(f"⚠️ CSV file not found: {csv_file}")
        return False

    try:
        # Plain dicts keep insertion order, so districts are emitted in the
        # order they first appear in the CSV.
        districts_by_name = {}

        # utf-8-sig transparently drops a leading BOM (which would otherwise
        # corrupt the first header name); newline='' is what the csv module
        # requires to handle quoted line breaks correctly.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # Cells missing from a short row come back as None.
                district_name = (row.get('District') or '').strip()
                if not district_name:
                    continue

                district = districts_by_name.get(district_name)
                if district is None:
                    # District metadata comes from its first occurrence.
                    district = {
                        'name': district_name,
                        'lat': _parse_coordinate(row.get('Latitude')),
                        'lng': _parse_coordinate(row.get('Longitude')),
                        'region': row.get('Region') or '',
                        'history': row.get('History') or '',
                        'words': [],
                    }
                    districts_by_name[district_name] = district

                telugu_word = row.get('Telugu_Word') or ''
                if telugu_word:  # Only add if Telugu word exists
                    district['words'].append({
                        't': telugu_word,
                        'm': row.get('Meaning') or '',
                        's': row.get('Source') or '',
                    })

        districts = list(districts_by_name.values())

        # Write JSON
        json_file.parent.mkdir(parents=True, exist_ok=True)
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(districts, f, ensure_ascii=False, indent=2)

        word_total = sum(len(d['words']) for d in districts)
        print(f"✅ Converted {csv_file.name} → {json_file.name}")
        print(f"   {len(districts)} districts, {word_total} words")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue with the remaining conversions.
        print(f"❌ Error converting {csv_file.name}: {e}")
        return False
def convert_digiwords_grouped(csv_file=None, json_file=None):
    """Convert digiwords_grouped.csv to JSON format.

    The output maps each known state ("Telangana", "Andhra Pradesh") to a
    mapping of district name -> list of
    ``{'t': Telugu_Word, 'm': Meaning, 's': Source}`` entries. Rows missing a
    state, district or Telugu word are skipped, as are rows whose state is not
    one of the known states. ``Source`` falls back to ``'Crowd'`` when the
    cell is absent.

    Args:
        csv_file: Input CSV path. Defaults to
            ``SHEETS_OUTPUT_DIR / "digiwords_grouped.csv"``.
        json_file: Output JSON path. Defaults to
            ``JSON_OUTPUT_DIR / "digiwords_grouped.json"``.

    Returns:
        True if the JSON file was written, False if the CSV is missing or
        the conversion failed (the reason is printed).
    """
    if csv_file is None:
        csv_file = SHEETS_OUTPUT_DIR / "digiwords_grouped.csv"
    if json_file is None:
        json_file = JSON_OUTPUT_DIR / "digiwords_grouped.json"
    csv_file = Path(csv_file)
    json_file = Path(json_file)

    if not csv_file.exists():
        print(f"⚠️ CSV file not found: {csv_file}")
        return False

    try:
        # Both states are always present in the output, even when empty.
        output = {
            "Telangana": {},
            "Andhra Pradesh": {},
        }
        unknown_state_rows = 0

        # utf-8-sig transparently drops a leading BOM (which would otherwise
        # corrupt the first header name); newline='' is what the csv module
        # requires to handle quoted line breaks correctly.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # Cells missing from a short row come back as None.
                state = (row.get('State') or '').strip()
                district = (row.get('District') or '').strip()
                if not state or not district:
                    continue

                telugu_word = row.get('Telugu_Word') or ''
                if not telugu_word:
                    continue

                if state not in output:
                    unknown_state_rows += 1
                    continue

                source = row.get('Source')
                if source is None:
                    source = 'Crowd'

                output[state].setdefault(district, []).append({
                    't': telugu_word,
                    'm': row.get('Meaning') or '',
                    's': source,
                })

        # Write JSON
        json_file.parent.mkdir(parents=True, exist_ok=True)
        with open(json_file, 'w', encoding='utf-8') as f:
            json.dump(output, f, ensure_ascii=False, indent=2)

        print(f"✅ Converted {csv_file.name} → {json_file.name}")
        for state_name, districts in output.items():
            word_total = sum(len(words) for words in districts.values())
            print(f"   {state_name}: {len(districts)} districts, {word_total} words")
        if unknown_state_rows:
            # Surface dropped data instead of losing it silently.
            print(f"   Skipped {unknown_state_rows} rows with an unrecognized state")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller continue with the remaining conversions.
        print(f"❌ Error converting {csv_file.name}: {e}")
        return False
def main():
    """Convert all CSV files to JSON.

    Runs each converter in turn and prints how many of them succeeded. A
    failing converter does not stop the ones after it.
    """
    print("🔄 Starting CSV to JSON conversion...")
    print(f"📂 Input: {SHEETS_OUTPUT_DIR}")
    print(f"📁 Output: {JSON_OUTPUT_DIR}\n")

    converters = (convert_processed_dialects, convert_digiwords_grouped)
    # Each converter returns a truthy value on success.
    success_count = sum(1 for convert in converters if convert())

    # The total is derived from the converter list so it cannot drift.
    print(
        f"\n✨ Conversion complete: {success_count}/{len(converters)} "
        "files successfully converted"
    )
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()