User:2 B/Debatably useful stuff: Difference between revisions

Revision as of 13:49, January 19, 2025

Python script to clean up files from the Pikmin 3 text dump

# Dump file to convert.
filename = "Strings/Clean/Pikmin 3/USEng/Chat.json"

# ---- Output options: adjust to taste ----

# Inserted by clean_text wherever the text-box break code occurs.
textbox_separator = "\n---\n"

# Notes that are currently written to the output.
include_autoadvance = True   # "[advance automatically]"
include_lookat = True        # "[look at: <character> -> <character>]"
include_textsize = True      # "[text size: 0x..]"
include_ruby = True          # <ruby>..<rt>..</rt></ruby>; False keeps only the base text

# Notes that are currently left out of the output.
include_pauses = False       # "[pause: N]"
include_sound = False        # "[<character> thinking sound]" and "[sound: N, "..."]"
include_variables = False    # "[variable: ...]"; False writes "_" in its place



def clean_text(lines):
    """Convert the raw strings of one dump entry into readable wiki-style text.

    The strings in ``lines`` are joined with a NUL character and scanned from
    left to right.  Ordinary characters are copied unchanged.  The character
    0x0E introduces a control code; depending on the code it is replaced by
    a wiki template (``{{color|...}}``, ``{{icon|...}}``), ``<ruby>`` markup,
    a bracketed note such as ``[button: throw]``, or nothing at all.  Which
    optional notes are produced is decided by the module-level ``include_*``
    switches, and ``textbox_separator`` is inserted for the text-box break
    code.

    A code that is not recognised is dumped as ``[0xe...]`` followed by the
    hex values of up to three characters after the 0x0E, and four characters
    are skipped; this also works when the text ends in the middle of such a
    code.  A colour template that is still open when the text ends is closed
    so that the returned markup is balanced.

    Args:
        lines: Iterable of strings belonging to one entry of the dump.

    Returns:
        The converted text as a single string.

    Raises:
        AssertionError: If a ruby, button or sound-string code does not have
            the layout the parser expects.
        IndexError: Can be raised when a recognised code is cut off by the
            end of the text, or when a code refers to a character or emote
            index outside the tables defined here.
    """
    characters = ("Alph", "Brittany", "Charlie", "Louie", "Olimar", "S.S. Drake")
    emotes = ("neutral", "shocked", "happy", "sad", "mad")
    # Colour code -> value handed to the {{color}} template.
    colors = {
        0x01: "orange", 0x02: "#AA0", 0x03: "cyan",
        # pikmin colors
        0x0b: "red", 0x0e: "#AA0", 0x11: "blue", 0x14: "grey", 0x17: "pink",
        # leader colors
        0x1a: "lightblue", 0x1d: "pink", 0x20: "lime"}
    # Button names indexed by button id; ids without a known name keep a
    # backslash + hex placeholder.
    buttons = [
        '\\00', '\\01', '\\02', '\\03', '\\04', '\\05', '\\06', '\\07', '\\08', '\\09',
        'photo flash', 'photo',
        '\\0c', '\\0d', '\\0e', '\\0f', '\\10', '\\11', '\\12', '\\13',
        'roll right',
        '\\15',
        'roll left', 'photo zoom', 'use onion', 'move', 'throw', 'whistle', 'dismiss', 'change type',
        'reset camera', 'lock-on', 'charge', 'switch leader', 'spray',
        '\\23', '\\24', '\\25', '\\26', '\\27', '\\28', '\\29', '\\2a', '\\2b',
        'pluck', 'cancel',
        '\\2e',
        'shake',
    ]

    text = "\x00".join(lines)
    pieces = []    # output fragments, joined once at the end
    i = 0
    color = False  # True while a {{color|...}} template is open
    while i < len(text):
        # Anything that is not a control code is copied through as-is.
        if text[i] != '\x0e':
            pieces.append(text[i])
            i += 1
            continue

        # Ruby: three length fields, then the reading, then the base text.
        if text[i : i + 3] == '\x0e\x00\x00':
            assert ord(text[i + 3]) == ord(text[i + 5]) + 4
            # The length fields are halved to obtain character counts.
            furigana_count = ord(text[i + 3]) // 2 - 2
            kanji_count = ord(text[i + 4]) // 2
            kanji_start = i + 6 + furigana_count
            kanji = text[kanji_start : kanji_start + kanji_count]
            if include_ruby:
                furigana = text[i + 6 : kanji_start]
                pieces.append(f"<ruby>{kanji}<rt>{furigana}</rt></ruby>")
            else:
                pieces.append(kanji)
            i = kanji_start + kanji_count
            continue

        # Text size: one argument character, shown in hex.
        if text[i : i + 4] == "\x0e\x00\x02\x02":
            if include_textsize:
                pieces.append(f"[text size: {hex(ord(text[i + 4]))}]")
            i += 5
            continue

        # Colour change: 0xFFFF ends the current colour, a known code opens
        # a new {{color}} template (closing the previous one first).
        if text[i : i + 4] == "\x0e\x00\x03\x02":
            color_code = ord(text[i + 4])
            if color_code == 0xFFFF:
                if color:
                    pieces.append("}}")
                color = False
            elif color_code in colors:
                if color:
                    pieces.append("}}")
                pieces.append(f"{{{{color|2={colors[color_code]}|")
                color = True
            else:
                pieces.append(f"[unknown color: {hex(color_code)}]")
            i += 5
            continue

        # Text-box break.
        if text[i : i + 4] == '\x0e\x00\x04\x00':
            pieces.append(textbox_separator)
            i += 4
            continue

        # Button: id in the high byte, low byte is expected to be 0xCD.
        if text[i : i + 4] == '\x0e\x07\x01\x02':
            detail = ord(text[i + 4])
            assert detail & 0x00FF == 0xcd
            button_id = (detail & 0xFF00) >> 8
            label = buttons[button_id] if button_id < len(buttons) else hex(button_id)
            pieces.append(f"[button: {label}]")
            i += 5
            continue

        # Variable: three header characters, then a payload that runs up to
        # a U+0100 character; that character and the one after it are skipped.
        if text[i : i + 2] == "\x0e\x01":
            if include_variables:
                pieces.append(
                    f"[variable: {hex(ord(text[i+2]))}, {hex(ord(text[i+3]))}, {hex(ord(text[i+4]))}, \""
                )
            i += 5
            while text[i] != "\u0100":
                if include_variables:
                    pieces.append(text[i])
                i += 1
            pieces.append("\"]" if include_variables else "_")
            i += 2
            continue

        # Thinking sound: character index in the high byte of the argument.
        if text[i : i + 4] == '\x0e\x15\x00\x02':
            if include_sound:
                speaker = characters[(ord(text[i + 4]) & 0xFF00) >> 8]
                pieces.append(f"[{speaker} thinking sound]")
            i += 5
            continue

        # Look-at: high byte is the character index, low byte is the target
        # index plus one.
        if text[i : i + 4] == '\x0e\x15\x01\x02':
            if include_lookat:
                detail = ord(text[i + 4])
                high = (detail & 0xFF00) >> 8
                low = detail & 0x00FF
                pieces.append(f"[look at: {characters[high]} -> {characters[low - 1]}]")
            i += 5
            continue

        # Auto-advance marker.
        if text[i : i + 4] == '\x0e\x17\x03\x00':
            if include_autoadvance:
                pieces.append("[advance automatically]")
            i += 4
            continue

        # Pause: the argument is printed as a decimal number.
        if text[i : i + 4] == '\x0e\x17\x01\x02':
            if include_pauses:
                pieces.append(f"[pause: {ord(text[i + 4])}]")
            i += 5
            continue

        # Speaker icon: high byte is the speaker index, low byte the icon.
        if text[i : i + 4] == '\x0e\x17\x02\x02':
            icon = ord(text[i + 4])
            high = (icon & 0xFF00) >> 8
            low = icon & 0x00FF

            if low == 0x19:
                pieces.append(f"{characters[high]} (no icon): ")
            elif low == 0x1a:
                pieces.append("{{icon|S.S. Drake}} ")
                if high != 5:
                    pieces.append(f"(speaker mismatch, should be {characters[high]}) ")
            else:
                # Other icons: low % 5 selects the character, low // 5 the emote.
                character_name = characters[low % 5]
                emote = "neutral" if high == 5 else emotes[low // 5]
                pieces.append(f"{{{{icon|{character_name}|v={emote}}}}} ")
                if high != low % 5:
                    pieces.append(f"(speaker mismatch, should be {characters[high]}) ")

            i += 5
            continue

        # Sound string: two length fields, then a payload that ends at the
        # first character whose low byte is 0xCD.
        if text[i : i + 3] == '\x0e\x17\x04':
            assert ord(text[i + 3]) == ord(text[i + 4]) + 4
            if include_sound:
                pieces.append(f"[sound: {(ord(text[i+4]))}, \"")
            i += 5
            while ord(text[i]) & 0x00FF != 0xcd:
                if include_sound:
                    pieces.append(text[i])
                i += 1
            if include_sound:
                pieces.append("\"]")
            i += 1
            continue

        # Unrecognised code: dump the (up to) three characters after 0x0E in
        # hex.  Slicing keeps this safe when the text ends early.
        dump = "".join(hex(ord(ch)) for ch in text[i + 1 : i + 4])
        pieces.append(f"[0xe{dump}]")
        i += 4

    # Close a colour template left open at the end so braces stay balanced.
    if color:
        pieces.append("}}")
    return "".join(pieces)


# Read the dump and print every entry as a "==name==" section followed by
# its cleaned-up text.
#
# The encoding is given explicitly: without it open() falls back to the
# platform's default encoding, which is not UTF-8 everywhere and would
# garble any non-ASCII text in the dump.
with open(filename, 'r', encoding="utf-8") as f:
    raw = f.read()

# NOTE(review): eval() runs whatever Python expression the file contains, so
# only use this on dump files you trust.  If the dump holds nothing but
# literals, ast.literal_eval (or json.loads for strict JSON) would be a
# safer way to parse it -- confirm against the real dump before switching.
data = eval(raw)
strings = data["strings"]
for (name, lines) in strings.items():
    print("==" + name + "==\n")
    print(clean_text(lines) + "\n\n")
User:2 B/Debatably useful stuff: Difference between revisions

Revision as of 13:49, January 19, 2025

Python script to clean up files from the Pikmin 3 text dump

Navigation menu

Search