User:2 B/Debatably useful stuff: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
|||
Line 1: | Line 1: | ||
==Python script to clean up files from the ''Pikmin 3'' text dump== | ==Python script to clean up files from the ''Pikmin 3'' text dump== | ||
<nowiki># | <nowiki># Change file here | ||
filename = "Strings/Raw/Pikmin 3/EU ROM/EUEng/Chat.msbt" | filename = "Strings/Raw/Pikmin 3/EU ROM/EUEng/Chat.msbt" | ||
Revision as of 10:55, January 21, 2025
Python script to clean up files from the Pikmin 3 text dump
# Change file here
filename = "Strings/Raw/Pikmin 3/EU ROM/EUEng/Chat.msbt"

# Change these for your purposes
textbox_separator = "\n<br>\n"
include_pauses = False
include_sound = False
include_autoadvance = False
include_lookat = False
include_textsize = True
include_variables = False
include_ruby = True

# Change text colors here
colors = {
    # emphasis
    0x01: "orange",
    0x02: "#AA0",
    0x03: "cyan",
    0x26: "goldenrod",
    # pikmin colors
    0x0b: "red",
    0x0e: "#AA0",
    0x11: "blue",
    0x14: "grey",
    0x17: "pink",
    # leader colors
    0x1a: "lightblue",
    0x1d: "pink",
    0x20: "lime",
    # ultra-spicy
    0x25: "orange",
}

# Lookup tables used by clean_text(); the position in each sequence is the
# numeric id found in the tag payload.
_CHARACTERS = ("Alph", "Brittany", "Charlie", "Louie", "Olimar", "S.S. Drake", "President")
_EMOTES = ("neutral", "shocked", "happy", "sad", "mad")
_BUTTONS = [
    '\\00', '\\01', '\\02', '\\03', '\\04', '\\05', '\\06', '\\07',
    '\\08', '\\09', 'photo flash', 'photo', '\\0c', '\\0d', '\\0e', '\\0f',
    '\\10', '\\11', '\\12', '\\13', 'roll right', '\\15', 'roll left', 'photo zoom',
    'use onion', 'move', 'throw', 'whistle', 'dismiss', 'change type', 'reset camera', 'lock-on',
    'charge', 'switch leader', 'spray', '\\23', '\\24', '\\25', '\\26', '\\27',
    '\\28', '\\29', '\\2a', '\\2b', 'pluck', 'cancel', '\\2e', 'shake',
]


def decode(bytes, byte_order="little"):
    """Decode UTF-16 data without a BOM using the given byte order.

    Args:
        bytes: The raw UTF-16 data. (The parameter keeps its original name
            even though it shadows the builtin, so keyword callers still work.)
        byte_order: "big" selects UTF-16-BE; anything else selects UTF-16-LE.

    Returns:
        The decoded string.

    Raises:
        UnicodeDecodeError: If the data is not valid UTF-16.
    """
    codec = "utf-16-be" if byte_order == "big" else "utf-16-le"
    return bytes.decode(codec)


def clean_text(text, byte_order):
    """Convert one raw message into wiki-style text.

    The message is walked two bytes at a time. A 0x000E code unit starts a
    control tag made of a 2-unit tag id, a 16-bit payload size and the
    payload itself; every other code unit is copied to the output. Which tags
    are rendered is controlled by the module-level ``include_*`` switches,
    ``colors`` and ``textbox_separator``.

    Args:
        text: The raw message bytes (UTF-16 code units plus control tags).
        byte_order: "big" or "little", as accepted by ``int.from_bytes``.

    Returns:
        The rendered text. Any terminating NUL character in the input is
        copied through unchanged.

    Raises:
        AssertionError: If a variable or button tag does not have the
            expected marker bytes.
        IndexError: If a tag references a character/emote id outside the
            lookup tables, or the data is truncated inside a tag.
        UnicodeDecodeError: If the text is not valid UTF-16.
    """
    characters = _CHARACTERS
    emotes = _EMOTES
    buttons = _BUTTONS

    parts = []
    emit = parts.append
    color = False  # True while a {{color|...}} template is still open
    size = len(text)
    i = 0
    while i < size:
        unit = int.from_bytes(text[i:i+2], byte_order)
        # A high surrogate cannot be decoded alone; take its partner with it.
        width = 4 if 0xD800 <= unit <= 0xDBFF else 2
        char = decode(text[i:i+width], byte_order)
        if char != '\x0e':
            emit(char)
            i += width
            continue

        # Tag header: the two code units after 0x0E identify the tag, the
        # third is the payload size in bytes. ``i`` ends up on the payload.
        i += 2
        tag = decode(text[i:i+4], byte_order)
        extra_size = int.from_bytes(text[i+4:i+6], byte_order)
        i += 6
        # First 16-bit payload value in file byte order (most tags use it).
        arg = int.from_bytes(text[i:i+2], byte_order)

        if tag == '\x00\x00':
            if include_ruby:
                kanji_count = arg
                furigana_count = int.from_bytes(text[i+2:i+4], byte_order)
                furigana_end = i + 4 + furigana_count
                furigana = text[i+4 : furigana_end]
                kanji = text[furigana_end : furigana_end + kanji_count]
                emit(f"<ruby>{decode(kanji, byte_order)}<rt>{decode(furigana, byte_order)}</rt></ruby>")
                # The base text was just emitted inside <ruby>, so skip it.
                i += kanji_count
        elif tag == '\x00\x02':
            if include_textsize:
                # Read the 2-byte value; indexing a single byte yields an int,
                # which int.from_bytes() rejects.
                emit(f"[text size: {arg}]")
        elif tag == '\x00\x03':
            if text[i:i+2] == b'\xff\xff':
                # 0xFFFF ends the current color.
                if color:
                    emit("}}")
                    color = False
            elif arg in colors:
                if color:
                    emit("}}")
                emit(f"{{{{color|2={colors[arg]}|")
                color = True
            else:
                emit(f"[unknown color: {hex(arg)}]")
        elif tag == '\x00\x04':
            emit(textbox_separator)
        elif tag[0] == '\x01':
            name_len = arg
            var_name = decode(text[i+2 : i+2 + name_len], byte_order)
            assert(text[i+2 + name_len : i+2 + name_len + 4] == b"\x01\x00\x00\x00")
            if include_variables:
                # These are the two bytes of the tag header just before the
                # size field. Byte order is spelled out because omitting it
                # only works on Python 3.11+ (where it defaults to "big").
                # NOTE(review): confirm whether this should follow byte_order.
                tag_type = int.from_bytes(text[i-4:i-2], "big")
                emit(f'[variable: {hex(tag_type)} "{var_name}"]')
            else:
                emit("_")
        elif tag == '\x07\x01':
            assert(text[i+1] == 0xcd)
            button_id = text[i]
            if button_id < len(buttons):
                button_name = buttons[button_id]
            else:
                button_name = hex(button_id)
            emit(f"[button: {button_name}]")
        elif tag == '\x15\x00':
            if include_sound:
                # Explicit "big" keeps the Python 3.11+ default behavior.
                # NOTE(review): confirm whether this should follow byte_order.
                detail = int.from_bytes(text[i:i+2], "big")
                emit(f"[{characters[detail >> 8]} thinking sound]")
        elif tag == '\x15\x01':
            if include_lookat:
                detail = int.from_bytes(text[i:i+2], "big")
                character_name = characters[detail >> 8]
                target = characters[detail & 0x00FF]
                emit(f"[look at: {character_name} -> {target}]")
        elif tag == '\x17\x00':
            name = text[i+2 : i+2 + arg]
            emit(f'[no text: "{decode(name, byte_order)}"]')
        elif tag == '\x17\x01':
            if include_pauses:
                emit(f"[pause: {arg}]")
        elif tag == '\x17\x02':
            # Speaker icon: ``high`` is the speaker, ``low`` selects the icon.
            # Explicit "big" keeps the Python 3.11+ default behavior.
            # NOTE(review): confirm whether this should follow byte_order.
            detail = int.from_bytes(text[i:i+2], "big")
            high = detail >> 8
            low = detail & 0x00FF
            if low == 0x19:
                emit(f"{characters[high]} (no icon): ")
            elif low == 0x1a:
                emit("{{icon|S.S. Drake}} ")
                if high != 5:
                    emit(f"(speaker mismatch, should be {characters[high]}) ")
            else:
                if 0x1f <= low <= 0x23:
                    character_id = 6
                    emote = emotes[low - 0x1f]
                else:
                    character_id = low % 5
                    emote = "neutral" if high == 5 else emotes[low // 5]
                emit(f"{{{{icon|{characters[character_id]}|v={emote}}}}} ")
                if high != character_id:
                    emit(f"(speaker mismatch, should be {characters[high]}) ")
        elif tag == '\x17\x03':
            if include_autoadvance:
                emit("[advance automatically]")
        elif tag == '\x17\x04':
            name_len = arg
            sound_name = decode(text[i+2 : i+2 + name_len], byte_order)
            if include_sound:
                emit(f'[sound: "{sound_name}", {text[i+2 + name_len]}]')
        else:
            # NOTE(review): ``i`` already points at the payload, so this dump
            # omits its first four bytes; kept as in the original.
            emit(f'[unknown tag: {hex(ord(tag[0]))}, {hex(ord(tag[1]))}: "{text[i+4:i+extra_size]}"]')

        # Every tag ends by skipping its payload.
        i += extra_size

    return "".join(parts)


def _split_sections(data, byte_order):
    """Return a dict mapping each 4-character section name to its payload.

    Sections start at offset 0x20. Each one is a 4-byte name, a 4-byte
    payload size, 8 skipped bytes and the payload, followed by any number of
    0xAB filler bytes.
    """
    sections = {}
    size = len(data)
    pos = 0x20
    while pos < size:
        sign = data[pos:pos+4].decode()
        length = int.from_bytes(data[pos+4:pos+8], byte_order)
        start = pos + 16
        sections[sign] = data[start:start + length]
        pos = start + length
        while pos < size and data[pos] == 0xab:
            pos += 1
    return sections


def _read_labels(block, byte_order):
    """Return the (label, message index) pairs stored in an LBL1 payload.

    The payload starts with a group count, followed by one 8-byte
    (label count, offset) entry per group. Each label is a 1-byte length,
    the name, and a 4-byte message index.
    """
    labels = []
    group_count = int.from_bytes(block[:4], byte_order)
    for group_id in range(group_count):
        entry = 4 + 8 * group_id
        label_count = int.from_bytes(block[entry:entry+4], byte_order)
        pos = int.from_bytes(block[entry+4:entry+8], byte_order)
        for _ in range(label_count):
            label_size = int.from_bytes(block[pos:pos+1], byte_order)
            pos += 1
            raw_label = block[pos:pos + label_size]
            message_id = int.from_bytes(block[pos + label_size:pos + label_size + 4], byte_order)
            # Always step over the whole record so that an undecodable name
            # does not misalign the labels that follow it.
            pos += label_size + 4
            try:
                label = raw_label.decode()
            except UnicodeDecodeError:
                continue
            labels.append((label, message_id))
    return labels


def _extract_messages(block, labels, byte_order):
    """Return a dict mapping each label to its cleaned message text.

    The TXT2 payload starts with a message count followed by one 4-byte
    offset per message. A message runs from its offset to the next larger
    offset, or to the end of the payload.
    """
    message_count = int.from_bytes(block[:4], byte_order)
    offsets = [
        int.from_bytes(block[4 + 4*k : 8 + 4*k], byte_order)
        for k in range(message_count)
    ]
    block_end = len(block)
    messages = {}
    for label, message_id in labels:
        # Valid indices are 0 .. message_count - 1.
        if message_id >= message_count:
            continue
        start = offsets[message_id]
        end = min((o for o in offsets if start < o < block_end), default=block_end)
        cleaned = clean_text(block[start:end], byte_order)
        # Drop the terminating NUL, but never a real character.
        if cleaned.endswith("\x00"):
            cleaned = cleaned[:-1]
        messages[label] = cleaned
    return messages


def load_messages(data):
    """Parse the bytes of a whole .msbt file into a {label: text} dict.

    The byte order is taken from the byte-order mark at offset 8.

    Raises:
        KeyError: If the file has no "LBL1" or "TXT2" section.
    """
    byte_order = "big" if data[8:10] == b'\xfe\xff' else "little"
    sections = _split_sections(data, byte_order)
    labels = _read_labels(sections["LBL1"], byte_order)
    return _extract_messages(sections["TXT2"], labels, byte_order)


def main():
    """Read ``filename`` and print every message under a ==label== heading."""
    with open(filename, 'rb') as f:
        data = f.read()
    messages = load_messages(data)
    for label in sorted(messages):
        print(f"=={label}==\n{messages[label]}\n")


if __name__ == "__main__":
    main()