User:2 B/Debatably useful stuff: Difference between revisions

From Pikipedia, the Pikmin wiki
Jump to navigation Jump to search
No edit summary
No edit summary
Line 2: Line 2:


  <nowiki># File from the text dump
  <nowiki># File from the text dump
filename = "Strings/Clean/Pikmin 3/USEng/Chat.json"
filename = "Strings/Raw/Pikmin 3/EU ROM/EUEng/Chat.msbt"


# Change these for your purposes
# Change these for your purposes
textbox_separator = "\n---\n"
textbox_separator = "\n<br>\n"
include_pauses = False
include_pauses = False
include_sound = False
include_sound = False
include_autoadvance = True
include_autoadvance = False
include_lookat = True
include_lookat = False
include_textsize = True
include_textsize = True
include_variables = False
include_variables = False
include_ruby = True
include_ruby = True


# Change text colors here
colors = {
    # emphasis
    0x01: "orange", 0x02: "#AA0", 0x03: "cyan", 0x26: "goldenrod",
    # pikmin colors
    0x0b: "red", 0x0e: "#AA0", 0x11: "blue", 0x14: "grey", 0x17: "pink",
    # leader colors
    0x1a: "lightblue", 0x1d: "pink", 0x20: "lime",
    # ultra-spicy
    0x25: "orange",}




def clean_text(lines):
def decode(bytes, byte_order = "little"):
     characters = ("Alph", "Brittany", "Charlie", "Louie", "Olimar", "S.S. Drake")
    if byte_order == "big":
        return bytes.decode("utf-16-be")
    else:
        return bytes.decode("utf-16-le")
 
def clean_text(text, byte_order):
     characters = ("Alph", "Brittany", "Charlie", "Louie", "Olimar", "S.S. Drake", "President")
     emotes = ("neutral", "shocked", "happy", "sad", "mad")
     emotes = ("neutral", "shocked", "happy", "sad", "mad")
    colors = {
        0x01: "orange", 0x02: "#AA0", 0x03: "cyan",
        # pikmin colors
        0x0b: "red", 0x0e: "#AA0", 0x11: "blue", 0x14: "grey", 0x17: "pink",
        # leader colors
        0x1a: "lightblue", 0x1d: "pink", 0x20: "lime"}
     buttons = [
     buttons = [
         '\\00', '\\01', '\\02', '\\03', '\\04', '\\05', '\\06', '\\07', '\\08', '\\09',
         '\\00', '\\01', '\\02', '\\03', '\\04', '\\05', '\\06', '\\07', '\\08', '\\09',
Line 39: Line 49:
     ]
     ]


    text = "\x00".join(lines)
     out = ""
     out = ""
     i = 0
     i = 0
     color = False
     color = False
     while i < len(text):
     while i < len(text):
         if text[i] == '\x0e':
        tmp = decode(text[i:i+2], byte_order)
             i += 1
         if decode(text[i:i+2], byte_order) == '\x0e':
             tag = text[i:i+2]
             i += 2
             extra_size = ord(text[i+2]) // 2
             tag = decode(text[i:i+4], byte_order)
             i += 3
             extra_size = int.from_bytes(text[i+4:i+6], byte_order)
             i += 6
             if tag == '\x00\x00':
             if tag == '\x00\x00':
                 if include_ruby:
                 if include_ruby:
                     kanji_count = ord(text[i]) // 2
                    tmp = text[i:i+4]
                     furigana_count = ord(text[i+1]) // 2
                     kanji_count = int.from_bytes(text[i:i+2], byte_order)
                     furigana = text[i+2 : i+2 + furigana_count]
                     furigana_count = int.from_bytes(text[i+2:i+4], byte_order)
                     kanji = text[i+2 + furigana_count : i+2 + furigana_count + kanji_count]
                     furigana = text[i+4 : i+4 + furigana_count]
                     out += f"<ruby>{kanji}<rt>{furigana}</rt></ruby>"
                     kanji = text[i+4 + furigana_count : i+4 + furigana_count + kanji_count]
                     out += f"<ruby>{decode(kanji, byte_order)}<rt>{decode(furigana, byte_order)}</rt></ruby>"
                     i += kanji_count
                     i += kanji_count
                 i += extra_size
                 i += extra_size
Line 62: Line 73:
             if tag == '\x00\x02':
             if tag == '\x00\x02':
                 if include_textsize:
                 if include_textsize:
                     out += f"[text size: {hex(ord(text[i]))}]"
                     out += f"[text size: {int.from_bytes(text[i], byte_order)}]"
                 i += extra_size
                 i += extra_size
                 continue
                 continue


             if tag == '\x00\x03':
             if tag == '\x00\x03':
                 if text[i] == '\uffff':
                 if text[i:i+2] == b'\xff\xff':
                     # out += "[white]"
                     # out += "[white]"
                     if color:
                     if color:
                         out += f"}}}}"
                         out += f"}}}}"
                     color = False
                     color = False
                 elif ord(text[i]) in colors:
                 elif int.from_bytes(text[i:i+2], byte_order) in colors:
                     # out += "[orange]"
                     # out += "[orange]"
                     if color:
                     if color:
                         out += f"}}}}"
                         out += f"}}}}"
                     out += f"{{{{color|2={colors[ord(text[i])]}|"
                     out += f"{{{{color|2={colors[int.from_bytes(text[i:i+2], byte_order)]}|"
                     color = True
                     color = True
                 else:
                 else:
                     out += f"[unknown color: {hex(ord(text[i]))}]"
                     out += f"[unknown color: {hex(int.from_bytes(text[i:i+2], byte_order))}]"
                 i += extra_size
                 i += extra_size
                 continue
                 continue
Line 89: Line 100:


             if tag[0] == '\x01':
             if tag[0] == '\x01':
                 name_len = ord(text[i]) // 2
                 name_len = int.from_bytes(text[i:i+2], byte_order)
                 var_name = text[i+1 : i+1 + name_len]
                 var_name = decode(text[i+2 : i+2 + name_len], byte_order)
                 assert(text[i+1 + name_len : i+1 + name_len + 2] == "\u0100\u0000")
                 assert(text[i+2 + name_len : i+2 + name_len + 4] == b"\x01\x00\x00\x00")
                 if include_variables:
                 if include_variables:
                     out += f"[variable: {hex(ord(text[i-2]))} \"{var_name}\"]"
                     out += f"[variable: {hex(int.from_bytes(text[i-4:i-2]))} \"{var_name}\"]"
                 else:
                 else:
                     out += "_"
                     out += "_"
Line 100: Line 111:


             if tag == '\x07\x01':
             if tag == '\x07\x01':
                 detail = ord(text[i])
                 assert(text[i+1] == 0xcd)
                 assert(detail & 0x00FF == 0xcd)
                 button_id = text[i]
                 button_id = (detail & 0xFF00) >> 8
                button_name = hex(button_id)
                 out += f"[button: {buttons[button_id] if button_id < len(buttons) else hex(button_id)}]"
                 if button_id < len(buttons):
                    button_name = buttons[button_id]
                 out += f"[button: {button_name}]"
                 i += extra_size
                 i += extra_size
                 continue
                 continue
Line 109: Line 122:
             if tag == '\x15\x00':
             if tag == '\x15\x00':
                 if include_sound:
                 if include_sound:
                     detail = ord(text[i])
                     detail = int.from_bytes(text[i:i+2])
                     high = detail >> 8
                     high = detail >> 8
                     low = detail & 0x00FF
                     low = detail & 0x00FF
Line 119: Line 132:
             if tag == '\x15\x01':
             if tag == '\x15\x01':
                 if include_lookat:
                 if include_lookat:
                     detail = ord(text[i])
                     detail = int.from_bytes(text[i:i+2])
                     character_name = characters[detail >> 8]
                     character_name = characters[detail >> 8]
                     target = characters[detail & 0x00FF]
                     target = characters[detail & 0x00FF]
                     out += f"[look at: {character_name} -> {target}]"
                     out += f"[look at: {character_name} -> {target}]"
                i += extra_size
                continue
            if tag == '\x17\x00':
                name_len = int.from_bytes(text[i:i+2], byte_order)
                name = text[i+2 : i+2 + name_len]
                out += f"[no text: \"{decode(name, byte_order)}\"]"
                 i += extra_size
                 i += extra_size
                 continue
                 continue
Line 128: Line 148:
             if tag == '\x17\x01':
             if tag == '\x17\x01':
                 if include_pauses:
                 if include_pauses:
                     out += f"[pause: {ord(text[i])}]"
                     out += f"[pause: {int.from_bytes(text[i:i+2], byte_order)}]"
                 i += extra_size
                 i += extra_size
                 continue
                 continue


             if tag == '\x17\x02':
             if tag == '\x17\x02':
                 detail = ord(text[i])
                 detail = int.from_bytes(text[i:i+2])
                 high = detail >> 8
                 high = detail >> 8
                 low = detail & 0x00FF
                 low = detail & 0x00FF
Line 145: Line 165:
                         out += f"(speaker mismatch, should be {characters[high]}) "
                         out += f"(speaker mismatch, should be {characters[high]}) "
                 else:
                 else:
                     character_name = characters[low % 5
                     if 0x1f <= low <= 0x23 :
                    emote = "neutral" if high == 5 else emotes[low // 5]
                        character_id = 6
                     out += f"{{{{icon|{character_name}|v={emote}}}}} "
                        emote = emotes[low - 0x1f]
                     if high != low % 5:
                    else:
                        character_id = low % 5  
                        emote = "neutral" if high == 5 else emotes[low // 5]
                     out += f"{{{{icon|{characters[character_id]}|v={emote}}}}} "
                     if high != character_id:
                         out += f"(speaker mismatch, should be {characters[high]}) "
                         out += f"(speaker mismatch, should be {characters[high]}) "


Line 161: Line 185:


             if tag == '\x17\x04':
             if tag == '\x17\x04':
                 name_len = ord(text[i]) // 2
                 name_len = int.from_bytes(text[i:i+2], byte_order)
                 sound_name = text[i+1 : i+1 + name_len]
                 sound_name = decode(text[i+2 : i+2 + name_len], byte_order)
                 if include_sound:
                 if include_sound:
                     out += f"[sound: \"{sound_name}\", {ord(text[i+1 + name_len]) >> 8}]"
                     out += f"[sound: \"{sound_name}\", {text[i+2 + name_len]}]"
                 i += extra_size
                 i += extra_size
                 continue
                 continue
              
              
             out += f"[unknown tag: {ord(tag[0])}, {ord(tag[1])}: \"{text[i:i+extra_size]}\"]"
             out += f"[unknown tag: {hex(ord(tag[0]))}, {hex(ord(tag[1]))}: \"{text[i+4:i+extra_size]}\"]"
             i += extra_size
             i += extra_size
             continue
             continue
          
          
         out += text[i]
         out += decode(text[i:i+2], byte_order)
         i += 1
         i += 2
     return out
     return out




with open(filename, 'r') as f:
 
     data = eval(f.read())
with open(filename, 'rb') as f:
     strings = data["strings"]
     text = f.read()
     for (name, lines) in strings.items():
    byte_order_mark = text[8:10]
        print("==" + name + "==\n")
    byte_order = "big" if byte_order_mark == b'\xfe\xff' else "little"
         print(clean_text(lines) + "\n\n")</nowiki>
    text = text[0x20:]
 
    blocks = {}
 
    while len(text) != 0:
        block_sign = text[:4].decode()
        text = text[4:]
 
        block_size = int.from_bytes(text[:4], byte_order)
        text = text[4:]
        text = text[8:]
        blocks[block_sign] = text[:block_size]
        text = text[block_size:]
 
        if len(text) == 0:
            break
        while text[0] == 0xab:
            text = text[1:]
            if len(text) == 0:
                break
 
 
     block = blocks["LBL1"]
    label_group_count = int.from_bytes(block[:4], byte_order)
    labels = []
     for group_id in range(label_group_count):
        label_count = int.from_bytes(block[4 + 8*group_id : 4 + 8*group_id + 4], byte_order)
        offset = int.from_bytes(block[4 + 8*group_id + 4 : 4 + 8*group_id + 8], byte_order)
        i = offset
        for label_id in range(label_count):
            label_size = int.from_bytes(block[i:i+1], byte_order)
            i += 1
            try:
                label = block[i : i + label_size].decode()
                i += label_size
                labels.append((label, int.from_bytes(block[i:i+4], byte_order)))
                i += 4
            except UnicodeDecodeError:
                i += 4
 
    block = blocks["TXT2"]
    message_count = int.from_bytes(block[:4], byte_order)
    message_offsets = []
    for message_id in range(message_count):
        offset = int.from_bytes(block[4 + 4*message_id : 4 + 4*message_id + 4], byte_order)
        message_offsets.append(offset)
   
    message_offsets.append(len(block))
 
    messages = {}
    for label, message_id in labels:
        if message_id > message_count:
            continue
        start_offset = message_offsets[message_id]
        end_offset = len(block)
        for offset in message_offsets:
            if offset > start_offset and offset < end_offset:
                end_offset = offset
 
        raw_message = block[start_offset:end_offset]
        message = ""
        for i in range(0,len(raw_message),2):
            message += chr(int.from_bytes(raw_message[i:i+2], byte_order))
         messages[label] = clean_text(raw_message, byte_order)[:-1]
 
    sorted_labels = sorted(messages.keys())
 
    for label in sorted_labels:
        print(f"=={label}==\n{messages[label]}\n")  
</nowiki>

Revision as of 10:54, January 21, 2025

Python script to clean up files from the Pikmin 3 text dump

# Path of the MSBT file to read, relative to the text dump
filename = "Strings/Raw/Pikmin 3/EU ROM/EUEng/Chat.msbt"

# Output options -- change these for your purposes.
# String that clean_text emits for every 0/4 tag.
textbox_separator = "\n<br>\n"
# Each switch enables the matching bracketed note / markup in clean_text.
include_pauses = False
include_sound = False
include_autoadvance = False
include_lookat = False
include_textsize = True
include_variables = False
include_ruby = True

# Change text colors here: color id read from a 0/3 tag -> value placed in
# the {{color|2=...}} template.
colors = {
    # emphasis
    0x01: "orange",
    0x02: "#AA0",
    0x03: "cyan",
    0x26: "goldenrod",
    # pikmin colors
    0x0b: "red",
    0x0e: "#AA0",
    0x11: "blue",
    0x14: "grey",
    0x17: "pink",
    # leader colors
    0x1a: "lightblue",
    0x1d: "pink",
    0x20: "lime",
    # ultra-spicy
    0x25: "orange",
}


def decode(bytes, byte_order = "little"):
    """Decode a UTF-16 byte string into a ``str``.

    ``byte_order == "big"`` selects the big-endian codec; every other value
    (including the default) selects the little-endian one.
    """
    codec = "utf-16-be" if byte_order == "big" else "utf-16-le"
    return bytes.decode(codec)

def clean_text(text, byte_order):
    """Turn one raw message into wiki-flavoured text.

    The message is walked two bytes (one UTF-16 code unit) at a time.
    Ordinary code units are copied to the output. A code unit equal to
    0x000E starts a control tag: the next four bytes are decoded into the
    two-character ``tag``, the following two bytes give ``extra_size`` (the
    number of parameter bytes), and the parameters follow. Each known tag is
    rendered as markup or a bracketed note, or silently skipped, depending
    on the module-level ``include_*`` switches, ``colors`` and
    ``textbox_separator``.

    Args:
        text: the message as ``bytes``, UTF-16 encoded in ``byte_order``.
        byte_order: ``"big"`` or ``"little"``; forwarded to ``decode`` and
            ``int.from_bytes``.

    Returns:
        The cleaned-up message as a ``str``.

    Raises:
        AssertionError: if a variable tag or a button tag does not carry the
            filler bytes this parser expects.
        IndexError: if a tag refers to a character or emote index outside
            the local lookup tables.
        UnicodeDecodeError: if a slice handed to ``decode`` is not valid
            UTF-16.
    """
    characters = ("Alph", "Brittany", "Charlie", "Louie", "Olimar", "S.S. Drake", "President")
    emotes = ("neutral", "shocked", "happy", "sad", "mad")
    # Indexed by button id; unnamed ids keep a '\\xx' placeholder.
    buttons = [
        '\\00', '\\01', '\\02', '\\03', '\\04', '\\05', '\\06', '\\07', '\\08', '\\09',
        'photo flash', 'photo',
        '\\0c', '\\0d', '\\0e', '\\0f', '\\10', '\\11', '\\12', '\\13',
        'roll right',
        '\\15',
        'roll left', 'photo zoom', 'use onion', 'move', 'throw', 'whistle', 'dismiss', 'change type',
        'reset camera', 'lock-on', 'charge', 'switch leader', 'spray',
        '\\23', '\\24', '\\25', '\\26', '\\27', '\\28', '\\29', '\\2a', '\\2b',
        'pluck', 'cancel',
        '\\2e',
        'shake',
    ]

    out = ""
    i = 0
    color = False  # True while a {{color|...}} template is still open
    while i < len(text):
        if decode(text[i:i+2], byte_order) != '\x0e':
            # Plain code unit: copy it through.
            out += decode(text[i:i+2], byte_order)
            i += 2
            continue

        # Tag header: marker (2 bytes), tag (4 bytes), parameter size (2 bytes).
        i += 2
        tag = decode(text[i:i+4], byte_order)
        extra_size = int.from_bytes(text[i+4:i+6], byte_order)
        i += 6  # i now points at the first parameter byte

        if tag == '\x00\x00':
            if include_ruby:
                kanji_count = int.from_bytes(text[i:i+2], byte_order)
                furigana_count = int.from_bytes(text[i+2:i+4], byte_order)
                furigana = text[i+4 : i+4 + furigana_count]
                kanji = text[i+4 + furigana_count : i+4 + furigana_count + kanji_count]
                out += f"<ruby>{decode(kanji, byte_order)}<rt>{decode(furigana, byte_order)}</rt></ruby>"
                # The base text was consumed above, so skip it in addition
                # to the tag parameters.
                i += kanji_count

        elif tag == '\x00\x02':
            if include_textsize:
                # Slice (not index) so int.from_bytes receives bytes; indexing
                # a bytes object yields an int, which from_bytes rejects.
                out += f"[text size: {int.from_bytes(text[i:i+2], byte_order)}]"

        elif tag == '\x00\x03':
            if text[i:i+2] == b'\xff\xff':
                # 0xFFFF ends the current color.
                if color:
                    out += "}}"
                color = False
            elif int.from_bytes(text[i:i+2], byte_order) in colors:
                # Close any open color template before opening the next one.
                if color:
                    out += "}}"
                out += f"{{{{color|2={colors[int.from_bytes(text[i:i+2], byte_order)]}|"
                color = True
            else:
                out += f"[unknown color: {hex(int.from_bytes(text[i:i+2], byte_order))}]"

        elif tag == '\x00\x04':
            out += textbox_separator

        elif tag[0] == '\x01':
            name_len = int.from_bytes(text[i:i+2], byte_order)
            var_name = decode(text[i+2 : i+2 + name_len], byte_order)
            assert(text[i+2 + name_len : i+2 + name_len + 4] == b"\x01\x00\x00\x00")
            if include_variables:
                # text[i-4:i-2] is the second half of the tag header; read it
                # with the same byte order that was used to decode `tag`.
                out += f"[variable: {hex(int.from_bytes(text[i-4:i-2], byte_order))} \"{var_name}\"]"
            else:
                out += "_"

        elif tag == '\x07\x01':
            assert(text[i+1] == 0xcd)
            button_id = text[i]
            button_name = hex(button_id)
            if button_id < len(buttons):
                button_name = buttons[button_id]
            out += f"[button: {button_name}]"

        elif tag == '\x15\x00':
            if include_sound:
                # "big" is spelled out: the original omitted the byte order,
                # which means big-endian on Python 3.11+ and a TypeError
                # before that. Read this way, the high byte is text[i].
                # NOTE(review): confirm these two bytes keep this order in
                # little-endian files.
                detail = int.from_bytes(text[i:i+2], "big")
                character = characters[detail >> 8]
                out += f"[{character} thinking sound]"

        elif tag == '\x15\x01':
            if include_lookat:
                # Explicit "big" for the same reason as the 0x15/0 tag above.
                detail = int.from_bytes(text[i:i+2], "big")
                character_name = characters[detail >> 8]
                target = characters[detail & 0x00FF]
                out += f"[look at: {character_name} -> {target}]"

        elif tag == '\x17\x00':
            name_len = int.from_bytes(text[i:i+2], byte_order)
            name = text[i+2 : i+2 + name_len]
            out += f"[no text: \"{decode(name, byte_order)}\"]"

        elif tag == '\x17\x01':
            if include_pauses:
                out += f"[pause: {int.from_bytes(text[i:i+2], byte_order)}]"

        elif tag == '\x17\x02':
            # Explicit "big": high = first parameter byte (speaker),
            # low = second parameter byte (icon selector).
            # NOTE(review): confirm the byte order for little-endian files.
            detail = int.from_bytes(text[i:i+2], "big")
            high = detail >> 8
            low = detail & 0x00FF
            if low == 0x19:
                character_name = characters[high]
                out += f"{character_name} (no icon): "
            elif low == 0x1a:
                out += f"{{{{icon|S.S. Drake}}}} "
                if high != 5:
                    out += f"(speaker mismatch, should be {characters[high]}) "
            else:
                if 0x1f <= low <= 0x23:
                    # Icon selectors 0x1f-0x23 map to characters[6], one per emote.
                    character_id = 6
                    emote = emotes[low - 0x1f]
                else:
                    character_id = low % 5
                    emote = "neutral" if high == 5 else emotes[low // 5]
                out += f"{{{{icon|{characters[character_id]}|v={emote}}}}} "
                if high != character_id:
                    out += f"(speaker mismatch, should be {characters[high]}) "

        elif tag == '\x17\x03':
            if include_autoadvance:
                out += "[advance automatically]"

        elif tag == '\x17\x04':
            name_len = int.from_bytes(text[i:i+2], byte_order)
            sound_name = decode(text[i+2 : i+2 + name_len], byte_order)
            if include_sound:
                out += f"[sound: \"{sound_name}\", {text[i+2 + name_len]}]"

        else:
            # i already points at the first parameter byte, so the whole
            # parameter area is text[i : i + extra_size].
            out += f"[unknown tag: {hex(ord(tag[0]))}, {hex(ord(tag[1]))}: \"{text[i:i+extra_size]}\"]"

        # Every tag ends by skipping its parameter bytes.
        i += extra_size
    return out



# Read the whole file up front so the handle is released before parsing.
with open(filename, 'rb') as f:
    text = f.read()

# Bytes 8-9 of the file header select the byte order: b'\xfe\xff' means
# big-endian, anything else is treated as little-endian.
byte_order_mark = text[8:10]
byte_order = "big" if byte_order_mark == b'\xfe\xff' else "little"
# The sections start after the 0x20-byte file header.
text = text[0x20:]

# Split the rest of the file into sections keyed by their 4-character name.
blocks = {}
while text:
    block_sign = text[:4].decode()
    block_size = int.from_bytes(text[4:8], byte_order)
    # Section header is 16 bytes: name (4), size (4), 8 bytes that are skipped.
    blocks[block_sign] = text[16 : 16 + block_size]
    # Drop the section and any run of 0xAB bytes that follows it.
    text = text[16 + block_size:].lstrip(b'\xab')


# LBL1: a group table (count + offset per group) followed by
# length-prefixed labels, each paired with a 4-byte message index.
block = blocks["LBL1"]
label_group_count = int.from_bytes(block[:4], byte_order)
labels = []
for group_id in range(label_group_count):
    entry = 4 + 8*group_id
    label_count = int.from_bytes(block[entry : entry + 4], byte_order)
    i = int.from_bytes(block[entry + 4 : entry + 8], byte_order)
    for label_id in range(label_count):
        label_size = int.from_bytes(block[i:i+1], byte_order)
        i += 1
        raw_label = block[i : i + label_size]
        message_id = int.from_bytes(block[i + label_size : i + label_size + 4], byte_order)
        # Always step over the whole entry (name + index) so that one
        # undecodable name cannot desynchronise the labels after it.
        i += label_size + 4
        try:
            label = raw_label.decode()
        except UnicodeDecodeError:
            # Best effort: skip labels whose name is not valid UTF-8.
            continue
        labels.append((label, message_id))

# TXT2: a message count, then one 4-byte start offset per message.
block = blocks["TXT2"]
message_count = int.from_bytes(block[:4], byte_order)
message_offsets = [
    int.from_bytes(block[4 + 4*message_id : 4 + 4*message_id + 4], byte_order)
    for message_id in range(message_count)
]
# Sentinel so that the last message ends at the end of the section.
message_offsets.append(len(block))

messages = {}
for label, message_id in labels:
    # Valid indices are 0 .. message_count - 1; index message_count is the
    # sentinel appended above, not a message.
    if message_id >= message_count:
        continue
    start_offset = message_offsets[message_id]
    # A message ends at the nearest later offset, or at the section end.
    end_offset = min(
        (offset for offset in message_offsets if start_offset < offset < len(block)),
        default=len(block),
    )

    raw_message = block[start_offset:end_offset]
    # [:-1] drops the last character of the cleaned text.
    # NOTE(review): presumably the message's NUL terminator -- confirm.
    messages[label] = clean_text(raw_message, byte_order)[:-1]

sorted_labels = sorted(messages)

for label in sorted_labels:
    print(f"=={label}==\n{messages[label]}\n")