mirror of
https://github.com/signalapp/Signal-Android.git
synced 2025-12-05 01:10:48 +00:00
367 lines
13 KiB
Python
Executable File
367 lines
13 KiB
Python
Executable File
#! /usr/bin/env python3
|
|
|
|
import sys
|
|
import re
|
|
import logging
|
|
from xml.etree.ElementTree import Element
|
|
from zipfile import ZipFile
|
|
import xml.etree.ElementTree as ET
|
|
from dataclasses import dataclass
|
|
from typing import Optional
|
|
from collections import defaultdict
|
|
|
|
from androguard.core import axml
|
|
from loguru import logger
|
|
|
|
from util import deep_compare, format_differences
|
|
from tqdm import tqdm
|
|
|
|
logging.getLogger("deepdiff").setLevel(logging.ERROR)
|
|
|
|
logger.disable("androguard")
|
|
|
|
|
|
@dataclass
|
|
class XmlDifference:
|
|
"""Represents a difference between two XML elements."""
|
|
|
|
diff_type: str # "tag", "attribute", "text", "child_count"
|
|
path: str
|
|
attribute_name: Optional[str] = None
|
|
first_value: Optional[str] = None
|
|
second_value: Optional[str] = None
|
|
child_tag: Optional[str] = None
|
|
|
|
|
|
IGNORE_FILES = [
|
|
# Related to app signing. Not expected to be present in unsigned builds. Doesn"t affect app code.
|
|
"META-INF/MANIFEST.MF",
|
|
"META-INF/CERTIFIC.SF",
|
|
"META-INF/CERTIFIC.RSA",
|
|
"META-INF/TEXTSECU.SF",
|
|
"META-INF/TEXTSECU.RSA",
|
|
"META-INF/BNDLTOOL.SF",
|
|
"META-INF/BNDLTOOL.RSA",
|
|
"META-INF/code_transparency_signed.jwt",
|
|
"stamp-cert-sha256",
|
|
]
|
|
|
|
ALLOWED_ARSC_DIFF_PATHS = [".res1"]
|
|
|
|
|
|
def compare(apk1, apk2) -> bool:
|
|
print(f"Comparing: \n\t{apk1}\n\t{apk2}\n")
|
|
|
|
print("Unzipping...")
|
|
zip1 = ZipFile(apk1, "r")
|
|
zip2 = ZipFile(apk2, "r")
|
|
|
|
return compare_entry_names(zip1, zip2) and compare_entry_contents(zip1, zip2) == True
|
|
|
|
|
|
def compare_entry_names(zip1: ZipFile, zip2: ZipFile) -> bool:
|
|
print("Comparing zip entry names...")
|
|
name_list_sorted_1 = sorted(zip1.namelist())
|
|
name_list_sorted_2 = sorted(zip2.namelist())
|
|
|
|
for ignoreFile in IGNORE_FILES:
|
|
while ignoreFile in name_list_sorted_1:
|
|
name_list_sorted_1.remove(ignoreFile)
|
|
while ignoreFile in name_list_sorted_2:
|
|
name_list_sorted_2.remove(ignoreFile)
|
|
|
|
if len(name_list_sorted_1) != len(name_list_sorted_2):
|
|
print("Manifest lengths differ!")
|
|
|
|
for entry_name_1, entry_name_2 in zip(name_list_sorted_1, name_list_sorted_2):
|
|
if entry_name_1 != entry_name_2:
|
|
print("Sorted manifests don't match, %s vs %s" % (entry_name_1, entry_name_2))
|
|
return False
|
|
|
|
return True
|
|
|
|
|
|
def compare_entry_contents(zip1: ZipFile, zip2: ZipFile) -> bool:
|
|
print("Comparing zip entry contents...")
|
|
info_list_1 = list(filter(lambda info: info.filename not in IGNORE_FILES, zip1.infolist()))
|
|
info_list_2 = list(filter(lambda info: info.filename not in IGNORE_FILES, zip2.infolist()))
|
|
|
|
if len(info_list_1) != len(info_list_2):
|
|
print("APK info lists of different length!")
|
|
return False
|
|
|
|
success = True
|
|
for entry_info_1 in info_list_1:
|
|
for entry_info_2 in list(info_list_2):
|
|
if entry_info_1.filename == entry_info_2.filename:
|
|
entry_bytes_1 = zip1.read(entry_info_1.filename)
|
|
entry_bytes_2 = zip2.read(entry_info_2.filename)
|
|
|
|
if entry_bytes_1 != entry_bytes_2 and not handle_special_cases(entry_info_1.filename, entry_bytes_1, entry_bytes_2):
|
|
zip1.extract(entry_info_1, "mismatches/first")
|
|
zip2.extract(entry_info_2, "mismatches/second")
|
|
print(f"APKs differ on file {entry_info_1.filename}! Files extracted to the mismatches/ directory.")
|
|
success = False
|
|
|
|
info_list_2.remove(entry_info_2)
|
|
break
|
|
|
|
return success
|
|
|
|
|
|
def handle_special_cases(filename: str, bytes1: bytes, bytes2: bytes):
|
|
"""
|
|
There are some specific files that expect will not be byte-for-byte identical. We want to ensure that the files
|
|
are matching except these expected differences. The differences are all related to extra XML attributes that the
|
|
Play Store may add as part of the bundle process. These differences do not affect the behavior of the app and are
|
|
unfortunately unavoidable given the modern realities of the Play Store.
|
|
"""
|
|
if filename == "AndroidManifest.xml":
|
|
print("Comparing AndroidManifest.xml...")
|
|
return compare_android_xml(bytes1, bytes2)
|
|
elif filename == "resources.arsc":
|
|
print("Comparing resources.arsc (may take a while)...")
|
|
return compare_resources_arsc(bytes1, bytes2)
|
|
elif re.match("res/xml/splits[0-9]+\\.xml", filename):
|
|
print(f"Comparing {filename}...")
|
|
return compare_split_xml(bytes1, bytes2)
|
|
|
|
return False
|
|
|
|
|
|
def compare_android_xml(bytes1: bytes, bytes2: bytes) -> bool:
|
|
all_differences = compare_xml(bytes1, bytes2)
|
|
bad_differences = []
|
|
|
|
for diff in all_differences:
|
|
is_split_attr = diff.diff_type == "attribute" and diff.path in ["manifest", "manifest/application"] and "split" in diff.attribute_name.lower()
|
|
is_meta_attr = diff.diff_type == "attribute" and diff.path == "manifest/application/meta-data"
|
|
is_meta_child_count = diff.diff_type == "child_count" and diff.child_tag == "meta-data"
|
|
|
|
if not is_split_attr and not is_meta_attr and not is_meta_child_count and not is_meta_attr:
|
|
bad_differences.append(diff)
|
|
|
|
if bad_differences:
|
|
print(bad_differences)
|
|
return False
|
|
|
|
return True
|
|
|
|
|
|
def compare_split_xml(bytes1: bytes, bytes2: bytes) -> bool:
|
|
all_differences = compare_xml(bytes1, bytes2)
|
|
bad_differences = []
|
|
|
|
for diff in all_differences:
|
|
is_language = diff.diff_type == "attribute" and diff.path == "splits/module/language/entry"
|
|
|
|
if not is_language:
|
|
bad_differences.append(diff)
|
|
|
|
if bad_differences:
|
|
print(bad_differences)
|
|
return False
|
|
|
|
return True
|
|
|
|
|
|
def compare_resources_arsc(first_entry_bytes: bytes, second_entry_bytes: bytes) -> bool:
|
|
"""
|
|
Compares two resources.arsc files.
|
|
Largely taken from https://github.com/TheTechZone/reproducible-tests/blob/d8c73772b87fbe337eb852e338238c95703d59d6/comparators/arsc_compare.py
|
|
"""
|
|
first_arsc = axml.ARSCParser(first_entry_bytes)
|
|
second_arsc = axml.ARSCParser(second_entry_bytes)
|
|
|
|
all_package_names = sorted(set(first_arsc.packages.keys()) | set(second_arsc.packages.keys()))
|
|
total_diffs = defaultdict(list)
|
|
|
|
success = True
|
|
|
|
for package_name in all_package_names:
|
|
# Check if package exists in both files
|
|
if package_name not in first_arsc.packages:
|
|
print(f"Package only in source file: {package_name}")
|
|
success = False
|
|
continue
|
|
|
|
if package_name not in second_arsc.packages:
|
|
print(f"Package only in target file: {package_name}")
|
|
success = False
|
|
continue
|
|
|
|
packages1 = first_arsc.packages[package_name]
|
|
packages2 = second_arsc.packages[package_name]
|
|
|
|
# Check package length
|
|
if len(packages1) != len(packages2):
|
|
print(f"Package length mismatch: {len(packages1)} vs {len(packages2)}")
|
|
success = False
|
|
continue
|
|
|
|
# Compare each package element
|
|
for i in tqdm(range(len(packages1))):
|
|
pkg1 = packages1[i]
|
|
pkg2 = packages2[i]
|
|
|
|
if type(pkg1) != type(pkg2):
|
|
print(f"Element type mismatch at index {i}: {type(pkg1).__name__} vs {type(pkg2).__name__}")
|
|
success = False
|
|
continue
|
|
|
|
# Different comparison strategies based on type
|
|
if isinstance(pkg1, axml.ARSCResTablePackage):
|
|
diffs = deep_compare(pkg1, pkg2)
|
|
if diffs:
|
|
print(f"Differences in ARSCResTablePackage at index {i}:")
|
|
total_diffs["ARSCResTablePackage"].append((i, diffs))
|
|
success = False
|
|
|
|
elif isinstance(pkg1, axml.StringBlock):
|
|
diffs = deep_compare(pkg1, pkg2)
|
|
if diffs:
|
|
print(f"Differences in StringBlock at index {i}:")
|
|
total_diffs["StringBlock"].append((i, diffs))
|
|
success = False
|
|
|
|
elif isinstance(pkg1, axml.ARSCHeader):
|
|
diffs = deep_compare(pkg1, pkg2)
|
|
if diffs:
|
|
print(f"Differences in ARSCHeader at index {i}:")
|
|
total_diffs["ARSCHeader"].append((i, diffs))
|
|
success = False
|
|
|
|
elif isinstance(pkg1, axml.ARSCResTypeSpec):
|
|
diffs = deep_compare(pkg1, pkg2)
|
|
|
|
if diffs and not all(path in ALLOWED_ARSC_DIFF_PATHS for path in diffs.keys()):
|
|
print(f"Disallowed differences in ARSCResTypeSpec at index {i}:")
|
|
print(format_differences(diffs))
|
|
total_diffs["ARSCResTypeSpec"].append((i, diffs))
|
|
success = False
|
|
|
|
elif isinstance(pkg1, axml.ARSCResTableEntry):
|
|
# Use string representation for comparison
|
|
if pkg1.__repr__() != pkg2.__repr__():
|
|
print(f"Differences in ARSCResTableEntry at index {i}")
|
|
print(f"Target: {pkg1.__repr__()}", 3)
|
|
print(f"Source: {pkg2.__repr__()}", 3)
|
|
total_diffs["ARSCResTableEntry"].append((i, {"representation": f"{pkg1.__repr__()} vs {pkg2.__repr__()}"}))
|
|
success = False
|
|
|
|
elif isinstance(pkg1, list):
|
|
if pkg1 != pkg2:
|
|
print(f"List difference at index {i}")
|
|
total_diffs["list"].append((i, {"diff": "Lists differ"}))
|
|
success = False
|
|
|
|
elif isinstance(pkg1, axml.ARSCResType):
|
|
diffs = deep_compare(pkg1, pkg2)
|
|
if diffs:
|
|
print(f"Differences in ARSCResType at index {i}:")
|
|
total_diffs["ARSCResType"].append((i, diffs))
|
|
success = False
|
|
else:
|
|
# Other types
|
|
print(f"Unhandled type: {type(pkg1).__name__} at index {i}")
|
|
diffs = deep_compare(pkg1, pkg2)
|
|
if diffs:
|
|
total_diffs[type(pkg1).__name__].append((i, diffs))
|
|
success = False
|
|
|
|
for type_name, diffs in total_diffs.items():
|
|
if diffs:
|
|
print(f" {type_name}: {len(diffs)}", 1)
|
|
|
|
if not success:
|
|
print("Files have differences beyond the allowed .res1 differences.")
|
|
return True
|
|
|
|
|
|
def compare_xml(bytes1: bytes, bytes2: bytes) -> list[XmlDifference]:
|
|
printer = axml.AXMLPrinter(bytes1)
|
|
entry_text_1 = printer.get_xml().decode("utf-8")
|
|
|
|
printer = axml.AXMLPrinter(bytes2)
|
|
entry_text_2 = printer.get_xml().decode("utf-8")
|
|
|
|
if entry_text_1 == entry_text_2:
|
|
return True
|
|
|
|
root1 = ET.fromstring(entry_text_1)
|
|
root2 = ET.fromstring(entry_text_2)
|
|
|
|
return compare_xml_elements(root1, root2)
|
|
|
|
|
|
def compare_xml_elements(elem1: Element, elem2: Element, path: str = "") -> list[XmlDifference]:
|
|
"""Recursively compare two XML elements and return list of XmlDifference objects."""
|
|
differences: list[XmlDifference] = []
|
|
|
|
# Build current path
|
|
current_path = f"{path}/{elem1.tag}" if path else elem1.tag
|
|
|
|
# Compare tags
|
|
if elem1.tag != elem2.tag:
|
|
differences.append(XmlDifference(diff_type="tag", path=path, first_value=elem1.tag, second_value=elem2.tag))
|
|
return differences
|
|
|
|
# Compare attributes
|
|
attrs1 = elem1.attrib
|
|
attrs2 = elem2.attrib
|
|
|
|
all_keys = set(attrs1.keys()) | set(attrs2.keys())
|
|
for key in sorted(all_keys):
|
|
val1 = attrs1.get(key)
|
|
val2 = attrs2.get(key)
|
|
|
|
if val1 != val2:
|
|
differences.append(XmlDifference(diff_type="attribute", path=current_path, attribute_name=key, first_value=val1, second_value=val2))
|
|
|
|
# Compare text content
|
|
text1 = (elem1.text or "").strip()
|
|
text2 = (elem2.text or "").strip()
|
|
if text1 != text2:
|
|
differences.append(XmlDifference(diff_type="text", path=current_path, first_value=text1, second_value=text2))
|
|
|
|
# Compare children
|
|
children1 = list(elem1)
|
|
children2 = list(elem2)
|
|
|
|
# Try to match children by tag name for comparison
|
|
children1_by_tag = {}
|
|
for child in children1:
|
|
children1_by_tag.setdefault(child.tag, []).append(child)
|
|
|
|
children2_by_tag = {}
|
|
for child in children2:
|
|
children2_by_tag.setdefault(child.tag, []).append(child)
|
|
|
|
# Compare children with matching tags
|
|
all_child_tags = set(children1_by_tag.keys()) | set(children2_by_tag.keys())
|
|
for tag in sorted(all_child_tags):
|
|
list1 = children1_by_tag.get(tag, [])
|
|
list2 = children2_by_tag.get(tag, [])
|
|
|
|
if len(list1) != len(list2):
|
|
differences.append(XmlDifference(diff_type="child_count", path=current_path, child_tag=tag, first_value=str(len(list1)), second_value=str(len(list2))))
|
|
|
|
# Compare matching elements recursively
|
|
for child1, child2 in zip(list1, list2):
|
|
differences.extend(compare_xml_elements(child1, child2, current_path))
|
|
|
|
return differences
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if len(sys.argv) != 3:
|
|
print("Usage: apkdiff <pathToFirstApk> <pathToSecondApk>")
|
|
sys.exit(1)
|
|
|
|
if compare(sys.argv[1], sys.argv[2]):
|
|
print("APKs match!")
|
|
sys.exit(0)
|
|
else:
|
|
print("APKs don't match!")
|
|
sys.exit(1)
|