pycodestyle (PEP 8) cleanup in Python scripts

author Peter Eisentraut <peter@eisentraut.org>

Wed, 9 Mar 2022 09:51:41 +0000 (10:51 +0100)

committer Peter Eisentraut <peter@eisentraut.org>

Wed, 9 Mar 2022 09:54:20 +0000 (10:54 +0100)
author Peter Eisentraut <peter@eisentraut.org>
Wed, 9 Mar 2022 09:51:41 +0000 (10:51 +0100)
committer Peter Eisentraut <peter@eisentraut.org>
Wed, 9 Mar 2022 09:54:20 +0000 (10:54 +0100)
diff --git a/contrib/unaccent/generate_unaccent_rules.py b/contrib/unaccent/generate_unaccent_rules.py

index bc667eaf15b59c0486eef0d46ddc58b0ba79eac2..c405e231b39a632038e7a62c3abbd9fee42a7763 100644 (file)
--- a/contrib/unaccent/generate_unaccent_rules.py
+++ b/contrib/unaccent/generate_unaccent_rules.py
@@ -38,10 +38,10 @@ sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)
  # For now we are being conservative by including only Latin and Greek.  This
  # could be extended in future based on feedback from people with relevant
  # language knowledge.
-PLAIN_LETTER_RANGES = ((ord('a'), ord('z')), # Latin lower case
-                       (ord('A'), ord('Z')), # Latin upper case
-                       (0x03b1, 0x03c9),     # GREEK SMALL LETTER ALPHA, GREEK SMALL LETTER OMEGA
-                       (0x0391, 0x03a9))     # GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA
+PLAIN_LETTER_RANGES = ((ord('a'), ord('z')),  # Latin lower case
+                       (ord('A'), ord('Z')),  # Latin upper case
+                       (0x03b1, 0x03c9),      # GREEK SMALL LETTER ALPHA, GREEK SMALL LETTER OMEGA
+                       (0x0391, 0x03a9))      # GREEK CAPITAL LETTER ALPHA, GREEK CAPITAL LETTER OMEGA
  
  # Combining marks follow a "base" character, and result in a composite
  # character. Example: "U&'A\0300'"produces "À".There are three types of
@@ -51,9 +51,10 @@ PLAIN_LETTER_RANGES = ((ord('a'), ord('z')), # Latin lower case
  #   https://en.wikipedia.org/wiki/Combining_character
  #   https://www.unicode.org/charts/PDF/U0300.pdf
  #   https://www.unicode.org/charts/PDF/U20D0.pdf
-COMBINING_MARK_RANGES = ((0x0300, 0x0362),  # Mn: Accents, IPA
-                         (0x20dd, 0x20E0),  # Me: Symbols
-                         (0x20e2, 0x20e4),) # Me: Screen, keycap, triangle
+COMBINING_MARK_RANGES = ((0x0300, 0x0362),   # Mn: Accents, IPA
+                         (0x20dd, 0x20E0),   # Me: Symbols
+                         (0x20e2, 0x20e4),)  # Me: Screen, keycap, triangle
+
  
  def print_record(codepoint, letter):
      if letter:
@@ -63,12 +64,14 @@ def print_record(codepoint, letter):
  
      print(output)
  
+
  class Codepoint:
      def __init__(self, id, general_category, combining_ids):
          self.id = id
          self.general_category = general_category
          self.combining_ids = combining_ids
  
+
  def is_mark_to_remove(codepoint):
      """Return true if this is a combining mark to remove."""
      if not is_mark(codepoint):
@@ -79,17 +82,20 @@ def is_mark_to_remove(codepoint):
              return True
      return False
  
+
  def is_plain_letter(codepoint):
      """Return true if codepoint represents a "plain letter"."""
      for begin, end in PLAIN_LETTER_RANGES:
-      if codepoint.id >= begin and codepoint.id <= end:
-        return True
+        if codepoint.id >= begin and codepoint.id <= end:
+            return True
      return False
  
+
  def is_mark(codepoint):
      """Returns true for diacritical marks (combining codepoints)."""
      return codepoint.general_category in ("Mn", "Me", "Mc")
  
+
  def is_letter_with_marks(codepoint, table):
      """Returns true for letters combined with one or more marks."""
      # See https://www.unicode.org/reports/tr44/tr44-14.html#General_Category_Values
@@ -105,16 +111,18 @@ def is_letter_with_marks(codepoint, table):
  
      # Check if the base letter of this letter has marks.
      codepoint_base = codepoint.combining_ids[0]
-    if (is_plain_letter(table[codepoint_base]) is False and \
-        is_letter_with_marks(table[codepoint_base], table) is False):
+    if is_plain_letter(table[codepoint_base]) is False and \
+       is_letter_with_marks(table[codepoint_base], table) is False:
          return False
  
      return True
  
+
  def is_letter(codepoint, table):
      """Return true for letter with or without diacritical marks."""
      return is_plain_letter(codepoint) or is_letter_with_marks(codepoint, table)
  
+
  def get_plain_letter(codepoint, table):
      """Return the base codepoint without marks. If this codepoint has more
      than one combining character, do a recursive lookup on the table to
@@ -133,15 +141,18 @@ def get_plain_letter(codepoint, table):
      # Should not come here
      assert(False)
  
+
  def is_ligature(codepoint, table):
      """Return true for letters combined with letters."""
      return all(is_letter(table[i], table) for i in codepoint.combining_ids)
  
+
  def get_plain_letters(codepoint, table):
      """Return a list of plain letters from a ligature."""
      assert(is_ligature(codepoint, table))
      return [get_plain_letter(table[id], table) for id in codepoint.combining_ids]
  
+
  def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
      """Parse the XML file and return a set of tuples (src, trg), where "src"
      is the original character and "trg" the substitute."""
@@ -189,21 +200,23 @@ def parse_cldr_latin_ascii_transliterator(latinAsciiFilePath):
  
      return charactersSet
  
+
  def special_cases():
      """Returns the special cases which are not handled by other methods"""
      charactersSet = set()
  
      # Cyrillic
-    charactersSet.add((0x0401, "\u0415")) # CYRILLIC CAPITAL LETTER IO
-    charactersSet.add((0x0451, "\u0435")) # CYRILLIC SMALL LETTER IO
+    charactersSet.add((0x0401, "\u0415"))  # CYRILLIC CAPITAL LETTER IO
+    charactersSet.add((0x0451, "\u0435"))  # CYRILLIC SMALL LETTER IO
  
      # Symbols of "Letterlike Symbols" Unicode Block (U+2100 to U+214F)
-    charactersSet.add((0x2103, "\xb0C")) # DEGREE CELSIUS
-    charactersSet.add((0x2109, "\xb0F")) # DEGREE FAHRENHEIT
-    charactersSet.add((0x2117, "(P)")) # SOUND RECORDING COPYRIGHT
+    charactersSet.add((0x2103, "\xb0C"))   # DEGREE CELSIUS
+    charactersSet.add((0x2109, "\xb0F"))   # DEGREE FAHRENHEIT
+    charactersSet.add((0x2117, "(P)"))     # SOUND RECORDING COPYRIGHT
  
      return charactersSet
  
+
  def main(args):
      # https://www.unicode.org/reports/tr44/tr44-14.html#Character_Decomposition_Mappings
      decomposition_type_pattern = re.compile(" *<[^>]*> *")
@@ -238,12 +251,12 @@ def main(args):
             len(codepoint.combining_ids) > 1:
              if is_letter_with_marks(codepoint, table):
                  charactersSet.add((codepoint.id,
-                             chr(get_plain_letter(codepoint, table).id)))
+                                   chr(get_plain_letter(codepoint, table).id)))
              elif args.noLigaturesExpansion is False and is_ligature(codepoint, table):
                  charactersSet.add((codepoint.id,
-                             "".join(chr(combining_codepoint.id)
-                                     for combining_codepoint \
-                                     in get_plain_letters(codepoint, table))))
+                                   "".join(chr(combining_codepoint.id)
+                                           for combining_codepoint
+                                           in get_plain_letters(codepoint, table))))
          elif is_mark_to_remove(codepoint):
              charactersSet.add((codepoint.id, None))
  
@@ -258,6 +271,7 @@ def main(args):
      for characterPair in charactersList:
          print_record(characterPair[0], characterPair[1])
  
+
  if __name__ == "__main__":
      parser = argparse.ArgumentParser(description='This script builds unaccent.rules on standard output when given the contents of UnicodeData.txt and Latin-ASCII.xml given as arguments.')
      parser.add_argument("--unicode-data-file", help="Path to formatted text file corresponding to UnicodeData.txt.", type=str, required=True, dest='unicodeDataFilePath')
diff --git a/src/test/locale/sort-test.py b/src/test/locale/sort-test.py

index 53019038ab1323a6843f92a0017b49e61231f56e..21d6e78eb54d83a571323636d9191580f154ef4d 100755 (executable)
--- a/src/test/locale/sort-test.py
+++ b/src/test/locale/sort-test.py
@@ -1,18 +1,20 @@
  #! /usr/bin/env python
  
-import sys, string, locale
+import locale
+import sys
+
  locale.setlocale(locale.LC_ALL, "")
  
  if len(sys.argv) != 2:
-   sys.stderr.write("Usage: sort.py filename\n")
-   sys.exit(1)
+    sys.stderr.write("Usage: sort.py filename\n")
+    sys.exit(1)
  
  infile = open(sys.argv[1], 'r')
  list = infile.readlines()
  infile.close()
  
  for i in range(0, len(list)):
-   list[i] = list[i][:-1] # chop!
+    list[i] = list[i][:-1]  # chop!
  
  list.sort(key=locale.strxfrm)
  print('\n'.join(list))
author	Peter Eisentraut <peter@eisentraut.org>
	Wed, 9 Mar 2022 09:51:41 +0000 (10:51 +0100)
committer	Peter Eisentraut <peter@eisentraut.org>
	Wed, 9 Mar 2022 09:54:20 +0000 (10:54 +0100)
contrib/unaccent/generate_unaccent_rules.py		patch | blob | blame | history
src/test/locale/sort-test.py		patch | blob | blame | history