Module loda.oeis.sequence
Integer sequence model.
Expand source code
"""Integer sequence model."""
import functools
import os.path
import re
import requests
import subprocess
@functools.total_ordering
class Sequence:
    """Integer sequence with a numeric ID, a name and a list of terms.

    Instances order by ``terms`` first and by ``id`` when the terms are equal;
    ``functools.total_ordering`` derives the remaining comparison operators
    from ``__eq__`` and ``__lt__``. Because ``__eq__`` is defined without
    ``__hash__``, instances are unhashable.
    """

    def __init__(self, id: int, name="", terms=None):
        """Create a sequence.

        Args:
            id: Numeric sequence ID (see `id_str` for the textual form).
            name: Name of the sequence.
            terms: List of integer terms; a new empty list when omitted.
        """
        self.id = id
        self.name = name
        # A ``terms=[]`` default would be a single list shared by every
        # instance created without explicit terms, so build a fresh one.
        self.terms = [] if terms is None else terms

    def __str__(self) -> str:
        """Return ``"<A-number>: <name>"``."""
        return "{}: {}".format(self.id_str(), self.name)

    def __eq__(self, other) -> bool:
        """Two sequences are equal when both ID and terms are equal."""
        if not isinstance(other, Sequence):
            # Let Python try the reflected comparison instead of failing
            # with AttributeError on a foreign operand.
            return NotImplemented
        return self.id == other.id and self.terms == other.terms

    def __lt__(self, other) -> bool:
        """Order by terms, using the ID as tie-breaker."""
        if not isinstance(other, Sequence):
            return NotImplemented
        # Tuple comparison: terms decide unless they are equal, then the ID.
        return (self.terms, self.id) < (other.terms, other.id)

    def id_str(self) -> str:
        """Return the ID as ``A`` followed by the number zero-padded to six digits."""
        return "A{:06}".format(self.id)

    @classmethod
    def load_oeis(cls, oeis_path: str) -> list:
        """Load sequences from the `stripped` and `names` files.

        Args:
            oeis_path: Folder that contains the two files.

        Returns:
            A list of `Sequence` objects indexed by sequence ID. IDs that do
            not occur in the files are present as sequences with an empty
            name and no terms.

        Raises:
            ValueError: If a non-blank, non-comment line of either file does
                not have the expected format.
        """
        seqs = []
        # load sequence terms
        stripped = os.path.join(oeis_path, "stripped")
        # Explicit encoding so parsing does not depend on the platform's
        # locale default (the OEIS dumps are presumably UTF-8 -- confirm).
        with open(stripped, encoding="utf-8") as file:
            pattern = re.compile("^A([0-9]+) ,([\\-0-9,]+),$")
            for line in file:
                match = cls.__parse_line(line, pattern)
                if not match:
                    continue
                seq_id = int(match.group(1))
                cls.__fill_seqs(seqs, seq_id)
                seqs[seq_id].id = seq_id
                seqs[seq_id].terms = [int(t) for t in match.group(2).split(",")]
        # load sequence names
        names = os.path.join(oeis_path, "names")
        with open(names, encoding="utf-8") as file:
            pattern = re.compile("^A([0-9]+) (.+)$")
            for line in file:
                match = cls.__parse_line(line, pattern)
                if not match:
                    continue
                seq_id = int(match.group(1))
                cls.__fill_seqs(seqs, seq_id)
                seqs[seq_id].name = match.group(2)
        return seqs

    @classmethod
    def __parse_line(cls, line: str, pattern):
        """Match one stripped input line against `pattern`.

        Returns:
            The match object, or None for blank lines and ``#`` comments.

        Raises:
            ValueError: If any other line does not match.
        """
        line = line.strip()
        if not line or line.startswith("#"):
            return None
        match = pattern.match(line)
        if not match:
            raise ValueError("parse error: {}".format(line))
        return match

    @classmethod
    def __fill_seqs(cls, seqs: list, id: int):
        """Append empty sequences to `seqs` until index `id` is valid."""
        # NOTE(review): the upper bound ``id + 2`` pads one slot beyond `id`;
        # ``id + 1`` would suffice for indexing. Kept so the length of the
        # list returned by `load_oeis` does not change.
        seqs.extend(Sequence(i, "", []) for i in range(len(seqs), id + 2))

    def load_b_file(self, path: str) -> list:
        """Load additional terms from a b-file.

        If the b-file does not exist locally it is downloaded first and
        stored (uncompressed) at the resolved location.

        Args:
            path: Either path to a b-file (uncompressed `b*.txt` file) or a
                folder that contains the b-files in sub-directories,
                e.g. `b/123/b123456.txt`. An empty string is treated like a
                folder (the current directory).

        Returns:
            The terms from the b-file, aligned with `self.terms`. If the
            b-file has fewer terms than `self.terms`, `self.terms` itself is
            returned.

        Raises:
            ValueError: If a line is malformed, the indices are not
                consecutive, or the terms contradict `self.terms`.
            requests.HTTPError: If the download is answered with an HTTP
                error status.
        """
        file_name = "b{:06}.txt".format(self.id)
        if not path or os.path.isdir(path):
            sub_dir = "{:03}".format(self.id // 1000)
            path = os.path.join(path, "b", sub_dir, file_name)
        if not os.path.isfile(path):
            # Local import: only the download path needs it.
            import gzip

            b_url = "http://api.loda-lang.org/miner/v1/oeis/{}.gz".format(file_name)
            print("Fetching {}".format(b_url))
            req = requests.get(b_url, timeout=30)
            # Fail here rather than storing an HTTP error page as a b-file.
            req.raise_for_status()
            parent = os.path.dirname(path)
            if parent:
                os.makedirs(parent, exist_ok=True)
            # Decompress in-process instead of shelling out to `gunzip`,
            # whose absence or failure used to go unnoticed.
            with open(path, "wb") as out:
                out.write(gzip.decompress(req.content))
        terms = []
        with open(path) as b_file:
            expected_index = -1  # -1: no data line seen yet
            for line in b_file:
                line = line.strip()
                if not line or line[0] == "#":
                    continue
                fields = line.split()
                if len(fields) < 2:
                    raise ValueError("unexpected line: {}".format(line))
                index = int(fields[0])
                value = int(fields[1])
                if expected_index == -1:
                    # The first data line defines the starting index.
                    expected_index = index
                if index != expected_index:
                    raise ValueError("unexpected index: {}".format(index))
                terms.append(value)
                expected_index += 1
        terms = self.__align(terms)
        if terms is None:
            raise ValueError("unexpected terms in b-file")
        if len(terms) < len(self.terms):
            terms = self.terms
        elif terms[0:len(self.terms)] != self.terms:
            raise ValueError("unexpected terms in b-file")
        return terms

    def __align(self, terms: list, max_offset: int = 10) -> list:
        """Align terms from a b-file with `self.terms`, shifting if needed.

        Args:
            terms: Terms read from the b-file.
            max_offset: Largest shift (in either direction) that is tried.

        Returns:
            `terms` unchanged if both lists agree on their common prefix;
            `terms` without its first `offset` entries if the b-file starts
            earlier; the first `offset` known terms followed by `terms` if
            the b-file starts later; None if no tried offset fits.
        """
        known = self.terms
        # check if they agree on prefix already
        min_length = min(len(known), len(terms))
        if known[0:min_length] == terms[0:min_length]:
            return terms
        # try to align them
        for offset in range(1, max_offset + 1):
            if offset >= min_length:
                break
            # b-file starts `offset` terms before the known terms
            agree_pos = all(
                terms[i + offset] == known[i]
                for i in range(min_length)
                if i + offset < len(terms)
            )
            # b-file starts `offset` terms after the known terms
            agree_neg = all(
                terms[i] == known[i + offset]
                for i in range(min_length)
                if i + offset < len(known)
            )
            if agree_pos:
                return terms[offset:]
            if agree_neg:
                return known[0:offset] + terms
        return None

    @classmethod
    def load_id_list(cls, path: str) -> list:
        """Load sequence IDs from a text file.

        Each data line starts with an A-number; anything after it is
        ignored. Blank lines and ``#`` comments are skipped.

        Raises:
            ValueError: If a data line does not start with an A-number.
        """
        pattern = re.compile("^A([0-9]+)(:?.*)$")
        with open(path) as list_file:
            matches = (cls.__parse_line(line, pattern) for line in list_file)
            return [int(match.group(1)) for match in matches if match]
Classes
class Sequence (id: int, name='', terms=[])
-
Expand source code
@functools.total_ordering
class Sequence:
    """Integer sequence with a numeric ID, a name and a list of terms.

    Instances order by ``terms`` first and by ``id`` when the terms are equal;
    ``functools.total_ordering`` derives the remaining comparison operators
    from ``__eq__`` and ``__lt__``. Because ``__eq__`` is defined without
    ``__hash__``, instances are unhashable.
    """

    def __init__(self, id: int, name="", terms=None):
        """Create a sequence.

        Args:
            id: Numeric sequence ID (see `id_str` for the textual form).
            name: Name of the sequence.
            terms: List of integer terms; a new empty list when omitted.
        """
        self.id = id
        self.name = name
        # A ``terms=[]`` default would be a single list shared by every
        # instance created without explicit terms, so build a fresh one.
        self.terms = [] if terms is None else terms

    def __str__(self) -> str:
        """Return ``"<A-number>: <name>"``."""
        return "{}: {}".format(self.id_str(), self.name)

    def __eq__(self, other) -> bool:
        """Two sequences are equal when both ID and terms are equal."""
        if not isinstance(other, Sequence):
            # Let Python try the reflected comparison instead of failing
            # with AttributeError on a foreign operand.
            return NotImplemented
        return self.id == other.id and self.terms == other.terms

    def __lt__(self, other) -> bool:
        """Order by terms, using the ID as tie-breaker."""
        if not isinstance(other, Sequence):
            return NotImplemented
        # Tuple comparison: terms decide unless they are equal, then the ID.
        return (self.terms, self.id) < (other.terms, other.id)

    def id_str(self) -> str:
        """Return the ID as ``A`` followed by the number zero-padded to six digits."""
        return "A{:06}".format(self.id)

    @classmethod
    def load_oeis(cls, oeis_path: str) -> list:
        """Load sequences from the `stripped` and `names` files.

        Args:
            oeis_path: Folder that contains the two files.

        Returns:
            A list of `Sequence` objects indexed by sequence ID. IDs that do
            not occur in the files are present as sequences with an empty
            name and no terms.

        Raises:
            ValueError: If a non-blank, non-comment line of either file does
                not have the expected format.
        """
        seqs = []
        # load sequence terms
        stripped = os.path.join(oeis_path, "stripped")
        # Explicit encoding so parsing does not depend on the platform's
        # locale default (the OEIS dumps are presumably UTF-8 -- confirm).
        with open(stripped, encoding="utf-8") as file:
            pattern = re.compile("^A([0-9]+) ,([\\-0-9,]+),$")
            for line in file:
                match = cls.__parse_line(line, pattern)
                if not match:
                    continue
                seq_id = int(match.group(1))
                cls.__fill_seqs(seqs, seq_id)
                seqs[seq_id].id = seq_id
                seqs[seq_id].terms = [int(t) for t in match.group(2).split(",")]
        # load sequence names
        names = os.path.join(oeis_path, "names")
        with open(names, encoding="utf-8") as file:
            pattern = re.compile("^A([0-9]+) (.+)$")
            for line in file:
                match = cls.__parse_line(line, pattern)
                if not match:
                    continue
                seq_id = int(match.group(1))
                cls.__fill_seqs(seqs, seq_id)
                seqs[seq_id].name = match.group(2)
        return seqs

    @classmethod
    def __parse_line(cls, line: str, pattern):
        """Match one stripped input line against `pattern`.

        Returns:
            The match object, or None for blank lines and ``#`` comments.

        Raises:
            ValueError: If any other line does not match.
        """
        line = line.strip()
        if not line or line.startswith("#"):
            return None
        match = pattern.match(line)
        if not match:
            raise ValueError("parse error: {}".format(line))
        return match

    @classmethod
    def __fill_seqs(cls, seqs: list, id: int):
        """Append empty sequences to `seqs` until index `id` is valid."""
        # NOTE(review): the upper bound ``id + 2`` pads one slot beyond `id`;
        # ``id + 1`` would suffice for indexing. Kept so the length of the
        # list returned by `load_oeis` does not change.
        seqs.extend(Sequence(i, "", []) for i in range(len(seqs), id + 2))

    def load_b_file(self, path: str) -> list:
        """Load additional terms from a b-file.

        If the b-file does not exist locally it is downloaded first and
        stored (uncompressed) at the resolved location.

        Args:
            path: Either path to a b-file (uncompressed `b*.txt` file) or a
                folder that contains the b-files in sub-directories,
                e.g. `b/123/b123456.txt`. An empty string is treated like a
                folder (the current directory).

        Returns:
            The terms from the b-file, aligned with `self.terms`. If the
            b-file has fewer terms than `self.terms`, `self.terms` itself is
            returned.

        Raises:
            ValueError: If a line is malformed, the indices are not
                consecutive, or the terms contradict `self.terms`.
            requests.HTTPError: If the download is answered with an HTTP
                error status.
        """
        file_name = "b{:06}.txt".format(self.id)
        if not path or os.path.isdir(path):
            sub_dir = "{:03}".format(self.id // 1000)
            path = os.path.join(path, "b", sub_dir, file_name)
        if not os.path.isfile(path):
            # Local import: only the download path needs it.
            import gzip

            b_url = "http://api.loda-lang.org/miner/v1/oeis/{}.gz".format(file_name)
            print("Fetching {}".format(b_url))
            req = requests.get(b_url, timeout=30)
            # Fail here rather than storing an HTTP error page as a b-file.
            req.raise_for_status()
            parent = os.path.dirname(path)
            if parent:
                os.makedirs(parent, exist_ok=True)
            # Decompress in-process instead of shelling out to `gunzip`,
            # whose absence or failure used to go unnoticed.
            with open(path, "wb") as out:
                out.write(gzip.decompress(req.content))
        terms = []
        with open(path) as b_file:
            expected_index = -1  # -1: no data line seen yet
            for line in b_file:
                line = line.strip()
                if not line or line[0] == "#":
                    continue
                fields = line.split()
                if len(fields) < 2:
                    raise ValueError("unexpected line: {}".format(line))
                index = int(fields[0])
                value = int(fields[1])
                if expected_index == -1:
                    # The first data line defines the starting index.
                    expected_index = index
                if index != expected_index:
                    raise ValueError("unexpected index: {}".format(index))
                terms.append(value)
                expected_index += 1
        terms = self.__align(terms)
        if terms is None:
            raise ValueError("unexpected terms in b-file")
        if len(terms) < len(self.terms):
            terms = self.terms
        elif terms[0:len(self.terms)] != self.terms:
            raise ValueError("unexpected terms in b-file")
        return terms

    def __align(self, terms: list, max_offset: int = 10) -> list:
        """Align terms from a b-file with `self.terms`, shifting if needed.

        Args:
            terms: Terms read from the b-file.
            max_offset: Largest shift (in either direction) that is tried.

        Returns:
            `terms` unchanged if both lists agree on their common prefix;
            `terms` without its first `offset` entries if the b-file starts
            earlier; the first `offset` known terms followed by `terms` if
            the b-file starts later; None if no tried offset fits.
        """
        known = self.terms
        # check if they agree on prefix already
        min_length = min(len(known), len(terms))
        if known[0:min_length] == terms[0:min_length]:
            return terms
        # try to align them
        for offset in range(1, max_offset + 1):
            if offset >= min_length:
                break
            # b-file starts `offset` terms before the known terms
            agree_pos = all(
                terms[i + offset] == known[i]
                for i in range(min_length)
                if i + offset < len(terms)
            )
            # b-file starts `offset` terms after the known terms
            agree_neg = all(
                terms[i] == known[i + offset]
                for i in range(min_length)
                if i + offset < len(known)
            )
            if agree_pos:
                return terms[offset:]
            if agree_neg:
                return known[0:offset] + terms
        return None

    @classmethod
    def load_id_list(cls, path: str) -> list:
        """Load sequence IDs from a text file.

        Each data line starts with an A-number; anything after it is
        ignored. Blank lines and ``#`` comments are skipped.

        Raises:
            ValueError: If a data line does not start with an A-number.
        """
        pattern = re.compile("^A([0-9]+)(:?.*)$")
        with open(path) as list_file:
            matches = (cls.__parse_line(line, pattern) for line in list_file)
            return [int(match.group(1)) for match in matches if match]
Static methods
def load_id_list(path: str) ‑> list
-
Load sequence IDs from a text file
Expand source code
@classmethod
def load_id_list(cls, path: str) -> list:
    """Load sequence IDs from a text file.

    Every line is handed to the class's line parser together with a pattern
    that expects an A-number at the start of the line; the numeric part of
    each match is collected.

    Args:
        path: Path of the text file to read.

    Returns:
        The IDs as integers, in file order.
    """
    id_pattern = re.compile("^A([0-9]+)(:?.*)$")
    with open(path) as list_file:
        # The parser yields a falsy value for lines that carry no ID.
        parsed = (cls.__parse_line(row, id_pattern) for row in list_file)
        return [int(hit.group(1)) for hit in parsed if hit]
def load_oeis(oeis_path: str) ‑> list
-
Load sequences from the `stripped` and `names` files.
Expand source code
@classmethod
def load_oeis(cls, oeis_path: str) -> list:
    """Load sequences from the `stripped` and `names` files.

    Args:
        oeis_path: Folder that contains the two files.

    Returns:
        A list of sequences indexed by sequence ID, with terms taken from
        `stripped` and names taken from `names`.
    """
    seqs = []
    # load sequence terms
    stripped = os.path.join(oeis_path, "stripped")
    # Explicit encoding so parsing does not depend on the platform's
    # locale default (the OEIS dumps are presumably UTF-8 -- confirm).
    with open(stripped, encoding="utf-8") as file:
        pattern = re.compile("^A([0-9]+) ,([\\-0-9,]+),$")
        for line in file:
            match = cls.__parse_line(line, pattern)
            if not match:
                continue
            seq_id = int(match.group(1))
            # Grow the list so that seqs[seq_id] exists.
            cls.__fill_seqs(seqs, seq_id)
            seqs[seq_id].id = seq_id
            seqs[seq_id].terms = [int(t) for t in match.group(2).split(",")]
    # load sequence names
    names = os.path.join(oeis_path, "names")
    with open(names, encoding="utf-8") as file:
        pattern = re.compile("^A([0-9]+) (.+)$")
        for line in file:
            match = cls.__parse_line(line, pattern)
            if not match:
                continue
            seq_id = int(match.group(1))
            cls.__fill_seqs(seqs, seq_id)
            seqs[seq_id].name = match.group(2)
    return seqs
Methods
def id_str(self) ‑> str
-
Expand source code
def id_str(self) -> str:
    """Return the ID as ``A`` followed by the number zero-padded to six digits."""
    return f"A{self.id:06}"
def load_b_file(self, path: str) ‑> list
-
Load additional terms from a b-file.
Args
path
- Either the path to a b-file (an uncompressed `b*.txt` file) or a folder that contains the b-files in sub-directories, e.g. `b/123/b123456.txt`.
Expand source code
def load_b_file(self, path: str) -> list:
    """Load additional terms from a b-file.

    If the b-file does not exist locally it is downloaded first and stored
    (uncompressed) at the resolved location.

    Args:
        path: Either path to a b-file (uncompressed `b*.txt` file) or a
            folder that contains the b-files in sub-directories,
            e.g. `b/123/b123456.txt`. An empty string is treated like a
            folder (the current directory).

    Returns:
        The terms from the b-file, aligned with `self.terms`. If the b-file
        has fewer terms than `self.terms`, `self.terms` itself is returned.

    Raises:
        ValueError: If a line is malformed, the indices are not consecutive,
            or the terms contradict `self.terms`.
        requests.HTTPError: If the download is answered with an HTTP error
            status.
    """
    file_name = "b{:06}.txt".format(self.id)
    if not path or os.path.isdir(path):
        sub_dir = "{:03}".format(self.id // 1000)
        path = os.path.join(path, "b", sub_dir, file_name)
    if not os.path.isfile(path):
        # Local import: only the download path needs it.
        import gzip

        b_url = "http://api.loda-lang.org/miner/v1/oeis/{}.gz".format(file_name)
        print("Fetching {}".format(b_url))
        req = requests.get(b_url, timeout=30)
        # Fail here rather than storing an HTTP error page as a b-file.
        req.raise_for_status()
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        # Decompress in-process instead of shelling out to `gunzip`,
        # whose absence or failure used to go unnoticed.
        with open(path, "wb") as out:
            out.write(gzip.decompress(req.content))
    terms = []
    with open(path) as b_file:
        expected_index = -1  # -1: no data line seen yet
        for line in b_file:
            line = line.strip()
            if not line or line[0] == "#":
                continue
            fields = line.split()
            if len(fields) < 2:
                raise ValueError("unexpected line: {}".format(line))
            index = int(fields[0])
            value = int(fields[1])
            if expected_index == -1:
                # The first data line defines the starting index.
                expected_index = index
            if index != expected_index:
                raise ValueError("unexpected index: {}".format(index))
            terms.append(value)
            expected_index += 1
    # Reconcile the b-file terms with the already known terms.
    terms = self.__align(terms)
    if terms is None:
        raise ValueError("unexpected terms in b-file")
    if len(terms) < len(self.terms):
        terms = self.terms
    elif terms[0:len(self.terms)] != self.terms:
        raise ValueError("unexpected terms in b-file")
    return terms