Source code for bioarch.age
#!/usr/bin/env python
import enum
from enum import Enum
import functools
import logging
from typing import Any, cast, Optional
import pandas as pd
from pandas.api.types import CategoricalDtype
logger = logging.getLogger(__name__)
@functools.total_ordering
@enum.unique
class AgeCategory(Enum):
    """Enumeration of age categories, ordered by their integer value.

    ``functools.total_ordering`` derives ``<=``, ``>`` and ``>=`` from
    :meth:`__lt__`, so members can be compared and sorted.  Note that
    ``ADULT`` carries the highest value (6) and therefore sorts after ``OLD``.
    """

    UNKNOWN = 0
    YOUNG = 1
    YOUNG_ADULT = 2
    MIDDLE = 3
    MIDDLE_OLD = 4
    OLD = 5
    ADULT = 6

    @staticmethod
    def parse(value: Any) -> Optional['AgeCategory']:
        """Convert *value* into an :class:`AgeCategory`.

        Accepted inputs:

        * ``None`` -- returned unchanged (``None``).
        * an ``AgeCategory`` member -- returned unchanged.
        * a string -- matched case-insensitively, ignoring surrounding
          whitespace, against the member names and the aliases ``'OA'``
          (OLD), ``'MIDDLE/OLD'`` (MIDDLE_OLD) and ``'YOUNG ADULT'``
          (YOUNG_ADULT).

        Raises:
            ValueError: if *value* is of any other type, or is a string that
                matches neither a member name nor an alias.
        """
        if value is None:
            return None
        if isinstance(value, AgeCategory):
            return value
        if not isinstance(value, str):
            raise ValueError(f'Failed to parse {AgeCategory.__name__}: "{value}"')

        # Kept local on purpose: a plain dict assigned in an Enum class body
        # would itself be turned into an enum member.
        aliases = {
            'OA': AgeCategory.OLD,
            'MIDDLE/OLD': AgeCategory.MIDDLE_OLD,
            'YOUNG ADULT': AgeCategory.YOUNG_ADULT,
        }
        key = value.strip().upper()
        member = AgeCategory.__members__.get(key)
        if member is None:
            member = aliases.get(key)
        if member is None:
            # Report the text exactly as the caller supplied it.
            raise ValueError(f'Failed to parse {AgeCategory.__name__}: "{value}"')
        return member

    def as_quad(self) -> 'AgeCategory':
        """Return the coarser category this member collapses into.

        ``YOUNG_ADULT`` becomes ``YOUNG`` and ``MIDDLE_OLD`` becomes
        ``MIDDLE``; ``UNKNOWN``, ``YOUNG``, ``ADULT``, ``MIDDLE`` and ``OLD``
        are returned as they are.

        Raises:
            RuntimeError: for a member not covered above (only possible if a
                new member is added without updating this method).
        """
        if self is AgeCategory.YOUNG_ADULT:
            return AgeCategory.YOUNG
        if self is AgeCategory.MIDDLE_OLD:
            return AgeCategory.MIDDLE
        unchanged = (
            AgeCategory.UNKNOWN,
            AgeCategory.YOUNG,
            AgeCategory.ADULT,
            AgeCategory.MIDDLE,
            AgeCategory.OLD,
        )
        if self in unchanged:
            return self
        raise RuntimeError(f'Unknown AgeCategory.as_quad: {self}')

    def __lt__(self, other):
        """Return whether this member's value is lower than *other*'s.

        * ``None`` is never greater than a member: the result is ``False``.
        * An ``int`` is first converted with ``AgeCategory(other)``, which
          raises ``ValueError`` when no member has that value.
        * Any other non-``AgeCategory`` operand is logged and rejected with
          ``NotImplementedError``.
        """
        if other is None:
            return False
        if isinstance(other, int):
            other = AgeCategory(other)
        if not isinstance(other, AgeCategory):
            logger.warning('Attempt to compare: %s with %s', self, other)
            # NOTE(review): the comparison protocol normally expects
            # ``return NotImplemented`` here; the exception is kept so that
            # existing callers observe the same error type.
            raise NotImplementedError
        return self.value < other.value

    def __repr__(self):
        """Return ``'<class name>: <member name>'``."""
        return f'{self.__class__.__name__}: {self}'

    def __str__(self):
        """Return the bare member name, e.g. ``'OLD'``."""
        return self.name

    @staticmethod
    def dtype() -> CategoricalDtype:
        """Return an ordered pandas categorical dtype of the member names.

        The categories are listed in member definition order.
        """
        return CategoricalDtype(categories=[s.name for s in AgeCategory], ordered=True)
class EstimatedAge:
    """An age estimate: a category plus an optional numeric range.

    Attributes (set by ``__init__``):
        category: result of ``AgeCategory.parse`` on the given category;
            ``None`` when the category passed in was ``None``.
        ranged: a ``range`` of ages, or ``None`` when no range is known.
    """

    # Exclusive upper bound used for open-ended ranges such as "45+".
    MAX_AGE = 100

    def __init__(self, category: str, ranged: Optional[str]):
        """Parse *category* and *ranged* into their structured forms.

        Raises:
            ValueError: if either argument cannot be parsed.
        """
        self.category = AgeCategory.parse(category)
        self.ranged = EstimatedAge._parse_range(ranged)

    @staticmethod
    def _parse_range(range_input: Any):
        """Turn a textual age range into a ``range`` object.

        Accepted inputs (strings are stripped of surrounding whitespace):

        * ``None`` or one of ``'None'``, ``'?'``, ``'UNKNOWN'`` -> ``None``
        * a ``range`` instance -> returned unchanged
        * ``'A-B'`` -> ``range(A, B)`` (``B`` itself is excluded)
        * ``'A+'``  -> ``range(A, MAX_AGE)``
        * ``'=A'``  -> ``range(A, A + 1)``

        Raises:
            ValueError: for any other type, an unrecognised layout, or bounds
                that are not integers.
        """
        if range_input is None:
            return None
        if isinstance(range_input, range):
            return range_input
        if not isinstance(range_input, str):
            raise ValueError(f'Unknown age range format: "{range_input}" type({type(range_input)})')

        text = range_input.strip()
        if text in ('None', '?', 'UNKNOWN'):
            return None

        try:
            if '-' in text:
                bounds = text.split('-')
                if len(bounds) == 2:
                    return range(int(bounds[0]), int(bounds[1]))
                # More than one '-' (e.g. '1-2-3'): falls through to the
                # generic error below.
            elif text.endswith('+'):
                return range(int(text[:-1]), EstimatedAge.MAX_AGE)
            elif text.startswith('='):
                age = int(text[1:])
                return range(age, age + 1)
        except ValueError as err:
            # int() rejected one of the bounds; report the whole input rather
            # than the fragment int() complained about.
            raise ValueError(f'Unknown age range format: "{range_input}"') from err
        raise ValueError(f'Unknown age range format: "{range_input}"')

    @staticmethod
    def empty():
        """Return an estimate with category ``UNKNOWN`` and no range."""
        return EstimatedAge('UNKNOWN', 'UNKNOWN')

    def to_pd_data_frame(self, index):
        """Build a one-row ``DataFrame`` describing this estimate.

        The frame is indexed by ``id`` (set to *index*) and has the columns
        ``category_cat`` / ``category_val`` (name and value of the category)
        and ``category_quad_cat`` / ``category_quad_val`` (the same for
        ``category.as_quad()``).  A ``ranged`` column holding a
        ``pd.RangeIndex`` is added only when ``self.ranged`` is truthy, so
        both ``None`` and an empty range are left out.

        ``self.category`` must not be ``None``; otherwise the attribute
        access below raises ``AttributeError``.
        """
        category = self.category
        quad = category.as_quad()
        columns = {
            'id': pd.Series([index]),
            'category_cat': pd.Series([category.name], copy=True, dtype=AgeCategory.dtype()),
            'category_val': pd.Series([category.value], copy=True),
            'category_quad_cat': pd.Series([quad.name], copy=True, dtype=AgeCategory.dtype()),
            'category_quad_val': pd.Series([quad.value], copy=True),
        }
        if self.ranged:
            columns['ranged'] = pd.Series([pd.RangeIndex.from_range(self.ranged)], copy=True)
        return pd.DataFrame.from_dict(columns).set_index('id')
# Library-only module: there is no command-line entry point, so running the
# file directly fails immediately rather than silently doing nothing.
if __name__ == "__main__":
    raise RuntimeError('No main available')