- # Copyright (c) OpenMMLab. All rights reserved.
- import copy
- import inspect
- import math
- from typing import List, Optional, Sequence, Tuple, Union
- import cv2
- import mmcv
- import numpy as np
- from mmcv.image.geometric import _scale_size
- from mmcv.transforms import BaseTransform
- from mmcv.transforms import Pad as MMCV_Pad
- from mmcv.transforms import RandomFlip as MMCV_RandomFlip
- from mmcv.transforms import Resize as MMCV_Resize
- from mmcv.transforms.utils import avoid_cache_randomness, cache_randomness
- from mmengine.dataset import BaseDataset
- from mmengine.utils import is_str
- from numpy import random
- from mmdet.registry import TRANSFORMS
- from mmdet.structures.bbox import HorizontalBoxes, autocast_box_type
- from mmdet.structures.mask import BitmapMasks, PolygonMasks
- from mmdet.utils import log_img_scale
- try:
- from imagecorruptions import corrupt
- except ImportError:
- corrupt = None
- try:
- import albumentations
- from albumentations import Compose
- except ImportError:
- albumentations = None
- Compose = None
- Number = Union[int, float]
- @TRANSFORMS.register_module()
- class Resize(MMCV_Resize):
- """Resize images & bbox & seg.
- This transform resizes the input image according to ``scale`` or
- ``scale_factor``. Bboxes, masks, and seg map are then resized
- with the same scale factor.
- If ``scale`` and ``scale_factor`` are both set, ``scale`` will be
- used for resizing.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - gt_seg_map (np.uint8) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes
- - gt_masks
- - gt_seg_map
- Added Keys:
- - scale
- - scale_factor
- - keep_ratio
- - homography_matrix
- Args:
- scale (int or tuple): Image scales for resizing. Defaults to None.
- scale_factor (float or tuple[float]): Scale factors for resizing.
- Defaults to None.
- keep_ratio (bool): Whether to keep the aspect ratio when resizing the
- image. Defaults to False.
- clip_object_border (bool): Whether to clip the objects
- outside the border of the image. In some dataset like MOT17, the gt
- bboxes are allowed to cross the border of images. Therefore, we
- don't need to clip the gt bboxes in these cases. Defaults to True.
- backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
- These two backends generate slightly different results. Defaults
- to 'cv2'.
- interpolation (str): Interpolation method, accepted values are
- "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
- backend, "nearest", "bilinear" for 'pillow' backend. Defaults
- to 'bilinear'.
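- A minimal usage sketch (illustrative only; in practice the results
- dict is produced by an upstream loading transform):
- .. code-block::
-     import numpy as np
-     transform = Resize(scale=(100, 50), keep_ratio=False)
-     results = dict(img=np.zeros((30, 40, 3), dtype=np.uint8))
-     results = transform(results)
-     # ``scale`` is (w, h), so results['img_shape'] is now (50, 100)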
- """
- def _resize_masks(self, results: dict) -> None:
- """Resize masks with ``results['scale']``"""
- if results.get('gt_masks', None) is not None:
- if self.keep_ratio:
- results['gt_masks'] = results['gt_masks'].rescale(
- results['scale'])
- else:
- results['gt_masks'] = results['gt_masks'].resize(
- results['img_shape'])
- def _resize_bboxes(self, results: dict) -> None:
- """Resize bounding boxes with ``results['scale_factor']``."""
- if results.get('gt_bboxes', None) is not None:
- results['gt_bboxes'].rescale_(results['scale_factor'])
- if self.clip_object_border:
- results['gt_bboxes'].clip_(results['img_shape'])
- def _resize_seg(self, results: dict) -> None:
- """Resize semantic segmentation map with ``results['scale']``."""
- if results.get('gt_seg_map', None) is not None:
- if self.keep_ratio:
- gt_seg = mmcv.imrescale(
- results['gt_seg_map'],
- results['scale'],
- interpolation='nearest',
- backend=self.backend)
- else:
- gt_seg = mmcv.imresize(
- results['gt_seg_map'],
- results['scale'],
- interpolation='nearest',
- backend=self.backend)
- results['gt_seg_map'] = gt_seg
- def _record_homography_matrix(self, results: dict) -> None:
- """Record the homography matrix for the Resize."""
- w_scale, h_scale = results['scale_factor']
- homography_matrix = np.array(
- [[w_scale, 0, 0], [0, h_scale, 0], [0, 0, 1]], dtype=np.float32)
- if results.get('homography_matrix', None) is None:
- results['homography_matrix'] = homography_matrix
- else:
- results['homography_matrix'] = homography_matrix @ results[
- 'homography_matrix']
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Transform function to resize images, bounding boxes and semantic
- segmentation map.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Resized results; 'img', 'gt_bboxes', 'gt_seg_map',
- 'scale', 'scale_factor', 'img_shape', and 'keep_ratio' keys
- are updated in the result dict.
- """
- if self.scale:
- results['scale'] = self.scale
- else:
- img_shape = results['img'].shape[:2]
- results['scale'] = _scale_size(img_shape[::-1], self.scale_factor)
- self._resize_img(results)
- self._resize_bboxes(results)
- self._resize_masks(results)
- self._resize_seg(results)
- self._record_homography_matrix(results)
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(scale={self.scale}, '
- repr_str += f'scale_factor={self.scale_factor}, '
- repr_str += f'keep_ratio={self.keep_ratio}, '
- repr_str += f'clip_object_border={self.clip_object_border}, '
- repr_str += f'backend={self.backend}, '
- repr_str += f'interpolation={self.interpolation})'
- return repr_str
- @TRANSFORMS.register_module()
- class FixShapeResize(Resize):
- """Resize images & bbox & seg to the specified size.
- This transform resizes the input image according to ``width`` and
- ``height``. Bboxes, masks, and seg map are then resized
- with the same parameters.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - gt_seg_map (np.uint8) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes
- - gt_masks
- - gt_seg_map
- Added Keys:
- - scale
- - scale_factor
- - keep_ratio
- - homography_matrix
- Args:
- width (int): Width for resizing.
- height (int): Height for resizing.
- pad_val (Number | dict[str, Number], optional): Padding value if
- the pad_mode is "constant". If it is a single number, the value
- to pad the image is the number and to pad the semantic
- segmentation map is 255. If it is a dict, it should have the
- following keys:
- - img: The value to pad the image.
- - seg: The value to pad the semantic segmentation map.
- Defaults to dict(img=0, seg=255).
- keep_ratio (bool): Whether to keep the aspect ratio when resizing the
- image. Defaults to False.
- clip_object_border (bool): Whether to clip the objects
- outside the border of the image. In some dataset like MOT17, the gt
- bboxes are allowed to cross the border of images. Therefore, we
- don't need to clip the gt bboxes in these cases. Defaults to True.
- backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
- These two backends generate slightly different results. Defaults
- to 'cv2'.
- interpolation (str): Interpolation method, accepted values are
- "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
- backend, "nearest", "bilinear" for 'pillow' backend. Defaults
- to 'bilinear'.
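- A minimal usage sketch (illustrative only):
- .. code-block::
-     import numpy as np
-     transform = FixShapeResize(width=100, height=50, keep_ratio=True)
-     results = dict(img=np.zeros((30, 40, 3), dtype=np.uint8))
-     results = transform(results)
-     # the image is rescaled to fit and then padded, so
-     # results['img'].shape is (50, 100, 3)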
- """
- def __init__(self,
- width: int,
- height: int,
- pad_val: Union[Number, dict] = dict(img=0, seg=255),
- keep_ratio: bool = False,
- clip_object_border: bool = True,
- backend: str = 'cv2',
- interpolation: str = 'bilinear') -> None:
- assert width is not None and height is not None, (
- '`width` and `height` cannot be `None`')
- self.width = width
- self.height = height
- self.scale = (width, height)
- self.backend = backend
- self.interpolation = interpolation
- self.keep_ratio = keep_ratio
- self.clip_object_border = clip_object_border
- if keep_ratio is True:
- # padding to the fixed size when keep_ratio=True
- self.pad_transform = Pad(size=self.scale, pad_val=pad_val)
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Transform function to resize images, bounding boxes and semantic
- segmentation map.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Resized results; 'img', 'gt_bboxes', 'gt_seg_map',
- 'scale', 'scale_factor', 'img_shape', and 'keep_ratio' keys
- are updated in the result dict.
- """
- img = results['img']
- h, w = img.shape[:2]
- if self.keep_ratio:
- scale_factor = min(self.width / w, self.height / h)
- results['scale_factor'] = (scale_factor, scale_factor)
- real_w, real_h = int(w * float(scale_factor) +
- 0.5), int(h * float(scale_factor) + 0.5)
- img, scale_factor = mmcv.imrescale(
- results['img'], (real_w, real_h),
- interpolation=self.interpolation,
- return_scale=True,
- backend=self.backend)
- # w_scale and h_scale may have a minor difference;
- # a real fix should be done in mmcv.imrescale in the future
- results['img'] = img
- results['img_shape'] = img.shape[:2]
- results['keep_ratio'] = self.keep_ratio
- results['scale'] = (real_w, real_h)
- else:
- results['scale'] = (self.width, self.height)
- results['scale_factor'] = (self.width / w, self.height / h)
- super()._resize_img(results)
- self._resize_bboxes(results)
- self._resize_masks(results)
- self._resize_seg(results)
- self._record_homography_matrix(results)
- if self.keep_ratio:
- self.pad_transform(results)
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(width={self.width}, height={self.height}, '
- repr_str += f'keep_ratio={self.keep_ratio}, '
- repr_str += f'clip_object_border={self.clip_object_border}, '
- repr_str += f'backend={self.backend}, '
- repr_str += f'interpolation={self.interpolation})'
- return repr_str
- @TRANSFORMS.register_module()
- class RandomFlip(MMCV_RandomFlip):
- """Flip the image & bbox & mask & segmentation map. Added or Updated keys:
- flip, flip_direction, img, gt_bboxes, and gt_seg_map. There are 3 flip
- modes:
- - ``prob`` is float, ``direction`` is string: the image will be
- ``direction``ly flipped with probability of ``prob``.
- E.g., ``prob=0.5``, ``direction='horizontal'``,
- then image will be horizontally flipped with probability of 0.5.
- - ``prob`` is float, ``direction`` is list of string: the image will
- be ``direction[i]``ly flipped with probability of
- ``prob/len(direction)``.
- E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``,
- then image will be horizontally flipped with probability of 0.25,
- vertically with probability of 0.25.
- - ``prob`` is list of float, ``direction`` is list of string:
- given ``len(prob) == len(direction)``, the image will
- be ``direction[i]``ly flipped with probability of ``prob[i]``.
- E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal',
- 'vertical']``, then image will be horizontally flipped with
- probability of 0.3, vertically with probability of 0.5.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - gt_seg_map (np.uint8) (optional)
- Modified Keys:
- - img
- - gt_bboxes
- - gt_masks
- - gt_seg_map
- Added Keys:
- - flip
- - flip_direction
- - homography_matrix
- Args:
- prob (float | list[float], optional): The flipping probability.
- Defaults to None.
- direction (str | list[str]): The flipping direction. Options are
- 'horizontal', 'vertical', and 'diagonal'. If input is a list, the
- length must equal ``prob``. Each element in ``prob`` indicates the
- flip probability of the corresponding direction. Defaults to
- 'horizontal'.
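- A minimal usage sketch (illustrative only; bbox/mask/seg keys are
- flipped as well when present):
- .. code-block::
-     import numpy as np
-     transform = RandomFlip(prob=0.5, direction='horizontal')
-     results = dict(img=np.zeros((30, 40, 3), dtype=np.uint8))
-     results = transform(results)
-     # results['flip'] and results['flip_direction'] record the outcome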
- """
- def _record_homography_matrix(self, results: dict) -> None:
- """Record the homography matrix for the RandomFlip."""
- cur_dir = results['flip_direction']
- h, w = results['img'].shape[:2]
- if cur_dir == 'horizontal':
- homography_matrix = np.array([[-1, 0, w], [0, 1, 0], [0, 0, 1]],
- dtype=np.float32)
- elif cur_dir == 'vertical':
- homography_matrix = np.array([[1, 0, 0], [0, -1, h], [0, 0, 1]],
- dtype=np.float32)
- elif cur_dir == 'diagonal':
- homography_matrix = np.array([[-1, 0, w], [0, -1, h], [0, 0, 1]],
- dtype=np.float32)
- else:
- homography_matrix = np.eye(3, dtype=np.float32)
- if results.get('homography_matrix', None) is None:
- results['homography_matrix'] = homography_matrix
- else:
- results['homography_matrix'] = homography_matrix @ results[
- 'homography_matrix']
- @autocast_box_type()
- def _flip(self, results: dict) -> None:
- """Flip images, bounding boxes, and semantic segmentation map."""
- # flip image
- results['img'] = mmcv.imflip(
- results['img'], direction=results['flip_direction'])
- img_shape = results['img'].shape[:2]
- # flip bboxes
- if results.get('gt_bboxes', None) is not None:
- results['gt_bboxes'].flip_(img_shape, results['flip_direction'])
- # flip masks
- if results.get('gt_masks', None) is not None:
- results['gt_masks'] = results['gt_masks'].flip(
- results['flip_direction'])
- # flip segs
- if results.get('gt_seg_map', None) is not None:
- results['gt_seg_map'] = mmcv.imflip(
- results['gt_seg_map'], direction=results['flip_direction'])
- # record homography matrix for flip
- self._record_homography_matrix(results)
- @TRANSFORMS.register_module()
- class RandomShift(BaseTransform):
- """Shift the image and box given shift pixels and probability.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32])
- - gt_bboxes_labels (np.int64)
- - gt_ignore_flags (bool) (optional)
- Modified Keys:
- - img
- - gt_bboxes
- - gt_bboxes_labels
- - gt_ignore_flags (bool) (optional)
- Args:
- prob (float): Probability of shifts. Defaults to 0.5.
- max_shift_px (int): The max pixels for shifting. Defaults to 32.
- filter_thr_px (int): The width and height threshold for filtering.
- The bbox and the rest of the targets below the width and
- height threshold will be filtered. Defaults to 1.
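- A minimal usage sketch (illustrative only; boxes are wrapped in the
- HorizontalBoxes structure from mmdet.structures.bbox):
- .. code-block::
-     import numpy as np
-     from mmdet.structures.bbox import HorizontalBoxes
-     transform = RandomShift(prob=1.0, max_shift_px=8)
-     results = dict(
-         img=np.zeros((30, 40, 3), dtype=np.uint8),
-         gt_bboxes=HorizontalBoxes(
-             np.array([[5, 5, 20, 20]], dtype=np.float32)),
-         gt_bboxes_labels=np.array([0], dtype=np.int64))
-     results = transform(results)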
- """
- def __init__(self,
- prob: float = 0.5,
- max_shift_px: int = 32,
- filter_thr_px: int = 1) -> None:
- assert 0 <= prob <= 1
- assert max_shift_px >= 0
- self.prob = prob
- self.max_shift_px = max_shift_px
- self.filter_thr_px = int(filter_thr_px)
- @cache_randomness
- def _random_prob(self) -> float:
- return random.uniform(0, 1)
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Transform function to random shift images, bounding boxes.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Shift results.
- """
- if self._random_prob() < self.prob:
- img_shape = results['img'].shape[:2]
- random_shift_x = random.randint(-self.max_shift_px,
- self.max_shift_px)
- random_shift_y = random.randint(-self.max_shift_px,
- self.max_shift_px)
- new_x = max(0, random_shift_x)
- ori_x = max(0, -random_shift_x)
- new_y = max(0, random_shift_y)
- ori_y = max(0, -random_shift_y)
- # TODO: support mask and semantic segmentation maps.
- bboxes = results['gt_bboxes'].clone()
- bboxes.translate_([random_shift_x, random_shift_y])
- # clip border
- bboxes.clip_(img_shape)
- # remove invalid bboxes
- valid_inds = (bboxes.widths > self.filter_thr_px).numpy() & (
- bboxes.heights > self.filter_thr_px).numpy()
- # If the shift does not contain any gt-bbox area, skip this
- # image.
- if not valid_inds.any():
- return results
- bboxes = bboxes[valid_inds]
- results['gt_bboxes'] = bboxes
- results['gt_bboxes_labels'] = results['gt_bboxes_labels'][
- valid_inds]
- if results.get('gt_ignore_flags', None) is not None:
- results['gt_ignore_flags'] = \
- results['gt_ignore_flags'][valid_inds]
- # shift img
- img = results['img']
- new_img = np.zeros_like(img)
- img_h, img_w = img.shape[:2]
- new_h = img_h - np.abs(random_shift_y)
- new_w = img_w - np.abs(random_shift_x)
- new_img[new_y:new_y + new_h, new_x:new_x + new_w] \
- = img[ori_y:ori_y + new_h, ori_x:ori_x + new_w]
- results['img'] = new_img
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(prob={self.prob}, '
- repr_str += f'max_shift_px={self.max_shift_px}, '
- repr_str += f'filter_thr_px={self.filter_thr_px})'
- return repr_str
- @TRANSFORMS.register_module()
- class Pad(MMCV_Pad):
- """Pad the image & segmentation map.
- There are three padding modes: (1) pad to a fixed size and (2) pad to the
- minimum size that is divisible by some number. and (3)pad to square. Also,
- pad to square and pad to the minimum size can be used as the same time.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - gt_seg_map (np.uint8) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_masks
- - gt_seg_map
- Added Keys:
- - pad_shape
- - pad_fixed_size
- - pad_size_divisor
- Args:
- size (tuple, optional): Fixed padding size.
- Expected padding shape (width, height). Defaults to None.
- size_divisor (int, optional): The divisor of padded size. Defaults to
- None.
- pad_to_square (bool): Whether to pad the image into a square.
- Currently only used for YOLOX. Defaults to False.
- pad_val (Number | dict[str, Number], optional): Padding value if
- the pad_mode is "constant". If it is a single number, the value
- to pad the image is the number and to pad the semantic
- segmentation map is 255. If it is a dict, it should have the
- following keys:
- - img: The value to pad the image.
- - seg: The value to pad the semantic segmentation map.
- Defaults to dict(img=0, seg=255).
- padding_mode (str): Type of padding. Should be: constant, edge,
- reflect or symmetric. Defaults to 'constant'.
- - constant: pads with a constant value, this value is specified
- with pad_val.
- - edge: pads with the last value at the edge of the image.
- - reflect: pads with reflection of image without repeating the last
- value on the edge. For example, padding [1, 2, 3, 4] with 2
- elements on both sides in reflect mode will result in
- [3, 2, 1, 2, 3, 4, 3, 2].
- - symmetric: pads with reflection of image repeating the last value
- on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
- both sides in symmetric mode will result in
- [2, 1, 1, 2, 3, 4, 4, 3]
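- A minimal usage sketch (illustrative only):
- .. code-block::
-     import numpy as np
-     transform = Pad(size_divisor=32, pad_val=dict(img=0, seg=255))
-     results = dict(img=np.zeros((30, 40, 3), dtype=np.uint8))
-     results = transform(results)
-     # both sides are rounded up to a multiple of 32, so
-     # results['pad_shape'] is (32, 64, 3)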
- """
- def _pad_masks(self, results: dict) -> None:
- """Pad masks according to ``results['pad_shape']``."""
- if results.get('gt_masks', None) is not None:
- pad_val = self.pad_val.get('masks', 0)
- pad_shape = results['pad_shape'][:2]
- results['gt_masks'] = results['gt_masks'].pad(
- pad_shape, pad_val=pad_val)
- def transform(self, results: dict) -> dict:
- """Call function to pad images, masks, semantic segmentation maps.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Updated result dict.
- """
- self._pad_img(results)
- self._pad_seg(results)
- self._pad_masks(results)
- return results
- @TRANSFORMS.register_module()
- class RandomCrop(BaseTransform):
- """Random crop the image & bboxes & masks.
- The absolute ``crop_size`` is sampled based on ``crop_type`` and
- ``image_size``, then the cropped results are generated.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - gt_ignore_flags (bool) (optional)
- - gt_seg_map (np.uint8) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_masks (optional)
- - gt_ignore_flags (optional)
- - gt_seg_map (optional)
- Added Keys:
- - homography_matrix
- Args:
- crop_size (tuple): The relative ratio or absolute pixels of
- (width, height).
- crop_type (str, optional): One of "relative_range", "relative",
- "absolute", "absolute_range". "relative" randomly crops
- (h * crop_size[1], w * crop_size[0]) part from an input of size
- (h, w), since ``crop_size`` is given as (width, height) ratios.
- "relative_range" uniformly samples relative crop size from
- range [crop_size[0], 1] and [crop_size[1], 1] for height and width
- respectively. "absolute" crops from an input with absolute size
- (crop_size[0], crop_size[1]), i.e. (width, height). "absolute_range"
- uniformly samples crop_h in range [crop_size[0], min(h, crop_size[1])]
- and crop_w in range [crop_size[0], min(w, crop_size[1])].
- Defaults to "absolute".
- allow_negative_crop (bool, optional): Whether to allow a crop that does
- not contain any bbox area. Defaults to False.
- recompute_bbox (bool, optional): Whether to re-compute the boxes based
- on cropped instance masks. Defaults to False.
- bbox_clip_border (bool, optional): Whether clip the objects outside
- the border of the image. Defaults to True.
- Note:
- - If the image is smaller than the absolute crop size, return the
- original image.
- - The keys for bboxes, labels and masks must be aligned. That is,
- ``gt_bboxes`` corresponds to ``gt_bboxes_labels`` and ``gt_masks``,
- and ``gt_bboxes_ignore`` corresponds to ``gt_labels_ignore`` and
- ``gt_masks_ignore``.
- - If the crop does not contain any gt-bbox region and
- ``allow_negative_crop`` is set to False, skip this image.
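- A minimal usage sketch (illustrative only; with ``crop_type='absolute'``
- the crop size is interpreted as (w, h)):
- .. code-block::
-     import numpy as np
-     transform = RandomCrop(crop_size=(30, 20), crop_type='absolute')
-     results = dict(img=np.zeros((50, 80, 3), dtype=np.uint8))
-     results = transform(results)
-     # results['img_shape'] is now (20, 30)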
- """
- def __init__(self,
- crop_size: tuple,
- crop_type: str = 'absolute',
- allow_negative_crop: bool = False,
- recompute_bbox: bool = False,
- bbox_clip_border: bool = True) -> None:
- if crop_type not in [
- 'relative_range', 'relative', 'absolute', 'absolute_range'
- ]:
- raise ValueError(f'Invalid crop_type {crop_type}.')
- if crop_type in ['absolute', 'absolute_range']:
- assert crop_size[0] > 0 and crop_size[1] > 0
- assert isinstance(crop_size[0], int) and isinstance(
- crop_size[1], int)
- if crop_type == 'absolute_range':
- assert crop_size[0] <= crop_size[1]
- else:
- assert 0 < crop_size[0] <= 1 and 0 < crop_size[1] <= 1
- self.crop_size = crop_size
- self.crop_type = crop_type
- self.allow_negative_crop = allow_negative_crop
- self.bbox_clip_border = bbox_clip_border
- self.recompute_bbox = recompute_bbox
- def _crop_data(self, results: dict, crop_size: Tuple[int, int],
- allow_negative_crop: bool) -> Union[dict, None]:
- """Function to randomly crop images, bounding boxes, masks, semantic
- segmentation maps.
- Args:
- results (dict): Result dict from loading pipeline.
- crop_size (Tuple[int, int]): Expected absolute size after
- cropping, (h, w).
- allow_negative_crop (bool): Whether to allow a crop that does not
- contain any bbox area.
- Returns:
- results (Union[dict, None]): Randomly cropped results, 'img_shape'
- key in result dict is updated according to crop size. None will
- be returned when there is no valid bbox after cropping.
- """
- assert crop_size[0] > 0 and crop_size[1] > 0
- img = results['img']
- margin_h = max(img.shape[0] - crop_size[0], 0)
- margin_w = max(img.shape[1] - crop_size[1], 0)
- offset_h, offset_w = self._rand_offset((margin_h, margin_w))
- crop_y1, crop_y2 = offset_h, offset_h + crop_size[0]
- crop_x1, crop_x2 = offset_w, offset_w + crop_size[1]
- # Record the homography matrix for the RandomCrop
- homography_matrix = np.array(
- [[1, 0, -offset_w], [0, 1, -offset_h], [0, 0, 1]],
- dtype=np.float32)
- if results.get('homography_matrix', None) is None:
- results['homography_matrix'] = homography_matrix
- else:
- results['homography_matrix'] = homography_matrix @ results[
- 'homography_matrix']
- # crop the image
- img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
- img_shape = img.shape
- results['img'] = img
- results['img_shape'] = img_shape[:2]
- # crop bboxes accordingly and clip to the image boundary
- if results.get('gt_bboxes', None) is not None:
- bboxes = results['gt_bboxes']
- bboxes.translate_([-offset_w, -offset_h])
- if self.bbox_clip_border:
- bboxes.clip_(img_shape[:2])
- valid_inds = bboxes.is_inside(img_shape[:2]).numpy()
- # If the crop does not contain any gt-bbox area and
- # allow_negative_crop is False, skip this image.
- if (not valid_inds.any() and not allow_negative_crop):
- return None
- results['gt_bboxes'] = bboxes[valid_inds]
- if results.get('gt_ignore_flags', None) is not None:
- results['gt_ignore_flags'] = \
- results['gt_ignore_flags'][valid_inds]
- if results.get('gt_bboxes_labels', None) is not None:
- results['gt_bboxes_labels'] = \
- results['gt_bboxes_labels'][valid_inds]
- if results.get('gt_masks', None) is not None:
- results['gt_masks'] = results['gt_masks'][
- valid_inds.nonzero()[0]].crop(
- np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))
- if self.recompute_bbox:
- results['gt_bboxes'] = results['gt_masks'].get_bboxes(
- type(results['gt_bboxes']))
- # crop semantic seg
- if results.get('gt_seg_map', None) is not None:
- results['gt_seg_map'] = results['gt_seg_map'][crop_y1:crop_y2,
- crop_x1:crop_x2]
- return results
- @cache_randomness
- def _rand_offset(self, margin: Tuple[int, int]) -> Tuple[int, int]:
- """Randomly generate crop offset.
- Args:
- margin (Tuple[int, int]): The upper bound for the offset generated
- randomly.
- Returns:
- Tuple[int, int]: The random offset for the crop.
- """
- margin_h, margin_w = margin
- offset_h = np.random.randint(0, margin_h + 1)
- offset_w = np.random.randint(0, margin_w + 1)
- return offset_h, offset_w
- @cache_randomness
- def _get_crop_size(self, image_size: Tuple[int, int]) -> Tuple[int, int]:
- """Randomly generates the absolute crop size based on `crop_type` and
- `image_size`.
- Args:
- image_size (Tuple[int, int]): (h, w).
- Returns:
- crop_size (Tuple[int, int]): (crop_h, crop_w) in absolute pixels.
- """
- h, w = image_size
- if self.crop_type == 'absolute':
- return min(self.crop_size[1], h), min(self.crop_size[0], w)
- elif self.crop_type == 'absolute_range':
- crop_h = np.random.randint(
- min(h, self.crop_size[0]),
- min(h, self.crop_size[1]) + 1)
- crop_w = np.random.randint(
- min(w, self.crop_size[0]),
- min(w, self.crop_size[1]) + 1)
- return crop_h, crop_w
- elif self.crop_type == 'relative':
- crop_w, crop_h = self.crop_size
- return int(h * crop_h + 0.5), int(w * crop_w + 0.5)
- else:
- # 'relative_range'
- crop_size = np.asarray(self.crop_size, dtype=np.float32)
- crop_h, crop_w = crop_size + np.random.rand(2) * (1 - crop_size)
- return int(h * crop_h + 0.5), int(w * crop_w + 0.5)
- @autocast_box_type()
- def transform(self, results: dict) -> Union[dict, None]:
- """Transform function to randomly crop images, bounding boxes, masks,
- semantic segmentation maps.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- results (Union[dict, None]): Randomly cropped results, 'img_shape'
- key in result dict is updated according to crop size. None will
- be returned when there is no valid bbox after cropping.
- """
- image_size = results['img'].shape[:2]
- crop_size = self._get_crop_size(image_size)
- results = self._crop_data(results, crop_size, self.allow_negative_crop)
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(crop_size={self.crop_size}, '
- repr_str += f'crop_type={self.crop_type}, '
- repr_str += f'allow_negative_crop={self.allow_negative_crop}, '
- repr_str += f'recompute_bbox={self.recompute_bbox}, '
- repr_str += f'bbox_clip_border={self.bbox_clip_border})'
- return repr_str
- @TRANSFORMS.register_module()
- class SegRescale(BaseTransform):
- """Rescale semantic segmentation maps.
- This transform rescales the ``gt_seg_map`` according to ``scale_factor``.
- Required Keys:
- - gt_seg_map
- Modified Keys:
- - gt_seg_map
- Args:
- scale_factor (float): The scale factor of the final output. Defaults
- to 1.
- backend (str): Image rescale backend, choices are 'cv2' and 'pillow'.
- These two backends generate slightly different results. Defaults
- to 'cv2'.
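- A minimal usage sketch (illustrative only):
- .. code-block::
-     import numpy as np
-     transform = SegRescale(scale_factor=0.5)
-     results = dict(gt_seg_map=np.zeros((40, 60), dtype=np.uint8))
-     results = transform(results)
-     # results['gt_seg_map'].shape is now (20, 30)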
- """
- def __init__(self, scale_factor: float = 1, backend: str = 'cv2') -> None:
- self.scale_factor = scale_factor
- self.backend = backend
- def transform(self, results: dict) -> dict:
- """Transform function to scale the semantic segmentation map.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Result dict with semantic segmentation map scaled.
- """
- if self.scale_factor != 1:
- results['gt_seg_map'] = mmcv.imrescale(
- results['gt_seg_map'],
- self.scale_factor,
- interpolation='nearest',
- backend=self.backend)
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(scale_factor={self.scale_factor}, '
- repr_str += f'backend={self.backend})'
- return repr_str
- @TRANSFORMS.register_module()
- class PhotoMetricDistortion(BaseTransform):
- """Apply photometric distortion to image sequentially, every transformation
- is applied with a probability of 0.5. The position of random contrast is in
- second or second to last.
- 1. random brightness
- 2. random contrast (mode 1)
- 3. convert color from BGR to HSV
- 4. random saturation
- 5. random hue
- 6. convert color from HSV to BGR
- 7. random contrast (mode 0)
- 8. randomly swap channels
- Required Keys:
- - img (np.uint8)
- Modified Keys:
- - img (np.float32)
- Args:
- brightness_delta (int): delta of brightness.
- contrast_range (sequence): range of contrast.
- saturation_range (sequence): range of saturation.
- hue_delta (int): delta of hue.
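- A minimal usage sketch (illustrative only; the input is expected to
- be a uint8 BGR image and comes out as float32):
- .. code-block::
-     import numpy as np
-     transform = PhotoMetricDistortion()
-     img = np.random.randint(0, 256, (30, 40, 3), dtype=np.uint8)
-     results = transform(dict(img=img))
-     # results['img'] is float32; the applied ops vary per call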
- """
- def __init__(self,
- brightness_delta: int = 32,
- contrast_range: Sequence[Number] = (0.5, 1.5),
- saturation_range: Sequence[Number] = (0.5, 1.5),
- hue_delta: int = 18) -> None:
- self.brightness_delta = brightness_delta
- self.contrast_lower, self.contrast_upper = contrast_range
- self.saturation_lower, self.saturation_upper = saturation_range
- self.hue_delta = hue_delta
- @cache_randomness
- def _random_flags(self) -> Sequence[Number]:
- mode = random.randint(2)
- brightness_flag = random.randint(2)
- contrast_flag = random.randint(2)
- saturation_flag = random.randint(2)
- hue_flag = random.randint(2)
- swap_flag = random.randint(2)
- delta_value = random.uniform(-self.brightness_delta,
- self.brightness_delta)
- alpha_value = random.uniform(self.contrast_lower, self.contrast_upper)
- saturation_value = random.uniform(self.saturation_lower,
- self.saturation_upper)
- hue_value = random.uniform(-self.hue_delta, self.hue_delta)
- swap_value = random.permutation(3)
- return (mode, brightness_flag, contrast_flag, saturation_flag,
- hue_flag, swap_flag, delta_value, alpha_value,
- saturation_value, hue_value, swap_value)
- def transform(self, results: dict) -> dict:
- """Transform function to perform photometric distortion on images.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Result dict with images distorted.
- """
- assert 'img' in results, '`img` is not found in results'
- img = results['img']
- img = img.astype(np.float32)
- (mode, brightness_flag, contrast_flag, saturation_flag, hue_flag,
- swap_flag, delta_value, alpha_value, saturation_value, hue_value,
- swap_value) = self._random_flags()
- # random brightness
- if brightness_flag:
- img += delta_value
- # mode == 1 --> do random contrast first
- # mode == 0 --> do random contrast last
- if mode == 1:
- if contrast_flag:
- img *= alpha_value
- # convert color from BGR to HSV
- img = mmcv.bgr2hsv(img)
- # random saturation
- if saturation_flag:
- img[..., 1] *= saturation_value
- # For image(type=float32), after convert bgr to hsv by opencv,
- # valid saturation value range is [0, 1]
- if saturation_value > 1:
- img[..., 1] = img[..., 1].clip(0, 1)
- # random hue
- if hue_flag:
- img[..., 0] += hue_value
- img[..., 0][img[..., 0] > 360] -= 360
- img[..., 0][img[..., 0] < 0] += 360
- # convert color from HSV to BGR
- img = mmcv.hsv2bgr(img)
- # random contrast
- if mode == 0:
- if contrast_flag:
- img *= alpha_value
- # randomly swap channels
- if swap_flag:
- img = img[..., swap_value]
- results['img'] = img
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(brightness_delta={self.brightness_delta}, '
- repr_str += 'contrast_range='
- repr_str += f'{(self.contrast_lower, self.contrast_upper)}, '
- repr_str += 'saturation_range='
- repr_str += f'{(self.saturation_lower, self.saturation_upper)}, '
- repr_str += f'hue_delta={self.hue_delta})'
- return repr_str
- @TRANSFORMS.register_module()
- class Expand(BaseTransform):
- """Random expand the image & bboxes & masks & segmentation map.
- Randomly place the original image on a canvas of ``ratio`` x original image
- size filled with mean values. The ratio is in the range of ratio_range.
- Required Keys:
- - img
- - img_shape
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - gt_seg_map (np.uint8) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes
- - gt_masks
- - gt_seg_map
- Args:
- mean (sequence): Mean value of the dataset.
- to_rgb (bool): Whether to convert the channel order of ``mean`` to
- align with RGB. Defaults to True.
- ratio_range (sequence): Range of expand ratio. Defaults to (1, 4).
- seg_ignore_label (int, optional): Fill label for the expanded area of
- the segmentation map. Defaults to None.
- prob (float): Probability of applying this transformation. Defaults
- to 0.5.
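- A minimal usage sketch (illustrative only):
- .. code-block::
-     import numpy as np
-     transform = Expand(mean=(0, 0, 0), ratio_range=(2, 2), prob=1.0)
-     results = dict(img=np.zeros((30, 40, 3), dtype=np.uint8))
-     results = transform(results)
-     # the image is placed on a canvas twice its size, so
-     # results['img_shape'] is (60, 80)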
- """
- def __init__(self,
- mean: Sequence[Number] = (0, 0, 0),
- to_rgb: bool = True,
- ratio_range: Sequence[Number] = (1, 4),
- seg_ignore_label: Optional[int] = None,
- prob: float = 0.5) -> None:
- self.to_rgb = to_rgb
- self.ratio_range = ratio_range
- if to_rgb:
- self.mean = mean[::-1]
- else:
- self.mean = mean
- self.min_ratio, self.max_ratio = ratio_range
- self.seg_ignore_label = seg_ignore_label
- self.prob = prob
- @cache_randomness
- def _random_prob(self) -> float:
- return random.uniform(0, 1)
- @cache_randomness
- def _random_ratio(self) -> float:
- return random.uniform(self.min_ratio, self.max_ratio)
- @cache_randomness
- def _random_left_top(self, ratio: float, h: int,
- w: int) -> Tuple[int, int]:
- left = int(random.uniform(0, w * ratio - w))
- top = int(random.uniform(0, h * ratio - h))
- return left, top
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Transform function to expand images, bounding boxes, masks,
- segmentation map.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Result dict with images, bounding boxes, masks, segmentation
- map expanded.
- """
- if self._random_prob() > self.prob:
- return results
- assert 'img' in results, '`img` is not found in results'
- img = results['img']
- h, w, c = img.shape
- ratio = self._random_ratio()
- # speedup expand when meets large image
- if np.all(self.mean == self.mean[0]):
- expand_img = np.empty((int(h * ratio), int(w * ratio), c),
- img.dtype)
- expand_img.fill(self.mean[0])
- else:
- expand_img = np.full((int(h * ratio), int(w * ratio), c),
- self.mean,
- dtype=img.dtype)
- left, top = self._random_left_top(ratio, h, w)
- expand_img[top:top + h, left:left + w] = img
- results['img'] = expand_img
- results['img_shape'] = expand_img.shape[:2]
- # expand bboxes
- if results.get('gt_bboxes', None) is not None:
- results['gt_bboxes'].translate_([left, top])
- # expand masks
- if results.get('gt_masks', None) is not None:
- results['gt_masks'] = results['gt_masks'].expand(
- int(h * ratio), int(w * ratio), top, left)
- # expand segmentation map
- if results.get('gt_seg_map', None) is not None:
- gt_seg = results['gt_seg_map']
- expand_gt_seg = np.full((int(h * ratio), int(w * ratio)),
- self.seg_ignore_label,
- dtype=gt_seg.dtype)
- expand_gt_seg[top:top + h, left:left + w] = gt_seg
- results['gt_seg_map'] = expand_gt_seg
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(mean={self.mean}, to_rgb={self.to_rgb}, '
- repr_str += f'ratio_range={self.ratio_range}, '
- repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
- repr_str += f'prob={self.prob})'
- return repr_str
- @TRANSFORMS.register_module()
- class MinIoURandomCrop(BaseTransform):
- """Random crop the image & bboxes & masks & segmentation map, the cropped
- patches have minimum IoU requirement with original image & bboxes & masks.
- & segmentation map, the IoU threshold is randomly selected from min_ious.
- Required Keys:
- - img
- - img_shape
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - gt_ignore_flags (bool) (optional)
- - gt_seg_map (np.uint8) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes
- - gt_bboxes_labels
- - gt_masks
- - gt_ignore_flags
- - gt_seg_map
- Args:
- min_ious (Sequence[float]): minimum IoU threshold for all intersections
- with bounding boxes.
- min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w,
- where a >= min_crop_size).
- bbox_clip_border (bool, optional): Whether clip the objects outside
- the border of the image. Defaults to True.
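- A minimal usage sketch (illustrative only; ``gt_bboxes`` is required):
- .. code-block::
-     import numpy as np
-     from mmdet.structures.bbox import HorizontalBoxes
-     transform = MinIoURandomCrop(min_ious=(0.5,), min_crop_size=0.3)
-     results = dict(
-         img=np.zeros((60, 80, 3), dtype=np.uint8),
-         gt_bboxes=HorizontalBoxes(
-             np.array([[10, 10, 40, 40]], dtype=np.float32)),
-         gt_bboxes_labels=np.array([0], dtype=np.int64))
-     results = transform(results)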
- """
- def __init__(self,
- min_ious: Sequence[float] = (0.1, 0.3, 0.5, 0.7, 0.9),
- min_crop_size: float = 0.3,
- bbox_clip_border: bool = True) -> None:
- self.min_ious = min_ious
- self.sample_mode = (1, *min_ious, 0)
- self.min_crop_size = min_crop_size
- self.bbox_clip_border = bbox_clip_border
- @cache_randomness
- def _random_mode(self) -> Number:
- return random.choice(self.sample_mode)
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Transform function to crop images and bounding boxes with minimum
- IoU constraint.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Result dict with images and bounding boxes cropped, \
- 'img_shape' key is updated.
- """
- assert 'img' in results, '`img` is not found in results'
- assert 'gt_bboxes' in results, '`gt_bboxes` is not found in results'
- img = results['img']
- boxes = results['gt_bboxes']
- h, w, c = img.shape
- while True:
- mode = self._random_mode()
- self.mode = mode
- if mode == 1:
- return results
- min_iou = self.mode
- for i in range(50):
- new_w = random.uniform(self.min_crop_size * w, w)
- new_h = random.uniform(self.min_crop_size * h, h)
- # h / w in [0.5, 2]
- if new_h / new_w < 0.5 or new_h / new_w > 2:
- continue
- # sample the patch origin uniformly in [0, w - new_w] x [0, h - new_h]
- left = random.uniform(0, w - new_w)
- top = random.uniform(0, h - new_h)
- patch = np.array(
- (int(left), int(top), int(left + new_w), int(top + new_h)))
- # Line or point crop is not allowed
- if patch[2] == patch[0] or patch[3] == patch[1]:
- continue
- overlaps = boxes.overlaps(
- HorizontalBoxes(patch.reshape(-1, 4).astype(np.float32)),
- boxes).numpy().reshape(-1)
- if len(overlaps) > 0 and overlaps.min() < min_iou:
- continue
- # center of boxes should inside the crop img
- # only adjust boxes and instance masks when the gt is not empty
- if len(overlaps) > 0:
- # adjust boxes
- def is_center_of_bboxes_in_patch(boxes, patch):
- centers = boxes.centers.numpy()
- mask = ((centers[:, 0] > patch[0]) *
- (centers[:, 1] > patch[1]) *
- (centers[:, 0] < patch[2]) *
- (centers[:, 1] < patch[3]))
- return mask
- mask = is_center_of_bboxes_in_patch(boxes, patch)
- if not mask.any():
- continue
- if results.get('gt_bboxes', None) is not None:
- boxes = results['gt_bboxes']
- mask = is_center_of_bboxes_in_patch(boxes, patch)
- boxes = boxes[mask]
- boxes.translate_([-patch[0], -patch[1]])
- if self.bbox_clip_border:
- boxes.clip_(
- [patch[3] - patch[1], patch[2] - patch[0]])
- results['gt_bboxes'] = boxes
- # ignore_flags
- if results.get('gt_ignore_flags', None) is not None:
- results['gt_ignore_flags'] = \
- results['gt_ignore_flags'][mask]
- # labels
- if results.get('gt_bboxes_labels', None) is not None:
- results['gt_bboxes_labels'] = results[
- 'gt_bboxes_labels'][mask]
- # mask fields
- if results.get('gt_masks', None) is not None:
- results['gt_masks'] = results['gt_masks'][
- mask.nonzero()[0]].crop(patch)
- # adjust the img no matter whether the gt is empty before crop
- img = img[patch[1]:patch[3], patch[0]:patch[2]]
- results['img'] = img
- results['img_shape'] = img.shape[:2]
- # seg fields
- if results.get('gt_seg_map', None) is not None:
- results['gt_seg_map'] = results['gt_seg_map'][
- patch[1]:patch[3], patch[0]:patch[2]]
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(min_ious={self.min_ious}, '
- repr_str += f'min_crop_size={self.min_crop_size}, '
- repr_str += f'bbox_clip_border={self.bbox_clip_border})'
- return repr_str
- @TRANSFORMS.register_module()
- class Corrupt(BaseTransform):
- """Corruption augmentation.
- Corruption transforms implemented based on
- `imagecorruptions <https://github.com/bethgelab/imagecorruptions>`_.
- Required Keys:
- - img (np.uint8)
- Modified Keys:
- - img (np.uint8)
- Args:
- corruption (str): Corruption name.
- severity (int): The severity of corruption. Defaults to 1.
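- A minimal usage sketch (illustrative only; requires the optional
- imagecorruptions package):
- .. code-block::
-     import numpy as np
-     transform = Corrupt(corruption='gaussian_noise', severity=2)
-     results = dict(img=np.zeros((32, 32, 3), dtype=np.uint8))
-     results = transform(results)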
- """
- def __init__(self, corruption: str, severity: int = 1) -> None:
- self.corruption = corruption
- self.severity = severity
- def transform(self, results: dict) -> dict:
- """Call function to corrupt image.
- Args:
- results (dict): Result dict from loading pipeline.
- Returns:
- dict: Result dict with images corrupted.
- """
- if corrupt is None:
- raise RuntimeError('imagecorruptions is not installed')
- results['img'] = corrupt(
- results['img'].astype(np.uint8),
- corruption_name=self.corruption,
- severity=self.severity)
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__
- repr_str += f'(corruption={self.corruption}, '
- repr_str += f'severity={self.severity})'
- return repr_str
- @TRANSFORMS.register_module()
- @avoid_cache_randomness
- class Albu(BaseTransform):
- """Albumentation augmentation.
- Adds custom transformations from Albumentations library.
- Please, visit `https://albumentations.readthedocs.io`
- to get more information.
- Required Keys:
- - img (np.uint8)
- - gt_bboxes (HorizontalBoxes[torch.float32]) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- Modified Keys:
- - img (np.uint8)
- - gt_bboxes (HorizontalBoxes[torch.float32]) (optional)
- - gt_masks (BitmapMasks | PolygonMasks) (optional)
- - img_shape (tuple)
- An example of ``transforms`` is as follows:
- .. code-block::
- [
- dict(
- type='ShiftScaleRotate',
- shift_limit=0.0625,
- scale_limit=0.0,
- rotate_limit=0,
- interpolation=1,
- p=0.5),
- dict(
- type='RandomBrightnessContrast',
- brightness_limit=[0.1, 0.3],
- contrast_limit=[0.1, 0.3],
- p=0.2),
- dict(type='ChannelShuffle', p=0.1),
- dict(
- type='OneOf',
- transforms=[
- dict(type='Blur', blur_limit=3, p=1.0),
- dict(type='MedianBlur', blur_limit=3, p=1.0)
- ],
- p=0.1),
- ]
- Args:
- transforms (list[dict]): A list of Albumentations transform configs.
- bbox_params (dict, optional): Bbox params for the Albumentations
- ``Compose``.
- keymap (dict, optional): Contains
- {'input key':'albumentation-style key'}.
- skip_img_without_anno (bool): Whether to skip the image if no
- annotations are left after augmentation. Defaults to False.
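- A sketch of constructing the wrapper itself (illustrative only; the
- exact ``bbox_params`` depend on the boxes and label fields in use):
- .. code-block::
-     transform = Albu(
-         transforms=[dict(type='ChannelShuffle', p=0.5)],
-         bbox_params=dict(
-             type='BboxParams',
-             format='pascal_voc',
-             label_fields=['gt_bboxes_labels', 'gt_ignore_flags']))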
- """
- def __init__(self,
- transforms: List[dict],
- bbox_params: Optional[dict] = None,
- keymap: Optional[dict] = None,
- skip_img_without_anno: bool = False) -> None:
- if Compose is None:
- raise RuntimeError('albumentations is not installed')
- # Args will be modified later, copying it will be safer
- transforms = copy.deepcopy(transforms)
- if bbox_params is not None:
- bbox_params = copy.deepcopy(bbox_params)
- if keymap is not None:
- keymap = copy.deepcopy(keymap)
- self.transforms = transforms
- self.filter_lost_elements = False
- self.skip_img_without_anno = skip_img_without_anno
- # A simple workaround to remove masks without boxes
- if (isinstance(bbox_params, dict) and 'label_fields' in bbox_params
- and 'filter_lost_elements' in bbox_params):
- self.filter_lost_elements = True
- self.origin_label_fields = bbox_params['label_fields']
- bbox_params['label_fields'] = ['idx_mapper']
- del bbox_params['filter_lost_elements']
- self.bbox_params = (
- self.albu_builder(bbox_params) if bbox_params else None)
- self.aug = Compose([self.albu_builder(t) for t in self.transforms],
- bbox_params=self.bbox_params)
- if not keymap:
- self.keymap_to_albu = {
- 'img': 'image',
- 'gt_masks': 'masks',
- 'gt_bboxes': 'bboxes'
- }
- else:
- self.keymap_to_albu = keymap
- self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()}
- def albu_builder(self, cfg: dict):
- """Import a module from albumentations.
- It inherits some of :func:`build_from_cfg` logic.
- Args:
- cfg (dict): Config dict. It should at least contain the key "type".
- Returns:
- obj: The constructed object.
- """
- assert isinstance(cfg, dict) and 'type' in cfg
- args = cfg.copy()
- obj_type = args.pop('type')
- if is_str(obj_type):
- if albumentations is None:
- raise RuntimeError('albumentations is not installed')
- obj_cls = getattr(albumentations, obj_type)
- elif inspect.isclass(obj_type):
- obj_cls = obj_type
- else:
- raise TypeError(
- f'type must be a str or valid type, but got {type(obj_type)}')
- if 'transforms' in args:
- args['transforms'] = [
- self.albu_builder(transform)
- for transform in args['transforms']
- ]
- return obj_cls(**args)
- @staticmethod
- def mapper(d: dict, keymap: dict) -> dict:
- """Dictionary mapper. Renames keys according to keymap provided.
- Args:
- d (dict): old dict
- keymap (dict): {'old_key':'new_key'}
- Returns:
- dict: new dict.
- """
- updated_dict = {}
- for k, v in d.items():
- new_k = keymap.get(k, k)
- updated_dict[new_k] = v
- return updated_dict
- @autocast_box_type()
- def transform(self, results: dict) -> Union[dict, None]:
- """Transform function of Albu."""
- # TODO: gt_seg_map is not currently supported
- # dict to albumentations format
- results = self.mapper(results, self.keymap_to_albu)
- results, ori_masks = self._preprocess_results(results)
- results = self.aug(**results)
- results = self._postprocess_results(results, ori_masks)
- if results is None:
- return None
- # back to the original format
- results = self.mapper(results, self.keymap_back)
- results['img_shape'] = results['img'].shape[:2]
- return results
- def _preprocess_results(self, results: dict) -> tuple:
- """Pre-processing results to facilitate the use of Albu."""
- if 'bboxes' in results:
- # to list of boxes
- if not isinstance(results['bboxes'], HorizontalBoxes):
- raise NotImplementedError(
- 'Albu only supports horizontal boxes now')
- bboxes = results['bboxes'].numpy()
- results['bboxes'] = [x for x in bboxes]
- # add pseudo-field for filtration
- if self.filter_lost_elements:
- results['idx_mapper'] = np.arange(len(results['bboxes']))
- # TODO: Support mask structure in albu
- ori_masks = None
- if 'masks' in results:
- if isinstance(results['masks'], PolygonMasks):
- raise NotImplementedError(
- 'Albu only supports BitMap masks now')
- ori_masks = results['masks']
- if albumentations.__version__ < '0.5':
- results['masks'] = results['masks'].masks
- else:
- results['masks'] = [mask for mask in results['masks'].masks]
- return results, ori_masks
- def _postprocess_results(
- self,
- results: dict,
- ori_masks: Optional[Union[BitmapMasks,
- PolygonMasks]] = None) -> dict:
- """Post-processing Albu output."""
- # albumentations may return np.array or list on different versions
- if 'gt_bboxes_labels' in results and isinstance(
- results['gt_bboxes_labels'], list):
- results['gt_bboxes_labels'] = np.array(
- results['gt_bboxes_labels'], dtype=np.int64)
- if 'gt_ignore_flags' in results and isinstance(
- results['gt_ignore_flags'], list):
- results['gt_ignore_flags'] = np.array(
- results['gt_ignore_flags'], dtype=bool)
- if 'bboxes' in results:
- if isinstance(results['bboxes'], list):
- results['bboxes'] = np.array(
- results['bboxes'], dtype=np.float32)
- results['bboxes'] = results['bboxes'].reshape(-1, 4)
- results['bboxes'] = HorizontalBoxes(results['bboxes'])
- # filter label_fields
- if self.filter_lost_elements:
- for label in self.origin_label_fields:
- results[label] = np.array(
- [results[label][i] for i in results['idx_mapper']])
- if 'masks' in results:
- assert ori_masks is not None
- results['masks'] = np.array(
- [results['masks'][i] for i in results['idx_mapper']])
- results['masks'] = ori_masks.__class__(
- results['masks'], ori_masks.height, ori_masks.width)
- if (not len(results['idx_mapper'])
- and self.skip_img_without_anno):
- return None
- elif 'masks' in results:
- results['masks'] = ori_masks.__class__(results['masks'],
- ori_masks.height,
- ori_masks.width)
- return results
- def __repr__(self) -> str:
- repr_str = self.__class__.__name__ + f'(transforms={self.transforms})'
- return repr_str
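- # Editor's sketch: a hypothetical Albu config showing `bbox_params`
- # with the `filter_lost_elements` workaround handled in __init__ and an
- # explicit `keymap`. The values are illustrative, not prescriptive.
- albu_example = dict(
- type='Albu',
- transforms=[dict(type='RandomBrightnessContrast', p=0.2)],
- bbox_params=dict(
- type='BboxParams',
- format='pascal_voc',
- label_fields=['gt_bboxes_labels', 'gt_ignore_flags'],
- min_visibility=0.0,
- filter_lost_elements=True),
- keymap={'img': 'image', 'gt_masks': 'masks', 'gt_bboxes': 'bboxes'},
- skip_img_without_anno=True)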
- @TRANSFORMS.register_module()
- @avoid_cache_randomness
- class RandomCenterCropPad(BaseTransform):
- """Random center crop and random around padding for CornerNet.
- This operation generates a randomly cropped image from the original
- image and pads it simultaneously. Different from :class:`RandomCrop`,
- the output shape may not strictly equal ``crop_size``. We choose a
- random value from ``ratios`` and the output shape could be larger or
- smaller than ``crop_size``. The padding operation is also different
- from :class:`Pad`: here we use around padding instead of right-bottom
- padding.
- The relation between output image (padding image) and original image:
- .. code:: text
- output image
- +----------------------------+
- | padded area |
- +------|----------------------------|----------+
- | | cropped area | |
- | | +---------------+ | |
- | | | . center | | | original image
- | | | range | | |
- | | +---------------+ | |
- +------|----------------------------|----------+
- | padded area |
- +----------------------------+
- There are 5 main areas in the figure:
- - output image: output image of this operation, also called the
- padding image in the following description.
- - original image: input image of this operation.
- - padded area: non-intersect area of output image and original image.
- - cropped area: the overlap of output image and original image.
- - center range: a smaller area that the random center is chosen from.
- The center range is computed from ``border`` and the original image's
- shape so that the random center is not too close to the original
- image's border.
- This operation also acts differently in train and test modes; the
- summary pipelines are listed below.
- Train pipeline:
- 1. Choose a ``random_ratio`` from ``ratios``; the shape of the
- padding image will be ``random_ratio * crop_size``.
- 2. Choose a ``random_center`` in the center range.
- 3. Generate the padding image whose center matches ``random_center``.
- 4. Initialize the padding image with pixel values equal to ``mean``.
- 5. Copy the cropped area to the padding image.
- 6. Refine annotations.
- Test pipeline:
- 1. Compute the output shape according to ``test_pad_mode``.
- 2. Generate the padding image whose center matches the original
- image center.
- 3. Initialize the padding image with pixel values equal to ``mean``.
- 4. Copy the ``cropped area`` to the padding image.
- Required Keys:
- - img (np.float32)
- - img_shape (tuple)
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- Modified Keys:
- - img (np.float32)
- - img_shape (tuple)
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- Args:
- crop_size (tuple, optional): Expected size after crop; the final size
- will be computed according to the ratio. Requires (width, height)
- in train mode, and None in test mode.
- ratios (tuple, optional): Randomly select a ratio from the tuple and
- crop the image to (crop_size[0] * ratio) * (crop_size[1] * ratio).
- Only available in train mode. Defaults to (0.9, 1.0, 1.1).
- border (int, optional): Max distance from the center-select area to
- the image border. Only available in train mode. Defaults to 128.
- mean (sequence, optional): Mean values of 3 channels.
- std (sequence, optional): Std values of 3 channels.
- to_rgb (bool, optional): Whether to convert the image from BGR to RGB.
- test_mode (bool): Whether to involve random variables in the
- transform. In train mode, crop_size is fixed, while the center
- coords and ratio are randomly selected from predefined lists. In
- test mode, crop_size is the image's original shape, and the center
- coords and ratio are fixed. Defaults to False.
- test_pad_mode (tuple, optional): padding method and padding shape
- value, only available in test mode. Default is using
- 'logical_or' with 127 as padding shape value.
- - 'logical_or': final_shape = input_shape | padding_shape_value
- - 'size_divisor': final_shape = int(
- ceil(input_shape / padding_shape_value) * padding_shape_value)
- Defaults to ('logical_or', 127).
- test_pad_add_pix (int): Extra padding pixel in test mode.
- Defaults to 0.
- bbox_clip_border (bool): Whether to clip the objects outside
- the border of the image. Defaults to True.
- """
- def __init__(self,
- crop_size: Optional[tuple] = None,
- ratios: Optional[tuple] = (0.9, 1.0, 1.1),
- border: Optional[int] = 128,
- mean: Optional[Sequence] = None,
- std: Optional[Sequence] = None,
- to_rgb: Optional[bool] = None,
- test_mode: bool = False,
- test_pad_mode: Optional[tuple] = ('logical_or', 127),
- test_pad_add_pix: int = 0,
- bbox_clip_border: bool = True) -> None:
- if test_mode:
- assert crop_size is None, 'crop_size must be None in test mode'
- assert ratios is None, 'ratios must be None in test mode'
- assert border is None, 'border must be None in test mode'
- assert isinstance(test_pad_mode, (list, tuple))
- assert test_pad_mode[0] in ['logical_or', 'size_divisor']
- else:
- assert isinstance(crop_size, (list, tuple))
- assert crop_size[0] > 0 and crop_size[1] > 0, (
- 'crop_size must be > 0 in train mode')
- assert isinstance(ratios, (list, tuple))
- assert test_pad_mode is None, (
- 'test_pad_mode must be None in train mode')
- self.crop_size = crop_size
- self.ratios = ratios
- self.border = border
- # We do not set default value to mean, std and to_rgb because these
- # hyper-parameters are easy to forget but could affect the performance.
- # Please use the same setting as Normalize for performance assurance.
- assert mean is not None and std is not None and to_rgb is not None
- self.to_rgb = to_rgb
- self.input_mean = mean
- self.input_std = std
- if to_rgb:
- self.mean = mean[::-1]
- self.std = std[::-1]
- else:
- self.mean = mean
- self.std = std
- self.test_mode = test_mode
- self.test_pad_mode = test_pad_mode
- self.test_pad_add_pix = test_pad_add_pix
- self.bbox_clip_border = bbox_clip_border
- def _get_border(self, border, size):
- """Get final border for the target size.
- This function generates a ``final_border`` according to image's shape.
- The area between ``final_border`` and ``size - final_border`` is the
- ``center range``. We randomly choose a center from the ``center
- range`` so that the random center is not too close to the original
- image's border. The ``center range`` should also be larger than 0.
- Args:
- border (int): The initial border, default is 128.
- size (int): The width or height of original image.
- Returns:
- int: The final border.
- """
- k = 2 * border / size
- i = pow(2, np.ceil(np.log2(np.ceil(k))) + (k == int(k)))
- return border // i
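- # Editor's note, a worked example of the formula above: with
- # border=128 and size=600, k = 256 / 600 ~= 0.43, ceil(k) = 1,
- # log2(1) = 0 and k != int(k), so i = 2 ** 0 = 1 and the border stays
- # 128 (center range 128..472). With size=200, k = 1.28, ceil(k) = 2,
- # log2(2) = 1, so i = 2 and the border shrinks to 64 (center range
- # 64..136). The halving keeps the center range non-empty.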
- def _filter_boxes(self, patch, boxes):
- """Check whether the center of each box is in the patch.
- Args:
- patch (list[int]): The cropped area, [left, top, right, bottom].
- boxes (numpy array, (N x 4)): Ground truth boxes.
- Returns:
- mask (numpy array, (N,)): Whether each box's center is inside the patch.
- """
- center = boxes.centers.numpy()
- mask = (center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) * (
- center[:, 0] < patch[2]) * (
- center[:, 1] < patch[3])
- return mask
- def _crop_image_and_paste(self, image, center, size):
- """Crop image with a given center and size, then paste the cropped
- image to a blank image with two centers align.
- This function is equivalent to generating a blank image with ``size``
- as its shape. Then cover it on the original image with two centers (
- the center of blank image and the random center of original image)
- aligned. The overlap area is paste from the original image and the
- outside area is filled with ``mean pixel``.
- Args:
- image (np array, H x W x C): Original image.
- center (list[int]): Target crop center coord.
- size (list[int]): Target crop size. [target_h, target_w]
- Returns:
- cropped_img (np array, target_h x target_w x C): Cropped image.
- border (np array, 4): The distance of four border of
- ``cropped_img`` to the original image area, [top, bottom,
- left, right]
- patch (list[int]): The cropped area, [left, top, right, bottom].
- """
- center_y, center_x = center
- target_h, target_w = size
- img_h, img_w, img_c = image.shape
- x0 = max(0, center_x - target_w // 2)
- x1 = min(center_x + target_w // 2, img_w)
- y0 = max(0, center_y - target_h // 2)
- y1 = min(center_y + target_h // 2, img_h)
- patch = np.array((int(x0), int(y0), int(x1), int(y1)))
- left, right = center_x - x0, x1 - center_x
- top, bottom = center_y - y0, y1 - center_y
- cropped_center_y, cropped_center_x = target_h // 2, target_w // 2
- cropped_img = np.zeros((target_h, target_w, img_c), dtype=image.dtype)
- for i in range(img_c):
- cropped_img[:, :, i] += self.mean[i]
- y_slice = slice(cropped_center_y - top, cropped_center_y + bottom)
- x_slice = slice(cropped_center_x - left, cropped_center_x + right)
- cropped_img[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]
- border = np.array([
- cropped_center_y - top, cropped_center_y + bottom,
- cropped_center_x - left, cropped_center_x + right
- ],
- dtype=np.float32)
- return cropped_img, border, patch
- def _train_aug(self, results):
- """Random crop and around padding the original image.
- Args:
- results (dict): Image information from the augmentation pipeline.
- Returns:
- results (dict): The updated dict.
- """
- img = results['img']
- h, w, c = img.shape
- gt_bboxes = results['gt_bboxes']
- while True:
- scale = random.choice(self.ratios)
- new_h = int(self.crop_size[1] * scale)
- new_w = int(self.crop_size[0] * scale)
- h_border = self._get_border(self.border, h)
- w_border = self._get_border(self.border, w)
- for i in range(50):
- center_x = random.randint(low=w_border, high=w - w_border)
- center_y = random.randint(low=h_border, high=h - h_border)
- cropped_img, border, patch = self._crop_image_and_paste(
- img, [center_y, center_x], [new_h, new_w])
- # If the image has no valid bbox, any crop patch is valid.
- if len(gt_bboxes) == 0:
- results['img'] = cropped_img
- results['img_shape'] = cropped_img.shape[:2]
- return results
- mask = self._filter_boxes(patch, gt_bboxes)
- if not mask.any():
- continue
- results['img'] = cropped_img
- results['img_shape'] = cropped_img.shape[:2]
- x0, y0, x1, y1 = patch
- left_w, top_h = center_x - x0, center_y - y0
- cropped_center_x, cropped_center_y = new_w // 2, new_h // 2
- # crop bboxes accordingly and clip to the image boundary
- gt_bboxes = gt_bboxes[mask]
- gt_bboxes.translate_([
- cropped_center_x - left_w - x0,
- cropped_center_y - top_h - y0
- ])
- if self.bbox_clip_border:
- gt_bboxes.clip_([new_h, new_w])
- keep = gt_bboxes.is_inside([new_h, new_w]).numpy()
- gt_bboxes = gt_bboxes[keep]
- results['gt_bboxes'] = gt_bboxes
- # ignore_flags
- if results.get('gt_ignore_flags', None) is not None:
- gt_ignore_flags = results['gt_ignore_flags'][mask]
- results['gt_ignore_flags'] = \
- gt_ignore_flags[keep]
- # labels
- if results.get('gt_bboxes_labels', None) is not None:
- gt_labels = results['gt_bboxes_labels'][mask]
- results['gt_bboxes_labels'] = gt_labels[keep]
- if 'gt_masks' in results or 'gt_seg_map' in results:
- raise NotImplementedError(
- 'RandomCenterCropPad only supports bbox.')
- return results
- def _test_aug(self, results):
- """Around padding the original image without cropping.
- The padding mode and value are from ``test_pad_mode``.
- Args:
- results (dict): Image information from the augmentation pipeline.
- Returns:
- results (dict): The updated dict.
- """
- img = results['img']
- h, w, c = img.shape
- if self.test_pad_mode[0] in ['logical_or']:
- # self.test_pad_add_pix is only used for centernet
- target_h = (h | self.test_pad_mode[1]) + self.test_pad_add_pix
- target_w = (w | self.test_pad_mode[1]) + self.test_pad_add_pix
- elif self.test_pad_mode[0] in ['size_divisor']:
- divisor = self.test_pad_mode[1]
- target_h = int(np.ceil(h / divisor)) * divisor
- target_w = int(np.ceil(w / divisor)) * divisor
- else:
- raise NotImplementedError(
- 'RandomCenterCropPad only supports two testing pad modes: '
- 'logical_or and size_divisor.')
- cropped_img, border, _ = self._crop_image_and_paste(
- img, [h // 2, w // 2], [target_h, target_w])
- results['img'] = cropped_img
- results['img_shape'] = cropped_img.shape[:2]
- results['border'] = border
- return results
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- img = results['img']
- assert img.dtype == np.float32, (
- 'RandomCenterCropPad needs the input image of dtype np.float32,'
- ' please set "to_float32=True" in "LoadImageFromFile" pipeline')
- h, w, c = img.shape
- assert c == len(self.mean)
- if self.test_mode:
- return self._test_aug(results)
- else:
- return self._train_aug(results)
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(crop_size={self.crop_size}, '
- repr_str += f'ratios={self.ratios}, '
- repr_str += f'border={self.border}, '
- repr_str += f'mean={self.input_mean}, '
- repr_str += f'std={self.input_std}, '
- repr_str += f'to_rgb={self.to_rgb}, '
- repr_str += f'test_mode={self.test_mode}, '
- repr_str += f'test_pad_mode={self.test_pad_mode}, '
- repr_str += f'bbox_clip_border={self.bbox_clip_border})'
- return repr_str
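- # Editor's sketch: hypothetical train/test settings matching the
- # assertions in __init__. Per the comment there, mean/std/to_rgb
- # should mirror the detector's Normalize settings.
- center_crop_pad_train = dict(
- type='RandomCenterCropPad',
- crop_size=(511, 511),
- ratios=(0.9, 1.0, 1.1),
- mean=[0, 0, 0],
- std=[1, 1, 1],
- to_rgb=True,
- test_pad_mode=None)
- center_crop_pad_test = dict(
- type='RandomCenterCropPad',
- crop_size=None,
- ratios=None,
- border=None,
- mean=[0, 0, 0],
- std=[1, 1, 1],
- to_rgb=True,
- test_mode=True,
- test_pad_mode=('logical_or', 127))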
- @TRANSFORMS.register_module()
- class CutOut(BaseTransform):
- """CutOut operation.
- Randomly drop some regions of image used in
- `Cutout <https://arxiv.org/abs/1708.04552>`_.
- Required Keys:
- - img
- Modified Keys:
- - img
- Args:
- n_holes (int or tuple[int, int]): Number of regions to be dropped.
- If it is given as a tuple, the number of holes will be randomly
- selected from the closed interval [``n_holes[0]``, ``n_holes[1]``].
- cutout_shape (tuple[int, int] or list[tuple[int, int]], optional):
- The candidate shape of dropped regions. It can be
- ``tuple[int, int]`` to use a fixed cutout shape, or
- ``list[tuple[int, int]]`` to randomly choose shape
- from the list. Defaults to None.
- cutout_ratio (tuple[float, float] or list[tuple[float, float]],
- optional): The candidate ratio of dropped regions. It can be
- ``tuple[float, float]`` to use a fixed ratio or
- ``list[tuple[float, float]]`` to randomly choose ratio
- from the list. Please note that ``cutout_shape`` and
- ``cutout_ratio`` cannot be both given at the same time.
- Defaults to None.
- fill_in (tuple[float, float, float] or tuple[int, int, int]): The
- pixel value used to fill the dropped regions. Defaults to (0, 0, 0).
- """
- def __init__(
- self,
- n_holes: Union[int, Tuple[int, int]],
- cutout_shape: Optional[Union[Tuple[int, int],
- List[Tuple[int, int]]]] = None,
- cutout_ratio: Optional[Union[Tuple[float, float],
- List[Tuple[float, float]]]] = None,
- fill_in: Union[Tuple[float, float, float], Tuple[int, int,
- int]] = (0, 0, 0)
- ) -> None:
- assert (cutout_shape is None) ^ (cutout_ratio is None), \
- 'Either cutout_shape or cutout_ratio should be specified.'
- assert (isinstance(cutout_shape, (list, tuple))
- or isinstance(cutout_ratio, (list, tuple)))
- if isinstance(n_holes, tuple):
- assert len(n_holes) == 2 and 0 <= n_holes[0] < n_holes[1]
- else:
- n_holes = (n_holes, n_holes)
- self.n_holes = n_holes
- self.fill_in = fill_in
- self.with_ratio = cutout_ratio is not None
- self.candidates = cutout_ratio if self.with_ratio else cutout_shape
- if not isinstance(self.candidates, list):
- self.candidates = [self.candidates]
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Call function to drop some regions of image."""
- h, w, c = results['img'].shape
- n_holes = np.random.randint(self.n_holes[0], self.n_holes[1] + 1)
- for _ in range(n_holes):
- x1 = np.random.randint(0, w)
- y1 = np.random.randint(0, h)
- index = np.random.randint(0, len(self.candidates))
- if not self.with_ratio:
- cutout_w, cutout_h = self.candidates[index]
- else:
- cutout_w = int(self.candidates[index][0] * w)
- cutout_h = int(self.candidates[index][1] * h)
- x2 = np.clip(x1 + cutout_w, 0, w)
- y2 = np.clip(y1 + cutout_h, 0, h)
- results['img'][y1:y2, x1:x2, :] = self.fill_in
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(n_holes={self.n_holes}, '
- repr_str += (f'cutout_ratio={self.candidates}, ' if self.with_ratio
- else f'cutout_shape={self.candidates}, ')
- repr_str += f'fill_in={self.fill_in})'
- return repr_str
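- # Editor's sketch: two hypothetical CutOut configs. Exactly one of
- # cutout_shape and cutout_ratio may be given, per the XOR assert in
- # __init__.
- cutout_fixed_shape = dict(
- type='CutOut', n_holes=(1, 3), cutout_shape=(8, 8))
- cutout_relative = dict(
- type='CutOut', n_holes=1, cutout_ratio=[(0.05, 0.05), (0.1, 0.1)])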
- @TRANSFORMS.register_module()
- class Mosaic(BaseTransform):
- """Mosaic augmentation.
- Given 4 images, the mosaic transform combines them into one output
- image, which is composed of parts from each sub-image.
- .. code:: text
- mosaic transform
- center_x
- +------------------------------+
- | pad | pad |
- | +-----------+ |
- | | | |
- | | image1 |--------+ |
- | | | | |
- | | | image2 | |
- center_y |----+-------------+-----------|
- | | cropped | |
- |pad | image3 | image4 |
- | | | |
- +----|-------------+-----------+
- | |
- +-------------+
- The mosaic transform steps are as follows:
- 1. Choose the mosaic center as the intersection of the 4 images.
- 2. Get the top-left image according to the index, and randomly
- sample another 3 images from the custom dataset.
- 3. A sub-image will be cropped if it is larger than the mosaic patch.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- - mix_results (List[dict])
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_ignore_flags (optional)
- Args:
- img_scale (Sequence[int]): Image size after mosaic pipeline of single
- image. The shape order should be (width, height).
- Defaults to (640, 640).
- center_ratio_range (Sequence[float]): Center ratio range of mosaic
- output. Defaults to (0.5, 1.5).
- bbox_clip_border (bool, optional): Whether to clip the objects outside
- the border of the image. In some dataset like MOT17, the gt bboxes
- are allowed to cross the border of images. Therefore, we don't
- need to clip the gt bboxes in these cases. Defaults to True.
- pad_val (int): Pad value. Defaults to 114.
- prob (float): Probability of applying this transformation.
- Defaults to 1.0.
- """
- def __init__(self,
- img_scale: Tuple[int, int] = (640, 640),
- center_ratio_range: Tuple[float, float] = (0.5, 1.5),
- bbox_clip_border: bool = True,
- pad_val: float = 114.0,
- prob: float = 1.0) -> None:
- assert isinstance(img_scale, tuple)
- assert 0 <= prob <= 1.0, 'The probability should be in range [0,1]. ' \
- f'got {prob}.'
- log_img_scale(img_scale, skip_square=True, shape_order='wh')
- self.img_scale = img_scale
- self.center_ratio_range = center_ratio_range
- self.bbox_clip_border = bbox_clip_border
- self.pad_val = pad_val
- self.prob = prob
- @cache_randomness
- def get_indexes(self, dataset: BaseDataset) -> List[int]:
- """Call function to collect indexes.
- Args:
- dataset (:obj:`MultiImageMixDataset`): The dataset.
- Returns:
- list: indexes.
- """
- indexes = [random.randint(0, len(dataset)) for _ in range(3)]
- return indexes
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Mosaic transform function.
- Args:
- results (dict): Result dict.
- Returns:
- dict: Updated result dict.
- """
- if random.uniform(0, 1) > self.prob:
- return results
- assert 'mix_results' in results
- mosaic_bboxes = []
- mosaic_bboxes_labels = []
- mosaic_ignore_flags = []
- if len(results['img'].shape) == 3:
- mosaic_img = np.full(
- (int(self.img_scale[1] * 2), int(self.img_scale[0] * 2), 3),
- self.pad_val,
- dtype=results['img'].dtype)
- else:
- mosaic_img = np.full(
- (int(self.img_scale[1] * 2), int(self.img_scale[0] * 2)),
- self.pad_val,
- dtype=results['img'].dtype)
- # mosaic center x, y
- center_x = int(
- random.uniform(*self.center_ratio_range) * self.img_scale[0])
- center_y = int(
- random.uniform(*self.center_ratio_range) * self.img_scale[1])
- center_position = (center_x, center_y)
- loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
- for i, loc in enumerate(loc_strs):
- if loc == 'top_left':
- results_patch = copy.deepcopy(results)
- else:
- results_patch = copy.deepcopy(results['mix_results'][i - 1])
- img_i = results_patch['img']
- h_i, w_i = img_i.shape[:2]
- # keep_ratio resize
- scale_ratio_i = min(self.img_scale[1] / h_i,
- self.img_scale[0] / w_i)
- img_i = mmcv.imresize(
- img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)))
- # compute the combine parameters
- paste_coord, crop_coord = self._mosaic_combine(
- loc, center_position, img_i.shape[:2][::-1])
- x1_p, y1_p, x2_p, y2_p = paste_coord
- x1_c, y1_c, x2_c, y2_c = crop_coord
- # crop and paste image
- mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c]
- # adjust coordinate
- gt_bboxes_i = results_patch['gt_bboxes']
- gt_bboxes_labels_i = results_patch['gt_bboxes_labels']
- gt_ignore_flags_i = results_patch['gt_ignore_flags']
- padw = x1_p - x1_c
- padh = y1_p - y1_c
- gt_bboxes_i.rescale_([scale_ratio_i, scale_ratio_i])
- gt_bboxes_i.translate_([padw, padh])
- mosaic_bboxes.append(gt_bboxes_i)
- mosaic_bboxes_labels.append(gt_bboxes_labels_i)
- mosaic_ignore_flags.append(gt_ignore_flags_i)
- mosaic_bboxes = mosaic_bboxes[0].cat(mosaic_bboxes, 0)
- mosaic_bboxes_labels = np.concatenate(mosaic_bboxes_labels, 0)
- mosaic_ignore_flags = np.concatenate(mosaic_ignore_flags, 0)
- if self.bbox_clip_border:
- mosaic_bboxes.clip_([2 * self.img_scale[1], 2 * self.img_scale[0]])
- # remove outside bboxes
- inside_inds = mosaic_bboxes.is_inside(
- [2 * self.img_scale[1], 2 * self.img_scale[0]]).numpy()
- mosaic_bboxes = mosaic_bboxes[inside_inds]
- mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds]
- mosaic_ignore_flags = mosaic_ignore_flags[inside_inds]
- results['img'] = mosaic_img
- results['img_shape'] = mosaic_img.shape[:2]
- results['gt_bboxes'] = mosaic_bboxes
- results['gt_bboxes_labels'] = mosaic_bboxes_labels
- results['gt_ignore_flags'] = mosaic_ignore_flags
- return results
- def _mosaic_combine(
- self, loc: str, center_position_xy: Sequence[float],
- img_shape_wh: Sequence[int]) -> Tuple[Tuple[int], Tuple[int]]:
- """Calculate global coordinate of mosaic image and local coordinate of
- cropped sub-image.
- Args:
- loc (str): Index for the sub-image, loc in ('top_left',
- 'top_right', 'bottom_left', 'bottom_right').
- center_position_xy (Sequence[float]): Mixing center for 4 images,
- (x, y).
- img_shape_wh (Sequence[int]): Width and height of sub-image
- Returns:
- tuple[tuple[float]]: Corresponding coordinates for pasting and
- cropping:
- - paste_coord (tuple): paste corner coordinates in the mosaic image.
- - crop_coord (tuple): crop corner coordinates in the sub-image.
- """
- assert loc in ('top_left', 'top_right', 'bottom_left', 'bottom_right')
- if loc == 'top_left':
- # index0 to top left part of image
- x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
- max(center_position_xy[1] - img_shape_wh[1], 0), \
- center_position_xy[0], \
- center_position_xy[1]
- crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - (
- y2 - y1), img_shape_wh[0], img_shape_wh[1]
- elif loc == 'top_right':
- # index1 to top right part of image
- x1, y1, x2, y2 = center_position_xy[0], \
- max(center_position_xy[1] - img_shape_wh[1], 0), \
- min(center_position_xy[0] + img_shape_wh[0],
- self.img_scale[0] * 2), \
- center_position_xy[1]
- crop_coord = 0, img_shape_wh[1] - (y2 - y1), min(
- img_shape_wh[0], x2 - x1), img_shape_wh[1]
- elif loc == 'bottom_left':
- # index2 to bottom left part of image
- x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
- center_position_xy[1], \
- center_position_xy[0], \
- min(self.img_scale[1] * 2, center_position_xy[1] +
- img_shape_wh[1])
- crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min(
- y2 - y1, img_shape_wh[1])
- else:
- # index3 to bottom right part of image
- x1, y1, x2, y2 = center_position_xy[0], \
- center_position_xy[1], \
- min(center_position_xy[0] + img_shape_wh[0],
- self.img_scale[0] * 2), \
- min(self.img_scale[1] * 2, center_position_xy[1] +
- img_shape_wh[1])
- crop_coord = 0, 0, min(img_shape_wh[0],
- x2 - x1), min(y2 - y1, img_shape_wh[1])
- paste_coord = x1, y1, x2, y2
- return paste_coord, crop_coord
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(img_scale={self.img_scale}, '
- repr_str += f'center_ratio_range={self.center_ratio_range}, '
- repr_str += f'pad_val={self.pad_val}, '
- repr_str += f'prob={self.prob})'
- return repr_str
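- # Editor's sketch: Mosaic consumes `mix_results`, which is populated
- # by a dataset wrapper such as the MultiImageMixDataset named in the
- # get_indexes docstring. A hypothetical pipeline fragment; the
- # negative RandomAffine border is one common way to crop the 2x mosaic
- # canvas back to img_scale.
- mosaic_pipeline_example = [
- dict(type='Mosaic', img_scale=(640, 640), pad_val=114.0),
- dict(type='RandomAffine', border=(-320, -320)),
- ]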
- @TRANSFORMS.register_module()
- class MixUp(BaseTransform):
- """MixUp data augmentation.
- .. code:: text
- mixup transform
- +------------------------------+
- | mixup image | |
- | +--------|--------+ |
- | | | | |
- |---------------+ | |
- | | | |
- | | image | |
- | | | |
- | | | |
- | |-----------------+ |
- | pad |
- +------------------------------+
- The mixup transform steps are as follows:
- 1. Another random image is picked from the dataset and embedded in
- the top-left patch (after padding and resizing).
- 2. The target of the mixup transform is the weighted average of the
- mixup image and the original image.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- - mix_results (List[dict])
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_ignore_flags (optional)
- Args:
- img_scale (Sequence[int]): Image output size after mixup pipeline.
- The shape order should be (width, height). Defaults to (640, 640).
- ratio_range (Sequence[float]): Scale ratio of mixup image.
- Defaults to (0.5, 1.5).
- flip_ratio (float): Horizontal flip ratio of mixup image.
- Defaults to 0.5.
- pad_val (int): Pad value. Defaults to 114.
- max_iters (int): The maximum number of iterations. If the number of
- iterations is greater than `max_iters`, but gt_bbox is still
- empty, then the iteration is terminated. Defaults to 15.
- bbox_clip_border (bool, optional): Whether to clip the objects outside
- the border of the image. In some dataset like MOT17, the gt bboxes
- are allowed to cross the border of images. Therefore, we don't
- need to clip the gt bboxes in these cases. Defaults to True.
- """
- def __init__(self,
- img_scale: Tuple[int, int] = (640, 640),
- ratio_range: Tuple[float, float] = (0.5, 1.5),
- flip_ratio: float = 0.5,
- pad_val: float = 114.0,
- max_iters: int = 15,
- bbox_clip_border: bool = True) -> None:
- assert isinstance(img_scale, tuple)
- log_img_scale(img_scale, skip_square=True, shape_order='wh')
- self.dynamic_scale = img_scale
- self.ratio_range = ratio_range
- self.flip_ratio = flip_ratio
- self.pad_val = pad_val
- self.max_iters = max_iters
- self.bbox_clip_border = bbox_clip_border
- @cache_randomness
- def get_indexes(self, dataset: BaseDataset) -> int:
- """Call function to collect indexes.
- Args:
- dataset (:obj:`MultiImageMixDataset`): The dataset.
- Returns:
- int: index.
- """
- for i in range(self.max_iters):
- index = random.randint(0, len(dataset))
- gt_bboxes_i = dataset[index]['gt_bboxes']
- if len(gt_bboxes_i) != 0:
- break
- return index
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """MixUp transform function.
- Args:
- results (dict): Result dict.
- Returns:
- dict: Updated result dict.
- """
- assert 'mix_results' in results
- assert len(
- results['mix_results']) == 1, 'MixUp only supports 2 images now!'
- if results['mix_results'][0]['gt_bboxes'].shape[0] == 0:
- # empty bbox
- return results
- retrieve_results = results['mix_results'][0]
- retrieve_img = retrieve_results['img']
- jit_factor = random.uniform(*self.ratio_range)
- is_flip = random.uniform(0, 1) > self.flip_ratio
- if len(retrieve_img.shape) == 3:
- out_img = np.ones(
- (self.dynamic_scale[1], self.dynamic_scale[0], 3),
- dtype=retrieve_img.dtype) * self.pad_val
- else:
- out_img = np.ones(
- self.dynamic_scale[::-1],
- dtype=retrieve_img.dtype) * self.pad_val
- # 1. keep_ratio resize
- scale_ratio = min(self.dynamic_scale[1] / retrieve_img.shape[0],
- self.dynamic_scale[0] / retrieve_img.shape[1])
- retrieve_img = mmcv.imresize(
- retrieve_img, (int(retrieve_img.shape[1] * scale_ratio),
- int(retrieve_img.shape[0] * scale_ratio)))
- # 2. paste
- out_img[:retrieve_img.shape[0], :retrieve_img.shape[1]] = retrieve_img
- # 3. scale jit
- scale_ratio *= jit_factor
- out_img = mmcv.imresize(out_img, (int(out_img.shape[1] * jit_factor),
- int(out_img.shape[0] * jit_factor)))
- # 4. flip
- if is_flip:
- out_img = out_img[:, ::-1, :]
- # 5. random crop
- ori_img = results['img']
- origin_h, origin_w = out_img.shape[:2]
- target_h, target_w = ori_img.shape[:2]
- padded_img = np.ones((max(origin_h, target_h), max(
- origin_w, target_w), 3)) * self.pad_val
- padded_img = padded_img.astype(np.uint8)
- padded_img[:origin_h, :origin_w] = out_img
- x_offset, y_offset = 0, 0
- if padded_img.shape[0] > target_h:
- y_offset = random.randint(0, padded_img.shape[0] - target_h)
- if padded_img.shape[1] > target_w:
- x_offset = random.randint(0, padded_img.shape[1] - target_w)
- padded_cropped_img = padded_img[y_offset:y_offset + target_h,
- x_offset:x_offset + target_w]
- # 6. adjust bbox
- retrieve_gt_bboxes = retrieve_results['gt_bboxes']
- retrieve_gt_bboxes.rescale_([scale_ratio, scale_ratio])
- if self.bbox_clip_border:
- retrieve_gt_bboxes.clip_([origin_h, origin_w])
- if is_flip:
- retrieve_gt_bboxes.flip_([origin_h, origin_w],
- direction='horizontal')
- # 7. filter
- cp_retrieve_gt_bboxes = retrieve_gt_bboxes.clone()
- cp_retrieve_gt_bboxes.translate_([-x_offset, -y_offset])
- if self.bbox_clip_border:
- cp_retrieve_gt_bboxes.clip_([target_h, target_w])
- # 8. mix up
- ori_img = ori_img.astype(np.float32)
- mixup_img = 0.5 * ori_img + 0.5 * padded_cropped_img.astype(np.float32)
- retrieve_gt_bboxes_labels = retrieve_results['gt_bboxes_labels']
- retrieve_gt_ignore_flags = retrieve_results['gt_ignore_flags']
- mixup_gt_bboxes = cp_retrieve_gt_bboxes.cat(
- (results['gt_bboxes'], cp_retrieve_gt_bboxes), dim=0)
- mixup_gt_bboxes_labels = np.concatenate(
- (results['gt_bboxes_labels'], retrieve_gt_bboxes_labels), axis=0)
- mixup_gt_ignore_flags = np.concatenate(
- (results['gt_ignore_flags'], retrieve_gt_ignore_flags), axis=0)
- # remove outside bbox
- inside_inds = mixup_gt_bboxes.is_inside([target_h, target_w]).numpy()
- mixup_gt_bboxes = mixup_gt_bboxes[inside_inds]
- mixup_gt_bboxes_labels = mixup_gt_bboxes_labels[inside_inds]
- mixup_gt_ignore_flags = mixup_gt_ignore_flags[inside_inds]
- results['img'] = mixup_img.astype(np.uint8)
- results['img_shape'] = mixup_img.shape[:2]
- results['gt_bboxes'] = mixup_gt_bboxes
- results['gt_bboxes_labels'] = mixup_gt_bboxes_labels
- results['gt_ignore_flags'] = mixup_gt_ignore_flags
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(dynamic_scale={self.dynamic_scale}, '
- repr_str += f'ratio_range={self.ratio_range}, '
- repr_str += f'flip_ratio={self.flip_ratio}, '
- repr_str += f'pad_val={self.pad_val}, '
- repr_str += f'max_iters={self.max_iters}, '
- repr_str += f'bbox_clip_border={self.bbox_clip_border})'
- return repr_str
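- # Editor's sketch: a hypothetical MixUp entry; like Mosaic, it needs
- # `mix_results` from a mixing-aware dataset wrapper, and is typically
- # placed after Mosaic so both images share the same scale.
- mixup_example = dict(
- type='MixUp', img_scale=(640, 640), ratio_range=(0.8, 1.6),
- pad_val=114.0)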
- @TRANSFORMS.register_module()
- class RandomAffine(BaseTransform):
- """Random affine transform data augmentation.
- This operation randomly generates an affine transform matrix which
- includes rotation, translation, shear and scaling transforms.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_ignore_flags (optional)
- Args:
- max_rotate_degree (float): Maximum degrees of rotation transform.
- Defaults to 10.
- max_translate_ratio (float): Maximum ratio of translation.
- Defaults to 0.1.
- scaling_ratio_range (tuple[float]): Min and max ratio of
- scaling transform. Defaults to (0.5, 1.5).
- max_shear_degree (float): Maximum degrees of shear
- transform. Defaults to 2.
- border (tuple[int]): Distance from width and height sides of input
- image to adjust output shape. Only used in mosaic dataset.
- Defaults to (0, 0).
- border_val (tuple[int]): Border padding values of 3 channels.
- Defaults to (114, 114, 114).
- bbox_clip_border (bool, optional): Whether to clip the objects outside
- the border of the image. In some dataset like MOT17, the gt bboxes
- are allowed to cross the border of images. Therefore, we don't
- need to clip the gt bboxes in these cases. Defaults to True.
- """
- def __init__(self,
- max_rotate_degree: float = 10.0,
- max_translate_ratio: float = 0.1,
- scaling_ratio_range: Tuple[float, float] = (0.5, 1.5),
- max_shear_degree: float = 2.0,
- border: Tuple[int, int] = (0, 0),
- border_val: Tuple[int, int, int] = (114, 114, 114),
- bbox_clip_border: bool = True) -> None:
- assert 0 <= max_translate_ratio <= 1
- assert scaling_ratio_range[0] <= scaling_ratio_range[1]
- assert scaling_ratio_range[0] > 0
- self.max_rotate_degree = max_rotate_degree
- self.max_translate_ratio = max_translate_ratio
- self.scaling_ratio_range = scaling_ratio_range
- self.max_shear_degree = max_shear_degree
- self.border = border
- self.border_val = border_val
- self.bbox_clip_border = bbox_clip_border
- @cache_randomness
- def _get_random_homography_matrix(self, height, width):
- # Rotation
- rotation_degree = random.uniform(-self.max_rotate_degree,
- self.max_rotate_degree)
- rotation_matrix = self._get_rotation_matrix(rotation_degree)
- # Scaling
- scaling_ratio = random.uniform(self.scaling_ratio_range[0],
- self.scaling_ratio_range[1])
- scaling_matrix = self._get_scaling_matrix(scaling_ratio)
- # Shear
- x_degree = random.uniform(-self.max_shear_degree,
- self.max_shear_degree)
- y_degree = random.uniform(-self.max_shear_degree,
- self.max_shear_degree)
- shear_matrix = self._get_shear_matrix(x_degree, y_degree)
- # Translation
- trans_x = random.uniform(-self.max_translate_ratio,
- self.max_translate_ratio) * width
- trans_y = random.uniform(-self.max_translate_ratio,
- self.max_translate_ratio) * height
- translate_matrix = self._get_translation_matrix(trans_x, trans_y)
- warp_matrix = (
- translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix)
- return warp_matrix
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- img = results['img']
- height = img.shape[0] + self.border[1] * 2
- width = img.shape[1] + self.border[0] * 2
- warp_matrix = self._get_random_homography_matrix(height, width)
- img = cv2.warpPerspective(
- img,
- warp_matrix,
- dsize=(width, height),
- borderValue=self.border_val)
- results['img'] = img
- results['img_shape'] = img.shape[:2]
- bboxes = results['gt_bboxes']
- num_bboxes = len(bboxes)
- if num_bboxes:
- bboxes.project_(warp_matrix)
- if self.bbox_clip_border:
- bboxes.clip_([height, width])
- # remove outside bbox
- valid_index = bboxes.is_inside([height, width]).numpy()
- results['gt_bboxes'] = bboxes[valid_index]
- results['gt_bboxes_labels'] = results['gt_bboxes_labels'][
- valid_index]
- results['gt_ignore_flags'] = results['gt_ignore_flags'][
- valid_index]
- if 'gt_masks' in results:
- raise NotImplementedError('RandomAffine only supports bbox.')
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(max_rotate_degree={self.max_rotate_degree}, '
- repr_str += f'max_translate_ratio={self.max_translate_ratio}, '
- repr_str += f'scaling_ratio_range={self.scaling_ratio_range}, '
- repr_str += f'max_shear_degree={self.max_shear_degree}, '
- repr_str += f'border={self.border}, '
- repr_str += f'border_val={self.border_val}, '
- repr_str += f'bbox_clip_border={self.bbox_clip_border})'
- return repr_str
- @staticmethod
- def _get_rotation_matrix(rotate_degrees: float) -> np.ndarray:
- radian = math.radians(rotate_degrees)
- rotation_matrix = np.array(
- [[np.cos(radian), -np.sin(radian), 0.],
- [np.sin(radian), np.cos(radian), 0.], [0., 0., 1.]],
- dtype=np.float32)
- return rotation_matrix
- @staticmethod
- def _get_scaling_matrix(scale_ratio: float) -> np.ndarray:
- scaling_matrix = np.array(
- [[scale_ratio, 0., 0.], [0., scale_ratio, 0.], [0., 0., 1.]],
- dtype=np.float32)
- return scaling_matrix
- @staticmethod
- def _get_shear_matrix(x_shear_degrees: float,
- y_shear_degrees: float) -> np.ndarray:
- x_radian = math.radians(x_shear_degrees)
- y_radian = math.radians(y_shear_degrees)
- shear_matrix = np.array([[1, np.tan(x_radian), 0.],
- [np.tan(y_radian), 1, 0.], [0., 0., 1.]],
- dtype=np.float32)
- return shear_matrix
- @staticmethod
- def _get_translation_matrix(x: float, y: float) -> np.ndarray:
- translation_matrix = np.array([[1, 0., x], [0., 1, y], [0., 0., 1.]],
- dtype=np.float32)
- return translation_matrix
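- # Editor's sketch: the warp matrix above composes right-to-left, so a
- # point is scaled first, then rotated, sheared and finally translated.
- # A minimal, hypothetical check of that ordering using the static
- # helpers:
- def _demo_affine_order():
- """Compose scale-then-rotate the same way as the warp matrix."""
- rotation = RandomAffine._get_rotation_matrix(10.0)
- scaling = RandomAffine._get_scaling_matrix(1.2)
- # Matches the rotation_matrix @ scaling_matrix sub-product above.
- return rotation @ scaling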
- @TRANSFORMS.register_module()
- class YOLOXHSVRandomAug(BaseTransform):
- """Apply HSV augmentation to image sequentially. It is referenced from
- https://github.com/Megvii-
- BaseDetection/YOLOX/blob/main/yolox/data/data_augment.py#L21.
- Required Keys:
- - img
- Modified Keys:
- - img
- Args:
- hue_delta (int): delta of hue. Defaults to 5.
- saturation_delta (int): delta of saturation. Defaults to 30.
- value_delta (int): delta of value. Defaults to 30.
- """
- def __init__(self,
- hue_delta: int = 5,
- saturation_delta: int = 30,
- value_delta: int = 30) -> None:
- self.hue_delta = hue_delta
- self.saturation_delta = saturation_delta
- self.value_delta = value_delta
- @cache_randomness
- def _get_hsv_gains(self):
- hsv_gains = np.random.uniform(-1, 1, 3) * [
- self.hue_delta, self.saturation_delta, self.value_delta
- ]
- # random selection of h, s, v
- hsv_gains *= np.random.randint(0, 2, 3)
- # prevent overflow
- hsv_gains = hsv_gains.astype(np.int16)
- return hsv_gains
- def transform(self, results: dict) -> dict:
- img = results['img']
- hsv_gains = self._get_hsv_gains()
- img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)
- img_hsv[..., 0] = (img_hsv[..., 0] + hsv_gains[0]) % 180
- img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_gains[1], 0, 255)
- img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_gains[2], 0, 255)
- cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)
- results['img'] = img
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(hue_delta={self.hue_delta}, '
- repr_str += f'saturation_delta={self.saturation_delta}, '
- repr_str += f'value_delta={self.value_delta})'
- return repr_str
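- # Editor's sketch: a hypothetical config entry; the values mirror the
- # defaults above, which follow the YOLOX reference implementation
- # linked in the class docstring.
- hsv_aug_example = dict(
- type='YOLOXHSVRandomAug', hue_delta=5, saturation_delta=30,
- value_delta=30)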
- @TRANSFORMS.register_module()
- class CopyPaste(BaseTransform):
- """Simple Copy-Paste is a Strong Data Augmentation Method for Instance
- Segmentation The simple copy-paste transform steps are as follows:
- 1. The destination image is already resized with aspect ratio kept,
- cropped and padded.
- 2. Randomly select a source image, which is also already resized
- with aspect ratio kept, cropped and padded in a similar way
- as the destination image.
- 3. Randomly select some objects from the source image.
- 4. Paste these source objects to the destination image directly,
- since the source and destination images have the same size.
- 5. Update the object masks of the destination image, since some
- original objects may be occluded.
- 6. Generate bboxes from the updated destination masks, filter out
- objects which are totally occluded, and adjust bboxes which are
- partly occluded.
- 7. Append selected source bboxes, masks, and labels.
- Required Keys:
- - img
- - gt_bboxes (BaseBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- - gt_masks (BitmapMasks) (optional)
- Modified Keys:
- - img
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_ignore_flags (optional)
- - gt_masks (optional)
- Args:
- max_num_pasted (int): The maximum number of pasted objects.
- Defaults to 100.
- bbox_occluded_thr (int): The threshold of occluded bbox.
- Defaults to 10.
- mask_occluded_thr (int): The threshold of occluded mask.
- Defaults to 300.
- selected (bool): Whether to select objects or not. If selected is
- False, all objects of the source image will be pasted to the
- destination image.
- Defaults to True.
- """
- def __init__(
- self,
- max_num_pasted: int = 100,
- bbox_occluded_thr: int = 10,
- mask_occluded_thr: int = 300,
- selected: bool = True,
- ) -> None:
- self.max_num_pasted = max_num_pasted
- self.bbox_occluded_thr = bbox_occluded_thr
- self.mask_occluded_thr = mask_occluded_thr
- self.selected = selected
- @cache_randomness
- def get_indexes(self, dataset: BaseDataset) -> int:
- """Call function to collect indexes.s.
- Args:
- dataset (:obj:`MultiImageMixDataset`): The dataset.
- Returns:
- list: Indexes.
- """
- return random.randint(0, len(dataset))
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Transform function to make a copy-paste of image.
- Args:
- results (dict): Result dict.
- Returns:
- dict: Result dict with copy-paste transformed.
- """
- assert 'mix_results' in results
- num_images = len(results['mix_results'])
- assert num_images == 1, \
- f'CopyPaste only supports processing 2 images, got {num_images}'
- if self.selected:
- selected_results = self._select_object(results['mix_results'][0])
- else:
- selected_results = results['mix_results'][0]
- return self._copy_paste(results, selected_results)
- @cache_randomness
- def _get_selected_inds(self, num_bboxes: int) -> np.ndarray:
- max_num_pasted = min(num_bboxes + 1, self.max_num_pasted)
- num_pasted = np.random.randint(0, max_num_pasted)
- return np.random.choice(num_bboxes, size=num_pasted, replace=False)
- def _select_object(self, results: dict) -> dict:
- """Select some objects from the source results."""
- bboxes = results['gt_bboxes']
- labels = results['gt_bboxes_labels']
- masks = results['gt_masks']
- ignore_flags = results['gt_ignore_flags']
- selected_inds = self._get_selected_inds(bboxes.shape[0])
- selected_bboxes = bboxes[selected_inds]
- selected_labels = labels[selected_inds]
- selected_masks = masks[selected_inds]
- selected_ignore_flags = ignore_flags[selected_inds]
- results['gt_bboxes'] = selected_bboxes
- results['gt_bboxes_labels'] = selected_labels
- results['gt_masks'] = selected_masks
- results['gt_ignore_flags'] = selected_ignore_flags
- return results
- def _copy_paste(self, dst_results: dict, src_results: dict) -> dict:
- """CopyPaste transform function.
- Args:
- dst_results (dict): Result dict of the destination image.
- src_results (dict): Result dict of the source image.
- Returns:
- dict: Updated result dict.
- """
- dst_img = dst_results['img']
- dst_bboxes = dst_results['gt_bboxes']
- dst_labels = dst_results['gt_bboxes_labels']
- dst_masks = dst_results['gt_masks']
- dst_ignore_flags = dst_results['gt_ignore_flags']
- src_img = src_results['img']
- src_bboxes = src_results['gt_bboxes']
- src_labels = src_results['gt_bboxes_labels']
- src_masks = src_results['gt_masks']
- src_ignore_flags = src_results['gt_ignore_flags']
- if len(src_bboxes) == 0:
- return dst_results
- # update masks and generate bboxes from updated masks
- composed_mask = np.where(np.any(src_masks.masks, axis=0), 1, 0)
- updated_dst_masks = self._get_updated_masks(dst_masks, composed_mask)
- updated_dst_bboxes = updated_dst_masks.get_bboxes(type(dst_bboxes))
- assert len(updated_dst_bboxes) == len(updated_dst_masks)
- # filter totally occluded objects
- l1_distance = (updated_dst_bboxes.tensor - dst_bboxes.tensor).abs()
- bboxes_inds = (l1_distance <= self.bbox_occluded_thr).all(
- dim=-1).numpy()
- masks_inds = updated_dst_masks.masks.sum(
- axis=(1, 2)) > self.mask_occluded_thr
- valid_inds = bboxes_inds | masks_inds
- # Paste source objects to destination image directly
- img = dst_img * (1 - composed_mask[..., np.newaxis]
- ) + src_img * composed_mask[..., np.newaxis]
- bboxes = src_bboxes.cat([updated_dst_bboxes[valid_inds], src_bboxes])
- labels = np.concatenate([dst_labels[valid_inds], src_labels])
- masks = np.concatenate(
- [updated_dst_masks.masks[valid_inds], src_masks.masks])
- ignore_flags = np.concatenate(
- [dst_ignore_flags[valid_inds], src_ignore_flags])
- dst_results['img'] = img
- dst_results['gt_bboxes'] = bboxes
- dst_results['gt_bboxes_labels'] = labels
- dst_results['gt_masks'] = BitmapMasks(masks, masks.shape[1],
- masks.shape[2])
- dst_results['gt_ignore_flags'] = ignore_flags
- return dst_results
- def _get_updated_masks(self, masks: BitmapMasks,
- composed_mask: np.ndarray) -> BitmapMasks:
- """Update masks with composed mask."""
- assert masks.masks.shape[-2:] == composed_mask.shape[-2:], \
- 'Cannot compare two arrays of different size'
- masks.masks = np.where(composed_mask, 0, masks.masks)
- return masks
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(max_num_pasted={self.max_num_pasted}, '
- repr_str += f'bbox_occluded_thr={self.bbox_occluded_thr}, '
- repr_str += f'mask_occluded_thr={self.mask_occluded_thr}, '
- repr_str += f'selected={self.selected})'
- return repr_str
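- # Editor's sketch: a hypothetical CopyPaste entry. It requires
- # instance masks (`gt_masks`) and a `mix_results` entry supplied by a
- # mixing-aware dataset wrapper, as asserted in transform().
- copy_paste_example = dict(
- type='CopyPaste', max_num_pasted=100, bbox_occluded_thr=10,
- mask_occluded_thr=300, selected=True)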
- @TRANSFORMS.register_module()
- class RandomErasing(BaseTransform):
- """RandomErasing operation.
- Random Erasing randomly selects a rectangle region
- in an image and erases its pixels with random values.
- `RandomErasing <https://arxiv.org/abs/1708.04896>`_.
- Required Keys:
- - img
- - gt_bboxes (HorizontalBoxes[torch.float32]) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- - gt_masks (BitmapMasks) (optional)
- Modified Keys:
- - img
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_ignore_flags (optional)
- - gt_masks (optional)
- Args:
- n_patches (int or tuple[int, int]): Number of regions to be dropped.
- If it is given as a tuple, number of patches will be randomly
- selected from the closed interval [``n_patches[0]``,
- ``n_patches[1]``].
- ratio (float or tuple[float, float]): The ratio of erased regions.
- It can be ``float`` to use a fixed ratio or ``tuple[float, float]``
- to randomly choose ratio from the interval.
- squared (bool): Whether to erase square regions. Defaults to True.
- bbox_erased_thr (float): The threshold for the maximum area proportion
- of the bbox to be erased. When the proportion of the area where the
- bbox is erased is greater than the threshold, the bbox will be
- removed. Defaults to 0.9.
- img_border_value (int or float or tuple): The fill value for the
- erased image regions. If float, the same fill value will be used for
- all three channels of the image. If tuple, it should have 3 elements.
- Defaults to 128.
- mask_border_value (int): The fill value used for masks. Defaults to 0.
- seg_ignore_label (int): The fill value used for the segmentation map.
- Note this value must equal ``ignore_label`` in ``semantic_head``
- of the corresponding config. Defaults to 255.
- """
- def __init__(
- self,
- n_patches: Union[int, Tuple[int, int]],
- ratio: Union[float, Tuple[float, float]],
- squared: bool = True,
- bbox_erased_thr: float = 0.9,
- img_border_value: Union[int, float, tuple] = 128,
- mask_border_value: int = 0,
- seg_ignore_label: int = 255,
- ) -> None:
- if isinstance(n_patches, tuple):
- assert len(n_patches) == 2 and 0 <= n_patches[0] < n_patches[1]
- else:
- n_patches = (n_patches, n_patches)
- if isinstance(ratio, tuple):
- assert len(ratio) == 2 and 0 <= ratio[0] < ratio[1] <= 1
- else:
- ratio = (ratio, ratio)
- self.n_patches = n_patches
- self.ratio = ratio
- self.squared = squared
- self.bbox_erased_thr = bbox_erased_thr
- self.img_border_value = img_border_value
- self.mask_border_value = mask_border_value
- self.seg_ignore_label = seg_ignore_label
- @cache_randomness
- def _get_patches(self, img_shape: Tuple[int, int]) -> np.ndarray:
- """Get patches for random erasing."""
- patches = []
- n_patches = np.random.randint(self.n_patches[0], self.n_patches[1] + 1)
- for _ in range(n_patches):
- if self.squared:
- ratio = np.random.random() * (self.ratio[1] -
- self.ratio[0]) + self.ratio[0]
- ratio = (ratio, ratio)
- else:
- ratio = (np.random.random() * (self.ratio[1] - self.ratio[0]) +
- self.ratio[0], np.random.random() *
- (self.ratio[1] - self.ratio[0]) + self.ratio[0])
- ph, pw = int(img_shape[0] * ratio[0]), int(img_shape[1] * ratio[1])
- px1 = np.random.randint(0, img_shape[1] - pw)
- py1 = np.random.randint(0, img_shape[0] - ph)
- px2, py2 = px1 + pw, py1 + ph
- patches.append([px1, py1, px2, py2])
- return np.array(patches)
- def _transform_img(self, results: dict, patches: List[list]) -> None:
- """Random erasing the image."""
- for patch in patches:
- px1, py1, px2, py2 = patch
- results['img'][py1:py2, px1:px2, :] = self.img_border_value
- def _transform_bboxes(self, results: dict, patches: List[list]) -> None:
- """Random erasing the bboxes."""
- bboxes = results['gt_bboxes']
- # TODO: unify the logic by using operators in BaseBoxes.
- assert isinstance(bboxes, HorizontalBoxes)
- bboxes = bboxes.numpy()
- left_top = np.maximum(bboxes[:, None, :2], patches[:, :2])
- right_bottom = np.minimum(bboxes[:, None, 2:], patches[:, 2:])
- wh = np.maximum(right_bottom - left_top, 0)
- inter_areas = wh[:, :, 0] * wh[:, :, 1]
- bbox_areas = (bboxes[:, 2] - bboxes[:, 0]) * (
- bboxes[:, 3] - bboxes[:, 1])
- bboxes_erased_ratio = inter_areas.sum(-1) / (bbox_areas + 1e-7)
- valid_inds = bboxes_erased_ratio < self.bbox_erased_thr
- results['gt_bboxes'] = HorizontalBoxes(bboxes[valid_inds])
- results['gt_bboxes_labels'] = results['gt_bboxes_labels'][valid_inds]
- results['gt_ignore_flags'] = results['gt_ignore_flags'][valid_inds]
- if results.get('gt_masks', None) is not None:
- results['gt_masks'] = results['gt_masks'][valid_inds]
- def _transform_masks(self, results: dict, patches: List[list]) -> None:
- """Random erasing the masks."""
- for patch in patches:
- px1, py1, px2, py2 = patch
- results['gt_masks'].masks[:, py1:py2,
- px1:px2] = self.mask_border_value
- def _transform_seg(self, results: dict, patches: List[list]) -> None:
- """Random erasing the segmentation map."""
- for patch in patches:
- px1, py1, px2, py2 = patch
- results['gt_seg_map'][py1:py2, px1:px2] = self.seg_ignore_label
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Transform function to erase some regions of image."""
- patches = self._get_patches(results['img_shape'])
- self._transform_img(results, patches)
- if results.get('gt_bboxes', None) is not None:
- self._transform_bboxes(results, patches)
- if results.get('gt_masks', None) is not None:
- self._transform_masks(results, patches)
- if results.get('gt_seg_map', None) is not None:
- self._transform_seg(results, patches)
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(n_patches={self.n_patches}, '
- repr_str += f'ratio={self.ratio}, '
- repr_str += f'squared={self.squared}, '
- repr_str += f'bbox_erased_thr={self.bbox_erased_thr}, '
- repr_str += f'img_border_value={self.img_border_value}, '
- repr_str += f'mask_border_value={self.mask_border_value}, '
- repr_str += f'seg_ignore_label={self.seg_ignore_label})'
- return repr_str
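- # Editor's sketch: a hypothetical RandomErasing entry dropping up to
- # 3 non-square patches covering at most 20% of each side.
- random_erasing_example = dict(
- type='RandomErasing', n_patches=(1, 3), ratio=(0.0, 0.2),
- squared=False, img_border_value=128)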
- @TRANSFORMS.register_module()
- class CachedMosaic(Mosaic):
- """Cached mosaic augmentation.
- The cached mosaic transform randomly selects images from the cache
- and combines them into one output image.
- .. code:: text
- mosaic transform
- center_x
- +------------------------------+
- | pad | pad |
- | +-----------+ |
- | | | |
- | | image1 |--------+ |
- | | | | |
- | | | image2 | |
- center_y |----+-------------+-----------|
- | | cropped | |
- |pad | image3 | image4 |
- | | | |
- +----|-------------+-----------+
- | |
- +-------------+
- The cached mosaic transform steps are as follows:
- 1. Append the results from the last transform to the cache.
- 2. Choose the mosaic center as the intersection of the 4 images.
- 3. Get the top-left image according to the index, and randomly
- sample another 3 images from the result cache.
- 4. A sub-image will be cropped if it is larger than the mosaic patch.
- Required Keys:
- - img
- - gt_bboxes (np.float32) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_ignore_flags (optional)
- Args:
- img_scale (Sequence[int]): Image size after mosaic pipeline of single
- image. The shape order should be (width, height).
- Defaults to (640, 640).
- center_ratio_range (Sequence[float]): Center ratio range of mosaic
- output. Defaults to (0.5, 1.5).
- bbox_clip_border (bool, optional): Whether to clip the objects outside
- the border of the image. In some dataset like MOT17, the gt bboxes
- are allowed to cross the border of images. Therefore, we don't
- need to clip the gt bboxes in these cases. Defaults to True.
- pad_val (int): Pad value. Defaults to 114.
- prob (float): Probability of applying this transformation.
- Defaults to 1.0.
- max_cached_images (int): The maximum length of the cache. The larger
- the cache, the stronger the randomness of this transform. As a
- rule of thumb, about 10 cached results per image used in each
- output (4 images for mosaic) suffice for randomness.
- Defaults to 40.
- random_pop (bool): Whether to randomly pop a result from the cache
- when the cache is full. If set to False, use FIFO popping method.
- Defaults to True.
- """
- def __init__(self,
- *args,
- max_cached_images: int = 40,
- random_pop: bool = True,
- **kwargs) -> None:
- super().__init__(*args, **kwargs)
- self.results_cache = []
- self.random_pop = random_pop
- assert max_cached_images >= 4, 'The length of the cache must be ' \
- f'>= 4, but got {max_cached_images}.'
- self.max_cached_images = max_cached_images
- @cache_randomness
- def get_indexes(self, cache: list) -> list:
- """Call function to collect indexes.
- Args:
- cache (list): The results cache.
- Returns:
- list: indexes.
- """
- indexes = [random.randint(0, len(cache) - 1) for _ in range(3)]
- return indexes
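- # Note: the three indexes are drawn independently (with replacement),
- # so the same cached result may occupy more than one mosaic quadrant.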
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """Mosaic transform function.
- Args:
- results (dict): Result dict.
- Returns:
- dict: Updated result dict.
- """
- # cache and pop images
- self.results_cache.append(copy.deepcopy(results))
- if len(self.results_cache) > self.max_cached_images:
- if self.random_pop:
- index = random.randint(0, len(self.results_cache) - 1)
- else:
- index = 0
- self.results_cache.pop(index)
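- # random_pop=True evicts a uniformly random cache entry;
- # random_pop=False always evicts index 0, i.e. plain FIFO eviction.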
- if len(self.results_cache) <= 4:
- return results
- if random.uniform(0, 1) > self.prob:
- return results
- indices = self.get_indexes(self.results_cache)
- mix_results = [copy.deepcopy(self.results_cache[i]) for i in indices]
- # TODO: refactor mosaic to reuse these code.
- mosaic_bboxes = []
- mosaic_bboxes_labels = []
- mosaic_ignore_flags = []
- mosaic_masks = []
- with_mask = 'gt_masks' in results
- if len(results['img'].shape) == 3:
- mosaic_img = np.full(
- (int(self.img_scale[1] * 2), int(self.img_scale[0] * 2), 3),
- self.pad_val,
- dtype=results['img'].dtype)
- else:
- mosaic_img = np.full(
- (int(self.img_scale[1] * 2), int(self.img_scale[0] * 2)),
- self.pad_val,
- dtype=results['img'].dtype)
- # mosaic center x, y
- center_x = int(
- random.uniform(*self.center_ratio_range) * self.img_scale[0])
- center_y = int(
- random.uniform(*self.center_ratio_range) * self.img_scale[1])
- center_position = (center_x, center_y)
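- # e.g. img_scale=(640, 640) with center_ratio_range=(0.5, 1.5) places
- # the mosaic center anywhere in [320, 960] on the 1280x1280 canvas.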
- loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
- for i, loc in enumerate(loc_strs):
- if loc == 'top_left':
- results_patch = copy.deepcopy(results)
- else:
- results_patch = copy.deepcopy(mix_results[i - 1])
- img_i = results_patch['img']
- h_i, w_i = img_i.shape[:2]
- # keep_ratio resize
- scale_ratio_i = min(self.img_scale[1] / h_i,
- self.img_scale[0] / w_i)
- img_i = mmcv.imresize(
- img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)))
- # compute the combine parameters
- paste_coord, crop_coord = self._mosaic_combine(
- loc, center_position, img_i.shape[:2][::-1])
- x1_p, y1_p, x2_p, y2_p = paste_coord
- x1_c, y1_c, x2_c, y2_c = crop_coord
- # crop and paste image
- mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c]
- # adjust coordinate
- gt_bboxes_i = results_patch['gt_bboxes']
- gt_bboxes_labels_i = results_patch['gt_bboxes_labels']
- gt_ignore_flags_i = results_patch['gt_ignore_flags']
- padw = x1_p - x1_c
- padh = y1_p - y1_c
- gt_bboxes_i.rescale_([scale_ratio_i, scale_ratio_i])
- gt_bboxes_i.translate_([padw, padh])
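- # e.g. pasting at (x1_p, y1_p) = (320, 240) a region cropped from
- # (x1_c, y1_c) = (0, 60) shifts the boxes by (padw, padh) = (320, 180).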
- mosaic_bboxes.append(gt_bboxes_i)
- mosaic_bboxes_labels.append(gt_bboxes_labels_i)
- mosaic_ignore_flags.append(gt_ignore_flags_i)
- if with_mask and results_patch.get('gt_masks', None) is not None:
- gt_masks_i = results_patch['gt_masks']
- gt_masks_i = gt_masks_i.rescale(float(scale_ratio_i))
- # out_shape follows the (h, w) convention of the 2x mosaic canvas
- gt_masks_i = gt_masks_i.translate(
- out_shape=(int(self.img_scale[1] * 2),
- int(self.img_scale[0] * 2)),
- offset=padw,
- direction='horizontal')
- gt_masks_i = gt_masks_i.translate(
- out_shape=(int(self.img_scale[1] * 2),
- int(self.img_scale[0] * 2)),
- offset=padh,
- direction='vertical')
- mosaic_masks.append(gt_masks_i)
- mosaic_bboxes = mosaic_bboxes[0].cat(mosaic_bboxes, 0)
- mosaic_bboxes_labels = np.concatenate(mosaic_bboxes_labels, 0)
- mosaic_ignore_flags = np.concatenate(mosaic_ignore_flags, 0)
- if self.bbox_clip_border:
- mosaic_bboxes.clip_([2 * self.img_scale[1], 2 * self.img_scale[0]])
- # remove outside bboxes
- inside_inds = mosaic_bboxes.is_inside(
- [2 * self.img_scale[1], 2 * self.img_scale[0]]).numpy()
- mosaic_bboxes = mosaic_bboxes[inside_inds]
- mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds]
- mosaic_ignore_flags = mosaic_ignore_flags[inside_inds]
- results['img'] = mosaic_img
- results['img_shape'] = mosaic_img.shape[:2]
- results['gt_bboxes'] = mosaic_bboxes
- results['gt_bboxes_labels'] = mosaic_bboxes_labels
- results['gt_ignore_flags'] = mosaic_ignore_flags
- if with_mask:
- mosaic_masks = mosaic_masks[0].cat(mosaic_masks)
- results['gt_masks'] = mosaic_masks[inside_inds]
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(img_scale={self.img_scale}, '
- repr_str += f'center_ratio_range={self.center_ratio_range}, '
- repr_str += f'pad_val={self.pad_val}, '
- repr_str += f'prob={self.prob}, '
- repr_str += f'max_cached_images={self.max_cached_images}, '
- repr_str += f'random_pop={self.random_pop})'
- return repr_str
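- # Usage sketch (illustrative): CachedMosaic is a drop-in replacement
- # for Mosaic; `example_cached_mosaic` is a hypothetical name. A larger
- # max_cached_images trades memory for stronger shuffling.
- example_cached_mosaic = dict(
- type='CachedMosaic',
- img_scale=(640, 640),
- pad_val=114.0,
- max_cached_images=40,
- random_pop=True)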
- @TRANSFORMS.register_module()
- class CachedMixUp(BaseTransform):
- """Cached mixup data augmentation.
- .. code:: text
- mixup transform
- +------------------------------+
- | mixup image | |
- | +--------|--------+ |
- | | | | |
- |---------------+ | |
- | | | |
- | | image | |
- | | | |
- | | | |
- | |-----------------+ |
- | pad |
- +------------------------------+
- The cached mixup transform steps are as follows:
- 1. Append the results from the last transform into the cache.
- 2. Pick another random image from the cache and embed it in the
- top-left patch (after padding and resizing).
- 3. The target of the mixup transform is the weighted average of the
- mixup image and the original image.
- Required Keys:
- - img
- - gt_bboxes (np.float32) (optional)
- - gt_bboxes_labels (np.int64) (optional)
- - gt_ignore_flags (bool) (optional)
- - gt_masks (BitmapMasks) (optional)
- Modified Keys:
- - img
- - img_shape
- - gt_bboxes (optional)
- - gt_bboxes_labels (optional)
- - gt_ignore_flags (optional)
- - gt_masks (optional)
- Args:
- img_scale (Sequence[int]): Image output size after mixup pipeline.
- The shape order should be (width, height). Defaults to (640, 640).
- ratio_range (Sequence[float]): Scale ratio of mixup image.
- Defaults to (0.5, 1.5).
- flip_ratio (float): Horizontal flip ratio of mixup image.
- Defaults to 0.5.
- pad_val (int): Pad value. Defaults to 114.
- max_iters (int): The maximum number of sampling attempts. If no
- cached result with non-empty gt_bboxes is found within
- `max_iters` draws, sampling stops anyway. Defaults to 15.
- bbox_clip_border (bool, optional): Whether to clip the objects outside
- the border of the image. In some dataset like MOT17, the gt bboxes
- are allowed to cross the border of images. Therefore, we don't
- need to clip the gt bboxes in these cases. Defaults to True.
- max_cached_images (int): The maximum length of the cache. The larger
- the cache, the stronger the randomness of this transform. As a
- rule of thumb, about 10 cached results per image used in each
- output (2 images for mixup) suffice for randomness.
- Defaults to 20.
- random_pop (bool): Whether to randomly pop a result from the cache
- when the cache is full. If set to False, use FIFO popping method.
- Defaults to True.
- prob (float): Probability of applying this transformation.
- Defaults to 1.0.
- """
- def __init__(self,
- img_scale: Tuple[int, int] = (640, 640),
- ratio_range: Tuple[float, float] = (0.5, 1.5),
- flip_ratio: float = 0.5,
- pad_val: float = 114.0,
- max_iters: int = 15,
- bbox_clip_border: bool = True,
- max_cached_images: int = 20,
- random_pop: bool = True,
- prob: float = 1.0) -> None:
- assert isinstance(img_scale, tuple)
- assert max_cached_images >= 2, 'The length of the cache must be ' \
- f'>= 2, but got {max_cached_images}.'
- assert 0 <= prob <= 1.0, 'The probability should be in range ' \
- f'[0, 1], but got {prob}.'
- self.dynamic_scale = img_scale
- self.ratio_range = ratio_range
- self.flip_ratio = flip_ratio
- self.pad_val = pad_val
- self.max_iters = max_iters
- self.bbox_clip_border = bbox_clip_border
- self.results_cache = []
- self.max_cached_images = max_cached_images
- self.random_pop = random_pop
- self.prob = prob
- @cache_randomness
- def get_indexes(self, cache: list) -> int:
- """Call function to collect indexes.
- Args:
- cache (list): The result cache.
- Returns:
- int: index.
- """
- for _ in range(self.max_iters):
- index = random.randint(0, len(cache) - 1)
- gt_bboxes_i = cache[index]['gt_bboxes']
- if len(gt_bboxes_i) != 0:
- break
- return index
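- # Note: if every draw within max_iters hits a cached result with empty
- # gt_bboxes, the last sampled index is returned; transform() then takes
- # the empty-bbox early exit and returns the input unchanged.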
- @autocast_box_type()
- def transform(self, results: dict) -> dict:
- """MixUp transform function.
- Args:
- results (dict): Result dict.
- Returns:
- dict: Updated result dict.
- """
- # cache and pop images
- self.results_cache.append(copy.deepcopy(results))
- if len(self.results_cache) > self.max_cached_images:
- if self.random_pop:
- index = random.randint(0, len(self.results_cache) - 1)
- else:
- index = 0
- self.results_cache.pop(index)
- if len(self.results_cache) <= 1:
- return results
- if random.uniform(0, 1) > self.prob:
- return results
- index = self.get_indexes(self.results_cache)
- retrieve_results = copy.deepcopy(self.results_cache[index])
- # TODO: refactor mixup to reuse these code.
- if retrieve_results['gt_bboxes'].shape[0] == 0:
- # empty bbox
- return results
- retrieve_img = retrieve_results['img']
- with_mask = 'gt_masks' in results
- jit_factor = random.uniform(*self.ratio_range)
- is_flip = random.uniform(0, 1) > self.flip_ratio
- if len(retrieve_img.shape) == 3:
- out_img = np.full(
- (self.dynamic_scale[1], self.dynamic_scale[0], 3),
- self.pad_val,
- dtype=retrieve_img.dtype)
- else:
- out_img = np.full(
- self.dynamic_scale[::-1],
- self.pad_val,
- dtype=retrieve_img.dtype)
- # 1. keep_ratio resize
- scale_ratio = min(self.dynamic_scale[1] / retrieve_img.shape[0],
- self.dynamic_scale[0] / retrieve_img.shape[1])
- retrieve_img = mmcv.imresize(
- retrieve_img, (int(retrieve_img.shape[1] * scale_ratio),
- int(retrieve_img.shape[0] * scale_ratio)))
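- # e.g. dynamic_scale=(640, 640) and a 480x960 (h x w) cached image give
- # scale_ratio = min(640 / 480, 640 / 960) = 2 / 3, i.e. a 320x640 resize.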
- # 2. paste
- out_img[:retrieve_img.shape[0], :retrieve_img.shape[1]] = retrieve_img
- # 3. scale jit
- scale_ratio *= jit_factor
- out_img = mmcv.imresize(out_img, (int(out_img.shape[1] * jit_factor),
- int(out_img.shape[0] * jit_factor)))
- # 4. flip
- if is_flip:
- # slice only the width axis so grayscale images also work
- out_img = out_img[:, ::-1]
- # 5. random crop
- ori_img = results['img']
- origin_h, origin_w = out_img.shape[:2]
- target_h, target_w = ori_img.shape[:2]
- padded_img = np.full((max(origin_h, target_h),
- max(origin_w, target_w), 3),
- self.pad_val,
- dtype=np.uint8)
- padded_img[:origin_h, :origin_w] = out_img
- x_offset, y_offset = 0, 0
- if padded_img.shape[0] > target_h:
- y_offset = random.randint(0, padded_img.shape[0] - target_h)
- if padded_img.shape[1] > target_w:
- x_offset = random.randint(0, padded_img.shape[1] - target_w)
- padded_cropped_img = padded_img[y_offset:y_offset + target_h,
- x_offset:x_offset + target_w]
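- # e.g. a 960x960 padded image and a 640x640 target draw x_offset and
- # y_offset from [0, 320], cropping a random 640x640 window.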
- # 6. adjust bbox
- retrieve_gt_bboxes = retrieve_results['gt_bboxes']
- retrieve_gt_bboxes.rescale_([scale_ratio, scale_ratio])
- if with_mask:
- retrieve_gt_masks = retrieve_results['gt_masks'].rescale(
- scale_ratio)
- if self.bbox_clip_border:
- retrieve_gt_bboxes.clip_([origin_h, origin_w])
- if is_flip:
- retrieve_gt_bboxes.flip_([origin_h, origin_w],
- direction='horizontal')
- if with_mask:
- retrieve_gt_masks = retrieve_gt_masks.flip()
- # 7. filter
- cp_retrieve_gt_bboxes = retrieve_gt_bboxes.clone()
- cp_retrieve_gt_bboxes.translate_([-x_offset, -y_offset])
- if with_mask:
- retrieve_gt_masks = retrieve_gt_masks.translate(
- out_shape=(target_h, target_w),
- offset=-x_offset,
- direction='horizontal')
- retrieve_gt_masks = retrieve_gt_masks.translate(
- out_shape=(target_h, target_w),
- offset=-y_offset,
- direction='vertical')
- if self.bbox_clip_border:
- cp_retrieve_gt_bboxes.clip_([target_h, target_w])
- # 8. mix up
- ori_img = ori_img.astype(np.float32)
- mixup_img = 0.5 * ori_img + 0.5 * padded_cropped_img.astype(np.float32)
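- # The blend weights are fixed at 0.5 / 0.5 here, unlike MixUp variants
- # that sample the mixing weight from a Beta distribution.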
- retrieve_gt_bboxes_labels = retrieve_results['gt_bboxes_labels']
- retrieve_gt_ignore_flags = retrieve_results['gt_ignore_flags']
- mixup_gt_bboxes = cp_retrieve_gt_bboxes.cat(
- (results['gt_bboxes'], cp_retrieve_gt_bboxes), dim=0)
- mixup_gt_bboxes_labels = np.concatenate(
- (results['gt_bboxes_labels'], retrieve_gt_bboxes_labels), axis=0)
- mixup_gt_ignore_flags = np.concatenate(
- (results['gt_ignore_flags'], retrieve_gt_ignore_flags), axis=0)
- if with_mask:
- mixup_gt_masks = retrieve_gt_masks.cat(
- [results['gt_masks'], retrieve_gt_masks])
- # remove outside bbox
- inside_inds = mixup_gt_bboxes.is_inside([target_h, target_w]).numpy()
- mixup_gt_bboxes = mixup_gt_bboxes[inside_inds]
- mixup_gt_bboxes_labels = mixup_gt_bboxes_labels[inside_inds]
- mixup_gt_ignore_flags = mixup_gt_ignore_flags[inside_inds]
- if with_mask:
- mixup_gt_masks = mixup_gt_masks[inside_inds]
- results['img'] = mixup_img.astype(np.uint8)
- results['img_shape'] = mixup_img.shape[:2]
- results['gt_bboxes'] = mixup_gt_bboxes
- results['gt_bboxes_labels'] = mixup_gt_bboxes_labels
- results['gt_ignore_flags'] = mixup_gt_ignore_flags
- if with_mask:
- results['gt_masks'] = mixup_gt_masks
- return results
- def __repr__(self):
- repr_str = self.__class__.__name__
- repr_str += f'(dynamic_scale={self.dynamic_scale}, '
- repr_str += f'ratio_range={self.ratio_range}, '
- repr_str += f'flip_ratio={self.flip_ratio}, '
- repr_str += f'pad_val={self.pad_val}, '
- repr_str += f'max_iters={self.max_iters}, '
- repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
- repr_str += f'max_cached_images={self.max_cached_images}, '
- repr_str += f'random_pop={self.random_pop}, '
- repr_str += f'prob={self.prob})'
- return repr_str
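- # Usage sketch (illustrative): the cached transforms chained in an
- # RTMDet-style training pipeline. `example_cached_pipeline` is a
- # hypothetical name and the values simply echo the defaults above.
- example_cached_pipeline = [
- dict(type='LoadImageFromFile'),
- dict(type='LoadAnnotations', with_bbox=True),
- dict(type='CachedMosaic', img_scale=(640, 640), pad_val=114.0),
- dict(type='RandomCrop', crop_size=(640, 640)),
- dict(type='CachedMixUp', img_scale=(640, 640), ratio_range=(1.0, 1.0)),
- dict(type='PackDetInputs'),
- ]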