Commit aa6d38e

AC: Added custom evaluator for two-stream I3D model (openvinotoolkit#1674)

* Updated converter

* Added custom evaluator for two-stream I3D model

* Existing model has been updated in accordance with the changes

* Fix pylint

* Fix comments in converter, revert changes in dataset_definitions.yml

* Fix comments in evaluator
Anna Mironova authored Oct 21, 2020
1 parent 350702f commit aa6d38e
Showing 5 changed files with 556 additions and 67 deletions.
models/public/i3d-rgb-tf/accuracy-check.yml (1 addition, 1 deletion)

@@ -1,5 +1,5 @@
 models:
-  - name: i3d-rgb-tf
+  - name: i3d-rgb
     launchers:
       - framework: dlsdk
         adapter: classification
@@ -15,11 +15,12 @@
"""

from collections import OrderedDict
import warnings

from ..utils import read_json, read_txt, check_file_existence
from ..representation import ClassificationAnnotation
from ..data_readers import ClipIdentifier
from ..config import PathField, NumberField, StringField, BoolField
from ..config import PathField, NumberField, StringField, BoolField, ConfigError

from .format_converter import BaseFormatConverter, ConverterReturn, verify_label_map

@@ -50,7 +51,11 @@ def parameters(cls):
             'dataset_meta_file': PathField(
                 description='path to json file with dataset meta (e.g. label_map)', optional=True
             ),
-            'numpy_input': BoolField(description='use numpy arrays instead of images', optional=True, default=False),
+            'numpy_input': BoolField(description='use numpy arrays as input', optional=True, default=False),
+            'two_stream_input': BoolField(description='use two streams: images and numpy arrays as input',
+                                          optional=True, default=False),
+            'image_subpath': StringField(description="sub-directory for images", optional=True),
+            'numpy_subpath': StringField(description="sub-directory for numpy arrays", optional=True),
             'num_samples': NumberField(
                 description='number of samples used for annotation', optional=True, value_type=int, min_value=1
             )
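For orientation, these options are consumed from the converter section of a dataset config. A minimal sketch of the parsed form the converter would receive — the converter name, dataset paths, and file names here are illustrative assumptions, not taken from this commit:

    # Hypothetical parsed config enabling two-stream input; the provider name
    # 'clip_action_recognition' and all paths are assumptions for illustration.
    converter_config = {
        'converter': 'clip_action_recognition',
        'annotation_file': 'kinetics/kinetics_400.json',
        'two_stream_input': True,
        'image_subpath': 'rgb',    # frames under <data_dir>/rgb/<video>/image_XXXXX.jpg
        'numpy_subpath': 'flow',   # arrays under <data_dir>/flow/<video>/image_XXXXX.npy
    }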
@@ -67,11 +72,38 @@ def configure(self):
         self.subset = self.get_value_from_config('subset')
         self.dataset_meta = self.get_value_from_config('dataset_meta_file')
         self.numpy_input = self.get_value_from_config('numpy_input')
+        self.two_stream_input = self.get_value_from_config('two_stream_input')
+        self.numpy_subdir = self.get_value_from_config('numpy_subpath')
+        self.image_subdir = self.get_value_from_config('image_subpath')
         self.num_samples = self.get_value_from_config('num_samples')
+
+        if self.numpy_subdir and (self.numpy_input or
+                                  self.two_stream_input) and not (self.data_dir / self.numpy_subdir).exists():
+            raise ConfigError('Please check numpy_subpath or data_dir. '
+                              'Path {} does not exist'.format(self.data_dir / self.numpy_subdir))
+
+        if self.image_subdir and (not self.numpy_input or
+                                  self.two_stream_input) and not (self.data_dir / self.image_subdir).exists():
+            raise ConfigError('Please check image_subpath or data_dir. '
+                              'Path {} does not exist'.format(self.data_dir / self.image_subdir))
+
+        if self.two_stream_input:
+            if not self.numpy_subdir:
+                raise ConfigError('numpy_subpath should be provided in case of using two streams')
+
+            if not self.image_subdir:
+                raise ConfigError('image_subpath should be provided in case of using two streams')
+        else:
+            if self.numpy_input and self.numpy_subdir:
+                warnings.warn("numpy_subpath is provided. "
+                              "Make sure that data_source is {}".format(self.data_dir / self.numpy_subdir))
+            if not self.numpy_input and self.image_subdir:
+                warnings.warn("image_subpath is provided. "
+                              "Make sure that data_source is {}".format(self.data_dir / self.image_subdir))

     def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
         full_annotation = read_json(self.annotation_file, object_pairs_hook=OrderedDict)
-        data_ext = 'jpg' if not self.numpy_input else 'npy'
+        data_ext, data_dir = self.get_ext_and_dir()
         label_map = dict(enumerate(full_annotation['labels']))
         if self.dataset_meta:
             dataset_meta = read_json(self.dataset_meta)
@@ -83,85 +115,122 @@ def convert(self, check_content=False, progress_callback=None, progress_interval=100, **kwargs):
         video_names, annotations = self.get_video_names_and_annotations(full_annotation['database'], self.subset)
         class_to_idx = {v: k for k, v in label_map.items()}

-        videos = []
-        for video_name, annotation in zip(video_names, annotations):
-            video_path = self.data_dir / video_name
-            if not video_path.exists():
-                continue
-
-            n_frames_file = video_path / 'n_frames'
-            n_frames = (
-                int(read_txt(n_frames_file)[0].rstrip('\n\r')) if n_frames_file.exists()
-                else len(list(video_path.glob('*.{}'.format(data_ext))))
-            )
-            if n_frames <= 0:
-                continue
-
-            begin_t = 1
-            end_t = n_frames
-            sample = {
-                'video': video_path,
-                'video_name': video_name,
-                'segment': [begin_t, end_t],
-                'n_frames': n_frames,
-                'video_id': video_name,
-                'label': class_to_idx[annotation['label']]
-            }
-
-            videos.append(sample)
-            if self.num_samples and len(videos) == self.num_samples:
-                break
-
+        videos = self.get_videos(video_names, annotations, class_to_idx, data_dir, data_ext)
         videos = sorted(videos, key=lambda v: v['video_id'].split('/')[-1])

         clips = []
         for video in videos:
-            for clip in self.get_clips(video, self.clips_per_video, self.clip_duration, self.temporal_stride, data_ext):
-                clips.append(clip)
+            clips.extend(self.get_clips(video, self.clips_per_video,
+                                        self.clip_duration, self.temporal_stride, data_ext))

         annotations = []
         num_iterations = len(clips)
         content_errors = None if not check_content else []
         for clip_idx, clip in enumerate(clips):
             if progress_callback is not None and clip_idx % progress_interval:
                 progress_callback(clip_idx * 100 / num_iterations)
-            identifier = ClipIdentifier(clip['video_name'], clip_idx, clip['frames'])
+            identifier = []
+            for ext in data_ext:
+                identifier.append(ClipIdentifier(clip['video_name'], clip_idx, clip['frames_{}'.format(ext)]))
             if check_content:
-                content_errors.extend([
-                    '{}: does not exist'.format(self.data_dir / frame)
-                    for frame in clip['frames'] if not check_file_existence(self.data_dir / frame)
-                ])
+                for ext, dir_ in zip(data_ext, data_dir):
+                    content_errors.extend([
+                        '{}: does not exist'.format(dir_ / frame)
+                        for frame in clip['frames_{}'.format(ext)] if not check_file_existence(dir_ / frame)
+                    ])
+            if len(identifier) == 1:
+                identifier = identifier[0]

             annotations.append(ClassificationAnnotation(identifier, clip['label']))

         return ConverterReturn(annotations, {'label_map': label_map}, content_errors)
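The identifier handling above keeps single-stream behavior intact: one stream still yields a bare ClipIdentifier, while two streams yield a pair, one per extension. Roughly, with illustrative values (frame names follow the image_{:05d}.{ext} pattern used by get_clips):

    # Single stream: the list collapses to the lone ClipIdentifier.
    single = ClipIdentifier('v_001', 0, ['image_00001.jpg', 'image_00002.jpg'])
    # Two streams: one ClipIdentifier per input, sharing the clip index.
    paired = [
        ClipIdentifier('v_001', 0, ['image_00001.jpg', 'image_00002.jpg']),  # RGB frames
        ClipIdentifier('v_001', 0, ['image_00001.npy', 'image_00002.npy']),  # flow arrays
    ]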

-    @staticmethod
-    def get_clips(video, clips_per_video, clip_duration, temporal_stride=1, file_ext='jpg'):
-        shift = int(file_ext == 'npy')
-        num_frames = video['n_frames'] - shift
-        clip_duration *= temporal_stride
-
-        if clips_per_video == 0:
-            step = clip_duration
-        else:
-            step = max(1, (num_frames - clip_duration) // (clips_per_video - 1))
-
-        for clip_start in range(1, 1 + clips_per_video * step, step):
-            clip_end = min(clip_start + clip_duration, num_frames + 1)
-
-            clip_idxs = list(range(clip_start, clip_end))
-
-            if not clip_idxs:
-                return
-
-            # loop clip if it is shorter than clip_duration
-            while len(clip_idxs) < clip_duration:
-                clip_idxs = (clip_idxs * 2)[:clip_duration]
-
-            clip = dict(video)
-            frames_idx = clip_idxs[::temporal_stride]
-            clip['frames'] = ['image_{:05d}.{}'.format(frame_idx, file_ext) for frame_idx in frames_idx]
-            yield clip

+    def get_ext_and_dir(self):
+        if self.two_stream_input:
+            return ['jpg', 'npy'], [self.data_dir / self.image_subdir, self.data_dir / self.numpy_subdir]
+
+        if self.numpy_input:
+            return ['npy'], [self.data_dir / self.numpy_subdir if self.numpy_subdir else self.data_dir]
+
+        return ['jpg'], [self.data_dir / self.image_subdir if self.image_subdir else self.data_dir]
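get_ext_and_dir thus pairs each extension with the directory it should be read from. Sketching the mapping under the three modes (the data_dir path is illustrative):

    from pathlib import Path

    data_dir = Path('/data/kinetics')  # illustrative data_source
    # two_stream_input=True, image_subpath='rgb', numpy_subpath='flow':
    #   (['jpg', 'npy'], [data_dir / 'rgb', data_dir / 'flow'])
    # numpy_input=True, no subpath configured:
    #   (['npy'], [data_dir])
    # default image input, no subpath configured:
    #   (['jpg'], [data_dir])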
+    def get_videos(self, video_names, annotations, class_to_idx, data_dir, data_ext):
+        videos = []
+        for video_name, annotation in zip(video_names, annotations):
+            video_info = {
+                'video_name': video_name,
+                'video_id': video_name,
+                'label': class_to_idx[annotation['label']]
+            }
+            for dir_, ext in zip(data_dir, data_ext):
+                video_path = dir_ / video_name
+                if not video_path.exists():
+                    video_info.clear()
+                    continue
+
+                n_frames_file = video_path / 'n_frames'
+                n_frames = (
+                    int(read_txt(n_frames_file)[0].rstrip('\n\r')) if n_frames_file.exists()
+                    else len(list(video_path.glob('*.{}'.format(ext))))
+                )
+                if n_frames <= 0:
+                    video_info.clear()
+                    continue
+
+                begin_t = 1
+                end_t = n_frames
+                sample = {
+                    'video_{}'.format(ext): video_path,
+                    'segment_{}'.format(ext): [begin_t, end_t],
+                    'n_frames_{}'.format(ext): n_frames,
+                }
+                video_info.update(sample)
+
+            if video_info:
+                videos.append(video_info)
+            if self.num_samples and len(videos) == self.num_samples:
+                break
+        return videos
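Each entry returned by get_videos now keys its per-stream metadata by extension, so a two-stream sample looks roughly like this (label index, paths, and frame counts are illustrative):

    video_info = {
        'video_name': 'v_001',
        'video_id': 'v_001',
        'label': 7,
        'video_jpg': Path('/data/kinetics/rgb/v_001'),
        'segment_jpg': [1, 64],
        'n_frames_jpg': 64,
        'video_npy': Path('/data/kinetics/flow/v_001'),
        'segment_npy': [1, 64],
        'n_frames_npy': 64,
    }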

+    @staticmethod
+    def get_clips(video, clips_per_video, clip_duration, temporal_stride=1, file_ext='jpg'):
+        clip_duration *= temporal_stride
+        frames_ext = {}
+        for ext in file_ext:
+            frames = []
+            shift = int(ext == 'npy')
+            num_frames = video['n_frames_{}'.format(ext)] - shift
+
+            if clips_per_video == 0:
+                step = clip_duration
+            else:
+                step = max(1, (num_frames - clip_duration) // (clips_per_video - 1))
+            for clip_start in range(1, 1 + clips_per_video * step, step):
+                clip_end = min(clip_start + clip_duration, num_frames + 1)
+
+                clip_idxs = list(range(clip_start, clip_end))
+
+                if not clip_idxs:
+                    return []
+
+                # loop clip if it is shorter than clip_duration
+                while len(clip_idxs) < clip_duration:
+                    clip_idxs = (clip_idxs * 2)[:clip_duration]
+
+                frames_idx = clip_idxs[::temporal_stride]
+                frames.append(['image_{:05d}.{}'.format(frame_idx, ext) for frame_idx in frames_idx])
+            frames_ext.update({
+                ext: frames
+            })

+        clips = []
+        for key, value in frames_ext.items():
+            if not clips:
+                for _ in range(len(value)):
+                    clips.append(dict(video))
+            for val, clip in zip(value, clips):
+                clip['frames_{}'.format(key)] = val
+        return clips
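A toy invocation shows how the per-extension frame lists are zipped into paired clips; the class name and numbers here are assumed for illustration, not taken from the commit:

    video = {'video_name': 'v_001', 'video_id': 'v_001', 'label': 7,
             'n_frames_jpg': 8, 'n_frames_npy': 9}
    # With clips_per_video=2 and clip_duration=4, step is (8 - 4) // 1 = 4,
    # so clips start at frames 1 and 5 in both streams:
    clips = Converter.get_clips(video, clips_per_video=2, clip_duration=4,
                                temporal_stride=1, file_ext=['jpg', 'npy'])
    # clips[0]['frames_jpg'] == ['image_00001.jpg', ..., 'image_00004.jpg']
    # clips[0]['frames_npy'] == ['image_00001.npy', ..., 'image_00004.npy']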

     @staticmethod
     def get_video_names_and_annotations(data, subset):
@@ -35,7 +35,11 @@ Optionally you can provide `module_config` section which contains config for custom evaluator.
 Configuration file example: <a href="https://github.com/openvinotoolkit/open_model_zoo/blob/develop/tools/accuracy_checker/configs/text-spotting-0002.yml">text-spotting-0002</a>.

 * **Automatic Speech Recognition Evaluator** shows how to evaluate speech recognition pipeline (encoder + decoder).
 <a href="https://github.com/openvinotoolkit/open_model_zoo/blob/develop/tools/accuracy_checker/accuracy_checker/evaluators/custom_evaluators/asr_encoder_decoder_evaluator.py">Evaluator code</a>.
-* **Im2latex formula recognition** demonstrates how to run encoder-decoder model for extractring latex formula from image
-<a href="https://github.com/openvinotoolkit/open_model_zoo/blob/develop/tools/accuracy_checker/accuracy_checker/evaluators/custom_evaluators/im2latex_evaluator.py">Evaluator code</a>
-Configuration file example: <a href="https://github.com/openvinotoolkit/open_model_zoo/blob/develop/tools/accuracy_checker/configs/im2latex-medium-0002.yml">im2latex-medium-0002</a>
+
+* **Im2latex formula recognition** demonstrates how to run encoder-decoder model for extracting LaTeX formula from image.
+<a href="https://github.com/openvinotoolkit/open_model_zoo/blob/develop/tools/accuracy_checker/accuracy_checker/evaluators/custom_evaluators/im2latex_evaluator.py">Evaluator code</a>.
+Configuration file example: <a href="https://github.com/openvinotoolkit/open_model_zoo/blob/develop/tools/accuracy_checker/configs/im2latex-medium-0002.yml">im2latex-medium-0002</a>.
+
+* **I3D Evaluator** demonstrates how to evaluate two-stream I3D model (RGB + Flow).
+<a href="https://github.com/openvinotoolkit/open_model_zoo/blob/develop/tools/accuracy_checker/accuracy_checker/evaluators/custom_evaluators/i3d_evaluator.py">Evaluator code</a>.
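For reference, configs that dispatch to a custom evaluator follow the `module`/`module_config` pattern mentioned above. A hypothetical sketch in parsed-Python form — the network_info keys and dataset name are assumptions, not copied from the real I3D config:

    # Illustrative shape only; consult the linked i3d_evaluator.py and the
    # text-spotting-0002 config for the authoritative layout.
    i3d_eval_config = {
        'evaluations': [{
            'name': 'i3d',                                # assumed model name
            'module': 'custom_evaluators.i3d_evaluator',  # dotted path to evaluator module
            'module_config': {
                'network_info': {                         # one network per stream (assumed keys)
                    'rgb': {'model': 'i3d-rgb.xml'},
                    'flow': {'model': 'i3d-flow.xml'},
                },
                'launchers': [{'framework': 'dlsdk'}],
                'datasets': [{'name': 'kinetics_400'}],   # assumed dataset name
            },
        }]
    }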