<!DOCTYPE html>
<html class="no-js" lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Anna Vorontsova | CV </title>
<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">
<link rel="icon" href="favicon.ico" type="image/x-icon">
<link href="https://fonts.googleapis.com/css?family=Lato:300,400,700,900" rel="stylesheet">
<link rel="stylesheet" href="libs/font-awesome/css/font-awesome.min.css">
<link href="css/bootstrap.min.css" rel="stylesheet">
<link href="css/styles.css" rel="stylesheet">
<link href="scss/styles.scss" rel="stylesheet">
</head>
<body>
<div id="mobile-menu-open" class="shadow-large">
<i class="fa fa-bars" aria-hidden="true"></i>
</div>
<!-- End #mobile-menu-open -->
<header>
<div id="mobile-menu-close">
<span>Close</span> <i class="fa fa-times" aria-hidden="true"></i>
</div>
<ul id="menu" class="shadow">
<li>
<a href="#about">About</a>
</li>
<li>
<a href="#experience">Experience</a>
</li>
<li>
<a href="#education">Education</a>
</li>
<li>
<a href="#projects">Projects</a>
</li>
<li>
<a href="#skills">Skills</a>
</li>
</ul>
</header>
<!-- End header -->
<div id="lead">
<div id="lead-content">
<h1>Anna Vorontsova</h1>
<h2>Data Scientist / AI Researcher, Computer Vision</h2>
<a href="Resume_AnnaVorontsova.pdf" class="btn-rounded-white">Download Resume</a>
</div>
<!-- End #lead-content -->
<div id="lead-overlay"></div>
<div id="lead-down">
<span>
<i class="fa fa-chevron-down" aria-hidden="true"></i>
</span>
</div>
<!-- End #lead-down -->
</div>
<!-- End #lead -->
<div id="about">
<h2 class="heading">About Me</h2>
<div class="container">
<div class="row">
<div class="personal-info col-md-4">
<div class="personal-image">
<img src="images/anna-vorontsova-medium.png"/>
</div>
</div>
<div class="col-md-8">
<p>
I am a Deep Learning Expert at NEURA Robotics. I hold an M.Sc. in Data
Science and a bachelor's degree in Applied Mathematics, both from HSE University, one of the
leading Russian universities. Before joining NEURA, I worked as a Research Scientist at Samsung Research for 5 years.
Overall, I have over 6 years of both industrial and research experience,
focusing on <b>2D and 3D computer vision</b> throughout my career.
I have co-authored 15+ research papers accepted to
top-tier conferences, prepared a number of technical patents, and gained hands-on experience with
various deep learning models <b>(CNN, RNN, Transformer)</b> and frameworks <b>(PyTorch, TensorFlow)</b>.
</p>
</div>
</div>
</div>
</div>
<!-- End #about -->
<div id="experience" class="background-alt">
<h2 class="heading">Experience</h2>
<div id="experience-timeline">
<div data-date="May 2024 – now">
<h3>NEURA Robotics</h3>
<h4>Deep Learning Expert, 2D/3D Computer Vision</h4>
<p>
Solved various 2D and 3D computer vision tasks in robotic scenarios.
Adapted existing methods and developed new ones for 3D reconstruction,
object segmentation, and antipodal and suction grasp generation.
Generated data for training and benchmarking these methods.
Contributed to documentation on AI safety,
and wrote customer manuals and internal guides.
</p>
</div>
<div data-date="Oct 2018 – Apr 2024">
<h3>Samsung Research</h3>
<h4>AI Researcher, 2D/3D Computer Vision</h4>
<p>
Developed state-of-the-art algorithms addressing 2D and 3D computer vision tasks: SLAM,
visual and sensor-based localization, 3D reconstruction of indoor scenes, depth estimation,
object segmentation, 2D and 3D object detection.
Formulated scientific hypotheses and conducted experiments to validate them. Wrote a number
of academic papers accepted to top-tier CV and robotics conferences such as CVPR, ECCV,
WACV, and IROS; overall, contributed to 16 papers. Named an <a href="https://neurips.cc/Conferences/2022/DatasetBenchmarkProgramCommittee">Outstanding Reviewer</a> at the NeurIPS 2022
Datasets and Benchmarks track. Hold several international patents on technical inventions.
Developed demos and PoCs: visual odometry, visual indoor navigation,
object weight measurement based on RGB-D data. Collected, labeled
and prepared data for prototyping and research purposes: visual navigation,
3D reconstruction of indoor scenes, visual analytics for retail.
Mastered many kinds of technical writing: academic manuscripts, annual reports, patents, tasks
for data annotators, documentation, and internal guides.
</p>
</div>
<div data-date="June 2017 – Oct 2018">
<h3>Rambler&Co</h3>
<h4>Research Intern / Junior Data Scientist, Computer Vision</h4>
<p>
Contributed to a project on cinema visitor monitoring based on video surveillance data.
Developed algorithms based on deep neural networks (segmentation, classification, detection,
tracking). Collected, labeled, and prepared training data. Conducted experiments and
presented the results in the form of reports and slides.
What started as a small toy project run by a single intern (me) proved so successful
that it convinced top management to create a computer vision department, mostly to develop
and maintain the cinema monitoring system. The implemented solution was used to collect
statistics in over 700 cinema halls across Russia.
</p>
</div>
</div>
</div>
<!-- End #experience -->
<div id="education">
<h2 class="heading">Education</h2>
<div class="education-block">
<h3>HSE University</h3>
<span class="education-date">Sep 2018 - June 2020</span>
<h4>Master of Data Science</h4>
<p>
Completed courses: Bayesian Networks, Functional Analysis,
Convex Optimization, Autonomous Driving
<br>
Thesis: Visual Odometry with Ego-motion Sampling
<br>
GPA: 4.5 (8.68 / 10)
</p>
</div>
<!-- End .education-block -->
<div class="education-block">
<h3>Yandex School of Data Analysis</h3>
<span class="education-date">Sep 2018 – June 2020</span>
<h4>Data Science, Advanced track</h4>
</div>
<!-- End .education-block -->
<div class="education-block">
<h3>HSE University</h3>
<span class="education-date">Sep 2014 - June 2018</span>
<h4>Bachelor of Applied Mathematics, Machine Learning and Applications track</h4>
<p>
Completed courses: Machine Learning, Deep Learning,
Statistical Learning Theory, NLP, Computer Vision, Reinforcement Learning,
Bayesian ML, Advanced Algorithms and Data Structures,
Probability Theory and Statistics
<br>
Thesis: Person Re-identification Based on Visual Attributes
<br>
GPA: 4.69 (8.1 / 10)
</p>
</div>
<!-- End .education-block -->
</div>
<!-- End #education -->
<div id="projects" class="background-alt">
<h2 class="heading">Projects & Publications</h2>
<div class="container">
<div class="row">
<div class="project">
<div class="project-image">
<img src="images/projects/unidet3d.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>UniDet3D: Multi-dataset Indoor 3D Object Detection</h3>
<h4>2025 AAAI Conference on Artificial Intelligence (AAAI)</h4>
<p class="paper-authors">M. Kolodiazhnyi, <b>A. Vorontsova</b>, A. Konushin, D. Rukhovich </p>
<p class="paper-abstract">
UniDet3D is a 3D object detection model trained on a mixture of indoor
datasets. By unifying various label spaces, UniDet3D learns a strong
representation across multiple datasets through a supervised joint training scheme, thus
achieving generalization in various indoor environments.
It outperforms existing 3D object detection methods on 6 indoor benchmarks.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2409.04234" target="_blank">Paper</a>
</li>
<li>
<a href="https://github.com/filaPro/unidet3d" target="_blank"><i class="fa fa-github" aria-hidden="true"></i></a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/oneformer3d.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>OneFormer3D: One Transformer for Unified Point Cloud Segmentation </h3>
<h4>2024 Conference on Computer Vision and Pattern Recognition (CVPR) </h4>
<p class="paper-authors">M. Kolodiazhnyi, <b>A. Vorontsova</b>, A. Konushin, D. Rukhovich </p>
<p class="paper-abstract">
OneFormer3D is a unified, simple, and effective model jointly solving
semantic, instance, and panoptic segmentation of 3D point clouds. The model
is trained end-to-end in a single run with panoptic annotations, and achieves
top performance on all three tasks simultaneously, thereby setting a new state-of-the-art
on several 3D segmentation benchmarks.
</p>
<div class="social">
<ul>
<li>
<a href="https://openaccess.thecvf.com/content/CVPR2024/html/Kolodiazhnyi_OneFormer3D_One_Transformer_for_Unified_Point_Cloud_Segmentation_CVPR_2024_paper.html" target="_blank">Paper</a>
</li>
<li>
<a href="https://github.com/filaPro/oneformer3d" target="_blank"><i class="fa fa-github" aria-hidden="true"></i></a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/tetris.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>TETRIS: Towards Exploring the Robustness of Interactive Segmentation</h3>
<h4>2024 AAAI Conference on Artificial Intelligence (AAAI) </h4>
<p class="paper-authors">A. Moskalenko, V. Shakhuro, <b>A. Vorontsova</b>, A. Konushin, A. Antonov, A. Krapukhin, D. Shepelev, K. Soshin</p>
<p class="paper-abstract">
We conducted a user study of clicking patterns and found that
the standard assumption behind the common evaluation strategy may not hold, calling
the reported accuracy and robustness of existing methods into question. We propose a novel evaluation
strategy providing a more comprehensive analysis of a model’s performance. We also
introduce a novel benchmark for measuring the robustness of interactive segmentation,
and report the results of an extensive evaluation of numerous models.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2402.06132" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/super.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>SUPER: Selfie Undistortion and Head Pose Editing with Identity Preservation</h3>
<h4>2024 International Conference on Image Processing (ICIP) </h4>
<p class="paper-authors">P. Karpikova, A. Spiridonov, <b>A. Vorontsova</b>, A. Yaschenko, E. Radionova, I. Medvedev, A. Limonov</p>
<p class="paper-abstract">
Selfies captured from a short distance might look unnatural due to heavy distortions
and improper posing. We propose SUPER, a novel method of correcting distortions and adjusting
head poses in selfies. SUPER combines generative and rendering approaches
to ensure correct geometry while preserving identity.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2406.12700v1" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/fawn.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>FAWN: Floor-And-Walls Normal Regularization for Direct Neural TSDF Reconstruction</h3>
<h4>2024 International Conference on Image Processing (ICIP) </h4>
<p class="paper-authors">A. Sokolova, <b>A. Vorontsova</b>, B. Gabdullin, A. Limonov </p>
<p class="paper-abstract">
FAWN is a modification of truncated signed distance function (TSDF)
reconstruction methods. FAWN takes the standard scene structure into account by detecting
walls and floor in a scene, and penalizing their normals for deviating from the horizontal
and vertical directions. We add FAWN to state-of-the-art TSDF reconstruction
methods and demonstrate a quality gain on a number of indoor benchmarks.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2406.12054" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/medea.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>MEDeA: Multi-View Efficient Depth Alignment</h3>
<h4>2024 International Conference on Image Processing (ICIP) </h4>
<p class="paper-authors">M. Artemyev, <b>A. Vorontsova</b>, A. Sokolova, A. Limonov </p>
<p class="paper-abstract">
Single-view depth estimation methods cannot guarantee consistency throughout a sequence of frames,
while minimizing the discrepancy across multiple views at test time takes hours, making such methods impractical.
MEDeA takes RGB frames with camera parameters and outputs temporally consistent depth maps
orders of magnitude faster than previous test-time optimization approaches.
MEDeA sets a new state-of-the-art on indoor benchmarks and handles smartphone-captured data.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2406.12048" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/td3d.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>Top-Down Beats Bottom-Up in 3D Instance Segmentation</h3>
<h4>2024 Winter Conference on Applications of Computer Vision (WACV) </h4>
<p class="paper-authors">M. Kolodiazhnyi, D. Rukhovich, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
Most 3D instance segmentation methods are bottom-up and typically include
resource-exhaustive post-processing. TD3D is a pioneering cluster-free,
fully-convolutional approach trained end-to-end, and
the first top-down method to outperform bottom-up approaches in the 3D domain.
It demonstrates outstanding accuracy while being up to 2.6x faster at inference
than current state-of-the-art grouping-based approaches.
</p>
<div class="social">
<ul>
<li>
<a href="https://openaccess.thecvf.com/content/WACV2024/html/Kolodiazhnyi_Top-Down_Beats_Bottom-Up_in_3D_Instance_Segmentation_WACV_2024_paper.html" target="_blank">Paper</a>
</li>
<li>
<a href="https://github.com/SamsungLabs/td3d" target="_blank"><i class="fa fa-github" aria-hidden="true"></i></a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/negil.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>Neural Global Illumination for Inverse Rendering</h3>
<h4>2023 International Conference on Image Processing (ICIP) </h4>
<p class="paper-authors">N. Patakin, D. Senushkin, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
NeGIL is the first neural inverse rendering approach capable of processing
inter-reflections. We formulate a novel neural global illumination model, which
estimates both direct environment light and indirect light as a surface light field,
and build a Monte Carlo differentiable rendering framework. Our framework effectively
handles complex lighting effects and facilitates the end-to-end reconstruction of
physically-based, spatially-varying materials.
</p>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/tr3d.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>TR3D: Towards Real-Time Indoor 3D Object Detection</h3>
<h4>2023 International Conference on Image Processing (ICIP) </h4>
<p class="paper-authors">D. Rukhovich, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
TR3D is a fast, fully-convolutional 3D object detection model trained
end-to-end that achieves state-of-the-art results on standard benchmarks.
Moreover, to take advantage of both point cloud and RGB inputs, we propose an
early fusion of 2D and 3D features. The versatile and efficient fusion module
can be applied to make a conventional 3D object detection method multimodal,
thereby improving its detection accuracy.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2302.02858" target="_blank">Paper</a>
</li>
<li>
<a href="https://github.com/SamsungLabs/tr3d" target="_blank"><i class="fa fa-github" aria-hidden="true"></i></a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/contours.jpg" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>Contour-based Interactive Segmentation</h3>
<h4>2023 International Joint Conference on Artificial Intelligence (IJCAI) </h4>
<p class="paper-authors">P. Popenova, D. Galeev, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
Interactive segmentation can be used to speed up and simplify image editing and labeling.
Most approaches use clicks, which might be inconvenient when selecting small objects.
We present a first-in-class contour-based interactive segmentation approach and demonstrate
that a single contour provides the same accuracy as multiple clicks, thus reducing the
number of interactions.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2302.06353" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/fcaf3d.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>FCAF3D: Fully Convolutional Anchor-Free 3D Object Detection</h3>
<h4>2022 European Conference on Computer Vision (ECCV) </h4>
<p class="paper-authors">D. Rukhovich, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
FCAF3D is a first-in-class fully convolutional anchor-free indoor 3D object detection method.
FCAF3D can handle large-scale scenes with minimal runtime through a single feed-forward pass.
Moreover, we propose a novel parametrization of oriented bounding boxes that consistently
improves detection accuracy. It achieves state-of-the-art results on the ScanNet, SUN RGB-D, and S3DIS datasets.
</p>
<div class="social">
<ul>
<li>
<a href="https://www.ecva.net/papers/eccv_2022/papers_ECCV/html/6356_ECCV_2022_paper.php" target="_blank">Paper</a>
</li>
<li>
<a href="https://github.com/SamsungLabs/fcaf3d" target="_blank"><i class="fa fa-github" aria-hidden="true"></i></a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/floorplan.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>Floorplan-Aware Camera Poses Refinement</h3>
<h4>2022 International Conference on Intelligent Robots and Systems (IROS)</h4>
<p class="paper-authors">A. Sokolova, F. Nikitin, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
A technical floorplan depicts walls, partitions, and doors, making it a valuable source
of information about the general scene structure. We propose a novel floorplan-aware
3D reconstruction algorithm that extends bundle adjustment, and show that using a
floorplan improves 3D reconstruction quality on the Redwood dataset and our self-captured
data.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/2210.04572" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/imvoxelnet.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>ImVoxelNet: Image to Voxels Projection for Monocular and Multi-view General-purpose 3D Object Detection</h3>
<h4>2022 Winter Conference on Applications of Computer Vision (WACV)</h4>
<p class="paper-authors">D. Rukhovich, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
ImVoxelNet is a fully convolutional 3D object detection method that operates in
monocular and multi-view modes. ImVoxelNet takes an arbitrary number of RGB
images with camera poses as input. It is general-purpose, achieving state-of-the-art results
on both outdoor (KITTI and nuScenes) and indoor (SUN RGB-D and ScanNet) datasets.
</p>
<div class="social">
<ul>
<li>
<a href="https://openaccess.thecvf.com/content/WACV2022/html/Rukhovich_ImVoxelNet_Image_to_Voxels_Projection_for_Monocular_and_Multi-View_General-Purpose_WACV_2022_paper" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/gp2.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>Single-Stage 3D Geometry-Preserving Depth Estimation Model Training on Dataset Mixtures with Uncalibrated Stereo Data</h3>
<h4>2022 Conference on Computer Vision and Pattern Recognition (CVPR)</h4>
<p class="paper-authors">N. Patakin, <b>A. Vorontsova</b>, M. Artemyev, A. Konushin </p>
<p class="paper-abstract">
GP2 is a General-Purpose and Geometry-Preserving scheme for training single-view
depth estimation models. GP2 enables training on a mixture of a small amount of
geometrically correct depth data and voluminous uncalibrated stereo data, and achieves
state-of-the-art results in general-purpose geometry-preserving single-view depth estimation.
</p>
<div class="social">
<ul>
<li>
<a href="https://openaccess.thecvf.com/content/CVPR2022/html/Patakin_Single-Stage_3D_Geometry-Preserving_Depth_Estimation_Model_Training_on_Dataset_Mixtures_CVPR_2022_paper" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/discoman.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>DISCOMAN: Dataset of Indoor Scenes for Odometry, Mapping and Navigation</h3>
<h4>2019 International Conference on Intelligent Robots and Systems (IROS)</h4>
<p class="paper-authors">P. Kirsanov, A. Gaskarov, F. Konokhov, K. Sofiiuk, <b>A. Vorontsova</b>, I. Slinko, D. Zhukov, S. Bykov, O. Barinova, A. Konushin </p>
<p class="paper-abstract">
A synthetic dataset for training and benchmarking semantic SLAM. Contains 200
sequences of 3000-5000 frames (RGB images generated using physically-based
rendering, depth, IMU) and ground truth occupancy grids. In addition, we establish
baseline results for SLAM, mapping, semantic and panoptic segmentation on our dataset.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/1909.12146" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/robustness.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>Measuring Robustness of Visual SLAM</h3>
<h4>2019 International Conference on Machine Vision Applications (MVA)</h4>
<p class="paper-authors">D. Prokhorov, D. Zhukov, O. Barinova, <b>A. Vorontsova</b>, A. Konushin </p>
<p class="paper-abstract">
A feasibility study of RGB-D SLAM. We extensively evaluate the popular ORB-SLAM2
on several benchmarks, perform statistical analysis of the results, and find
correlations between the metric values and the attributes of the trajectories.
While the accuracy is high, robustness remains an issue.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/1910.04755" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
<div class="project">
<div class="project-image">
<img src="images/projects/motionmaps.png" />
</div>
<!-- End .project-image -->
<div class="project-info">
<h3>Scene Motion Decomposition for Learnable Visual Odometry</h3>
<h4>SEMNAV 2019: CVPR'19 Workshop on Deep Learning for Visual Navigation</h4>
<p class="paper-authors">I. Slinko, <b>A. Vorontsova</b>, F. Konokhov, O. Barinova, A. Konushin </p>
<p class="paper-abstract">
Instead of estimating ego-motion, we address the dual problem of estimating the
motion of a scene w.r.t. a static camera. Using optical flow and depth, we
calculate the motion of each point of a scene in terms of 6DoF and create motion
maps, each one addressing a single degree of freedom. Such a decomposition
improves accuracy over naive stacking of depth and optical flow.
</p>
<div class="social">
<ul>
<li>
<a href="https://arxiv.org/abs/1907.07227" target="_blank">Paper</a>
</li>
</ul>
</div>
</div>
<!-- End .project-info -->
</div>
<!-- End .project -->
</div>
</div>
</div>
<!-- End #projects -->
<div id="skills">
<h2 class="heading">Skills</h2>
<ul>
<li>Python</li>
<li>PyTorch</li>
<li>TensorFlow</li>
<li>OpenCV</li>
<li>Open3D</li>
<li>BlenderProc</li>
<li>scikit-learn</li>
<li>Ultralytics</li>
<li>NumPy</li>
<li>SciPy</li>
<li>Pandas</li>
<li>Docker</li>
<li>Git</li>
</ul>
</div>
<!-- End #skills -->
<footer>
<div class="container">
<div class="row">
<div class="col-sm-5 copyright">
<p>
Copyright © <span id="current-year">2025</span> Anna Vorontsova
</p>
</div>
<div class="col-sm-2 top">
<span id="to-top">
<i class="fa fa-chevron-up" aria-hidden="true"></i>
</span>
</div>
<div class="col-sm-5 social">
<ul>
<!-- <li>
<a href="https://github.com/highrut" target="_blank"><i class="fa fa-github" aria-hidden="true"></i></a>
</li> -->
<li>
<a href="https://www.linkedin.com/in/anna-vorontsova-893411114/" target="_blank"><i class="fa fa-linkedin" aria-hidden="true"></i></a>
</li>
<!-- <li>
<a href="https://www.facebook.com/" target="_blank"><i class="fa fa-facebook" aria-hidden="true"></i></a>
</li> -->
<li>
<a href="https://scholar.google.com/citations?user=HiVoQCIAAAAJ&hl=ru" target="_blank"><img src="images/google-scholar.png" class="fa" aria-hidden="true"></a>
</li>
</ul>
</div>
</div>
</div>
</footer>
<!-- End footer -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.4/jquery.min.js"></script>
<script src="js/scripts.min.js"></script>
</body>
</html>