% $ biblatex auxiliary file $
% $ biblatex bbl format version 3.1 $
% Do not modify the above lines!
%
% This is an auxiliary file used by the 'biblatex' package.
% This file may safely be deleted. It will be recreated by
% biber as required.
%
\begingroup
\makeatletter
\@ifundefined{ver@biblatex.sty}
  {\@latex@error
     {Missing 'biblatex' package}
     {The bibliography requires the 'biblatex' package.}
      \aftergroup\endinput}
  {}
\endgroup

\refsection{0}
  \datalist[entry]{nty/global//global/global}
    \entry{Aleotti.2020}{misc}{}
      \name{author}{6}{}{%
        {{hash=0e141e967911dc1e9e3266ec98e4fc1d}{%
           family={Aleotti},
           familyi={A\bibinitperiod},
           given={Filippo},
           giveni={F\bibinitperiod}}}%
        {{hash=95a910f0b0e3c4905262ac1ab4cd3736}{%
           family={Zaccaroni},
           familyi={Z\bibinitperiod},
           given={Giulio},
           giveni={G\bibinitperiod}}}%
        {{hash=6d32bbe0bffb37cc564df8adb6c87702}{%
           family={Bartolomei},
           familyi={B\bibinitperiod},
           given={Luca},
           giveni={L\bibinitperiod}}}%
        {{hash=343dafb5548d904115e5782508eb8375}{%
           family={Poggi},
           familyi={P\bibinitperiod},
           given={Matteo},
           giveni={M\bibinitperiod}}}%
        {{hash=8d31b2206bee079ebdf6ff494cb9c6ee}{%
           family={Tosi},
           familyi={T\bibinitperiod},
           given={Fabio},
           giveni={F\bibinitperiod}}}%
        {{hash=97fde7508220953d74da9f2a50a93a4e}{%
           family={Mattoccia},
           familyi={M\bibinitperiod},
           given={Stefano},
           giveni={S\bibinitperiod}}}%
      }
      \strng{namehash}{7939deb38e016c99c47c9e866b3ade34}
      \strng{fullhash}{8314dffcc5cb6330e824ffeb8f7b00f2}
      \strng{bibnamehash}{7939deb38e016c99c47c9e866b3ade34}
      \strng{authorbibnamehash}{7939deb38e016c99c47c9e866b3ade34}
      \strng{authornamehash}{7939deb38e016c99c47c9e866b3ade34}
      \strng{authorfullhash}{8314dffcc5cb6330e824ffeb8f7b00f2}
      \field{sortinit}{A}
      \field{sortinithash}{2f401846e2029bad6b3ecc16d50031e2}
      \field{labelnamesource}{author}
      \field{labeltitlesource}{title}
      \field{abstract}{Depth perception is paramount to tackle real-world problems, ranging from autonomous driving to consumer applications. For the latter, depth estimation from a single image represents the most versatile solution, since a standard camera is available on almost any handheld device. Nonetheless, two main issues limit its practical deployment: i) the low reliability when deployed in-the-wild and ii) the demanding resource requirements to achieve real-time performance, often not compatible with such devices. Therefore, in this paper, we deeply investigate these issues showing how they are both addressable adopting appropriate network design and training strategies -- also outlining how to map the resulting networks on handheld devices to achieve real-time performance. Our thorough evaluation highlights the ability of such fast networks to generalize well to new environments, a crucial feature required to tackle the extremely varied contexts faced in real applications.
Indeed, to further support this evidence, we report experimental results concerning real-time depth-aware augmented reality and image blurring with smartphones in-the-wild.} \field{eprinttype}{arXiv} \field{title}{Real-time single image depth perception in the wild with handheld devices} \field{year}{2020} \verb{eprint} \verb arXiv:2006.05724v1 \endverb \verb{file} \verb Real-time single image depth perception in the wild with:Attachments/Real-time single image depth perception in the wild with.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/2006.05724 \endverb \verb{url} \verb https://arxiv.org/pdf/2006.05724 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV);Graphics (cs.GR)} \endentry \entry{Alvarez.2016}{inbook}{} \name{author}{4}{}{% {{hash=087415c33d15dad522378a03ed5ff0b5}{% family={Alvarez}, familyi={A\bibinitperiod}, given={H.}, giveni={H\bibinitperiod}}}% {{hash=0dd1001c99b3c779d7ccd99a4efd66b3}{% family={Paz}, familyi={P\bibinitperiod}, given={L.M.}, giveni={L\bibinitperiod}}}% {{hash=9e291584a3b29b27c88ecfe0a566274c}{% family={Sturm}, familyi={S\bibinitperiod}, given={Jürgen}, giveni={J\bibinitperiod}}}% {{hash=b4995f3c4c5030db3a9e2fe4f3d61481}{% family={Cremers}, familyi={C\bibinitperiod}, given={D.}, giveni={D\bibinitperiod}}}% } \strng{namehash}{d907d14b508c6dbd77cc25079a69a171} \strng{fullhash}{fc04147d83c649a9044a6fb80b221b9b} \strng{bibnamehash}{d907d14b508c6dbd77cc25079a69a171} \strng{authorbibnamehash}{d907d14b508c6dbd77cc25079a69a171} \strng{authornamehash}{d907d14b508c6dbd77cc25079a69a171} \strng{authorfullhash}{fc04147d83c649a9044a6fb80b221b9b} \field{sortinit}{A} \field{sortinithash}{2f401846e2029bad6b3ecc16d50031e2} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{isbn}{978-3-319-23777-0} \field{month}{11} \field{title}{Collision Avoidance for Quadrotors with a Monocular Camera} \field{volume}{109} \field{year}{2016} \field{pages}{195\bibrangedash 209} \range{pages}{15} \verb{doi} \verb 10.1007/978-3-319-23778-7_14 \endverb \endentry \entry{Arulprakash.2021}{article}{} \name{author}{2}{}{% {{hash=4bbb0814acd56658521599c3a9cb4636}{% family={Arulprakash}, familyi={A\bibinitperiod}, given={Enoch}, giveni={E\bibinitperiod}}}% {{hash=af8274fdf1755aa0001aecf866957874}{% family={Aruldoss}, familyi={A\bibinitperiod}, given={Martin}, giveni={M\bibinitperiod}}}% } \strng{namehash}{595d5f35a0a949d2089c6f0c1195d885} \strng{fullhash}{595d5f35a0a949d2089c6f0c1195d885} \strng{bibnamehash}{595d5f35a0a949d2089c6f0c1195d885} \strng{authorbibnamehash}{595d5f35a0a949d2089c6f0c1195d885} \strng{authornamehash}{595d5f35a0a949d2089c6f0c1195d885} \strng{authorfullhash}{595d5f35a0a949d2089c6f0c1195d885} \field{sortinit}{A} \field{sortinithash}{2f401846e2029bad6b3ecc16d50031e2} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{issn}{13191578} \field{journaltitle}{Journal of King Saud University - Computer and Information Sciences} \field{note}{PII: S1319157821002020} \field{title}{A study on generic object detection with emphasis on future research directions} \field{year}{2021} \verb{doi} \verb 10.1016/j.jksuci.2021.08.001 \endverb \endentry \entry{EncoderDecoder7803544}{article}{} \name{author}{3}{}{% {{hash=2b517df4f81c42dc3c5b32aede012cf9}{% family={Badrinarayanan}, familyi={B\bibinitperiod}, given={Vijay}, giveni={V\bibinitperiod}}}% {{hash=4b1053f4ffb69cd61c1f122cc435c884}{% family={Kendall}, familyi={K\bibinitperiod}, given={Alex}, giveni={A\bibinitperiod}}}% {{hash=95c5bd0a1d6e6649ed56043a63268bfa}{% 
family={Cipolla}, familyi={C\bibinitperiod}, given={Roberto}, giveni={R\bibinitperiod}}}% } \strng{namehash}{fb26318591964ff2e23ee05578819393} \strng{fullhash}{fb26318591964ff2e23ee05578819393} \strng{bibnamehash}{fb26318591964ff2e23ee05578819393} \strng{authorbibnamehash}{fb26318591964ff2e23ee05578819393} \strng{authornamehash}{fb26318591964ff2e23ee05578819393} \strng{authorfullhash}{fb26318591964ff2e23ee05578819393} \field{sortinit}{B} \field{sortinithash}{d7095fff47cda75ca2589920aae98399} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{journaltitle}{IEEE Transactions on Pattern Analysis and Machine Intelligence} \field{number}{12} \field{title}{SegNet: A Deep Convolutional Encoder-Decoder Architecture for Image Segmentation} \field{volume}{39} \field{year}{2017} \field{pages}{2481\bibrangedash 2495} \range{pages}{15} \verb{doi} \verb 10.1109/TPAMI.2016.2644615 \endverb \endentry \entry{Chakravarty.2017}{inproceedings}{} \name{author}{6}{}{% {{hash=b700b361eed5e85d647c6bbfef3eacd6}{% family={Chakravarty}, familyi={C\bibinitperiod}, given={Punarjay}, giveni={P\bibinitperiod}}}% {{hash=abedc60b3acdfd251c34cf76e0aca39f}{% family={Kelchtermans}, familyi={K\bibinitperiod}, given={Klaas}, giveni={K\bibinitperiod}}}% {{hash=6e8bb49e98d2fd65c0c240ceb60b922f}{% family={Roussel}, familyi={R\bibinitperiod}, given={Tom}, giveni={T\bibinitperiod}}}% {{hash=f8bb13bef7b50a92cfa855b9c4c012c6}{% family={Wellens}, familyi={W\bibinitperiod}, given={Stijn}, giveni={S\bibinitperiod}}}% {{hash=1fffde1e018446dc52c3da9c6e26ec11}{% family={Tuytelaars}, familyi={T\bibinitperiod}, given={Tinne}, giveni={T\bibinitperiod}}}% {{hash=5868f82298c698db6e7249c1b16b1d6c}{% family={Van\bibnamedelima Eycken}, familyi={V\bibinitperiod\bibinitdelim E\bibinitperiod}, given={Luc}, giveni={L\bibinitperiod}}}% } \strng{namehash}{71b3164dff5e2a7cf11049dd7e281ed1} \strng{fullhash}{f53b98e85eb07c9c6b8d6bb6edbc0493} \strng{bibnamehash}{71b3164dff5e2a7cf11049dd7e281ed1} \strng{authorbibnamehash}{71b3164dff5e2a7cf11049dd7e281ed1} \strng{authornamehash}{71b3164dff5e2a7cf11049dd7e281ed1} \strng{authorfullhash}{f53b98e85eb07c9c6b8d6bb6edbc0493} \field{sortinit}{C} \field{sortinithash}{4d103a86280481745c9c897c925753c0} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{booktitle}{2017 IEEE International Conference on Robotics and Automation (ICRA)} \field{title}{CNN-based single image obstacle avoidance on a quadrotor} \field{year}{2017} \field{pages}{6369\bibrangedash 6374} \range{pages}{6} \verb{doi} \verb 10.1109/ICRA.2017.7989752 \endverb \endentry \entry{cocoEval.8252021}{online}{} \field{sortinit}{C} \field{sortinithash}{4d103a86280481745c9c897c925753c0} \field{labeltitlesource}{title} \field{title}{COCO - Common Objects in Context} \field{urlday}{14} \field{urlmonth}{11} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://cocodataset.org/#detection-eval \endverb \verb{url} \verb https://cocodataset.org/#detection-eval \endverb \endentry \entry{coco.8252021}{online}{} \field{sortinit}{C} \field{sortinithash}{4d103a86280481745c9c897c925753c0} \field{labeltitlesource}{title} \field{title}{COCO - Common Objects in Context} \field{urlday}{14} \field{urlmonth}{11} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://cocodataset.org \endverb \verb{url} \verb https://cocodataset.org \endverb \endentry \entry{EveringhamDevelopmentKit.2012}{misc}{} \name{author}{2}{}{% {{hash=655191b4f54a942147b118da2f50aeed}{% 
family={Everingham}, familyi={E\bibinitperiod}, given={Mark}, giveni={M\bibinitperiod}}}% {{hash=19dcf449a33f27289a006b0acf819975}{% family={Winn}, familyi={W\bibinitperiod}, given={John}, giveni={J\bibinitperiod}}}% } \strng{namehash}{71f075e5e08114d81693e397b204b1d8} \strng{fullhash}{71f075e5e08114d81693e397b204b1d8} \strng{bibnamehash}{71f075e5e08114d81693e397b204b1d8} \strng{authorbibnamehash}{71f075e5e08114d81693e397b204b1d8} \strng{authornamehash}{71f075e5e08114d81693e397b204b1d8} \strng{authorfullhash}{71f075e5e08114d81693e397b204b1d8} \field{sortinit}{E} \field{sortinithash}{8da8a182d344d5b9047633dfc0cc9131} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{title}{The PASCAL Visual Object Classes Challenge 2012 (VOC2012) Development Kit} \field{urlday}{24} \field{urlmonth}{11} \field{urlyear}{2021} \field{year}{2012} \field{urldateera}{ce} \verb{urlraw} \verb http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html \endverb \verb{url} \verb http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html \endverb \endentry \entry{Everingham.2010}{article}{} \name{author}{5}{}{% {{hash=655191b4f54a942147b118da2f50aeed}{% family={Everingham}, familyi={E\bibinitperiod}, given={Mark}, giveni={M\bibinitperiod}}}% {{hash=b296966d66d81277e4274fd491fdbf12}{% family={{van Gool}}, familyi={v\bibinitperiod}, given={Luc}, giveni={L\bibinitperiod}}}% {{hash=d34fdfcbfcb5412397b946351370db22}{% family={Williams}, familyi={W\bibinitperiod}, given={Christopher\bibnamedelimb K.\bibnamedelimi I.}, giveni={C\bibinitperiod\bibinitdelim K\bibinitperiod\bibinitdelim I\bibinitperiod}}}% {{hash=19dcf449a33f27289a006b0acf819975}{% family={Winn}, familyi={W\bibinitperiod}, given={John}, giveni={J\bibinitperiod}}}% {{hash=c72fc39e94030f67717052309266a44d}{% family={Zisserman}, familyi={Z\bibinitperiod}, given={Andrew}, giveni={A\bibinitperiod}}}% } \strng{namehash}{235de32ba219ebb749c8820ac16ff118} \strng{fullhash}{b62ea95ea4788089f092b405af5dc7a4} \strng{bibnamehash}{235de32ba219ebb749c8820ac16ff118} \strng{authorbibnamehash}{235de32ba219ebb749c8820ac16ff118} \strng{authornamehash}{235de32ba219ebb749c8820ac16ff118} \strng{authorfullhash}{b62ea95ea4788089f092b405af5dc7a4} \field{extraname}{1} \field{sortinit}{E} \field{sortinithash}{8da8a182d344d5b9047633dfc0cc9131} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{issn}{0920-5691} \field{journaltitle}{International Journal of Computer Vision} \field{note}{PII: 275} \field{number}{2} \field{pagination}{page} \field{shortjournal}{Int J Comput Vis} \field{title}{The Pascal Visual Object Classes (VOC) Challenge} \field{volume}{88} \field{year}{2010} \field{pages}{303\bibrangedash 338} \range{pages}{36} \verb{doi} \verb 10.1007/s11263-009-0275-4 \endverb \endentry \entry{Everingham.2015}{article}{} \name{author}{6}{}{% {{hash=655191b4f54a942147b118da2f50aeed}{% family={Everingham}, familyi={E\bibinitperiod}, given={Mark}, giveni={M\bibinitperiod}}}% {{hash=57a042c09397c84eaa88cad02f1a113d}{% family={Eslami}, familyi={E\bibinitperiod}, given={S.\bibnamedelimi M.\bibnamedelimi Ali}, giveni={S\bibinitperiod\bibinitdelim M\bibinitperiod\bibinitdelim A\bibinitperiod}}}% {{hash=b296966d66d81277e4274fd491fdbf12}{% family={{van Gool}}, familyi={v\bibinitperiod}, given={Luc}, giveni={L\bibinitperiod}}}% {{hash=d34fdfcbfcb5412397b946351370db22}{% family={Williams}, familyi={W\bibinitperiod}, given={Christopher\bibnamedelimb K.\bibnamedelimi I.}, giveni={C\bibinitperiod\bibinitdelim K\bibinitperiod\bibinitdelim I\bibinitperiod}}}% 
{{hash=19dcf449a33f27289a006b0acf819975}{% family={Winn}, familyi={W\bibinitperiod}, given={John}, giveni={J\bibinitperiod}}}% {{hash=c72fc39e94030f67717052309266a44d}{% family={Zisserman}, familyi={Z\bibinitperiod}, given={Andrew}, giveni={A\bibinitperiod}}}% } \strng{namehash}{235de32ba219ebb749c8820ac16ff118} \strng{fullhash}{17d18af6db3a6b5860166567deed1fee} \strng{bibnamehash}{235de32ba219ebb749c8820ac16ff118} \strng{authorbibnamehash}{235de32ba219ebb749c8820ac16ff118} \strng{authornamehash}{235de32ba219ebb749c8820ac16ff118} \strng{authorfullhash}{17d18af6db3a6b5860166567deed1fee} \field{extraname}{2} \field{sortinit}{E} \field{sortinithash}{8da8a182d344d5b9047633dfc0cc9131} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{issn}{0920-5691} \field{journaltitle}{International Journal of Computer Vision} \field{note}{PII: 733} \field{number}{1} \field{pagination}{page} \field{shortjournal}{Int J Comput Vis} \field{title}{The Pascal Visual Object Classes Challenge: A Retrospective} \field{volume}{111} \field{year}{2015} \field{pages}{98\bibrangedash 136} \range{pages}{39} \verb{doi} \verb 10.1007/s11263-014-0733-5 \endverb \endentry \entry{FlatBuffer.852021}{online}{} \field{sortinit}{F} \field{sortinithash}{2638baaa20439f1b5a8f80c6c08a13b4} \field{labeltitlesource}{title} \field{title}{FlatBuffers: FlatBuffers white paper} \field{urlday}{15} \field{urlmonth}{11} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://google.github.io/flatbuffers/flatbuffers_white_paper.html \endverb \verb{url} \verb https://google.github.io/flatbuffers/flatbuffers_white_paper.html \endverb \endentry \entry{Girshick.2013}{misc}{} \name{author}{4}{}{% {{hash=bd5dadbe57bedc5957c19a3154c4d424}{% family={Girshick}, familyi={G\bibinitperiod}, given={Ross}, giveni={R\bibinitperiod}}}% {{hash=913090a0d65c7a300c602f5a5a5a61e3}{% family={Donahue}, familyi={D\bibinitperiod}, given={Jeff}, giveni={J\bibinitperiod}}}% {{hash=90180e1a30742e0d15328bfe637c2ef4}{% family={Darrell}, familyi={D\bibinitperiod}, given={Trevor}, giveni={T\bibinitperiod}}}% {{hash=c75a5377b6dc5213831576e88ffe553c}{% family={Malik}, familyi={M\bibinitperiod}, given={Jitendra}, giveni={J\bibinitperiod}}}% } \strng{namehash}{5aa9f085e8eccf547d1eb54a05cb1beb} \strng{fullhash}{9147daa2f6a058d5b8880ff5cf38abd6} \strng{bibnamehash}{5aa9f085e8eccf547d1eb54a05cb1beb} \strng{authorbibnamehash}{5aa9f085e8eccf547d1eb54a05cb1beb} \strng{authornamehash}{5aa9f085e8eccf547d1eb54a05cb1beb} \strng{authorfullhash}{9147daa2f6a058d5b8880ff5cf38abd6} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Object detection performance, as measured on the canonical PASCAL VOC dataset, has plateaued in the last few years. The best-performing methods are complex ensemble systems that typically combine multiple low-level image features with high-level context. In this paper, we propose a simple and scalable detection algorithm that improves mean average precision (mAP) by more than 30{\%} relative to the previous best result on VOC 2012---achieving a mAP of 53.3{\%}. Our approach combines two key insights: (1) one can apply high-capacity convolutional neural networks (CNNs) to bottom-up region proposals in order to localize and segment objects and (2) when labeled training data is scarce, supervised pre-training for an auxiliary task, followed by domain-specific fine-tuning, yields a significant performance boost. 
Since we combine region proposals with CNNs, we call our method R-CNN: Regions with CNN features. We also compare R-CNN to OverFeat, a recently proposed sliding-window detector based on a similar CNN architecture. We find that R-CNN outperforms OverFeat by a large margin on the 200-class ILSVRC2013 detection dataset. Source code for the complete system is available at this http URL.} \field{eprinttype}{arXiv} \field{title}{Rich feature hierarchies for accurate object detection and semantic segmentation} \field{year}{2013} \verb{eprint} \verb arXiv:1311.2524v5 \endverb \verb{file} \verb Rich feature hierarchies for accurate object detection a:Attachments/Rich feature hierarchies for accurate object detection a.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1311.2524 \endverb \verb{url} \verb https://arxiv.org/pdf/1311.2524 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{GitHubTensorFlowAndroidExample.11222021}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{1} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{TensorFlow examples. Contribute to tensorflow/examples development by creating an account on GitHub.} \field{title}{examples/lite/examples/image{\_}classification/android at master · tensorflow/examples} \field{urlday}{22} \field{urlmonth}{11} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/tensorflow/examples/tree/master/lite/examples/object_detection/android \endverb \verb{url} \verb https://github.com/tensorflow/examples/tree/master/lite/examples/object_detection/android \endverb \endentry \entry{GitHub.1062021c}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{2} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Pydnet on mobile devices. 
Contribute to FilippoAleotti/mobilePydnet development by creating an account on GitHub.} \field{title}{GitHub - FilippoAleotti/mobilePydnet: Pydnet on mobile devices} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/FilippoAleotti/mobilePydnet \endverb \verb{url} \verb https://github.com/FilippoAleotti/mobilePydnet \endverb \endentry \entry{GitHub.1282021}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{3} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Display and control your Android device. Contribute to Genymobile/scrcpy development by creating an account on GitHub.} \field{title}{GitHub - Genymobile/scrcpy: Display and control your Android device} \field{urlday}{8} \field{urlmonth}{12} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/Genymobile/scrcpy \endverb \verb{url} \verb https://github.com/Genymobile/scrcpy \endverb \endentry \entry{GitHub.1062021b}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{4} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Code for robust monocular depth estimation described in Ranftl et. al., Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer, TPAMI 2020 - GitHub - isl-org/MiDaS: Code for robust monocular depth estimation described in Ranftl et. al., Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer, TPAMI 2020} \field{title}{GitHub - isl-org/MiDaS: Code for robust monocular depth estimation described in Ranftl et. 
al., Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer, TPAMI 2020} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/isl-org/MiDaS \endverb \verb{url} \verb https://github.com/isl-org/MiDaS \endverb \endentry \entry{GitHub.1282021b}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{5} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Visualizer for neural network, deep learning, and machine learning models - GitHub - lutzroeder/netron: Visualizer for neural network, deep learning, and machine learning models} \field{title}{GitHub - lutzroeder/netron: Visualizer for neural network, deep learning, and machine learning models} \field{urlday}{8} \field{urlmonth}{12} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/lutzroeder/netron \endverb \verb{url} \verb https://github.com/lutzroeder/netron \endverb \endentry \entry{GitHub.1062021d}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{6} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{LabelImg is a graphical image annotation tool and label object bounding boxes in images - GitHub - tzutalin/labelImg: LabelImg is a graphical image annotation tool and label object bounding boxes in images} \field{title}{GitHub - tzutalin/labelImg: LabelImg is a graphical image annotation tool and label object bounding boxes in images} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/tzutalin/labelImg \endverb \verb{url} \verb https://github.com/tzutalin/labelImg \endverb \endentry \entry{GitHub.1062021}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{7} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Models and examples built with TensorFlow. 
Contribute to tensorflow/models development by creating an account on GitHub.} \field{title}{models/research/object{\_}detection at master · tensorflow/models} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/tensorflow/models/tree/master/research/object_detection \endverb \verb{url} \verb https://github.com/tensorflow/models/tree/master/research/object_detection \endverb \endentry \entry{GitHub.12122021}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{8} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Models and examples built with TensorFlow. Contribute to tensorflow/models development by creating an account on GitHub.} \field{title}{models/research/object{\_}detection at master · tensorflow/models} \field{urlday}{12} \field{urlmonth}{12} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/tensorflow/models/blob/master/research/object_detection/protos/preprocessor.proto \endverb \verb{url} \verb https://github.com/tensorflow/models/blob/master/research/object_detection/protos/preprocessor.proto \endverb \endentry \entry{GitHubDetectionZoo.11152021}{online}{} \name{author}{1}{}{% {{hash=d3b7c913cd04ebfec0e9ec32cb6fd58c}{% family={GitHub}, familyi={G\bibinitperiod}}}% } \strng{namehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{fullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{bibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorbibnamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authornamehash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \strng{authorfullhash}{d3b7c913cd04ebfec0e9ec32cb6fd58c} \field{extraname}{9} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Models and examples built with TensorFlow. 
Contribute to tensorflow/models development by creating an account on GitHub.} \field{title}{models/tf2{\_}detection{\_}zoo.md at master · tensorflow/models} \field{urlday}{15} \field{urlmonth}{11} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md \endverb \verb{url} \verb https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/tf2_detection_zoo.md \endverb \endentry \entry{Goodfellow.2018}{book}{} \name{author}{3}{}{% {{hash=5d2585c11210cf1d4512e6e0a03ec315}{% family={Goodfellow}, familyi={G\bibinitperiod}, given={Ian}, giveni={I\bibinitperiod}}}% {{hash=40a8e4774982146adc2688546f54efb2}{% family={Bengio}, familyi={B\bibinitperiod}, given={Yoshua}, giveni={Y\bibinitperiod}}}% {{hash=ccec1ccd2e1aa86960eb2e872c6b7020}{% family={Courville}, familyi={C\bibinitperiod}, given={Aaron}, giveni={A\bibinitperiod}}}% } \list{language}{1}{% {ger}% } \list{location}{1}{% {Frechen}% } \list{publisher}{1}{% {mitp}% } \strng{namehash}{3ae53fe582e8a815b118d26947eaa326} \strng{fullhash}{3ae53fe582e8a815b118d26947eaa326} \strng{bibnamehash}{3ae53fe582e8a815b118d26947eaa326} \strng{authorbibnamehash}{3ae53fe582e8a815b118d26947eaa326} \strng{authornamehash}{3ae53fe582e8a815b118d26947eaa326} \strng{authorfullhash}{3ae53fe582e8a815b118d26947eaa326} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Intro -- Impressum -- Website zum Buch -- Danksagung -- Über die Fachkorrektoren zur deutschen Ausgabe -- Notation -- Einleitung -- Für wen ist dieses Buch gedacht? -- Historische Entwicklungen im Deep Learning -- I Angewandte Mathematik und Grundlagen für das Machine Learning -- Lineare Algebra -- Skalare, Vektoren, Matrizen und Tensoren -- Multiplizieren von Matrizen und Vektoren -- Einheits- und Umkehrmatrizen -- Lineare Abhängigkeit und lineare Hülle -- Normen -- Spezielle Matrizen und Vektoren -- Eigenwertzerlegung -- Singulärwertzerlegung -- Die Moore-Penrose-Pseudoinverse -- Der Spuroperator -- Die Determinante -- Beispiel: Hauptkomponentenanalyse -- Wahrscheinlichkeits- und Informationstheorie -- Warum Wahrscheinlichkeit? 
-- Zufallsvariablen -- Wahrscheinlichkeitsverteilungen -- Randwahrscheinlichkeit -- Bedingte Wahrscheinlichkeit -- Die Produktregel der bedingten Wahrscheinlichkeiten -- Unabhängigkeit und bedingte Unabhängigkeit -- Erwartungswert, Varianz und Kovarianz -- Häufig genutzte Wahrscheinlichkeitsverteilungen -- Nützliche Eigenschaften häufig verwendeter Funktionen -- Satz von Bayes -- Technische Einzelheiten stetiger Variablen -- Informationstheorie -- Strukturierte probabilistische Modelle -- Numerische Berechnung -- Überlauf und Unterlauf -- Schlechte Konditionierung -- Optimierung auf Gradientenbasis -- Optimierung unter Nebenbedingungen -- Beispiel: Lineare kleinste Quadrate -- Grundlagen für das Machine Learning -- Lernalgorithmen -- Kapazität, Überanpassung und Unteranpassung -- Hyperparameter und Validierungsdaten -- Schätzer, Verzerrung und Varianz -- Maximum-Likelihood-Schätzung -- Bayessche Statistik -- Algorithmen für überwachtes Lernen -- Algorithmen für unüberwachtes Lernen -- Stochastisches Gradientenabstiegsverfahren -- Entwickeln eines Machine-Learning-Algorithmus -- Probleme, an denen Deep Learning wächst.} \field{edition}{1st edition} \field{isbn}{978-3-95845-700-3} \field{note}{Goodfellow, Ian (VerfasserIn) Bengio, Yoshua (VerfasserIn) Courville, Aaron (VerfasserIn) Lenz, Guido (ÜbersetzerIn)} \field{pagetotal}{883} \field{series}{mitp Professional} \field{subtitle}{Das umfassende Handbuch : Grundlagen, aktuelle Verfahren und Algorithmen, neue Forschungsansätze} \field{title}{Deep Learning} \field{year}{2018} \verb{urlraw} \verb https://ebookcentral.proquest.com/lib/kxp/detail.action?docID=5598176 \endverb \verb{url} \verb https://ebookcentral.proquest.com/lib/kxp/detail.action?docID=5598176 \endverb \keyw{Electronic books;Machine learning} \endentry \entry{Colab.1072021}{online}{} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labeltitlesource}{title} \field{title}{Google Colaboratory} \field{urlday}{7} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://colab.research.google.com \endverb \verb{url} \verb https://colab.research.google.com \endverb \endentry \entry{Grus.2020}{book}{} \name{author}{1}{}{% {{hash=1ffc3eb84c3593aa01ba1d6000796064}{% family={Grus}, familyi={G\bibinitperiod}, given={Joel}, giveni={J\bibinitperiod}}}% } \list{language}{1}{% {ger}% } \list{location}{1}{% {Heidelberg}% } \list{publisher}{2}{% {O'Reilly}% {dpunkt.verlag}% } \strng{namehash}{1ffc3eb84c3593aa01ba1d6000796064} \strng{fullhash}{1ffc3eb84c3593aa01ba1d6000796064} \strng{bibnamehash}{1ffc3eb84c3593aa01ba1d6000796064} \strng{authorbibnamehash}{1ffc3eb84c3593aa01ba1d6000796064} \strng{authornamehash}{1ffc3eb84c3593aa01ba1d6000796064} \strng{authorfullhash}{1ffc3eb84c3593aa01ba1d6000796064} \field{sortinit}{G} \field{sortinithash}{32d67eca0634bf53703493fb1090a2e8} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{edition}{2. 
Auflage} \field{isbn}{978-3-96009-123-3} \field{note}{Grus, Joel (VerfasserIn) Rother, Kristian (ÜbersetzerIn) Demmig, Thomas (ÜbersetzerIn)} \field{subtitle}{Grundprinzipien der Datenanalyse mit Python} \field{title}{Einführung in Data Science} \field{year}{2020} \verb{urlraw} \verb https://search.ebscohost.com/login.aspx?direct=true&scope=site&db=nlebk&db=nlabk&AN=2298669 \endverb \verb{url} \verb https://search.ebscohost.com/login.aspx?direct=true&scope=site&db=nlebk&db=nlabk&AN=2298669 \endverb \endentry \entry{Hatch.2020}{misc}{} \name{author}{3}{}{% {{hash=53f0f0debc0c887016671b82104af570}{% family={Hatch}, familyi={H\bibinitperiod}, given={Kyle}, giveni={K\bibinitperiod}}}% {{hash=f0f6eec3d08415fb9191176c2693052d}{% family={Mern}, familyi={M\bibinitperiod}, given={John}, giveni={J\bibinitperiod}}}% {{hash=e36ad022497396d3ca41ea1594ac09ec}{% family={Kochenderfer}, familyi={K\bibinitperiod}, given={Mykel}, giveni={M\bibinitperiod}}}% } \strng{namehash}{d03c7a124089022fa1670fdc289053d9} \strng{fullhash}{d03c7a124089022fa1670fdc289053d9} \strng{bibnamehash}{d03c7a124089022fa1670fdc289053d9} \strng{authorbibnamehash}{d03c7a124089022fa1670fdc289053d9} \strng{authornamehash}{d03c7a124089022fa1670fdc289053d9} \strng{authorfullhash}{d03c7a124089022fa1670fdc289053d9} \field{sortinit}{H} \field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{A collision avoidance system based on simple digital cameras would help enable the safe integration of small UAVs into crowded, low-altitude environments. In this work, we present an obstacle avoidance system for small UAVs that uses a monocular camera with a hybrid neural network and path planner controller. The system is comprised of a vision network for estimating depth from camera images, a high-level control network, a collision prediction network, and a contingency policy. This system is evaluated on a simulated UAV navigating an obstacle course in a constrained flight pattern. 
Results show the proposed system achieves low collision rates while maintaining operationally relevant flight speeds.} \field{eprinttype}{arXiv} \field{title}{Obstacle Avoidance Using a Monocular Camera} \field{year}{2020} \verb{eprint} \verb arXiv:2012.01608v2 \endverb \verb{file} \verb Obstacle Avoidance Using a Monocular Camera:Attachments/Obstacle Avoidance Using a Monocular Camera.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/2012.01608 \endverb \verb{url} \verb https://arxiv.org/pdf/2012.01608 \endverb \keyw{Artificial Intelligence (cs.AI);Robotics (cs.RO)} \endentry \entry{He.2015}{misc}{} \name{author}{4}{}{% {{hash=6b4b60e909e78633945f3f9c9dc83e01}{% family={He}, familyi={H\bibinitperiod}, given={Kaiming}, giveni={K\bibinitperiod}}}% {{hash=5e72bc22dbcf0984c6d113d280e36990}{% family={Zhang}, familyi={Z\bibinitperiod}, given={Xiangyu}, giveni={X\bibinitperiod}}}% {{hash=bb295293acacd54387339079ebbe4ead}{% family={Ren}, familyi={R\bibinitperiod}, given={Shaoqing}, giveni={S\bibinitperiod}}}% {{hash=f85751488058842b5777c7b4074077b5}{% family={Sun}, familyi={S\bibinitperiod}, given={Jian}, giveni={J\bibinitperiod}}}% } \strng{namehash}{6edb98fe38401d2fe4a026f5ce6e8451} \strng{fullhash}{42c4b52dc3a62cebabbc11c73e1afb53} \strng{bibnamehash}{6edb98fe38401d2fe4a026f5ce6e8451} \strng{authorbibnamehash}{6edb98fe38401d2fe4a026f5ce6e8451} \strng{authornamehash}{6edb98fe38401d2fe4a026f5ce6e8451} \strng{authorfullhash}{42c4b52dc3a62cebabbc11c73e1afb53} \field{sortinit}{H} \field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57{\%} error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28{\%} relative improvement on the COCO object detection dataset. 
Deep residual nets are foundations of our submissions to ILSVRC {\&} COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.} \field{eprinttype}{arXiv} \field{title}{Deep Residual Learning for Image Recognition} \field{year}{2015} \verb{eprint} \verb arXiv:1512.03385v1 \endverb \verb{file} \verb Deep Residual Learning for Image Recognition:Attachments/Deep Residual Learning for Image Recognition.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1512.03385 \endverb \verb{url} \verb https://arxiv.org/pdf/1512.03385 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{HenriqueF.DeArruda.2019}{misc}{} \name{author}{4}{}{% {{hash=ee77957f6c8d1d0d6a9b57758027490b}{% family={{Henrique F. De Arruda}}, familyi={H\bibinitperiod}}}% {{hash=1363d711e28a512ad7ec78e2b51689e5}{% family={{Alexandre Benatti}}, familyi={A\bibinitperiod}}}% {{hash=056e0bc67b5bf46e849693b793631d1e}{% family={{César Henrique Comin}}, familyi={C\bibinitperiod}}}% {{hash=df6db75b50bc38ba95ee9c4390dad80d}{% family={{Luciano Da F. Costa}}, familyi={L\bibinitperiod}}}% } \list{language}{1}{% {en}% } \list{publisher}{1}{% {Unpublished}% } \strng{namehash}{e503b9e1abe8168fc002d3288d0766aa} \strng{fullhash}{b47eec58a7740fad75b1c18f6179c390} \strng{bibnamehash}{e503b9e1abe8168fc002d3288d0766aa} \strng{authorbibnamehash}{e503b9e1abe8168fc002d3288d0766aa} \strng{authornamehash}{e503b9e1abe8168fc002d3288d0766aa} \strng{authorfullhash}{b47eec58a7740fad75b1c18f6179c390} \field{sortinit}{H} \field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{title}{Learning Deep Learning (CDT-15)} \field{year}{2019} \verb{doi} \verb 10.13140/RG.2.2.29866.57283 \endverb \endentry \entry{Hosang.2017}{misc}{} \name{author}{3}{}{% {{hash=8192e2123af03bcb274c2e9444e0a153}{% family={Hosang}, familyi={H\bibinitperiod}, given={Jan}, giveni={J\bibinitperiod}}}% {{hash=b85b9efefe92bdad29985847fb978d46}{% family={Benenson}, familyi={B\bibinitperiod}, given={Rodrigo}, giveni={R\bibinitperiod}}}% {{hash=457dd3049a60059cf69e775a47f0c154}{% family={Schiele}, familyi={S\bibinitperiod}, given={Bernt}, giveni={B\bibinitperiod}}}% } \strng{namehash}{a5b31c01e0efdee6397e46311b720a59} \strng{fullhash}{a5b31c01e0efdee6397e46311b720a59} \strng{bibnamehash}{a5b31c01e0efdee6397e46311b720a59} \strng{authorbibnamehash}{a5b31c01e0efdee6397e46311b720a59} \strng{authornamehash}{a5b31c01e0efdee6397e46311b720a59} \strng{authorfullhash}{a5b31c01e0efdee6397e46311b720a59} \field{sortinit}{H} \field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Object detectors have hugely profited from moving towards an end-to-end learning paradigm: proposals, features, and the classifier becoming one neural network improved results two-fold on general object detection. One indispensable component is non-maximum suppression (NMS), a post-processing algorithm responsible for merging all detections that belong to the same object. The de facto standard NMS algorithm is still fully hand-crafted, suspiciously simple, and -- being based on greedy clustering with a fixed distance threshold -- forces a trade-off between recall and precision. We propose a new network architecture designed to perform NMS, using only boxes and their score. 
We report experiments for person detection on PETS and for general object categories on the COCO dataset. Our approach shows promise providing improved localization and occlusion handling.} \field{eprinttype}{arXiv} \field{title}{Learning non-maximum suppression} \field{year}{2017} \verb{eprint} \verb arXiv:1705.02950v2 \endverb \verb{file} \verb Learning non-maximum suppression:Attachments/Learning non-maximum suppression.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1705.02950 \endverb \verb{url} \verb https://arxiv.org/pdf/1705.02950 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Howard.2017}{misc}{} \name{author}{8}{}{% {{hash=0cedb03f907400fc304fdfaa1f7e2085}{% family={Howard}, familyi={H\bibinitperiod}, given={Andrew\bibnamedelima G.}, giveni={A\bibinitperiod\bibinitdelim G\bibinitperiod}}}% {{hash=d767e8e4d733bcf728bcdf2c193462f7}{% family={Zhu}, familyi={Z\bibinitperiod}, given={Menglong}, giveni={M\bibinitperiod}}}% {{hash=31960f03389184b7f052f5b197cc9fdf}{% family={Chen}, familyi={C\bibinitperiod}, given={Bo}, giveni={B\bibinitperiod}}}% {{hash=6cbb997a11c6922af719c32863261918}{% family={Kalenichenko}, familyi={K\bibinitperiod}, given={Dmitry}, giveni={D\bibinitperiod}}}% {{hash=47ad65c82b1de7d642988df185d7d8ea}{% family={Wang}, familyi={W\bibinitperiod}, given={Weijun}, giveni={W\bibinitperiod}}}% {{hash=7dcb9c6d4d4251a7e32b502d03c9354b}{% family={Weyand}, familyi={W\bibinitperiod}, given={Tobias}, giveni={T\bibinitperiod}}}% {{hash=8f221f2afb0b5a3d95b3e97101924922}{% family={Andreetto}, familyi={A\bibinitperiod}, given={Marco}, giveni={M\bibinitperiod}}}% {{hash=c707ec5b5997dc408a14a34a8380166c}{% family={Adam}, familyi={A\bibinitperiod}, given={Hartwig}, giveni={H\bibinitperiod}}}% } \strng{namehash}{e1fc6cab9b6009340e110518e53868c4} \strng{fullhash}{cffcf38c642164887a370768f5701b8e} \strng{bibnamehash}{e1fc6cab9b6009340e110518e53868c4} \strng{authorbibnamehash}{e1fc6cab9b6009340e110518e53868c4} \strng{authornamehash}{e1fc6cab9b6009340e110518e53868c4} \strng{authorfullhash}{cffcf38c642164887a370768f5701b8e} \field{sortinit}{H} \field{sortinithash}{23a3aa7c24e56cfa16945d55545109b5} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{We present a class of efficient models called MobileNets for mobile and embedded vision applications. MobileNets are based on a streamlined architecture that uses depth-wise separable convolutions to build light weight deep neural networks. We introduce two simple global hyper-parameters that efficiently trade off between latency and accuracy. These hyper-parameters allow the model builder to choose the right sized model for their application based on the constraints of the problem. We present extensive experiments on resource and accuracy tradeoffs and show strong performance compared to other popular models on ImageNet classification. 
We then demonstrate the effectiveness of MobileNets across a wide range of applications and use cases including object detection, finegrain classification, face attributes and large scale geo-localization.} \field{eprinttype}{arXiv} \field{title}{MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications} \field{year}{2017} \verb{eprint} \verb arXiv:1704.04861v1 \endverb \verb{file} \verb MobileNets Efficient Convolutional Neural Networks for M:Attachments/MobileNets Efficient Convolutional Neural Networks for M.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1704.04861 \endverb \verb{url} \verb https://arxiv.org/pdf/1704.04861 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Jacob.12152017}{misc}{} \name{author}{8}{}{% {{hash=73f11c7544252601f7e8c5f614c660f2}{% family={Jacob}, familyi={J\bibinitperiod}, given={Benoit}, giveni={B\bibinitperiod}}}% {{hash=aa58f2ae95227046f2fbba114f0fb625}{% family={Kligys}, familyi={K\bibinitperiod}, given={Skirmantas}, giveni={S\bibinitperiod}}}% {{hash=31960f03389184b7f052f5b197cc9fdf}{% family={Chen}, familyi={C\bibinitperiod}, given={Bo}, giveni={B\bibinitperiod}}}% {{hash=d767e8e4d733bcf728bcdf2c193462f7}{% family={Zhu}, familyi={Z\bibinitperiod}, given={Menglong}, giveni={M\bibinitperiod}}}% {{hash=436cfd289182e9d6b0a46d79b8e91b6e}{% family={Tang}, familyi={T\bibinitperiod}, given={Matthew}, giveni={M\bibinitperiod}}}% {{hash=315c4166fc1f7cb66324a7f0d82827cd}{% family={Howard}, familyi={H\bibinitperiod}, given={Andrew}, giveni={A\bibinitperiod}}}% {{hash=c707ec5b5997dc408a14a34a8380166c}{% family={Adam}, familyi={A\bibinitperiod}, given={Hartwig}, giveni={H\bibinitperiod}}}% {{hash=6cbb997a11c6922af719c32863261918}{% family={Kalenichenko}, familyi={K\bibinitperiod}, given={Dmitry}, giveni={D\bibinitperiod}}}% } \strng{namehash}{be1e8607db7344f1c8e395537c88d55a} \strng{fullhash}{3ad38e526037fc692f33aef3160b0293} \strng{bibnamehash}{be1e8607db7344f1c8e395537c88d55a} \strng{authorbibnamehash}{be1e8607db7344f1c8e395537c88d55a} \strng{authornamehash}{be1e8607db7344f1c8e395537c88d55a} \strng{authorfullhash}{3ad38e526037fc692f33aef3160b0293} \field{sortinit}{J} \field{sortinithash}{b2f54a9081ace9966a7cb9413811edb4} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{The rising popularity of intelligent mobile devices and the daunting computational cost of deep learning-based models call for efficient and accurate on-device inference schemes. We propose a quantization scheme that allows inference to be carried out using integer-only arithmetic, which can be implemented more efficiently than floating point inference on commonly available integer-only hardware. We also co-design a training procedure to preserve end-to-end model accuracy post quantization. As a result, the proposed quantization scheme improves the tradeoff between accuracy and on-device latency. 
The improvements are significant even on MobileNets, a model family known for run-time efficiency, and are demonstrated in ImageNet classification and COCO detection on popular CPUs.} \field{eprinttype}{arXiv} \field{note}{14 pages, 12 figures} \field{pagetotal}{14} \field{title}{Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference} \field{year}{2017} \verb{eprint} \verb arXiv:1712.05877v1 \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1712.05877 \endverb \verb{url} \verb https://arxiv.org/pdf/1712.05877 \endverb \keyw{Computer Science - Learning;Machine Learning (cs.LG);Machine Learning (stat.ML);Statistics - Machine Learning} \endentry \entry{Kim.11152015}{misc}{} \name{author}{2}{}{% {{hash=8095c82e8faeee6c563725c9a385ec5b}{% family={Kim}, familyi={K\bibinitperiod}, given={Dong\bibnamedelima Ki}, giveni={D\bibinitperiod\bibinitdelim K\bibinitperiod}}}% {{hash=770b8c88e0b93629ab1d35372bf0e3e4}{% family={Chen}, familyi={C\bibinitperiod}, given={Tsuhan}, giveni={T\bibinitperiod}}}% } \strng{namehash}{ff2628bfa346d4778c90142904849c77} \strng{fullhash}{ff2628bfa346d4778c90142904849c77} \strng{bibnamehash}{ff2628bfa346d4778c90142904849c77} \strng{authorbibnamehash}{ff2628bfa346d4778c90142904849c77} \strng{authornamehash}{ff2628bfa346d4778c90142904849c77} \strng{authorfullhash}{ff2628bfa346d4778c90142904849c77} \field{sortinit}{K} \field{sortinithash}{c02bf6bff1c488450c352b40f5d853ab} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Autonomous indoor navigation of Micro Aerial Vehicles (MAVs) possesses many challenges. One main reason is that GPS has limited precision in indoor environments. The additional fact that MAVs are not able to carry heavy weight or power consuming sensors, such as range finders, makes indoor autonomous navigation a challenging task. In this paper, we propose a practical system in which a quadcopter autonomously navigates indoors and finds a specific target, i.e., a book bag, by using a single camera. A deep learning model, Convolutional Neural Network (ConvNet), is used to learn a controller strategy that mimics an expert pilot's choice of action. We show our system's performance through real-time experiments in diverse indoor locations. 
To understand more about our trained network, we use several visualization techniques.} \field{eprinttype}{arXiv} \field{title}{Deep Neural Network for Real-Time Autonomous Indoor Navigation} \field{year}{2015} \verb{eprint} \verb arXiv:1511.04668v2 \endverb \verb{file} \verb Deep Neural Network for Real-Time Autonomous Indoor Navi:Attachments/Deep Neural Network for Real-Time Autonomous Indoor Navi.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1511.04668 \endverb \verb{url} \verb https://arxiv.org/pdf/1511.04668 \endverb \keyw{Computer Science - Computer Vision and Pattern Recognition;Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Krishnamoorthi.2018}{misc}{} \name{author}{1}{}{% {{hash=4a691df37485846988162c4f0639d2f0}{% family={Krishnamoorthi}, familyi={K\bibinitperiod}, given={Raghuraman}, giveni={R\bibinitperiod}}}% } \strng{namehash}{4a691df37485846988162c4f0639d2f0} \strng{fullhash}{4a691df37485846988162c4f0639d2f0} \strng{bibnamehash}{4a691df37485846988162c4f0639d2f0} \strng{authorbibnamehash}{4a691df37485846988162c4f0639d2f0} \strng{authornamehash}{4a691df37485846988162c4f0639d2f0} \strng{authorfullhash}{4a691df37485846988162c4f0639d2f0} \field{sortinit}{K} \field{sortinithash}{c02bf6bff1c488450c352b40f5d853ab} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{We present an overview of techniques for quantizing convolutional neural networks for inference with integer weights and activations. Per-channel quantization of weights and per-layer quantization of activations to 8-bits of precision post-training produces classification accuracies within 2{\%} of floating point networks for a wide variety of CNN architectures. Model sizes can be reduced by a factor of 4 by quantizing weights to 8-bits, even when 8-bit arithmetic is not supported. This can be achieved with simple, post training quantization of weights.We benchmark latencies of quantized networks on CPUs and DSPs and observe a speedup of 2x-3x for quantized implementations compared to floating point on CPUs. Speedups of up to 10x are observed on specialized processors with fixed point SIMD capabilities, like the Qualcomm QDSPs with HVX. Quantization-aware training can provide further improvements, reducing the gap to floating point to 1{\%} at 8-bit precision. Quantization-aware training also allows for reducing the precision of weights to four bits with accuracy losses ranging from 2{\%} to 10{\%}, with higher accuracy drop for smaller networks.We introduce tools in TensorFlow and TensorFlowLite for quantizing convolutional networks and review best practices for quantization-aware training to obtain high accuracy with quantized weights and activations. We recommend that per-channel quantization of weights and per-layer quantization of activations be the preferred quantization scheme for hardware acceleration and kernel optimization. 
We also propose that future processors and hardware accelerators for optimized inference support precisions of 4, 8 and 16 bits.} \field{eprinttype}{arXiv} \field{title}{Quantizing deep convolutional networks for efficient inference: A whitepaper} \field{year}{2018} \verb{eprint} \verb arXiv:1806.08342v1 \endverb \verb{file} \verb Quantizing deep convolutional networks for efficient inf:Attachments/Quantizing deep convolutional networks for efficient inf.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1806.08342 \endverb \verb{url} \verb https://arxiv.org/pdf/1806.08342 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV);Machine Learning (cs.LG);Machine Learning (stat.ML)} \endentry \entry{Lin.1292016}{misc}{} \name{author}{6}{}{% {{hash=08f925fe4692d130a1d7cb7d94483351}{% family={Lin}, familyi={L\bibinitperiod}, given={Tsung-Yi}, giveni={T\bibinithyphendelim Y\bibinitperiod}}}% {{hash=ecd149fdcb3e0503881d49e545744c3d}{% family={Dollár}, familyi={D\bibinitperiod}, given={Piotr}, giveni={P\bibinitperiod}}}% {{hash=bd5dadbe57bedc5957c19a3154c4d424}{% family={Girshick}, familyi={G\bibinitperiod}, given={Ross}, giveni={R\bibinitperiod}}}% {{hash=6b4b60e909e78633945f3f9c9dc83e01}{% family={He}, familyi={H\bibinitperiod}, given={Kaiming}, giveni={K\bibinitperiod}}}% {{hash=9cadf8ca1dd9b5f5a830ba2059b2517f}{% family={Hariharan}, familyi={H\bibinitperiod}, given={Bharath}, giveni={B\bibinitperiod}}}% {{hash=044d1db5259b74e4975282f599d8e767}{% family={Belongie}, familyi={B\bibinitperiod}, given={Serge}, giveni={S\bibinitperiod}}}% } \strng{namehash}{3b7e00bc88d7aca4d4d5ddb8c252e36c} \strng{fullhash}{407b643cc48d7ba78686be05e508bfa5} \strng{bibnamehash}{3b7e00bc88d7aca4d4d5ddb8c252e36c} \strng{authorbibnamehash}{3b7e00bc88d7aca4d4d5ddb8c252e36c} \strng{authornamehash}{3b7e00bc88d7aca4d4d5ddb8c252e36c} \strng{authorfullhash}{407b643cc48d7ba78686be05e508bfa5} \field{sortinit}{L} \field{sortinithash}{7c47d417cecb1f4bd38d1825c427a61a} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Feature pyramids are a basic component in recognition systems for detecting objects at different scales. But recent deep learning object detectors have avoided pyramid representations, in part because they are compute and memory intensive. In this paper, we exploit the inherent multi-scale, pyramidal hierarchy of deep convolutional networks to construct feature pyramids with marginal extra cost. A top-down architecture with lateral connections is developed for building high-level semantic feature maps at all scales. This architecture, called a Feature Pyramid Network (FPN), shows significant improvement as a generic feature extractor in several applications. Using FPN in a basic Faster R-CNN system, our method achieves state-of-the-art single-model results on the COCO detection benchmark without bells and whistles, surpassing all existing single-model entries including those from the COCO 2016 challenge winners. In addition, our method can run at 5 FPS on a GPU and thus is a practical and accurate solution to multi-scale object detection. 
Code will be made publicly available.} \field{eprinttype}{arXiv} \field{title}{Feature Pyramid Networks for Object Detection} \field{year}{2016} \verb{eprint} \verb arXiv:1612.03144v2 \endverb \verb{file} \verb Feature Pyramid Networks for Object Detection:Attachments/Feature Pyramid Networks for Object Detection.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1612.03144 \endverb \verb{url} \verb https://arxiv.org/pdf/1612.03144 \endverb \keyw{Computer Science - Computer Vision and Pattern Recognition;Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Liu.2017}{article}{} \name{author}{6}{}{% {{hash=10693ec37452f38e6c7e7ce45d8c429d}{% family={Liu}, familyi={L\bibinitperiod}, given={Canglong}, giveni={C\bibinitperiod}}}% {{hash=5b93b2c78389baaa078de347beede455}{% family={Zheng}, familyi={Z\bibinitperiod}, given={Bin}, giveni={B\bibinitperiod}}}% {{hash=3682934996fca823c7636639de0dcadb}{% family={Wang}, familyi={W\bibinitperiod}, given={Chunyang}, giveni={C\bibinitperiod}}}% {{hash=23cde46539de12057c4db04a3b4f0dbd}{% family={Zhao}, familyi={Z\bibinitperiod}, given={Yongting}, giveni={Y\bibinitperiod}}}% {{hash=1ad0a7a54bb4b8674aa795fef3f0a895}{% family={Fu}, familyi={F\bibinitperiod}, given={Shun}, giveni={S\bibinitperiod}}}% {{hash=37f7c16bc002b2fe9220ed6c15d35cdf}{% family={Li}, familyi={L\bibinitperiod}, given={Haochen}, giveni={H\bibinitperiod}}}% } \strng{namehash}{a4bd7c4ed3a987468fe9d36a58d4e4f3} \strng{fullhash}{1902e101876f8b68bcd0eb255993e0fe} \strng{bibnamehash}{a4bd7c4ed3a987468fe9d36a58d4e4f3} \strng{authorbibnamehash}{a4bd7c4ed3a987468fe9d36a58d4e4f3} \strng{authornamehash}{a4bd7c4ed3a987468fe9d36a58d4e4f3} \strng{authorfullhash}{1902e101876f8b68bcd0eb255993e0fe} \field{extraname}{1} \field{sortinit}{L} \field{sortinithash}{7c47d417cecb1f4bd38d1825c427a61a} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{journaltitle}{MATEC Web of Conferences} \field{note}{Xu, Bing (Editor) Chen, Yinong (Editor) PII: matecconf{\_}icmite2017{\_}00007} \field{pagination}{page} \field{shortjournal}{MATEC Web Conf.} \field{title}{CNN-Based Vision Model for Obstacle Avoidance of Mobile Robot} \field{volume}{139} \field{year}{2017} \field{pages}{00007} \range{pages}{1} \verb{doi} \verb 10.1051/matecconf/201713900007 \endverb \verb{file} \verb CNN-Based Vision Model for Obstacle Avoidance of Mobile Robot:Attachments/CNN-Based Vision Model for Obstacle Avoidance of Mobile Robot.pdf:application/pdf \endverb \endentry \entry{Liu.2015}{misc}{} \name{author}{7}{}{% {{hash=c0e0d23e2d09e45e6f51cc2bcea6d9f9}{% family={Liu}, familyi={L\bibinitperiod}, given={Wei}, giveni={W\bibinitperiod}}}% {{hash=c1826f3465579186aff299a9b0e16ed7}{% family={Anguelov}, familyi={A\bibinitperiod}, given={Dragomir}, giveni={D\bibinitperiod}}}% {{hash=8bbc4c5d96f205bada839e74e0202146}{% family={Erhan}, familyi={E\bibinitperiod}, given={Dumitru}, giveni={D\bibinitperiod}}}% {{hash=ed568d9c3bb059e6bf22899fbf170f86}{% family={Szegedy}, familyi={S\bibinitperiod}, given={Christian}, giveni={C\bibinitperiod}}}% {{hash=698ee61a2f3fa29734204496d2d36aef}{% family={Reed}, familyi={R\bibinitperiod}, given={Scott}, giveni={S\bibinitperiod}}}% {{hash=ec820780d594e36d11c6e30c7a2614e0}{% family={Fu}, familyi={F\bibinitperiod}, given={Cheng-Yang}, giveni={C\bibinithyphendelim Y\bibinitperiod}}}% {{hash=963e9b2526a7150c418b4e9e9d19a82f}{% family={Berg}, familyi={B\bibinitperiod}, given={Alexander\bibnamedelima C.}, giveni={A\bibinitperiod\bibinitdelim C\bibinitperiod}}}% } 
\strng{namehash}{623b50f45b666c1e9b84e5228e255810} \strng{fullhash}{d5e1ac3dfe6687980f91e701611520ad} \strng{bibnamehash}{623b50f45b666c1e9b84e5228e255810} \strng{authorbibnamehash}{623b50f45b666c1e9b84e5228e255810} \strng{authornamehash}{623b50f45b666c1e9b84e5228e255810} \strng{authorfullhash}{d5e1ac3dfe6687980f91e701611520ad} \field{extraname}{2} \field{sortinit}{L} \field{sortinithash}{7c47d417cecb1f4bd38d1825c427a61a} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{We present a method for detecting objects in images using a single deep neural network. Our approach, named SSD, discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. Our SSD model is simple relative to methods that require object proposals because it completely eliminates proposal generation and subsequent pixel or feature resampling stage and encapsulates all computation in a single network. This makes SSD easy to train and straightforward to integrate into systems that require a detection component. Experimental results on the PASCAL VOC, MS COCO, and ILSVRC datasets confirm that SSD has comparable accuracy to methods that utilize an additional object proposal step and is much faster, while providing a unified framework for both training and inference. Compared to other single stage methods, SSD has much better accuracy, even with a smaller input image size. For 300$\times$300 input, SSD achieves 72.1{\%} mAP on VOC2007 test at 58 FPS on a Nvidia Titan X and for 500$\times$500 input, SSD achieves 75.1{\%} mAP, outperforming a comparable state of the art Faster R-CNN model. 
Code is available at this https URL .} \field{eprinttype}{arXiv} \field{title}{SSD: Single Shot MultiBox Detector} \field{year}{2015} \verb{eprint} \verb arXiv:1512.02325v5 \endverb \verb{file} \verb SSD Single Shot MultiBox Detector:Attachments/SSD Single Shot MultiBox Detector.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1512.02325 \endverb \verb{url} \verb https://arxiv.org/pdf/1512.02325 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Macias-GarciaDetection.2020}{inproceedings}{} \name{author}{3}{}{% {{hash=4174207e9e6591876e802651a493e3e5}{% family={Macias-Garcia}, familyi={M\bibinithyphendelim G\bibinitperiod}, given={Edgar}, giveni={E\bibinitperiod}}}% {{hash=90085c0512ccbf9d06c3fee7a592a160}{% family={Galeana-Perez}, familyi={G\bibinithyphendelim P\bibinitperiod}, given={Deysy}, giveni={D\bibinitperiod}}}% {{hash=27756fbb2b1f6458ee3602cf72fad839}{% family={Bayro-Corrochano}, familyi={B\bibinithyphendelim C\bibinitperiod}, given={Eduardo}, giveni={E\bibinitperiod}}}% } \strng{namehash}{51215b919d0a7fbf4f95766cda42e590} \strng{fullhash}{51215b919d0a7fbf4f95766cda42e590} \strng{bibnamehash}{51215b919d0a7fbf4f95766cda42e590} \strng{authorbibnamehash}{51215b919d0a7fbf4f95766cda42e590} \strng{authornamehash}{51215b919d0a7fbf4f95766cda42e590} \strng{authorfullhash}{51215b919d0a7fbf4f95766cda42e590} \field{sortinit}{M} \field{sortinithash}{4625c616857f13d17ce56f7d4f97d451} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{booktitle}{2020 International Joint Conference on Neural Networks (IJCNN)} \field{title}{CNN Based Perception System for Collision Avoidance in Mobile Robots using Stereo Vision} \field{year}{2020} \field{pages}{1\bibrangedash 7} \range{pages}{7} \verb{doi} \verb 10.1109/IJCNN48605.2020.9206747 \endverb \endentry \entry{.332021}{online}{} \field{sortinit}{M} \field{sortinithash}{4625c616857f13d17ce56f7d4f97d451} \field{labeltitlesource}{title} \field{title}{MNIST handwritten digit database, Yann LeCun, Corinna Cortes and Chris Burges} \field{urlday}{20} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb http://yann.lecun.com/exdb/mnist/ \endverb \verb{url} \verb http://yann.lecun.com/exdb/mnist/ \endverb \endentry \entry{Nalpantidis.2009}{inproceedings}{} \name{author}{3}{}{% {{hash=8378a2bedf74d1ff3effafeb0146799c}{% family={Nalpantidis}, familyi={N\bibinitperiod}, given={Lazaros}, giveni={L\bibinitperiod}}}% {{hash=b9115d66032a5520d69886a7c7dcc101}{% family={Kostavelis}, familyi={K\bibinitperiod}, given={Ioannis}, giveni={I\bibinitperiod}}}% {{hash=12d89ed7954806488bac04f320f416ec}{% family={Gasteratos}, familyi={G\bibinitperiod}, given={Antonios}, giveni={A\bibinitperiod}}}% } \strng{namehash}{e7c75a0dd778a461b79b06a080decaa1} \strng{fullhash}{e7c75a0dd778a461b79b06a080decaa1} \strng{bibnamehash}{e7c75a0dd778a461b79b06a080decaa1} \strng{authorbibnamehash}{e7c75a0dd778a461b79b06a080decaa1} \strng{authornamehash}{e7c75a0dd778a461b79b06a080decaa1} \strng{authorfullhash}{e7c75a0dd778a461b79b06a080decaa1} \field{sortinit}{N} \field{sortinithash}{22369a73d5f88983a108b63f07f37084} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{isbn}{978-3-642-10816-7} \field{month}{12} \field{title}{Stereovision-Based Algorithm for Obstacle Avoidance} \field{year}{2009} \field{pages}{195\bibrangedash 204} \range{pages}{10} \verb{doi} \verb 10.1007/978-3-642-10817-4_19 \endverb \endentry \entry{NVIDIA.11262021}{online}{} 
\name{author}{1}{}{% {{hash=a2fe9051fe051f819bd739ef3e340377}{% family={NVIDIA}, familyi={N\bibinitperiod}}}% } \strng{namehash}{a2fe9051fe051f819bd739ef3e340377} \strng{fullhash}{a2fe9051fe051f819bd739ef3e340377} \strng{bibnamehash}{a2fe9051fe051f819bd739ef3e340377} \strng{authorbibnamehash}{a2fe9051fe051f819bd739ef3e340377} \strng{authornamehash}{a2fe9051fe051f819bd739ef3e340377} \strng{authorfullhash}{a2fe9051fe051f819bd739ef3e340377} \field{sortinit}{N} \field{sortinithash}{22369a73d5f88983a108b63f07f37084} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{The NVIDIA{\circledR} Jetson Nano{\texttrademark} Developer Kit is a small, powerful computer that can handle modern AI workloads despite its low power consumption. Bring incredible new capabilities to millions of devices at the edge.} \field{title}{NVIDIA Jetson Nano} \field{urlday}{12} \field{urlmonth}{12} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://www.nvidia.com/de-de/autonomous-machines/embedded-systems/jetson-nano/product-development/ \endverb \verb{url} \verb https://www.nvidia.com/de-de/autonomous-machines/embedded-systems/jetson-nano/product-development/ \endverb \keyw{AIoT;CUDA-X;Edge AI;Internet der Dinge;Jetson Nano;Jetson Nano Developer Kit;KI in der Peripherie;NVIDIA} \endentry \entry{OpenImages.6252021}{online}{} \field{sortinit}{O} \field{sortinithash}{2cd7140a07aea5341f9e2771efe90aae} \field{labeltitlesource}{title} \field{title}{Open Images V6} \field{urlday}{12} \field{urlmonth}{12} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://storage.googleapis.com/openimages/web/index.html \endverb \verb{url} \verb https://storage.googleapis.com/openimages/web/index.html \endverb \endentry \entry{PireStereoVision.2012}{inproceedings}{} \name{author}{4}{}{% {{hash=f498d360ad385c023730b43bd9812b69}{% family={Pire}, familyi={P\bibinitperiod}, given={Taihú}, giveni={T\bibinitperiod}}}% {{hash=dcc36a08c12e54445e394e423cbb7637}{% family={De\bibnamedelima Cristóforis}, familyi={D\bibinitperiod\bibinitdelim C\bibinitperiod}, given={Pablo}, giveni={P\bibinitperiod}}}% {{hash=7f69f8174769790ab71606007cffc744}{% family={Nitsche}, familyi={N\bibinitperiod}, given={Matias}, giveni={M\bibinitperiod}}}% {{hash=3e2ea478bcb80c478f83444955dc0c5a}{% family={Berlles}, familyi={B\bibinitperiod}, given={Julio}, giveni={J\bibinitperiod}}}% } \strng{namehash}{f1bd8d752fd4388421c278800cc4be02} \strng{fullhash}{5562cd4c7ba0183bc8b353b41d50f7f7} \strng{bibnamehash}{f1bd8d752fd4388421c278800cc4be02} \strng{authorbibnamehash}{f1bd8d752fd4388421c278800cc4be02} \strng{authornamehash}{f1bd8d752fd4388421c278800cc4be02} \strng{authorfullhash}{5562cd4c7ba0183bc8b353b41d50f7f7} \field{sortinit}{P} \field{sortinithash}{ff3bcf24f47321b42cb156c2cc8a8422} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{month}{12} \field{title}{Stereo vision obstacle avoidance using depth and elevation maps} \field{year}{2012} \endentry \entry{Poggi.2018}{misc}{} \name{author}{4}{}{% {{hash=343dafb5548d904115e5782508eb8375}{% family={Poggi}, familyi={P\bibinitperiod}, given={Matteo}, giveni={M\bibinitperiod}}}% {{hash=0e141e967911dc1e9e3266ec98e4fc1d}{% family={Aleotti}, familyi={A\bibinitperiod}, given={Filippo}, giveni={F\bibinitperiod}}}% {{hash=8d31b2206bee079ebdf6ff494cb9c6ee}{% family={Tosi}, familyi={T\bibinitperiod}, given={Fabio}, giveni={F\bibinitperiod}}}% {{hash=97fde7508220953d74da9f2a50a93a4e}{%
family={Mattoccia}, familyi={M\bibinitperiod}, given={Stefano}, giveni={S\bibinitperiod}}}% } \strng{namehash}{f547ed4e1ed4a9803f760f7c779dd8b7} \strng{fullhash}{72edd1d58afb42b32eda560296ac12df} \strng{bibnamehash}{f547ed4e1ed4a9803f760f7c779dd8b7} \strng{authorbibnamehash}{f547ed4e1ed4a9803f760f7c779dd8b7} \strng{authornamehash}{f547ed4e1ed4a9803f760f7c779dd8b7} \strng{authorfullhash}{72edd1d58afb42b32eda560296ac12df} \field{sortinit}{P} \field{sortinithash}{ff3bcf24f47321b42cb156c2cc8a8422} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Unsupervised depth estimation from a single image is a very attractive technique with several implications in robotic, autonomous navigation, augmented reality and so on. This topic represents a very challenging task and the advent of deep learning enabled to tackle this problem with excellent results. However, these architectures are extremely deep and complex. Thus, real-time performance can be achieved only by leveraging power-hungry GPUs that do not allow to infer depth maps in application fields characterized by low-power constraints. To tackle this issue, in this paper we propose a novel architecture capable to quickly infer an accurate depth map on a CPU, even of an embedded system, using a pyramid of features extracted from a single input image. Similarly to state-of-the-art, we train our network in an unsupervised manner casting depth estimation as an image reconstruction problem. Extensive experimental results on the KITTI dataset show that compared to the top performing approach our network has similar accuracy but a much lower complexity (about 6{\%} of parameters) enabling to infer a depth map for a KITTI image in about 1.7 s on the Raspberry Pi 3 and at more than 8 Hz on a standard CPU. Moreover, by trading accuracy for efficiency, our network allows to infer maps at about 2 Hz and 40 Hz respectively, still being more accurate than most state-of-the-art slower methods. 
To the best of our knowledge, it is the first method enabling such performance on CPUs paving the way for effective deployment of unsupervised monocular depth estimation even on embedded systems.} \field{eprinttype}{arXiv} \field{title}{Towards real-time unsupervised monocular depth estimation on CPU} \field{year}{2018} \verb{eprint} \verb arXiv:1806.11430v3 \endverb \verb{file} \verb Towards real-time unsupervised monocular depth estimatio:Attachments/Towards real-time unsupervised monocular depth estimatio.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1806.11430 \endverb \verb{url} \verb https://arxiv.org/pdf/1806.11430 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV);Robotics (cs.RO)} \endentry \entry{Ranftl.3242021}{misc}{} \name{author}{3}{}{% {{hash=f13b87ed1a6410067504966eac03e9a2}{% family={Ranftl}, familyi={R\bibinitperiod}, given={René}, giveni={R\bibinitperiod}}}% {{hash=c0b2fdc20c37a4bd8d817822d374f809}{% family={Bochkovskiy}, familyi={B\bibinitperiod}, given={Alexey}, giveni={A\bibinitperiod}}}% {{hash=9025300f43a193cfed8e10e924f2a117}{% family={Koltun}, familyi={K\bibinitperiod}, given={Vladlen}, giveni={V\bibinitperiod}}}% } \strng{namehash}{2593b6dcce725925f90629afdcead815} \strng{fullhash}{2593b6dcce725925f90629afdcead815} \strng{bibnamehash}{2593b6dcce725925f90629afdcead815} \strng{authorbibnamehash}{2593b6dcce725925f90629afdcead815} \strng{authornamehash}{2593b6dcce725925f90629afdcead815} \strng{authorfullhash}{2593b6dcce725925f90629afdcead815} \field{sortinit}{R} \field{sortinithash}{5e1c39a9d46ffb6bebd8f801023a9486} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{We introduce dense vision transformers, an architecture that leverages vision transformers in place of convolutional networks as a backbone for dense prediction tasks. We assemble tokens from various stages of the vision transformer into image-like representations at various resolutions and progressively combine them into full-resolution predictions using a convolutional decoder. The transformer backbone processes representations at a constant and relatively high resolution and has a global receptive field at every stage. These properties allow the dense vision transformer to provide finer-grained and more globally coherent predictions when compared to fully-convolutional networks. Our experiments show that this architecture yields substantial improvements on dense prediction tasks, especially when a large amount of training data is available. For monocular depth estimation, we observe an improvement of up to 28{\%} in relative performance when compared to a state-of-the-art fully-convolutional network. When applied to semantic segmentation, dense vision transformers set a new state of the art on ADE20K with 49.02{\%} mIoU. We further show that the architecture can be fine-tuned on smaller datasets such as NYUv2, KITTI, and Pascal Context where it also sets the new state of the art. 
Our models are available at https://github.com/intel-isl/DPT.} \field{eprinttype}{arXiv} \field{note}{15 pages} \field{pagetotal}{15} \field{title}{Vision Transformers for Dense Prediction} \field{year}{2021} \verb{eprint} \verb arXiv:2103.13413v1 \endverb \verb{urlraw} \verb https://arxiv.org/pdf/2103.13413 \endverb \verb{url} \verb https://arxiv.org/pdf/2103.13413 \endverb \keyw{Computer Science - Computer Vision and Pattern Recognition;Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Ranftl.2019}{misc}{} \name{author}{5}{}{% {{hash=f13b87ed1a6410067504966eac03e9a2}{% family={Ranftl}, familyi={R\bibinitperiod}, given={René}, giveni={R\bibinitperiod}}}% {{hash=4ee0621ee5837a920708854b07fef7f7}{% family={Lasinger}, familyi={L\bibinitperiod}, given={Katrin}, giveni={K\bibinitperiod}}}% {{hash=89d0278ff885d57b7708ae120200f573}{% family={Hafner}, familyi={H\bibinitperiod}, given={David}, giveni={D\bibinitperiod}}}% {{hash=205714eb0cc6921c15493da21a574130}{% family={Schindler}, familyi={S\bibinitperiod}, given={Konrad}, giveni={K\bibinitperiod}}}% {{hash=9025300f43a193cfed8e10e924f2a117}{% family={Koltun}, familyi={K\bibinitperiod}, given={Vladlen}, giveni={V\bibinitperiod}}}% } \strng{namehash}{9a396274beaaf111452927a46e80f547} \strng{fullhash}{fa15ac494a832be56810827b8030d68a} \strng{bibnamehash}{9a396274beaaf111452927a46e80f547} \strng{authorbibnamehash}{9a396274beaaf111452927a46e80f547} \strng{authornamehash}{9a396274beaaf111452927a46e80f547} \strng{authorfullhash}{fa15ac494a832be56810827b8030d68a} \field{sortinit}{R} \field{sortinithash}{5e1c39a9d46ffb6bebd8f801023a9486} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{The success of monocular depth estimation relies on large and diverse training sets. Due to the challenges associated with acquiring dense ground-truth depth across different environments at scale, a number of datasets with distinct characteristics and biases have emerged. We develop tools that enable mixing multiple datasets during training, even if their annotations are incompatible. In particular, we propose a robust training objective that is invariant to changes in depth range and scale, advocate the use of principled multi-objective learning to combine data from different sources, and highlight the importance of pretraining encoders on auxiliary tasks. Armed with these tools, we experiment with five diverse training datasets, including a new, massive data source: 3D films. To demonstrate the generalization power of our approach we use zero-shot cross-dataset transfer, i.e. we evaluate on datasets that were not seen during training. The experiments confirm that mixing data from complementary sources greatly improves monocular depth estimation. Our approach clearly outperforms competing methods across diverse datasets, setting a new state of the art for monocular depth estimation. 
Some results are shown in the supplementary video at this https URL} \field{eprinttype}{arXiv} \field{title}{Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer} \field{year}{2019} \verb{eprint} \verb arXiv:1907.01341v3 \endverb \verb{file} \verb Towards Robust Monocular Depth Estimation Mixing Dataset:Attachments/Towards Robust Monocular Depth Estimation Mixing Dataset.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1907.01341 \endverb \verb{url} \verb https://arxiv.org/pdf/1907.01341 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Raschka.2018}{book}{} \name{author}{1}{}{% {{hash=bb679961e3f3f4bba79fc1cf3aa7df1c}{% family={Raschka}, familyi={R\bibinitperiod}, given={Sebastian}, giveni={S\bibinitperiod}}}% } \list{language}{1}{% {ger}% } \list{location}{1}{% {Frechen}% } \list{publisher}{1}{% {mitp}% } \strng{namehash}{bb679961e3f3f4bba79fc1cf3aa7df1c} \strng{fullhash}{bb679961e3f3f4bba79fc1cf3aa7df1c} \strng{bibnamehash}{bb679961e3f3f4bba79fc1cf3aa7df1c} \strng{authorbibnamehash}{bb679961e3f3f4bba79fc1cf3aa7df1c} \strng{authornamehash}{bb679961e3f3f4bba79fc1cf3aa7df1c} \strng{authorfullhash}{bb679961e3f3f4bba79fc1cf3aa7df1c} \field{sortinit}{R} \field{sortinithash}{5e1c39a9d46ffb6bebd8f801023a9486} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{edition}{2., aktualisierte und erweiterte Auflage} \field{isbn}{978-3-95845-733-1} \field{pagetotal}{577} \field{series}{mitp Professional} \field{subtitle}{Das umfassende Praxis-Handbuch für Data Science, Deep Learning und Predictive Analytics} \field{title}{Machine Learning mit Python und Scikit-learn und TensorFlow} \field{year}{2018} \verb{urlraw} \verb http://www.content-select.com/index.php?id=bib_view&ean=9783958457348 \endverb \verb{url} \verb http://www.content-select.com/index.php?id=bib_view&ean=9783958457348 \endverb \endentry \entry{Redmon.2015}{misc}{} \name{author}{4}{}{% {{hash=99bced2e56a5253f3fe98a5f04e6d9b2}{% family={Redmon}, familyi={R\bibinitperiod}, given={Joseph}, giveni={J\bibinitperiod}}}% {{hash=05ca9f19da9ecbd2def4e5514f8043c8}{% family={Divvala}, familyi={D\bibinitperiod}, given={Santosh}, giveni={S\bibinitperiod}}}% {{hash=bd5dadbe57bedc5957c19a3154c4d424}{% family={Girshick}, familyi={G\bibinitperiod}, given={Ross}, giveni={R\bibinitperiod}}}% {{hash=396c6ddedb6f986906fc3e4994d19974}{% family={Farhadi}, familyi={F\bibinitperiod}, given={Ali}, giveni={A\bibinitperiod}}}% } \strng{namehash}{e1203a0044715040adeb8c5079ee645a} \strng{fullhash}{b5530443e433a4da53dbe3cf155225b4} \strng{bibnamehash}{e1203a0044715040adeb8c5079ee645a} \strng{authorbibnamehash}{e1203a0044715040adeb8c5079ee645a} \strng{authornamehash}{e1203a0044715040adeb8c5079ee645a} \strng{authorfullhash}{b5530443e433a4da53dbe3cf155225b4} \field{sortinit}{R} \field{sortinithash}{5e1c39a9d46ffb6bebd8f801023a9486} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{We present YOLO, a new approach to object detection. Prior work on object detection repurposes classifiers to perform detection. Instead, we frame object detection as a regression problem to spatially separated bounding boxes and associated class probabilities. A single neural network predicts bounding boxes and class probabilities directly from full images in one evaluation. Since the whole detection pipeline is a single network, it can be optimized end-to-end directly on detection performance. Our unified architecture is extremely fast. 
Our base YOLO model processes images in real-time at 45 frames per second. A smaller version of the network, Fast YOLO, processes an astounding 155 frames per second while still achieving double the mAP of other real-time detectors. Compared to state-of-the-art detection systems, YOLO makes more localization errors but is far less likely to predict false detections where nothing exists. Finally, YOLO learns very general representations of objects. It outperforms all other detection methods, including DPM and R-CNN, by a wide margin when generalizing from natural images to artwork on both the Picasso Dataset and the People-Art Dataset.} \field{eprinttype}{arXiv} \field{title}{You Only Look Once: Unified, Real-Time Object Detection} \field{year}{2015} \verb{eprint} \verb arXiv:1506.02640v5 \endverb \verb{file} \verb You Only Look Once Unified Real-Time Object Detection:Attachments/You Only Look Once Unified Real-Time Object Detection.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1506.02640 \endverb \verb{url} \verb https://arxiv.org/pdf/1506.02640 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Ren.2015}{misc}{} \name{author}{4}{}{% {{hash=bb295293acacd54387339079ebbe4ead}{% family={Ren}, familyi={R\bibinitperiod}, given={Shaoqing}, giveni={S\bibinitperiod}}}% {{hash=6b4b60e909e78633945f3f9c9dc83e01}{% family={He}, familyi={H\bibinitperiod}, given={Kaiming}, giveni={K\bibinitperiod}}}% {{hash=bd5dadbe57bedc5957c19a3154c4d424}{% family={Girshick}, familyi={G\bibinitperiod}, given={Ross}, giveni={R\bibinitperiod}}}% {{hash=f85751488058842b5777c7b4074077b5}{% family={Sun}, familyi={S\bibinitperiod}, given={Jian}, giveni={J\bibinitperiod}}}% } \strng{namehash}{f086ca4da3e532e8a41cb758ea461825} \strng{fullhash}{008a132af3e2d4ff15eb01a8fb4b005c} \strng{bibnamehash}{f086ca4da3e532e8a41cb758ea461825} \strng{authorbibnamehash}{f086ca4da3e532e8a41cb758ea461825} \strng{authornamehash}{f086ca4da3e532e8a41cb758ea461825} \strng{authorfullhash}{008a132af3e2d4ff15eb01a8fb4b005c} \field{sortinit}{R} \field{sortinithash}{5e1c39a9d46ffb6bebd8f801023a9486} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{State-of-the-art object detection networks depend on region proposal algorithms to hypothesize object locations. Advances like SPPnet and Fast R-CNN have reduced the running time of these detection networks, exposing region proposal computation as a bottleneck. In this work, we introduce a Region Proposal Network (RPN) that shares full-image convolutional features with the detection network, thus enabling nearly cost-free region proposals. An RPN is a fully convolutional network that simultaneously predicts object bounds and objectness scores at each position. The RPN is trained end-to-end to generate high-quality region proposals, which are used by Fast R-CNN for detection. We further merge RPN and Fast R-CNN into a single network by sharing their convolutional features---using the recently popular terminology of neural networks with 'attention' mechanisms, the RPN component tells the unified network where to look. For the very deep VGG-16 model, our detection system has a frame rate of 5fps (including all steps) on a GPU, while achieving state-of-the-art object detection accuracy on PASCAL VOC 2007, 2012, and MS COCO datasets with only 300 proposals per image. In ILSVRC and COCO 2015 competitions, Faster R-CNN and RPN are the foundations of the 1st-place winning entries in several tracks. 
Code has been made publicly available.} \field{eprinttype}{arXiv} \field{title}{Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks} \field{year}{2015} \verb{eprint} \verb arXiv:1506.01497v3 \endverb \verb{file} \verb Faster R-CNN Towards Real-Time Object Detection with Reg:Attachments/Faster R-CNN Towards Real-Time Object Detection with Reg.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1506.01497 \endverb \verb{url} \verb https://arxiv.org/pdf/1506.01497 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{Rezatofighi.2252019}{misc}{} \name{author}{6}{}{% {{hash=c834f8dc7253578c3dfdf392681c186f}{% family={Rezatofighi}, familyi={R\bibinitperiod}, given={Hamid}, giveni={H\bibinitperiod}}}% {{hash=028a19dd53dc26b31bb9bb0f5aa212e7}{% family={Tsoi}, familyi={T\bibinitperiod}, given={Nathan}, giveni={N\bibinitperiod}}}% {{hash=d9806662b24ca486664faa2ddb093991}{% family={Gwak}, familyi={G\bibinitperiod}, given={JunYoung}, giveni={J\bibinitperiod}}}% {{hash=72c51c1b7ba21759de9bd158c8c74ad4}{% family={Sadeghian}, familyi={S\bibinitperiod}, given={Amir}, giveni={A\bibinitperiod}}}% {{hash=8d82839be6afd59c432fc7cf084f4326}{% family={Reid}, familyi={R\bibinitperiod}, given={Ian}, giveni={I\bibinitperiod}}}% {{hash=e29c20003a517abaaac28852301c03c2}{% family={Savarese}, familyi={S\bibinitperiod}, given={Silvio}, giveni={S\bibinitperiod}}}% } \strng{namehash}{579d75aa87c34f094d34b3bc3670aa52} \strng{fullhash}{d49fc6c7ff39001b1961fb11179318be} \strng{bibnamehash}{579d75aa87c34f094d34b3bc3670aa52} \strng{authorbibnamehash}{579d75aa87c34f094d34b3bc3670aa52} \strng{authornamehash}{579d75aa87c34f094d34b3bc3670aa52} \strng{authorfullhash}{d49fc6c7ff39001b1961fb11179318be} \field{sortinit}{R} \field{sortinithash}{5e1c39a9d46ffb6bebd8f801023a9486} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Intersection over Union (IoU) is the most popular evaluation metric used in the object detection benchmarks. However, there is a gap between optimizing the commonly used distance losses for regressing the parameters of a bounding box and maximizing this metric value. The optimal objective for a metric is the metric itself. In the case of axis-aligned 2D bounding boxes, it can be shown that $IoU$ can be directly used as a regression loss. However, $IoU$ has a plateau making it infeasible to optimize in the case of non-overlapping bounding boxes. In this paper, we address the weaknesses of $IoU$ by introducing a generalized version as both a new loss and a new metric. 
By incorporating this generalized $IoU$ ($GIoU$) as a loss into the state-of-the-art object detection frameworks, we show a consistent improvement on their performance using both the standard, $IoU$ based, and new, $GIoU$ based, performance measures on popular object detection benchmarks such as PASCAL VOC and MS COCO.} \field{eprinttype}{arXiv} \field{note}{accepted in CVPR 2019} \field{title}{Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression} \field{year}{2019} \verb{eprint} \verb arXiv:1902.09630v2 \endverb \verb{file} \verb Generalized Intersection over Union A Metric and A Loss:Attachments/Generalized Intersection over Union A Metric and A Loss.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1902.09630 \endverb \verb{url} \verb https://arxiv.org/pdf/1902.09630 \endverb \keyw{Artificial Intelligence (cs.AI);Computer Science - Artificial Intelligence;Computer Science - Computer Vision and Pattern Recognition;Computer Science - Learning;Computer Vision and Pattern Recognition (cs.CV);Machine Learning (cs.LG)} \endentry \entry{Sandler}{article}{} \name{author}{5}{}{% {{hash=8f90fd131c2bbfde4d0e9fdd7ed4ea8b}{% family={Sandler}, familyi={S\bibinitperiod}, given={Mark}, giveni={M\bibinitperiod}}}% {{hash=315c4166fc1f7cb66324a7f0d82827cd}{% family={Howard}, familyi={H\bibinitperiod}, given={Andrew}, giveni={A\bibinitperiod}}}% {{hash=d767e8e4d733bcf728bcdf2c193462f7}{% family={Zhu}, familyi={Z\bibinitperiod}, given={Menglong}, giveni={M\bibinitperiod}}}% {{hash=48f4090a93cf9f445057a9d6defe7973}{% family={Zhmoginov}, familyi={Z\bibinitperiod}, given={Andrey}, giveni={A\bibinitperiod}}}% {{hash=9dddfcfd529634a150ee38ee5c0203d7}{% family={Chen}, familyi={C\bibinitperiod}, given={Liang-Chieh}, giveni={L\bibinithyphendelim C\bibinitperiod}}}% } \strng{namehash}{70179dd377b16c4cfcc0748ef0bd028a} \strng{fullhash}{8d58d04315a5e1c06300772330b235d7} \strng{bibnamehash}{70179dd377b16c4cfcc0748ef0bd028a} \strng{authorbibnamehash}{70179dd377b16c4cfcc0748ef0bd028a} \strng{authornamehash}{70179dd377b16c4cfcc0748ef0bd028a} \strng{authorfullhash}{8d58d04315a5e1c06300772330b235d7} \field{sortinit}{S} \field{sortinithash}{b164b07b29984b41daf1e85279fbc5ab} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{In this paper we describe a new mobile architecture, MobileNetV2, that improves the state of the art performance of mobile models on multiple tasks and benchmarks as well as across a spectrum of different model sizes. We also describe efficient ways of applying these mobile models to object detection in a novel framework we call SSDLite. Additionally, we demonstrate how to build mobile semantic segmentation models through a reduced form of DeepLabv3 which we call Mobile DeepLabv3. The MobileNetV2 architecture is based on an inverted residual structure where the input and output of the residual block are thin bottleneck layers, opposite to traditional residual models which use expanded representations in the input. MobileNetV2 uses lightweight depthwise convolutions to filter features in the intermediate expansion layer. Additionally, we find that it is important to remove non-linearities in the narrow layers in order to maintain representational power. We demonstrate that this improves performance and provide an intuition that led to this design. 
Finally, our approach allows decoupling of the input/output domains from the expressiveness of the transformation, which provides a convenient framework for further analysis. We measure our performance on Imagenet classification, COCO object detection, VOC image segmentation. We evaluate the trade-offs between accuracy, and number of operations measured by multiply-adds (MAdd), as well as the number of parameters} \field{eprinttype}{arXiv} \field{journaltitle}{The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)} \field{title}{MobileNetV2: Inverted Residuals and Linear Bottlenecks} \field{year}{2018} \verb{eprint} \verb arXiv:1801.04381v4 \endverb \verb{file} \verb MobileNetV2 Inverted Residuals and Linear Bottlenecks:Attachments/MobileNetV2 Inverted Residuals and Linear Bottlenecks.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1801.04381 \endverb \verb{url} \verb https://arxiv.org/pdf/1801.04381 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \entry{TensorFlow.8122021}{online}{} \name{author}{1}{}{% {{hash=074dd699710da0ec1eb45f13b31788e3}{% family={TensorFlow}, familyi={T\bibinitperiod}}}% } \strng{namehash}{074dd699710da0ec1eb45f13b31788e3} \strng{fullhash}{074dd699710da0ec1eb45f13b31788e3} \strng{bibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorbibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authornamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorfullhash}{074dd699710da0ec1eb45f13b31788e3} \field{extraname}{1} \field{sortinit}{T} \field{sortinithash}{9af77f0292593c26bde9a56e688eaee9} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{title}{Quantisierung nach dem Training ~|~ TensorFlow Lite} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://www.tensorflow.org/lite/performance/post_training_quantization \endverb \verb{url} \verb https://www.tensorflow.org/lite/performance/post_training_quantization \endverb \endentry \entry{TensorFlow.832021}{online}{} \name{author}{1}{}{% {{hash=074dd699710da0ec1eb45f13b31788e3}{% family={TensorFlow}, familyi={T\bibinitperiod}}}% } \strng{namehash}{074dd699710da0ec1eb45f13b31788e3} \strng{fullhash}{074dd699710da0ec1eb45f13b31788e3} \strng{bibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorbibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authornamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorfullhash}{074dd699710da0ec1eb45f13b31788e3} \field{extraname}{2} \field{sortinit}{T} \field{sortinithash}{9af77f0292593c26bde9a56e688eaee9} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{An end-to-end open-source machine learning platform for everyone. 
Explore TensorFlow's flexible ecosystem of tools, libraries, and community resources.} \field{title}{TensorFlow} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://www.tensorflow.org/ \endverb \verb{url} \verb https://www.tensorflow.org/ \endverb \endentry \entry{Tensorflow.8202021}{online}{} \name{author}{1}{}{% {{hash=074dd699710da0ec1eb45f13b31788e3}{% family={TensorFlow}, familyi={T\bibinitperiod}}}% } \strng{namehash}{074dd699710da0ec1eb45f13b31788e3} \strng{fullhash}{074dd699710da0ec1eb45f13b31788e3} \strng{bibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorbibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authornamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorfullhash}{074dd699710da0ec1eb45f13b31788e3} \field{extraname}{3} \field{sortinit}{T} \field{sortinithash}{9af77f0292593c26bde9a56e688eaee9} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{title}{TensorFlow 2 Schnellstart für Anfänger ~|~ TensorFlow Core} \field{urlday}{20} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://www.tensorflow.org/tutorials/quickstart/beginner?hl=de \endverb \verb{url} \verb https://www.tensorflow.org/tutorials/quickstart/beginner?hl=de \endverb \endentry \entry{TensorFlow.8202021}{online}{} \name{author}{1}{}{% {{hash=074dd699710da0ec1eb45f13b31788e3}{% family={TensorFlow}, familyi={T\bibinitperiod}}}% } \strng{namehash}{074dd699710da0ec1eb45f13b31788e3} \strng{fullhash}{074dd699710da0ec1eb45f13b31788e3} \strng{bibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorbibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authornamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorfullhash}{074dd699710da0ec1eb45f13b31788e3} \field{extraname}{4} \field{sortinit}{T} \field{sortinithash}{9af77f0292593c26bde9a56e688eaee9} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{A deep learning framework for on-device inference. 
Train and deploy machine learning models on mobile and IoT devices, Android, iOS, Edge TPU, Raspberry Pi.} \field{title}{TensorFlow Lite | ML für Mobilgeräte und Edge-Geräte} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://www.tensorflow.org/lite/?hl=de \endverb \verb{url} \verb https://www.tensorflow.org/lite/?hl=de \endverb \endentry \entry{TensorFlow.8102021}{online}{} \name{author}{1}{}{% {{hash=074dd699710da0ec1eb45f13b31788e3}{% family={TensorFlow}, familyi={T\bibinitperiod}}}% } \strng{namehash}{074dd699710da0ec1eb45f13b31788e3} \strng{fullhash}{074dd699710da0ec1eb45f13b31788e3} \strng{bibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorbibnamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authornamehash}{074dd699710da0ec1eb45f13b31788e3} \strng{authorfullhash}{074dd699710da0ec1eb45f13b31788e3} \field{extraname}{5} \field{sortinit}{T} \field{sortinithash}{9af77f0292593c26bde9a56e688eaee9} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{title}{TensorFlow Lite-Konverter} \field{urlday}{6} \field{urlmonth}{10} \field{urlyear}{2021} \field{year}{2021} \field{urldateera}{ce} \verb{urlraw} \verb https://www.tensorflow.org/lite/convert \endverb \verb{url} \verb https://www.tensorflow.org/lite/convert \endverb \endentry \entry{Xian_2018_CVPR}{inproceedings}{} \name{author}{7}{}{% {{hash=d34252df77136e3e43830c451bc16f4c}{% family={Xian}, familyi={X\bibinitperiod}, given={Ke}, giveni={K\bibinitperiod}}}% {{hash=9e49384c78f6861746fe787b12339aed}{% family={Shen}, familyi={S\bibinitperiod}, given={Chunhua}, giveni={C\bibinitperiod}}}% {{hash=5dd651375bfd096c12f9efedcbb23b26}{% family={Cao}, familyi={C\bibinitperiod}, given={Zhiguo}, giveni={Z\bibinitperiod}}}% {{hash=b61c7b6eeaa58572d109f9c87e5a61b0}{% family={Lu}, familyi={L\bibinitperiod}, given={Hao}, giveni={H\bibinitperiod}}}% {{hash=1f6352fb541fa4d76fa406a7a227e942}{% family={Xiao}, familyi={X\bibinitperiod}, given={Yang}, giveni={Y\bibinitperiod}}}% {{hash=7daa4ae161a7b30243906f8d8614d031}{% family={Li}, familyi={L\bibinitperiod}, given={Ruibo}, giveni={R\bibinitperiod}}}% {{hash=944b1099464311888b5869719fe97207}{% family={Luo}, familyi={L\bibinitperiod}, given={Zhenbo}, giveni={Z\bibinitperiod}}}% } \strng{namehash}{d8bb47053eee030e5ea0df7c00d70552} \strng{fullhash}{a8598030e163b0fcd7ad5a9a1d0e2305} \strng{bibnamehash}{d8bb47053eee030e5ea0df7c00d70552} \strng{authorbibnamehash}{d8bb47053eee030e5ea0df7c00d70552} \strng{authornamehash}{d8bb47053eee030e5ea0df7c00d70552} \strng{authorfullhash}{a8598030e163b0fcd7ad5a9a1d0e2305} \field{sortinit}{X} \field{sortinithash}{1965c258adceecf23ce3d67b05113442} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{booktitle}{Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)} \field{month}{6} \field{title}{Monocular Relative Depth Perception With Web Stereo Data Supervision} \field{year}{2018} \endentry \entry{Yang.2017}{misc}{} \name{author}{6}{}{% {{hash=d063132f441798ef9f4d438c5d299f61}{% family={Yang}, familyi={Y\bibinitperiod}, given={Shichao}, giveni={S\bibinitperiod}}}% {{hash=be0784aa0265f37ffaf08730900a33ad}{% family={Konam}, familyi={K\bibinitperiod}, given={Sandeep}, giveni={S\bibinitperiod}}}% {{hash=0b13d29cc629ce6345c4d0376393515f}{% family={Ma}, familyi={M\bibinitperiod}, given={Chen}, giveni={C\bibinitperiod}}}% {{hash=059b8e68f832ebb69bef55f1fb50075b}{% family={Rosenthal}, familyi={R\bibinitperiod}, 
given={Stephanie}, giveni={S\bibinitperiod}}}% {{hash=108bc6e5f71a32699fcf07b7e8957310}{% family={Veloso}, familyi={V\bibinitperiod}, given={Manuela}, giveni={M\bibinitperiod}}}% {{hash=ad26bf16eeaee427e8e36bea35587822}{% family={Scherer}, familyi={S\bibinitperiod}, given={Sebastian}, giveni={S\bibinitperiod}}}% } \strng{namehash}{6a59f26db512831317179bf5e8435bc2} \strng{fullhash}{f79134b856bd1c1c1dc178e12c628a5a} \strng{bibnamehash}{6a59f26db512831317179bf5e8435bc2} \strng{authorbibnamehash}{6a59f26db512831317179bf5e8435bc2} \strng{authornamehash}{6a59f26db512831317179bf5e8435bc2} \strng{authorfullhash}{f79134b856bd1c1c1dc178e12c628a5a} \field{sortinit}{Y} \field{sortinithash}{fd67ad5a9ef0f7456bdd9aab10fe1495} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Obstacle avoidance from monocular images is a challenging problem for robots. Though multi-view structure-from-motion could build 3D maps, it is not robust in textureless environments. Some learning based methods exploit human demonstration to predict a steering command directly from a single image. However, this method is usually biased towards certain tasks or demonstration scenarios and also biased by human understanding. In this paper, we propose a new method to predict a trajectory from images. We train our system on more diverse NYUv2 dataset. The ground truth trajectory is computed from the designed cost functions automatically. The Convolutional Neural Network perception is divided into two stages: first, predict depth map and surface normal from RGB images, which are two important geometric properties related to 3D obstacle representation. Second, predict the trajectory from the depth and normal. Results show that our intermediate perception increases the accuracy by 20{\%} than the direct prediction. 
Our model generalizes well to other public indoor datasets and is also demonstrated for robot flights in simulation and experiments.} \field{eprinttype}{arXiv} \field{title}{Obstacle Avoidance through Deep Networks based Intermediate Perception} \field{year}{2017} \verb{eprint} \verb arXiv:1704.08759v1 \endverb \verb{file} \verb Obstacle Avoidance through Deep Networks based Intermedi:Attachments/Obstacle Avoidance through Deep Networks based Intermedi.pdf:application/pdf \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1704.08759 \endverb \verb{url} \verb https://arxiv.org/pdf/1704.08759 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV);Robotics (cs.RO)} \endentry \entry{Zhang.2019}{inproceedings}{} \name{author}{3}{}{% {{hash=477b8da468a0e0969c47a18407e4f921}{% family={Zhang}, familyi={Z\bibinitperiod}, given={Zhenghong}, giveni={Z\bibinitperiod}}}% {{hash=c3cc50ba7a1919e3db6b64bc14843a9e}{% family={Xiong}, familyi={X\bibinitperiod}, given={Mingkang}, giveni={M\bibinitperiod}}}% {{hash=ad35b4d45aebfabe5d2d4be218eadfac}{% family={Xiong}, familyi={X\bibinitperiod}, given={Huilin}, giveni={H\bibinitperiod}}}% } \strng{namehash}{53735448d457eaa108282eef5304063a} \strng{fullhash}{53735448d457eaa108282eef5304063a} \strng{bibnamehash}{53735448d457eaa108282eef5304063a} \strng{authorbibnamehash}{53735448d457eaa108282eef5304063a} \strng{authornamehash}{53735448d457eaa108282eef5304063a} \strng{authorfullhash}{53735448d457eaa108282eef5304063a} \field{sortinit}{Z} \field{sortinithash}{96892c0b0a36bb8557c40c49813d48b3} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{booktitle}{2019 4th International Conference on Cloud Computing and Internet of Things (CCIOT)} \field{title}{Monocular Depth Estimation for UAV Obstacle Avoidance} \field{year}{2019} \field{pages}{43\bibrangedash 47} \range{pages}{5} \verb{doi} \verb 10.1109/CCIOT48581.2019.8980350 \endverb \endentry \entry{Zhou.182017}{misc}{} \name{author}{3}{}{% {{hash=441742472ab4f4d6527bd49ddd3c6279}{% family={Zhou}, familyi={Z\bibinitperiod}, given={Yiren}, giveni={Y\bibinitperiod}}}% {{hash=0e339e2d98a016927dec3c34c9a12281}{% family={Song}, familyi={S\bibinitperiod}, given={Sibo}, giveni={S\bibinitperiod}}}% {{hash=54cff214a2905977051c8cbe018c5499}{% family={Cheung}, familyi={C\bibinitperiod}, given={Ngai-Man}, giveni={N\bibinithyphendelim M\bibinitperiod}}}% } \strng{namehash}{6298ea7c9583f6fae23929e33e08d3d3} \strng{fullhash}{6298ea7c9583f6fae23929e33e08d3d3} \strng{bibnamehash}{6298ea7c9583f6fae23929e33e08d3d3} \strng{authorbibnamehash}{6298ea7c9583f6fae23929e33e08d3d3} \strng{authornamehash}{6298ea7c9583f6fae23929e33e08d3d3} \strng{authorfullhash}{6298ea7c9583f6fae23929e33e08d3d3} \field{sortinit}{Z} \field{sortinithash}{96892c0b0a36bb8557c40c49813d48b3} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Image blur and image noise are common distortions during image acquisition. In this paper, we systematically study the effect of image distortions on the deep neural network (DNN) image classifiers. First, we examine the DNN classifier performance under four types of distortions. Second, we propose two approaches to alleviate the effect of image distortion: re-training and fine-tuning with noisy images. 
Our results suggest that, under certain conditions, fine-tuning with noisy images can alleviate much effect due to distorted inputs, and is more practical than re-training.} \field{eprinttype}{arXiv} \field{note}{5 pages, 8 figures, ICASSP 2017} \field{pagetotal}{5} \field{title}{On Classification of Distorted Images with Deep Convolutional Neural Networks} \field{year}{2017} \verb{eprint} \verb arXiv:1701.01924v1 \endverb \verb{file} \verb On_Classification_of_Distorted_Images_with_Deep_Co:Attachments/On_Classification_of_Distorted_Images_with_Deep_Co.pdf:application/pdf \endverb \verb{urlraw} \verb http://arxiv.org/pdf/1701.01924v1 \endverb \verb{url} \verb http://arxiv.org/pdf/1701.01924v1 \endverb \endentry \entry{Zhou.2021}{book}{} \name{author}{1}{}{% {{hash=6b1a37e820a730061851a81dcca2d154}{% family={Zhou}, familyi={Z\bibinitperiod}, given={Zhi-Hua}, giveni={Z\bibinithyphendelim H\bibinitperiod}}}% } \list{location}{1}{% {Singapore}% } \list{publisher}{1}{% {Springer Singapore}% } \strng{namehash}{6b1a37e820a730061851a81dcca2d154} \strng{fullhash}{6b1a37e820a730061851a81dcca2d154} \strng{bibnamehash}{6b1a37e820a730061851a81dcca2d154} \strng{authorbibnamehash}{6b1a37e820a730061851a81dcca2d154} \strng{authornamehash}{6b1a37e820a730061851a81dcca2d154} \strng{authorfullhash}{6b1a37e820a730061851a81dcca2d154} \field{sortinit}{Z} \field{sortinithash}{96892c0b0a36bb8557c40c49813d48b3} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{isbn}{978-981-15-1966-6} \field{title}{Machine Learning} \field{year}{2021} \verb{doi} \verb 10.1007/978-981-15-1967-3 \endverb \verb{file} \verb Machine Learning:Attachments/Machine Learning.pdf:application/pdf \endverb \endentry \entry{Zhu.2016}{misc}{} \name{author}{7}{}{% {{hash=f86434cb0d86ba7eac5b0e3f3236abf0}{% family={Zhu}, familyi={Z\bibinitperiod}, given={Yuke}, giveni={Y\bibinitperiod}}}% {{hash=e1dbfa167aabb32138ad3d9df709e3dd}{% family={Mottaghi}, familyi={M\bibinitperiod}, given={Roozbeh}, giveni={R\bibinitperiod}}}% {{hash=9bfe220c99d5e3f19e82f842df626065}{% family={Kolve}, familyi={K\bibinitperiod}, given={Eric}, giveni={E\bibinitperiod}}}% {{hash=5556915c40a404e6fa83816576929682}{% family={Lim}, familyi={L\bibinitperiod}, given={Joseph\bibnamedelima J.}, giveni={J\bibinitperiod\bibinitdelim J\bibinitperiod}}}% {{hash=83dd9d464c9f0ea982254939a7f021d8}{% family={Gupta}, familyi={G\bibinitperiod}, given={Abhinav}, giveni={A\bibinitperiod}}}% {{hash=cd00ce5bc45f687c432e52e0fa1a7aa6}{% family={Fei-Fei}, familyi={F\bibinithyphendelim F\bibinitperiod}, given={Li}, giveni={L\bibinitperiod}}}% {{hash=396c6ddedb6f986906fc3e4994d19974}{% family={Farhadi}, familyi={F\bibinitperiod}, given={Ali}, giveni={A\bibinitperiod}}}% } \strng{namehash}{abd154245b8f10c81399b395d5309db7} \strng{fullhash}{6e77a10df1258517e9e9627a2a07a209} \strng{bibnamehash}{abd154245b8f10c81399b395d5309db7} \strng{authorbibnamehash}{abd154245b8f10c81399b395d5309db7} \strng{authornamehash}{abd154245b8f10c81399b395d5309db7} \strng{authorfullhash}{6e77a10df1258517e9e9627a2a07a209} \field{sortinit}{Z} \field{sortinithash}{96892c0b0a36bb8557c40c49813d48b3} \field{labelnamesource}{author} \field{labeltitlesource}{title} \field{abstract}{Two less addressed issues of deep reinforcement learning are (1) lack of generalization capability to new target goals, and (2) data inefficiency i.e., the model requires several (and often costly) episodes of trial and error to converge, which makes it impractical to be applied to real-world scenarios. 
In this paper, we address these two issues and apply our model to the task of target-driven visual navigation. To address the first issue, we propose an actor-critic model whose policy is a function of the goal as well as the current state, which allows to better generalize. To address the second issue, we propose AI2-THOR framework, which provides an environment with high-quality 3D scenes and physics engine. Our framework enables agents to take actions and interact with objects. Hence, we can collect a huge number of training samples efficiently. We show that our proposed method (1) converges faster than the state-of-the-art deep reinforcement learning methods, (2) generalizes across targets and across scenes, (3) generalizes to a real robot scenario with a small amount of fine-tuning (although the model is trained in simulation), (4) is end-to-end trainable and does not need feature engineering, feature matching between frames or 3D reconstruction of the environment. The supplementary video can be accessed at the following link: this https URL.} \field{eprinttype}{arXiv} \field{title}{Target-driven Visual Navigation in Indoor Scenes using Deep Reinforcement Learning} \field{year}{2016} \verb{eprint} \verb arXiv:1609.05143v1 \endverb \verb{urlraw} \verb https://arxiv.org/pdf/1609.05143 \endverb \verb{url} \verb https://arxiv.org/pdf/1609.05143 \endverb \keyw{Computer Vision and Pattern Recognition (cs.CV)} \endentry \enddatalist \endrefsection \endinput