Papers | Xinyu Zhang

2025

arXiv

Training-Free Motion-Guided Video Generation with Enhanced Temporal Consistency Using Motion Consistency Loss

Xinyu Zhang, Zicheng Duan, Dong Gong, and Lingqiao Liu

arXiv preprint arXiv:2501.07563, 2025

% arXiv preprint. The eprint/archiveprefix fields expose the identifier to
% eprint-aware styles and tools; journal is kept so classic styles that
% ignore eprint fields still print the arXiv number.
@article{zhang2025training,
  author        = {Zhang, Xinyu and Duan, Zicheng and Gong, Dong and Liu, Lingqiao},
  title         = {Training-Free Motion-Guided Video Generation with Enhanced Temporal Consistency Using Motion Consistency Loss},
  journal       = {arXiv preprint arXiv:2501.07563},
  year          = {2025},
  eprint        = {2501.07563},
  archiveprefix = {arXiv},
}

CVPR

Are Image Distributions Indistinguishable to Humans Indistinguishable to Classifiers?

Zebin You, Xinyu Zhang, Hanzhong Guo, Jingdong Wang, and 1 more author

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2025

arXiv Bib

% Conference paper. The proceedings name is written with standard title
% capitalisation, since styles do not recase booktitle.
@inproceedings{you2024image,
  author    = {You, Zebin and Zhang, Xinyu and Guo, Hanzhong and Wang, Jingdong and Li, Chongxuan},
  title     = {Are Image Distributions Indistinguishable to Humans Indistinguishable to Classifiers?},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2025},
}

Context-aware prompt learning for test-time vision recognition with frozen vision-language model

Junhui Yin, Xinyu Zhang, Lin Wu, and Xiaojie Wang

Pattern Recognition, 2025

arXiv Bib PDF

% Journal article in Pattern Recognition.
@article{yin2025context,
  author    = {Yin, Junhui and Zhang, Xinyu and Wu, Lin and Wang, Xiaojie},
  title     = {Context-aware prompt learning for test-time vision recognition with frozen vision-language model},
  journal   = {Pattern Recognition},
  pages     = {111359},
  year      = {2025},
  publisher = {Elsevier},
}

2024

NeurIPS

Evaluation of Text-to-Video Generation Models: A Dynamics Perspective

Mingxiang Liao^*, Hannan Lu^*, Xinyu Zhang^*, Fang Wan, and 5 more authors

In The Thirty-eighth Annual Conference on Neural Information Processing Systems, 2024

Bib PDF Code

% Conference paper (NeurIPS 2024).
@inproceedings{liaoevaluation,
  author    = {Liao, Mingxiang and Lu, Hannan and Zhang, Xinyu and Wan, Fang and Wang, Tianyu and Zhao, Yuzhong and Zuo, Wangmeng and Ye, Qixiang and Wang, Jingdong},
  title     = {Evaluation of Text-to-Video Generation Models: A Dynamics Perspective},
  booktitle = {The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year      = {2024},
}

CVPR

VRP-SAM: SAM with visual reference prompt

Yanpeng Sun, Jiahui Chen, Shan Zhang, Xinyu Zhang, and 5 more authors

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2024

Bib PDF Code

% Conference paper. Acronyms in the title are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.
@inproceedings{sun2024vrp,
  author    = {Sun, Yanpeng and Chen, Jiahui and Zhang, Shan and Zhang, Xinyu and Chen, Qiang and Zhang, Gang and Ding, Errui and Wang, Jingdong and Li, Zechao},
  title     = {{VRP-SAM}: {SAM} with visual reference prompt},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {23565--23574},
  year      = {2024},
}

arXiv

Add-SD: Rational Generation without Manual Reference

Lingfeng Yang^*, Xinyu Zhang^*, Xiang Li, Jinwen Chen, and 6 more authors

arXiv preprint arXiv:2407.21016, 2024

arXiv Bib

% arXiv preprint. The method name is brace-protected against sentence-casing.
% The eprint/archiveprefix fields expose the identifier to eprint-aware
% styles; journal is kept so classic styles still print the arXiv number.
@article{yang2024add,
  author        = {Yang, Lingfeng and Zhang, Xinyu and Li, Xiang and Chen, Jinwen and Yao, Kun and Zhang, Gang and Ding, Errui and Liu, Lingqiao and Wang, Jingdong and Yang, Jian},
  title         = {{Add-SD}: Rational Generation without Manual Reference},
  journal       = {arXiv preprint arXiv:2407.21016},
  year          = {2024},
  eprint        = {2407.21016},
  archiveprefix = {arXiv},
}

arXiv

LW-DETR: A Transformer Replacement to YOLO for Real-Time Detection

Qiang Chen^*, Xiangbo Su^*, Xinyu Zhang^*, Jian Wang, and 7 more authors

arXiv preprint arXiv:2406.03459, 2024

arXiv Bib

% arXiv preprint. Acronyms in the title are brace-protected against
% sentence-casing. The eprint/archiveprefix fields expose the identifier to
% eprint-aware styles; journal is kept so classic styles still print it.
@article{chen2024lw,
  author        = {Chen, Qiang and Su, Xiangbo and Zhang, Xinyu and Wang, Jian and Chen, Jiahui and Shen, Yunpeng and Han, Chuchu and Chen, Ziliang and Xu, Weixiang and Li, Fanrong and others},
  title         = {{LW-DETR}: A Transformer Replacement to {YOLO} for Real-Time Detection},
  journal       = {arXiv preprint arXiv:2406.03459},
  year          = {2024},
  eprint        = {2406.03459},
  archiveprefix = {arXiv},
}

arXiv

Improving multi-modal large language model through boosting vision capabilities

Yanpeng Sun, Huaxin Zhang, Qiang Chen, Xinyu Zhang, and 4 more authors

arXiv preprint arXiv:2410.13733, 2024

arXiv Bib Code

% arXiv preprint. The eprint/archiveprefix fields expose the identifier to
% eprint-aware styles and tools; journal is kept so classic styles that
% ignore eprint fields still print the arXiv number.
@article{sun2024improving,
  author        = {Sun, Yanpeng and Zhang, Huaxin and Chen, Qiang and Zhang, Xinyu and Sang, Nong and Zhang, Gang and Wang, Jingdong and Li, Zechao},
  title         = {Improving multi-modal large language model through boosting vision capabilities},
  journal       = {arXiv preprint arXiv:2410.13733},
  year          = {2024},
  eprint        = {2410.13733},
  archiveprefix = {arXiv},
}

2023

TMLR

CAE v2: Context autoencoder with CLIP latent alignment

Xinyu Zhang, Jiahui Chen, Junkun Yuan, Qiang Chen, and 7 more authors

Transactions on Machine Learning Research, 2023

Bib PDF Code

% Journal article (TMLR). Acronyms in the title are brace-protected so that
% sentence-casing bibliography styles do not lowercase them.
@article{zhang2023cae,
  author  = {Zhang, Xinyu and Chen, Jiahui and Yuan, Junkun and Chen, Qiang and Wang, Jian and Wang, Xiaodi and Han, Shumin and Chen, Xiaokang and Pi, Jimin and Yao, Kun and others},
  title   = {{CAE} v2: Context autoencoder with {CLIP} latent alignment},
  journal = {Transactions on Machine Learning Research},
  year    = {2023},
}

NeurIPS

HAP: Structure-aware masked image modeling for human-centric perception

Junkun Yuan^*, Xinyu Zhang^*†, Hao Zhou, Jian Wang, and 7 more authors

Advances in Neural Information Processing Systems, 2023

Bib PDF Code

% NeurIPS 2023 paper. The author field holds plain names only: a dagger
% marker inside a surname would corrupt sorting and label generation.
% The method acronym is brace-protected against sentence-casing.
@article{yuan2024hap,
  author  = {Yuan, Junkun and Zhang, Xinyu and Zhou, Hao and Wang, Jian and Qiu, Zhongwei and Shao, Zhiyin and Zhang, Shaofeng and Long, Sifan and Kuang, Kun and Yao, Kun and others},
  title   = {{HAP}: Structure-aware masked image modeling for human-centric perception},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {36},
  year    = {2023},
}

ICCV

Unified pre-training with pseudo texts for text-to-image person re-identification

Zhiyin Shao^*, Xinyu Zhang^*, Changxing Ding, Jian Wang, and 1 more author

In Proceedings of the IEEE/CVF International Conference on Computer Vision, 2023

Bib PDF Code

% Conference paper (ICCV 2023).
@inproceedings{shao2023unified,
  author    = {Shao, Zhiyin and Zhang, Xinyu and Ding, Changxing and Wang, Jian and Wang, Jingdong},
  title     = {Unified pre-training with pseudo texts for text-to-image person re-identification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {11174--11184},
  year      = {2023},
}

TIP

A real-time memory updating strategy for unsupervised person re-identification

Junhui Yin, Xinyu Zhang, Zhanyu Ma, Jun Guo, and 1 more author

IEEE Transactions on Image Processing, 2023

Bib PDF Code

% Journal article in IEEE Transactions on Image Processing.
@article{yin2023real,
  author    = {Yin, Junhui and Zhang, Xinyu and Ma, Zhanyu and Guo, Jun and Liu, Yifan},
  title     = {A real-time memory updating strategy for unsupervised person re-identification},
  journal   = {IEEE Transactions on Image Processing},
  volume    = {32},
  pages     = {2309--2321},
  year      = {2023},
  publisher = {IEEE},
}

TMM

STAT: Multi-object tracking based on spatio-temporal topological constraints

Junjie Zhang, Mingyan Wang, Haoran Jiang, Xinyu Zhang, and 2 more authors

IEEE Transactions on Multimedia, 2023

Bib PDF

% Journal article in IEEE Transactions on Multimedia. The method acronym is
% brace-protected so sentence-casing styles do not lowercase it.
@article{zhang2023stat,
  author    = {Zhang, Junjie and Wang, Mingyan and Jiang, Haoran and Zhang, Xinyu and Yan, Chenggang and Zeng, Dan},
  title     = {{STAT}: Multi-object tracking based on spatio-temporal topological constraints},
  journal   = {IEEE Transactions on Multimedia},
  year      = {2023},
  publisher = {IEEE},
}

2022

CVPR

Implicit sample extension for unsupervised person re-identification

Xinyu Zhang, Dongdong Li, Zhigang Wang, Jian Wang, and 4 more authors

In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2022

Bib PDF Code

% Conference paper. The proceedings name is written with standard title
% capitalisation, since styles do not recase booktitle.
@inproceedings{zhang2022implicit,
  author    = {Zhang, Xinyu and Li, Dongdong and Wang, Zhigang and Wang, Jian and Ding, Errui and Shi, Javen Qinfeng and Zhang, Zhaoxiang and Wang, Jingdong},
  title     = {Implicit sample extension for unsupervised person re-identification},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7369--7378},
  year      = {2022},
}

ACMMM

Learning granularity-unified representations for text-to-image person re-identification

Zhiyin Shao, Xinyu Zhang, Meng Fang, Zhifeng Lin, and 2 more authors

In Proceedings of the 30th ACM International Conference on Multimedia, 2022

Bib PDF Code

% Conference paper. The proceedings name is written with proper
% capitalisation; the ACM acronym is braced for robustness.
@inproceedings{shao2022learning,
  author    = {Shao, Zhiyin and Zhang, Xinyu and Fang, Meng and Lin, Zhifeng and Wang, Jian and Ding, Changxing},
  title     = {Learning granularity-unified representations for text-to-image person re-identification},
  booktitle = {Proceedings of the 30th {ACM} International Conference on Multimedia},
  pages     = {5566--5574},
  year      = {2022},
}

ECCV

UFO: unified feature optimization

Teng Xi, Yifan Sun, Deli Yu, Bi Li, and 7 more authors

In European Conference on Computer Vision, 2022

arXiv Bib PDF Code

% Conference paper (ECCV 2022). The acronym is brace-protected against
% sentence-casing. Springer is recorded as publisher; the organization
% field is meant for a sponsoring body, not the publishing house.
@inproceedings{xi2022ufo,
  author    = {Xi, Teng and Sun, Yifan and Yu, Deli and Li, Bi and Peng, Nan and Zhang, Gang and Zhang, Xinyu and Wang, Zhigang and Chen, Jinwen and Wang, Jian and others},
  title     = {{UFO}: Unified Feature Optimization},
  booktitle = {European Conference on Computer Vision},
  pages     = {472--488},
  year      = {2022},
  publisher = {Springer},
}

IJCAI

Self-Guided Hard Negative Generation for Unsupervised Person Re-Identification

Dongdong Li, Zhigang Wang, Jian Wang, Xinyu Zhang, and 3 more authors

In IJCAI, 2022

Bib PDF

% Conference paper (IJCAI 2022). The title carries no trailing period,
% because terminal punctuation is supplied by the bibliography style.
% The proceedings are given by full name with the acronym brace-protected.
@inproceedings{li2022self,
  author    = {Li, Dongdong and Wang, Zhigang and Wang, Jian and Zhang, Xinyu and Ding, Errui and Wang, Jingdong and Zhang, Zhaoxiang},
  title     = {Self-Guided Hard Negative Generation for Unsupervised Person Re-Identification},
  booktitle = {Proceedings of the Thirty-First International Joint Conference on Artificial Intelligence ({IJCAI})},
  pages     = {1067--1073},
  year      = {2022},
}

2021

AAAI

Diverse knowledge distillation for end-to-end person search

Xinyu Zhang, Xinlong Wang, Jia-Wang Bian, Chunhua Shen, and 1 more author

In Proceedings of the AAAI Conference on Artificial Intelligence, 2021

Bib PDF

% Conference paper (AAAI 2021).
@inproceedings{zhang2021diverse,
  author    = {Zhang, Xinyu and Wang, Xinlong and Bian, Jia-Wang and Shen, Chunhua and You, Mingyu},
  title     = {Diverse knowledge distillation for end-to-end person search},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {35},
  number    = {4},
  pages     = {3412--3420},
  year      = {2021},
}

2020

TITS

Part-guided attention learning for vehicle instance retrieval

Xinyu Zhang, Rufeng Zhang, Jiewei Cao, Dong Gong, and 2 more authors

IEEE Transactions on Intelligent Transportation Systems, 2020

arXiv Bib PDF Code

% Journal article in IEEE Transactions on Intelligent Transportation Systems.
% NOTE(review): volume 23 paired with year 2020 looks inconsistent, since the
% 2018 entry for the same journal in this list uses volume 19. Confirm
% whether 2020 is an early-access date and which year should be cited.
@article{zhang2020part,
  author    = {Zhang, Xinyu and Zhang, Rufeng and Cao, Jiewei and Gong, Dong and You, Mingyu and Shen, Chunhua},
  title     = {Part-guided attention learning for vehicle instance retrieval},
  journal   = {IEEE Transactions on Intelligent Transportation Systems},
  volume    = {23},
  number    = {4},
  pages     = {3048--3060},
  year      = {2020},
  publisher = {IEEE},
}

arXiv

Memorizing comprehensively to learn adaptively: Unsupervised cross-domain person Re-ID with multi-level memory

Xinyu Zhang, Dong Gong, Jiewei Cao, and Chunhua Shen

arXiv preprint arXiv:2001.04123, 2020

arXiv Bib

% arXiv preprint. The abbreviation Re-ID is capitalised and brace-protected
% against sentence-casing. The eprint/archiveprefix fields expose the
% identifier to eprint-aware styles; journal is kept for classic styles.
@article{zhang2020memorizing,
  author        = {Zhang, Xinyu and Gong, Dong and Cao, Jiewei and Shen, Chunhua},
  title         = {Memorizing comprehensively to learn adaptively: Unsupervised cross-domain person {Re-ID} with multi-level memory},
  journal       = {arXiv preprint arXiv:2001.04123},
  year          = {2020},
  eprint        = {2001.04123},
  archiveprefix = {arXiv},
}

2019

ICCV

Self-training with progressive augmentation for unsupervised cross-domain person re-identification

Xinyu Zhang, Jiewei Cao, Chunhua Shen, and Mingyu You

In Proceedings of the IEEE/CVF International Conference on Computer Vision, 2019

Bib PDF Code

% Conference paper (ICCV 2019).
@inproceedings{zhang2019self,
  author    = {Zhang, Xinyu and Cao, Jiewei and Shen, Chunhua and You, Mingyu},
  title     = {Self-training with progressive augmentation for unsupervised cross-domain person re-identification},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {8222--8231},
  year      = {2019},
}

2018

TITS

An extended filtered channel framework for pedestrian detection

Mingyu You, Yubin Zhang, Chunhua Shen, and Xinyu Zhang

IEEE Transactions on Intelligent Transportation Systems, 2018

Bib PDF

% Journal article in IEEE Transactions on Intelligent Transportation Systems.
@article{you2018extended,
  author    = {You, Mingyu and Zhang, Yubin and Shen, Chunhua and Zhang, Xinyu},
  title     = {An extended filtered channel framework for pedestrian detection},
  journal   = {IEEE Transactions on Intelligent Transportation Systems},
  volume    = {19},
  number    = {5},
  pages     = {1640--1651},
  year      = {2018},
  publisher = {IEEE},
}