<!-- index.html -->

<!DOCTYPE HTML>
<html lang="en">
  <head>
    <!-- Declare the encoding before anything else so the parser knows it
         before it reads any text or script. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    <title>Yihao Quan</title>
    <meta name="author" content="Yihao Quan">

    <link rel="icon" href="images/favicon/favicon.ico" type="image/x-icon">
    <link rel="stylesheet" href="stylesheet.css">

    <!-- Google tag (gtag.js): loads the tag script asynchronously; the inline
         script below queues calls on window.dataLayer. -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=G-MMEEFPHQW8"></script>
    <script>
      window.dataLayer = window.dataLayer || [];
      function gtag(){dataLayer.push(arguments);}
      gtag('js', new Date());

      gtag('config', 'G-MMEEFPHQW8');
    </script>
  </head>

  <body>
    <table style="width:100%;max-width:800px;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
      <tr style="padding:0px">
        <td style="padding:0px">
          <!-- Bio header: introduction and contact links on the left, portrait on the right. -->
          <table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
            <tr style="padding:0px">
              <td style="padding:2.5%;width:63%;vertical-align:middle">
                <p class="name" style="text-align: center;">
                  Yihao Quan
                </p>
                <p>
                  I'm a fourth-year undergraduate student at <a href="https://www.rit.edu/">Rochester Institute of Technology</a>, where I major in Management Information Systems.
                </p>
                <p>
                  Currently, I'm a research intern at <a href="https://pradalab1.github.io/index.html">King Abdullah University of Science and Technology (KAUST) PRADA Lab</a>.
                </p>
                <p style="text-align:center">
                  <a href="mailto:qyhhere@gmail.com">Email</a> &nbsp;/&nbsp;
                  <!-- <a href="data/JonBarron-CV.pdf">CV</a> &nbsp;/&nbsp;
                  <a href="data/JonBarron-bio.txt">Bio</a> &nbsp;/&nbsp; -->
                  <!-- Ampersands in the query string are escaped; the duplicated hl=en parameter is dropped. -->
                  <a href="https://scholar.google.com/citations?view_op=list_works&amp;hl=en&amp;user=Rm69hkYAAAAJ">Scholar</a> &nbsp;/&nbsp;
                  <!-- <a href="https://www.threads.net/@jonbarron">Threads</a> &nbsp;/&nbsp;
                  <a href="https://bsky.app/profile/jonbarron.bsky.social">Bluesky</a> &nbsp;/&nbsp; -->
                  <!-- <a href="https://twitter.com/jon_barron">Twitter</a> &nbsp;/&nbsp; -->
                  <a href="https://www.linkedin.com/in/yihao-quan-24223632a/">LinkedIn</a> &nbsp;/&nbsp;
                  <a href="https://github.com/itsqyh">GitHub</a>
                </p>
              </td>
              <td style="padding:2.5%;width:40%;max-width:40%">
                <a href="images/img1.jpg"><img class="hoverZoomLink" src="images/img1.jpg" alt="profile photo" style="width:100%;max-width:100%;object-fit: cover; border-radius: 50%;"></a>
              </td>
            </tr>
          </tbody></table>
          <!-- Research interests and news. -->
          <table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
            <tr>
              <td style="padding:20px;width:100%;vertical-align:middle">
                <h2>Research Interests</h2>
                <p>My research primarily focuses on <strong>Trustworthy &amp; Interpretable MLLMs/LLMs</strong>, with an emphasis on the following areas:</p>
                <!-- Only <li> may be a child of <ul>; the empty <p> spacers that sat between
                     items are replaced by a bottom margin on the items themselves. -->
                <ul>
                  <li style="margin-bottom:1em"><strong>Vision + Language:</strong>
                    Bridging the gap between our knowledge of language and vision representation, providing a foundation for more interpretable and controllable multimodal systems.</li>
                  <li style="margin-bottom:1em"><strong>Mechanistic Interpretability:</strong>
                    Understanding the internal mechanisms of LLMs/MLLMs (e.g. circuit analysis, causal tracing, neuron analysis).</li>
                  <li><strong>Hallucination, Factuality and Safety:</strong>
                    Using the interpretability findings to help downstream tasks (e.g. factual knowledge, enhancing reasoning, reducing hallucinations, model editing), and design safer models.</li>
                </ul>
                <p>Representative papers are <span class="highlight">highlighted</span>, * = Equal Contribution</p>
              </td>
            </tr>
            <!-- News Section -->
            <tr>
              <td style="padding:20px;width:100%;vertical-align:middle">
                <h2>News</h2>
                <ul>
                  <li><strong>Jan 23rd 2025:</strong> Our paper "<a href="https://arxiv.org/abs/2406.06579">From Redundancy to Relevance: Information Flow in LVLMs Across Reasoning Tasks</a>" got accepted by NAACL 2025 Main track.</li>
                  <li><strong>Dec 13th 2024:</strong> Our paper "<a href="https://arxiv.org/abs/2406.06579">From Redundancy to Relevance: Information Flow in LVLMs Across Reasoning Tasks</a>" received meta-score 4 and was suggested for NAACL'25 main track.</li>
                  <li><strong>Dec 10th 2024:</strong> Our paper "<a href="https://arxiv.org/abs/2412.09817">Enhancing LVLMs’ Complex Reasoning via Similarity Computation</a>" got accepted by AAAI 2025.</li>
                  <li><strong>Nov 15th 2024:</strong> Submitted our work "<a href="https://arxiv.org/abs/2411.09968">Seeing Clearly by Layer Two: Enhancing Attention Heads to Alleviate Hallucination in LVLMs</a>" to CVPR'25.</li>
                </ul>
              </td>
            </tr>
          </tbody></table>
          <table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>

    <!-- Paper: Seeing Clearly by Layer Two (thumbnail on the left, details on the right). -->
    <tr>
      <td style="padding:20px;width:25%;vertical-align:middle; text-align:center">
        <!-- The former hover overlay was a <video> whose only source was this same PNG
             labelled as video/mp4, so it could never play. The overlay and its
             mouseover/mouseout script are removed and the static thumbnail is shown alone. -->
        <div class="one" style="display: inline-block;">
          <img src="images/fig1.png" alt="Figure from “Seeing Clearly by Layer Two”" style="width:100%">
        </div>
      </td>
      <td style="padding:20px;width:75%;vertical-align:middle">
        <a href="https://arxiv.org/abs/2411.09968">
          <span class="papertitle">Seeing Clearly by Layer Two: Enhancing Attention Heads to Alleviate Hallucination in LVLMs</span>
        </a>
        <br>
        Xiaofeng Zhang*,
        <strong><u>Yihao Quan*</u></strong>,
        Chaochen Gu,
        Chen Shen,
        Xiaosong Yuan,
        Shaotian Yan,
        Jieping Ye
        <br>
        <a href="https://arxiv.org/abs/2411.09968">arXiv</a>
        <br>
        <!-- The <em> is closed so only the venue line is emphasised, not the description below. -->
        <em>CVPR'25 Under Review</em>
        <p>
          A plug-and-play and training-free method showing significant hallucination-mitigating performance on different VLMs and metrics.
        </p>
      </td>
    </tr>

<!-- Paper: From Redundancy to Relevance (thumbnail on the left, details on the right). -->
<tr>
  <td style="padding:20px;width:25%;vertical-align:middle">
    <!-- The former hover overlay was a <video> whose only source was this same PNG
         labelled as video/mp4, so it could never play. The overlay and its
         mouseover/mouseout script are removed and the static thumbnail is shown alone. -->
    <div class="one">
      <img src="images/llava-cam.png" alt="Figure from “From Redundancy to Relevance”" style="width:100%">
    </div>
  </td>
  <td style="padding:20px;width:75%;vertical-align:middle">
    <a href="https://arxiv.org/abs/2406.06579">
      <span class="papertitle">From Redundancy to Relevance: Information Flow in LVLMs Across Reasoning Tasks</span>
    </a>
    <br>
    Xiaofeng Zhang*,
    <strong><u>Yihao Quan*</u></strong>,
    Chaochen Gu,
    Chen Shen,
    Xiaosong Yuan,
    Shaotian Yan,
    Liang Xie,
    Wenxiao Wang,
    Hao Tang,
    Jieping Ye
    <br>
    <a href="https://arxiv.org/abs/2406.06579">arXiv</a>
    <br>
    <!-- The <em> is closed so only the venue line is emphasised, not the description below. -->
    <em>Accepted by NAACL'25 Main</em>
    <p>
      A novel perspective to enhance understanding of LVLMs and their functioning, particularly for complex reasoning tasks.
    </p>
  </td>
</tr>


    <!-- Paper: Simignore (thumbnail on the left, details on the right). -->
    <tr>
      <td style="padding:20px;width:25%;vertical-align:middle">
        <!-- The former hover overlay was a <video> whose only source was this same PNG
             labelled as video/mp4, so it could never play. The overlay and its
             mouseover/mouseout script are removed and the static thumbnail is shown alone. -->
        <div class="one">
          <img src="images/structure-v1.png" alt="Figure from “Enhancing LVLMs’ Complex Reasoning via Similarity Computation”" style="width:100%">
        </div>
      </td>
      <td style="padding:20px;width:75%;vertical-align:middle">
        <a href="https://github.com/FanshuoZeng/Simignore">
          <span class="papertitle">Enhancing LVLMs’ Complex Reasoning via Similarity Computation</span>
        </a>
        <br>
        Fanshuo Zeng*,
        Xiaofeng Zhang*,
        <strong><u>Yihao Quan</u></strong>,
        Zheng Hui,
        Jiawei Yao
        <br>
        <a href="https://github.com/FanshuoZeng/Simignore">Github</a> / <a href="https://arxiv.org/abs/2412.09817">arXiv</a>
        <br>
        <!-- The <em> is closed so only the venue line is emphasised, not the description below. -->
        <em>Accepted by AAAI'25</em>
        <p>
          A novel image token reduction method, Simignore, designed to enhance the complex reasoning capabilities.
        </p>
      </td>
    </tr>

    <!-- <tr onmouseout="recon_stop()" onmouseover="recon_start()" bgcolor="#ffffd0">
      <td style="padding:20px;width:25%;vertical-align:middle">
        <div class="one">
          <div class="two" id='recon_image'><video  width=100% height=100% muted autoplay loop>
          <source src="images/recon.mp4" type="video/mp4">
          Your browser does not support the video tag.
          </video></div>
          <img src='images/recon.png' width="160">
        </div>
        <script type="text/javascript">
          function recon_start() {
            document.getElementById('recon_image').style.opacity = "1";
          }

          function recon_stop() {
            document.getElementById('recon_image').style.opacity = "0";
          }
          recon_stop()
        </script>
      </td>
      <td style="padding:20px;width:75%;vertical-align:middle">
        <a href="https://reconfusion.github.io/">
			<span class="papertitle">ReconFusion: 3D Reconstruction with Diffusion Priors</span>
        </a>
        <br>
        <a href="https://www.cs.columbia.edu/~rundi/">Rundi Wu*</a>,
		<a href="https://bmild.github.io/">Ben Mildenhall*</a>,
        <a href="https://henzler.github.io/">Philipp Henzler</a>,
        <a href="https://keunhong.com/">Keunhong Park</a>,
        <a href="https://ruiqigao.github.io/">Ruiqi Gao</a>,
        <a href="https://scholar.google.com/citations?user=_pKKv2QAAAAJ&hl=en/">Daniel Watson</a>,
        <a href="https://pratulsrinivasan.github.io/">Pratul P. Srinivasan</a>,
        <a href="https://dorverbin.github.io/">Dor Verbin</a>,
		<strong>Jonathan T. Barron</strong>,
        <a href="https://poolio.github.io/">Ben Poole</a>,
        <a href="https://holynski.org/">Aleksander Holynski*</a>
        <br>
        <em>arXiv</em>, 2023
        <br>
        <a href="https://reconfusion.github.io/">project page</a>
        /
        <a href="https://arxiv.org/abs/">arXiv</a>
        <p></p>
        <p>
        Using a multi-image diffusion model as a regularizer lets you recover high-quality radiance fields from just a handful of images.
        </p>
      </td>
    </tr> -->

  <!-- Close the publications table before the next section starts. It used to be
       closed only after the awards table, so the parser closed it implicitly and the
       late end tags then closed the outer page wrapper, leaving the footer outside it. -->
  </tbody></table>

  <!-- Selected Awards and Honors -->
  <table style="width:90%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
    <tr>
      <td style="padding:0px;width:100%;vertical-align:middle">
        <!-- The heading lives inside the cell: content placed directly in <tbody> is
             moved out of the table by the parser. <h2> matches the other sections. -->
        <h2>Selected Awards and Honors</h2>
        <ul>
          <li>2022-2024: Second Class Scholarship</li>
          <li>2022-2024: Merit Student of Beijing Jiaotong University</li>
          <li>Oct 2022: Second Prize of National College Student Mathematical Modeling Competition</li>
          <li>2022-2023: Dean’s List</li>
          <li>2022: Kaggle Silver Medal (<strong>Top 5%</strong>): Feedback Prize - Evaluating Student Writing</li>
          <!-- <li>2021: Zhongying Moral Education Scholarship (4000RMB¥)</li> -->
          <!-- <li>2021: Merit Student of Peking University</li> -->
          <!-- <li>2021: Ministry of Education Top Talent Program Scholarship (1000RMB¥)</li> -->
          <!-- <li>2020: China University Computer Competition - Group Programming LadderCompetition Silver Medalist</li> -->
          <!-- <li>2019: The 36th Chinese Physics Olympiad(CPHO) Silver Medalist</li> -->
          <!-- <li>2019: The First Prize of National High School Mathematics Contest, Tianjin, China</li> -->
        </ul>
      </td>
    </tr>
  </tbody></table>
  <script type='text/javascript' id='mapmyvisitors' src='https://mapmyvisitors.com/map.js?cl=ffffff&w=a&t=m&d=N-HMxU5TSw_p9d8yn-JqcrusGHlPo92KV5_KtcGtXiI'></script>

  <!-- Services -->
  <!--
  <table style="width:90%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;"><tbody>
    <tr>
      <td style="padding:0px;width:100%;vertical-align:middle">
        <h2>Services</h2>
        <ul>
          <li>Conference Reviewer: ECCV2024</li>
          <li>Journal Reviewer: Expert Systems with Applications</li>
        </ul>
      </td>
    </tr>
  </tbody></table>
  -->

          <!-- Footer: right-aligned credit linking to the template this page is based on. -->
          <table style="width:100%;border:0px;border-spacing:0px;border-collapse:separate;margin-right:auto;margin-left:auto;">
            <tbody>
              <tr>
                <td style="padding:0px">
                  <br>
                  <p style="text-align:right;font-size:small;">
                    Feel free to steal this website's <a href="https://github.com/jonbarron/jonbarron_website">source code</a>.
                  </p>
                </td>
              </tr>
            </tbody>
          </table>
        </td>
      </tr>
    </table>
  </body>
</html>