<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>noated</title>
	<atom:link href="http://noated.wordpress.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://noated.wordpress.com</link>
	<description>NLP research</description>
	<lastBuildDate>Mon, 29 Mar 2010 07:51:04 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='noated.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://s2.wp.com/i/buttonw-com.png</url>
		<title>noated</title>
		<link>http://noated.wordpress.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://noated.wordpress.com/osd.xml" title="noated" />
	<atom:link rel='hub' href='http://noated.wordpress.com/?pushpress=hub'/>
		<item>
		<title>Connections between SAT solving strategies and Graphical Model Inference</title>
		<link>http://noated.wordpress.com/2010/03/29/connections-between-sat-solving-strategies-and-graphical-model-inference/</link>
		<comments>http://noated.wordpress.com/2010/03/29/connections-between-sat-solving-strategies-and-graphical-model-inference/#comments</comments>
		<pubDate>Mon, 29 Mar 2010 07:51:04 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/?p=115</guid>
		<description><![CDATA[http://www.robotics.stanford.edu/~koller/BNtut/sld066.htm<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=115&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>http://www.robotics.stanford.edu/~koller/BNtut/sld066.htm</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/115/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/115/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/115/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=115&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2010/03/29/connections-between-sat-solving-strategies-and-graphical-model-inference/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Learning Semantic Correspondences with Less Supervision</title>
		<link>http://noated.wordpress.com/2010/02/04/learning-semantic-correspondences-with-less-supervision/</link>
		<comments>http://noated.wordpress.com/2010/02/04/learning-semantic-correspondences-with-less-supervision/#comments</comments>
		<pubDate>Thu, 04 Feb 2010 17:51:19 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/?p=110</guid>
		<description><![CDATA[Normalizing constant for a maxent language model.  Factors for bigrams and trigrams and syntactic relations etc.  How do you sum over every string in the world?  Don&#8217;t do it by brute force.  Finite state machine or context free grammar. Gets really low results on an NFL dataset. Problem with their model: suppose you discover when [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=110&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Normalizing constant for a maxent language model.  Factors for bigrams and trigrams and syntactic relations etc.  How do you sum over every string in the world?  Don&#8217;t do it by brute force.  Finite state machine or context free grammar.</p>
<p>Gets really low results on an NFL dataset.</p>
<p>Problem with their model: suppose you discover when you align English and French that &#8216;nod&#8217; is being translated to &#8216;t&#234;te&#8217; &#8212; high mutual information with nod and t&#234;te.  If there&#8217;s an overcast sky, can&#8217;t talk about overcast sky twice (once as overcast, and once as rain). The Markov assumption is inappropriate.</p>
<p>England soccer team vs. football team &#8212; learn that words are probably more ambiguous than they actually are.</p>
<p>Would it help to know that wind and rain are correlated?  If it&#8217;s raining then it might not be sunny.</p>
<p>Don&#8217;t really know the state.</p>
<p>Mis stacking the deck.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/110/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/110/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/110/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/110/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/110/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/110/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/110/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/110/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/110/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/110/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/110/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/110/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/110/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/110/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=110&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2010/02/04/learning-semantic-correspondences-with-less-supervision/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Dirichlet Processes: Reading Group</title>
		<link>http://noated.wordpress.com/2010/01/07/dirichlet-processes-reading-group/</link>
		<comments>http://noated.wordpress.com/2010/01/07/dirichlet-processes-reading-group/#comments</comments>
		<pubDate>Thu, 07 Jan 2010 19:42:47 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/?p=107</guid>
		<description><![CDATA[Bernoulli Process, sample = N -&#62; {0,1} Markov Process, sample = N -&#62; Vocab Gaussian Process, sample = R -&#62; R Poisson Process, sample = R -&#62; {0,1} / N Underlying distribution of Gaussian: Specify CoVariance matrix of Gaussian Stationary vs NonStationary : does P(events) depend on time of day? Is a Gaussian Process if: [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=107&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Bernoulli Process, sample = N -&gt; {0,1}</p>
<p>Markov Process, sample = N -&gt; Vocab</p>
<p>Gaussian Process, sample = R -&gt; R</p>
<p>Poisson Process, sample = R -&gt; {0,1} / N</p>
<p>Underlying distribution of Gaussian: Specify CoVariance matrix of Gaussian</p>
<p>Stationary vs NonStationary : does P(events) depend on time of day?</p>
<p>Is a Gaussian Process if: Joint Distribution over Triple of Reals: If that joint distribution is Gaussian, then this is a Gaussian Process</p>
<p>I.e., no matter how you slice data into N points, then it&#8217;s Gaussian</p>
<p>Can define other random variables from a random variable</p>
<p>The Process is a distribution over functions</p>
<p>What is a Probability Density Function:</p>
<p>- p(f) \in R</p>
<p>Measure on function spaces (regions of function space)</p>
<p>Unlikely functions show up sometimes (and crash the stock market!)</p>
<p>Functions represent samples from the  process</p>
<p>Spatial processes &#8212; distribution over images</p>
<p>Dirichlet Process &#8212; Sample is a probability function from some set</p>
<p>Processes are useful in nonparametric methods: define prior over functions, then observe data (posterior)</p>
<p>Dirichlet processes give some a priori notion of what a probability distribution is.</p>
<p>Posterior is also a Dirichlet process. Distribution over any set theta.</p>
<p>Samples are not necessarily nice smooth distribution.</p>
<p>Centroids &#8212; probability distribution over places cluster centres could be</p>
<p>Infinite set of possible Gaussians to choose from:</p>
<p>- K=3: Give positive probability to three mixture components</p>
<p>- Dirichlet Process: Assign positive probability to countably many mixture components</p>
<p>- Distribution over distributions: A Process</p>
<p>- Stick-breaking process: Break a stick into infinitely many pieces:</p>
<p>- pi_1 = 1/3, pi_2 = 1/6, pi_3 = 1/10</p>
<p>- Infinitely many things need positive probability</p>
<p>- \alpha is fixed: maybe tune it on held out data</p>
<p>- pi_1 ~ Beta(1, \alpha), pi_2 ~ (1-pi_1)*Beta(1, \alpha), &#8230;</p>
<p>- E.g., distribution over coin weights (towards heads or tails)</p>
<p>- Usually break off big chunk of stick vs. usually break off small chunk</p>
<p>- If alpha is small, focus on a few mixture components</p>
<p>- If allow both to vary, Beta(*,*) is a Pitman-Yor</p>
<p>- Basically, any distribution over integers</p>
<p>- Gibbs sampling: finite number of observations.</p>
<p>- Integer between 1 and infinity to each token</p>
<p>- Each word is assigned a mixture of topics</p>
<p>- Now parameters of conditional distribution of word giv</p>
<p>- Hierarchical dirichlet process: nothing to do with subtopics</p>
<p>- Gibbs sampling: discover that words come from different clusters</p>
<p>- Gaussian mixture models: points close to each other in space are likely to belong to the same</p>
<p>- No similarity metric on words (other than wordnet)</p>
<p>- Label each point with an integer corresponding to which topic it comes from</p>
<p>- Random position of infinitely many cluster centroids</p>
<p>- Resample distribution over integers</p>
<p>- Update pi to favor e.g. 2</p>
<p>- Sample from posterior after observing some numbers</p>
<p>- Tag number 1034 is really unlikely:  To pick a new number, need to sample from stick breaking construction</p>
<p>- Even if can&#8217;t sample Pi, can&#8217;t you sample a sample of Pi</p>
<p>- Output Pi is infinitely big</p>
<p>- Sample from sample of pi is an integer</p>
<p>- Coin flipping: decide how much of stick goes to integer 1</p>
<p>- Coin flipping: terminate in finite time</p>
<p>- Sample from sample is easy (idea comes back in Chinese restaurant process)</p>
<p>- Observe some numbers and interested in posterior distribution over Pi</p>
<p>- Condition on Markov Blanket</p>
<p>- Get Pi and choosing an integer from distribution Pi</p>
<p>- 83% of the way through the stick.  Stick breaking forces us to sum to 1.</p>
<p>- Chinese Restaurant Process: get out of troublesome sampling from Pi</p>
<p>- Continue to do Gibbs sampling: can always integrate over a variable</p>
<p>- Sample from sample directly</p>
<p>- Flip without actually reconstructing what Pi actually was</p>
<p>- Deal with posterior which is a little bit trickier</p>
<p>- Polya Urn Process / Chinese Restaurant Process</p>
<p>- Sample sequence of integers:</p>
<p>- Collapsed sampler:</p>
<p>- Instead of sampling Phi_1, Phi_2, &#8230;</p>
<p>&#8211;&gt; sample Theta_Phi1, Theta_Phi2,, &#8230;</p>
<p>&#8211;&gt; get sequence of centroids</p>
<p>&#8212;&gt; never actually see which integers</p>
<p>Pi_1, Pi_2, Pi_3, &#8230; tend to decrease</p>
<p>Second step: Elements of set Theta,</p>
<p>G0: probability of string decays exponentially with length</p>
<p>Pi_1        Pi_2 Pi_3 &#8230;</p>
<p>Theta_1 Theta_2 &#8230;</p>
<p>G0 is an unstructured prior that gives some prior over components</p>
<p>DP(Alpha,G0) &#8212; if alpha is large, many different events</p>
<p>If Alpha is small, break off some huge section at the beginning</p>
<p>Chinese Restaurant Process:</p>
<p>Sample directly from the sample, sample a particular sequence of mixture components</p>
<p>Pi_hat -&gt; Mixture components for different tokens</p>
<p>If we don&#8217;t know Pi_hat, then they are dependent</p>
<p>If you observe some of these, it tells you about what Pi_hat is</p>
<p>Collapsed sampler : STATEFUL  sampler</p>
<p>First sample is easy: Just sample from G0</p>
<p>Next samples are harder: Someone comes in and sits at a table</p>
<p>Second person comes in: flip a coin: sampled from same cluster or different cluster: have a choice: could be either</p>
<p>Closed form: If were to pick number from stick breaking, what is p of next number</p>
<p>Pick a label for next table: Like what we do with stick breaking</p>
<p>Only need to do things on demand</p>
<p>Infinite number of centroids: but only pick them as we need them</p>
<p>Next table serves different dish</p>
<p>Implicit distribution over infinitely many tables:</p>
<p>integrating out over infinitely many possibilities</p>
<p>Sample from posterior distribution</p>
<p>Closed form:</p>
<p>3 / (6 + \alpha)</p>
<p>1 / (6 + \alpha)</p>
<p>2 / (6 + \alpha)</p>
<p>\alpha / (6 + \alpha)</p>
<p>Rich get richer effect: if already generated a number of samples from somewhere, probably more samples are there</p>
<p>How to take this and use it in a Gibbs Sampler</p>
<p>Collapsed Gibbs Sampler</p>
<p>Which points have the same cluster centroids</p>
<p>Question 1. What tables are people sitting at</p>
<p>Question 2. Where are the cluster centroids associated with those tables?</p>
<p>Re-Sample Person&#8217;s table conditioned on where everyone else is sitting AND how much they look like they belong there</p>
<p>ExChangeability: if didn&#8217;t know Pi_hat</p>
<p>IID: independent and identically distributed</p>
<p>Exchangeable: permutation invariant</p>
<p>When we resample, all the probabilities would be the same if they were the 7th or the 1000th</p>
<p>Take someone out and put them back is just like them coming in fresh</p>
<p>Any exchangeable distribution has a hidden variable</p>
<p>deFinetti measure: dependence is &#8220;sample from that distribution&#8221;</p>
<p>Just saying that if things are exchangeable, the underlying distribution might be distributed in some funny way</p>
<p>On average over all Gibbs samples, where is probability of clusters being at different places</p>
<p>Do two things tend to be in the same equivalence class</p>
<p>Stick breaking, CRP, different Hierarchical Dirichlet Processes</p>
<p>Can&#8217;t computationally produce a sample of Pi</p>
<p>So take MLE estimate based on a very large sample of Pi</p>
<p>Same notion as computable real numbers</p>
<p>Never find out what number was.</p>
<p>Samplers in general don&#8217;t need to represent the entire distribution at once</p>
<p>Observe some data and look at posterior &#8212; want that</p>
<p>Barely in NLP manage to get away with lattices</p>
<p>Here independence assumptions break down: Slice sampling method</p>
<p>Forward-backward lattices even in this context</p>
<p>Regularize each domain or not</p>
<p>LDA: every document has its own mixture over topics</p>
<p>Hierarchical case: every document has a different Pi_hat from the Dirichlet process directly.</p>
<p>Words within a document are correlated.</p>
<p>Actually have a countable number of topics.</p>
<p>G0 -&gt; G1 : G1 is a distribution over topics</p>
<p>Assign some new distribution over topics</p>
<p>Different topic mixture sampled from G1; G1 is a sample from DP</p>
<p>Original DP allows every possible topic in the world</p>
<p>This forces sharing of parameters</p>
<p>DP(\alpha_0,G0) -&gt; G1</p>
<p>Documents ~ DP(\alpha_1,G1)</p>
<p>Dirichlet Process is a distribution over distributions</p>
<p>Computability of priors and posteriors.  Maybe not useful</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/107/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/107/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/107/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/107/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/107/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/107/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/107/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/107/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/107/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/107/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/107/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/107/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/107/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/107/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=107&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2010/01/07/dirichlet-processes-reading-group/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Representing Uncertainty In Databases With Scalable Factor Graphs</title>
		<link>http://noated.wordpress.com/2009/12/23/representing-uncertainty-in-databases-with-scalable-factor-graphs/</link>
		<comments>http://noated.wordpress.com/2009/12/23/representing-uncertainty-in-databases-with-scalable-factor-graphs/#comments</comments>
		<pubDate>Thu, 24 Dec 2009 04:16:47 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[factor graph]]></category>
		<category><![CDATA[graphical models]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/?p=105</guid>
		<description><![CDATA[http://www.cs.umass.edu/~mwick/MikeWeb/Publications_files/wick06learning.pdf http://www.cs.umass.edu/~mwick/MikeWeb/Publications_files/wick09representing.pdf<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=105&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>http://www.cs.umass.edu/~mwick/MikeWeb/Publications_files/wick06learning.pdf</p>
<p>http://www.cs.umass.edu/~mwick/MikeWeb/Publications_files/wick09representing.pdf</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/105/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/105/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/105/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/105/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/105/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/105/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/105/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/105/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/105/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/105/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/105/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/105/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/105/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/105/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=105&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2009/12/23/representing-uncertainty-in-databases-with-scalable-factor-graphs/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>How Petrov&#8217;s Split-Merge Works</title>
		<link>http://noated.wordpress.com/2009/12/02/how-petrovs-split-merge-works/</link>
		<comments>http://noated.wordpress.com/2009/12/02/how-petrovs-split-merge-works/#comments</comments>
		<pubDate>Wed, 02 Dec 2009 06:08:19 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/?p=101</guid>
		<description><![CDATA[Splitting terminals helps since it better fits the data. Over-splitting hurts because it divides the training data into many bins, giving less robust estimates of grammar probabilities. Petrov&#8217;s idea is to only split when needed.  For instance, there should not be any specializations of the comma POS tag, since it always produces the comma tag. [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=101&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Splitting terminals helps since it better fits the data. Over-splitting hurts because it divides the training data into many bins, giving less robust estimates of grammar probabilities.</p>
<p>Petrov&#8217;s idea is to only split when needed.  For instance, there should not be any specializations of the comma POS tag, since it <em>always</em> produces the comma tag.</p>
<p>To measure the utility of oversplitting, one approach would be try splitting each latent annotation and measure the gain in likelihood (utility).  But this is not feasible, since it would require an entire training phase.  Furthermore, several subsymbols might need to be added before they can cooperate to yield a gain.</p>
<p>Instead, go in the other direction: split every symbol, then measure the loss in likelihood for removing it.</p>
<p>Consider a node spanning <img src='http://s0.wp.com/latex.php?latex=%28r%2Ct%29&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='(r,t)' title='(r,t)' class='latex' />. The likelihood of the data is given by the inside-outside probabilities:</p>
<p><img src='http://s0.wp.com/latex.php?latex=P%28w%2CT%29+%3D+%5Csum_x+P_%7BIN%7D%28r%2Ct%2CA_x%29P_%7BOUT%7D%28r%2Ct%2CA_x%29&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='P(w,T) = &#92;sum_x P_{IN}(r,t,A_x)P_{OUT}(r,t,A_x)' title='P(w,T) = &#92;sum_x P_{IN}(r,t,A_x)P_{OUT}(r,t,A_x)' class='latex' /></p>
<p>If two annotations <img src='http://s0.wp.com/latex.php?latex=A_1&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='A_1' title='A_1' class='latex' /> and <img src='http://s0.wp.com/latex.php?latex=A_2&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='A_2' title='A_2' class='latex' /> are merged back to <img src='http://s0.wp.com/latex.php?latex=A&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='A' title='A' class='latex' />, it will combine the statistics of its subsymbols.  That is, its production probabilities are the sum of <img src='http://s0.wp.com/latex.php?latex=%5C%7BA_1%2C+A_2%5C%7D&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='&#92;{A_1, A_2&#92;}' title='&#92;{A_1, A_2&#92;}' class='latex' />:</p>
<p><img src='http://s0.wp.com/latex.php?latex=P_%7BIN%7D%28r%2Ct%2CA%29+%3D+p_1P_%7BIN%7D%28r%2Ct%2CA_1%29+%2B+p_2P_%7BIN%7D%28r%2Ct%2CA_2%29&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='P_{IN}(r,t,A) = p_1P_{IN}(r,t,A_1) + p_2P_{IN}(r,t,A_2)' title='P_{IN}(r,t,A) = p_1P_{IN}(r,t,A_1) + p_2P_{IN}(r,t,A_2)' class='latex' /></p>
<p>and since <img src='http://s0.wp.com/latex.php?latex=A&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='A' title='A' class='latex' /> can be produced as <img src='http://s0.wp.com/latex.php?latex=A_1&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='A_1' title='A_1' class='latex' /> or <img src='http://s0.wp.com/latex.php?latex=A_2&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='A_2' title='A_2' class='latex' /> by its parents, its outside score is:</p>
<p><img src='http://s0.wp.com/latex.php?latex=P_%7BOUT%7D%28r%2Ct%2CA%29+%3D+p_1P_%7BOUT%7D%28r%2Ct%2CA_1%29+%2B+p_2P_%7BOUT%7D%28r%2Ct%2CA_2%29&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='P_{OUT}(r,t,A) = p_1P_{OUT}(r,t,A_1) + p_2P_{OUT}(r,t,A_2)' title='P_{OUT}(r,t,A) = p_1P_{OUT}(r,t,A_1) + p_2P_{OUT}(r,t,A_2)' class='latex' /></p>
<p>Using these quantities gives the likelihood where these two annotations and their corresponding rules have been merged, around a certain node.</p>
<p>Then the overall loss in likelihood by merging this node in all sentences is given by:</p>
<p><img src='http://s0.wp.com/latex.php?latex=%5Cdelta_%7BANNOTATION%7D%28A_1%2CA_2%29+%3D+%5Cprod_i+%5Cprod_%7Bn+%5Cin+T_i%7D+%5Cfrac%7BP%5En%28w%5Ei%2CT_i%29%7D%7BP%28w%5Ei%2CT_i%29%7D&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='&#92;delta_{ANNOTATION}(A_1,A_2) = &#92;prod_i &#92;prod_{n &#92;in T_i} &#92;frac{P^n(w^i,T_i)}{P(w^i,T_i)}' title='&#92;delta_{ANNOTATION}(A_1,A_2) = &#92;prod_i &#92;prod_{n &#92;in T_i} &#92;frac{P^n(w^i,T_i)}{P(w^i,T_i)}' class='latex' /></p>
<p>which is an approximation: it ignores interactions between instances of a symbol at multiple places in the same tree.  But Petrov claims these interactions are &#8220;often far apart and likely to interact only weakly.&#8221;</p>
<p>They find that merging dramatically reduces the size of the grammar, at the cost of little or no loss in performance (or even a gain).  Merging also makes more splitting possible: by restricting the splits in unproductive areas of the grammar, more latent annotations are possible at other places.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/101/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/101/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/101/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/101/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/101/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/101/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/101/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/101/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/101/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/101/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/101/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/101/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/101/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/101/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=101&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2009/12/02/how-petrovs-split-merge-works/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Hello world!</title>
		<link>http://noated.wordpress.com/2009/12/02/hello-world/</link>
		<comments>http://noated.wordpress.com/2009/12/02/hello-world/#comments</comments>
		<pubDate>Wed, 02 Dec 2009 05:35:00 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/?p=1</guid>
		<description><![CDATA[Let&#8217;s try LaTeX.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=1&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Let&#8217;s try LaTeX.</p>
<p><img src='http://s0.wp.com/latex.php?latex=x+%2B+y+%3D+z&amp;bg=ffffff&amp;fg=000000&amp;s=0' alt='x + y = z' title='x + y = z' class='latex' /></p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/1/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/1/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/1/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=1&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2009/12/02/hello-world/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Integer Linear Programming</title>
		<link>http://noated.wordpress.com/2009/11/23/integer-linear-programming/</link>
		<comments>http://noated.wordpress.com/2009/11/23/integer-linear-programming/#comments</comments>
		<pubDate>Mon, 23 Nov 2009 22:35:00 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/2009/11/23/integer-linear-programming</guid>
		<description><![CDATA[Automatically Generating Wikipedia Articles: A Structure-Aware Approach. Optimizing the Global Objective. To avoid redundancy between topics, we formulate an optimization problem using excerpt rankings to create the final article. Given k topics, we would like to select one excerpt e_jl for each topic t_j, such that the rank is minimized; that is, score_j(e_jl) is high. [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=100&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>Automatically Generating Wikipedia Articles:<br />      A Structure-Aware Approach</p>
<p>Optimizing the Global Objective. To avoid redundancy<br />between topics, we formulate an optimization<br />problem using excerpt rankings to create<br />the final article. Given k topics, we would like to<br />select one excerpt e_jl for each topic t_j, such that<br />the rank is minimized; that is, score_j(e_jl) is high.<br />   To select the optimal excerpts, we employ integer<br />linear programming (ILP). This framework is</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/100/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/100/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/100/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=100&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2009/11/23/integer-linear-programming/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Grammar Restimation</title>
		<link>http://noated.wordpress.com/2009/11/20/grammar-restimation/</link>
		<comments>http://noated.wordpress.com/2009/11/20/grammar-restimation/#comments</comments>
		<pubDate>Fri, 20 Nov 2009 20:54:00 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/2009/11/20/grammar-restimation</guid>
		<description><![CDATA[- The likelihood is not a good criterion for deciding what a grammar should be - Viterbi approximation &#8212; pick best parse of each sentence - EM &#8212; retrain on all parses in proportion to their probability - In the case of parsing, how do we do the Viterbi approximation? - How do you do full EM? Every sentence [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=99&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>- The likelihood is not a good criterion for deciding what a grammar should be<br />- Viterbi approximation &#8212; pick best parse of each sentence<br />- EM &#8212; retrain on all parses in proportion to their probability<br />- In the case of parsing, how do we do the Viterbi approximation?<br />- How do you do full EM?  Every sentence has exponentially many parses.<br />- What are the analogies in PCFG of alpha and beta?<br />- P of getting to state AND generating data<br />- Sum over all VP parses of &#8220;flies like an arrow&#8221;:<br /> &#8211; B_vp(1,5) = p(flies like an arrow | vp)<br /> &#8211; Sum over all possible ways of generating VP down to bottom of the tree<br /> &#8211; Sum over all ways of filling in the triangle &#8212; inside probabilities<br /> &#8211; What are the observations outside flies like an arrow?<br />    &#8211; Maybe &#8220;TIME flies like an arrow&#8221;<br />    &#8211; <br />- Outside: ways of parsing the rest to give a VP here<br />- Probability of all verb phrases from 1-&gt;5<br />- Inside algorithm: multiply probabilities of subconstituents: dynamic programming<br />- CKY&#8211;used when grammar in CNF<br />- S -&gt; NP VP<br />- Beta_np * Beta_vp * p(s-&gt;np vp) + &#8230;<br />- p(time flies like an arrow | s)<br />- s -&gt; np vp<br />- Sum over rules and sum over mid point<br />- Do this bottom up&#8211;so don&#8217;t find a bunch of 0 prob rules<br />- Summing instead of maxing: relation between FW-BW and Viterbi<br />- Use betas to compute alphas<br />- Not trying to find best parse&#8211;don&#8217;t need to store backpointers<br />- Number of parses of the string from i to k for which root symbol is x; if replace += p * b * &#8230; with alpha * beta &#8230;<br />- alpha_vp(1,5) = p(time VP today | S)<br />- beta_vp (1,5) = p(flies like an arrow | VP)<br />- alpha_vp * beta_vp = p(time [VP flies like an arrow] today | S)<br />- Z is called the partition function: the denominator used to 
normalize probability distributions<br />- We&#8217;re trying to learn a grammar with the Inside &amp; Outside probabilities<br />- FW-BW is related to message passing<br />- CKY naively has to iterate over lots of midpoints: so if we add ternary branching go up to O(n^4)<br />- Compute alpha probs other way around<br />- EM: Reconstruct where fountains are<br />- Update posterior probabilities:</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/99/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/99/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/99/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/99/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/99/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/99/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/99/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/99/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/99/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/99/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/99/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/99/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/99/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/99/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=99&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2009/11/20/grammar-restimation/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Reading group Thu, Nov 19 &#8212; LDA</title>
		<link>http://noated.wordpress.com/2009/11/19/reading-group-thu-nov-19-lda/</link>
		<comments>http://noated.wordpress.com/2009/11/19/reading-group-thu-nov-19-lda/#comments</comments>
		<pubDate>Thu, 19 Nov 2009 17:59:00 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/2009/11/19/reading-group-thu-nov-19-lda</guid>
		<description><![CDATA[- Unigram tag probabilities &#8212; 0th order HMM- Transition probabilities: p(t_n &#124; t_n-2 t_n-1) in usual HMM- Every document has a different distribution over tags in LDA &#8212; the box is drawn to include the transition probabilities- Coupled HMMs in LDA: transition probabilities are generated from the same prior- Point is to see how topics [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=98&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>- Unigram tag probabilities &#8212; 0th order HMM<br />- Transition probabilities: p(t_n | t_n-2 t_n-1) in usual HMM<br />- Every document has a different distribution over tags in LDA &#8212; the box is drawn to include the transition probabilities<br />- Coupled HMMs in LDA: transition probabilities are generated from the same prior<br />- Point is to see how topics get re-used over documents: emission probabilities are fixed</p>
<p>Question: how do you do inference on LDA?</p>
<p>- p(theta, z | w) &#8212; this is the distribution</p>
<p>In the ordinary case, we only have to worry about p(z|w) &#8212; the distribution of the tag sequence given the word sequence: what is the posterior distribution?</p>
<p>An HMM has a nice trellis representation: run forward-backward using alpha and beta transitions.  Can marginalize over all tags but a pair.</p>
<p>Need fancy variational stuff because posterior distribution over theta and z together is complicated.</p>
<p>1 -&gt; 2 -&gt; 3<br />o -&gt; o -&gt; o<br />o /&gt; o -&gt; o<br />o -&gt; o \&gt; o</p>
<p>A trellis.</p>
<p>Empirical Bayes means EM.  The hyper-parameters are maximized&#8211;pick the values which maximize the likelihood.  The problem is SUMMING over values of theta&#8211;don&#8217;t want to maximize it.</p>
<p>Dynamic programming no longer works directly.  Just say we&#8217;ll flip particular tags and every so often re-sample over posterior.  Can do a collapsed Gibbs sampler by a CRP or Polya Urn.</p>
<p>In Beam Sampling you look at all paths above a certain probability&#8211;pick a particular value of theta and then do inference there.</p>
<p>Exact &#8212; brute force or run gibbs sampling forever.</p>
<p>Variational inference: problem: distribution is complicated.  So approximate by something that does factor nicely.  </p>
<p>(1) p_{alpha, beta} ( theta, z | w )<br />(2) q(theta, z) = q(theta) q(z)</p>
<p>Dirichlet distribution over theta.<br />Pick some &#8220;typical&#8221; value of theta&#8211;a unigram distribution.<br />Measure closeness by least squares in log domain&#8211;optimize by gradient descent: BUT we don&#8217;t know the representation of (1).</p>
<p>Suppose we want to do learning&#8211;<br /> &#8211; Maximize the likelihood of the training data<br /> &#8211; i.e., set alpha and beta to maximize likelihood<br /> &#8211; Belief propagation finds the approximation to marginals of (1)&#8211;so can do gradient descent but it&#8217;s an approximate gradient<br /> &#8211; -&gt; follow gradient of an approximation<br /> &#8211; Variational EM is no better than EM&#8211;can still get stuck in local optima<br /> &#8211; Jensen&#8217;s inequality: log E_q[x] &gt;= E_q[log x]<br /> &#8211; All variational is, is a particular optimization procedure<br /> &#8211; When Q and P are the same, Jensen&#8217;s Inequality becomes exact<br /> &#8211; Variational EM is a generalization of EM<br /> &#8211; Use alternating optimization:<br />   &#8211; 1. Improve Q: E-step<br />     &#8211; P is fixed.  <br />   &#8211; 2. Improve P: M-step<br />     &#8211; Have guess about tagging, so basically just take means and update alpha based on that<br /> &#8211; If we do exact inference by EM: the E-step is simply computing the posterior<br /> &#8211; In this case, get as close as possible to the posterior<br /> &#8211; P might be terrible, so getting Q close to P might not do much good<br /> &#8211; In ordinary EM, given current posterior (exact), update model so it would have predicted those tags<br /> &#8211; Update model as if that approximation was exact<br /> &#8211; Variational EM: system of two simultaneous equations<br /> &#8211; Dirichlets are not complicated: take their logs and they kind of fall apart<br /> &#8211; Not bad to implement: like an extra loop.<br /> &#8211; Need to compute sufficient statistics (just expected counts).</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/98/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/98/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/98/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=98&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2009/11/19/reading-group-thu-nov-19-lda/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
		<item>
		<title>Sum Product Algorithm</title>
		<link>http://noated.wordpress.com/2009/11/19/sum-product-algorithm/</link>
		<comments>http://noated.wordpress.com/2009/11/19/sum-product-algorithm/#comments</comments>
		<pubDate>Thu, 19 Nov 2009 17:02:00 +0000</pubDate>
		<dc:creator>noandrews</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://noated.wordpress.com/2009/11/19/sum-product-algorithm</guid>
		<description><![CDATA[The simplest form of the algorithm is when the factor graph is a tree: in this case the algorithm computes exact marginals, and terminates after 2 steps. Before starting, the graph is orientated by designating one node as the root; any non-root node which is connected to only one other node is called a leaf. [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=97&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></description>
			<content:encoded><![CDATA[<p>The simplest form of the algorithm is when the factor graph is a tree: in this case the algorithm computes exact marginals, and terminates after 2 steps.</p>
<p>Before starting, the graph is orientated by designating one node as the root; any non-root node which is connected to only one other node is called a leaf.</p>
<p>1. In the first step, messages are passed inwards: starting at the leaves, each node passes a message along the (unique) edge towards the root node. The tree structure guarantees that it is possible to obtain messages from all other adjoining nodes before passing the message on. This continues until the root has obtained messages from all of its adjoining nodes.</p>
<p>2. The second step involves passing the messages back out: starting at the root, messages are passed in the reverse direction. The algorithm is completed when all leaves have received their messages.</p>
<br />  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/noated.wordpress.com/97/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/noated.wordpress.com/97/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/noated.wordpress.com/97/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/noated.wordpress.com/97/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gofacebook/noated.wordpress.com/97/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/facebook/noated.wordpress.com/97/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gotwitter/noated.wordpress.com/97/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/twitter/noated.wordpress.com/97/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/noated.wordpress.com/97/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/noated.wordpress.com/97/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/noated.wordpress.com/97/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/noated.wordpress.com/97/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/noated.wordpress.com/97/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/noated.wordpress.com/97/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=noated.wordpress.com&amp;blog=10768714&amp;post=97&amp;subd=noated&amp;ref=&amp;feed=1" width="1" height="1" />]]></content:encoded>
			<wfw:commentRss>http://noated.wordpress.com/2009/11/19/sum-product-algorithm/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/d6044d9569328efe44f85aa55b942f35?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">noandrews</media:title>
		</media:content>
	</item>
	</channel>
</rss>
